From e3aa86671e4f723c228f413385cea2cfd6a6ebf4 Mon Sep 17 00:00:00 2001 From: novacrazy Date: Wed, 19 Jan 2022 01:13:05 -0600 Subject: [PATCH] Initial commit --- .gitignore | 3 + Cargo.toml | 35 ++++++ benches/timestamp.rs | 75 +++++++++++ examples/ts_asm.rs | 15 +++ rustfmt.toml | 2 + src/format.rs | 161 ++++++++++++++++++++++++ src/lib.rs | 288 +++++++++++++++++++++++++++++++++++++++++++ src/macros.rs | 55 +++++++++ src/parse.rs | 280 +++++++++++++++++++++++++++++++++++++++++ src/ts_str.rs | 149 ++++++++++++++++++++++ tests/tests.rs | 82 ++++++++++++ 11 files changed, 1145 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 benches/timestamp.rs create mode 100644 examples/ts_asm.rs create mode 100644 rustfmt.toml create mode 100644 src/format.rs create mode 100644 src/lib.rs create mode 100644 src/macros.rs create mode 100644 src/parse.rs create mode 100644 src/ts_str.rs create mode 100644 tests/tests.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb1d4ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +.vscode \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..eb00189 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "iso8061-timestamp" +version = "0.1.0" +authors = ["novacrazy "] +edition = "2021" +description = "High-performance ISO8061 Timestamp formatting and parsing" +keywords = ["date", "time", "timestamp", "iso8061", "formatting", "parsing"] +categories = ["date-and-time"] + +[features] +std = [] +pg = ["postgres-types", "bytes", "std"] +nightly = [] +default = ["std", "serde"] + +[dependencies] +serde = { optional = true, version = "1" } +time = { version = "0.3", features = ["macros"] } +postgres-types = { optional = true, version = "0.2.2", features = ["with-time-0_3"] } +bytes = { optional = true, version = "1.1.0" } +generic-array = "0.14.4" + +[dev-dependencies] +time = { version = "0.3", 
features = ["macros", "parsing", "formatting"] } +criterion = "0.3" +chrono = "0.4" + +[[bench]] +name = "timestamp" +harness = false + +[profile.release] +opt-level = 3 +lto = 'fat' +codegen-units = 1 diff --git a/benches/timestamp.rs b/benches/timestamp.rs new file mode 100644 index 0000000..a805045 --- /dev/null +++ b/benches/timestamp.rs @@ -0,0 +1,75 @@ +#![allow(deprecated)] + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use iso8061_timestamp::Timestamp; + +fn criterion_benchmark(c: &mut Criterion) { + let offset = time::UtcOffset::from_hms(-4, 30, 0).unwrap(); + + c.bench_function("format_iso8061", |b| { + let ts = black_box(Timestamp::now_utc()); + + b.iter(|| ts.format()); + }); + + c.bench_function("format_iso8061_short", |b| { + let ts = black_box(Timestamp::now_utc()); + + b.iter(|| ts.format_short()); + }); + + c.bench_function("format_iso8061_offset", |b| { + let ts = black_box(Timestamp::now_utc()); + + b.iter(|| ts.format_with_offset(offset)); + }); + + c.bench_function("format_is8061_slow", |b| { + let ts = black_box(Utc::now().naive_utc()); + + b.iter(|| format_naivedatetime(ts)); + }); + + c.bench_function("format_time", |b| { + let ts = black_box(time::OffsetDateTime::now_utc()); + + b.iter(|| ts.format(&time::format_description::well_known::Rfc3339).unwrap()); + }); + + c.bench_function("parse_iso8061_custom", |b| { + let ts = black_box(Timestamp::now_utc().format()); + + b.iter(|| Timestamp::parse(&ts)); + }); + + c.bench_function("parse_iso8061_chrono", |b| { + let ts = black_box("2021-10-17T02:03:01+00:00"); + + type T = DateTime; + + b.iter(|| T::parse_from_rfc3339(&ts).unwrap()); + }); + + c.bench_function("parse_iso8061_time", |b| { + let ts = black_box("2021-10-17T02:03:01+00:00"); + + use time::{format_description::well_known::Rfc3339, OffsetDateTime}; + + b.iter(|| OffsetDateTime::parse(ts, &Rfc3339).unwrap()); + }); + + c.bench_function("to_unix_timestamp_ms", |b| { + let ts = 
/// Builds the two-digit ASCII lookup table used by the formatter.
///
/// Entry `n` (for `n` in `0..100`) holds the two ASCII bytes of `n` written
/// with a leading zero, e.g. entry `7` is `b"07"` and entry `42` is `b"42"`.
/// The function is `const` so the table can be evaluated at compile time.
const fn make_table() -> [[u8; 2]; 100] {
    let mut pairs = [[b'0'; 2]; 100];

    // `for` loops are not available in `const fn`, hence the manual counter.
    let mut n: usize = 0;
    while n < 100 {
        pairs[n] = [b'0' + (n / 10) as u8, b'0' + (n % 10) as u8];
        n += 1;
    }

    pairs
}
make_day_table(false); +const ORDINAL_TABLE_L: [(Month, u8); 366] = make_day_table(true); + +fn get_ymd(d: Date) -> (i32, Month, u8) { + let year = d.year(); + + let table = match time::util::is_leap_year(year) { + true => &ORDINAL_TABLE_L, + false => &ORDINAL_TABLE, + }; + + let ordinal = d.ordinal(); + + let (month, day) = unsafe { *table.get_unchecked(ordinal as usize) }; + + (year, month, day) +} +*/ + +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0}; + +#[cfg(target_arch = "x86")] +use core::arch::x86::{_mm_prefetch, _MM_HINT_T0}; + +#[rustfmt::skip] +#[allow(unused_assignments)] +#[inline(always)] +pub fn format_iso8061(ts: PrimitiveDateTime, offset: UtcOffset) -> TimestampStr { + let lookup = LOOKUP.as_ptr(); + if cfg!(any(target_arch = "x86_64", target_arch = "x86")) { + unsafe { _mm_prefetch::<_MM_HINT_T0>(lookup as _) } + } + + // decompose timestamp + //let (year, month, day) = get_ymd(ts.date()); + let (year, month, day) = ts.to_calendar_date(); + let (hour, minute, second, milliseconds) = ts.as_hms_milli(); + + let mut buf = S::init(); + let mut pos = 0; + + macro_rules! 
write_num { + ($s: expr, $len: expr, $max: expr) => {unsafe { + let value = $s; + + // tell the compiler that the max value is known + assume!(value <= $max); + + let buf = buf.as_mut_ptr().add(pos); + + match $len { + 2 => { + buf.copy_from_nonoverlapping(lookup.add(value as usize) as *const u8, 2); + } + 3 => { + let ab = value / 10; + let c = value % 10; + + buf.copy_from_nonoverlapping(lookup.add(ab as usize) as *const u8, 2); + *buf.add(2) = (*lookup.add(c as usize))[1]; + } + 4 => { + let value = value as u16; + + let ab = value / 100; + let cd = value % 100; + + buf.copy_from_nonoverlapping(lookup.add(ab as usize) as *const u8, 2); + buf.add(2).copy_from_nonoverlapping(lookup.add(cd as usize) as *const u8, 2); + } + _ => core::hint::unreachable_unchecked() + } + + pos += $len; + + if S::IS_FULL { pos += 1; } + }}; + } + + write_num!(year as u16, 4, 9999); // YYYY- + write_num!(month as u8, 2, 12); // MM- + write_num!(day, 2, 31); // DDT? + if !S::IS_FULL { pos += 1; } // T + write_num!(hour, 2, 59); // HH: + write_num!(minute, 2, 59); // mm: + write_num!(second, 2, 59); // ss.? + if !S::IS_FULL { pos += 1; } // . + write_num!(milliseconds, 3, 999); // SSS + + if S::HAS_OFFSET && S::IS_FULL { + if offset.is_negative() { + // go back one and overwrite + + unsafe { *buf.as_mut_ptr().add(pos - 1) = b'-'; } + } + + let (h, m, _) = offset.as_hms(); + + write_num!(h.abs(), 2, 23); // HZ: + write_num!(m.abs(), 2, 59); // MZ + } + + TimestampStr(buf) +} + +#[cfg(test)] +mod tests { + use crate::ts_str::FullOffset; + + use super::*; + + #[test] + fn test_offset() { + let ts: PrimitiveDateTime = time::macros::datetime!(2014-4-12 4:00 PM); + let o = UtcOffset::from_hms(-4, 30, 0).unwrap(); + + let formatted = format_iso8061::(ts, o); + + assert_eq!("2014-04-12T16:00:00.000-04:30", &*formatted); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..578970f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,288 @@ +//! ISO8061 Timestamp +//! +//! 
This crate provides high-performance formatting and parsing routines for ISO 8601 timestamps, primarily focused on UTC values but with support +//! for parsing (and automatically applying) UTC Offsets. +//! +//! The primary purpose of this is to keep the lightweight representation of timestamps within data structures, and only format it to +//! a string when needed via Serde. +//! +//! The [`Timestamp`](Timestamp) struct is only 12 bytes, while the formatted strings can be as large as 29 bytes, +//! and care is taken to avoid heap allocations when formatting. +//! +//! + +#![cfg_attr(not(feature = "std"), no_std)] + +use core::ops::{Deref, DerefMut}; +use core::time::Duration; + +#[cfg(feature = "std")] +use std::time::SystemTime; + +use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset}; + +#[macro_use] +mod macros; + +mod format; +mod parse; +mod ts_str; + +use ts_str::{Full, FullOffset, Short}; + +pub use ts_str::TimestampStr; + +/// Timestamp formats +pub mod formats { + pub use crate::ts_str::{Full, FullOffset, Short}; +} + +/// UTC Timestamp with nanosecond precision, millisecond-precision when serialized to serde (JSON). 
+#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct Timestamp(PrimitiveDateTime); + +use core::fmt; + +impl fmt::Debug for Timestamp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ts = self.format(); + + f.debug_tuple("Timestamp").field(&ts).finish() + } +} + +#[cfg(feature = "std")] +impl From for Timestamp { + fn from(ts: SystemTime) -> Self { + Timestamp(match ts.duration_since(SystemTime::UNIX_EPOCH) { + Ok(dur) => Self::PRIMITIVE_UNIX_EPOCH + dur, + Err(err) => Self::PRIMITIVE_UNIX_EPOCH - err.duration(), + }) + } +} + +impl From for Timestamp { + fn from(ts: OffsetDateTime) -> Self { + let utc_datetime = ts.to_offset(UtcOffset::UTC); + let date = utc_datetime.date(); + let time = utc_datetime.time(); + Timestamp(PrimitiveDateTime::new(date, time)) + } +} + +impl From for Timestamp { + #[inline] + fn from(ts: PrimitiveDateTime) -> Self { + Timestamp(ts) + } +} + +#[cfg(feature = "std")] +impl Timestamp { + /// Get the current time, assuming UTC + #[inline] + pub fn now_utc() -> Self { + SystemTime::now().into() + } +} + +impl Timestamp { + const PRIMITIVE_UNIX_EPOCH: PrimitiveDateTime = time::macros::datetime!(1970 - 01 - 01 00:00); + + pub const UNIX_EPOCH: Self = Timestamp(Self::PRIMITIVE_UNIX_EPOCH); + + pub fn from_unix_timestamp(seconds: i64) -> Self { + if seconds < 0 { + Self::UNIX_EPOCH - Duration::from_secs(-seconds as u64) + } else { + Self::UNIX_EPOCH + Duration::from_secs(seconds as u64) + } + } + + pub fn from_unix_timestamp_ms(milliseconds: i64) -> Self { + if milliseconds < 0 { + Self::UNIX_EPOCH - Duration::from_millis(-milliseconds as u64) + } else { + Self::UNIX_EPOCH + Duration::from_millis(milliseconds as u64) + } + } + + pub fn to_unix_timestamp_ms(self) -> i64 { + const UNIX_EPOCH_JULIAN_DAY: i64 = time::macros::date!(1970 - 01 - 01).to_julian_day() as i64; + + let day = self.to_julian_day() as i64 - UNIX_EPOCH_JULIAN_DAY; + let (hour, minute, second, ms) = self.as_hms_milli(); + + let 
hours = day * 24 + hour as i64; + let minutes = hours * 60 + minute as i64; + let seconds = minutes * 60 + second as i64; + let millis = seconds * 1000 + ms as i64; + + millis + } + + /// Format timestamp to ISO8061 with full punctuation + pub fn format(&self) -> TimestampStr { + format::format_iso8061(self.0, UtcOffset::UTC) + } + + /// Format timestamp to ISO8061 without most punctuation + pub fn format_short(&self) -> TimestampStr { + format::format_iso8061(self.0, UtcOffset::UTC) + } + + /// Format timestamp to ISO8061 with arbitrary UTC offset. Any offset is formatted as `+HH:MM`, + /// and no timezone conversions are done. It is interpreted literally. + pub fn format_with_offset(&self, offset: UtcOffset) -> TimestampStr { + format::format_iso8061(self.0, offset) + } + + /// Parse to UTC timestamp from any ISO8061 string. Offsets are applied during parsing. + #[inline] + pub fn parse(ts: &str) -> Option { + parse::parse_iso8061(ts).map(Timestamp) + } + + /// Convert to `time::OffsetDateTime` with the given offset. 
/// Serde support for [`Timestamp`].
///
/// Human-readable formats (e.g. JSON) use the full ISO 8601 string, all other formats use
/// the Unix timestamp in milliseconds as an `i64`.
#[cfg(feature = "serde")]
mod serde_impl {
    use serde::de::{Deserialize, Deserializer, Error, Visitor};
    use serde::ser::{Serialize, Serializer};

    use super::Timestamp;

    impl Serialize for Timestamp {
        #[inline]
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            if serializer.is_human_readable() {
                self.format().serialize(serializer)
            } else {
                self.to_unix_timestamp_ms().serialize(serializer)
            }
        }
    }

    impl<'de> Deserialize<'de> for Timestamp {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            use core::fmt;

            struct TsVisitor;

            impl<'de> Visitor<'de> for TsVisitor {
                type Value = Timestamp;

                fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                    formatter.write_str("an ISO8061 Timestamp")
                }

                fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
                where
                    E: Error,
                {
                    Timestamp::parse(v).ok_or_else(|| E::custom("Invalid Format"))
                }

                fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
                where
                    E: Error,
                {
                    // Checked arithmetic: the value comes from untrusted input, so an
                    // out-of-range timestamp must become an error rather than a panic.
                    Timestamp::PRIMITIVE_UNIX_EPOCH
                        .checked_add(time::Duration::milliseconds(v))
                        .map(Timestamp)
                        .ok_or_else(|| E::custom("Timestamp out of range"))
                }

                // Some binary formats report non-negative integers as `u64` even when an
                // `i64` was requested.
                fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
                where
                    E: Error,
                {
                    match i64::try_from(v) {
                        Ok(ms) => self.visit_i64(ms),
                        Err(_) => Err(E::custom("Timestamp out of range")),
                    }
                }
            }

            // Mirror `Serialize`: strings for human-readable formats, milliseconds otherwise.
            if deserializer.is_human_readable() {
                deserializer.deserialize_str(TsVisitor)
            } else {
                deserializer.deserialize_i64(TsVisitor)
            }
        }
    }
}
// borrows technique from https://github.com/rust-lang/hashbrown/pull/209
/// Empty function marked `#[cold]`; `likely`/`unlikely` call it on the branch they
/// consider improbable.
#[cfg(not(feature = "nightly"))]
#[inline]
#[cold]
fn cold() {}

/// Stable stand-in for `core::intrinsics::likely`: returns `b` unchanged and calls the
/// cold function when `b` is `false`.
///
/// Declared `unsafe` although the body performs no unsafe operation —
/// NOTE(review): presumably so call sites match the nightly intrinsic re-export; confirm.
#[cfg(not(feature = "nightly"))]
#[rustfmt::skip]
#[inline(always)]
pub unsafe fn likely(b: bool) -> bool {
    if !b { cold() } b
}

/// Stable stand-in for `core::intrinsics::unlikely`: returns `b` unchanged and calls the
/// cold function when `b` is `true`.
///
/// Declared `unsafe` although the body performs no unsafe operation —
/// NOTE(review): presumably so call sites match the nightly intrinsic re-export; confirm.
#[cfg(not(feature = "nightly"))]
#[rustfmt::skip]
#[inline(always)]
pub unsafe fn unlikely(b: bool) -> bool {
    if b { cold() } b
}

/// Stable stand-in for `core::intrinsics::assume`.
///
/// # Safety
///
/// `b` must be `true`. When it is `false` this reaches
/// `core::hint::unreachable_unchecked()`, which is undefined behaviour.
#[cfg(not(feature = "nightly"))]
#[rustfmt::skip]
#[inline(always)]
pub unsafe fn assume(b: bool) {
    if !b { core::hint::unreachable_unchecked() }
}
assume { + ($e:expr) => {{ + #[allow(unused_unsafe)] + unsafe { $crate::macros::assume($e) } + }} +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..aee6298 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,280 @@ +use core::time::Duration; + +use time::{Date, Month, PrimitiveDateTime, Time}; + +/// Trait implemented locally for very fast parsing of small unsigned integers +trait FastParse: Sized { + fn parse(s: &[u8]) -> Option; +} + +#[inline] +fn parse_2(s: &[u8]) -> u16 { + assume!(s.len() == 2); + + let zero: u16 = 0x3030; + + let mut buf = [0; 2]; + buf.copy_from_slice(s); + + let digits = u16::from_le_bytes(buf).wrapping_sub(zero); + + //println!("DIGITS: {:04X}", digits); + + ((digits & 0x0f00) >> 8) + ((digits & 0x0f) * 10) +} + +#[inline] +fn parse_4(s: &[u8]) -> u16 { + assume!(s.len() == 4); + + let zero: u32 = 0x30303030; + + let mut buf = [0; 4]; + buf.copy_from_slice(s); + + let mut digits = u32::from_le_bytes(buf).wrapping_sub(zero); + digits = ((digits & 0x0f000f00) >> 8) + ((digits & 0x000f000f) * 10); + digits = ((digits & 0x00ff00ff) >> 16) + ((digits & 0x000000ff) * 100); + digits as u16 +} + +#[inline] +fn parse_3(s: &[u8]) -> u16 { + assume!(s.len() == 3); + + let hundreds = (s[0] - b'0') as u16 * 100; + + hundreds + parse_2(&s[1..3]) +} + +// TODO: Parse 5 and 6 + +macro_rules! 
impl_fp { + ($($t:ty),*) => {$( + impl FastParse for $t { + #[inline] + fn parse(s: &[u8]) -> Option { + match s.len() { + 2 => return Some(parse_2(s) as $t), + 4 => return Some(parse_4(s) as $t), + 3 => return Some(parse_3(s) as $t), + //1 => return Some((s[0].wrapping_sub(b'0')) as $t), + _ => {} + } + + + let mut num = 0; + let mut overflow = false; + + for byte in s { + let digit = byte.wrapping_sub(b'0'); + overflow |= digit > 9; + num = (num * 10) + digit as $t; + } + + match overflow { + false => Some(num), + true => None, + } + } + } + )*}; +} + +impl_fp!(u8, u16, u32); + +pub fn parse_iso8061(ts: &str) -> Option { + let b = ts.as_bytes(); + + #[inline(always)] + fn parse_offset(b: &[u8], offset: usize, len: usize) -> Option { + b.get(offset..(offset + len)).and_then(|x| T::parse(x)) + } + + #[inline(always)] + fn is_byte(b: &[u8], offset: usize, byte: u8) -> usize { + match b.get(offset) { + Some(&b) => (b == byte) as usize, + None => 0, + } + } + + let mut offset = 0; + + let year = parse_offset::(b, offset, 4)?; + offset += 4; + offset += is_byte(b, offset, b'-'); // YYYY-? + + //println!("YEAR: {}", year); + + let month = parse_offset::(b, offset, 2)?; + offset += 2; + offset += is_byte(b, offset, b'-'); // MM-? 
+ + //println!("MONTH: {}", month); + + let day = parse_offset::(b, offset, 2)?; + offset += 2; // DD + + //println!("DAY: {}", day); + + // only parsed 4 digits + assume!(year <= 9999); + + let ymd = Date::from_calendar_date(year as i32, Month::try_from(month).ok()?, day).ok()?; + + //println!("{}-{}-{}", year, month, day); + + // if no T, then return + if b.get(offset).map(|c| *c | 32) != Some(b't') { + return None; + } + + offset += 1; // T + + let hour = parse_offset::(b, offset, 2)?; + offset += 2; + offset += is_byte(b, offset, b':'); + + //println!("HOUR: {}", hour); + + let minute = parse_offset::(b, offset, 2)?; + offset += 2; + offset += is_byte(b, offset, b':'); + + //println!("MINUTE: {}", minute); + + let maybe_time; + + // if the next character is a digit, parse seconds and milliseconds, otherwise move on + match b.get(offset) { + Some(b'0'..=b'9') => { + let second = parse_offset::(b, offset, 2)?; + offset += 2; + + if b.get(offset).copied() == Some(b'.') { + offset += 1; + + let mut factor: u32 = 100_000_000; // up to 9 decimal places + let mut nanosecond: u32 = 0; + + while let Some(c) = b.get(offset) { + let d = c.wrapping_sub(b'0'); + + if unlikely!(d > 9) { + break; // break on non-numeric input + } + + nanosecond += d as u32 * factor; + factor /= 10; + offset += 1; + } + + // if leap seconds, ignore the parsed value and set it to just before 60 + // doing it this way avoids duplicate code to consume the extra characters + if unlikely!(second == 60) { + maybe_time = Time::from_hms_nano(hour, minute, 59, 999_999_999); + } else { + maybe_time = Time::from_hms_nano(hour, minute, second, nanosecond); + } + } else if unlikely!(second == 60) { + maybe_time = Time::from_hms_nano(hour, minute, 59, 999_999_999); + } else { + maybe_time = Time::from_hms(hour, minute, second) + } + } + _ => maybe_time = Time::from_hms(hour, minute, 0), + } + + //println!("SECOND: {}", second); + + let mut date_time = PrimitiveDateTime::new( + ymd, + match maybe_time { + 
Ok(time) => time, + _ => return None, + }, + ); + + let tz = b.get(offset); + + offset += 1; + + match tz.copied() { + // Z + Some(b'z' | b'Z') => {} + + // timezone, like +00:00 + Some(c @ b'+' | c @ b'-' | c @ 0xe2) => { + if c == 0xe2 { + // check for UTF8 Unicode MINUS SIGN + if likely!(b.get(offset..(offset + 2)) == Some(&[0x88, 0x92])) { + offset += 2; + } else { + return None; + } + } + + let offset_hour = parse_offset::(b, offset, 2)? as u64; + offset += 2; + offset += is_byte(b, offset, b':'); + let offset_minute = parse_offset::(b, offset, 2)? as u64; + offset += 2; + + let dur = Duration::from_secs(60 * 60 * offset_hour + offset_minute * 60); + + if c == b'+' { + date_time += dur; + } else { + date_time -= dur; + } + } + + // Parse trailing "UTC", but it does nothing, same as Z + Some(b'U' | b'u') => match b.get(offset..(offset + 2)) { + None => return None, + Some(tc) => { + for (c, r) in tc.iter().zip(b"tc") { + if (*c | 32) != *r { + return None; + } + } + + offset += 2; + } + }, + _ => return None, + } + + if unlikely!(offset != b.len()) { + return None; + } + + Some(date_time) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_int() { + let i = u32::parse(b"1234567890"); + + assert_eq!(i, Some(1234567890)); + } + + #[test] + fn test_parse_int2() { + let res = parse_2(b"12"); + + assert_eq!(res, 12); + } + + #[test] + fn test_parse_int4() { + let res = parse_4(b"1234"); + + assert_eq!(res, 1234); + } +} diff --git a/src/ts_str.rs b/src/ts_str.rs new file mode 100644 index 0000000..988dddb --- /dev/null +++ b/src/ts_str.rs @@ -0,0 +1,149 @@ +use generic_array::{ArrayLength, GenericArray}; + +mod sealed { + pub trait Sealed {} +} + +#[doc(hidden)] +pub trait TimestampStrStorage: sealed::Sealed { + type Length: ArrayLength; + + fn init() -> GenericArray; + + const IS_FULL: bool; + const HAS_OFFSET: bool; +} + +/// Shorthand format without punctuation, (`YYYYMMDDTHHmmss.SSSZ`) +pub struct Short; +/// Full ISO8061 format with UTC 
offset, (`YYYY-MM-DDTHH:mm:ss.SSSZ`) with character literal `Z` meaning UTC +pub struct Full; +/// Full ISO8061 format with hour/minute timezone offset, (`YYYY-MM-DDTHH:mm:ss.SSS+HZ:MZ`) with offset at end +pub struct FullOffset; + +impl sealed::Sealed for Short {} +impl sealed::Sealed for Full {} +impl sealed::Sealed for FullOffset {} + +impl TimestampStrStorage for Short { + type Length = generic_array::typenum::consts::U20; + + #[inline(always)] + fn init() -> GenericArray { + //nericArray::from(*b"YYYYMMDDTHHmmss.SSSZ") + GenericArray::from(*b"00000000T000000.000Z") + } + + const IS_FULL: bool = false; + const HAS_OFFSET: bool = false; +} + +impl TimestampStrStorage for Full { + type Length = generic_array::typenum::consts::U24; + + #[inline(always)] + fn init() -> GenericArray { + //nericArray::from(*b"YYYY-MM-DDTHH:mm:ss.SSSZ") + GenericArray::from(*b"0000-00-00T00:00:00.000Z") + } + + const IS_FULL: bool = true; + const HAS_OFFSET: bool = false; +} + +impl TimestampStrStorage for FullOffset { + type Length = generic_array::typenum::consts::U29; + + #[inline(always)] + fn init() -> GenericArray { + //nericArray::from(*b"YYYY-MM-DDTHH:mm:ss.SSS+HH:MM") + GenericArray::from(*b"0000-00-00T00:00:00.000+00:00") + } + + const IS_FULL: bool = true; + const HAS_OFFSET: bool = true; +} + +/// Fixed-size inline string storage that exactly fits the formatted timestamp +pub struct TimestampStr(pub(crate) GenericArray); + +impl AsRef for TimestampStr { + #[inline] + fn as_ref(&self) -> &str { + unsafe { core::str::from_utf8_unchecked(&self.0) } + } +} + +use core::borrow::Borrow; + +impl Borrow for TimestampStr { + #[inline] + fn borrow(&self) -> &str { + self.as_ref() + } +} + +use core::ops::Deref; + +impl Deref for TimestampStr { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl PartialEq for TimestampStr { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_ref() == other.as_ref() + } +} + +impl PartialEq 
for TimestampStr { + #[inline] + fn eq(&self, other: &str) -> bool { + self.as_ref() == other + } +} + +impl PartialEq> for str { + #[inline] + fn eq(&self, other: &TimestampStr) -> bool { + self == other.as_ref() + } +} + +use core::fmt; + +impl fmt::Debug for TimestampStr { + #[inline(always)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_ref(), f) + } +} + +impl fmt::Display for TimestampStr { + #[inline(always)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_ref(), f) + } +} + +#[cfg(feature = "serde")] +mod serde_impl { + use serde::ser::{Serialize, Serializer}; + + use super::{TimestampStr, TimestampStrStorage}; + + impl Serialize for TimestampStr { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&*self) + } + } +} diff --git a/tests/tests.rs b/tests/tests.rs new file mode 100644 index 0000000..37604fe --- /dev/null +++ b/tests/tests.rs @@ -0,0 +1,82 @@ +use iso8061_timestamp::Timestamp; + +#[test] +fn test_format_iso8061() { + let now = Timestamp::now_utc(); + + let formatted = now.format(); + + println!("{}", formatted); +} + +#[test] +fn test_format_iso8061_full() { + let now = Timestamp::now_utc(); + + let formatted = now.format(); + + println!("{}", formatted); +} + +#[test] +fn test_parse_iso8061_reflex() { + let now = Timestamp::now_utc(); + + let formatted = now.format(); + + println!("Formatted: {}", formatted); + + let parsed = Timestamp::parse(&formatted).unwrap(); + + assert_eq!(formatted, parsed.format()); +} + +#[test] +fn test_parse_iso8061_variations() { + let fixtures = [ + "2021-10-17T02:03:01+00:00", + "2021-10-17t02:03:01+10:00", + "2021-10-17t02:03+00:00", // without seconds + "2021-10-17t02:03:01.111+00:00", + "2021-10-17T02:03:01-00:00", + "2021-10-17T02:03:01−04:00", // UNICODE MINUS SIGN in offset + "2021-10-17T02:03:01Z", + "20211017T020301Z", + "20211017t020301z", + "20211017T0203z", 
// without seconds + "20211017T020301.123Z", + "20211017T020301.123+00:00", + "20211017T020301.123uTc", + ]; + + for fixture in fixtures { + let parsed = Timestamp::parse(fixture); + + assert!(parsed.is_some(), "Failed to parse: {}", fixture); + + println!("{:?}", parsed.unwrap()); + } +} + +#[test] +fn test_unix_timestamp_ms() { + let now_ts = Timestamp::now_utc(); + let now_ot = now_ts.assume_offset(time::UtcOffset::UTC); + + let unix_ms_a = now_ts.to_unix_timestamp_ms(); + let unix_ms_b = (now_ot.unix_timestamp_nanos() / 1_000_000) as i64; + + assert_eq!(unix_ms_a, unix_ms_b); +} + +#[test] +fn test_parse_nanoseconds() { + let parsed = Timestamp::parse("2021-11-19T04:12:54.000123Z").unwrap(); + + let time = time::Time::from_hms_nano(4, 12, 54, 123000).unwrap(); + let date = time::Date::from_calendar_date(2021, time::Month::November, 19).unwrap(); + + let expected = Timestamp::from(time::PrimitiveDateTime::new(date, time)); + + assert_eq!(parsed, expected); +}