diff --git a/columnar/src/column_values/monotonic_mapping.rs b/columnar/src/column_values/monotonic_mapping.rs index 374970b18..45e9ecdd2 100644 --- a/columnar/src/column_values/monotonic_mapping.rs +++ b/columnar/src/column_values/monotonic_mapping.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; use std::marker::PhantomData; +use common::DateTime; use fastdivide::DividerU64; use super::MonotonicallyMappableToU128; @@ -195,17 +196,15 @@ impl MonotonicallyMappableToU64 for i64 { } } -impl MonotonicallyMappableToU64 for crate::DateTime { +impl MonotonicallyMappableToU64 for DateTime { #[inline(always)] fn to_u64(self) -> u64 { - common::i64_to_u64(self.timestamp_micros) + common::i64_to_u64(self.into_timestamp_micros()) } #[inline(always)] fn from_u64(val: u64) -> Self { - crate::DateTime { - timestamp_micros: common::u64_to_i64(val), - } + DateTime::from_timestamp_micros(common::u64_to_i64(val)) } } diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index 6f9cf99e5..c5bb8182f 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -111,7 +111,7 @@ impl HasAssociatedColumnType for bool { } } -impl HasAssociatedColumnType for crate::DateTime { +impl HasAssociatedColumnType for common::DateTime { fn column_type() -> ColumnType { ColumnType::DateTime } diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 43d3f6620..818a89c6a 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -266,11 +266,15 @@ impl ColumnarWriter { }); } - pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: crate::DateTime) { + pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) { let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena); mutate_or_create_column(hash_map, column_name, |column_opt: Option| { let mut column: ColumnWriter = column_opt.unwrap_or_default(); - column.record(doc, NumericalValue::I64(datetime.timestamp_micros), arena); + column.record( + doc, + NumericalValue::I64(datetime.into_timestamp_micros()), + arena, + ); column }); } diff --git a/columnar/src/dynamic_column.rs b/columnar/src/dynamic_column.rs index e7e158302..6e9da1ae2 100644 --- a/columnar/src/dynamic_column.rs +++ b/columnar/src/dynamic_column.rs @@ -3,12 +3,12 @@ use std::net::Ipv6Addr; use std::sync::Arc; use common::file_slice::FileSlice; -use common::{HasLen, OwnedBytes}; +use common::{DateTime, HasLen, OwnedBytes}; use crate::column::{BytesColumn, Column, StrColumn}; use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn}; use crate::columnar::ColumnType; -use crate::{Cardinality, DateTime, NumericalType}; +use crate::{Cardinality, NumericalType}; #[derive(Clone)] pub enum DynamicColumn { @@ -188,7 +188,7 @@ static_dynamic_conversions!(Column, Bool); static_dynamic_conversions!(Column, U64); static_dynamic_conversions!(Column, I64); static_dynamic_conversions!(Column, F64); -static_dynamic_conversions!(Column, DateTime); +static_dynamic_conversions!(Column, DateTime); static_dynamic_conversions!(StrColumn, Str); static_dynamic_conversions!(BytesColumn, Bytes); static_dynamic_conversions!(Column, IpAddr); @@ -243,7 +243,7 @@ impl DynamicColumnHandle { ColumnType::Bool => crate::column::open_column_u64::(column_bytes)?.into(), ColumnType::IpAddr => crate::column::open_column_u128::(column_bytes)?.into(), ColumnType::DateTime => { - crate::column::open_column_u64::(column_bytes)?.into() + crate::column::open_column_u64::(column_bytes)?.into() } }; Ok(dynamic_column) diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index ef8661440..d98484578 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -42,16 +42,7 @@ pub struct RowAddr { pub use sstable::Dictionary; pub type Streamer<'a> = sstable::Streamer<'a, VoidSSTable>; -#[derive(Clone, Copy, PartialOrd, PartialEq, Default, Debug)] -pub struct DateTime { - pub timestamp_micros: i64, -} - -impl DateTime { - pub fn into_timestamp_micros(self) -> i64 { - self.timestamp_micros - } -} +pub use common::DateTime; #[derive(Copy, Clone, Debug)] pub struct InvalidData; diff --git a/columnar/src/value.rs b/columnar/src/value.rs index be5edb100..6e39ac8cc 100644 --- a/columnar/src/value.rs +++ b/columnar/src/value.rs @@ -1,3 +1,5 @@ +use common::DateTime; + use crate::InvalidData; #[derive(Copy, Clone, PartialEq, Debug)] @@ -104,10 +106,10 @@ impl Coerce for f64 { } } -impl Coerce for crate::DateTime { +impl Coerce for DateTime { fn coerce(value: NumericalValue) -> Self { let timestamp_micros = i64::coerce(value); - crate::DateTime { timestamp_micros } + DateTime::from_timestamp_micros(timestamp_micros) } } diff --git a/common/Cargo.toml b/common/Cargo.toml index 9d16079e8..58e4d669c 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -16,6 +16,8 @@ repository = "https://github.com/quickwit-oss/tantivy" byteorder = "1.4.3" ownedbytes = { version= "0.5", path="../ownedbytes" } async-trait = "0.1" +time = { version = "0.3.10", features = ["serde-well-known"] } +serde = { version = "1.0.136", features = ["derive"] } [dev-dependencies] proptest = "1.0.0" diff --git a/common/src/datetime.rs b/common/src/datetime.rs new file mode 100644 index 000000000..b818066aa --- /dev/null +++ b/common/src/datetime.rs @@ -0,0 +1,136 @@ +use std::fmt; + +use serde::{Deserialize, Serialize}; +use time::format_description::well_known::Rfc3339; +use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset}; + +/// DateTime Precision +#[derive( + Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default, +)] +#[serde(rename_all = "lowercase")] +pub enum DatePrecision { + /// Seconds precision + #[default] + Seconds, + /// Milli-seconds precision. + Milliseconds, + /// Micro-seconds precision. + Microseconds, +} + +/// A date/time value with microsecond precision. +/// +/// This timestamp does not carry any explicit time zone information. +/// Users are responsible for applying the provided conversion +/// functions consistently. Internally the time zone is assumed +/// to be UTC, which is also used implicitly for JSON serialization. +/// +/// All constructors and conversions are provided as explicit +/// functions and not by implementing any `From`/`Into` traits +/// to prevent unintended usage. +#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct DateTime { + // Timestamp in microseconds. + pub(crate) timestamp_micros: i64, +} + +impl DateTime { + /// Create new from UNIX timestamp in seconds + pub const fn from_timestamp_secs(seconds: i64) -> Self { + Self { + timestamp_micros: seconds * 1_000_000, + } + } + + /// Create new from UNIX timestamp in milliseconds + pub const fn from_timestamp_millis(milliseconds: i64) -> Self { + Self { + timestamp_micros: milliseconds * 1_000, + } + } + + /// Create new from UNIX timestamp in microseconds. + pub const fn from_timestamp_micros(microseconds: i64) -> Self { + Self { + timestamp_micros: microseconds, + } + } + + /// Create new from `OffsetDateTime` + /// + /// The given date/time is converted to UTC and the actual + /// time zone is discarded. + pub const fn from_utc(dt: OffsetDateTime) -> Self { + let timestamp_micros = dt.unix_timestamp() * 1_000_000 + dt.microsecond() as i64; + Self { timestamp_micros } + } + + /// Create new from `PrimitiveDateTime` + /// + /// Implicitly assumes that the given date/time is in UTC! + /// Otherwise the original value must only be reobtained with + /// [`Self::into_primitive()`]. + pub fn from_primitive(dt: PrimitiveDateTime) -> Self { + Self::from_utc(dt.assume_utc()) + } + + /// Convert to UNIX timestamp in seconds. + pub const fn into_timestamp_secs(self) -> i64 { + self.timestamp_micros / 1_000_000 + } + + /// Convert to UNIX timestamp in milliseconds. + pub const fn into_timestamp_millis(self) -> i64 { + self.timestamp_micros / 1_000 + } + + /// Convert to UNIX timestamp in microseconds. + pub const fn into_timestamp_micros(self) -> i64 { + self.timestamp_micros + } + + /// Convert to UTC `OffsetDateTime` + pub fn into_utc(self) -> OffsetDateTime { + let timestamp_nanos = self.timestamp_micros as i128 * 1000; + let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos) + .expect("valid UNIX timestamp"); + debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset()); + utc_datetime + } + + /// Convert to `OffsetDateTime` with the given time zone + pub fn into_offset(self, offset: UtcOffset) -> OffsetDateTime { + self.into_utc().to_offset(offset) + } + + /// Convert to `PrimitiveDateTime` without any time zone + /// + /// The value should have been constructed with [`Self::from_primitive()`]. + /// Otherwise the time zone is implicitly assumed to be UTC. + pub fn into_primitive(self) -> PrimitiveDateTime { + let utc_datetime = self.into_utc(); + // Discard the UTC time zone offset + debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset()); + PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time()) + } + + /// Truncates the microseconds value to the corresponding precision. + pub fn truncate(self, precision: DatePrecision) -> Self { + let truncated_timestamp_micros = match precision { + DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000, + DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000, + DatePrecision::Microseconds => self.timestamp_micros, + }; + Self { + timestamp_micros: truncated_timestamp_micros, + } + } +} + +impl fmt::Debug for DateTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let utc_rfc3339 = self.into_utc().format(&Rfc3339).map_err(|_| fmt::Error)?; + f.write_str(&utc_rfc3339) + } +} diff --git a/common/src/lib.rs b/common/src/lib.rs index f73b7b1b8..5692e7636 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -5,12 +5,14 @@ use std::ops::Deref; pub use byteorder::LittleEndian as Endianness; mod bitset; +mod datetime; pub mod file_slice; mod group_by; mod serialize; mod vint; mod writer; pub use bitset::*; +pub use datetime::{DatePrecision, DateTime}; pub use group_by::GroupByIteratorExtended; pub use ownedbytes::{OwnedBytes, StableDeref}; pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 29c25d23f..e24d34f09 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -57,7 +57,7 @@ pub fn test_filter_collector() -> crate::Result<()> { assert_eq!(filtered_top_docs.len(), 0); - fn date_filter(value: columnar::DateTime) -> bool { + fn date_filter(value: DateTime) -> bool { (crate::DateTime::from(value).into_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap()) .whole_weeks() diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 0095d22fa..45cfc749b 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -103,18 +103,6 @@ impl FastValue for DateTime { } } -impl columnar::MonotonicallyMappableToU64 for DateTime { - fn to_u64(self) -> u64 { - self.timestamp_micros.to_u64() - } - - fn from_u64(val: u64) -> Self { - DateTime { - timestamp_micros: MonotonicallyMappableToU64::from_u64(val), - } - } -} - #[cfg(test)] mod tests { @@ -368,7 +356,7 @@ mod tests { let file = directory.open_read(path).unwrap(); let fast_field_readers = FastFieldReaders::open(file).unwrap(); let col = fast_field_readers.date("date").unwrap(); - assert_eq!(col.get_val(0), columnar::DateTime::default()); + assert_eq!(col.get_val(0), DateTime::default()); } // Warning: this generates the same permutation at each call @@ -427,7 +415,7 @@ mod tests { let mut index_writer = index.writer_for_tests().unwrap(); index_writer.set_merge_policy(Box::new(NoMergePolicy)); index_writer - .add_document(doc!(date_field =>DateTime::from_utc(OffsetDateTime::now_utc()))) + .add_document(doc!(date_field => DateTime::from_utc(OffsetDateTime::now_utc()))) .unwrap(); index_writer.commit().unwrap(); index_writer.add_document(doc!()).unwrap(); @@ -725,12 +713,12 @@ mod tests { let segment_reader = searcher.segment_reader(0); let fast_fields = segment_reader.fast_fields(); let date_fast_field = fast_fields - .column_opt::("date") + .column_opt::("date") .unwrap() .unwrap() .first_or_default_col(Default::default()); let dates_fast_field = fast_fields - .column_opt::("multi_date") + .column_opt::("multi_date") .unwrap() .unwrap(); let mut dates = vec![]; @@ -895,7 +883,7 @@ mod tests { let col = readers.date("field").unwrap(); for (i, time) in times.iter().enumerate() { - let dt: crate::DateTime = col.get_val(i as u32).into(); + let dt: DateTime = col.get_val(i as u32).into(); assert_eq!(dt, time.truncate(precision)); } readers.column_num_bytes("field").unwrap() diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index f4355855b..8de4406af 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -105,7 +105,7 @@ impl FastFieldReaders { /// Returns the `date` fast field reader reader associated with `field`. /// /// If `field` is not a date fast field, this method returns an Error. - pub fn date(&self, field: &str) -> crate::Result>> { + pub fn date(&self, field: &str) -> crate::Result>> { self.column_first_or_default(field) } diff --git a/src/lib.rs b/src/lib.rs index 0e226b79d..9fc6945bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -123,148 +123,16 @@ mod functional_test; mod macros; mod future_result; +pub use common::DateTime; /// Re-export of the `time` crate /// /// Tantivy uses [`time`](https://crates.io/crates/time) for dates. pub use time; -use crate::time::format_description::well_known::Rfc3339; -use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset}; - -/// A date/time value with microsecond precision. -/// -/// This timestamp does not carry any explicit time zone information. -/// Users are responsible for applying the provided conversion -/// functions consistently. Internally the time zone is assumed -/// to be UTC, which is also used implicitly for JSON serialization. -/// -/// All constructors and conversions are provided as explicit -/// functions and not by implementing any `From`/`Into` traits -/// to prevent unintended usage. -#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct DateTime { - // Timestamp in microseconds. - pub(crate) timestamp_micros: i64, -} - -impl From for DateTime { - fn from(columnar_datetime: columnar::DateTime) -> Self { - DateTime { - timestamp_micros: columnar_datetime.timestamp_micros, - } - } -} - -impl From for columnar::DateTime { - fn from(datetime: crate::DateTime) -> Self { - columnar::DateTime { - timestamp_micros: datetime.timestamp_micros, - } - } -} - -impl DateTime { - /// Create new from UNIX timestamp in seconds - pub const fn from_timestamp_secs(seconds: i64) -> Self { - Self { - timestamp_micros: seconds * 1_000_000, - } - } - - /// Create new from UNIX timestamp in milliseconds - pub const fn from_timestamp_millis(milliseconds: i64) -> Self { - Self { - timestamp_micros: milliseconds * 1_000, - } - } - - /// Create new from UNIX timestamp in microseconds. - pub const fn from_timestamp_micros(microseconds: i64) -> Self { - Self { - timestamp_micros: microseconds, - } - } - - /// Create new from `OffsetDateTime` - /// - /// The given date/time is converted to UTC and the actual - /// time zone is discarded. - pub const fn from_utc(dt: OffsetDateTime) -> Self { - let timestamp_micros = dt.unix_timestamp() * 1_000_000 + dt.microsecond() as i64; - Self { timestamp_micros } - } - - /// Create new from `PrimitiveDateTime` - /// - /// Implicitly assumes that the given date/time is in UTC! - /// Otherwise the original value must only be reobtained with - /// [`Self::into_primitive()`]. - pub fn from_primitive(dt: PrimitiveDateTime) -> Self { - Self::from_utc(dt.assume_utc()) - } - - /// Convert to UNIX timestamp in seconds. - pub const fn into_timestamp_secs(self) -> i64 { - self.timestamp_micros / 1_000_000 - } - - /// Convert to UNIX timestamp in milliseconds. - pub const fn into_timestamp_millis(self) -> i64 { - self.timestamp_micros / 1_000 - } - - /// Convert to UNIX timestamp in microseconds. - pub const fn into_timestamp_micros(self) -> i64 { - self.timestamp_micros - } - - /// Convert to UTC `OffsetDateTime` - pub fn into_utc(self) -> OffsetDateTime { - let timestamp_nanos = self.timestamp_micros as i128 * 1000; - let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos) - .expect("valid UNIX timestamp"); - debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset()); - utc_datetime - } - - /// Convert to `OffsetDateTime` with the given time zone - pub fn into_offset(self, offset: UtcOffset) -> OffsetDateTime { - self.into_utc().to_offset(offset) - } - - /// Convert to `PrimitiveDateTime` without any time zone - /// - /// The value should have been constructed with [`Self::from_primitive()`]. - /// Otherwise the time zone is implicitly assumed to be UTC. - pub fn into_primitive(self) -> PrimitiveDateTime { - let utc_datetime = self.into_utc(); - // Discard the UTC time zone offset - debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset()); - PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time()) - } - - /// Truncates the microseconds value to the corresponding precision. - pub(crate) fn truncate(self, precision: DatePrecision) -> Self { - let truncated_timestamp_micros = match precision { - DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000, - DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000, - DatePrecision::Microseconds => self.timestamp_micros, - }; - Self { - timestamp_micros: truncated_timestamp_micros, - } - } -} - -impl fmt::Debug for DateTime { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let utc_rfc3339 = self.into_utc().format(&Rfc3339).map_err(|_| fmt::Error)?; - f.write_str(&utc_rfc3339) - } -} - pub use crate::error::TantivyError; pub use crate::future_result::FutureResult; +use crate::time::format_description::well_known::Rfc3339; +use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset}; /// Tantivy result. /// diff --git a/src/schema/date_time_options.rs b/src/schema/date_time_options.rs index 592cc5d4c..fb73ad808 100644 --- a/src/schema/date_time_options.rs +++ b/src/schema/date_time_options.rs @@ -1,24 +1,10 @@ use std::ops::BitOr; +pub use common::DatePrecision; use serde::{Deserialize, Serialize}; use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag}; -/// DateTime Precision -#[derive( - Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default, -)] -#[serde(rename_all = "lowercase")] -pub enum DatePrecision { - /// Seconds precision - #[default] - Seconds, - /// Milli-seconds precision. - Milliseconds, - /// Micro-seconds precision. - Microseconds, -} - /// Defines how DateTime field should be handled by tantivy. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] pub struct DateOptions { diff --git a/src/schema/value.rs b/src/schema/value.rs index 775770b67..aee4fc8bc 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -380,9 +380,7 @@ mod binary_serialize { } Value::Date(ref val) => { DATE_CODE.serialize(writer)?; - let DateTime { - timestamp_micros, .. - } = val; + let timestamp_micros = val.into_timestamp_micros(); timestamp_micros.serialize(writer) } Value::Facet(ref facet) => {