move DateTime to tantivy_common (#1861)

* move DateTime to tantivy_common

* resolve imports of columnar::DateTime as import of common::DateTime
This commit is contained in:
trinity-1686a
2023-02-11 17:03:06 +01:00
committed by GitHub
parent dab93df94e
commit 539ff08a79
15 changed files with 172 additions and 196 deletions

View File

@@ -1,6 +1,7 @@
use std::fmt::Debug;
use std::marker::PhantomData;
use common::DateTime;
use fastdivide::DividerU64;
use super::MonotonicallyMappableToU128;
@@ -195,17 +196,15 @@ impl MonotonicallyMappableToU64 for i64 {
}
}
impl MonotonicallyMappableToU64 for crate::DateTime {
impl MonotonicallyMappableToU64 for DateTime {
#[inline(always)]
fn to_u64(self) -> u64 {
common::i64_to_u64(self.timestamp_micros)
common::i64_to_u64(self.into_timestamp_micros())
}
#[inline(always)]
fn from_u64(val: u64) -> Self {
crate::DateTime {
timestamp_micros: common::u64_to_i64(val),
}
DateTime::from_timestamp_micros(common::u64_to_i64(val))
}
}

View File

@@ -111,7 +111,7 @@ impl HasAssociatedColumnType for bool {
}
}
impl HasAssociatedColumnType for crate::DateTime {
impl HasAssociatedColumnType for common::DateTime {
fn column_type() -> ColumnType {
ColumnType::DateTime
}

View File

@@ -266,11 +266,15 @@ impl ColumnarWriter {
});
}
pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: crate::DateTime) {
pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datetime: common::DateTime) {
let (hash_map, arena) = (&mut self.datetime_field_hash_map, &mut self.arena);
mutate_or_create_column(hash_map, column_name, |column_opt: Option<ColumnWriter>| {
let mut column: ColumnWriter = column_opt.unwrap_or_default();
column.record(doc, NumericalValue::I64(datetime.timestamp_micros), arena);
column.record(
doc,
NumericalValue::I64(datetime.into_timestamp_micros()),
arena,
);
column
});
}

View File

@@ -3,12 +3,12 @@ use std::net::Ipv6Addr;
use std::sync::Arc;
use common::file_slice::FileSlice;
use common::{HasLen, OwnedBytes};
use common::{DateTime, HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
use crate::columnar::ColumnType;
use crate::{Cardinality, DateTime, NumericalType};
use crate::{Cardinality, NumericalType};
#[derive(Clone)]
pub enum DynamicColumn {
@@ -188,7 +188,7 @@ static_dynamic_conversions!(Column<bool>, Bool);
static_dynamic_conversions!(Column<u64>, U64);
static_dynamic_conversions!(Column<i64>, I64);
static_dynamic_conversions!(Column<f64>, F64);
static_dynamic_conversions!(Column<crate::DateTime>, DateTime);
static_dynamic_conversions!(Column<DateTime>, DateTime);
static_dynamic_conversions!(StrColumn, Str);
static_dynamic_conversions!(BytesColumn, Bytes);
static_dynamic_conversions!(Column<Ipv6Addr>, IpAddr);
@@ -243,7 +243,7 @@ impl DynamicColumnHandle {
ColumnType::Bool => crate::column::open_column_u64::<bool>(column_bytes)?.into(),
ColumnType::IpAddr => crate::column::open_column_u128::<Ipv6Addr>(column_bytes)?.into(),
ColumnType::DateTime => {
crate::column::open_column_u64::<crate::DateTime>(column_bytes)?.into()
crate::column::open_column_u64::<DateTime>(column_bytes)?.into()
}
};
Ok(dynamic_column)

View File

@@ -42,16 +42,7 @@ pub struct RowAddr {
pub use sstable::Dictionary;
pub type Streamer<'a> = sstable::Streamer<'a, VoidSSTable>;
#[derive(Clone, Copy, PartialOrd, PartialEq, Default, Debug)]
pub struct DateTime {
pub timestamp_micros: i64,
}
impl DateTime {
pub fn into_timestamp_micros(self) -> i64 {
self.timestamp_micros
}
}
pub use common::DateTime;
#[derive(Copy, Clone, Debug)]
pub struct InvalidData;

View File

@@ -1,3 +1,5 @@
use common::DateTime;
use crate::InvalidData;
#[derive(Copy, Clone, PartialEq, Debug)]
@@ -104,10 +106,10 @@ impl Coerce for f64 {
}
}
impl Coerce for crate::DateTime {
impl Coerce for DateTime {
fn coerce(value: NumericalValue) -> Self {
let timestamp_micros = i64::coerce(value);
crate::DateTime { timestamp_micros }
DateTime::from_timestamp_micros(timestamp_micros)
}
}

View File

@@ -16,6 +16,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
byteorder = "1.4.3"
ownedbytes = { version= "0.5", path="../ownedbytes" }
async-trait = "0.1"
time = { version = "0.3.10", features = ["serde-well-known"] }
serde = { version = "1.0.136", features = ["derive"] }
[dev-dependencies]
proptest = "1.0.0"

136
common/src/datetime.rs Normal file
View File

@@ -0,0 +1,136 @@
use std::fmt;
use serde::{Deserialize, Serialize};
use time::format_description::well_known::Rfc3339;
use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// Granularity to which a [`DateTime`] can be truncated.
///
/// Variants are declared from coarsest to finest; the derived ordering
/// follows that declaration order. With serde, the variant names are
/// written in lowercase.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DatePrecision {
    /// Precision of one second. This is the default variant.
    #[default]
    Seconds,
    /// Precision of one millisecond.
    Milliseconds,
    /// Precision of one microsecond.
    Microseconds,
}
/// A point in time, stored with microsecond precision.
///
/// No time zone is attached to the value. The stored timestamp is
/// interpreted as UTC internally (and, implicitly, for JSON
/// serialization), so callers must apply the conversion functions
/// consistently.
///
/// Construction and conversion are deliberately exposed only as explicit
/// functions; no `From`/`Into` implementations are provided, to avoid
/// accidental conversions.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime {
    /// UNIX timestamp in microseconds.
    pub(crate) timestamp_micros: i64,
}
impl DateTime {
    /// Number of microseconds in one second.
    const MICROS_PER_SECOND: i64 = 1_000_000;
    /// Number of microseconds in one millisecond.
    const MICROS_PER_MILLI: i64 = 1_000;

    /// Builds a `DateTime` from a UNIX timestamp given in seconds.
    pub const fn from_timestamp_secs(seconds: i64) -> Self {
        Self::from_timestamp_micros(seconds * Self::MICROS_PER_SECOND)
    }

    /// Builds a `DateTime` from a UNIX timestamp given in milliseconds.
    pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
        Self::from_timestamp_micros(milliseconds * Self::MICROS_PER_MILLI)
    }

    /// Builds a `DateTime` from a UNIX timestamp given in microseconds.
    pub const fn from_timestamp_micros(microseconds: i64) -> Self {
        Self {
            timestamp_micros: microseconds,
        }
    }

    /// Builds a `DateTime` from an `OffsetDateTime`.
    ///
    /// Only the instant is kept: the value is stored as a UTC timestamp
    /// and the original time zone offset is dropped.
    pub const fn from_utc(dt: OffsetDateTime) -> Self {
        let whole_seconds = dt.unix_timestamp();
        let subsecond_micros = dt.microsecond() as i64;
        Self::from_timestamp_micros(whole_seconds * Self::MICROS_PER_SECOND + subsecond_micros)
    }

    /// Builds a `DateTime` from a `PrimitiveDateTime`.
    ///
    /// The input is treated as if it were expressed in UTC. If it was not,
    /// the original value should only be read back through
    /// [`Self::into_primitive()`].
    pub fn from_primitive(dt: PrimitiveDateTime) -> Self {
        let assumed_utc = dt.assume_utc();
        Self::from_utc(assumed_utc)
    }

    /// Returns the UNIX timestamp in seconds.
    pub const fn into_timestamp_secs(self) -> i64 {
        self.timestamp_micros / Self::MICROS_PER_SECOND
    }

    /// Returns the UNIX timestamp in milliseconds.
    pub const fn into_timestamp_millis(self) -> i64 {
        self.timestamp_micros / Self::MICROS_PER_MILLI
    }

    /// Returns the UNIX timestamp in microseconds.
    pub const fn into_timestamp_micros(self) -> i64 {
        self.timestamp_micros
    }

    /// Converts to an `OffsetDateTime` in UTC.
    ///
    /// # Panics
    ///
    /// Panics if `OffsetDateTime::from_unix_timestamp_nanos` rejects the
    /// stored timestamp.
    pub fn into_utc(self) -> OffsetDateTime {
        // Widen before scaling to nanoseconds so the multiplication cannot overflow.
        let nanos_since_epoch = (self.timestamp_micros as i128) * 1000;
        let converted = OffsetDateTime::from_unix_timestamp_nanos(nanos_since_epoch)
            .expect("valid UNIX timestamp");
        debug_assert_eq!(UtcOffset::UTC, converted.offset());
        converted
    }

    /// Converts to an `OffsetDateTime` expressed in the given time zone offset.
    pub fn into_offset(self, offset: UtcOffset) -> OffsetDateTime {
        let utc = self.into_utc();
        utc.to_offset(offset)
    }

    /// Converts to a `PrimitiveDateTime`, which carries no time zone.
    ///
    /// Intended for values built with [`Self::from_primitive()`]; for any
    /// other value the result is the UTC date and time.
    pub fn into_primitive(self) -> PrimitiveDateTime {
        let utc = self.into_utc();
        // The offset is always UTC here, so dropping it loses no information.
        debug_assert_eq!(UtcOffset::UTC, utc.offset());
        let (date, time_of_day) = (utc.date(), utc.time());
        PrimitiveDateTime::new(date, time_of_day)
    }

    /// Truncates the microsecond timestamp to the requested precision.
    ///
    /// Integer division rounds toward zero, so a negative timestamp is
    /// truncated toward the epoch rather than toward the past.
    pub fn truncate(self, precision: DatePrecision) -> Self {
        let unit = match precision {
            DatePrecision::Seconds => Self::MICROS_PER_SECOND,
            DatePrecision::Milliseconds => Self::MICROS_PER_MILLI,
            // Already at the finest stored precision: nothing to drop.
            DatePrecision::Microseconds => return self,
        };
        Self::from_timestamp_micros((self.timestamp_micros / unit) * unit)
    }
}
impl fmt::Debug for DateTime {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let utc_rfc3339 = self.into_utc().format(&Rfc3339).map_err(|_| fmt::Error)?;
f.write_str(&utc_rfc3339)
}
}

View File

@@ -5,12 +5,14 @@ use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;
mod bitset;
mod datetime;
pub mod file_slice;
mod group_by;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use datetime::{DatePrecision, DateTime};
pub use group_by::GroupByIteratorExtended;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};

View File

@@ -57,7 +57,7 @@ pub fn test_filter_collector() -> crate::Result<()> {
assert_eq!(filtered_top_docs.len(), 0);
fn date_filter(value: columnar::DateTime) -> bool {
fn date_filter(value: DateTime) -> bool {
(crate::DateTime::from(value).into_utc()
- OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
.whole_weeks()

View File

@@ -103,18 +103,6 @@ impl FastValue for DateTime {
}
}
impl columnar::MonotonicallyMappableToU64 for DateTime {
fn to_u64(self) -> u64 {
self.timestamp_micros.to_u64()
}
fn from_u64(val: u64) -> Self {
DateTime {
timestamp_micros: MonotonicallyMappableToU64::from_u64(val),
}
}
}
#[cfg(test)]
mod tests {
@@ -368,7 +356,7 @@ mod tests {
let file = directory.open_read(path).unwrap();
let fast_field_readers = FastFieldReaders::open(file).unwrap();
let col = fast_field_readers.date("date").unwrap();
assert_eq!(col.get_val(0), columnar::DateTime::default());
assert_eq!(col.get_val(0), DateTime::default());
}
// Warning: this generates the same permutation at each call
@@ -427,7 +415,7 @@ mod tests {
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer
.add_document(doc!(date_field =>DateTime::from_utc(OffsetDateTime::now_utc())))
.add_document(doc!(date_field => DateTime::from_utc(OffsetDateTime::now_utc())))
.unwrap();
index_writer.commit().unwrap();
index_writer.add_document(doc!()).unwrap();
@@ -725,12 +713,12 @@ mod tests {
let segment_reader = searcher.segment_reader(0);
let fast_fields = segment_reader.fast_fields();
let date_fast_field = fast_fields
.column_opt::<columnar::DateTime>("date")
.column_opt::<DateTime>("date")
.unwrap()
.unwrap()
.first_or_default_col(Default::default());
let dates_fast_field = fast_fields
.column_opt::<columnar::DateTime>("multi_date")
.column_opt::<DateTime>("multi_date")
.unwrap()
.unwrap();
let mut dates = vec![];
@@ -895,7 +883,7 @@ mod tests {
let col = readers.date("field").unwrap();
for (i, time) in times.iter().enumerate() {
let dt: crate::DateTime = col.get_val(i as u32).into();
let dt: DateTime = col.get_val(i as u32).into();
assert_eq!(dt, time.truncate(precision));
}
readers.column_num_bytes("field").unwrap()

View File

@@ -105,7 +105,7 @@ impl FastFieldReaders {
/// Returns the `date` fast field reader associated with `field`.
///
/// If `field` is not a date fast field, this method returns an Error.
pub fn date(&self, field: &str) -> crate::Result<Arc<dyn ColumnValues<columnar::DateTime>>> {
pub fn date(&self, field: &str) -> crate::Result<Arc<dyn ColumnValues<common::DateTime>>> {
self.column_first_or_default(field)
}

View File

@@ -123,148 +123,16 @@ mod functional_test;
mod macros;
mod future_result;
pub use common::DateTime;
/// Re-export of the `time` crate
///
/// Tantivy uses [`time`](https://crates.io/crates/time) for dates.
pub use time;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// A date/time value with microsecond precision.
///
/// This timestamp does not carry any explicit time zone information.
/// Users are responsible for applying the provided conversion
/// functions consistently. Internally the time zone is assumed
/// to be UTC, which is also used implicitly for JSON serialization.
///
/// All constructors and conversions are provided as explicit
/// functions and not by implementing any `From`/`Into` traits
/// to prevent unintended usage.
#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime {
// Timestamp in microseconds.
pub(crate) timestamp_micros: i64,
}
impl From<columnar::DateTime> for DateTime {
fn from(columnar_datetime: columnar::DateTime) -> Self {
DateTime {
timestamp_micros: columnar_datetime.timestamp_micros,
}
}
}
impl From<DateTime> for columnar::DateTime {
fn from(datetime: crate::DateTime) -> Self {
columnar::DateTime {
timestamp_micros: datetime.timestamp_micros,
}
}
}
impl DateTime {
/// Create new from UNIX timestamp in seconds
pub const fn from_timestamp_secs(seconds: i64) -> Self {
Self {
timestamp_micros: seconds * 1_000_000,
}
}
/// Create new from UNIX timestamp in milliseconds
pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
Self {
timestamp_micros: milliseconds * 1_000,
}
}
/// Create new from UNIX timestamp in microseconds.
pub const fn from_timestamp_micros(microseconds: i64) -> Self {
Self {
timestamp_micros: microseconds,
}
}
/// Create new from `OffsetDateTime`
///
/// The given date/time is converted to UTC and the actual
/// time zone is discarded.
pub const fn from_utc(dt: OffsetDateTime) -> Self {
let timestamp_micros = dt.unix_timestamp() * 1_000_000 + dt.microsecond() as i64;
Self { timestamp_micros }
}
/// Create new from `PrimitiveDateTime`
///
/// Implicitly assumes that the given date/time is in UTC!
/// Otherwise the original value must only be reobtained with
/// [`Self::into_primitive()`].
pub fn from_primitive(dt: PrimitiveDateTime) -> Self {
Self::from_utc(dt.assume_utc())
}
/// Convert to UNIX timestamp in seconds.
pub const fn into_timestamp_secs(self) -> i64 {
self.timestamp_micros / 1_000_000
}
/// Convert to UNIX timestamp in milliseconds.
pub const fn into_timestamp_millis(self) -> i64 {
self.timestamp_micros / 1_000
}
/// Convert to UNIX timestamp in microseconds.
pub const fn into_timestamp_micros(self) -> i64 {
self.timestamp_micros
}
/// Convert to UTC `OffsetDateTime`
pub fn into_utc(self) -> OffsetDateTime {
let timestamp_nanos = self.timestamp_micros as i128 * 1000;
let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos)
.expect("valid UNIX timestamp");
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
utc_datetime
}
/// Convert to `OffsetDateTime` with the given time zone
pub fn into_offset(self, offset: UtcOffset) -> OffsetDateTime {
self.into_utc().to_offset(offset)
}
/// Convert to `PrimitiveDateTime` without any time zone
///
/// The value should have been constructed with [`Self::from_primitive()`].
/// Otherwise the time zone is implicitly assumed to be UTC.
pub fn into_primitive(self) -> PrimitiveDateTime {
let utc_datetime = self.into_utc();
// Discard the UTC time zone offset
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time())
}
/// Truncates the microseconds value to the corresponding precision.
pub(crate) fn truncate(self, precision: DatePrecision) -> Self {
let truncated_timestamp_micros = match precision {
DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000,
DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000,
DatePrecision::Microseconds => self.timestamp_micros,
};
Self {
timestamp_micros: truncated_timestamp_micros,
}
}
}
impl fmt::Debug for DateTime {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let utc_rfc3339 = self.into_utc().format(&Rfc3339).map_err(|_| fmt::Error)?;
f.write_str(&utc_rfc3339)
}
}
pub use crate::error::TantivyError;
pub use crate::future_result::FutureResult;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// Tantivy result.
///

View File

@@ -1,24 +1,10 @@
use std::ops::BitOr;
pub use common::DatePrecision;
use serde::{Deserialize, Serialize};
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
/// DateTime Precision
#[derive(
Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum DatePrecision {
/// Seconds precision
#[default]
Seconds,
/// Milli-seconds precision.
Milliseconds,
/// Micro-seconds precision.
Microseconds,
}
/// Defines how DateTime field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DateOptions {

View File

@@ -380,9 +380,7 @@ mod binary_serialize {
}
Value::Date(ref val) => {
DATE_CODE.serialize(writer)?;
let DateTime {
timestamp_micros, ..
} = val;
let timestamp_micros = val.into_timestamp_micros();
timestamp_micros.serialize(writer)
}
Value::Facet(ref facet) => {