Replace chrono with time (#1307)

For date values, `chrono` has been replaced with `time`:
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally, date/time values are stored as seconds since the UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
  If this is not desired, do the time zone conversion yourself and use `time::PrimitiveDateTime`
  directly instead (see the sketch below).
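
A minimal sketch of the migration for indexing a date value. The field name, heap size, and timestamp are illustrative; error propagation with `?` relies on the `From<time::error::Parse>` conversion added in this commit.

```rust
use tantivy::schema::{Schema, INDEXED, STORED};
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::OffsetDateTime;
use tantivy::{doc, DateTime, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    // Illustrative field name; date fields are configured like numeric fields.
    let birthday = schema_builder.add_date_field("birthday", INDEXED | STORED);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer = index.writer(50_000_000)?;

    // Previously a chrono::DateTime<Utc> was passed directly; now the value is
    // parsed with `time` and wrapped explicitly. The +02:00 offset is folded into UTC.
    let dt = OffsetDateTime::parse("1982-09-17T13:20:00+02:00", &Rfc3339)?;
    writer.add_document(doc!(birthday => DateTime::new_utc(dt)))?;
    writer.commit()?;
    Ok(())
}
```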

Closes #1304
Authored by Uwe Klotz on 2022-03-21 02:50:19 +01:00; committed by GitHub.
Parent: 46d5de920d · Commit: 125707dbe0
20 changed files with 323 additions and 190 deletions

View File

@@ -1,3 +1,14 @@
Unreleased
================================
- For date values, `chrono` has been replaced with `time` (@uklotzde) #1304:
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
If this is not desired, do the time zone conversion yourself and use `time::PrimitiveDateTime`
directly instead.
Tantivy 0.17
================================
- LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)

View File

@@ -48,7 +48,7 @@ thiserror = "1.0.24"
htmlescape = "0.3.1"
fail = "0.5"
murmurhash32 = "0.2"
chrono = "0.4.19"
time = { version = "0.3.7", features = ["serde-well-known"] }
smallvec = "1.6.1"
rayon = "1.5"
lru = "0.7.0"

View File

@@ -67,7 +67,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
///
/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
/// We delegate rejecting such invalid dates to the logical AST computation code
/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
/// it (instead of merely extracting the datetime value as string as done here).
fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
let two_digits = || recognize::<String, _, _>((digit(), digit()));

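For illustration, a small sketch of what the downstream logical-AST code now does with the extracted string, using `time` directly (this assumes the crate's parsing support is enabled, as it is via tantivy's `time` dependency); the timestamps are illustrative.

```rust
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

fn main() {
    // A well-formed RFC 3339 timestamp parses; `time` keeps the original offset.
    let dt = OffsetDateTime::parse("2019-10-12T07:20:50.52+02:00", &Rfc3339).unwrap();
    assert_eq!(dt.offset().whole_hours(), 2);

    // Strings that merely look date-shaped to the lexer above, such as
    // 999999-99-99T99:99:99.266051969+99:99, are rejected at this stage.
    assert!(OffsetDateTime::parse("999999-99-99T99:99:99.266051969+99:99", &Rfc3339).is_err());
}
```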
View File

@@ -152,9 +152,9 @@ mod tests {
use query::AllQuery;
use super::{add_vecs, HistogramCollector, HistogramComputer};
use crate::chrono::{TimeZone, Utc};
use crate::schema::{Schema, FAST};
use crate::{doc, query, Index};
use crate::time::{Date, Month};
use crate::{doc, query, DateTime, Index};
#[test]
fn test_add_histograms_simple() {
@@ -273,16 +273,20 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(
doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1986, Month::March, 9)?.with_hms(0, 0, 0)?)),
)?;
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let all_query = AllQuery;
let week_histogram_collector = HistogramCollector::new(
date_field,
Utc.ymd(1980, 1, 1).and_hms(0, 0, 0),
DateTime::new_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
3600 * 24 * 365, // it is just for a unit test... sorry leap years.
10,
);

View File

@@ -1,11 +1,11 @@
use std::str::FromStr;
use super::*;
use crate::collector::{Count, FilterCollector, TopDocs};
use crate::core::SegmentReader;
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
use crate::query::{AllQuery, QueryParser};
use crate::schema::{Field, Schema, FAST, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
@@ -26,11 +26,11 @@ pub fn test_filter_collector() -> crate::Result<()> {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::new_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::new_utc(OffsetDateTime::parse("2018-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -55,7 +55,9 @@ pub fn test_filter_collector() -> crate::Result<()> {
assert_eq!(filtered_top_docs.len(), 0);
fn date_filter(value: DateTime) -> bool {
(value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
(value.to_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
.whole_weeks()
> 0
}
let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));

View File

@@ -714,7 +714,9 @@ mod tests {
use crate::collector::Collector;
use crate::query::{AllQuery, Query, QueryParser};
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
use crate::{DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
fn make_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
@@ -890,28 +892,32 @@ mod tests {
#[test]
fn test_top_field_collector_datetime() -> crate::Result<()> {
use std::str::FromStr;
let mut schema_builder = Schema::builder();
let name = schema_builder.add_text_field("name", TEXT);
let birthday = schema_builder.add_date_field("birthday", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let pr_birthday = crate::DateTime::from_str("1898-04-09T00:00:00+00:00")?;
let pr_birthday = DateTime::new_utc(OffsetDateTime::parse(
"1898-04-09T00:00:00+00:00",
&Rfc3339,
)?);
index_writer.add_document(doc!(
name => "Paul Robeson",
birthday => pr_birthday
birthday => pr_birthday,
))?;
let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
let mr_birthday = DateTime::new_utc(OffsetDateTime::parse(
"1947-11-08T00:00:00+00:00",
&Rfc3339,
)?);
index_writer.add_document(doc!(
name => "Minnie Riperton",
birthday => mr_birthday
birthday => mr_birthday,
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
let top_docs: Vec<(crate::DateTime, DocAddress)> =
searcher.search(&AllQuery, &top_collector)?;
let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
&[

View File

@@ -149,9 +149,21 @@ impl<Guard> From<PoisonError<Guard>> for TantivyError {
}
}
impl From<chrono::ParseError> for TantivyError {
fn from(err: chrono::ParseError) -> TantivyError {
TantivyError::InvalidArgument(err.to_string())
impl From<time::error::Format> for TantivyError {
fn from(err: time::error::Format) -> TantivyError {
TantivyError::InvalidArgument(format!("Date formatting error: {err}"))
}
}
impl From<time::error::Parse> for TantivyError {
fn from(err: time::error::Parse) -> TantivyError {
TantivyError::InvalidArgument(format!("Date parsing error: {err}"))
}
}
impl From<time::error::ComponentRange> for TantivyError {
fn from(err: time::error::ComponentRange) -> TantivyError {
TantivyError::InvalidArgument(format!("Date range error: {err}"))
}
}

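A sketch of how these conversions are intended to be used: `?` turns a `time` parsing failure into `TantivyError::InvalidArgument`. The helper function below is hypothetical and only illustrates the `From` impls above.

```rust
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::OffsetDateTime;
use tantivy::{DateTime, TantivyError};

// Hypothetical helper: parse an RFC 3339 string into a tantivy `DateTime`,
// letting `From<time::error::Parse> for TantivyError` handle the failure case.
fn parse_rfc3339_date(text: &str) -> Result<DateTime, TantivyError> {
    let dt = OffsetDateTime::parse(text, &Rfc3339)?;
    Ok(DateTime::new_utc(dt))
}

fn main() {
    assert!(parse_rfc3339_date("1996-12-20T00:39:57Z").is_ok());
    assert!(matches!(
        parse_rfc3339_date("not a date"),
        Err(TantivyError::InvalidArgument(_))
    ));
}
```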
View File

@@ -30,9 +30,8 @@ pub use self::readers::FastFieldReaders;
pub(crate) use self::readers::{type_and_cardinality, FastType};
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::chrono::{NaiveDateTime, Utc};
use crate::schema::{Cardinality, FieldType, Type, Value};
use crate::DocId;
use crate::{DateTime, DocId};
mod alive_bitset;
mod bytes;
@@ -161,14 +160,14 @@ impl FastValue for f64 {
}
}
impl FastValue for crate::DateTime {
impl FastValue for DateTime {
fn from_u64(timestamp_u64: u64) -> Self {
let timestamp_i64 = i64::from_u64(timestamp_u64);
crate::DateTime::from_utc(NaiveDateTime::from_timestamp(timestamp_i64, 0), Utc)
let unix_timestamp = i64::from_u64(timestamp_u64);
Self::from_unix_timestamp(unix_timestamp)
}
fn to_u64(&self) -> u64 {
self.timestamp().to_u64()
self.to_unix_timestamp().to_u64()
}
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
@@ -179,7 +178,7 @@ impl FastValue for crate::DateTime {
}
fn as_u64(&self) -> u64 {
self.timestamp().as_u64()
self.to_unix_timestamp().as_u64()
}
fn to_type() -> Type {
@@ -188,12 +187,12 @@ impl FastValue for crate::DateTime {
}
fn value_to_u64(value: &Value) -> u64 {
match *value {
Value::U64(ref val) => *val,
Value::I64(ref val) => common::i64_to_u64(*val),
Value::F64(ref val) => common::f64_to_u64(*val),
Value::Date(ref datetime) => common::i64_to_u64(datetime.timestamp()),
_ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
match value {
Value::U64(val) => val.to_u64(),
Value::I64(val) => val.to_u64(),
Value::F64(val) => val.to_u64(),
Value::Date(val) => val.to_u64(),
_ => panic!("Expected a u64/i64/f64/date field, got {:?} ", value),
}
}
@@ -213,6 +212,7 @@ mod tests {
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::{Document, Field, NumericOptions, Schema, FAST};
use crate::time::OffsetDateTime;
use crate::{Index, SegmentId, SegmentReader};
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
@@ -233,7 +233,7 @@ mod tests {
#[test]
pub fn test_fastfield_i64_u64() {
let datetime = crate::DateTime::from_utc(NaiveDateTime::from_timestamp(0i64, 0), Utc);
let datetime = DateTime::new_utc(OffsetDateTime::UNIX_EPOCH);
assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
}
@@ -489,7 +489,8 @@ mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
index_writer
.add_document(doc!(date_field =>DateTime::new_utc(OffsetDateTime::now_utc())))?;
index_writer.commit()?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
@@ -509,7 +510,7 @@ mod tests {
#[test]
fn test_default_datetime() {
assert_eq!(crate::DateTime::make_zero().timestamp(), 0i64);
assert_eq!(0, DateTime::make_zero().to_unix_timestamp());
}
#[test]
@@ -526,16 +527,16 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(1i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
date_field => DateTime::from_u64(1i64.to_u64()),
multi_date_field => DateTime::from_u64(2i64.to_u64()),
multi_date_field => DateTime::from_u64(3i64.to_u64())
))?;
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(4i64.to_u64())
date_field => DateTime::from_u64(4i64.to_u64())
))?;
index_writer.add_document(doc!(
multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
multi_date_field => DateTime::from_u64(5i64.to_u64()),
multi_date_field => DateTime::from_u64(6i64.to_u64())
))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -547,23 +548,23 @@ mod tests {
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
let mut dates = vec![];
{
assert_eq!(date_fast_field.get(0u32).timestamp(), 1i64);
assert_eq!(date_fast_field.get(0u32).to_unix_timestamp(), 1i64);
dates_fast_field.get_vals(0u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].timestamp(), 2i64);
assert_eq!(dates[1].timestamp(), 3i64);
assert_eq!(dates[0].to_unix_timestamp(), 2i64);
assert_eq!(dates[1].to_unix_timestamp(), 3i64);
}
{
assert_eq!(date_fast_field.get(1u32).timestamp(), 4i64);
assert_eq!(date_fast_field.get(1u32).to_unix_timestamp(), 4i64);
dates_fast_field.get_vals(1u32, &mut dates);
assert!(dates.is_empty());
}
{
assert_eq!(date_fast_field.get(2u32).timestamp(), 0i64);
assert_eq!(date_fast_field.get(2u32).to_unix_timestamp(), 0i64);
dates_fast_field.get_vals(2u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].timestamp(), 5i64);
assert_eq!(dates[1].timestamp(), 6i64);
assert_eq!(dates[0].to_unix_timestamp(), 5i64);
assert_eq!(dates[1].to_unix_timestamp(), 6i64);
}
Ok(())
}

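A sketch of the `u64` round-trip performed by the `FastValue` impl above, assuming the trait is used as exposed under `tantivy::fastfield`; the timestamps are arbitrary.

```rust
use tantivy::fastfield::FastValue;
use tantivy::DateTime;

fn main() {
    // Dates are stored as seconds since the UNIX epoch, mapped through the
    // same order-preserving i64 <-> u64 encoding as plain i64 fast fields.
    let before_epoch = DateTime::from_unix_timestamp(-1);
    let epoch = DateTime::from_unix_timestamp(0);
    let after_epoch = DateTime::from_unix_timestamp(1);

    assert!(before_epoch.to_u64() < epoch.to_u64());
    assert!(epoch.to_u64() < after_epoch.to_u64());

    // Round-trip through the fast field representation.
    let restored = DateTime::from_u64(after_epoch.to_u64());
    assert_eq!(restored.to_unix_timestamp(), 1);
}
```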
View File

@@ -6,8 +6,6 @@ pub use self::writer::MultiValuedFastFieldWriter;
#[cfg(test)]
mod tests {
use chrono::Duration;
use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest};
use test_log::test;
@@ -16,7 +14,9 @@ mod tests {
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
use crate::{Document, Index, Term};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{Duration, OffsetDateTime};
use crate::{DateTime, Document, Index, Term};
#[test]
fn test_multivalued_u64() -> crate::Result<()> {
@@ -69,22 +69,27 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let first_time_stamp = chrono::Utc::now();
index_writer.add_document(
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
)?;
let first_time_stamp = OffsetDateTime::now_utc();
index_writer.add_document(doc!(
date_field => DateTime::new_utc(first_time_stamp),
date_field => DateTime::new_utc(first_time_stamp),
time_i=>1i64))?;
index_writer.add_document(doc!(time_i => 0i64))?;
// add one second
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64),
)?;
index_writer.add_document(doc!(
date_field => DateTime::new_utc(first_time_stamp + Duration::seconds(1)),
time_i => 2i64))?;
// add another second
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64))?;
index_writer.add_document(doc!(
date_field => DateTime::new_utc(two_secs_ahead),
date_field => DateTime::new_utc(two_secs_ahead),
date_field => DateTime::new_utc(two_secs_ahead),
time_i => 3i64))?;
// add three seconds
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64),
)?;
index_writer.add_document(doc!(
date_field => DateTime::new_utc(first_time_stamp + Duration::seconds(3)),
time_i => 4i64))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -96,7 +101,7 @@ mod tests {
let parser = QueryParser::for_index(&index, vec![]);
let query = parser.parse_query(&format!(
"multi_date_field:\"{}\"",
first_time_stamp.to_rfc3339()
first_time_stamp.format(&Rfc3339)?,
))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
@@ -107,9 +112,8 @@ mod tests {
.get_first(date_field)
.expect("cannot find value")
.as_date()
.unwrap()
.timestamp(),
first_time_stamp.timestamp()
.unwrap(),
DateTime::new_utc(first_time_stamp),
);
assert_eq!(
retrieved_doc
@@ -123,7 +127,7 @@ mod tests {
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))?;
let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.format(&Rfc3339)?))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
@@ -135,9 +139,8 @@ mod tests {
.get_first(date_field)
.expect("cannot find value")
.as_date()
.unwrap()
.timestamp(),
two_secs_ahead.timestamp()
.unwrap(),
DateTime::new_utc(two_secs_ahead)
);
assert_eq!(
retrieved_doc
@@ -153,8 +156,8 @@ mod tests {
let parser = QueryParser::for_index(&index, vec![date_field]);
let range_q = format!(
"multi_date_field:[{} TO {}}}",
(first_time_stamp + Duration::seconds(1)).to_rfc3339(),
(first_time_stamp + Duration::seconds(3)).to_rfc3339()
(first_time_stamp + Duration::seconds(1)).format(&Rfc3339)?,
(first_time_stamp + Duration::seconds(3)).format(&Rfc3339)?
);
let query = parser.parse_query(&range_q)?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
@@ -177,9 +180,8 @@ mod tests {
.get_first(date_field)
.expect("cannot find value")
.as_date()
.expect("value not of Date type")
.timestamp(),
(first_time_stamp + Duration::seconds(offset_sec)).timestamp()
.expect("value not of Date type"),
DateTime::new_utc(first_time_stamp + Duration::seconds(offset_sec)),
);
assert_eq!(
retrieved_doc

View File

@@ -5,7 +5,7 @@ use crate::fastfield::{
};
use crate::schema::{Cardinality, Field, FieldType, Schema};
use crate::space_usage::PerFieldSpaceUsage;
use crate::TantivyError;
use crate::{DateTime, TantivyError};
/// Provides access to all of the BitpackedFastFieldReader.
///
@@ -147,10 +147,10 @@ impl FastFieldReaders {
self.typed_fast_field_reader(field)
}
/// Returns the `i64` fast field reader associated to `field`.
/// Returns the `date` fast field reader associated to `field`.
///
/// If `field` is not an i64 fast field, this method returns an Error.
pub fn date(&self, field: Field) -> crate::Result<DynamicFastFieldReader<crate::DateTime>> {
/// If `field` is not a date fast field, this method returns an Error.
pub fn date(&self, field: Field) -> crate::Result<DynamicFastFieldReader<DateTime>> {
self.check_type(field, FastType::Date, Cardinality::SingleValue)?;
self.typed_fast_field_reader(field)
}
@@ -195,13 +195,12 @@ impl FastFieldReaders {
self.typed_fast_field_multi_reader(field)
}
/// Returns a `crate::DateTime` multi-valued fast field reader associated to `field`.
/// Returns a `time::OffsetDateTime` multi-valued fast field reader associated to
/// `field`.
///
/// If `field` is not a `crate::DateTime` multi-valued fast field, this method returns an Error.
pub fn dates(
&self,
field: Field,
) -> crate::Result<MultiValuedFastFieldReader<crate::DateTime>> {
/// If `field` is not a `time::OffsetDateTime` multi-valued fast field, this method returns an
/// Error.
pub fn dates(&self, field: Field) -> crate::Result<MultiValuedFastFieldReader<DateTime>> {
self.check_type(field, FastType::Date, Cardinality::MultiValues)?;
self.typed_fast_field_multi_reader(field)
}

View File

@@ -1,4 +1,3 @@
use chrono::Utc;
use fnv::FnvHashMap;
use murmurhash32::murmurhash2;
@@ -6,8 +5,10 @@ use crate::fastfield::FastValue;
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP};
use crate::schema::Type;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::TextAnalyzer;
use crate::{DocId, Term};
use crate::{DateTime, DocId, Term};
/// This object is a map storing the last position for a given path for the current document
/// being indexed.
@@ -151,7 +152,7 @@ fn index_json_value<'a>(
);
}
TextOrDateTime::DateTime(dt) => {
json_term_writer.set_fast_value(dt);
json_term_writer.set_fast_value(DateTime::new_utc(dt));
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
},
@@ -184,13 +185,13 @@ fn index_json_value<'a>(
enum TextOrDateTime<'a> {
Text(&'a str),
DateTime(crate::DateTime),
DateTime(OffsetDateTime),
}
fn infer_type_from_str(text: &str) -> TextOrDateTime {
match chrono::DateTime::parse_from_rfc3339(text) {
match OffsetDateTime::parse(text, &Rfc3339) {
Ok(dt) => {
let dt_utc = dt.with_timezone(&Utc);
let dt_utc = dt.to_offset(UtcOffset::UTC);
TextOrDateTime::DateTime(dt_utc)
}
Err(_) => TextOrDateTime::Text(text),

View File

@@ -1146,9 +1146,10 @@ mod tests {
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
TextFieldIndexing, INDEXED, TEXT,
};
use crate::time::OffsetDateTime;
use crate::{
assert_nearly_equals, schema, DocAddress, DocSet, IndexSettings, IndexSortByField,
IndexWriter, Order, Searcher, SegmentId,
assert_nearly_equals, schema, DateTime, DocAddress, DocSet, IndexSettings,
IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
};
#[test]
@@ -1166,14 +1167,14 @@ mod tests {
let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST);
let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader()?;
let curr_time = chrono::Utc::now();
let curr_time = OffsetDateTime::now_utc();
{
let mut index_writer = index.writer_for_tests()?;
// writing the segment
index_writer.add_document(doc!(
text_field => "af b",
score_field => 3u64,
date_field => curr_time,
date_field => DateTime::new_utc(curr_time),
bytes_score_field => 3u32.to_be_bytes().as_ref()
))?;
index_writer.add_document(doc!(
@@ -1190,7 +1191,7 @@ mod tests {
// writing the segment
index_writer.add_document(doc!(
text_field => "af b",
date_field => curr_time,
date_field => DateTime::new_utc(curr_time),
score_field => 11u64,
bytes_score_field => 11u32.to_be_bytes().as_ref()
))?;
@@ -1246,7 +1247,10 @@ mod tests {
]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_date(date_field, &curr_time)])?,
get_doc_ids(vec![Term::from_field_date(
date_field,
DateTime::new_utc(curr_time)
)])?,
vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
);
}

View File

@@ -1,7 +1,7 @@
use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
use super::operation::AddOperation;
use crate::core::Segment;
use crate::fastfield::FastFieldsWriter;
use crate::fastfield::{FastFieldsWriter, FastValue as _};
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
use crate::indexer::json_term_writer::index_json_values;
use crate::indexer::segment_serializer::SegmentSerializer;
@@ -244,7 +244,7 @@ impl SegmentWriter {
FieldType::Date(_) => {
for value in values {
let date_val = value.as_date().ok_or_else(make_schema_error)?;
term_buffer.set_i64(date_val.timestamp());
term_buffer.set_u64(date_val.to_u64());
postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
}
}
@@ -414,16 +414,16 @@ pub fn prepare_doc_for_store(doc: Document, schema: &Schema) -> Document {
#[cfg(test)]
mod tests {
use chrono::Utc;
use super::compute_initial_table_size;
use crate::collector::Count;
use crate::indexer::json_term_writer::JsonTermWriter;
use crate::postings::TermInfo;
use crate::query::PhraseQuery;
use crate::schema::{IndexRecordOption, Schema, Type, STORED, STRING, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DocAddress, DocSet, Document, Index, Postings, Term, TERMINATED};
use crate::{DateTime, DocAddress, DocSet, Document, Index, Postings, Term, TERMINATED};
#[test]
fn test_hashmap_size() {
@@ -523,11 +523,9 @@ mod tests {
json_term_writer.pop_path_segment();
json_term_writer.pop_path_segment();
json_term_writer.push_path_segment("date");
json_term_writer.set_fast_value(
chrono::DateTime::parse_from_rfc3339("1985-04-12T23:20:50.52Z")
.unwrap()
.with_timezone(&Utc),
);
json_term_writer.set_fast_value(DateTime::new_utc(
OffsetDateTime::parse("1985-04-12T23:20:50.52Z", &Rfc3339).unwrap(),
));
assert!(term_stream.advance());
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());

View File

@@ -125,7 +125,90 @@ mod functional_test;
mod macros;
mod future_result;
pub use chrono;
/// Re-export of the `time` crate
///
/// Tantivy uses [`time`](https://crates.io/crates/time) for dates.
pub use time;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// A date/time value with second precision.
///
/// This timestamp does not carry any explicit time zone information.
/// Users are responsible for applying the provided conversion
/// functions consistently. Internally the time zone is assumed
/// to be UTC, which is also used implicitly for JSON serialization.
///
/// All constructors and conversions are provided as explicit
/// functions and not by implementing any `From`/`Into` traits
/// to prevent unintended usage.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime {
unix_timestamp: i64,
}
impl DateTime {
/// Create new from UNIX timestamp
pub const fn from_unix_timestamp(unix_timestamp: i64) -> Self {
Self { unix_timestamp }
}
/// Create new from `OffsetDateTime`
///
/// The given date/time is converted to UTC and the actual
/// time zone is discarded.
pub const fn new_utc(dt: OffsetDateTime) -> Self {
Self::from_unix_timestamp(dt.unix_timestamp())
}
/// Create new from `PrimitiveDateTime`
///
/// Implicitly assumes that the given date/time is in UTC!
/// Otherwise, the original value can only be recovered via
/// [`to_primitive()`].
pub const fn new_primitive(dt: PrimitiveDateTime) -> Self {
Self::new_utc(dt.assume_utc())
}
/// Convert to UNIX timestamp
pub const fn to_unix_timestamp(self) -> i64 {
let Self { unix_timestamp } = self;
unix_timestamp
}
/// Convert to UTC `OffsetDateTime`
pub fn to_utc(self) -> OffsetDateTime {
let Self { unix_timestamp } = self;
let utc_datetime =
OffsetDateTime::from_unix_timestamp(unix_timestamp).expect("valid UNIX timestamp");
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
utc_datetime
}
/// Convert to `OffsetDateTime` with the given time zone
pub fn to_offset(self, offset: UtcOffset) -> OffsetDateTime {
self.to_utc().to_offset(offset)
}
/// Convert to `PrimitiveDateTime` without any time zone
///
/// The value should have been constructed with [`new_primitive()`].
/// Otherwise the time zone is implicitly assumed to be UTC.
pub fn to_primitive(self) -> PrimitiveDateTime {
let utc_datetime = self.to_utc();
// Discard the UTC time zone offset
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time())
}
}
impl fmt::Debug for DateTime {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let utc_rfc3339 = self.to_utc().format(&Rfc3339).map_err(|_| fmt::Error)?;
f.write_str(&utc_rfc3339)
}
}
pub use crate::error::TantivyError;
pub use crate::future_result::FutureResult;
@@ -140,9 +223,6 @@ pub type Result<T> = std::result::Result<T, TantivyError>;
#[cfg(feature = "quickwit")]
pub type AsyncIoResult<T> = std::result::Result<T, crate::error::AsyncIoError>;
/// Tantivy DateTime
pub type DateTime = chrono::DateTime<chrono::Utc>;
mod core;
mod indexer;

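A hedged usage sketch of the `DateTime` API above, exercising both constructors and the conversions back out; all values are illustrative.

```rust
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::{Date, Month, OffsetDateTime, PrimitiveDateTime, Time, UtcOffset};
use tantivy::DateTime;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // From an OffsetDateTime: the +02:00 offset is folded into UTC and discarded.
    let odt = OffsetDateTime::parse("1982-09-17T13:20:00+02:00", &Rfc3339)?;
    let dt = DateTime::new_utc(odt);
    assert_eq!(dt.to_utc().format(&Rfc3339)?, "1982-09-17T11:20:00Z");

    // From a PrimitiveDateTime: the wall-clock value is assumed to already be UTC.
    let pdt = PrimitiveDateTime::new(
        Date::from_calendar_date(1982, Month::September, 17)?,
        Time::from_hms(13, 20, 0)?,
    );
    let dt2 = DateTime::new_primitive(pdt);
    assert_eq!(dt2.to_primitive(), pdt);

    // Re-attach an offset only when presenting the value.
    let local = dt2.to_offset(UtcOffset::from_hms(2, 0, 0)?);
    assert_eq!(local.hour(), 15);
    Ok(())
}
```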
View File

@@ -244,12 +244,12 @@ impl MoreLikeThis {
FieldType::Date(_) => {
for value in values {
// TODO: Ask if this is the semantic (timestamp) we want
let val = value
let unix_timestamp = value
.as_date()
.ok_or_else(|| TantivyError::InvalidArgument("invalid value".to_string()))?
.timestamp();
if !self.is_noise_word(val.to_string()) {
let term = Term::from_field_i64(field, val);
.to_unix_timestamp();
if !self.is_noise_word(unix_timestamp.to_string()) {
let term = Term::from_field_i64(field, unix_timestamp);
*term_frequencies.entry(term).or_insert(0) += 1;
}
}

View File

@@ -15,8 +15,10 @@ use crate::query::{
use crate::schema::{
Facet, FacetParseError, Field, FieldType, IndexRecordOption, Schema, Term, Type,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::Score;
use crate::{DateTime, Score};
/// Possible error that may happen when parsing a query.
#[derive(Debug, PartialEq, Eq, Error)]
@@ -72,7 +74,7 @@ pub enum QueryParserError {
RangeMustNotHavePhrase,
/// The format for the date field is not RFC 3339 compliant.
#[error("The date field has an invalid format")]
DateFormatError(#[from] chrono::ParseError),
DateFormatError(#[from] time::error::Parse),
/// The format for the facet field is invalid.
#[error("The facet field is malformed: {0}")]
FacetFormatError(#[from] FacetParseError),
@@ -331,11 +333,8 @@ impl QueryParser {
Ok(Term::from_field_f64(field, val))
}
FieldType::Date(_) => {
let dt = chrono::DateTime::parse_from_rfc3339(phrase)?;
Ok(Term::from_field_date(
field,
&dt.with_timezone(&chrono::Utc),
))
let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
Ok(Term::from_field_date(field, DateTime::new_utc(dt)))
}
FieldType::Str(ref str_options) => {
let option = str_options.get_indexing_options().ok_or_else(|| {
@@ -408,8 +407,8 @@ impl QueryParser {
Ok(vec![LogicalLiteral::Term(f64_term)])
}
FieldType::Date(_) => {
let dt = chrono::DateTime::parse_from_rfc3339(phrase)?;
let dt_term = Term::from_field_date(field, &dt.with_timezone(&chrono::Utc));
let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
let dt_term = Term::from_field_date(field, DateTime::new_utc(dt));
Ok(vec![LogicalLiteral::Term(dt_term)])
}
FieldType::Str(ref str_options) => {
@@ -665,12 +664,12 @@ enum NumValue {
U64(u64),
I64(i64),
F64(f64),
DateTime(crate::DateTime),
DateTime(OffsetDateTime),
}
fn infer_type_num(phrase: &str) -> Option<NumValue> {
if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(phrase) {
let dt_utc = dt.with_timezone(&chrono::Utc);
if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
let dt_utc = dt.to_offset(UtcOffset::UTC);
return Some(NumValue::DateTime(dt_utc));
}
if let Ok(u64_val) = str::parse::<u64>(phrase) {
@@ -712,7 +711,7 @@ fn generate_literals_for_json_object(
json_term_writer.set_fast_value(f64_val);
}
NumValue::DateTime(dt_val) => {
json_term_writer.set_fast_value(dt_val);
json_term_writer.set_fast_value(DateTime::new_utc(dt_val));
}
}
logical_literals.push(LogicalLiteral::Term(json_term_writer.term().clone()));
@@ -1039,6 +1038,7 @@ mod test {
#[test]
fn test_json_field_possibly_a_date() {
// Subseconds are discarded
test_parse_query_to_logical_ast_helper(
r#"json.date:"2019-10-12T07:20:50.52Z""#,
r#"(Term(type=Json, field=14, path=date, vtype=Date, 2019-10-12T07:20:50Z) "[(0, Term(type=Json, field=14, path=date, vtype=Str, "2019")), (1, Term(type=Json, field=14, path=date, vtype=Str, "10")), (2, Term(type=Json, field=14, path=date, vtype=Str, "12t07")), (3, Term(type=Json, field=14, path=date, vtype=Str, "20")), (4, Term(type=Json, field=14, path=date, vtype=Str, "50")), (5, Term(type=Json, field=14, path=date, vtype=Str, "52z"))]")"#,

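A sketch of how a date phrase flows through the query parser after this change; the field name and timestamps are illustrative.

```rust
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, INDEXED};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let occurred_at = schema_builder.add_date_field("occurred_at", INDEXED);
    let index = Index::create_in_ram(schema_builder.build());

    // The quoted phrase is parsed with OffsetDateTime::parse(..., &Rfc3339),
    // converted to UTC, and turned into a date term.
    let parser = QueryParser::for_index(&index, vec![occurred_at]);
    let _query = parser.parse_query(r#"occurred_at:"2019-10-12T07:20:50+02:00""#)?;

    // A malformed date now surfaces as a time-based DateFormatError.
    assert!(parser.parse_query(r#"occurred_at:"not-a-date""#).is_err());
    Ok(())
}
```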
View File

@@ -110,7 +110,7 @@ impl Document {
self.add_field_value(field, value);
}
/// Add a date field
/// Add a date field with unspecified time zone offset
pub fn add_date(&mut self, field: Field, value: DateTime) {
self.add_field_value(field, value);
}

View File

@@ -1,4 +1,3 @@
use chrono::{FixedOffset, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use thiserror::Error;
@@ -9,7 +8,10 @@ use crate::schema::{
Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing, TextOptions,
Value,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
/// Possible error that may occur while parsing a field value
/// At this point the JSON is known to be valid.
@@ -244,16 +246,15 @@ impl FieldType {
/// target field is a `Str`, this method will return an Error.
pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
match json {
JsonValue::String(field_text) => match *self {
JsonValue::String(field_text) => {
match *self {
FieldType::Date(_) => {
let dt_with_fixed_tz: chrono::DateTime<FixedOffset> =
chrono::DateTime::parse_from_rfc3339(&field_text).map_err(|_err| {
ValueParsingError::TypeError {
let dt_with_fixed_tz = OffsetDateTime::parse(&field_text, &Rfc3339)
.map_err(|_err| ValueParsingError::TypeError {
expected: "rfc3339 format",
json: JsonValue::String(field_text),
}
})?;
Ok(Value::Date(dt_with_fixed_tz.with_timezone(&Utc)))
Ok(DateTime::new_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
@@ -270,7 +271,8 @@ impl FieldType {
expected: "a json object",
json: JsonValue::String(field_text),
}),
},
}
}
JsonValue::Number(field_val_num) => match self {
FieldType::I64(_) | FieldType::Date(_) => {
if let Some(field_val_i64) = field_val_num.as_i64() {
@@ -342,12 +344,12 @@ impl FieldType {
#[cfg(test)]
mod tests {
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
use serde_json::json;
use super::FieldType;
use crate::schema::field_type::ValueParsingError;
use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
use crate::time::{Date, Month, PrimitiveDateTime, Time};
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, Document};
@@ -359,7 +361,8 @@ mod tests {
let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
let doc = schema.parse_document(doc_json).unwrap();
let date = doc.get_first(date_field).unwrap();
assert_eq!(format!("{:?}", date), "Date(2019-10-12T05:20:50.520Z)");
// Time zone is converted to UTC and subseconds are discarded
assert_eq!("Date(2019-10-12T05:20:50Z)", format!("{:?}", date));
}
#[test]
@@ -368,12 +371,12 @@ mod tests {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", INDEXED);
let schema = schema_builder.build();
let naive_date = NaiveDate::from_ymd(1982, 9, 17);
let naive_time = NaiveTime::from_hms(13, 20, 00);
let date_time = DateTime::from_utc(NaiveDateTime::new(naive_date, naive_time), Utc);
doc.add_date(date_field, date_time);
let naive_date = Date::from_calendar_date(1982, Month::September, 17).unwrap();
let naive_time = Time::from_hms(13, 20, 0).unwrap();
let date_time = PrimitiveDateTime::new(naive_date, naive_time);
doc.add_date(date_field, DateTime::new_primitive(date_time));
let doc_json = schema.to_json(&doc);
assert_eq!(doc_json, r#"{"date":["1982-09-17T13:20:00+00:00"]}"#);
assert_eq!(doc_json, r#"{"date":["1982-09-17T13:20:00Z"]}"#);
}
#[test]

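A sketch of the two schema-level paths touched above: JSON parsing normalizes the offset to UTC and drops subseconds, while `Document::add_date` now takes the `DateTime` wrapper. The field name and values mirror the tests and are otherwise illustrative.

```rust
use tantivy::schema::{Schema, INDEXED};
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::{Date, Month, PrimitiveDateTime, Time};
use tantivy::{DateTime, Document};

fn main() {
    let mut schema_builder = Schema::builder();
    let date_field = schema_builder.add_date_field("date", INDEXED);
    let schema = schema_builder.build();

    // JSON input with a +02:00 offset is converted to UTC; subseconds are dropped.
    let doc = schema
        .parse_document(r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#)
        .expect("valid document");
    let dt = doc.get_first(date_field).unwrap().as_date().unwrap();
    assert_eq!(dt.to_utc().format(&Rfc3339).unwrap(), "2019-10-12T05:20:50Z");

    // Programmatic construction from a PrimitiveDateTime, assumed to be UTC.
    let mut doc = Document::default();
    let pdt = PrimitiveDateTime::new(
        Date::from_calendar_date(1982, Month::September, 17).unwrap(),
        Time::from_hms(13, 20, 0).unwrap(),
    );
    doc.add_date(date_field, DateTime::new_primitive(pdt));
    assert_eq!(schema.to_json(&doc), r#"{"date":["1982-09-17T13:20:00Z"]}"#);
}
```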
View File

@@ -70,8 +70,8 @@ impl Term {
}
/// Builds a term given a field, and a DateTime value
pub fn from_field_date(field: Field, val: &DateTime) -> Term {
Term::from_fast_value(field, val)
pub fn from_field_date(field: Field, val: DateTime) -> Term {
Term::from_fast_value(field, &val)
}
/// Creates a `Term` given a facet.
@@ -126,7 +126,7 @@ impl Term {
}
/// Sets a `i64` value in the term.
pub fn set_date(&mut self, date: crate::DateTime) {
pub fn set_date(&mut self, date: DateTime) {
self.set_fast_value(date);
}
@@ -266,8 +266,8 @@ where B: AsRef<[u8]>
///
/// Returns None if the term is not of the Date type, or if the term byte representation
/// is invalid.
pub fn as_date(&self) -> Option<crate::DateTime> {
self.get_fast_type::<crate::DateTime>()
pub fn as_date(&self) -> Option<DateTime> {
self.get_fast_type::<DateTime>()
}
/// Returns the text associated with the term.
@@ -374,7 +374,7 @@ fn debug_value_bytes(typ: Type, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Re
}
// TODO pretty print these types too.
Type::Date => {
write_opt(f, get_fast_type::<crate::DateTime>(bytes))?;
write_opt(f, get_fast_type::<DateTime>(bytes))?;
}
Type::Facet => {
let facet_str = str::from_utf8(bytes)

View File

@@ -22,7 +22,7 @@ pub enum Value {
I64(i64),
/// 64-bits Float `f64`
F64(f64),
/// Signed 64-bits Date time stamp `date`
/// Date/time with second precision
Date(DateTime),
/// Facet
Facet(Facet),
@@ -43,7 +43,7 @@ impl Serialize for Value {
Value::U64(u) => serializer.serialize_u64(u),
Value::I64(u) => serializer.serialize_i64(u),
Value::F64(u) => serializer.serialize_f64(u),
Value::Date(ref date) => serializer.serialize_str(&date.to_rfc3339()),
Value::Date(ref date) => time::serde::rfc3339::serialize(&date.to_utc(), serializer),
Value::Facet(ref facet) => facet.serialize(serializer),
Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
Value::JsonObject(ref obj) => obj.serialize(serializer),
@@ -154,9 +154,9 @@ impl Value {
/// Returns the Date-value, provided the value is of the `Date` type.
///
/// Returns None if the value is not of type `Date`.
pub fn as_date(&self) -> Option<&DateTime> {
pub fn as_date(&self) -> Option<DateTime> {
if let Value::Date(date) = self {
Some(date)
Some(*date)
} else {
None
}
@@ -209,9 +209,9 @@ impl From<f64> for Value {
}
}
impl From<crate::DateTime> for Value {
fn from(date_time: crate::DateTime) -> Value {
Value::Date(date_time)
impl From<DateTime> for Value {
fn from(dt: DateTime) -> Value {
Value::Date(dt)
}
}
@@ -265,12 +265,12 @@ impl From<serde_json::Value> for Value {
mod binary_serialize {
use std::io::{self, Read, Write};
use chrono::{TimeZone, Utc};
use common::{f64_to_u64, u64_to_f64, BinarySerializable};
use super::Value;
use crate::schema::Facet;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
const TEXT_CODE: u8 = 0;
const U64_CODE: u8 = 1;
@@ -319,7 +319,8 @@ mod binary_serialize {
}
Value::Date(ref val) => {
DATE_CODE.serialize(writer)?;
val.timestamp().serialize(writer)
let DateTime { unix_timestamp } = val;
unix_timestamp.serialize(writer)
}
Value::Facet(ref facet) => {
HIERARCHICAL_FACET_CODE.serialize(writer)?;
@@ -357,8 +358,8 @@ mod binary_serialize {
Ok(Value::F64(value))
}
DATE_CODE => {
let timestamp = i64::deserialize(reader)?;
Ok(Value::Date(Utc.timestamp(timestamp, 0)))
let unix_timestamp = i64::deserialize(reader)?;
Ok(Value::Date(DateTime::from_unix_timestamp(unix_timestamp)))
}
HIERARCHICAL_FACET_CODE => Ok(Value::Facet(Facet::deserialize(reader)?)),
BYTES_CODE => Ok(Value::Bytes(Vec::<u8>::deserialize(reader)?)),
@@ -401,15 +402,24 @@ mod binary_serialize {
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::Value;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::DateTime;
#[test]
fn test_serialize_date() {
let value = Value::Date(DateTime::from_str("1996-12-20T00:39:57+00:00").unwrap());
let value = Value::from(DateTime::new_utc(
OffsetDateTime::parse("1996-12-20T00:39:57+00:00", &Rfc3339).unwrap(),
));
let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();
assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57+00:00""#);
assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57Z""#);
let value = Value::from(DateTime::new_utc(
OffsetDateTime::parse("1996-12-20T00:39:57-01:00", &Rfc3339).unwrap(),
));
let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();
// The time zone information gets lost by conversion into `Value::Date` and
// implicitly becomes UTC.
assert_eq!(serialized_value_json, r#""1996-12-20T01:39:57Z""#);
}
}