Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-03 15:52:55 +00:00)
Replace chrono with time (#1307)
For date values `chrono` has been replaced with `time`:

- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
  If this is not desired, do the time zone conversion yourself and use `time::PrimitiveDateTime` directly instead.

Closes #1304
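For callers migrating from the chrono-based API, here is a minimal sketch of indexing a date value with the new API. It is an illustration only, assuming a tantivy build that contains this change; the `birthday` field name is made up.

```rust
use tantivy::schema::{Schema, FAST, STORED};
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::OffsetDateTime;
use tantivy::{doc, DateTime, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let birthday = schema_builder.add_date_field("birthday", FAST | STORED);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer = index.writer(15_000_000)?;

    // Previously: doc!(birthday => chrono::Utc.ymd(1898, 4, 9).and_hms(0, 0, 0))
    // Now: build a `time` value and wrap it in the new `tantivy::DateTime`.
    let dt = DateTime::new_utc(OffsetDateTime::parse("1898-04-09T00:00:00Z", &Rfc3339)?);
    writer.add_document(doc!(birthday => dt))?;
    writer.commit()?;
    Ok(())
}
```

The `?` on `OffsetDateTime::parse` works because this change also adds `impl From<time::error::Parse> for TantivyError` (see the src/error.rs hunk below).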
CHANGELOG.md (11 changed lines)
@@ -1,3 +1,14 @@
+Unreleased
+================================
+- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
+  - The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
+  - The type alias `tantivy::DateTime` has been removed.
+  - `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
+  - Internally date/time values are stored as seconds since UNIX epoch in UTC.
+  - Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
+    If this is not desired do the time zone conversion yourself and use `time::PrimitiveDateTime`
+    directly instead.
+
 Tantivy 0.17
 ================================
 - LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)
@@ -48,7 +48,7 @@ thiserror = "1.0.24"
 htmlescape = "0.3.1"
 fail = "0.5"
 murmurhash32 = "0.2"
-chrono = "0.4.19"
+time = { version = "0.3.7", features = ["serde-well-known"] }
 smallvec = "1.6.1"
 rayon = "1.5"
 lru = "0.7.0"
@@ -67,7 +67,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
 /// We delegate rejecting such invalid dates to the logical AST compuation code
-/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
+/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
     let two_digits = || recognize::<String, _, _>((digit(), digit()));
@@ -152,9 +152,9 @@ mod tests {
     use query::AllQuery;

     use super::{add_vecs, HistogramCollector, HistogramComputer};
-    use crate::chrono::{TimeZone, Utc};
     use crate::schema::{Schema, FAST};
-    use crate::{doc, query, Index};
+    use crate::time::{Date, Month};
+    use crate::{doc, query, DateTime, Index};

     #[test]
     fn test_add_histograms_simple() {

@@ -273,16 +273,20 @@ mod tests {
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema);
         let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
-        writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
-        writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
-        writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
+        writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
+        writer.add_document(
+            doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1986, Month::March, 9)?.with_hms(0, 0, 0)?)),
+        )?;
+        writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
         writer.commit()?;
         let reader = index.reader()?;
         let searcher = reader.searcher();
         let all_query = AllQuery;
         let week_histogram_collector = HistogramCollector::new(
             date_field,
-            Utc.ymd(1980, 1, 1).and_hms(0, 0, 0),
+            DateTime::new_primitive(
+                Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
+            ),
             3600 * 24 * 365, // it is just for a unit test... sorry leap years.
             10,
         );
@@ -1,11 +1,11 @@
-use std::str::FromStr;
-
 use super::*;
 use crate::collector::{Count, FilterCollector, TopDocs};
 use crate::core::SegmentReader;
 use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
 use crate::query::{AllQuery, QueryParser};
 use crate::schema::{Field, Schema, FAST, TEXT};
+use crate::time::format_description::well_known::Rfc3339;
+use crate::time::OffsetDateTime;
 use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};

 pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {

@@ -26,11 +26,11 @@ pub fn test_filter_collector() -> crate::Result<()> {
     let index = Index::create_in_ram(schema);

     let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
-    index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
-    index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
-    index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
-    index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
-    index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
+    index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::new_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
+    index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
+    index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;
+    index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
+    index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::new_utc(OffsetDateTime::parse("2018-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
    index_writer.commit()?;

    let reader = index.reader()?;

@@ -55,7 +55,9 @@ pub fn test_filter_collector() -> crate::Result<()> {
    assert_eq!(filtered_top_docs.len(), 0);

    fn date_filter(value: DateTime) -> bool {
-        (value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
+        (value.to_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
+            .whole_weeks()
+            > 0
    }

    let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
@@ -714,7 +714,9 @@ mod tests {
    use crate::collector::Collector;
    use crate::query::{AllQuery, Query, QueryParser};
    use crate::schema::{Field, Schema, FAST, STORED, TEXT};
-    use crate::{DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
+    use crate::time::format_description::well_known::Rfc3339;
+    use crate::time::OffsetDateTime;
+    use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};

    fn make_index() -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();

@@ -890,28 +892,32 @@ mod tests {

    #[test]
    fn test_top_field_collector_datetime() -> crate::Result<()> {
-        use std::str::FromStr;
        let mut schema_builder = Schema::builder();
        let name = schema_builder.add_text_field("name", TEXT);
        let birthday = schema_builder.add_date_field("birthday", FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_for_tests()?;
-        let pr_birthday = crate::DateTime::from_str("1898-04-09T00:00:00+00:00")?;
+        let pr_birthday = DateTime::new_utc(OffsetDateTime::parse(
+            "1898-04-09T00:00:00+00:00",
+            &Rfc3339,
+        )?);
        index_writer.add_document(doc!(
            name => "Paul Robeson",
-            birthday => pr_birthday
+            birthday => pr_birthday,
        ))?;
-        let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
+        let mr_birthday = DateTime::new_utc(OffsetDateTime::parse(
+            "1947-11-08T00:00:00+00:00",
+            &Rfc3339,
+        )?);
        index_writer.add_document(doc!(
            name => "Minnie Riperton",
-            birthday => mr_birthday
+            birthday => mr_birthday,
        ))?;
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
-        let top_docs: Vec<(crate::DateTime, DocAddress)> =
-            searcher.search(&AllQuery, &top_collector)?;
+        let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
        assert_eq!(
            &top_docs[..],
            &[

src/error.rs (18 changed lines)
@@ -149,9 +149,21 @@ impl<Guard> From<PoisonError<Guard>> for TantivyError {
     }
 }

-impl From<chrono::ParseError> for TantivyError {
-    fn from(err: chrono::ParseError) -> TantivyError {
-        TantivyError::InvalidArgument(err.to_string())
+impl From<time::error::Format> for TantivyError {
+    fn from(err: time::error::Format) -> TantivyError {
+        TantivyError::InvalidArgument(format!("Date formatting error: {err}"))
     }
 }

+impl From<time::error::Parse> for TantivyError {
+    fn from(err: time::error::Parse) -> TantivyError {
+        TantivyError::InvalidArgument(format!("Date parsing error: {err}"))
+    }
+}
+
+impl From<time::error::ComponentRange> for TantivyError {
+    fn from(err: time::error::ComponentRange) -> TantivyError {
+        TantivyError::InvalidArgument(format!("Date range error: {err}"))
+    }
+}
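With these conversions in place, date parsing and formatting errors can be propagated with `?` from any function returning `crate::Result`. A small illustrative sketch (the helper function itself is hypothetical):

```rust
use tantivy::time::format_description::well_known::Rfc3339;
use tantivy::time::OffsetDateTime;
use tantivy::DateTime;

// `?` converts `time::error::Parse` into `TantivyError::InvalidArgument`
// through the `From` impl added above.
fn parse_rfc3339_date(input: &str) -> tantivy::Result<DateTime> {
    let dt = OffsetDateTime::parse(input, &Rfc3339)?;
    Ok(DateTime::new_utc(dt))
}
```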
@@ -30,9 +30,8 @@ pub use self::readers::FastFieldReaders;
 pub(crate) use self::readers::{type_and_cardinality, FastType};
 pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
 pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
-use crate::chrono::{NaiveDateTime, Utc};
 use crate::schema::{Cardinality, FieldType, Type, Value};
-use crate::DocId;
+use crate::{DateTime, DocId};

 mod alive_bitset;
 mod bytes;

@@ -161,14 +160,14 @@ impl FastValue for f64 {
     }
 }

-impl FastValue for crate::DateTime {
+impl FastValue for DateTime {
     fn from_u64(timestamp_u64: u64) -> Self {
-        let timestamp_i64 = i64::from_u64(timestamp_u64);
-        crate::DateTime::from_utc(NaiveDateTime::from_timestamp(timestamp_i64, 0), Utc)
+        let unix_timestamp = i64::from_u64(timestamp_u64);
+        Self::from_unix_timestamp(unix_timestamp)
     }

     fn to_u64(&self) -> u64 {
-        self.timestamp().to_u64()
+        self.to_unix_timestamp().to_u64()
     }

     fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {

@@ -179,7 +178,7 @@ impl FastValue for crate::DateTime {
     }

     fn as_u64(&self) -> u64 {
-        self.timestamp().as_u64()
+        self.to_unix_timestamp().as_u64()
     }

     fn to_type() -> Type {

@@ -188,12 +187,12 @@ impl FastValue for crate::DateTime {
     }

     fn value_to_u64(value: &Value) -> u64 {
-        match *value {
-            Value::U64(ref val) => *val,
-            Value::I64(ref val) => common::i64_to_u64(*val),
-            Value::F64(ref val) => common::f64_to_u64(*val),
-            Value::Date(ref datetime) => common::i64_to_u64(datetime.timestamp()),
-            _ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
+        match value {
+            Value::U64(val) => val.to_u64(),
+            Value::I64(val) => val.to_u64(),
+            Value::F64(val) => val.to_u64(),
+            Value::Date(val) => val.to_u64(),
+            _ => panic!("Expected a u64/i64/f64/date field, got {:?} ", value),
         }
     }

@@ -213,6 +212,7 @@ mod tests {
     use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
     use crate::merge_policy::NoMergePolicy;
     use crate::schema::{Document, Field, NumericOptions, Schema, FAST};
+    use crate::time::OffsetDateTime;
     use crate::{Index, SegmentId, SegmentReader};

     pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {

@@ -233,7 +233,7 @@ mod tests {

     #[test]
     pub fn test_fastfield_i64_u64() {
-        let datetime = crate::DateTime::from_utc(NaiveDateTime::from_timestamp(0i64, 0), Utc);
+        let datetime = DateTime::new_utc(OffsetDateTime::UNIX_EPOCH);
         assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
     }

@@ -489,7 +489,8 @@ mod tests {
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_for_tests().unwrap();
        index_writer.set_merge_policy(Box::new(NoMergePolicy));
-        index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
+        index_writer
+            .add_document(doc!(date_field =>DateTime::new_utc(OffsetDateTime::now_utc())))?;
        index_writer.commit()?;
        index_writer.add_document(doc!())?;
        index_writer.commit()?;

@@ -509,7 +510,7 @@ mod tests {

    #[test]
    fn test_default_datetime() {
-        assert_eq!(crate::DateTime::make_zero().timestamp(), 0i64);
+        assert_eq!(0, DateTime::make_zero().to_unix_timestamp());
    }

    #[test]

@@ -526,16 +527,16 @@ mod tests {
        let mut index_writer = index.writer_for_tests()?;
        index_writer.set_merge_policy(Box::new(NoMergePolicy));
        index_writer.add_document(doc!(
-            date_field => crate::DateTime::from_u64(1i64.to_u64()),
-            multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
-            multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
+            date_field => DateTime::from_u64(1i64.to_u64()),
+            multi_date_field => DateTime::from_u64(2i64.to_u64()),
+            multi_date_field => DateTime::from_u64(3i64.to_u64())
        ))?;
        index_writer.add_document(doc!(
-            date_field => crate::DateTime::from_u64(4i64.to_u64())
+            date_field => DateTime::from_u64(4i64.to_u64())
        ))?;
        index_writer.add_document(doc!(
-            multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
-            multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
+            multi_date_field => DateTime::from_u64(5i64.to_u64()),
+            multi_date_field => DateTime::from_u64(6i64.to_u64())
        ))?;
        index_writer.commit()?;
        let reader = index.reader()?;

@@ -547,23 +548,23 @@ mod tests {
        let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
        let mut dates = vec![];
        {
-            assert_eq!(date_fast_field.get(0u32).timestamp(), 1i64);
+            assert_eq!(date_fast_field.get(0u32).to_unix_timestamp(), 1i64);
            dates_fast_field.get_vals(0u32, &mut dates);
            assert_eq!(dates.len(), 2);
-            assert_eq!(dates[0].timestamp(), 2i64);
-            assert_eq!(dates[1].timestamp(), 3i64);
+            assert_eq!(dates[0].to_unix_timestamp(), 2i64);
+            assert_eq!(dates[1].to_unix_timestamp(), 3i64);
        }
        {
-            assert_eq!(date_fast_field.get(1u32).timestamp(), 4i64);
+            assert_eq!(date_fast_field.get(1u32).to_unix_timestamp(), 4i64);
            dates_fast_field.get_vals(1u32, &mut dates);
            assert!(dates.is_empty());
        }
        {
-            assert_eq!(date_fast_field.get(2u32).timestamp(), 0i64);
+            assert_eq!(date_fast_field.get(2u32).to_unix_timestamp(), 0i64);
            dates_fast_field.get_vals(2u32, &mut dates);
            assert_eq!(dates.len(), 2);
-            assert_eq!(dates[0].timestamp(), 5i64);
-            assert_eq!(dates[1].timestamp(), 6i64);
+            assert_eq!(dates[0].to_unix_timestamp(), 5i64);
+            assert_eq!(dates[1].to_unix_timestamp(), 6i64);
        }
        Ok(())
    }
@@ -6,8 +6,6 @@ pub use self::writer::MultiValuedFastFieldWriter;

 #[cfg(test)]
 mod tests {
-
-    use chrono::Duration;
     use proptest::strategy::Strategy;
     use proptest::{prop_oneof, proptest};
     use test_log::test;

@@ -16,7 +14,9 @@ mod tests {
     use crate::indexer::NoMergePolicy;
     use crate::query::QueryParser;
     use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
-    use crate::{Document, Index, Term};
+    use crate::time::format_description::well_known::Rfc3339;
+    use crate::time::{Duration, OffsetDateTime};
+    use crate::{DateTime, Document, Index, Term};

     #[test]
     fn test_multivalued_u64() -> crate::Result<()> {

@@ -69,22 +69,27 @@ mod tests {
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_for_tests()?;
-        let first_time_stamp = chrono::Utc::now();
-        index_writer.add_document(
-            doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
-        )?;
+        let first_time_stamp = OffsetDateTime::now_utc();
+        index_writer.add_document(doc!(
+            date_field => DateTime::new_utc(first_time_stamp),
+            date_field => DateTime::new_utc(first_time_stamp),
+            time_i=>1i64))?;
        index_writer.add_document(doc!(time_i => 0i64))?;
        // add one second
-        index_writer.add_document(
-            doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64),
-        )?;
+        index_writer.add_document(doc!(
+            date_field => DateTime::new_utc(first_time_stamp + Duration::seconds(1)),
+            time_i => 2i64))?;
        // add another second
        let two_secs_ahead = first_time_stamp + Duration::seconds(2);
-        index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64))?;
+        index_writer.add_document(doc!(
+            date_field => DateTime::new_utc(two_secs_ahead),
+            date_field => DateTime::new_utc(two_secs_ahead),
+            date_field => DateTime::new_utc(two_secs_ahead),
+            time_i => 3i64))?;
        // add three seconds
-        index_writer.add_document(
-            doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64),
-        )?;
+        index_writer.add_document(doc!(
+            date_field => DateTime::new_utc(first_time_stamp + Duration::seconds(3)),
+            time_i => 4i64))?;
        index_writer.commit()?;

        let reader = index.reader()?;

@@ -96,7 +101,7 @@ mod tests {
        let parser = QueryParser::for_index(&index, vec![]);
        let query = parser.parse_query(&format!(
            "multi_date_field:\"{}\"",
-            first_time_stamp.to_rfc3339()
+            first_time_stamp.format(&Rfc3339)?,
        ))?;
        let results = searcher.search(&query, &TopDocs::with_limit(5))?;
        assert_eq!(results.len(), 1);

@@ -107,9 +112,8 @@ mod tests {
                .get_first(date_field)
                .expect("cannot find value")
                .as_date()
-                .unwrap()
-                .timestamp(),
-            first_time_stamp.timestamp()
+                .unwrap(),
+            DateTime::new_utc(first_time_stamp),
        );
        assert_eq!(
            retrieved_doc

@@ -123,7 +127,7 @@ mod tests {

        {
            let parser = QueryParser::for_index(&index, vec![date_field]);
-            let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))?;
+            let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.format(&Rfc3339)?))?;
            let results = searcher.search(&query, &TopDocs::with_limit(5))?;

            assert_eq!(results.len(), 1);

@@ -135,9 +139,8 @@ mod tests {
                .get_first(date_field)
                .expect("cannot find value")
                .as_date()
-                .unwrap()
-                .timestamp(),
-            two_secs_ahead.timestamp()
+                .unwrap(),
+            DateTime::new_utc(two_secs_ahead)
        );
        assert_eq!(
            retrieved_doc

@@ -153,8 +156,8 @@ mod tests {
        let parser = QueryParser::for_index(&index, vec![date_field]);
        let range_q = format!(
            "multi_date_field:[{} TO {}}}",
-            (first_time_stamp + Duration::seconds(1)).to_rfc3339(),
-            (first_time_stamp + Duration::seconds(3)).to_rfc3339()
+            (first_time_stamp + Duration::seconds(1)).format(&Rfc3339)?,
+            (first_time_stamp + Duration::seconds(3)).format(&Rfc3339)?
        );
        let query = parser.parse_query(&range_q)?;
        let results = searcher.search(&query, &TopDocs::with_limit(5))?;

@@ -177,9 +180,8 @@ mod tests {
                .get_first(date_field)
                .expect("cannot find value")
                .as_date()
-                .expect("value not of Date type")
-                .timestamp(),
-            (first_time_stamp + Duration::seconds(offset_sec)).timestamp()
+                .expect("value not of Date type"),
+            DateTime::new_utc(first_time_stamp + Duration::seconds(offset_sec)),
        );
        assert_eq!(
            retrieved_doc
@@ -5,7 +5,7 @@ use crate::fastfield::{
 };
 use crate::schema::{Cardinality, Field, FieldType, Schema};
 use crate::space_usage::PerFieldSpaceUsage;
-use crate::TantivyError;
+use crate::{DateTime, TantivyError};

 /// Provides access to all of the BitpackedFastFieldReader.
 ///

@@ -147,10 +147,10 @@ impl FastFieldReaders {
        self.typed_fast_field_reader(field)
    }

-    /// Returns the `i64` fast field reader reader associated to `field`.
+    /// Returns the `date` fast field reader reader associated to `field`.
    ///
-    /// If `field` is not a i64 fast field, this method returns an Error.
-    pub fn date(&self, field: Field) -> crate::Result<DynamicFastFieldReader<crate::DateTime>> {
+    /// If `field` is not a date fast field, this method returns an Error.
+    pub fn date(&self, field: Field) -> crate::Result<DynamicFastFieldReader<DateTime>> {
        self.check_type(field, FastType::Date, Cardinality::SingleValue)?;
        self.typed_fast_field_reader(field)
    }

@@ -195,13 +195,12 @@ impl FastFieldReaders {
        self.typed_fast_field_multi_reader(field)
    }

-    /// Returns a `crate::DateTime` multi-valued fast field reader reader associated to `field`.
+    /// Returns a `time::OffsetDateTime` multi-valued fast field reader reader associated to
+    /// `field`.
    ///
-    /// If `field` is not a `crate::DateTime` multi-valued fast field, this method returns an Error.
-    pub fn dates(
-        &self,
-        field: Field,
-    ) -> crate::Result<MultiValuedFastFieldReader<crate::DateTime>> {
+    /// If `field` is not a `time::OffsetDateTime` multi-valued fast field, this method returns an
+    /// Error.
+    pub fn dates(&self, field: Field) -> crate::Result<MultiValuedFastFieldReader<DateTime>> {
        self.check_type(field, FastType::Date, Cardinality::MultiValues)?;
        self.typed_fast_field_multi_reader(field)
    }
@@ -1,4 +1,3 @@
-use chrono::Utc;
 use fnv::FnvHashMap;
 use murmurhash32::murmurhash2;

@@ -6,8 +5,10 @@ use crate::fastfield::FastValue;
 use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
 use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP};
 use crate::schema::Type;
+use crate::time::format_description::well_known::Rfc3339;
+use crate::time::{OffsetDateTime, UtcOffset};
 use crate::tokenizer::TextAnalyzer;
-use crate::{DocId, Term};
+use crate::{DateTime, DocId, Term};

 /// This object is a map storing the last position for a given path for the current document
 /// being indexed.

@@ -151,7 +152,7 @@ fn index_json_value<'a>(
                );
            }
            TextOrDateTime::DateTime(dt) => {
-                json_term_writer.set_fast_value(dt);
+                json_term_writer.set_fast_value(DateTime::new_utc(dt));
                postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
            }
        },

@@ -184,13 +185,13 @@ fn index_json_value<'a>(

 enum TextOrDateTime<'a> {
     Text(&'a str),
-    DateTime(crate::DateTime),
+    DateTime(OffsetDateTime),
 }

 fn infer_type_from_str(text: &str) -> TextOrDateTime {
-    match chrono::DateTime::parse_from_rfc3339(text) {
+    match OffsetDateTime::parse(text, &Rfc3339) {
        Ok(dt) => {
-            let dt_utc = dt.with_timezone(&Utc);
+            let dt_utc = dt.to_offset(UtcOffset::UTC);
            TextOrDateTime::DateTime(dt_utc)
        }
        Err(_) => TextOrDateTime::Text(text),
@@ -1146,9 +1146,10 @@ mod tests {
        Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
        TextFieldIndexing, INDEXED, TEXT,
    };
+    use crate::time::OffsetDateTime;
    use crate::{
-        assert_nearly_equals, schema, DocAddress, DocSet, IndexSettings, IndexSortByField,
-        IndexWriter, Order, Searcher, SegmentId,
+        assert_nearly_equals, schema, DateTime, DocAddress, DocSet, IndexSettings,
+        IndexSortByField, IndexWriter, Order, Searcher, SegmentId,
    };

    #[test]

@@ -1166,14 +1167,14 @@ mod tests {
        let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST);
        let index = Index::create_in_ram(schema_builder.build());
        let reader = index.reader()?;
-        let curr_time = chrono::Utc::now();
+        let curr_time = OffsetDateTime::now_utc();
        {
            let mut index_writer = index.writer_for_tests()?;
            // writing the segment
            index_writer.add_document(doc!(
                text_field => "af b",
                score_field => 3u64,
-                date_field => curr_time,
+                date_field => DateTime::new_utc(curr_time),
                bytes_score_field => 3u32.to_be_bytes().as_ref()
            ))?;
            index_writer.add_document(doc!(

@@ -1190,7 +1191,7 @@ mod tests {
            // writing the segment
            index_writer.add_document(doc!(
                text_field => "af b",
-                date_field => curr_time,
+                date_field => DateTime::new_utc(curr_time),
                score_field => 11u64,
                bytes_score_field => 11u32.to_be_bytes().as_ref()
            ))?;

@@ -1246,7 +1247,10 @@ mod tests {
                ]
            );
            assert_eq!(
-                get_doc_ids(vec![Term::from_field_date(date_field, &curr_time)])?,
+                get_doc_ids(vec![Term::from_field_date(
+                    date_field,
+                    DateTime::new_utc(curr_time)
+                )])?,
                vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
            );
        }
@@ -1,7 +1,7 @@
 use super::doc_id_mapping::{get_doc_id_mapping_from_field, DocIdMapping};
 use super::operation::AddOperation;
 use crate::core::Segment;
-use crate::fastfield::FastFieldsWriter;
+use crate::fastfield::{FastFieldsWriter, FastValue as _};
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
 use crate::indexer::json_term_writer::index_json_values;
 use crate::indexer::segment_serializer::SegmentSerializer;

@@ -244,7 +244,7 @@ impl SegmentWriter {
                FieldType::Date(_) => {
                    for value in values {
                        let date_val = value.as_date().ok_or_else(make_schema_error)?;
-                        term_buffer.set_i64(date_val.timestamp());
+                        term_buffer.set_u64(date_val.to_u64());
                        postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
                    }
                }

@@ -414,16 +414,16 @@ pub fn prepare_doc_for_store(doc: Document, schema: &Schema) -> Document {

 #[cfg(test)]
 mod tests {
-    use chrono::Utc;
-
     use super::compute_initial_table_size;
     use crate::collector::Count;
     use crate::indexer::json_term_writer::JsonTermWriter;
     use crate::postings::TermInfo;
     use crate::query::PhraseQuery;
     use crate::schema::{IndexRecordOption, Schema, Type, STORED, STRING, TEXT};
+    use crate::time::format_description::well_known::Rfc3339;
+    use crate::time::OffsetDateTime;
     use crate::tokenizer::{PreTokenizedString, Token};
-    use crate::{DocAddress, DocSet, Document, Index, Postings, Term, TERMINATED};
+    use crate::{DateTime, DocAddress, DocSet, Document, Index, Postings, Term, TERMINATED};

     #[test]
     fn test_hashmap_size() {

@@ -523,11 +523,9 @@ mod tests {
        json_term_writer.pop_path_segment();
        json_term_writer.pop_path_segment();
        json_term_writer.push_path_segment("date");
-        json_term_writer.set_fast_value(
-            chrono::DateTime::parse_from_rfc3339("1985-04-12T23:20:50.52Z")
-                .unwrap()
-                .with_timezone(&Utc),
-        );
+        json_term_writer.set_fast_value(DateTime::new_utc(
+            OffsetDateTime::parse("1985-04-12T23:20:50.52Z", &Rfc3339).unwrap(),
+        ));
        assert!(term_stream.advance());
        assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());

src/lib.rs (88 changed lines)
@@ -125,7 +125,90 @@ mod functional_test;
 mod macros;
 mod future_result;

-pub use chrono;
+/// Re-export of the `time` crate
+///
+/// Tantivy uses [`time`](https://crates.io/crates/time) for dates.
+pub use time;
+
+use crate::time::format_description::well_known::Rfc3339;
+use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
+
+/// A date/time value with second precision.
+///
+/// This timestamp does not carry any explicit time zone information.
+/// Users are responsible for applying the provided conversion
+/// functions consistently. Internally the time zone is assumed
+/// to be UTC, which is also used implicitly for JSON serialization.
+///
+/// All constructors and conversions are provided as explicit
+/// functions and not by implementing any `From`/`Into` traits
+/// to prevent unintended usage.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct DateTime {
+    unix_timestamp: i64,
+}
+
+impl DateTime {
+    /// Create new from UNIX timestamp
+    pub const fn from_unix_timestamp(unix_timestamp: i64) -> Self {
+        Self { unix_timestamp }
+    }
+
+    /// Create new from `OffsetDateTime`
+    ///
+    /// The given date/time is converted to UTC and the actual
+    /// time zone is discarded.
+    pub const fn new_utc(dt: OffsetDateTime) -> Self {
+        Self::from_unix_timestamp(dt.unix_timestamp())
+    }
+
+    /// Create new from `PrimitiveDateTime`
+    ///
+    /// Implicitly assumes that the given date/time is in UTC!
+    /// Otherwise the original value must only be reobtained with
+    /// [`to_primitive()`].
+    pub const fn new_primitive(dt: PrimitiveDateTime) -> Self {
+        Self::new_utc(dt.assume_utc())
+    }
+
+    /// Convert to UNIX timestamp
+    pub const fn to_unix_timestamp(self) -> i64 {
+        let Self { unix_timestamp } = self;
+        unix_timestamp
+    }
+
+    /// Convert to UTC `OffsetDateTime`
+    pub fn to_utc(self) -> OffsetDateTime {
+        let Self { unix_timestamp } = self;
+        let utc_datetime =
+            OffsetDateTime::from_unix_timestamp(unix_timestamp).expect("valid UNIX timestamp");
+        debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
+        utc_datetime
+    }
+
+    /// Convert to `OffsetDateTime` with the given time zone
+    pub fn to_offset(self, offset: UtcOffset) -> OffsetDateTime {
+        self.to_utc().to_offset(offset)
+    }
+
+    /// Convert to `PrimitiveDateTime` without any time zone
+    ///
+    /// The value should have been constructed with [`from_primitive()`].
+    /// Otherwise the time zone is implicitly assumed to be UTC.
+    pub fn to_primitive(self) -> PrimitiveDateTime {
+        let utc_datetime = self.to_utc();
+        // Discard the UTC time zone offset
+        debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
+        PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time())
+    }
+}
+
+impl fmt::Debug for DateTime {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let utc_rfc3339 = self.to_utc().format(&Rfc3339).map_err(|_| fmt::Error)?;
+        f.write_str(&utc_rfc3339)
+    }
+}
+
 pub use crate::error::TantivyError;
 pub use crate::future_result::FutureResult;

@@ -140,9 +223,6 @@ pub type Result<T> = std::result::Result<T, TantivyError>;
 #[cfg(feature = "quickwit")]
 pub type AsyncIoResult<T> = std::result::Result<T, crate::error::AsyncIoError>;

-/// Tantivy DateTime
-pub type DateTime = chrono::DateTime<chrono::Utc>;
-
 mod core;
 mod indexer;
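A small sketch of the round trips promised by the doc comments above (constructor and conversion names taken from this diff; the exact behavior is assumed from those comments):

```rust
use tantivy::time::{Date, Month, OffsetDateTime, PrimitiveDateTime, Time, UtcOffset};
use tantivy::DateTime;

fn main() {
    // From an OffsetDateTime: the offset is discarded, the instant is kept as UTC.
    let from_offset = DateTime::new_utc(OffsetDateTime::now_utc());
    assert_eq!(from_offset.to_utc().offset(), UtcOffset::UTC);

    // From a PrimitiveDateTime: implicitly assumed to already be in UTC.
    let primitive = PrimitiveDateTime::new(
        Date::from_calendar_date(1982, Month::September, 17).unwrap(),
        Time::from_hms(13, 20, 0).unwrap(),
    );
    assert_eq!(DateTime::new_primitive(primitive).to_primitive(), primitive);

    // Only whole seconds since the UNIX epoch are stored internally.
    assert_eq!(DateTime::from_unix_timestamp(0).to_unix_timestamp(), 0);
}
```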
@@ -244,12 +244,12 @@ impl MoreLikeThis {
            FieldType::Date(_) => {
                for value in values {
                    // TODO: Ask if this is the semantic (timestamp) we want
-                    let val = value
+                    let unix_timestamp = value
                        .as_date()
                        .ok_or_else(|| TantivyError::InvalidArgument("invalid value".to_string()))?
-                        .timestamp();
-                    if !self.is_noise_word(val.to_string()) {
-                        let term = Term::from_field_i64(field, val);
+                        .to_unix_timestamp();
+                    if !self.is_noise_word(unix_timestamp.to_string()) {
+                        let term = Term::from_field_i64(field, unix_timestamp);
                        *term_frequencies.entry(term).or_insert(0) += 1;
                    }
                }
@@ -15,8 +15,10 @@ use crate::query::{
 use crate::schema::{
     Facet, FacetParseError, Field, FieldType, IndexRecordOption, Schema, Term, Type,
 };
+use crate::time::format_description::well_known::Rfc3339;
+use crate::time::{OffsetDateTime, UtcOffset};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
-use crate::Score;
+use crate::{DateTime, Score};

 /// Possible error that may happen when parsing a query.
 #[derive(Debug, PartialEq, Eq, Error)]

@@ -72,7 +74,7 @@ pub enum QueryParserError {
    RangeMustNotHavePhrase,
    /// The format for the date field is not RFC 3339 compliant.
    #[error("The date field has an invalid format")]
-    DateFormatError(#[from] chrono::ParseError),
+    DateFormatError(#[from] time::error::Parse),
    /// The format for the facet field is invalid.
    #[error("The facet field is malformed: {0}")]
    FacetFormatError(#[from] FacetParseError),

@@ -331,11 +333,8 @@ impl QueryParser {
                Ok(Term::from_field_f64(field, val))
            }
            FieldType::Date(_) => {
-                let dt = chrono::DateTime::parse_from_rfc3339(phrase)?;
-                Ok(Term::from_field_date(
-                    field,
-                    &dt.with_timezone(&chrono::Utc),
-                ))
+                let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
+                Ok(Term::from_field_date(field, DateTime::new_utc(dt)))
            }
            FieldType::Str(ref str_options) => {
                let option = str_options.get_indexing_options().ok_or_else(|| {

@@ -408,8 +407,8 @@ impl QueryParser {
                Ok(vec![LogicalLiteral::Term(f64_term)])
            }
            FieldType::Date(_) => {
-                let dt = chrono::DateTime::parse_from_rfc3339(phrase)?;
-                let dt_term = Term::from_field_date(field, &dt.with_timezone(&chrono::Utc));
+                let dt = OffsetDateTime::parse(phrase, &Rfc3339)?;
+                let dt_term = Term::from_field_date(field, DateTime::new_utc(dt));
                Ok(vec![LogicalLiteral::Term(dt_term)])
            }
            FieldType::Str(ref str_options) => {

@@ -665,12 +664,12 @@ enum NumValue {
    U64(u64),
    I64(i64),
    F64(f64),
-    DateTime(crate::DateTime),
+    DateTime(OffsetDateTime),
 }

 fn infer_type_num(phrase: &str) -> Option<NumValue> {
-    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(phrase) {
-        let dt_utc = dt.with_timezone(&chrono::Utc);
+    if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
+        let dt_utc = dt.to_offset(UtcOffset::UTC);
        return Some(NumValue::DateTime(dt_utc));
    }
    if let Ok(u64_val) = str::parse::<u64>(phrase) {

@@ -712,7 +711,7 @@ fn generate_literals_for_json_object(
            json_term_writer.set_fast_value(f64_val);
        }
        NumValue::DateTime(dt_val) => {
-            json_term_writer.set_fast_value(dt_val);
+            json_term_writer.set_fast_value(DateTime::new_utc(dt_val));
        }
    }
    logical_literals.push(LogicalLiteral::Term(json_term_writer.term().clone()));

@@ -1039,6 +1038,7 @@ mod test {

    #[test]
    fn test_json_field_possibly_a_date() {
+        // Subseconds are discarded
        test_parse_query_to_logical_ast_helper(
            r#"json.date:"2019-10-12T07:20:50.52Z""#,
            r#"(Term(type=Json, field=14, path=date, vtype=Date, 2019-10-12T07:20:50Z) "[(0, Term(type=Json, field=14, path=date, vtype=Str, "2019")), (1, Term(type=Json, field=14, path=date, vtype=Str, "10")), (2, Term(type=Json, field=14, path=date, vtype=Str, "12t07")), (3, Term(type=Json, field=14, path=date, vtype=Str, "20")), (4, Term(type=Json, field=14, path=date, vtype=Str, "50")), (5, Term(type=Json, field=14, path=date, vtype=Str, "52z"))]")"#,
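On the query side, the parser changes above mean a date literal must be an RFC 3339 string. A hedged sketch of how that looks from the user side (the `birthday` field name is made up for illustration):

```rust
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, INDEXED};
use tantivy::Index;

fn main() {
    let mut schema_builder = Schema::builder();
    // Hypothetical date field for illustration.
    let birthday = schema_builder.add_date_field("birthday", INDEXED);
    let index = Index::create_in_ram(schema_builder.build());

    // Date literals are now parsed with `OffsetDateTime::parse(..., &Rfc3339)`;
    // non-UTC offsets are converted to UTC before the term is built.
    let parser = QueryParser::for_index(&index, vec![birthday]);
    let query = parser.parse_query(r#"birthday:"1898-04-09T00:00:00Z""#);
    assert!(query.is_ok());
}
```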
@@ -110,7 +110,7 @@ impl Document {
        self.add_field_value(field, value);
    }

-    /// Add a date field
+    /// Add a date field with unspecified time zone offset
    pub fn add_date(&mut self, field: Field, value: DateTime) {
        self.add_field_value(field, value);
    }
@@ -1,4 +1,3 @@
-use chrono::{FixedOffset, Utc};
 use serde::{Deserialize, Serialize};
 use serde_json::Value as JsonValue;
 use thiserror::Error;

@@ -9,7 +8,10 @@ use crate::schema::{
     Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing, TextOptions,
     Value,
 };
+use crate::time::format_description::well_known::Rfc3339;
+use crate::time::OffsetDateTime;
 use crate::tokenizer::PreTokenizedString;
+use crate::DateTime;

 /// Possible error that may occur while parsing a field value
 /// At this point the JSON is known to be valid.

@@ -244,16 +246,15 @@ impl FieldType {
    /// target field is a `Str`, this method will return an Error.
    pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
        match json {
-            JsonValue::String(field_text) => match *self {
+            JsonValue::String(field_text) => {
+                match *self {
                FieldType::Date(_) => {
-                    let dt_with_fixed_tz: chrono::DateTime<FixedOffset> =
-                        chrono::DateTime::parse_from_rfc3339(&field_text).map_err(|_err| {
-                            ValueParsingError::TypeError {
+                    let dt_with_fixed_tz = OffsetDateTime::parse(&field_text, &Rfc3339)
+                        .map_err(|_err| ValueParsingError::TypeError {
                            expected: "rfc3339 format",
                            json: JsonValue::String(field_text),
-                        }
                        })?;
-                    Ok(Value::Date(dt_with_fixed_tz.with_timezone(&Utc)))
+                    Ok(DateTime::new_utc(dt_with_fixed_tz).into())
                }
                FieldType::Str(_) => Ok(Value::Str(field_text)),
                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {

@@ -270,7 +271,8 @@ impl FieldType {
                        expected: "a json object",
                        json: JsonValue::String(field_text),
                    }),
-            },
+                }
+            }
            JsonValue::Number(field_val_num) => match self {
                FieldType::I64(_) | FieldType::Date(_) => {
                    if let Some(field_val_i64) = field_val_num.as_i64() {

@@ -342,12 +344,12 @@ impl FieldType {

 #[cfg(test)]
 mod tests {
-    use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Utc};
     use serde_json::json;

     use super::FieldType;
     use crate::schema::field_type::ValueParsingError;
     use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
+    use crate::time::{Date, Month, PrimitiveDateTime, Time};
     use crate::tokenizer::{PreTokenizedString, Token};
     use crate::{DateTime, Document};

@@ -359,7 +361,8 @@ mod tests {
        let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
        let doc = schema.parse_document(doc_json).unwrap();
        let date = doc.get_first(date_field).unwrap();
-        assert_eq!(format!("{:?}", date), "Date(2019-10-12T05:20:50.520Z)");
+        // Time zone is converted to UTC and subseconds are discarded
+        assert_eq!("Date(2019-10-12T05:20:50Z)", format!("{:?}", date));
    }

    #[test]

@@ -368,12 +371,12 @@ mod tests {
        let mut schema_builder = Schema::builder();
        let date_field = schema_builder.add_date_field("date", INDEXED);
        let schema = schema_builder.build();
-        let naive_date = NaiveDate::from_ymd(1982, 9, 17);
-        let naive_time = NaiveTime::from_hms(13, 20, 00);
-        let date_time = DateTime::from_utc(NaiveDateTime::new(naive_date, naive_time), Utc);
-        doc.add_date(date_field, date_time);
+        let naive_date = Date::from_calendar_date(1982, Month::September, 17).unwrap();
+        let naive_time = Time::from_hms(13, 20, 0).unwrap();
+        let date_time = PrimitiveDateTime::new(naive_date, naive_time);
+        doc.add_date(date_field, DateTime::new_primitive(date_time));
        let doc_json = schema.to_json(&doc);
-        assert_eq!(doc_json, r#"{"date":["1982-09-17T13:20:00+00:00"]}"#);
+        assert_eq!(doc_json, r#"{"date":["1982-09-17T13:20:00Z"]}"#);
    }

    #[test]
||||
@@ -70,8 +70,8 @@ impl Term {
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a DateTime value
|
||||
pub fn from_field_date(field: Field, val: &DateTime) -> Term {
|
||||
Term::from_fast_value(field, val)
|
||||
pub fn from_field_date(field: Field, val: DateTime) -> Term {
|
||||
Term::from_fast_value(field, &val)
|
||||
}
|
||||
|
||||
/// Creates a `Term` given a facet.
|
||||
@@ -126,7 +126,7 @@ impl Term {
|
||||
}
|
||||
|
||||
/// Sets a `i64` value in the term.
|
||||
pub fn set_date(&mut self, date: crate::DateTime) {
|
||||
pub fn set_date(&mut self, date: DateTime) {
|
||||
self.set_fast_value(date);
|
||||
}
|
||||
|
||||
@@ -266,8 +266,8 @@ where B: AsRef<[u8]>
|
||||
///
|
||||
/// Returns None if the term is not of the Date type, or if the term byte representation
|
||||
/// is invalid.
|
||||
pub fn as_date(&self) -> Option<crate::DateTime> {
|
||||
self.get_fast_type::<crate::DateTime>()
|
||||
pub fn as_date(&self) -> Option<DateTime> {
|
||||
self.get_fast_type::<DateTime>()
|
||||
}
|
||||
|
||||
/// Returns the text associated with the term.
|
||||
@@ -374,7 +374,7 @@ fn debug_value_bytes(typ: Type, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Re
|
||||
}
|
||||
// TODO pretty print these types too.
|
||||
Type::Date => {
|
||||
write_opt(f, get_fast_type::<crate::DateTime>(bytes))?;
|
||||
write_opt(f, get_fast_type::<DateTime>(bytes))?;
|
||||
}
|
||||
Type::Facet => {
|
||||
let facet_str = str::from_utf8(bytes)
|
||||
|
||||
@@ -22,7 +22,7 @@ pub enum Value {
    I64(i64),
    /// 64-bits Float `f64`
    F64(f64),
-    /// Signed 64-bits Date time stamp `date`
+    /// Date/time with second precision
    Date(DateTime),
    /// Facet
    Facet(Facet),

@@ -43,7 +43,7 @@ impl Serialize for Value {
            Value::U64(u) => serializer.serialize_u64(u),
            Value::I64(u) => serializer.serialize_i64(u),
            Value::F64(u) => serializer.serialize_f64(u),
-            Value::Date(ref date) => serializer.serialize_str(&date.to_rfc3339()),
+            Value::Date(ref date) => time::serde::rfc3339::serialize(&date.to_utc(), serializer),
            Value::Facet(ref facet) => facet.serialize(serializer),
            Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
            Value::JsonObject(ref obj) => obj.serialize(serializer),

@@ -154,9 +154,9 @@ impl Value {
    /// Returns the Date-value, provided the value is of the `Date` type.
    ///
    /// Returns None if the value is not of type `Date`.
-    pub fn as_date(&self) -> Option<&DateTime> {
+    pub fn as_date(&self) -> Option<DateTime> {
        if let Value::Date(date) = self {
-            Some(date)
+            Some(*date)
        } else {
            None
        }

@@ -209,9 +209,9 @@ impl From<f64> for Value {
    }
 }

-impl From<crate::DateTime> for Value {
-    fn from(date_time: crate::DateTime) -> Value {
-        Value::Date(date_time)
+impl From<DateTime> for Value {
+    fn from(dt: DateTime) -> Value {
+        Value::Date(dt)
    }
 }

@@ -265,12 +265,12 @@ impl From<serde_json::Value> for Value {
 mod binary_serialize {
    use std::io::{self, Read, Write};

-    use chrono::{TimeZone, Utc};
    use common::{f64_to_u64, u64_to_f64, BinarySerializable};

    use super::Value;
    use crate::schema::Facet;
    use crate::tokenizer::PreTokenizedString;
+    use crate::DateTime;

    const TEXT_CODE: u8 = 0;
    const U64_CODE: u8 = 1;

@@ -319,7 +319,8 @@ mod binary_serialize {
            }
            Value::Date(ref val) => {
                DATE_CODE.serialize(writer)?;
-                val.timestamp().serialize(writer)
+                let DateTime { unix_timestamp } = val;
+                unix_timestamp.serialize(writer)
            }
            Value::Facet(ref facet) => {
                HIERARCHICAL_FACET_CODE.serialize(writer)?;

@@ -357,8 +358,8 @@ mod binary_serialize {
                Ok(Value::F64(value))
            }
            DATE_CODE => {
-                let timestamp = i64::deserialize(reader)?;
-                Ok(Value::Date(Utc.timestamp(timestamp, 0)))
+                let unix_timestamp = i64::deserialize(reader)?;
+                Ok(Value::Date(DateTime::from_unix_timestamp(unix_timestamp)))
            }
            HIERARCHICAL_FACET_CODE => Ok(Value::Facet(Facet::deserialize(reader)?)),
            BYTES_CODE => Ok(Value::Bytes(Vec::<u8>::deserialize(reader)?)),

@@ -401,15 +402,24 @@ mod binary_serialize {

    #[cfg(test)]
    mod tests {
-        use std::str::FromStr;
-
        use super::Value;
+        use crate::time::format_description::well_known::Rfc3339;
+        use crate::time::OffsetDateTime;
        use crate::DateTime;

        #[test]
        fn test_serialize_date() {
-            let value = Value::Date(DateTime::from_str("1996-12-20T00:39:57+00:00").unwrap());
+            let value = Value::from(DateTime::new_utc(
+                OffsetDateTime::parse("1996-12-20T00:39:57+00:00", &Rfc3339).unwrap(),
+            ));
            let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();
-            assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57+00:00""#);
+            assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57Z""#);
+            let value = Value::from(DateTime::new_utc(
+                OffsetDateTime::parse("1996-12-20T00:39:57-01:00", &Rfc3339).unwrap(),
+            ));
+            let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();
+            // The time zone information gets lost by conversion into `Value::Date` and
+            // implicitly becomes UTC.
+            assert_eq!(serialized_value_json, r#""1996-12-20T01:39:57Z""#);
        }
    }