Updated DateTime to hold timestamp in microseconds, while making date field precision configurable (#1396)
@@ -1,3 +1,10 @@
+Tantivy 0.19
+================================
+- Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
+  The `DateTime` type has been updated to hold timestamps with microsecond precision.
+  `DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used as a hint for fast value compression; everywhere else (i.e. terms, indexing), second precision is used.
+
Tantivy 0.18
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
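As a quick orientation for the entry above, here is a minimal sketch of the new API (an editor's illustration based on this commit, not code from it; the field name and timestamps are made up):

use tantivy::schema::{Cardinality, DateOptions, Schema, INDEXED};
use tantivy::{DatePrecision, DateTime};

fn main() {
    // Configure a date field whose fast values keep only millisecond precision.
    let mut schema_builder = Schema::builder();
    let opts = DateOptions::from(INDEXED)
        .set_fast(Cardinality::SingleValue)
        .set_precision(DatePrecision::Milliseconds);
    schema_builder.add_date_field("occurred_at", opts);
    let _schema = schema_builder.build();

    // DateTime itself now carries microseconds; the precision setting only
    // affects how fast-field values are truncated before being stored.
    let dt = DateTime::from_timestamp_micros(1_655_902_430_530_123);
    assert_eq!(dt.into_timestamp_millis(), 1_655_902_430_530);
    assert_eq!(dt.into_timestamp_secs(), 1_655_902_430);
}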
@@ -49,7 +49,7 @@ thiserror = "1.0.30"
htmlescape = "0.3.1"
fail = "0.5.0"
murmurhash32 = "0.2.0"
-time = { version = "0.3.9", features = ["serde-well-known"] }
+time = { version = "0.3.10", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.7.5"
examples/date_time_field.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
// # DateTime field example
//
// This example shows how the DateTime field can be used

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Cardinality, DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // # Defining the schema
    let mut schema_builder = Schema::builder();
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast(Cardinality::SingleValue)
        .set_precision(tantivy::DatePrecision::Seconds);
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
    let schema = schema_builder.build();

    // # Indexing documents
    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer(50_000_000)?;
    let doc = schema.parse_document(
        r#"{
        "occurred_at": "2022-06-22T12:53:50.53Z",
        "event": "pull-request"
    }"#,
    )?;
    index_writer.add_document(doc)?;
    let doc = schema.parse_document(
        r#"{
        "occurred_at": "2022-06-22T13:00:00.22Z",
        "event": "comment"
    }"#,
    )?;
    index_writer.add_document(doc)?;
    index_writer.commit()?;

    let reader = index.reader()?;
    let searcher = reader.searcher();

    // # Default fields: event_type
    let query_parser = QueryParser::for_index(&index, vec![event_type]);
    {
        let query = query_parser.parse_query("event:comment")?;
        let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
        assert_eq!(count_docs.len(), 1);
    }
    {
        let query = query_parser
            .parse_query(r#"occurred_at:[2022-06-22T12:58:00Z TO 2022-06-23T00:00:00Z}"#)?;
        let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
        assert_eq!(count_docs.len(), 1);
        for (_score, doc_address) in count_docs {
            let retrieved_doc = searcher.doc(doc_address)?;
            assert!(matches!(
                retrieved_doc.get_first(occurred_at),
                Some(Value::Date(_))
            ));
            assert_eq!(
                schema.to_json(&retrieved_doc),
                r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
            );
        }
    }
    Ok(())
}
@@ -14,7 +14,7 @@ pub struct BitpackedFastFieldReader {
    pub max_value_u64: u64,
}

-impl<'data> FastFieldCodecReader for BitpackedFastFieldReader {
+impl FastFieldCodecReader for BitpackedFastFieldReader {
    /// Opens a fast field given a file.
    fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
        let (_data, mut footer) = bytes.split_at(bytes.len() - 16);
@@ -575,7 +575,7 @@ mod test {
        for special_char in SPECIAL_CHARS.iter() {
            let query = &format!("\\{special_char}my\\{special_char}field:a");
            assert_eq!(
-               super::field_name().parse(&query),
+               super::field_name().parse(query),
                Ok((format!("{special_char}my{special_char}field"), "a"))
            );
        }
@@ -36,7 +36,10 @@ pub struct IntermediateAggregationResults {

impl IntermediateAggregationResults {
    /// Convert intermediate result and its aggregation request to the final result.
-   pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> {
+   pub(crate) fn into_final_bucket_result(
+       self,
+       req: Aggregations,
+   ) -> crate::Result<AggregationResults> {
        self.into_final_bucket_result_internal(&(req.into()))
    }
@@ -72,8 +72,7 @@ impl HistogramComputer {
            return;
        }
        let delta = value - self.min_value;
-       let delta_u64 = delta.to_u64();
-       let bucket_id: usize = self.divider.divide(delta_u64) as usize;
+       let bucket_id: usize = self.divider.divide(delta) as usize;
        if bucket_id < self.counts.len() {
            self.counts[bucket_id] += 1;
        }
@@ -287,7 +286,7 @@ mod tests {
            DateTime::from_primitive(
                Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
            ),
-           3600 * 24 * 365, // it is just for a unit test... sorry leap years.
+           3_600_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
            10,
        );
        let week_histogram = searcher.search(&all_query, &week_histogram_collector)?;
@@ -52,11 +52,13 @@ pub trait MultiValueLength {
    fn get_total_len(&self) -> u64;
}

-/// Trait for types that are allowed for fast fields: (u64, i64 and f64).
+/// Trait for types that are allowed for fast fields:
+/// (u64, i64 and f64, bool, DateTime).
pub trait FastValue: Clone + Copy + Send + Sync + PartialOrd + 'static {
    /// Converts a value from u64
    ///
    /// Internally all fast field values are encoded as u64.
+   /// **Note: To be used for converting encoded Term, Posting values.**
    fn from_u64(val: u64) -> Self;

    /// Converts a value to u64.
@@ -189,24 +191,27 @@ impl FastValue for bool {
}

impl FastValue for DateTime {
-   fn from_u64(timestamp_u64: u64) -> Self {
-       let unix_timestamp = i64::from_u64(timestamp_u64);
-       Self::from_unix_timestamp(unix_timestamp)
+   /// Converts a timestamp microseconds into DateTime.
+   ///
+   /// **Note the timestamps is expected to be in microseconds.**
+   fn from_u64(timestamp_micros_u64: u64) -> Self {
+       let timestamp_micros = i64::from_u64(timestamp_micros_u64);
+       Self::from_timestamp_micros(timestamp_micros)
    }

    fn to_u64(&self) -> u64 {
-       self.into_unix_timestamp().to_u64()
+       common::i64_to_u64(self.into_timestamp_micros())
    }

    fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
        match *field_type {
-           FieldType::Date(ref integer_options) => integer_options.get_fastfield_cardinality(),
+           FieldType::Date(ref options) => options.get_fastfield_cardinality(),
            _ => None,
        }
    }

    fn as_u64(&self) -> u64 {
-       self.into_unix_timestamp().as_u64()
+       self.into_timestamp_micros().as_u64()
    }

    fn to_type() -> Type {
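A small sketch of what the change above means for the fast-field encoding (editor's illustration; the timestamp value is arbitrary): the `u64` representation now round-trips microseconds instead of seconds.

use tantivy::fastfield::FastValue;
use tantivy::DateTime;

fn main() {
    let dt = DateTime::from_timestamp_micros(1_655_902_430_530_000);
    // to_u64/from_u64 map the i64 microsecond timestamp through the monotonic
    // i64 -> u64 encoding used by all fast fields.
    let encoded: u64 = dt.to_u64();
    let decoded = DateTime::from_u64(encoded);
    assert_eq!(decoded.into_timestamp_micros(), 1_655_902_430_530_000);
}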
@@ -261,9 +266,9 @@ mod tests {
    use super::*;
    use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
    use crate::merge_policy::NoMergePolicy;
-   use crate::schema::{Document, Field, NumericOptions, Schema, FAST, STRING, TEXT};
+   use crate::schema::{Document, Field, Schema, FAST, STRING, TEXT};
    use crate::time::OffsetDateTime;
-   use crate::{Index, SegmentId, SegmentReader};
+   use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};

    pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
        let mut schema_builder = Schema::builder();
@@ -559,8 +564,8 @@ mod tests {
    }

    #[test]
-   fn test_default_datetime() {
-       assert_eq!(0, DateTime::make_zero().into_unix_timestamp());
+   fn test_default_date() {
+       assert_eq!(0, DateTime::make_zero().into_timestamp_secs());
    }

    fn get_vals_for_docs(ff: &MultiValuedFastFieldReader<u64>, docs: Range<u32>) -> Vec<u64> {
@@ -766,10 +771,15 @@ mod tests {
    fn test_datefastfield() -> crate::Result<()> {
        use crate::fastfield::FastValue;
        let mut schema_builder = Schema::builder();
-       let date_field = schema_builder.add_date_field("date", FAST);
+       let date_field = schema_builder.add_date_field(
+           "date",
+           DateOptions::from(FAST).set_precision(DatePrecision::Microseconds),
+       );
        let multi_date_field = schema_builder.add_date_field(
            "multi_date",
-           NumericOptions::default().set_fast(Cardinality::MultiValues),
+           DateOptions::default()
+               .set_precision(DatePrecision::Microseconds)
+               .set_fast(Cardinality::MultiValues),
        );
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
@@ -797,23 +807,23 @@ mod tests {
        let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
        let mut dates = vec![];
        {
-           assert_eq!(date_fast_field.get(0u32).into_unix_timestamp(), 1i64);
+           assert_eq!(date_fast_field.get(0u32).into_timestamp_micros(), 1i64);
            dates_fast_field.get_vals(0u32, &mut dates);
            assert_eq!(dates.len(), 2);
-           assert_eq!(dates[0].into_unix_timestamp(), 2i64);
-           assert_eq!(dates[1].into_unix_timestamp(), 3i64);
+           assert_eq!(dates[0].into_timestamp_micros(), 2i64);
+           assert_eq!(dates[1].into_timestamp_micros(), 3i64);
        }
        {
-           assert_eq!(date_fast_field.get(1u32).into_unix_timestamp(), 4i64);
+           assert_eq!(date_fast_field.get(1u32).into_timestamp_micros(), 4i64);
            dates_fast_field.get_vals(1u32, &mut dates);
            assert!(dates.is_empty());
        }
        {
-           assert_eq!(date_fast_field.get(2u32).into_unix_timestamp(), 0i64);
+           assert_eq!(date_fast_field.get(2u32).into_timestamp_micros(), 0i64);
            dates_fast_field.get_vals(2u32, &mut dates);
            assert_eq!(dates.len(), 2);
-           assert_eq!(dates[0].into_unix_timestamp(), 5i64);
-           assert_eq!(dates[1].into_unix_timestamp(), 6i64);
+           assert_eq!(dates[0].into_timestamp_micros(), 5i64);
+           assert_eq!(dates[1].into_timestamp_micros(), 6i64);
        }
        Ok(())
    }
@@ -13,7 +13,7 @@ mod tests {
    use crate::collector::TopDocs;
    use crate::indexer::NoMergePolicy;
    use crate::query::QueryParser;
-   use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
+   use crate::schema::{Cardinality, DateOptions, Facet, FacetOptions, NumericOptions, Schema};
    use crate::time::format_description::well_known::Rfc3339;
    use crate::time::{Duration, OffsetDateTime};
    use crate::{DateTime, Document, Index, Term};

@@ -58,7 +58,7 @@ mod tests {
        let mut schema_builder = Schema::builder();
        let date_field = schema_builder.add_date_field(
            "multi_date_field",
-           NumericOptions::default()
+           DateOptions::default()
                .set_fast(Cardinality::MultiValues)
                .set_indexed()
                .set_fieldnorm()
@@ -4,12 +4,12 @@ use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;

use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
-use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
+use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType, FastValue};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
-use crate::schema::{Document, Field};
+use crate::schema::{Document, Field, Value};
use crate::termdict::TermOrdinal;
-use crate::DocId;
+use crate::{DatePrecision, DocId};

/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.

@@ -36,6 +36,7 @@ use crate::DocId;
/// term ids when the segment is getting serialized.
pub struct MultiValuedFastFieldWriter {
    field: Field,
+   precision_opt: Option<DatePrecision>,
    vals: Vec<UnorderedTermId>,
    doc_index: Vec<u64>,
    fast_field_type: FastFieldType,

@@ -43,9 +44,14 @@ pub struct MultiValuedFastFieldWriter {

impl MultiValuedFastFieldWriter {
    /// Creates a new `MultiValuedFastFieldWriter`
-   pub(crate) fn new(field: Field, fast_field_type: FastFieldType) -> Self {
+   pub(crate) fn new(
+       field: Field,
+       fast_field_type: FastFieldType,
+       precision_opt: Option<DatePrecision>,
+   ) -> Self {
        MultiValuedFastFieldWriter {
            field,
+           precision_opt,
            vals: Vec::new(),
            doc_index: Vec::new(),
            fast_field_type,

@@ -83,7 +89,14 @@ impl MultiValuedFastFieldWriter {
        }
        for field_value in doc.field_values() {
            if field_value.field == self.field {
-               self.add_val(value_to_u64(field_value.value()));
+               let value = field_value.value();
+               let value_u64 = match (self.precision_opt, value) {
+                   (Some(precision), Value::Date(date_val)) => {
+                       date_val.truncate(precision).to_u64()
+                   }
+                   _ => value_to_u64(value),
+               };
+               self.add_val(value_u64);
            }
        }
    }
@@ -7,12 +7,13 @@ use tantivy_bitpacker::BlockedBitpacker;

use super::multivalued::MultiValuedFastFieldWriter;
use super::serializer::FastFieldStats;
-use super::{FastFieldDataAccess, FastFieldType};
+use super::{FastFieldDataAccess, FastFieldType, FastValue};
use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
-use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema};
+use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema, Value};
use crate::termdict::TermOrdinal;
+use crate::DatePrecision;

/// The `FastFieldsWriter` groups all of the fast field writers.
pub struct FastFieldsWriter {

@@ -43,31 +44,51 @@ impl FastFieldsWriter {
            FieldType::I64(ref int_options)
            | FieldType::U64(ref int_options)
            | FieldType::F64(ref int_options)
-           | FieldType::Bool(ref int_options)
-           | FieldType::Date(ref int_options) => {
+           | FieldType::Bool(ref int_options) => {
                match int_options.get_fastfield_cardinality() {
                    Some(Cardinality::SingleValue) => {
-                       let mut fast_field_writer = IntFastFieldWriter::new(field);
+                       let mut fast_field_writer = IntFastFieldWriter::new(field, None);
                        let default_value = fast_field_default_value(field_entry);
                        fast_field_writer.set_val_if_missing(default_value);
                        single_value_writers.push(fast_field_writer);
                    }
                    Some(Cardinality::MultiValues) => {
-                       let fast_field_writer =
-                           MultiValuedFastFieldWriter::new(field, FastFieldType::Numeric);
+                       let fast_field_writer = MultiValuedFastFieldWriter::new(
+                           field,
+                           FastFieldType::Numeric,
+                           None,
+                       );
                        multi_values_writers.push(fast_field_writer);
                    }
                    None => {}
                }
            }
+           FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
+               Some(Cardinality::SingleValue) => {
+                   let mut fast_field_writer =
+                       IntFastFieldWriter::new(field, Some(options.get_precision()));
+                   let default_value = fast_field_default_value(field_entry);
+                   fast_field_writer.set_val_if_missing(default_value);
+                   single_value_writers.push(fast_field_writer);
+               }
+               Some(Cardinality::MultiValues) => {
+                   let fast_field_writer = MultiValuedFastFieldWriter::new(
+                       field,
+                       FastFieldType::Numeric,
+                       Some(options.get_precision()),
+                   );
+                   multi_values_writers.push(fast_field_writer);
+               }
+               None => {}
+           },
            FieldType::Facet(_) => {
                let fast_field_writer =
-                   MultiValuedFastFieldWriter::new(field, FastFieldType::Facet);
+                   MultiValuedFastFieldWriter::new(field, FastFieldType::Facet, None);
                term_id_writers.push(fast_field_writer);
            }
            FieldType::Str(_) if field_entry.is_fast() => {
                let fast_field_writer =
-                   MultiValuedFastFieldWriter::new(field, FastFieldType::String);
+                   MultiValuedFastFieldWriter::new(field, FastFieldType::String, None);
                term_id_writers.push(fast_field_writer);
            }
            FieldType::Bytes(bytes_option) => {

@@ -230,6 +251,7 @@ impl FastFieldsWriter {
/// using `common::i64_to_u64` and `common::f64_to_u64`.
pub struct IntFastFieldWriter {
    field: Field,
+   precision_opt: Option<DatePrecision>,
    vals: BlockedBitpacker,
    val_count: usize,
    val_if_missing: u64,

@@ -239,9 +261,10 @@ pub struct IntFastFieldWriter {

impl IntFastFieldWriter {
    /// Creates a new `IntFastFieldWriter`
-   pub fn new(field: Field) -> IntFastFieldWriter {
+   pub fn new(field: Field, precision_opt: Option<DatePrecision>) -> IntFastFieldWriter {
        IntFastFieldWriter {
            field,
+           precision_opt,
            vals: BlockedBitpacker::new(),
            val_count: 0,
            val_if_missing: 0u64,

@@ -305,7 +328,13 @@ impl IntFastFieldWriter {
    pub fn add_document(&mut self, doc: &Document) {
        match doc.get_first(self.field) {
            Some(v) => {
-               self.add_val(super::value_to_u64(v));
+               let value = match (self.precision_opt, v) {
+                   (Some(precision), Value::Date(date_val)) => {
+                       date_val.truncate(precision).to_u64()
+                   }
+                   _ => super::value_to_u64(v),
+               };
+               self.add_val(value);
            }
            None => {
                self.add_val(self.val_if_missing);
@@ -8,7 +8,7 @@ use crate::schema::{Field, Type};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::TextAnalyzer;
-use crate::{DateTime, DocId, Term};
+use crate::{DatePrecision, DateTime, DocId, Term};

/// This object is a map storing the last position for a given path for the current document
/// being indexed.

@@ -323,9 +323,16 @@ impl<'a> JsonTermWriter<'a> {

    pub fn set_fast_value<T: FastValue>(&mut self, val: T) {
        self.close_path_and_set_type(T::to_type());
+       let value = if T::to_type() == Type::Date {
+           DateTime::from_u64(val.to_u64())
+               .truncate(DatePrecision::Seconds)
+               .to_u64()
+       } else {
+           val.to_u64()
+       };
        self.term_buffer
            .as_mut()
-           .extend_from_slice(val.to_u64().to_be_bytes().as_slice());
+           .extend_from_slice(value.to_be_bytes().as_slice());
    }

    #[cfg(test)]
@@ -298,8 +298,16 @@ impl IndexMerger {
            FieldType::U64(ref options)
            | FieldType::I64(ref options)
            | FieldType::F64(ref options)
-           | FieldType::Bool(ref options)
-           | FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
+           | FieldType::Bool(ref options) => match options.get_fastfield_cardinality() {
                Some(Cardinality::SingleValue) => {
                    self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
                }
                Some(Cardinality::MultiValues) => {
                    self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
                }
                None => {}
            },
+           FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
+               Some(Cardinality::SingleValue) => {
+                   self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?;
+               }
@@ -14,7 +14,7 @@ use crate::store::{StoreReader, StoreWriter};
use crate::tokenizer::{
    BoxTokenStream, FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer,
};
-use crate::{DocId, Document, Opstamp, SegmentComponent};
+use crate::{DatePrecision, DocId, Document, Opstamp, SegmentComponent};

/// Computes the initial size of the hash table.
///

@@ -248,7 +248,7 @@ impl SegmentWriter {
                FieldType::Date(_) => {
                    for value in values {
                        let date_val = value.as_date().ok_or_else(make_schema_error)?;
-                       term_buffer.set_u64(date_val.to_u64());
+                       term_buffer.set_u64(date_val.truncate(DatePrecision::Seconds).to_u64());
                        postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
                    }
                }
src/lib.rs (103 lines changed)
@@ -133,7 +133,7 @@ pub use time;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};

-/// A date/time value with second precision.
+/// A date/time value with microsecond precision.
///
/// This timestamp does not carry any explicit time zone information.
/// Users are responsible for applying the provided conversion

@@ -145,13 +145,30 @@ use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// to prevent unintended usage.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime {
-   unix_timestamp: i64,
+   // Timestamp in microseconds.
+   pub(crate) timestamp_micros: i64,
}

impl DateTime {
-   /// Create new from UNIX timestamp
-   pub const fn from_unix_timestamp(unix_timestamp: i64) -> Self {
-       Self { unix_timestamp }
+   /// Create new from UNIX timestamp in seconds
+   pub const fn from_timestamp_secs(seconds: i64) -> Self {
+       Self {
+           timestamp_micros: seconds * 1_000_000,
+       }
+   }
+
+   /// Create new from UNIX timestamp in milliseconds
+   pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
+       Self {
+           timestamp_micros: milliseconds * 1_000,
+       }
+   }
+
+   /// Create new from UNIX timestamp in microseconds.
+   pub const fn from_timestamp_micros(microseconds: i64) -> Self {
+       Self {
+           timestamp_micros: microseconds,
+       }
    }

    /// Create new from `OffsetDateTime`

@@ -159,7 +176,8 @@ impl DateTime {
    /// The given date/time is converted to UTC and the actual
    /// time zone is discarded.
    pub const fn from_utc(dt: OffsetDateTime) -> Self {
-       Self::from_unix_timestamp(dt.unix_timestamp())
+       let timestamp_micros = dt.unix_timestamp() as i64 * 1_000_000 + dt.microsecond() as i64;
+       Self { timestamp_micros }
    }

    /// Create new from `PrimitiveDateTime`

@@ -167,21 +185,30 @@ impl DateTime {
    /// Implicitly assumes that the given date/time is in UTC!
    /// Otherwise the original value must only be reobtained with
    /// [`Self::into_primitive()`].
-   pub const fn from_primitive(dt: PrimitiveDateTime) -> Self {
+   pub fn from_primitive(dt: PrimitiveDateTime) -> Self {
        Self::from_utc(dt.assume_utc())
    }

-   /// Convert to UNIX timestamp
-   pub const fn into_unix_timestamp(self) -> i64 {
-       let Self { unix_timestamp } = self;
-       unix_timestamp
+   /// Convert to UNIX timestamp in seconds.
+   pub const fn into_timestamp_secs(self) -> i64 {
+       self.timestamp_micros / 1_000_000
+   }
+
+   /// Convert to UNIX timestamp in milliseconds.
+   pub const fn into_timestamp_millis(self) -> i64 {
+       self.timestamp_micros / 1_000
+   }
+
+   /// Convert to UNIX timestamp in microseconds.
+   pub const fn into_timestamp_micros(self) -> i64 {
+       self.timestamp_micros
    }

    /// Convert to UTC `OffsetDateTime`
    pub fn into_utc(self) -> OffsetDateTime {
-       let Self { unix_timestamp } = self;
-       let utc_datetime =
-           OffsetDateTime::from_unix_timestamp(unix_timestamp).expect("valid UNIX timestamp");
+       let timestamp_nanos = self.timestamp_micros as i128 * 1000;
+       let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos)
+           .expect("valid UNIX timestamp");
        debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
        utc_datetime
    }

@@ -201,6 +228,18 @@ impl DateTime {
        debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
        PrimitiveDateTime::new(utc_datetime.date(), utc_datetime.time())
    }
+
+   /// Truncates the microseconds value to the corresponding precision.
+   pub(crate) fn truncate(self, precision: DatePrecision) -> Self {
+       let truncated_timestamp_micros = match precision {
+           DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000,
+           DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000,
+           DatePrecision::Microseconds => self.timestamp_micros,
+       };
+       Self {
+           timestamp_micros: truncated_timestamp_micros,
+       }
+   }
}

impl fmt::Debug for DateTime {

@@ -269,7 +308,7 @@ pub use crate::indexer::operation::UserOperation;
pub use crate::indexer::{merge_filtered_segments, merge_indices, IndexWriter, PreparedCommit};
pub use crate::postings::Postings;
pub use crate::reader::LeasedItem;
-pub use crate::schema::{Document, Term};
+pub use crate::schema::{DateOptions, DatePrecision, Document, Term};

/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 4;

@@ -385,6 +424,7 @@ pub mod tests {
    use rand::distributions::{Bernoulli, Uniform};
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
+   use time::OffsetDateTime;

    use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
    use crate::core::SegmentReader;

@@ -393,7 +433,7 @@ pub mod tests {
    use crate::merge_policy::NoMergePolicy;
    use crate::query::BooleanQuery;
    use crate::schema::*;
-   use crate::{DocAddress, Index, Postings, ReloadPolicy};
+   use crate::{DateTime, DocAddress, Index, Postings, ReloadPolicy};

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();

@@ -1102,4 +1142,35 @@ pub mod tests {
        assert!(index.validate_checksum()?.is_empty());
        Ok(())
    }
+
+   #[test]
+   fn test_datetime() {
+       let now = OffsetDateTime::now_utc();
+
+       let dt = DateTime::from_utc(now).into_utc();
+       assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
+       assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
+       // We don't store nanosecond level precision.
+       assert_ne!(dt.to_hms_nano(), now.to_hms_nano());
+
+       let dt = DateTime::from_timestamp_secs(now.unix_timestamp()).into_utc();
+       assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
+       assert_eq!(dt.to_hms(), now.to_hms());
+       // Constructed from a second precision.
+       assert_ne!(dt.to_hms_micro(), now.to_hms_micro());
+
+       let dt =
+           DateTime::from_timestamp_micros((now.unix_timestamp_nanos() / 1_000) as i64).into_utc();
+       assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
+       assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
+
+       let dt_from_ts_nanos =
+           OffsetDateTime::from_unix_timestamp_nanos(18446744073709551615i128).unwrap();
+       let offset_dt = DateTime::from_utc(dt_from_ts_nanos).into_utc();
+       assert_eq!(
+           dt_from_ts_nanos.to_ordinal_date(),
+           offset_dt.to_ordinal_date()
+       );
+       assert_eq!(dt_from_ts_nanos.to_hms_micro(), offset_dt.to_hms_micro());
+   }
}
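Since `DateTime::truncate` above is `pub(crate)`, the sketch below simply mirrors its arithmetic as a free function so the rounding behaviour is easy to check in isolation (editor's illustration, not a tantivy API):

use tantivy::DatePrecision;

// Mirrors the pub(crate) DateTime::truncate logic shown above.
fn truncate_micros(timestamp_micros: i64, precision: DatePrecision) -> i64 {
    match precision {
        DatePrecision::Seconds => (timestamp_micros / 1_000_000) * 1_000_000,
        DatePrecision::Milliseconds => (timestamp_micros / 1_000) * 1_000,
        DatePrecision::Microseconds => timestamp_micros,
    }
}

fn main() {
    let ts = 1_655_902_430_530_123;
    assert_eq!(truncate_micros(ts, DatePrecision::Milliseconds), 1_655_902_430_530_000);
    assert_eq!(truncate_micros(ts, DatePrecision::Seconds), 1_655_902_430_000_000);
    assert_eq!(truncate_micros(ts, DatePrecision::Microseconds), ts);
}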
@@ -243,13 +243,12 @@ impl MoreLikeThis {
            }
            FieldType::Date(_) => {
                for value in values {
-                   // TODO: Ask if this is the semantic (timestamp) we want
-                   let unix_timestamp = value
+                   let timestamp_micros = value
                        .as_date()
                        .ok_or_else(|| TantivyError::InvalidArgument("invalid value".to_string()))?
-                       .into_unix_timestamp();
-                   if !self.is_noise_word(unix_timestamp.to_string()) {
-                       let term = Term::from_field_i64(field, unix_timestamp);
+                       .into_timestamp_micros();
+                   if !self.is_noise_word(timestamp_micros.to_string()) {
+                       let term = Term::from_field_i64(field, timestamp_micros);
                        *term_frequencies.entry(term).or_insert(0) += 1;
                    }
                }
@@ -1068,7 +1068,6 @@ mod test {

    #[test]
    fn test_json_field_possibly_a_date() {
-       // Subseconds are discarded
        test_parse_query_to_logical_ast_helper(
            r#"json.date:"2019-10-12T07:20:50.52Z""#,
            r#"(Term(type=Json, field=14, path=date, vtype=Date, 2019-10-12T07:20:50Z) "[(0, Term(type=Json, field=14, path=date, vtype=Str, "2019")), (1, Term(type=Json, field=14, path=date, vtype=Str, "10")), (2, Term(type=Json, field=14, path=date, vtype=Str, "12t07")), (3, Term(type=Json, field=14, path=date, vtype=Str, "20")), (4, Term(type=Json, field=14, path=date, vtype=Str, "50")), (5, Term(type=Json, field=14, path=date, vtype=Str, "52z"))]")"#,

@@ -1352,9 +1351,16 @@ mod test {
            query_parser.parse_query("date:18a"),
            Err(QueryParserError::DateFormatError(_))
        );
-       assert!(query_parser
-           .parse_query("date:\"1985-04-12T23:20:50.52Z\"")
-           .is_ok());
+       test_parse_query_to_logical_ast_helper(
+           r#"date:"2010-11-21T09:55:06.000000000+02:00""#,
+           r#"Term(type=Date, field=9, 2010-11-21T07:55:06Z)"#,
+           true,
+       );
+       test_parse_query_to_logical_ast_helper(
+           r#"date:"1985-04-12T23:20:50.52Z""#,
+           r#"Term(type=Date, field=9, 1985-04-12T23:20:50Z)"#,
+           true,
+       );
    }

    #[test]
src/schema/date_time_options.rs (new file, 276 lines)
@@ -0,0 +1,276 @@
use std::ops::BitOr;

use serde::{Deserialize, Serialize};

use super::Cardinality;
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};

/// DateTime Precision
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DatePrecision {
    /// Seconds precision
    Seconds,
    /// Milli-seconds precision.
    Milliseconds,
    /// Micro-seconds precision.
    Microseconds,
}

impl Default for DatePrecision {
    fn default() -> Self {
        DatePrecision::Seconds
    }
}

/// Defines how DateTime field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DateOptions {
    indexed: bool,
    // This boolean has no effect if the field is not marked as indexed true.
    fieldnorms: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    fast: Option<Cardinality>,
    stored: bool,
    // Internal storage precision, used to optimize storage
    // compression on fast fields.
    #[serde(default)]
    precision: DatePrecision,
}

impl DateOptions {
    /// Returns true iff the value is stored.
    pub fn is_stored(&self) -> bool {
        self.stored
    }

    /// Returns true iff the value is indexed and therefore searchable.
    pub fn is_indexed(&self) -> bool {
        self.indexed
    }

    /// Returns true iff the field has fieldnorm.
    pub fn fieldnorms(&self) -> bool {
        self.fieldnorms && self.indexed
    }

    /// Returns true iff the value is a fast field and multivalue.
    pub fn is_multivalue_fast(&self) -> bool {
        if let Some(cardinality) = self.fast {
            cardinality == Cardinality::MultiValues
        } else {
            false
        }
    }

    /// Returns true iff the value is a fast field.
    pub fn is_fast(&self) -> bool {
        self.fast.is_some()
    }

    /// Set the field as stored.
    ///
    /// Only the fields that are set as *stored* are
    /// persisted into the Tantivy's store.
    #[must_use]
    pub fn set_stored(mut self) -> DateOptions {
        self.stored = true;
        self
    }

    /// Set the field as indexed.
    ///
    /// Setting an integer as indexed will generate
    /// a posting list for each value taken by the integer.
    ///
    /// This is required for the field to be searchable.
    #[must_use]
    pub fn set_indexed(mut self) -> DateOptions {
        self.indexed = true;
        self
    }

    /// Set the field with fieldnorm.
    ///
    /// Setting an integer as fieldnorm will generate
    /// the fieldnorm data for it.
    #[must_use]
    pub fn set_fieldnorm(mut self) -> DateOptions {
        self.fieldnorms = true;
        self
    }

    /// Set the field as a single-valued fast field.
    ///
    /// Fast fields are designed for random access.
    /// Access time are similar to a random lookup in an array.
    /// If more than one value is associated to a fast field, only the last one is
    /// kept.
    #[must_use]
    pub fn set_fast(mut self, cardinality: Cardinality) -> DateOptions {
        self.fast = Some(cardinality);
        self
    }

    /// Returns the cardinality of the fastfield.
    ///
    /// If the field has not been declared as a fastfield, then
    /// the method returns None.
    pub fn get_fastfield_cardinality(&self) -> Option<Cardinality> {
        self.fast
    }

    /// Sets the precision for this DateTime field.
    ///
    /// Internal storage precision, used to optimize storage
    /// compression on fast fields.
    pub fn set_precision(mut self, precision: DatePrecision) -> DateOptions {
        self.precision = precision;
        self
    }

    /// Returns the storage precision for this DateTime field.
    ///
    /// Internal storage precision, used to optimize storage
    /// compression on fast fields.
    pub fn get_precision(&self) -> DatePrecision {
        self.precision
    }
}

impl From<()> for DateOptions {
    fn from(_: ()) -> DateOptions {
        DateOptions::default()
    }
}

impl From<FastFlag> for DateOptions {
    fn from(_: FastFlag) -> Self {
        DateOptions {
            indexed: false,
            fieldnorms: false,
            stored: false,
            fast: Some(Cardinality::SingleValue),
            ..Default::default()
        }
    }
}

impl From<StoredFlag> for DateOptions {
    fn from(_: StoredFlag) -> Self {
        DateOptions {
            indexed: false,
            fieldnorms: false,
            stored: true,
            fast: None,
            ..Default::default()
        }
    }
}

impl From<IndexedFlag> for DateOptions {
    fn from(_: IndexedFlag) -> Self {
        DateOptions {
            indexed: true,
            fieldnorms: true,
            stored: false,
            fast: None,
            ..Default::default()
        }
    }
}

impl<T: Into<DateOptions>> BitOr<T> for DateOptions {
    type Output = DateOptions;

    fn bitor(self, other: T) -> DateOptions {
        let other = other.into();
        DateOptions {
            indexed: self.indexed | other.indexed,
            fieldnorms: self.fieldnorms | other.fieldnorms,
            stored: self.stored | other.stored,
            fast: self.fast.or(other.fast),
            precision: self.precision,
        }
    }
}

impl<Head, Tail> From<SchemaFlagList<Head, Tail>> for DateOptions
where
    Head: Clone,
    Tail: Clone,
    Self: BitOr<Output = Self> + From<Head> + From<Tail>,
{
    fn from(head_tail: SchemaFlagList<Head, Tail>) -> Self {
        Self::from(head_tail.head) | Self::from(head_tail.tail)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_date_options_consistent_with_default() {
        let date_time_options: DateOptions = serde_json::from_str(
            r#"{
            "indexed": false,
            "fieldnorms": false,
            "stored": false
        }"#,
        )
        .unwrap();
        assert_eq!(date_time_options, DateOptions::default());
    }

    #[test]
    fn test_serialize_date_option() {
        let date_options = serde_json::from_str::<DateOptions>(
            r#"
            {
                "indexed": true,
                "fieldnorms": false,
                "stored": false,
                "precision": "milliseconds"
            }"#,
        )
        .unwrap();

        let date_options_json = serde_json::to_value(&date_options).unwrap();
        assert_eq!(
            date_options_json,
            serde_json::json!({
                "precision": "milliseconds",
                "indexed": true,
                "fieldnorms": false,
                "stored": false
            })
        );
    }

    #[test]
    fn test_deserialize_date_options_with_wrong_options() {
        assert!(serde_json::from_str::<DateOptions>(
            r#"{
            "indexed": true,
            "fieldnorms": false,
            "stored": "wrong_value"
        }"#
        )
        .unwrap_err()
        .to_string()
        .contains("expected a boolean"));

        assert!(serde_json::from_str::<DateOptions>(
            r#"{
            "indexed": true,
            "fieldnorms": false,
            "stored": false,
            "precision": "hours"
        }"#
        )
        .unwrap_err()
        .to_string()
        .contains("unknown variant `hours`"));
    }
}
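A short sketch of how `DateOptions` behaves with serde, based on the tests above (editor's illustration; it assumes `serde_json` is available in the caller's crate):

use tantivy::schema::DateOptions;
use tantivy::DatePrecision;

fn main() {
    // "precision" is optional and defaults to seconds; unknown variants are rejected.
    let opts: DateOptions = serde_json::from_str(
        r#"{ "indexed": true, "fieldnorms": true, "stored": false, "precision": "milliseconds" }"#,
    )
    .unwrap();
    assert!(opts.is_indexed());
    assert_eq!(opts.get_precision(), DatePrecision::Milliseconds);
    println!("{}", serde_json::to_string(&opts).unwrap());
}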
@@ -2,7 +2,8 @@ use serde::{Deserialize, Serialize};

use crate::schema::bytes_options::BytesOptions;
use crate::schema::{
-   is_valid_field_name, FacetOptions, FieldType, JsonObjectOptions, NumericOptions, TextOptions,
+   is_valid_field_name, DateOptions, FacetOptions, FieldType, JsonObjectOptions, NumericOptions,
+   TextOptions,
};

/// A `FieldEntry` represents a field and its configuration.

@@ -55,7 +56,7 @@ impl FieldEntry {
    }

    /// Creates a new date field entry.
-   pub fn new_date(field_name: String, date_options: NumericOptions) -> FieldEntry {
+   pub fn new_date(field_name: String, date_options: DateOptions) -> FieldEntry {
        Self::new(field_name, FieldType::Date(date_options))
    }

@@ -107,8 +108,8 @@ impl FieldEntry {
            FieldType::U64(ref options)
            | FieldType::I64(ref options)
            | FieldType::F64(ref options)
-           | FieldType::Date(ref options)
            | FieldType::Bool(ref options) => options.is_stored(),
+           FieldType::Date(ref options) => options.is_stored(),
            FieldType::Str(ref options) => options.is_stored(),
            FieldType::Facet(ref options) => options.is_stored(),
            FieldType::Bytes(ref options) => options.is_stored(),
@@ -5,8 +5,8 @@ use thiserror::Error;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::{
-   Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing, TextOptions,
-   Value,
+   DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing,
+   TextOptions, Value,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;

@@ -27,6 +27,11 @@ pub enum ValueParsingError {
        expected: &'static str,
        json: serde_json::Value,
    },
+   #[error("Parse error on {json}: {error}")]
+   ParseError {
+       error: String,
+       json: serde_json::Value,
+   },
    #[error("Invalid base64: {base64}")]
    InvalidBase64 { base64: String },
}

@@ -133,7 +138,7 @@ pub enum FieldType {
    /// Bool field type configuration
    Bool(NumericOptions),
    /// Signed 64-bits Date 64 field type configuration,
-   Date(NumericOptions),
+   Date(DateOptions),
    /// Hierachical Facet
    Facet(FacetOptions),
    /// Bytes (one per document)

@@ -202,8 +207,8 @@ impl FieldType {
            FieldType::U64(ref int_options)
            | FieldType::I64(ref int_options)
            | FieldType::F64(ref int_options)
-           | FieldType::Date(ref int_options)
-           | FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality().is_some(),
+           | FieldType::Bool(ref int_options) => int_options.is_fast(),
+           FieldType::Date(ref date_options) => date_options.is_fast(),
            FieldType::Facet(_) => true,
            FieldType::JsonObject(_) => false,
        }

@@ -219,8 +224,8 @@ impl FieldType {
            FieldType::U64(ref int_options)
            | FieldType::I64(ref int_options)
            | FieldType::F64(ref int_options)
-           | FieldType::Date(ref int_options)
            | FieldType::Bool(ref int_options) => int_options.fieldnorms(),
+           FieldType::Date(ref date_options) => date_options.fieldnorms(),
            FieldType::Facet(_) => false,
            FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(),
            FieldType::JsonObject(ref _json_object_options) => false,

@@ -243,7 +248,6 @@ impl FieldType {
            FieldType::U64(ref int_options)
            | FieldType::I64(ref int_options)
            | FieldType::F64(ref int_options)
-           | FieldType::Date(ref int_options)
            | FieldType::Bool(ref int_options) => {
                if int_options.is_indexed() {
                    Some(IndexRecordOption::Basic)

@@ -251,6 +255,13 @@ impl FieldType {
                    None
                }
            }
+           FieldType::Date(ref date_options) => {
+               if date_options.is_indexed() {
+                   Some(IndexRecordOption::Basic)
+               } else {
+                   None
+               }
+           }
            FieldType::Facet(ref _facet_options) => Some(IndexRecordOption::Basic),
            FieldType::Bytes(ref bytes_options) => {
                if bytes_options.is_indexed() {

@@ -273,7 +284,7 @@ impl FieldType {
    pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
        match json {
            JsonValue::String(field_text) => {
-               match *self {
+               match self {
                    FieldType::Date(_) => {
                        let dt_with_fixed_tz = OffsetDateTime::parse(&field_text, &Rfc3339)
                            .map_err(|_err| ValueParsingError::TypeError {

@@ -402,8 +413,8 @@ mod tests {
        let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
        let doc = schema.parse_document(doc_json).unwrap();
        let date = doc.get_first(date_field).unwrap();
-       // Time zone is converted to UTC and subseconds are discarded
-       assert_eq!("Date(2019-10-12T05:20:50Z)", format!("{:?}", date));
+       // Time zone is converted to UTC
+       assert_eq!("Date(2019-10-12T05:20:50.52Z)", format!("{:?}", date));
    }

    #[test]
@@ -1,6 +1,7 @@
use std::ops::BitOr;

use crate::schema::{NumericOptions, TextOptions};
+use crate::DateOptions;

#[derive(Clone)]
pub struct StoredFlag;

@@ -65,6 +66,14 @@ impl<T: Clone + Into<NumericOptions>> BitOr<NumericOptions> for SchemaFlagList<T
    }
}

+impl<T: Clone + Into<DateOptions>> BitOr<DateOptions> for SchemaFlagList<T, ()> {
+   type Output = DateOptions;
+
+   fn bitor(self, rhs: DateOptions) -> Self::Output {
+       self.head.into() | rhs
+   }
+}
+
impl<T: Clone + Into<TextOptions>> BitOr<TextOptions> for SchemaFlagList<T, ()> {
    type Output = TextOptions;
@@ -117,6 +117,7 @@ mod field_type;
mod field_value;

mod bytes_options;
+mod date_time_options;
mod field;
mod flags;
mod index_record_option;

@@ -127,6 +128,7 @@ mod text_options;
mod value;

pub use self::bytes_options::BytesOptions;
+pub use self::date_time_options::{DateOptions, DatePrecision};
pub use self::document::Document;
pub(crate) use self::facet::FACET_SEP_BYTE;
pub use self::facet::{Facet, FacetParseError};
@@ -134,7 +134,7 @@ impl SchemaBuilder {
    /// by the second one.
    /// The first field will get a field id
    /// but only the second one will be indexed
-   pub fn add_date_field<T: Into<NumericOptions>>(
+   pub fn add_date_field<T: Into<DateOptions>>(
        &mut self,
        field_name_str: &str,
        field_options: T,

@@ -813,7 +813,7 @@ mod tests {
                .set_tokenizer("raw")
                .set_index_option(IndexRecordOption::Basic),
        );
-       let timestamp_options = NumericOptions::default()
+       let timestamp_options = DateOptions::default()
            .set_stored()
            .set_indexed()
            .set_fieldnorm()

@@ -875,7 +875,8 @@ mod tests {
            "indexed": true,
            "fieldnorms": true,
            "fast": "single",
-           "stored": true
+           "stored": true,
+           "precision": "seconds"
        }
    },
    {
@@ -5,7 +5,7 @@ use std::{fmt, str};
use super::Field;
use crate::fastfield::FastValue;
use crate::schema::{Facet, Type};
-use crate::DateTime;
+use crate::{DatePrecision, DateTime};

/// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term.
/// <field> + <type byte> + <value len>

@@ -76,7 +76,7 @@ impl Term {

    /// Builds a term given a field, and a DateTime value
    pub fn from_field_date(field: Field, val: DateTime) -> Term {
-       Term::from_fast_value(field, &val)
+       Term::from_fast_value(field, &val.truncate(DatePrecision::Seconds))
    }

    /// Creates a `Term` given a facet.
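One consequence of the `Term::from_field_date` change above, sketched for illustration (editor's example, not part of the commit): two `DateTime`s that differ only below the second produce the same indexed term.

use tantivy::schema::{Schema, INDEXED};
use tantivy::{DateTime, Term};

fn main() {
    let mut schema_builder = Schema::builder();
    let field = schema_builder.add_date_field("ts", INDEXED);
    let _schema = schema_builder.build();

    // Terms are truncated to second precision regardless of the fast-field
    // precision configured on the field.
    let micros = DateTime::from_timestamp_micros(1_655_902_430_530_000);
    let secs = DateTime::from_timestamp_secs(1_655_902_430);
    assert_eq!(
        Term::from_field_date(field, micros),
        Term::from_field_date(field, secs)
    );
}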
@@ -24,7 +24,7 @@ pub enum Value {
    F64(f64),
    /// Bool value
    Bool(bool),
-   /// Date/time with second precision
+   /// Date/time with microseconds precision
    Date(DateTime),
    /// Facet
    Facet(Facet),

@@ -251,7 +251,7 @@ impl<'a> From<&'a [u8]> for Value {
    }
}

-impl<'a> From<Facet> for Value {
+impl From<Facet> for Value {
    fn from(facet: Facet) -> Value {
        Value::Facet(facet)
    }

@@ -348,8 +348,10 @@ mod binary_serialize {
            }
            Value::Date(ref val) => {
                DATE_CODE.serialize(writer)?;
-               let DateTime { unix_timestamp } = val;
-               unix_timestamp.serialize(writer)
+               let DateTime {
+                   timestamp_micros, ..
+               } = val;
+               timestamp_micros.serialize(writer)
            }
            Value::Facet(ref facet) => {
                HIERARCHICAL_FACET_CODE.serialize(writer)?;

@@ -391,8 +393,10 @@ mod binary_serialize {
                Ok(Value::Bool(value))
            }
            DATE_CODE => {
-               let unix_timestamp = i64::deserialize(reader)?;
-               Ok(Value::Date(DateTime::from_unix_timestamp(unix_timestamp)))
+               let timestamp_micros = i64::deserialize(reader)?;
+               Ok(Value::Date(DateTime::from_timestamp_micros(
+                   timestamp_micros,
+               )))
            }
            HIERARCHICAL_FACET_CODE => Ok(Value::Facet(Facet::deserialize(reader)?)),
            BYTES_CODE => Ok(Value::Bytes(Vec::<u8>::deserialize(reader)?)),