Rename DatePrecision to DateTimePrecision (#2051)

This commit is contained in:
Adrien Guillo
2023-05-23 11:09:11 -04:00
committed by GitHub
parent 8cf26da4b2
commit a789ad9aee
10 changed files with 60 additions and 31 deletions

View File

@@ -1,5 +1,5 @@
test:
echo "Run test only... No examples."
@echo "Run test only... No examples."
cargo test --tests --lib
fmt:

View File

@@ -1,26 +1,41 @@
#![allow(deprecated)]
use std::fmt;
use serde::{Deserialize, Serialize};
use time::format_description::well_known::Rfc3339;
use time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// DateTime Precision
/// Precision with which datetimes are truncated when stored in fast fields. This setting is only
/// relevant for fast fields. In the docstore, datetimes are always saved with nanosecond precision.
#[derive(
Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum DatePrecision {
/// Seconds precision
pub enum DateTimePrecision {
/// Second precision.
#[default]
Second,
/// Millisecond precision.
Millisecond,
/// Microsecond precision.
Microsecond,
/// Nanosecond precision.
Nanosecond,
// TODO: Remove deprecated variants after 2 releases.
#[deprecated(since = "0.20.0", note = "Use `Second` instead")]
Seconds,
/// Milli-seconds precision.
#[deprecated(since = "0.20.0", note = "Use `Millisecond` instead")]
Milliseconds,
/// Micro-seconds precision.
#[deprecated(since = "0.20.0", note = "Use `Microsecond` instead")]
Microseconds,
/// Nano-seconds precision.
#[deprecated(since = "0.20.0", note = "Use `Nanosecond` instead")]
Nanoseconds,
}
#[deprecated(since = "0.20.0", note = "Use `DateTimePrecision` instead")]
pub type DatePrecision = DateTimePrecision;
/// A date/time value with nanosecond precision.
///
/// This timestamp does not carry any explicit time zone information.
@@ -139,12 +154,18 @@ impl DateTime {
}
/// Truncates the nanosecond timestamp to the corresponding precision.
pub fn truncate(self, precision: DatePrecision) -> Self {
pub fn truncate(self, precision: DateTimePrecision) -> Self {
let truncated_timestamp_micros = match precision {
DatePrecision::Seconds => (self.timestamp_nanos / 1_000_000_000) * 1_000_000_000,
DatePrecision::Milliseconds => (self.timestamp_nanos / 1_000_000) * 1_000_000,
DatePrecision::Microseconds => (self.timestamp_nanos / 1_000) * 1_000,
DatePrecision::Nanoseconds => self.timestamp_nanos,
DateTimePrecision::Second | DateTimePrecision::Seconds => {
(self.timestamp_nanos / 1_000_000_000) * 1_000_000_000
}
DateTimePrecision::Millisecond | DateTimePrecision::Milliseconds => {
(self.timestamp_nanos / 1_000_000) * 1_000_000
}
DateTimePrecision::Microsecond | DateTimePrecision::Microseconds => {
(self.timestamp_nanos / 1_000) * 1_000
}
DateTimePrecision::Nanosecond | DateTimePrecision::Nanoseconds => self.timestamp_nanos,
};
Self {
timestamp_nanos: truncated_timestamp_micros,

View File

@@ -14,7 +14,9 @@ mod vint;
mod writer;
pub use bitset::*;
pub use byte_count::ByteCount;
pub use datetime::{DatePrecision, DateTime};
#[allow(deprecated)]
pub use datetime::DatePrecision;
pub use datetime::{DateTime, DateTimePrecision};
pub use group_by::GroupByIteratorExtended;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};

View File

@@ -13,7 +13,7 @@ fn main() -> tantivy::Result<()> {
let opts = DateOptions::from(INDEXED)
.set_stored()
.set_fast()
.set_precision(tantivy::DatePrecision::Seconds);
.set_precision(tantivy::DateTimePrecision::Second);
// Add `occurred_at` date field type
let occurred_at = schema_builder.add_date_field("occurred_at", opts);
let event_type = schema_builder.add_text_field("event", STRING | STORED);

View File

@@ -95,7 +95,7 @@ mod tests {
};
use crate::time::OffsetDateTime;
use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager};
use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};
use crate::{DateOptions, DateTimePrecision, Index, SegmentId, SegmentReader};
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
@@ -686,12 +686,12 @@ mod tests {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"date",
DateOptions::from(FAST).set_precision(DatePrecision::Nanoseconds),
DateOptions::from(FAST).set_precision(DateTimePrecision::Nanosecond),
);
let multi_date_field = schema_builder.add_date_field(
"multi_date",
DateOptions::default()
.set_precision(DatePrecision::Nanoseconds)
.set_precision(DateTimePrecision::Nanosecond)
.set_fast(),
);
let schema = schema_builder.build();
@@ -862,9 +862,9 @@ mod tests {
#[test]
pub fn test_gcd_date() {
let size_prec_sec = test_gcd_date_with_codec(DatePrecision::Seconds);
let size_prec_sec = test_gcd_date_with_codec(DateTimePrecision::Second);
assert!((1000 * 13 / 8..100 + 1000 * 13 / 8).contains(&size_prec_sec.get_bytes())); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
let size_prec_micros = test_gcd_date_with_codec(DatePrecision::Microseconds);
let size_prec_micros = test_gcd_date_with_codec(DateTimePrecision::Microsecond);
assert!((1000 * 33 / 8..100 + 1000 * 33 / 8).contains(&size_prec_micros.get_bytes()));
// 33 bits per
// val = ceil(log_2(number
@@ -872,7 +872,7 @@ mod tests {
// in 2hours);
}
fn test_gcd_date_with_codec(precision: DatePrecision) -> ByteCount {
fn test_gcd_date_with_codec(precision: DateTimePrecision) -> ByteCount {
let mut rng = StdRng::seed_from_u64(2u64);
const T0: i64 = 1_662_345_825_012_529i64;
const ONE_HOUR_IN_MICROSECS: i64 = 3_600 * 1_000_000;

View File

@@ -8,7 +8,7 @@ use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
use crate::schema::{value_type_to_column_type, Document, FieldType, Schema, Type, Value};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::{DatePrecision, DocId, TantivyError};
use crate::{DateTimePrecision, DocId, TantivyError};
/// Only index JSON down to a depth of 20.
/// This is mostly to guard us from a stack overflow triggered by malicious input.
@@ -19,7 +19,7 @@ pub struct FastFieldsWriter {
columnar_writer: ColumnarWriter,
fast_field_names: Vec<Option<String>>, //< TODO see if we can hash the field name hash too.
per_field_tokenizer: Vec<Option<TextAnalyzer>>,
date_precisions: Vec<DatePrecision>,
date_precisions: Vec<DateTimePrecision>,
expand_dots: Vec<bool>,
num_docs: DocId,
// Buffer that we recycle to avoid allocation.
@@ -41,8 +41,8 @@ impl FastFieldsWriter {
let mut columnar_writer = ColumnarWriter::default();
let mut fast_field_names: Vec<Option<String>> = vec![None; schema.num_fields()];
let mut date_precisions: Vec<DatePrecision> =
std::iter::repeat_with(DatePrecision::default)
let mut date_precisions: Vec<DateTimePrecision> =
std::iter::repeat_with(DateTimePrecision::default)
.take(schema.num_fields())
.collect();
let mut expand_dots = vec![false; schema.num_fields()];

View File

@@ -184,7 +184,9 @@ pub use crate::directory::Directory;
pub use crate::indexer::operation::UserOperation;
pub use crate::indexer::{merge_filtered_segments, merge_indices, IndexWriter, PreparedCommit};
pub use crate::postings::Postings;
pub use crate::schema::{DateOptions, DatePrecision, Document, Term};
#[allow(deprecated)]
pub use crate::schema::DatePrecision;
pub use crate::schema::{DateOptions, DateTimePrecision, Document, Term};
/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 5;

View File

@@ -1,12 +1,14 @@
use std::ops::BitOr;
#[allow(deprecated)]
pub use common::DatePrecision;
pub use common::DateTimePrecision;
use serde::{Deserialize, Serialize};
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
/// The precision of the indexed date/time values in the inverted index.
pub const DATE_TIME_PRECISION_INDEXED: DatePrecision = DatePrecision::Seconds;
pub const DATE_TIME_PRECISION_INDEXED: DateTimePrecision = DateTimePrecision::Second;
/// Defines how a DateTime field should be handled by tantivy.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -20,7 +22,7 @@ pub struct DateOptions {
// Internal storage precision, used to optimize storage
// compression on fast fields.
#[serde(default)]
precision: DatePrecision,
precision: DateTimePrecision,
}
impl DateOptions {
@@ -93,7 +95,7 @@ impl DateOptions {
///
/// Internal storage precision, used to optimize storage
/// compression on fast fields.
pub fn set_precision(mut self, precision: DatePrecision) -> DateOptions {
pub fn set_precision(mut self, precision: DateTimePrecision) -> DateOptions {
self.precision = precision;
self
}
@@ -102,7 +104,7 @@ impl DateOptions {
///
/// Internal storage precision, used to optimize storage
/// compression on fast fields.
pub fn get_precision(&self) -> DatePrecision {
pub fn get_precision(&self) -> DateTimePrecision {
self.precision
}
}

View File

@@ -129,7 +129,9 @@ mod value;
use columnar::ColumnType;
pub use self::bytes_options::BytesOptions;
pub use self::date_time_options::{DateOptions, DatePrecision, DATE_TIME_PRECISION_INDEXED};
#[allow(deprecated)]
pub use self::date_time_options::DatePrecision;
pub use self::date_time_options::{DateOptions, DateTimePrecision, DATE_TIME_PRECISION_INDEXED};
pub use self::document::Document;
pub(crate) use self::facet::FACET_SEP_BYTE;
pub use self::facet::{Facet, FacetParseError};

View File

@@ -973,7 +973,7 @@ mod tests {
"fieldnorms": true,
"fast": true,
"stored": true,
"precision": "seconds"
"precision": "second"
}
},
{