switch to nanosecond precision (#2016)

This commit is contained in:
PSeitz
2023-05-01 09:32:20 +08:00
committed by GitHub
parent cbf2bdc75b
commit ba309e18a1
14 changed files with 115 additions and 98 deletions

View File

@@ -139,12 +139,12 @@ impl MonotonicallyMappableToU64 for i64 {
impl MonotonicallyMappableToU64 for DateTime {
#[inline(always)]
fn to_u64(self) -> u64 {
common::i64_to_u64(self.into_timestamp_micros())
common::i64_to_u64(self.into_timestamp_nanos())
}
#[inline(always)]
fn from_u64(val: u64) -> Self {
DateTime::from_timestamp_micros(common::u64_to_i64(val))
DateTime::from_timestamp_nanos(common::u64_to_i64(val))
}
}

View File

@@ -27,7 +27,7 @@ pub struct StatsCollector {
// This is the same as computing the difference between the values and the first value.
//
// This way, we can compress i64-converted-to-u64 (e.g. timestamp that were supplied in
// seconds, only to be converted in microseconds).
// seconds, only to be converted in nanoseconds).
increment_gcd_opt: Option<(NonZeroU64, DividerU64)>,
first_value_opt: Option<u64>,
}

View File

@@ -266,7 +266,7 @@ impl ColumnarWriter {
let mut column: ColumnWriter = column_opt.unwrap_or_default();
column.record(
doc,
NumericalValue::I64(datetime.into_timestamp_micros()),
NumericalValue::I64(datetime.into_timestamp_nanos()),
arena,
);
column

View File

@@ -109,7 +109,7 @@ impl Coerce for f64 {
impl Coerce for DateTime {
fn coerce(value: NumericalValue) -> Self {
let timestamp_micros = i64::coerce(value);
DateTime::from_timestamp_micros(timestamp_micros)
DateTime::from_timestamp_nanos(timestamp_micros)
}
}

View File

@@ -17,9 +17,11 @@ pub enum DatePrecision {
Milliseconds,
/// Micro-seconds precision.
Microseconds,
/// Nano-seconds precision.
Nanoseconds,
}
/// A date/time value with microsecond precision.
/// A date/time value with nanoseconds precision.
///
/// This timestamp does not carry any explicit time zone information.
/// Users are responsible for applying the provided conversion
@@ -31,39 +33,46 @@ pub enum DatePrecision {
/// to prevent unintended usage.
#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DateTime {
// Timestamp in microseconds.
pub(crate) timestamp_micros: i64,
// Timestamp in nanoseconds.
pub(crate) timestamp_nanos: i64,
}
impl DateTime {
/// Minimum possible `DateTime` value.
pub const MIN: DateTime = DateTime {
timestamp_micros: i64::MIN,
timestamp_nanos: i64::MIN,
};
/// Maximum possible `DateTime` value.
pub const MAX: DateTime = DateTime {
timestamp_micros: i64::MAX,
timestamp_nanos: i64::MAX,
};
/// Create new from UNIX timestamp in seconds
pub const fn from_timestamp_secs(seconds: i64) -> Self {
Self {
timestamp_micros: seconds * 1_000_000,
timestamp_nanos: seconds * 1_000_000_000,
}
}
/// Create new from UNIX timestamp in milliseconds
pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
Self {
timestamp_micros: milliseconds * 1_000,
timestamp_nanos: milliseconds * 1_000_000,
}
}
/// Create new from UNIX timestamp in microseconds.
pub const fn from_timestamp_micros(microseconds: i64) -> Self {
Self {
timestamp_micros: microseconds,
timestamp_nanos: microseconds * 1_000,
}
}
/// Create new from UNIX timestamp in nanoseconds.
pub const fn from_timestamp_nanos(nanoseconds: i64) -> Self {
Self {
timestamp_nanos: nanoseconds,
}
}
@@ -71,9 +80,9 @@ impl DateTime {
///
/// The given date/time is converted to UTC and the actual
/// time zone is discarded.
pub const fn from_utc(dt: OffsetDateTime) -> Self {
let timestamp_micros = dt.unix_timestamp() * 1_000_000 + dt.microsecond() as i64;
Self { timestamp_micros }
pub fn from_utc(dt: OffsetDateTime) -> Self {
let timestamp_nanos = dt.unix_timestamp_nanos() as i64;
Self { timestamp_nanos }
}
/// Create new from `PrimitiveDateTime`
@@ -87,23 +96,27 @@ impl DateTime {
/// Convert to UNIX timestamp in seconds.
pub const fn into_timestamp_secs(self) -> i64 {
self.timestamp_micros / 1_000_000
self.timestamp_nanos / 1_000_000_000
}
/// Convert to UNIX timestamp in milliseconds.
pub const fn into_timestamp_millis(self) -> i64 {
self.timestamp_micros / 1_000
self.timestamp_nanos / 1_000_000
}
/// Convert to UNIX timestamp in microseconds.
pub const fn into_timestamp_micros(self) -> i64 {
self.timestamp_micros
self.timestamp_nanos / 1_000
}
/// Convert to UNIX timestamp in nanoseconds.
pub const fn into_timestamp_nanos(self) -> i64 {
self.timestamp_nanos
}
/// Convert to UTC `OffsetDateTime`
pub fn into_utc(self) -> OffsetDateTime {
let timestamp_nanos = self.timestamp_micros as i128 * 1000;
let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(timestamp_nanos)
let utc_datetime = OffsetDateTime::from_unix_timestamp_nanos(self.timestamp_nanos as i128)
.expect("valid UNIX timestamp");
debug_assert_eq!(UtcOffset::UTC, utc_datetime.offset());
utc_datetime
@@ -128,12 +141,13 @@ impl DateTime {
/// Truncates the microseconds value to the corresponding precision.
pub fn truncate(self, precision: DatePrecision) -> Self {
let truncated_timestamp_micros = match precision {
DatePrecision::Seconds => (self.timestamp_micros / 1_000_000) * 1_000_000,
DatePrecision::Milliseconds => (self.timestamp_micros / 1_000) * 1_000,
DatePrecision::Microseconds => self.timestamp_micros,
DatePrecision::Seconds => (self.timestamp_nanos / 1_000_000_000) * 1_000_000_000,
DatePrecision::Milliseconds => (self.timestamp_nanos / 1_000_000) * 1_000_000,
DatePrecision::Microseconds => (self.timestamp_nanos / 1_000) * 1_000,
DatePrecision::Nanoseconds => self.timestamp_nanos,
};
Self {
timestamp_micros: truncated_timestamp_micros,
timestamp_nanos: truncated_timestamp_micros,
}
}
}

View File

@@ -40,7 +40,7 @@ pub struct DateHistogramAggregationReq {
date_interval: Option<String>,
/// The field to aggregate on.
pub field: String,
/// The format to format dates.
/// The format to format dates. Unsupported currently.
pub format: Option<String>,
/// The interval to chunk your data range. Each bucket spans a value range of
/// [0..fixed_interval). Accepted values
@@ -77,7 +77,7 @@ pub struct DateHistogramAggregationReq {
/// hard_bounds only limits the buckets, to force a range set both extended_bounds and
/// hard_bounds to the same range.
///
/// Needs to be provided as timestamp in microseconds precision.
/// Needs to be provided as timestamp in nanosecond precision.
///
/// ## Example
/// ```json
@@ -88,7 +88,7 @@ pub struct DateHistogramAggregationReq {
/// "interval": "1d",
/// "hard_bounds": {
/// "min": 0,
/// "max": 1420502400000000
/// "max": 1420502400000000000
/// }
/// }
/// }
@@ -114,11 +114,11 @@ impl DateHistogramAggregationReq {
self.validate()?;
Ok(HistogramAggregation {
field: self.field.to_string(),
interval: parse_into_microseconds(self.fixed_interval.as_ref().unwrap())? as f64,
interval: parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())? as f64,
offset: self
.offset
.as_ref()
.map(|offset| parse_offset_into_microseconds(offset))
.map(|offset| parse_offset_into_nanosecs(offset))
.transpose()?
.map(|el| el as f64),
min_doc_count: self.min_doc_count,
@@ -155,7 +155,7 @@ impl DateHistogramAggregationReq {
));
}
parse_into_microseconds(self.fixed_interval.as_ref().unwrap())?;
parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())?;
Ok(())
}
@@ -176,9 +176,12 @@ pub enum DateHistogramParseError {
/// Offset invalid
#[error("passed offset is invalid {0:?}")]
InvalidOffset(String),
/// Value out of bounds
#[error("passed value is out of bounds: {0:?}")]
OutOfBounds(String),
}
fn parse_offset_into_microseconds(input: &str) -> Result<i64, AggregationError> {
fn parse_offset_into_nanosecs(input: &str) -> Result<i64, AggregationError> {
let is_sign = |byte| &[byte] == b"-" || &[byte] == b"+";
if input.is_empty() {
return Err(DateHistogramParseError::InvalidOffset(input.to_string()).into());
@@ -187,18 +190,18 @@ fn parse_offset_into_microseconds(input: &str) -> Result<i64, AggregationError>
let has_sign = is_sign(input.as_bytes()[0]);
if has_sign {
let (sign, input) = input.split_at(1);
let val = parse_into_microseconds(input)?;
let val = parse_into_nanoseconds(input)?;
if sign == "-" {
Ok(-val)
} else {
Ok(val)
}
} else {
parse_into_microseconds(input)
parse_into_nanoseconds(input)
}
}
fn parse_into_microseconds(input: &str) -> Result<i64, AggregationError> {
fn parse_into_nanoseconds(input: &str) -> Result<i64, AggregationError> {
let split_boundary = input
.as_bytes()
.iter()
@@ -226,7 +229,11 @@ fn parse_into_microseconds(input: &str) -> Result<i64, AggregationError> {
_ => return Err(DateHistogramParseError::UnitNotRecognized(unit.to_string()).into()),
};
Ok(number * multiplier_from_unit * 1000)
let val = (number * multiplier_from_unit)
.checked_mul(1_000_000)
.ok_or_else(|| DateHistogramParseError::OutOfBounds(input.to_string()))?;
Ok(val)
}
#[cfg(test)]
@@ -241,49 +248,49 @@ mod tests {
use crate::Index;
#[test]
fn test_parse_into_microseconds() {
assert_eq!(parse_into_microseconds("1m").unwrap(), 60_000_000);
assert_eq!(parse_into_microseconds("2m").unwrap(), 120_000_000);
fn test_parse_into_nanosecs() {
assert_eq!(parse_into_nanoseconds("1m").unwrap(), 60_000_000_000);
assert_eq!(parse_into_nanoseconds("2m").unwrap(), 120_000_000_000);
assert_eq!(
parse_into_microseconds("2y").unwrap_err(),
parse_into_nanoseconds("2y").unwrap_err(),
DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
);
assert_eq!(
parse_into_microseconds("2000").unwrap_err(),
parse_into_nanoseconds("2000").unwrap_err(),
DateHistogramParseError::UnitMissing("2000".to_string()).into()
);
assert_eq!(
parse_into_microseconds("ms").unwrap_err(),
parse_into_nanoseconds("ms").unwrap_err(),
DateHistogramParseError::NumberMissing("ms".to_string()).into()
);
}
#[test]
fn test_parse_offset_into_microseconds() {
assert_eq!(parse_offset_into_microseconds("1m").unwrap(), 60_000_000);
assert_eq!(parse_offset_into_microseconds("+1m").unwrap(), 60_000_000);
assert_eq!(parse_offset_into_microseconds("-1m").unwrap(), -60_000_000);
assert_eq!(parse_offset_into_microseconds("2m").unwrap(), 120_000_000);
assert_eq!(parse_offset_into_microseconds("+2m").unwrap(), 120_000_000);
assert_eq!(parse_offset_into_microseconds("-2m").unwrap(), -120_000_000);
assert_eq!(parse_offset_into_microseconds("-2ms").unwrap(), -2_000);
fn test_parse_offset_into_nanosecs() {
assert_eq!(parse_offset_into_nanosecs("1m").unwrap(), 60_000_000_000);
assert_eq!(parse_offset_into_nanosecs("+1m").unwrap(), 60_000_000_000);
assert_eq!(parse_offset_into_nanosecs("-1m").unwrap(), -60_000_000_000);
assert_eq!(parse_offset_into_nanosecs("2m").unwrap(), 120_000_000_000);
assert_eq!(parse_offset_into_nanosecs("+2m").unwrap(), 120_000_000_000);
assert_eq!(parse_offset_into_nanosecs("-2m").unwrap(), -120_000_000_000);
assert_eq!(parse_offset_into_nanosecs("-2ms").unwrap(), -2_000_000);
assert_eq!(
parse_offset_into_microseconds("2y").unwrap_err(),
parse_offset_into_nanosecs("2y").unwrap_err(),
DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
);
assert_eq!(
parse_offset_into_microseconds("2000").unwrap_err(),
parse_offset_into_nanosecs("2000").unwrap_err(),
DateHistogramParseError::UnitMissing("2000".to_string()).into()
);
assert_eq!(
parse_offset_into_microseconds("ms").unwrap_err(),
parse_offset_into_nanosecs("ms").unwrap_err(),
DateHistogramParseError::NumberMissing("ms".to_string()).into()
);
}
#[test]
fn test_parse_into_milliseconds_do_not_accept_non_ascii() {
assert!(parse_into_microseconds("m").is_err());
assert!(parse_into_nanoseconds("m").is_err());
}
pub fn get_test_index_from_docs(
@@ -361,7 +368,7 @@ mod tests {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000.0,
"key" : 1420070400000000000.0,
"doc_count" : 4
}
]
@@ -397,7 +404,7 @@ mod tests {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000.0,
"key" : 1420070400000000000.0,
"doc_count" : 4,
"texts": {
"buckets": [
@@ -444,32 +451,32 @@ mod tests {
"buckets": [
{
"doc_count": 2,
"key": 1420070400000000.0,
"key": 1420070400000000000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000000.0,
"key": 1420156800000000000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000000.0,
"key": 1420243200000000000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000000.0,
"key": 1420329600000000000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000000.0,
"key": 1420416000000000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000000.0,
"key": 1420502400000000000.0,
"key_as_string": "2015-01-06T00:00:00Z"
}
]

View File

@@ -1207,7 +1207,7 @@ mod tests {
"histogram": {
"histogram": {
"field": "date",
"interval": 86400000000.0, // one day in microseconds
"interval": 86400000000000.0, // one day in nano seconds
},
}
}))
@@ -1217,14 +1217,14 @@ mod tests {
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000.0);
assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000000.0);
assert_eq!(
res["histogram"]["buckets"][0]["key_as_string"],
"2019-01-01T00:00:00Z"
);
assert_eq!(res["histogram"]["buckets"][0]["doc_count"], 1);
assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000.0);
assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000000.0);
assert_eq!(
res["histogram"]["buckets"][1]["key_as_string"],
"2019-01-02T00:00:00Z"
@@ -1232,7 +1232,7 @@ mod tests {
assert_eq!(res["histogram"]["buckets"][1]["doc_count"], 5);
assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0);
assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000000.0);
assert_eq!(
res["histogram"]["buckets"][2]["key_as_string"],
"2019-01-03T00:00:00Z"

View File

@@ -632,8 +632,8 @@ mod tests {
"range": {
"field": "date",
"ranges": [
{"to": 1546300800000000i64},
{"from": 1546300800000000i64, "to": 1546387200000000i64},
{"to": 1546300800000000000i64},
{"from": 1546300800000000000i64, "to": 1546387200000000000i64},
],
"keyed": false
},

View File

@@ -4,13 +4,12 @@ use time::OffsetDateTime;
use crate::TantivyError;
pub(crate) fn format_date(val: i64) -> crate::Result<String> {
let datetime =
OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)).map_err(|err| {
TantivyError::InvalidArgument(format!(
"Could not convert {:?} to OffsetDateTime, err {:?}",
val, err
))
})?;
let datetime = OffsetDateTime::from_unix_timestamp_nanos(val as i128).map_err(|err| {
TantivyError::InvalidArgument(format!(
"Could not convert {:?} to OffsetDateTime, err {:?}",
val, err
))
})?;
let key_as_string = datetime
.format(&Rfc3339)
.map_err(|_err| TantivyError::InvalidArgument("Could not serialize date".to_string()))?;

View File

@@ -295,7 +295,7 @@ mod tests {
DateTime::from_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
3_600_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
3_600_000_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
10,
);
let week_histogram = searcher.search(&all_query, &week_histogram_collector)?;

View File

@@ -685,12 +685,12 @@ mod tests {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"date",
DateOptions::from(FAST).set_precision(DatePrecision::Microseconds),
DateOptions::from(FAST).set_precision(DatePrecision::Nanoseconds),
);
let multi_date_field = schema_builder.add_date_field(
"multi_date",
DateOptions::default()
.set_precision(DatePrecision::Microseconds)
.set_precision(DatePrecision::Nanoseconds)
.set_fast(),
);
let schema = schema_builder.build();
@@ -724,25 +724,25 @@ mod tests {
.column_opt::<DateTime>("multi_date")
.unwrap()
.unwrap();
let mut dates = vec![];
let mut dates = Vec::new();
{
assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64);
dates_fast_field.fill_vals(0u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
assert_eq!(dates[0].into_timestamp_nanos(), 2i64);
assert_eq!(dates[1].into_timestamp_nanos(), 3i64);
}
{
assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64);
dates_fast_field.fill_vals(1u32, &mut dates);
assert!(dates.is_empty());
}
{
assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64);
dates_fast_field.fill_vals(2u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
assert_eq!(dates[1].into_timestamp_micros(), 6i64);
assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
assert_eq!(dates[1].into_timestamp_nanos(), 6i64);
}
Ok(())
}

View File

@@ -1025,8 +1025,8 @@ pub mod tests {
let dt = DateTime::from_utc(now).into_utc();
assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
// We don't store nanosecond level precision.
assert_eq!(dt.nanosecond(), now.microsecond() * 1000);
// We store nanosecond level precision.
assert_eq!(dt.nanosecond(), now.nanosecond());
let dt = DateTime::from_timestamp_secs(now.unix_timestamp()).into_utc();
assert_eq!(dt.to_ordinal_date(), now.to_ordinal_date());
@@ -1040,7 +1040,7 @@ pub mod tests {
assert_eq!(dt.to_hms_micro(), now.to_hms_micro());
let dt_from_ts_nanos =
OffsetDateTime::from_unix_timestamp_nanos(18446744073709551615i128).unwrap();
OffsetDateTime::from_unix_timestamp_nanos(1492432621123456789).unwrap();
let offset_dt = DateTime::from_utc(dt_from_ts_nanos).into_utc();
assert_eq!(
dt_from_ts_nanos.to_ordinal_date(),

View File

@@ -245,14 +245,11 @@ impl MoreLikeThis {
}
FieldType::Date(_) => {
for value in values {
let timestamp_micros = value
.as_date()
.ok_or_else(|| TantivyError::InvalidArgument("invalid value".to_string()))?
.into_timestamp_micros();
if !self.is_noise_word(timestamp_micros.to_string()) {
let term = Term::from_field_i64(field, timestamp_micros);
*term_frequencies.entry(term).or_insert(0) += 1;
}
let timestamp = value.as_date().ok_or_else(|| {
TantivyError::InvalidArgument("invalid value".to_string())
})?;
let term = Term::from_field_date(field, timestamp);
*term_frequencies.entry(term).or_insert(0) += 1;
}
}
FieldType::I64(_) => {

View File

@@ -27,7 +27,7 @@ pub enum Value {
F64(f64),
/// Bool value
Bool(bool),
/// Date/time with microseconds precision
/// Date/time with nanoseconds precision
Date(DateTime),
/// Facet
Facet(Facet),