fix numeric order, refactor Document (#2209)

fix numeric order to prefer i64
rename and move Document stuff
This commit is contained in:
PSeitz
2023-10-05 16:39:56 +02:00
committed by GitHub
parent b700c42246
commit 6097235eff
8 changed files with 331 additions and 325 deletions

View File

@@ -902,7 +902,7 @@ mod tests {
crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Null,
crate::schema::OwnedValue::Str(String::from("Hello, world")),
crate::schema::OwnedValue::U64(12345),
crate::schema::OwnedValue::I64(12345),
]),
);
}

View File

@@ -12,8 +12,8 @@ use crate::schema::field_value::FieldValueIter;
use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema};
use crate::tokenizer::PreTokenizedString;
/// Tantivy's Document is the object that can be indexed and then searched for.
/// It provides a default implementation of the `Document` trait.
/// TantivyDocument provides a default implementation of the `Document` trait.
/// It is the object that can be indexed and then searched for.
///
/// Documents are fundamentally a collection of unordered couples `(field, value)`.
/// In this list, one field may appear more than once.
@@ -256,7 +256,7 @@ impl DocParsingError {
#[cfg(test)]
mod tests {
use crate::schema::document::default_doc_type::TantivyDocument;
use crate::schema::document::default_document::TantivyDocument;
use crate::schema::*;
#[test]

View File

@@ -25,10 +25,10 @@ impl<'a> Value<'a> for &'a serde_json::Value {
serde_json::Value::Null => ReferenceValue::Null,
serde_json::Value::Bool(value) => ReferenceValue::Bool(*value),
serde_json::Value::Number(number) => {
if let Some(val) = number.as_u64() {
ReferenceValue::U64(val)
} else if let Some(val) = number.as_i64() {
if let Some(val) = number.as_i64() {
ReferenceValue::I64(val)
} else if let Some(val) = number.as_u64() {
ReferenceValue::U64(val)
} else if let Some(val) = number.as_f64() {
ReferenceValue::F64(val)
} else {

View File

@@ -154,25 +154,25 @@
//! TODO: Complete this section...
mod de;
mod default_doc_type;
mod default_document;
mod existing_type_impls;
mod owned_value;
mod se;
mod value;
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::mem;
use std::net::Ipv6Addr;
pub(crate) use self::de::BinaryDocumentDeserializer;
pub use self::de::{
ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializer, ObjectAccess,
ValueDeserialize, ValueDeserializer, ValueType, ValueVisitor,
};
pub use self::default_doc_type::{DocParsingError, TantivyDocument};
pub use self::default_document::{DocParsingError, TantivyDocument};
pub use self::owned_value::OwnedValue;
pub(crate) use self::se::BinaryDocumentSerializer;
pub use self::value::{ReferenceValue, Value};
use super::*;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
/// The core trait representing a document within the index.
pub trait Document: DocumentDeserialize + Send + Sync + 'static {
@@ -245,314 +245,6 @@ pub trait Document: DocumentDeserialize + Send + Sync + 'static {
}
}
/// A single field value.
///
/// Implementors only have to provide [`Value::as_value`]; every other accessor has a
/// default implementation that inspects the `ReferenceValue` it returns.
pub trait Value<'a>: Send + Sync + Debug {
    /// The child value type returned by this doc value.
    type ChildValue: Value<'a>;
    /// The iterator for walking through the elements within the array.
    type ArrayIter: Iterator<Item = ReferenceValue<'a, Self::ChildValue>>;
    /// The visitor walking through the key-value pairs within
    /// the object.
    type ObjectIter: Iterator<Item = (&'a str, ReferenceValue<'a, Self::ChildValue>)>;

    /// Returns the field value represented by an enum which borrows its data.
    fn as_value(&self) -> ReferenceValue<'a, Self>;

    /// Returns if the value is `null` or not.
    #[inline]
    fn is_null(&self) -> bool {
        matches!(self.as_value(), ReferenceValue::Null)
    }

    /// If the Value is a String, returns the associated str. Returns None otherwise.
    #[inline]
    fn as_str(&self) -> Option<&'a str> {
        if let ReferenceValue::Str(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a u64, returns the associated u64. Returns None otherwise.
    #[inline]
    fn as_u64(&self) -> Option<u64> {
        if let ReferenceValue::U64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a i64, returns the associated i64. Returns None otherwise.
    #[inline]
    fn as_i64(&self) -> Option<i64> {
        if let ReferenceValue::I64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a f64, returns the associated f64. Returns None otherwise.
    #[inline]
    fn as_f64(&self) -> Option<f64> {
        if let ReferenceValue::F64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a datetime, returns the associated datetime. Returns None otherwise.
    #[inline]
    fn as_datetime(&self) -> Option<DateTime> {
        if let ReferenceValue::Date(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a IP address, returns the associated IP. Returns None otherwise.
    #[inline]
    fn as_ip_addr(&self) -> Option<Ipv6Addr> {
        if let ReferenceValue::IpAddr(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bool, returns the associated bool. Returns None otherwise.
    #[inline]
    fn as_bool(&self) -> Option<bool> {
        if let ReferenceValue::Bool(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
    #[inline]
    fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
        if let ReferenceValue::PreTokStr(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise.
    #[inline]
    fn as_bytes(&self) -> Option<&'a [u8]> {
        if let ReferenceValue::Bytes(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
    #[inline]
    fn as_facet(&self) -> Option<&'a Facet> {
        if let ReferenceValue::Facet(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns the iterator over the array if the Value is an array.
    #[inline]
    fn as_array(&self) -> Option<Self::ArrayIter> {
        if let ReferenceValue::Array(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns the iterator over the object if the Value is an object.
    #[inline]
    fn as_object(&self) -> Option<Self::ObjectIter> {
        if let ReferenceValue::Object(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns true if the Value is an array.
    #[inline]
    fn is_array(&self) -> bool {
        // Must test the `Array` variant: matching `Object` here would make this
        // method a duplicate of `is_object` and report `false` for real arrays.
        matches!(self.as_value(), ReferenceValue::Array(_))
    }

    /// Returns true if the Value is an object.
    #[inline]
    fn is_object(&self) -> bool {
        matches!(self.as_value(), ReferenceValue::Object(_))
    }
}
/// An enum representing a value for tantivy to index.
///
/// The type parameter `V` supplies the iterator types carried by the `Array` and
/// `Object` variants; the `Str`, `Facet`, `Bytes` and `PreTokStr` variants borrow
/// their data for `'a`.
pub enum ReferenceValue<'a, V>
where V: Value<'a> + ?Sized
{
/// A null value.
Null,
/// The str type is used for any text information.
Str(&'a str),
/// Unsigned 64-bits Integer `u64`
U64(u64),
/// Signed 64-bits Integer `i64`
I64(i64),
/// 64-bits Float `f64`
F64(f64),
/// Date/time with nanoseconds precision
Date(DateTime),
/// Facet
Facet(&'a Facet),
/// Arbitrarily sized byte array
Bytes(&'a [u8]),
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
IpAddr(Ipv6Addr),
/// Bool value
Bool(bool),
/// Pre-tokenized str type,
PreTokStr(&'a PreTokenizedString),
/// An array containing multiple values, held as the `V::ArrayIter` iterator.
Array(V::ArrayIter),
/// A nested / dynamic object, held as the `V::ObjectIter` iterator.
Object(V::ObjectIter),
}
/// Inherent accessors that inspect the variant directly, without going through
/// `Value::as_value`.
impl<'a, V> ReferenceValue<'a, V>
where V: Value<'a>
{
    /// Returns if the value is `null` or not.
    #[inline]
    pub fn is_null(&self) -> bool {
        matches!(self, Self::Null)
    }

    /// If the Value is a String, returns the associated str. Returns None otherwise.
    #[inline]
    pub fn as_str(&self) -> Option<&'a str> {
        if let Self::Str(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a u64, returns the associated u64. Returns None otherwise.
    #[inline]
    pub fn as_u64(&self) -> Option<u64> {
        if let Self::U64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a i64, returns the associated i64. Returns None otherwise.
    #[inline]
    pub fn as_i64(&self) -> Option<i64> {
        if let Self::I64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a f64, returns the associated f64. Returns None otherwise.
    #[inline]
    pub fn as_f64(&self) -> Option<f64> {
        if let Self::F64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a datetime, returns the associated datetime. Returns None otherwise.
    #[inline]
    pub fn as_datetime(&self) -> Option<DateTime> {
        if let Self::Date(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a IP address, returns the associated IP. Returns None otherwise.
    #[inline]
    pub fn as_ip_addr(&self) -> Option<Ipv6Addr> {
        if let Self::IpAddr(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a bool, returns the associated bool. Returns None otherwise.
    #[inline]
    pub fn as_bool(&self) -> Option<bool> {
        if let Self::Bool(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
    #[inline]
    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
        if let Self::PreTokStr(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise.
    #[inline]
    pub fn as_bytes(&self) -> Option<&'a [u8]> {
        if let Self::Bytes(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
    #[inline]
    pub fn as_facet(&self) -> Option<&'a Facet> {
        if let Self::Facet(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// Returns true if the Value is an array.
    #[inline]
    pub fn is_array(&self) -> bool {
        // Must test the `Array` variant: matching `Object` here would make this
        // method a duplicate of `is_object` and report `false` for real arrays.
        matches!(self, Self::Array(_))
    }

    /// Returns true if the Value is an object.
    #[inline]
    pub fn is_object(&self) -> bool {
        matches!(self, Self::Object(_))
    }
}
pub(crate) mod type_codes {
pub const TEXT_CODE: u8 = 0;
pub const U64_CODE: u8 = 1;

View File

@@ -501,7 +501,7 @@ mod tests {
length elements.len(),
type_codes::NULL_CODE => (),
type_codes::TEXT_CODE => String::from("Hello, world"),
type_codes::U64_CODE => 12345u64,
type_codes::I64_CODE => 12345i64,
);
assert_eq!(
result, expected,

View File

@@ -0,0 +1,316 @@
use std::fmt::Debug;
use std::net::Ipv6Addr;
use common::DateTime;
use crate::schema::Facet;
use crate::tokenizer::PreTokenizedString;
/// A single field value.
///
/// Implementors only have to provide [`Value::as_value`]; every other accessor has a
/// default implementation that inspects the `ReferenceValue` it returns.
pub trait Value<'a>: Send + Sync + Debug {
    /// The child value type returned by this doc value.
    type ChildValue: Value<'a>;
    /// The iterator for walking through the elements within the array.
    type ArrayIter: Iterator<Item = ReferenceValue<'a, Self::ChildValue>>;
    /// The visitor walking through the key-value pairs within
    /// the object.
    type ObjectIter: Iterator<Item = (&'a str, ReferenceValue<'a, Self::ChildValue>)>;

    /// Returns the field value represented by an enum which borrows its data.
    fn as_value(&self) -> ReferenceValue<'a, Self>;

    /// Returns if the value is `null` or not.
    #[inline]
    fn is_null(&self) -> bool {
        matches!(self.as_value(), ReferenceValue::Null)
    }

    /// If the Value is a String, returns the associated str. Returns None otherwise.
    #[inline]
    fn as_str(&self) -> Option<&'a str> {
        if let ReferenceValue::Str(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a u64, returns the associated u64. Returns None otherwise.
    #[inline]
    fn as_u64(&self) -> Option<u64> {
        if let ReferenceValue::U64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a i64, returns the associated i64. Returns None otherwise.
    #[inline]
    fn as_i64(&self) -> Option<i64> {
        if let ReferenceValue::I64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a f64, returns the associated f64. Returns None otherwise.
    #[inline]
    fn as_f64(&self) -> Option<f64> {
        if let ReferenceValue::F64(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a datetime, returns the associated datetime. Returns None otherwise.
    #[inline]
    fn as_datetime(&self) -> Option<DateTime> {
        if let ReferenceValue::Date(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a IP address, returns the associated IP. Returns None otherwise.
    #[inline]
    fn as_ip_addr(&self) -> Option<Ipv6Addr> {
        if let ReferenceValue::IpAddr(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bool, returns the associated bool. Returns None otherwise.
    #[inline]
    fn as_bool(&self) -> Option<bool> {
        if let ReferenceValue::Bool(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
    #[inline]
    fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
        if let ReferenceValue::PreTokStr(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise.
    #[inline]
    fn as_bytes(&self) -> Option<&'a [u8]> {
        if let ReferenceValue::Bytes(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
    #[inline]
    fn as_facet(&self) -> Option<&'a Facet> {
        if let ReferenceValue::Facet(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns the iterator over the array if the Value is an array.
    #[inline]
    fn as_array(&self) -> Option<Self::ArrayIter> {
        if let ReferenceValue::Array(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns the iterator over the object if the Value is an object.
    #[inline]
    fn as_object(&self) -> Option<Self::ObjectIter> {
        if let ReferenceValue::Object(val) = self.as_value() {
            Some(val)
        } else {
            None
        }
    }

    /// Returns true if the Value is an array.
    #[inline]
    fn is_array(&self) -> bool {
        // Must test the `Array` variant: matching `Object` here would make this
        // method a duplicate of `is_object` and report `false` for real arrays.
        matches!(self.as_value(), ReferenceValue::Array(_))
    }

    /// Returns true if the Value is an object.
    #[inline]
    fn is_object(&self) -> bool {
        matches!(self.as_value(), ReferenceValue::Object(_))
    }
}
/// An enum representing a value for tantivy to index.
///
/// The type parameter `V` supplies the iterator types carried by the `Array` and
/// `Object` variants; the `Str`, `Facet`, `Bytes` and `PreTokStr` variants borrow
/// their data for `'a`.
#[derive(Clone, Debug, PartialEq)]
pub enum ReferenceValue<'a, V>
where V: Value<'a> + ?Sized
{
/// A null value.
Null,
/// The str type is used for any text information.
Str(&'a str),
/// Unsigned 64-bits Integer `u64`
U64(u64),
/// Signed 64-bits Integer `i64`
I64(i64),
/// 64-bits Float `f64`
F64(f64),
/// Date/time with nanoseconds precision
Date(DateTime),
/// Facet
Facet(&'a Facet),
/// Arbitrarily sized byte array
Bytes(&'a [u8]),
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
IpAddr(Ipv6Addr),
/// Bool value
Bool(bool),
/// Pre-tokenized str type,
PreTokStr(&'a PreTokenizedString),
/// An array containing multiple values, held as the `V::ArrayIter` iterator.
Array(V::ArrayIter),
/// A nested / dynamic object, held as the `V::ObjectIter` iterator.
Object(V::ObjectIter),
}
/// Inherent accessors that inspect the variant directly, without going through
/// `Value::as_value`.
impl<'a, V> ReferenceValue<'a, V>
where V: Value<'a>
{
    /// Returns if the value is `null` or not.
    #[inline]
    pub fn is_null(&self) -> bool {
        matches!(self, Self::Null)
    }

    /// If the Value is a String, returns the associated str. Returns None otherwise.
    #[inline]
    pub fn as_str(&self) -> Option<&'a str> {
        if let Self::Str(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a u64, returns the associated u64. Returns None otherwise.
    #[inline]
    pub fn as_u64(&self) -> Option<u64> {
        if let Self::U64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a i64, returns the associated i64. Returns None otherwise.
    #[inline]
    pub fn as_i64(&self) -> Option<i64> {
        if let Self::I64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a f64, returns the associated f64. Returns None otherwise.
    #[inline]
    pub fn as_f64(&self) -> Option<f64> {
        if let Self::F64(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a datetime, returns the associated datetime. Returns None otherwise.
    #[inline]
    pub fn as_datetime(&self) -> Option<DateTime> {
        if let Self::Date(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a IP address, returns the associated IP. Returns None otherwise.
    #[inline]
    pub fn as_ip_addr(&self) -> Option<Ipv6Addr> {
        if let Self::IpAddr(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a bool, returns the associated bool. Returns None otherwise.
    #[inline]
    pub fn as_bool(&self) -> Option<bool> {
        if let Self::Bool(val) = self {
            Some(*val)
        } else {
            None
        }
    }

    /// If the Value is a pre-tokenized string, returns the associated string. Returns None
    /// otherwise.
    #[inline]
    pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
        if let Self::PreTokStr(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise.
    #[inline]
    pub fn as_bytes(&self) -> Option<&'a [u8]> {
        if let Self::Bytes(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// If the Value is a facet, returns the associated facet. Returns None otherwise.
    #[inline]
    pub fn as_facet(&self) -> Option<&'a Facet> {
        if let Self::Facet(val) = self {
            Some(val)
        } else {
            None
        }
    }

    /// Returns true if the Value is an array.
    #[inline]
    pub fn is_array(&self) -> bool {
        // Must test the `Array` variant: matching `Object` here would make this
        // method a duplicate of `is_object` and report `false` for real arrays.
        matches!(self, Self::Array(_))
    }

    /// Returns true if the Value is an object.
    #[inline]
    pub fn is_object(&self) -> bool {
        matches!(self, Self::Object(_))
    }
}

View File

@@ -126,7 +126,6 @@ mod json_object_options;
mod named_field_document;
mod numeric_options;
mod text_options;
mod value;
use columnar::ColumnType;
@@ -134,7 +133,7 @@ pub use self::bytes_options::BytesOptions;
#[allow(deprecated)]
pub use self::date_time_options::DatePrecision;
pub use self::date_time_options::{DateOptions, DateTimePrecision, DATE_TIME_PRECISION_INDEXED};
pub use self::document::{DocParsingError, Document, TantivyDocument, Value};
pub use self::document::{DocParsingError, Document, OwnedValue, TantivyDocument, Value};
pub(crate) use self::facet::FACET_SEP_BYTE;
pub use self::facet::{Facet, FacetParseError};
pub use self::facet_options::FacetOptions;
@@ -153,7 +152,6 @@ pub use self::numeric_options::NumericOptions;
pub use self::schema::{Schema, SchemaBuilder};
pub use self::term::{Term, ValueBytes, JSON_END_OF_PATH};
pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
pub use self::value::OwnedValue;
/// Validator for a potential `field_name`.
/// Returns true if the name can be used as a field name.