split into ReferenceValueLeaf (#2217)

This commit is contained in:
PSeitz
2023-10-16 16:31:30 +02:00
committed by GitHub
parent 182f58cea6
commit 5e06e504e6
9 changed files with 428 additions and 324 deletions

View File

@@ -5,7 +5,7 @@ use rustc_hash::FxHashMap;
use crate::fastfield::FastValue;
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::document::{ReferenceValue, Value};
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED};
use crate::time::format_description::well_known::Rfc3339;
@@ -125,53 +125,57 @@ fn index_json_value<'a, V: Value<'a>>(
positions_per_path: &mut IndexingPositionsPerPath,
) {
match json_value.as_value() {
ReferenceValue::Null => {}
ReferenceValue::Str(val) => {
let mut token_stream = text_analyzer.token_stream(val);
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => {}
ReferenceValueLeaf::Str(val) => {
let mut token_stream = text_analyzer.token_stream(val);
// TODO: make sure the chain position works out.
json_term_writer.close_path_and_set_type(Type::Str);
let indexing_position = positions_per_path.get_position(json_term_writer.term());
postings_writer.index_text(
doc,
&mut *token_stream,
json_term_writer.term_buffer,
ctx,
indexing_position,
);
}
ReferenceValue::U64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValue::I64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValue::F64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValue::Bool(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValue::Facet(_) => {
unimplemented!("Facet support in dynamic fields is not yet implemented")
}
ReferenceValue::IpAddr(_) => {
unimplemented!("IP address support in dynamic fields is not yet implemented")
}
ReferenceValue::Date(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValue::PreTokStr(_) => {
unimplemented!("Pre-tokenized string support in dynamic fields is not yet implemented")
}
ReferenceValue::Bytes(_) => {
unimplemented!("Bytes support in dynamic fields is not yet implemented")
}
// TODO: make sure the chain position works out.
json_term_writer.close_path_and_set_type(Type::Str);
let indexing_position = positions_per_path.get_position(json_term_writer.term());
postings_writer.index_text(
doc,
&mut *token_stream,
json_term_writer.term_buffer,
ctx,
indexing_position,
);
}
ReferenceValueLeaf::U64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValueLeaf::I64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValueLeaf::F64(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValueLeaf::Bool(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValueLeaf::Date(val) => {
json_term_writer.set_fast_value(val);
postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx);
}
ReferenceValueLeaf::PreTokStr(_) => {
unimplemented!(
"Pre-tokenized string support in dynamic fields is not yet implemented"
)
}
ReferenceValueLeaf::Bytes(_) => {
unimplemented!("Bytes support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::Facet(_) => {
unimplemented!("Facet support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::IpAddr(_) => {
unimplemented!("IP address support in dynamic fields is not yet implemented")
}
},
ReferenceValue::Array(elements) => {
for val in elements {
index_json_value(

View File

@@ -5,7 +5,7 @@ use std::{fmt, io};
use crate::collector::Collector;
use crate::core::{Executor, SegmentReader};
use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
use crate::schema::document::{Document, DocumentDeserialize};
use crate::schema::document::DocumentDeserialize;
use crate::schema::{Schema, Term};
use crate::space_usage::SearcherSpaceUsage;
use crate::store::{CacheStats, StoreReader};

View File

@@ -5,7 +5,7 @@ use common::replace_in_place;
use tokenizer_api::Token;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::schema::document::{Document, ReferenceValue, Value};
use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value};
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
@@ -141,64 +141,68 @@ impl FastFieldsWriter {
};
match value.as_value() {
ReferenceValue::Null => {}
ReferenceValue::Str(val) => {
if let Some(tokenizer) = &mut self.per_field_tokenizer[field.field_id() as usize] {
let mut token_stream = tokenizer.token_stream(val);
token_stream.process(&mut |token: &Token| {
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => {}
ReferenceValueLeaf::Str(val) => {
if let Some(tokenizer) =
&mut self.per_field_tokenizer[field.field_id() as usize]
{
let mut token_stream = tokenizer.token_stream(val);
token_stream.process(&mut |token: &Token| {
self.columnar_writer
.record_str(doc_id, field_name, &token.text);
})
} else {
self.columnar_writer.record_str(doc_id, field_name, val);
}
}
ReferenceValueLeaf::U64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValueLeaf::I64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValueLeaf::F64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValueLeaf::Date(val) => {
let date_precision = self.date_precisions[field.field_id() as usize];
let truncated_datetime = val.truncate(date_precision);
self.columnar_writer
.record_datetime(doc_id, field_name, truncated_datetime);
}
ReferenceValueLeaf::Facet(val) => {
self.columnar_writer
.record_str(doc_id, field_name, val.encoded_str());
}
ReferenceValueLeaf::Bytes(val) => {
self.columnar_writer.record_bytes(doc_id, field_name, val);
}
ReferenceValueLeaf::IpAddr(val) => {
self.columnar_writer.record_ip_addr(doc_id, field_name, val);
}
ReferenceValueLeaf::Bool(val) => {
self.columnar_writer.record_bool(doc_id, field_name, val);
}
ReferenceValueLeaf::PreTokStr(val) => {
for token in &val.tokens {
self.columnar_writer
.record_str(doc_id, field_name, &token.text);
})
} else {
self.columnar_writer.record_str(doc_id, field_name, val);
}
}
}
ReferenceValue::U64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValue::I64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValue::F64(val) => {
self.columnar_writer.record_numerical(
doc_id,
field_name,
NumericalValue::from(val),
);
}
ReferenceValue::Date(val) => {
let date_precision = self.date_precisions[field.field_id() as usize];
let truncated_datetime = val.truncate(date_precision);
self.columnar_writer
.record_datetime(doc_id, field_name, truncated_datetime);
}
ReferenceValue::Facet(val) => {
self.columnar_writer
.record_str(doc_id, field_name, val.encoded_str());
}
ReferenceValue::Bytes(val) => {
self.columnar_writer.record_bytes(doc_id, field_name, val);
}
ReferenceValue::IpAddr(val) => {
self.columnar_writer.record_ip_addr(doc_id, field_name, val);
}
ReferenceValue::Bool(val) => {
self.columnar_writer.record_bool(doc_id, field_name, val);
}
ReferenceValue::PreTokStr(val) => {
for token in &val.tokens {
self.columnar_writer
.record_str(doc_id, field_name, &token.text);
}
}
},
ReferenceValue::Array(val) => {
// TODO: Check this is the correct behaviour we want.
for value in val {
@@ -297,58 +301,62 @@ fn record_json_value_to_columnar_writer<'a, V: Value<'a>>(
remaining_depth_limit -= 1;
match json_val.as_value() {
ReferenceValue::Null => {} // TODO: Handle null
ReferenceValue::Str(val) => {
if let Some(text_analyzer) = tokenizer.as_mut() {
let mut token_stream = text_analyzer.token_stream(val);
token_stream.process(&mut |token| {
columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text);
})
} else {
columnar_writer.record_str(doc, json_path_writer.as_str(), val);
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => {} // TODO: Handle null
ReferenceValueLeaf::Str(val) => {
if let Some(text_analyzer) = tokenizer.as_mut() {
let mut token_stream = text_analyzer.token_stream(val);
token_stream.process(&mut |token| {
columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text);
})
} else {
columnar_writer.record_str(doc, json_path_writer.as_str(), val);
}
}
}
ReferenceValue::U64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValue::I64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValue::F64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValue::Bool(val) => {
columnar_writer.record_bool(doc, json_path_writer, val);
}
ReferenceValue::Date(val) => {
columnar_writer.record_datetime(doc, json_path_writer.as_str(), val);
}
ReferenceValue::Facet(_) => {
unimplemented!("Facet support in dynamic fields is not yet implemented")
}
ReferenceValue::Bytes(_) => {
// TODO: This can be re added once it is added to the JSON Utils section as well.
// columnar_writer.record_bytes(doc, json_path_writer.as_str(), val);
unimplemented!("Bytes support in dynamic fields is not yet implemented")
}
ReferenceValue::IpAddr(_) => {
unimplemented!("IP address support in dynamic fields is not yet implemented")
}
ReferenceValue::PreTokStr(_) => {
unimplemented!("Pre-tokenized string support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::U64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValueLeaf::I64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValueLeaf::F64(val) => {
columnar_writer.record_numerical(
doc,
json_path_writer.as_str(),
NumericalValue::from(val),
);
}
ReferenceValueLeaf::Bool(val) => {
columnar_writer.record_bool(doc, json_path_writer, val);
}
ReferenceValueLeaf::Date(val) => {
columnar_writer.record_datetime(doc, json_path_writer.as_str(), val);
}
ReferenceValueLeaf::Facet(_) => {
unimplemented!("Facet support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::Bytes(_) => {
// TODO: This can be re added once it is added to the JSON Utils section as well.
// columnar_writer.record_bytes(doc, json_path_writer.as_str(), val);
unimplemented!("Bytes support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::IpAddr(_) => {
unimplemented!("IP address support in dynamic fields is not yet implemented")
}
ReferenceValueLeaf::PreTokStr(_) => {
unimplemented!(
"Pre-tokenized string support in dynamic fields is not yet implemented"
)
}
},
ReferenceValue::Array(elements) => {
for el in elements {
record_json_value_to_columnar_writer(

View File

@@ -791,7 +791,8 @@ mod tests {
use super::*;
use crate::schema::document::existing_type_impls::JsonObjectIter;
use crate::schema::document::se::BinaryValueSerializer;
use crate::schema::document::ReferenceValue;
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf};
use crate::schema::OwnedValue;
fn serialize_value<'a>(value: ReferenceValue<'a, &'a serde_json::Value>) -> Vec<u8> {
let mut writer = Vec::new();
@@ -810,34 +811,35 @@ mod tests {
#[test]
fn test_simple_value_serialize() {
let result = serialize_value(ReferenceValue::Null);
let result = serialize_value(ReferenceValueLeaf::Null.into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::Null);
let result = serialize_value(ReferenceValue::Str("hello, world"));
let result = serialize_value(ReferenceValueLeaf::Str("hello, world").into());
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::OwnedValue::Str(String::from("hello, world"))
);
let result = serialize_value(ReferenceValue::U64(123));
let result = serialize_value(ReferenceValueLeaf::U64(123).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::U64(123));
let result = serialize_value(ReferenceValue::I64(-123));
let result = serialize_value(ReferenceValueLeaf::I64(-123).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::I64(-123));
let result = serialize_value(ReferenceValue::F64(123.3845));
let result = serialize_value(ReferenceValueLeaf::F64(123.3845).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::F64(123.3845));
let result = serialize_value(ReferenceValue::Bool(false));
let result = serialize_value(ReferenceValueLeaf::Bool(false).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::Bool(false));
let result = serialize_value(ReferenceValue::Date(DateTime::from_timestamp_micros(100)));
let result =
serialize_value(ReferenceValueLeaf::Date(DateTime::from_timestamp_micros(100)).into());
let value = deserialize_value(result);
assert_eq!(
value,
@@ -845,7 +847,7 @@ mod tests {
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValue::Facet(&facet));
let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::Facet(facet));
@@ -853,7 +855,7 @@ mod tests {
text: "hello, world".to_string(),
tokens: vec![Token::default(), Token::default()],
};
let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str));
let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
}
@@ -1025,11 +1027,11 @@ mod tests {
let mut expected_object = BTreeMap::new();
expected_object.insert(
"my-array".to_string(),
crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Array(vec![]),
crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Null]),
OwnedValue::Array(vec![OwnedValue::Array(vec![
OwnedValue::Array(vec![]),
OwnedValue::Array(vec![OwnedValue::Null]),
])]),
);
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
assert_eq!(value, OwnedValue::Object(expected_object));
}
}

View File

@@ -8,6 +8,7 @@ use std::collections::{btree_map, hash_map, BTreeMap, HashMap};
use serde_json::Number;
use super::ReferenceValueLeaf;
use crate::schema::document::{
ArrayAccess, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer,
ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor,
@@ -21,20 +22,20 @@ impl<'a> Value<'a> for &'a serde_json::Value {
fn as_value(&self) -> ReferenceValue<'a, Self> {
match self {
serde_json::Value::Null => ReferenceValue::Null,
serde_json::Value::Bool(value) => ReferenceValue::Bool(*value),
serde_json::Value::Null => ReferenceValueLeaf::Null.into(),
serde_json::Value::Bool(value) => ReferenceValueLeaf::Bool(*value).into(),
serde_json::Value::Number(number) => {
if let Some(val) = number.as_i64() {
ReferenceValue::I64(val)
ReferenceValueLeaf::I64(val).into()
} else if let Some(val) = number.as_u64() {
ReferenceValue::U64(val)
ReferenceValueLeaf::U64(val).into()
} else if let Some(val) = number.as_f64() {
ReferenceValue::F64(val)
ReferenceValueLeaf::F64(val).into()
} else {
panic!("Unsupported serde_json number {number}");
}
}
serde_json::Value::String(val) => ReferenceValue::Str(val),
serde_json::Value::String(val) => ReferenceValueLeaf::Str(val).into(),
serde_json::Value::Array(elements) => ReferenceValue::Array(elements.iter()),
serde_json::Value::Object(object) => {
ReferenceValue::Object(JsonObjectIter(object.iter()))
@@ -77,7 +78,7 @@ impl ValueDeserialize for serde_json::Value {
}
fn visit_bool(&self, val: bool) -> Result<Self::Value, DeserializeError> {
Ok(serde_json::Value::Bool(val.into()))
Ok(serde_json::Value::Bool(val))
}
fn visit_array<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>

View File

@@ -116,6 +116,7 @@
//!
//! ```
//! use tantivy::schema::document::ReferenceValue;
//! use tantivy::schema::document::ReferenceValueLeaf;
//! use tantivy::schema::{Value};
//!
//! #[derive(Debug)]
@@ -141,9 +142,9 @@
//! fn as_value(&self) -> ReferenceValue<'a, Self> {
//! // We can support any type that Tantivy itself supports.
//! match self {
//! MyCustomValue::String(val) => ReferenceValue::Str(val),
//! MyCustomValue::Float(val) => ReferenceValue::F64(*val),
//! MyCustomValue::Bool(val) => ReferenceValue::Bool(*val),
//! MyCustomValue::String(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Str(*val)),
//! MyCustomValue::Float(val) => ReferenceValue::Leaf(ReferenceValueLeaf::F64(*val)),
//! MyCustomValue::Bool(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bool(*val)),
//! }
//! }
//!
@@ -170,7 +171,7 @@ pub use self::de::{
pub use self::default_document::{DocParsingError, TantivyDocument};
pub use self::owned_value::OwnedValue;
pub(crate) use self::se::BinaryDocumentSerializer;
pub use self::value::{ReferenceValue, Value};
pub use self::value::{ReferenceValue, ReferenceValueLeaf, Value};
use super::*;
/// The core trait representing a document within the index.

View File

@@ -8,6 +8,7 @@ use serde::de::{MapAccess, SeqAccess};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use super::ReferenceValueLeaf;
use crate::schema::document::{
ArrayAccess, DeserializeError, ObjectAccess, ReferenceValue, Value, ValueDeserialize,
ValueDeserializer, ValueVisitor,
@@ -62,17 +63,17 @@ impl<'a> Value<'a> for &'a OwnedValue {
fn as_value(&self) -> ReferenceValue<'a, Self> {
match self {
OwnedValue::Null => ReferenceValue::Null,
OwnedValue::Str(val) => ReferenceValue::Str(val),
OwnedValue::PreTokStr(val) => ReferenceValue::PreTokStr(val),
OwnedValue::U64(val) => ReferenceValue::U64(*val),
OwnedValue::I64(val) => ReferenceValue::I64(*val),
OwnedValue::F64(val) => ReferenceValue::F64(*val),
OwnedValue::Bool(val) => ReferenceValue::Bool(*val),
OwnedValue::Date(val) => ReferenceValue::Date(*val),
OwnedValue::Facet(val) => ReferenceValue::Facet(val),
OwnedValue::Bytes(val) => ReferenceValue::Bytes(val),
OwnedValue::IpAddr(val) => ReferenceValue::IpAddr(*val),
OwnedValue::Null => ReferenceValueLeaf::Null.into(),
OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(),
OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(),
OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(),
OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(),
OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(),
OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(),
OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(),
OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(),
OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(),
OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())),
}
@@ -264,17 +265,19 @@ impl<'de> serde::Deserialize<'de> for OwnedValue {
impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
fn from(val: ReferenceValue<'a, V>) -> OwnedValue {
match val {
ReferenceValue::Null => OwnedValue::Null,
ReferenceValue::Str(val) => OwnedValue::Str(val.to_string()),
ReferenceValue::U64(val) => OwnedValue::U64(val),
ReferenceValue::I64(val) => OwnedValue::I64(val),
ReferenceValue::F64(val) => OwnedValue::F64(val),
ReferenceValue::Date(val) => OwnedValue::Date(val),
ReferenceValue::Facet(val) => OwnedValue::Facet(val.clone()),
ReferenceValue::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
ReferenceValue::IpAddr(val) => OwnedValue::IpAddr(val),
ReferenceValue::Bool(val) => OwnedValue::Bool(val),
ReferenceValue::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => OwnedValue::Null,
ReferenceValueLeaf::Str(val) => OwnedValue::Str(val.to_string()),
ReferenceValueLeaf::U64(val) => OwnedValue::U64(val),
ReferenceValueLeaf::I64(val) => OwnedValue::I64(val),
ReferenceValueLeaf::F64(val) => OwnedValue::F64(val),
ReferenceValueLeaf::Date(val) => OwnedValue::Date(val),
ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()),
ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
},
ReferenceValue::Array(val) => {
OwnedValue::Array(val.map(|v| v.as_value().into()).collect())
}

View File

@@ -5,6 +5,7 @@ use std::io::Write;
use columnar::MonotonicallyMappableToU128;
use common::{f64_to_u64, BinarySerializable, VInt};
use super::{OwnedValue, ReferenceValueLeaf};
use crate::schema::document::{type_codes, Document, ReferenceValue, Value};
use crate::schema::Schema;
@@ -39,10 +40,10 @@ where W: Write
let mut serializer = BinaryValueSerializer::new(self.writer);
match value_access.as_value() {
ReferenceValue::PreTokStr(pre_tokenized_text) => {
serializer.serialize_value(ReferenceValue::Str::<
&'_ crate::schema::OwnedValue,
>(&pre_tokenized_text.text))?;
ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(pre_tokenized_text)) => {
serializer.serialize_value(ReferenceValue::Leaf::<&'_ OwnedValue>(
ReferenceValueLeaf::Str(&pre_tokenized_text.text),
))?;
}
_ => {
serializer.serialize_value(value_access.as_value())?;
@@ -90,59 +91,61 @@ where W: Write
V: Value<'a>,
{
match value {
ReferenceValue::Null => self.write_type_code(type_codes::NULL_CODE),
ReferenceValue::Str(val) => {
self.write_type_code(type_codes::TEXT_CODE)?;
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => self.write_type_code(type_codes::NULL_CODE),
ReferenceValueLeaf::Str(val) => {
self.write_type_code(type_codes::TEXT_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
}
ReferenceValue::U64(val) => {
self.write_type_code(type_codes::U64_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
}
ReferenceValueLeaf::U64(val) => {
self.write_type_code(type_codes::U64_CODE)?;
val.serialize(self.writer)
}
ReferenceValue::I64(val) => {
self.write_type_code(type_codes::I64_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::I64(val) => {
self.write_type_code(type_codes::I64_CODE)?;
val.serialize(self.writer)
}
ReferenceValue::F64(val) => {
self.write_type_code(type_codes::F64_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::F64(val) => {
self.write_type_code(type_codes::F64_CODE)?;
f64_to_u64(val).serialize(self.writer)
}
ReferenceValue::Date(val) => {
self.write_type_code(type_codes::DATE_CODE)?;
val.serialize(self.writer)
}
ReferenceValue::Facet(val) => {
self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;
f64_to_u64(val).serialize(self.writer)
}
ReferenceValueLeaf::Date(val) => {
self.write_type_code(type_codes::DATE_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::Facet(val) => {
self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;
val.serialize(self.writer)
}
ReferenceValue::Bytes(val) => {
self.write_type_code(type_codes::BYTES_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::Bytes(val) => {
self.write_type_code(type_codes::BYTES_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
}
ReferenceValue::IpAddr(val) => {
self.write_type_code(type_codes::IP_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
}
ReferenceValueLeaf::IpAddr(val) => {
self.write_type_code(type_codes::IP_CODE)?;
val.to_u128().serialize(self.writer)
}
ReferenceValue::Bool(val) => {
self.write_type_code(type_codes::BOOL_CODE)?;
val.to_u128().serialize(self.writer)
}
ReferenceValueLeaf::Bool(val) => {
self.write_type_code(type_codes::BOOL_CODE)?;
val.serialize(self.writer)
}
ReferenceValue::PreTokStr(val) => {
self.write_type_code(type_codes::EXT_CODE)?;
self.write_type_code(type_codes::TOK_STR_EXT_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::PreTokStr(val) => {
self.write_type_code(type_codes::EXT_CODE)?;
self.write_type_code(type_codes::TOK_STR_EXT_CODE)?;
val.serialize(self.writer)
}
val.serialize(self.writer)
}
},
ReferenceValue::Array(elements) => {
self.write_type_code(type_codes::ARRAY_CODE)?;
@@ -272,7 +275,7 @@ where W: Write
// as we could avoid writing the extra byte per key. But the gain is
// largely not worth it for the extra complexity it brings.
self.inner
.serialize_value(ReferenceValue::<'a, V>::Str(key))?;
.serialize_value(ReferenceValue::<'a, V>::Leaf(ReferenceValueLeaf::Str(key)))?;
self.inner.serialize_value(value)?;
self.actual_length += 1;
@@ -361,7 +364,7 @@ mod tests {
#[test]
fn test_simple_value_serialize() {
let result = serialize_value(ReferenceValue::Null);
let result = serialize_value(ReferenceValueLeaf::Null.into());
let expected = binary_repr!(
type_codes::NULL_CODE => (),
);
@@ -370,7 +373,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::Str("hello, world"));
let result = serialize_value(ReferenceValueLeaf::Str("hello, world").into());
let expected = binary_repr!(
type_codes::TEXT_CODE => String::from("hello, world"),
);
@@ -379,7 +382,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::U64(123));
let result = serialize_value(ReferenceValueLeaf::U64(123).into());
let expected = binary_repr!(
type_codes::U64_CODE => 123u64,
);
@@ -388,7 +391,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::I64(-123));
let result = serialize_value(ReferenceValueLeaf::I64(-123).into());
let expected = binary_repr!(
type_codes::I64_CODE => -123i64,
);
@@ -397,7 +400,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::F64(123.3845));
let result = serialize_value(ReferenceValueLeaf::F64(123.3845f64).into());
let expected = binary_repr!(
type_codes::F64_CODE => f64_to_u64(123.3845f64),
);
@@ -406,7 +409,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::Bool(false));
let result = serialize_value(ReferenceValueLeaf::Bool(false).into());
let expected = binary_repr!(
type_codes::BOOL_CODE => false,
);
@@ -415,7 +418,7 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValue::Date(DateTime::MAX));
let result = serialize_value(ReferenceValueLeaf::Date(DateTime::MAX).into());
let expected = binary_repr!(
type_codes::DATE_CODE => DateTime::MAX,
);
@@ -425,7 +428,7 @@ mod tests {
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValue::Facet(&facet));
let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
let expected = binary_repr!(
type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(),
);
@@ -438,7 +441,7 @@ mod tests {
text: "hello, world".to_string(),
tokens: vec![Token::default(), Token::default()],
};
let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str));
let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
let expected = binary_repr!(
type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str,
);

View File

@@ -21,108 +21,81 @@ pub trait Value<'a>: Send + Sync + Debug {
#[inline]
/// Returns if the value is `null` or not.
fn is_null(&self) -> bool {
matches!(self.as_value(), ReferenceValue::Null)
matches!(
self.as_value(),
ReferenceValue::Leaf(ReferenceValueLeaf::Null)
)
}
#[inline]
/// If the Value is a leaf (i.e. `as_value()` yields `ReferenceValue::Leaf`), returns the
/// associated `ReferenceValueLeaf`. Returns None otherwise.
fn as_leaf(&self) -> Option<ReferenceValueLeaf<'a>> {
if let ReferenceValue::Leaf(val) = self.as_value() {
Some(val)
} else {
None
}
}
#[inline]
/// If the Value is a String, returns the associated str. Returns None otherwise.
fn as_str(&self) -> Option<&'a str> {
if let ReferenceValue::Str(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_str())
}
#[inline]
/// If the Value is a u64, returns the associated u64. Returns None otherwise.
fn as_u64(&self) -> Option<u64> {
if let ReferenceValue::U64(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_u64())
}
#[inline]
/// If the Value is a i64, returns the associated i64. Returns None otherwise.
fn as_i64(&self) -> Option<i64> {
if let ReferenceValue::I64(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_i64())
}
#[inline]
/// If the Value is a f64, returns the associated f64. Returns None otherwise.
fn as_f64(&self) -> Option<f64> {
if let ReferenceValue::F64(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_f64())
}
#[inline]
/// If the Value is a datetime, returns the associated datetime. Returns None otherwise.
fn as_datetime(&self) -> Option<DateTime> {
if let ReferenceValue::Date(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_datetime())
}
#[inline]
/// If the Value is a IP address, returns the associated IP. Returns None otherwise.
fn as_ip_addr(&self) -> Option<Ipv6Addr> {
if let ReferenceValue::IpAddr(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_ip_addr())
}
#[inline]
/// If the Value is a bool, returns the associated bool. Returns None otherwise.
fn as_bool(&self) -> Option<bool> {
if let ReferenceValue::Bool(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_bool())
}
#[inline]
/// If the Value is a pre-tokenized string, returns the associated string. Returns None
/// otherwise.
fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
if let ReferenceValue::PreTokStr(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
}
#[inline]
/// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise.
fn as_bytes(&self) -> Option<&'a [u8]> {
if let ReferenceValue::Bytes(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_bytes())
}
#[inline]
/// If the Value is a facet, returns the associated facet. Returns None otherwise.
fn as_facet(&self) -> Option<&'a Facet> {
if let ReferenceValue::Facet(val) = self.as_value() {
Some(val)
} else {
None
}
self.as_leaf().and_then(|leaf| leaf.as_facet())
}
#[inline]
@@ -158,11 +131,9 @@ pub trait Value<'a>: Send + Sync + Debug {
}
}
/// A enum representing a value for tantivy to index.
#[derive(Clone, Debug, PartialEq)]
pub enum ReferenceValue<'a, V>
where V: Value<'a> + ?Sized
{
/// An enum representing a leaf value for tantivy to index.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ReferenceValueLeaf<'a> {
/// A null value.
Null,
/// The str type is used for any text information.
@@ -185,15 +156,32 @@ where V: Value<'a> + ?Sized
Bool(bool),
/// Pre-tokenized str type,
PreTokStr(&'a PreTokenizedString),
/// A an array containing multiple values.
Array(V::ArrayIter),
/// A nested / dynamic object.
Object(V::ObjectIter),
}
impl<'a, V> ReferenceValue<'a, V>
where V: Value<'a>
{
impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<'a, T> {
#[inline]
fn from(value: ReferenceValueLeaf<'a>) -> Self {
match value {
ReferenceValueLeaf::Null => ReferenceValue::Leaf(ReferenceValueLeaf::Null),
ReferenceValueLeaf::Str(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Str(val)),
ReferenceValueLeaf::U64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::U64(val)),
ReferenceValueLeaf::I64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::I64(val)),
ReferenceValueLeaf::F64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::F64(val)),
ReferenceValueLeaf::Date(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Date(val)),
ReferenceValueLeaf::Facet(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Facet(val)),
ReferenceValueLeaf::Bytes(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bytes(val)),
ReferenceValueLeaf::IpAddr(val) => {
ReferenceValue::Leaf(ReferenceValueLeaf::IpAddr(val))
}
ReferenceValueLeaf::Bool(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bool(val)),
ReferenceValueLeaf::PreTokStr(val) => {
ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(val))
}
}
}
}
impl<'a> ReferenceValueLeaf<'a> {
#[inline]
/// Returns if the value is `null` or not.
pub fn is_null(&self) -> bool {
@@ -300,11 +288,105 @@ where V: Value<'a>
None
}
}
}
/// An enum representing a value for tantivy to index.
///
/// A value is either a single leaf (see [`ReferenceValueLeaf`]) or a container:
/// an array or an object, exposed through the iterator types associated with `V`.
#[derive(Clone, Debug, PartialEq)]
pub enum ReferenceValue<'a, V>
where V: Value<'a> + ?Sized
{
/// A leaf value, i.e. any variant of [`ReferenceValueLeaf`] (including null).
Leaf(ReferenceValueLeaf<'a>),
/// An array containing multiple values.
Array(V::ArrayIter),
/// A nested / dynamic object.
Object(V::ObjectIter),
}
impl<'a, V> ReferenceValue<'a, V>
where V: Value<'a>
{
#[inline]
/// Reports whether this value is the `Null` leaf.
///
/// Arrays, objects and every other leaf variant yield `false`.
pub fn is_null(&self) -> bool {
    matches!(self.as_leaf(), Some(ReferenceValueLeaf::Null))
}
#[inline]
/// Borrows the inner leaf when this value is a `Leaf`.
///
/// Returns `None` for the `Array` and `Object` variants.
pub fn as_leaf(&self) -> Option<&ReferenceValueLeaf<'a>> {
    match self {
        Self::Leaf(leaf) => Some(leaf),
        Self::Array(_) | Self::Object(_) => None,
    }
}
#[inline]
/// Returns the associated str if the value is a string, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_str()`.
pub fn as_str(&self) -> Option<&'a str> {
    self.as_leaf()?.as_str()
}
#[inline]
/// Returns the associated u64 if the value is a u64, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_u64()`.
pub fn as_u64(&self) -> Option<u64> {
    self.as_leaf()?.as_u64()
}
#[inline]
/// Returns the associated i64 if the value is an i64, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_i64()`.
pub fn as_i64(&self) -> Option<i64> {
    self.as_leaf()?.as_i64()
}
#[inline]
/// Returns the associated f64 if the value is an f64, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_f64()`.
pub fn as_f64(&self) -> Option<f64> {
    self.as_leaf()?.as_f64()
}
#[inline]
/// Returns the associated datetime if the value is a datetime, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_datetime()`.
pub fn as_datetime(&self) -> Option<DateTime> {
    self.as_leaf()?.as_datetime()
}
#[inline]
/// Returns the associated IP if the value is an IP address, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_ip_addr()`.
pub fn as_ip_addr(&self) -> Option<Ipv6Addr> {
    self.as_leaf()?.as_ip_addr()
}
#[inline]
/// Returns the associated bool if the value is a bool, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_bool()`.
pub fn as_bool(&self) -> Option<bool> {
    self.as_leaf()?.as_bool()
}
#[inline]
/// Returns the associated pre-tokenized string if the value is one, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own
/// `as_pre_tokenized_text()`.
pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
    self.as_leaf()?.as_pre_tokenized_text()
}
#[inline]
/// Returns the associated set of bytes if the value is a bytes value, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_bytes()`.
pub fn as_bytes(&self) -> Option<&'a [u8]> {
    self.as_leaf()?.as_bytes()
}
#[inline]
/// Returns the associated facet if the value is a facet, `None` otherwise.
///
/// Non-leaf values short-circuit to `None`; leaves are queried via their own `as_facet()`.
pub fn as_facet(&self) -> Option<&'a Facet> {
    self.as_leaf()?.as_facet()
}
#[inline]
/// Returns true if the Value is an array.
///
/// Only the `Array` variant matches; leaves and objects yield `false`.
pub fn is_array(&self) -> bool {
    matches!(self, Self::Array(_))
}
#[inline]