rename DocValue to Value (#2197)

rename DocValue to Value to avoid confusion with lucene DocValues
rename Value to OwnedValue
This commit is contained in:
PSeitz
2023-10-02 17:03:00 +02:00
committed by GitHub
parent 1c7c6fd591
commit 03a1f40767
26 changed files with 293 additions and 278 deletions

View File

@@ -4,7 +4,7 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::schema::{DateOptions, OwnedValue, Schema, INDEXED, STORED, STRING};
use tantivy::{Index, IndexWriter, TantivyDocument};
fn main() -> tantivy::Result<()> {
@@ -63,7 +63,7 @@ fn main() -> tantivy::Result<()> {
let retrieved_doc = searcher.doc::<TantivyDocument>(doc_address)?;
assert!(matches!(
retrieved_doc.get_first(occurred_at),
Some(Value::Date(_))
Some(OwnedValue::Date(_))
));
assert_eq!(
retrieved_doc.to_json(&schema),

View File

@@ -5,7 +5,7 @@ use rustc_hash::FxHashMap;
use crate::fastfield::FastValue;
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::document::{DocValue, ReferenceValue};
use crate::schema::document::{ReferenceValue, Value};
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED};
use crate::time::format_description::well_known::Rfc3339;
@@ -65,7 +65,7 @@ impl IndexingPositionsPerPath {
}
}
pub(crate) fn index_json_values<'a, V: DocValue<'a>>(
pub(crate) fn index_json_values<'a, V: Value<'a>>(
doc: DocId,
json_visitors: impl Iterator<Item = crate::Result<V::ObjectIter>>,
text_analyzer: &mut TextAnalyzer,
@@ -91,7 +91,7 @@ pub(crate) fn index_json_values<'a, V: DocValue<'a>>(
Ok(())
}
fn index_json_object<'a, V: DocValue<'a>>(
fn index_json_object<'a, V: Value<'a>>(
doc: DocId,
json_visitor: V::ObjectIter,
text_analyzer: &mut TextAnalyzer,
@@ -115,7 +115,7 @@ fn index_json_object<'a, V: DocValue<'a>>(
}
}
fn index_json_value<'a, V: DocValue<'a>>(
fn index_json_value<'a, V: Value<'a>>(
doc: DocId,
json_value: ReferenceValue<'a, V>,
text_analyzer: &mut TextAnalyzer,

View File

@@ -62,7 +62,7 @@ impl FacetReader {
#[cfg(test)]
mod tests {
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{Facet, FacetOptions, SchemaBuilder, STORED};
use crate::{DocAddress, Index, IndexWriter, TantivyDocument};

View File

@@ -5,7 +5,7 @@ use common::replace_in_place;
use tokenizer_api::Token;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::schema::document::{DocValue, Document, ReferenceValue};
use crate::schema::document::{Document, ReferenceValue, Value};
use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
@@ -129,7 +129,7 @@ impl FastFieldsWriter {
Ok(())
}
fn add_doc_value<'a, V: DocValue<'a>>(
fn add_doc_value<'a, V: Value<'a>>(
&mut self,
doc_id: DocId,
field: Field,
@@ -243,7 +243,7 @@ impl FastFieldsWriter {
}
}
fn record_json_obj_to_columnar_writer<'a, V: DocValue<'a>>(
fn record_json_obj_to_columnar_writer<'a, V: Value<'a>>(
doc: DocId,
json_visitor: V::ObjectIter,
expand_dots: bool,
@@ -282,7 +282,7 @@ fn record_json_obj_to_columnar_writer<'a, V: DocValue<'a>>(
}
}
fn record_json_value_to_columnar_writer<'a, V: DocValue<'a>>(
fn record_json_value_to_columnar_writer<'a, V: Value<'a>>(
doc: DocId,
json_val: ReferenceValue<'a, V>,
expand_dots: bool,
@@ -382,7 +382,7 @@ mod tests {
use super::record_json_value_to_columnar_writer;
use crate::fastfield::writer::JSON_DEPTH_LIMIT;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::DocId;
fn test_columnar_from_jsons_aux(

View File

@@ -158,7 +158,7 @@ mod tests_indexsorting {
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{Schema, *};
use crate::{DocAddress, Index, IndexSettings, IndexSortByField, Order};

View File

@@ -815,7 +815,7 @@ mod tests {
use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
use crate::indexer::NoMergePolicy;
use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{
self, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, NumericOptions, Schema,
TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,

View File

@@ -753,7 +753,7 @@ mod tests {
use crate::collector::{Count, FacetCollector};
use crate::core::Index;
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{
Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
TextFieldIndexing, INDEXED, TEXT,

View File

@@ -4,7 +4,7 @@ mod tests {
use crate::core::Index;
use crate::fastfield::AliveBitSet;
use crate::query::QueryParser;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{
self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
TextFieldIndexing, TextOptions,

View File

@@ -13,7 +13,7 @@ use crate::postings::{
compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
PerFieldPostingsWriter, PostingsWriter,
};
use crate::schema::document::{DocValue, Document, ReferenceValue};
use crate::schema::document::{Document, ReferenceValue, Value};
use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED};
use crate::store::{StoreReader, StoreWriter};
use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
@@ -492,7 +492,7 @@ mod tests {
use crate::directory::RamDirectory;
use crate::postings::TermInfo;
use crate::query::PhraseQuery;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{
IndexRecordOption, Schema, TextFieldIndexing, TextOptions, Type, STORED, STRING, TEXT,
};
@@ -715,7 +715,7 @@ mod tests {
let json_field = schema_builder.add_json_field("json", STORED | TEXT);
let schema = schema_builder.build();
let mut doc = TantivyDocument::default();
let json_val: BTreeMap<String, crate::schema::Value> =
let json_val: BTreeMap<String, crate::schema::OwnedValue> =
serde_json::from_str(r#"{"mykey": "repeated token token"}"#).unwrap();
doc.add_object(json_field, json_val);
let index = Index::create_in_ram(schema);

View File

@@ -342,7 +342,7 @@ pub mod tests {
use crate::docset::{DocSet, TERMINATED};
use crate::merge_policy::NoMergePolicy;
use crate::query::BooleanQuery;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::*;
use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};
@@ -986,11 +986,11 @@ pub mod tests {
text_field => "some other value",
other_text_field => "short");
assert_eq!(document.len(), 3);
let values: Vec<&Value> = document.get_all(text_field).collect();
let values: Vec<&OwnedValue> = document.get_all(text_field).collect();
assert_eq!(values.len(), 2);
assert_eq!(values[0].as_str(), Some("tantivy"));
assert_eq!(values[1].as_str(), Some("some other value"));
let values: Vec<&Value> = document.get_all(other_text_field).collect();
let values: Vec<&OwnedValue> = document.get_all(other_text_field).collect();
assert_eq!(values.len(), 1);
assert_eq!(values[0].as_str(), Some("short"));
}

View File

@@ -5,7 +5,7 @@ use tokenizer_api::Token;
use crate::query::bm25::idf;
use crate::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery};
use crate::schema::document::{DocValue, Document};
use crate::schema::document::{Document, Value};
use crate::schema::{Field, FieldType, IndexRecordOption, Term};
use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer};
use crate::{DocAddress, Result, Searcher, TantivyDocument, TantivyError};
@@ -93,7 +93,7 @@ impl MoreLikeThis {
}
/// Creates a [`BooleanQuery`] using a set of field values.
pub fn query_with_document_fields<'a, V: DocValue<'a>>(
pub fn query_with_document_fields<'a, V: Value<'a>>(
&self,
searcher: &Searcher,
doc_fields: &[(Field, Vec<V>)],
@@ -137,7 +137,7 @@ impl MoreLikeThis {
/// Finds terms for a more-like-this query.
/// `field_to_values` is a mapping from field to possible values of that field.
fn retrieve_terms_from_doc_fields<'a, V: DocValue<'a>>(
fn retrieve_terms_from_doc_fields<'a, V: Value<'a>>(
&self,
searcher: &Searcher,
field_to_values: &[(Field, Vec<V>)],
@@ -159,7 +159,7 @@ impl MoreLikeThis {
/// Computes the frequency of values for a field while updating the term frequencies
/// Note: A FieldValue can be made up of multiple terms.
/// We are interested in extracting terms within FieldValue
fn add_term_frequencies<'a, V: DocValue<'a>>(
fn add_term_frequencies<'a, V: Value<'a>>(
&self,
searcher: &Searcher,
field: Field,

View File

@@ -2,7 +2,7 @@ use std::fmt::Debug;
use super::MoreLikeThis;
use crate::query::{EnableScoring, Query, Weight};
use crate::schema::{Field, Value};
use crate::schema::{Field, OwnedValue};
use crate::DocAddress;
/// A query that matches all of the documents similar to a document
@@ -33,7 +33,7 @@ pub struct MoreLikeThisQuery {
#[derive(Debug, Clone, PartialEq)]
enum TargetDocument {
DocumentAddress(DocAddress),
DocumentFields(Vec<(Field, Vec<Value>)>),
DocumentFields(Vec<(Field, Vec<OwnedValue>)>),
}
impl MoreLikeThisQuery {
@@ -60,7 +60,7 @@ impl Query for MoreLikeThisQuery {
TargetDocument::DocumentFields(doc_fields) => {
let values = doc_fields
.iter()
.map(|(field, values)| (*field, values.iter().collect::<Vec<&Value>>()))
.map(|(field, values)| (*field, values.iter().collect::<Vec<&OwnedValue>>()))
.collect::<Vec<_>>();
self.mlt
@@ -175,7 +175,10 @@ impl MoreLikeThisQueryBuilder {
/// that will be used to compose the resulting query.
/// This interface is meant to be used when you want to provide your own set of fields
/// not necessarily from a specific document.
pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec<Value>)>) -> MoreLikeThisQuery {
pub fn with_document_fields(
self,
doc_fields: Vec<(Field, Vec<OwnedValue>)>,
) -> MoreLikeThisQuery {
MoreLikeThisQuery {
mlt: self.mlt,
target: TargetDocument::DocumentFields(doc_fields),

View File

@@ -802,52 +802,52 @@ mod tests {
writer
}
fn deserialize_value(buffer: Vec<u8>) -> crate::schema::Value {
fn deserialize_value(buffer: Vec<u8>) -> crate::schema::OwnedValue {
let mut cursor = Cursor::new(buffer);
let deserializer = BinaryValueDeserializer::from_reader(&mut cursor).unwrap();
crate::schema::Value::deserialize(deserializer).expect("Deserialize value")
crate::schema::OwnedValue::deserialize(deserializer).expect("Deserialize value")
}
#[test]
fn test_simple_value_serialize() {
let result = serialize_value(ReferenceValue::Null);
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::Null);
assert_eq!(value, crate::schema::OwnedValue::Null);
let result = serialize_value(ReferenceValue::Str("hello, world"));
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::Value::Str(String::from("hello, world"))
crate::schema::OwnedValue::Str(String::from("hello, world"))
);
let result = serialize_value(ReferenceValue::U64(123));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::U64(123));
assert_eq!(value, crate::schema::OwnedValue::U64(123));
let result = serialize_value(ReferenceValue::I64(-123));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::I64(-123));
assert_eq!(value, crate::schema::OwnedValue::I64(-123));
let result = serialize_value(ReferenceValue::F64(123.3845));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::F64(123.3845));
assert_eq!(value, crate::schema::OwnedValue::F64(123.3845));
let result = serialize_value(ReferenceValue::Bool(false));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::Bool(false));
assert_eq!(value, crate::schema::OwnedValue::Bool(false));
let result = serialize_value(ReferenceValue::Date(DateTime::from_timestamp_micros(100)));
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::Value::Date(DateTime::from_timestamp_micros(100))
crate::schema::OwnedValue::Date(DateTime::from_timestamp_micros(100))
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValue::Facet(&facet));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::Facet(facet));
assert_eq!(value, crate::schema::OwnedValue::Facet(facet));
let pre_tok_str = PreTokenizedString {
text: "hello, world".to_string(),
@@ -855,7 +855,7 @@ mod tests {
};
let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::PreTokStr(pre_tok_str));
assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
}
#[test]
@@ -865,9 +865,9 @@ mod tests {
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::Value::Array(vec![
crate::schema::Value::Null,
crate::schema::Value::Null,
crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Null,
crate::schema::OwnedValue::Null,
]),
);
@@ -879,16 +879,16 @@ mod tests {
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::Value::Array(vec![
crate::schema::Value::Str(String::from("Hello, world")),
crate::schema::Value::Str(String::from("Some demo")),
crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Str(String::from("Hello, world")),
crate::schema::OwnedValue::Str(String::from("Some demo")),
]),
);
let elements = vec![];
let result = serialize_value(ReferenceValue::Array(JsonArrayIter(elements.iter())));
let value = deserialize_value(result);
assert_eq!(value, crate::schema::Value::Array(vec![]));
assert_eq!(value, crate::schema::OwnedValue::Array(vec![]));
let elements = vec![
serde_json::Value::Null,
@@ -899,10 +899,10 @@ mod tests {
let value = deserialize_value(result);
assert_eq!(
value,
crate::schema::Value::Array(vec![
crate::schema::Value::Null,
crate::schema::Value::Str(String::from("Hello, world")),
crate::schema::Value::U64(12345),
crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Null,
crate::schema::OwnedValue::Str(String::from("Hello, world")),
crate::schema::OwnedValue::U64(12345),
]),
);
}
@@ -925,17 +925,20 @@ mod tests {
let mut expected_object = BTreeMap::new();
expected_object.insert(
"my-first-key".to_string(),
crate::schema::Value::Str(String::from("Hello")),
crate::schema::OwnedValue::Str(String::from("Hello")),
);
expected_object.insert("my-second-key".to_string(), crate::schema::Value::Null);
expected_object.insert("my-third-key".to_string(), crate::schema::Value::F64(123.0));
assert_eq!(value, crate::schema::Value::Object(expected_object));
expected_object.insert("my-second-key".to_string(), crate::schema::OwnedValue::Null);
expected_object.insert(
"my-third-key".to_string(),
crate::schema::OwnedValue::F64(123.0),
);
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
let object = serde_json::Map::new();
let result = serialize_value(ReferenceValue::Object(JsonObjectIter(object.iter())));
let value = deserialize_value(result);
let expected_object = BTreeMap::new();
assert_eq!(value, crate::schema::Value::Object(expected_object));
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
let mut object = serde_json::Map::new();
object.insert("my-first-key".into(), serde_json::Value::Null);
@@ -944,10 +947,10 @@ mod tests {
let result = serialize_value(ReferenceValue::Object(JsonObjectIter(object.iter())));
let value = deserialize_value(result);
let mut expected_object = BTreeMap::new();
expected_object.insert("my-first-key".to_string(), crate::schema::Value::Null);
expected_object.insert("my-second-key".to_string(), crate::schema::Value::Null);
expected_object.insert("my-third-key".to_string(), crate::schema::Value::Null);
assert_eq!(value, crate::schema::Value::Object(expected_object));
expected_object.insert("my-first-key".to_string(), crate::schema::OwnedValue::Null);
expected_object.insert("my-second-key".to_string(), crate::schema::OwnedValue::Null);
expected_object.insert("my-third-key".to_string(), crate::schema::OwnedValue::Null);
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
}
#[test]
@@ -983,26 +986,29 @@ mod tests {
let mut expected_object = BTreeMap::new();
expected_object.insert(
"my-array".to_string(),
crate::schema::Value::Array(vec![
crate::schema::Value::Null,
crate::schema::Value::Str(String::from("bobby of the sea")),
crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Null,
crate::schema::OwnedValue::Str(String::from("bobby of the sea")),
]),
);
expected_object.insert(
"my-object".to_string(),
crate::schema::Value::Object(
crate::schema::OwnedValue::Object(
vec![
("inner-1".to_string(), crate::schema::Value::I64(-123i64)),
(
"inner-1".to_string(),
crate::schema::OwnedValue::I64(-123i64),
),
(
"inner-2".to_string(),
crate::schema::Value::Str(String::from("bobby of the sea 2")),
crate::schema::OwnedValue::Str(String::from("bobby of the sea 2")),
),
]
.into_iter()
.collect(),
),
);
assert_eq!(value, crate::schema::Value::Object(expected_object));
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
// Some more extreme nesting that might behave weirdly
let mut object = serde_json::Map::new();
@@ -1019,11 +1025,11 @@ mod tests {
let mut expected_object = BTreeMap::new();
expected_object.insert(
"my-array".to_string(),
crate::schema::Value::Array(vec![crate::schema::Value::Array(vec![
crate::schema::Value::Array(vec![]),
crate::schema::Value::Array(vec![crate::schema::Value::Null]),
crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Array(vec![
crate::schema::OwnedValue::Array(vec![]),
crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Null]),
])]),
);
assert_eq!(value, crate::schema::Value::Object(expected_object));
assert_eq!(value, crate::schema::OwnedValue::Object(expected_object));
}
}

View File

@@ -9,7 +9,7 @@ use crate::schema::document::{
};
use crate::schema::field_type::ValueParsingError;
use crate::schema::field_value::FieldValueIter;
use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, Schema, Value};
use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema};
use crate::tokenizer::PreTokenizedString;
/// Tantivy's Document is the object that can be indexed and then searched for.
@@ -23,7 +23,7 @@ pub struct TantivyDocument {
}
impl Document for TantivyDocument {
type Value<'a> = &'a Value;
type Value<'a> = &'a OwnedValue;
type FieldsValuesIter<'a> = FieldValueIter<'a>;
fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> {
@@ -99,13 +99,13 @@ impl TantivyDocument {
pub fn add_facet<F>(&mut self, field: Field, path: F)
where Facet: From<F> {
let facet = Facet::from(path);
let value = Value::Facet(facet);
let value = OwnedValue::Facet(facet);
self.add_field_value(field, value);
}
/// Add a text field.
pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
let value = Value::Str(text.to_string());
let value = OwnedValue::Str(text.to_string());
self.add_field_value(field, value);
}
@@ -150,12 +150,12 @@ impl TantivyDocument {
}
/// Add a dynamic object field
pub fn add_object(&mut self, field: Field, object: BTreeMap<String, Value>) {
pub fn add_object(&mut self, field: Field, object: BTreeMap<String, OwnedValue>) {
self.add_field_value(field, object);
}
/// Add a (field, value) to the document.
pub fn add_field_value<T: Into<Value>>(&mut self, field: Field, typed_val: T) {
pub fn add_field_value<T: Into<OwnedValue>>(&mut self, field: Field, typed_val: T) {
let value = typed_val.into();
let field_value = FieldValue { field, value };
self.field_values.push(field_value);
@@ -167,7 +167,7 @@ impl TantivyDocument {
}
/// Returns all of the `FieldValue`s associated with the given field
pub fn get_all(&self, field: Field) -> impl Iterator<Item = &Value> {
pub fn get_all(&self, field: Field) -> impl Iterator<Item = &OwnedValue> {
self.field_values
.iter()
.filter(move |field_value| field_value.field() == field)
@@ -175,7 +175,7 @@ impl TantivyDocument {
}
/// Returns the first `FieldValue` associated with the given field
pub fn get_first(&self, field: Field) -> Option<&Value> {
pub fn get_first(&self, field: Field) -> Option<&OwnedValue> {
self.get_all(field).next()
}
@@ -200,7 +200,7 @@ impl TantivyDocument {
let mut field_map = BTreeMap::new();
for (field, field_values) in self.get_sorted_field_values() {
let field_name = schema.get_field_name(field);
let values: Vec<Value> = field_values.into_iter().cloned().collect();
let values: Vec<OwnedValue> = field_values.into_iter().cloned().collect();
field_map.insert(field_name.to_string(), values);
}
NamedFieldDocument(field_map)

View File

@@ -9,13 +9,13 @@ use std::collections::{btree_map, hash_map, BTreeMap, HashMap};
use serde_json::Number;
use crate::schema::document::{
ArrayAccess, DeserializeError, DocValue, Document, DocumentDeserialize, DocumentDeserializer,
ObjectAccess, ReferenceValue, ValueDeserialize, ValueDeserializer, ValueVisitor,
ArrayAccess, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer,
ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor,
};
use crate::schema::Field;
// Serde compatibility support.
impl<'a> DocValue<'a> for &'a serde_json::Value {
impl<'a> Value<'a> for &'a serde_json::Value {
type ChildValue = Self;
type ArrayIter = JsonArrayIter<'a>;
type ObjectIter = JsonObjectIter<'a>;
@@ -137,19 +137,19 @@ impl<'a> Iterator for JsonObjectIter<'a> {
// Custom document types
// BTreeMap based documents
impl Document for BTreeMap<Field, crate::schema::Value> {
type Value<'a> = &'a crate::schema::Value;
impl Document for BTreeMap<Field, crate::schema::OwnedValue> {
type Value<'a> = &'a crate::schema::OwnedValue;
type FieldsValuesIter<'a> = FieldCopyingIterator<
'a,
btree_map::Iter<'a, Field, crate::schema::Value>,
crate::schema::Value,
btree_map::Iter<'a, Field, crate::schema::OwnedValue>,
crate::schema::OwnedValue,
>;
fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> {
FieldCopyingIterator(self.iter())
}
}
impl DocumentDeserialize for BTreeMap<Field, crate::schema::Value> {
impl DocumentDeserialize for BTreeMap<Field, crate::schema::OwnedValue> {
fn deserialize<'de, D>(mut deserializer: D) -> Result<Self, DeserializeError>
where D: DocumentDeserializer<'de> {
let mut document = BTreeMap::new();
@@ -163,19 +163,19 @@ impl DocumentDeserialize for BTreeMap<Field, crate::schema::Value> {
}
// HashMap based documents
impl Document for HashMap<Field, crate::schema::Value> {
type Value<'a> = &'a crate::schema::Value;
impl Document for HashMap<Field, crate::schema::OwnedValue> {
type Value<'a> = &'a crate::schema::OwnedValue;
type FieldsValuesIter<'a> = FieldCopyingIterator<
'a,
hash_map::Iter<'a, Field, crate::schema::Value>,
crate::schema::Value,
hash_map::Iter<'a, Field, crate::schema::OwnedValue>,
crate::schema::OwnedValue,
>;
fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> {
FieldCopyingIterator(self.iter())
}
}
impl DocumentDeserialize for HashMap<Field, crate::schema::Value> {
impl DocumentDeserialize for HashMap<Field, crate::schema::OwnedValue> {
fn deserialize<'de, D>(mut deserializer: D) -> Result<Self, DeserializeError>
where D: DocumentDeserializer<'de> {
let mut document = HashMap::with_capacity(deserializer.size_hint());

View File

@@ -1,8 +1,8 @@
//! Document definition for Tantivy to index and store.
//!
//! A document and its values are defined by a couple core traits:
//! - [DocumentAccess] which describes your top-level document and it's fields.
//! - [DocValue] which provides tantivy with a way to access the document's values in a common way
//! - [Document] which describes your top-level document and its fields.
//! - [Value] which provides tantivy with a way to access the document's values in a common way
//! without performing any additional allocations.
//! - [DocumentDeserialize] which implements the necessary code to deserialize the document from the
//! doc store.
@@ -24,7 +24,7 @@
//! significant amount of time when indexing by avoiding the additional allocations.
//!
//! ### Important Note
//! The implementor of the `DocumentAccess` trait must be `'static` and safe to send across
//! The implementor of the `Document` trait must be `'static` and safe to send across
//! thread boundaries.
//!
//! ## Reusing existing types
@@ -96,27 +96,27 @@
//! ## Implementing custom values
//! Internally, Tantivy only works with `ReferenceValue` which is an enum that tries to borrow
//! as much data as it can. In order to allow documents to return custom types, they must implement
//! the `DocValue` trait which provides a way for Tantivy to get a `ReferenceValue` that it can then
//! the `Value` trait which provides a way for Tantivy to get a `ReferenceValue` that it can then
//! index and store.
//!
//! Values can just as easily be customised as documents by implementing the `DocValue` trait.
//! Values can just as easily be customised as documents by implementing the `Value` trait.
//!
//! The implementor of this type should not own the data it's returning, instead it should just
//! hold references of the data held by the parent [Document] which can then be passed
//! on to the [ReferenceValue].
//!
//! This is why `DocValue` is implemented for `&'a serde_json::Value` and `&'a
//! This is why `Value` is implemented for `&'a serde_json::Value` and `&'a
//! tantivy::schema::OwnedValue` but not for their owned counterparts, as we cannot satisfy the
//! lifetime bounds necessary when indexing the documents.
//!
//! ### A note about returning values
//! The custom value type does not have to be the type stored by the document, instead the
//! implementor of a `DocValue` can just be used as a way to convert between the owned type
//! implementor of a `Value` can just be used as a way to convert between the owned type
//! kept in the parent document, and the value passed into Tantivy.
//!
//! ```
//! use tantivy::schema::document::ReferenceValue;
//! use tantivy::schema::{DocValue};
//! use tantivy::schema::{Value};
//!
//! #[derive(Debug)]
//! /// Our custom value type which has 3 types, a string, float and bool.
@@ -129,7 +129,7 @@
//! Bool(bool),
//! }
//!
//! impl<'a> DocValue<'a> for MyCustomValue<'a> {
//! impl<'a> Value<'a> for MyCustomValue<'a> {
//! type ChildValue = Self;
//! // We don't need to worry about these types here as we're not
//! // working with nested types, but if we wanted to we would
@@ -176,7 +176,7 @@ use crate::DateTime;
/// The core trait representing a document within the index.
pub trait Document: DocumentDeserialize + Send + Sync + 'static {
/// The value of the field.
type Value<'a>: DocValue<'a> + Clone
type Value<'a>: Value<'a> + Clone
where Self: 'a;
/// The iterator over all of the fields and values within the doc.
@@ -223,9 +223,9 @@ pub trait Document: DocumentDeserialize + Send + Sync + 'static {
}
/// A single field value.
pub trait DocValue<'a>: Send + Sync + Debug {
pub trait Value<'a>: Send + Sync + Debug {
/// The child value type returned by this doc value.
type ChildValue: DocValue<'a>;
type ChildValue: Value<'a>;
/// The iterator for walking through the elements within the array.
type ArrayIter: Iterator<Item = ReferenceValue<'a, Self::ChildValue>>;
/// The visitor walking through the key-value pairs within
@@ -357,7 +357,7 @@ pub trait DocValue<'a>: Send + Sync + Debug {
/// An enum representing a value for tantivy to index.
pub enum ReferenceValue<'a, V>
where V: DocValue<'a> + ?Sized
where V: Value<'a> + ?Sized
{
/// A null value.
Null,
@@ -388,7 +388,7 @@ where V: DocValue<'a> + ?Sized
}
impl<'a, V> ReferenceValue<'a, V>
where V: DocValue<'a>
where V: Value<'a>
{
#[inline]
/// Returns if the value is `null` or not.

View File

@@ -5,7 +5,7 @@ use std::io::Write;
use columnar::MonotonicallyMappableToU128;
use common::{f64_to_u64, BinarySerializable, VInt};
use crate::schema::document::{type_codes, DocValue, Document, ReferenceValue};
use crate::schema::document::{type_codes, Document, ReferenceValue, Value};
use crate::schema::Schema;
/// A serializer writing documents which implement [`Document`] to a provided writer.
@@ -40,9 +40,9 @@ where W: Write
let mut serializer = BinaryValueSerializer::new(self.writer);
match value_access.as_value() {
ReferenceValue::PreTokStr(pre_tokenized_text) => {
serializer.serialize_value(ReferenceValue::Str::<&'_ crate::schema::Value>(
&pre_tokenized_text.text,
))?;
serializer.serialize_value(ReferenceValue::Str::<
&'_ crate::schema::OwnedValue,
>(&pre_tokenized_text.text))?;
}
_ => {
serializer.serialize_value(value_access.as_value())?;
@@ -87,7 +87,7 @@ where W: Write
value: ReferenceValue<'a, V>,
) -> io::Result<()>
where
V: DocValue<'a>,
V: Value<'a>,
{
match value {
ReferenceValue::Null => self.write_type_code(type_codes::NULL_CODE),
@@ -209,7 +209,7 @@ where W: Write
value: ReferenceValue<'a, V>,
) -> io::Result<()>
where
V: DocValue<'a>,
V: Value<'a>,
{
let mut serializer = BinaryValueSerializer::new(self.writer);
serializer.serialize_value(value)?;
@@ -265,7 +265,7 @@ where W: Write
value: ReferenceValue<'a, V>,
) -> io::Result<()>
where
V: DocValue<'a>,
V: Value<'a>,
{
// Keys and values are stored inline with one another.
// Technically this isn't the *most* optimal way of storing the objects
@@ -712,8 +712,8 @@ mod tests {
let schema = builder.build();
let mut document = BTreeMap::new();
document.insert(name, crate::schema::Value::Str("ChillFish8".into()));
document.insert(age, crate::schema::Value::U64(20));
document.insert(name, crate::schema::OwnedValue::Str("ChillFish8".into()));
document.insert(age, crate::schema::OwnedValue::U64(20));
let result = serialize_doc(&document, &schema);
let mut expected = expected_doc_data!(length document.len());
@@ -734,8 +734,8 @@ mod tests {
let schema = builder.build();
let mut document = BTreeMap::new();
document.insert(name, crate::schema::Value::Str("ChillFish8".into()));
document.insert(age, crate::schema::Value::U64(20));
document.insert(name, crate::schema::OwnedValue::Str("ChillFish8".into()));
document.insert(age, crate::schema::OwnedValue::U64(20));
let result = serialize_doc(&document, &schema);
let mut expected = expected_doc_data!(length 1);
@@ -749,7 +749,7 @@ mod tests {
let builder = Schema::builder();
let schema = builder.build();
let document = BTreeMap::<Field, crate::schema::Value>::new();
let document = BTreeMap::<Field, crate::schema::OwnedValue>::new();
let result = serialize_doc(&document, &schema);
let expected = expected_doc_data!(length document.len());
assert_eq!(

View File

@@ -12,8 +12,8 @@ use super::IntoIpv6Addr;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::{
DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing,
TextOptions, Value,
DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, OwnedValue,
TextFieldIndexing, TextOptions,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
@@ -316,7 +316,7 @@ impl FieldType {
/// Tantivy will not try to cast values.
/// For instance, If the json value is the integer `3` and the
/// target field is a `Str`, this method will return an Error.
pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
pub fn value_from_json(&self, json: JsonValue) -> Result<OwnedValue, ValueParsingError> {
match json {
JsonValue::String(field_text) => {
match self {
@@ -328,10 +328,10 @@ impl FieldType {
})?;
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::Str(_) => Ok(OwnedValue::Str(field_text)),
FieldType::U64(opt) => {
if opt.should_coerce() {
Ok(Value::U64(field_text.parse().map_err(|_| {
Ok(OwnedValue::U64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a u64 or a u64 as string",
json: JsonValue::String(field_text),
@@ -346,7 +346,7 @@ impl FieldType {
}
FieldType::I64(opt) => {
if opt.should_coerce() {
Ok(Value::I64(field_text.parse().map_err(|_| {
Ok(OwnedValue::I64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a i64 or a i64 as string",
json: JsonValue::String(field_text),
@@ -361,7 +361,7 @@ impl FieldType {
}
FieldType::F64(opt) => {
if opt.should_coerce() {
Ok(Value::F64(field_text.parse().map_err(|_| {
Ok(OwnedValue::F64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a f64 or a f64 as string",
json: JsonValue::String(field_text),
@@ -376,7 +376,7 @@ impl FieldType {
}
FieldType::Bool(opt) => {
if opt.should_coerce() {
Ok(Value::Bool(field_text.parse().map_err(|_| {
Ok(OwnedValue::Bool(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a i64 or a bool as string",
json: JsonValue::String(field_text),
@@ -389,10 +389,10 @@ impl FieldType {
})
}
}
FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))),
FieldType::Facet(_) => Ok(OwnedValue::Facet(Facet::from(&field_text))),
FieldType::Bytes(_) => BASE64
.decode(&field_text)
.map(Value::Bytes)
.map(OwnedValue::Bytes)
.map_err(|_| ValueParsingError::InvalidBase64 { base64: field_text }),
FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
expected: "a json object",
@@ -406,14 +406,14 @@ impl FieldType {
}
})?;
Ok(Value::IpAddr(ip_addr.into_ipv6_addr()))
Ok(OwnedValue::IpAddr(ip_addr.into_ipv6_addr()))
}
}
}
JsonValue::Number(field_val_num) => match self {
FieldType::I64(_) | FieldType::Date(_) => {
if let Some(field_val_i64) = field_val_num.as_i64() {
Ok(Value::I64(field_val_i64))
Ok(OwnedValue::I64(field_val_i64))
} else {
Err(ValueParsingError::OverflowError {
expected: "an i64 int",
@@ -423,7 +423,7 @@ impl FieldType {
}
FieldType::U64(_) => {
if let Some(field_val_u64) = field_val_num.as_u64() {
Ok(Value::U64(field_val_u64))
Ok(OwnedValue::U64(field_val_u64))
} else {
Err(ValueParsingError::OverflowError {
expected: "u64",
@@ -433,7 +433,7 @@ impl FieldType {
}
FieldType::F64(_) => {
if let Some(field_val_f64) = field_val_num.as_f64() {
Ok(Value::F64(field_val_f64))
Ok(OwnedValue::F64(field_val_f64))
} else {
Err(ValueParsingError::OverflowError {
expected: "a f64",
@@ -447,7 +447,7 @@ impl FieldType {
}),
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(field_val_num.to_string()))
Ok(OwnedValue::Str(field_val_num.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
@@ -473,7 +473,7 @@ impl FieldType {
if let Ok(tok_str_val) = serde_json::from_value::<PreTokenizedString>(
serde_json::Value::Object(json_map.clone()),
) {
Ok(Value::PreTokStr(tok_str_val))
Ok(OwnedValue::PreTokStr(tok_str_val))
} else {
Err(ValueParsingError::TypeError {
expected: "a string or an pretokenized string",
@@ -481,17 +481,17 @@ impl FieldType {
})
}
}
FieldType::JsonObject(_) => Ok(Value::from(json_map)),
FieldType::JsonObject(_) => Ok(OwnedValue::from(json_map)),
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Object(json_map),
}),
},
JsonValue::Bool(json_bool_val) => match self {
FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)),
FieldType::Bool(_) => Ok(OwnedValue::Bool(json_bool_val)),
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(json_bool_val.to_string()))
Ok(OwnedValue::Str(json_bool_val.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
@@ -508,7 +508,7 @@ impl FieldType {
JsonValue::Null => match self {
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str("null".to_string()))
Ok(OwnedValue::Str("null".to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
@@ -535,7 +535,7 @@ mod tests {
use super::FieldType;
use crate::schema::field_type::ValueParsingError;
use crate::schema::{NumericOptions, Schema, TextOptions, Type, Value, COERCE, INDEXED};
use crate::schema::{NumericOptions, OwnedValue, Schema, TextOptions, Type, COERCE, INDEXED};
use crate::time::{Date, Month, PrimitiveDateTime, Time};
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, TantivyDocument};
@@ -547,20 +547,20 @@ mod tests {
let schema = schema_builder.build();
let doc = TantivyDocument::parse_json(&schema, r#"{"id": 100}"#).unwrap();
assert_eq!(
&Value::Str("100".to_string()),
&OwnedValue::Str("100".to_string()),
doc.get_first(text_field).unwrap()
);
let doc = TantivyDocument::parse_json(&schema, r#"{"id": true}"#).unwrap();
assert_eq!(
&Value::Str("true".to_string()),
&OwnedValue::Str("true".to_string()),
doc.get_first(text_field).unwrap()
);
// Not sure if this null coercion is the best approach
let doc = TantivyDocument::parse_json(&schema, r#"{"id": null}"#).unwrap();
assert_eq!(
&Value::Str("null".to_string()),
&OwnedValue::Str("null".to_string()),
doc.get_first(text_field).unwrap()
);
}
@@ -574,9 +574,9 @@ mod tests {
let schema = schema_builder.build();
let doc_json = r#"{"i64": "100", "u64": "100", "f64": "100"}"#;
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
assert_eq!(&Value::I64(100), doc.get_first(i64_field).unwrap());
assert_eq!(&Value::U64(100), doc.get_first(u64_field).unwrap());
assert_eq!(&Value::F64(100.0), doc.get_first(f64_field).unwrap());
assert_eq!(&OwnedValue::I64(100), doc.get_first(i64_field).unwrap());
assert_eq!(&OwnedValue::U64(100), doc.get_first(u64_field).unwrap());
assert_eq!(&OwnedValue::F64(100.0), doc.get_first(f64_field).unwrap());
}
#[test]
@@ -586,11 +586,11 @@ mod tests {
let schema = schema_builder.build();
let doc_json = r#"{"bool": "true"}"#;
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
assert_eq!(&Value::Bool(true), doc.get_first(bool_field).unwrap());
assert_eq!(&OwnedValue::Bool(true), doc.get_first(bool_field).unwrap());
let doc_json = r#"{"bool": "false"}"#;
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
assert_eq!(&Value::Bool(false), doc.get_first(bool_field).unwrap());
assert_eq!(&OwnedValue::Bool(false), doc.get_first(bool_field).unwrap());
}
#[test]
@@ -647,7 +647,10 @@ mod tests {
let result = FieldType::Bytes(Default::default())
.value_from_json(json!("dGhpcyBpcyBhIHRlc3Q="))
.unwrap();
assert_eq!(result, Value::Bytes("this is a test".as_bytes().to_vec()));
assert_eq!(
result,
OwnedValue::Bytes("this is a test".as_bytes().to_vec())
);
let result = FieldType::Bytes(Default::default()).value_from_json(json!(521));
match result {
@@ -691,7 +694,7 @@ mod tests {
]
}"#;
let expected_value = Value::PreTokStr(PreTokenizedString {
let expected_value = OwnedValue::PreTokStr(PreTokenizedString {
text: String::from("The Old Man"),
tokens: vec![
Token {

View File

@@ -1,16 +1,16 @@
use crate::schema::{Field, Value};
use crate::schema::{Field, OwnedValue};
/// `FieldValue` holds together a `Field` and its `OwnedValue`.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct FieldValue {
pub field: Field,
pub value: Value,
pub value: OwnedValue,
}
impl FieldValue {
/// Constructor
pub fn new(field: Field, value: Value) -> FieldValue {
pub fn new(field: Field, value: OwnedValue) -> FieldValue {
FieldValue { field, value }
}
@@ -20,12 +20,12 @@ impl FieldValue {
}
/// Value accessor
pub fn value(&self) -> &Value {
pub fn value(&self) -> &OwnedValue {
&self.value
}
}
impl From<FieldValue> for Value {
impl From<FieldValue> for OwnedValue {
fn from(field_value: FieldValue) -> Self {
field_value.value
}
@@ -36,7 +36,7 @@ impl From<FieldValue> for Value {
pub struct FieldValueIter<'a>(pub(crate) std::slice::Iter<'a, FieldValue>);
impl<'a> Iterator for FieldValueIter<'a> {
type Item = (Field, &'a Value);
type Item = (Field, &'a OwnedValue);
fn next(&mut self) -> Option<Self::Item> {
self.0

View File

@@ -134,7 +134,7 @@ pub use self::bytes_options::BytesOptions;
#[allow(deprecated)]
pub use self::date_time_options::DatePrecision;
pub use self::date_time_options::{DateOptions, DateTimePrecision, DATE_TIME_PRECISION_INDEXED};
pub use self::document::{DocParsingError, DocValue, Document, TantivyDocument};
pub use self::document::{DocParsingError, Document, TantivyDocument, Value};
pub(crate) use self::facet::FACET_SEP_BYTE;
pub use self::facet::{Facet, FacetParseError};
pub use self::facet_options::FacetOptions;
@@ -153,7 +153,7 @@ pub use self::numeric_options::NumericOptions;
pub use self::schema::{Schema, SchemaBuilder};
pub use self::term::{Term, ValueBytes, JSON_END_OF_PATH};
pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
pub use self::value::Value;
pub use self::value::OwnedValue;
/// Validator for a potential `field_name`.
/// Returns true if the name can be used for a field name.

View File

@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::schema::Value;
use crate::schema::OwnedValue;
/// Internal representation of a document used for JSON
/// serialization.
@@ -10,4 +10,4 @@ use crate::schema::Value;
/// A `NamedFieldDocument` is a simple representation of a document
/// as a `BTreeMap<String, Vec<OwnedValue>>`.
#[derive(Debug, Deserialize, Serialize)]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<OwnedValue>>);

View File

@@ -413,7 +413,7 @@ mod tests {
use pretty_assertions::assert_eq;
use serde_json;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::field_type::ValueParsingError;
use crate::schema::schema::DocParsingError::InvalidJson;
use crate::schema::*;
@@ -630,24 +630,24 @@ mod tests {
let mut named_doc_map = BTreeMap::default();
named_doc_map.insert(
"title".to_string(),
vec![Value::from("title1"), Value::from("title2")],
vec![OwnedValue::from("title1"), OwnedValue::from("title2")],
);
named_doc_map.insert(
"val".to_string(),
vec![Value::from(14u64), Value::from(-1i64)],
vec![OwnedValue::from(14u64), OwnedValue::from(-1i64)],
);
let doc =
TantivyDocument::convert_named_doc(&schema, NamedFieldDocument(named_doc_map)).unwrap();
assert_eq!(
doc.get_all(title).collect::<Vec<_>>(),
vec![
&Value::from("title1".to_string()),
&Value::from("title2".to_string())
&OwnedValue::from("title1".to_string()),
&OwnedValue::from("title2".to_string())
]
);
assert_eq!(
doc.get_all(val).collect::<Vec<_>>(),
vec![&Value::from(14u64), &Value::from(-1i64)]
vec![&OwnedValue::from(14u64), &OwnedValue::from(-1i64)]
);
}
@@ -657,7 +657,7 @@ mod tests {
let mut named_doc_map = BTreeMap::default();
named_doc_map.insert(
"title".to_string(),
vec![Value::from("title1"), Value::from("title2")],
vec![OwnedValue::from("title1"), OwnedValue::from("title2")],
);
TantivyDocument::convert_named_doc(&schema, NamedFieldDocument(named_doc_map)).unwrap();
}

View File

@@ -9,17 +9,18 @@ use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::schema::document::{
ArrayAccess, DeserializeError, DocValue, ObjectAccess, ReferenceValue, ValueDeserialize,
ArrayAccess, DeserializeError, ObjectAccess, ReferenceValue, Value, ValueDeserialize,
ValueDeserializer, ValueVisitor,
};
use crate::schema::Facet;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
/// Value represents the value of a any field.
/// This is an owned variant of `Value` that can be passed around without lifetimes.
/// Represents the value of any field.
/// It is an enum over all of the possible field types.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
pub enum OwnedValue {
/// A null value.
Null,
/// The str type is used for any text information.
@@ -48,83 +49,83 @@ pub enum Value {
IpAddr(Ipv6Addr),
}
impl<'a> DocValue<'a> for &'a Value {
impl<'a> Value<'a> for &'a OwnedValue {
type ChildValue = Self;
type ArrayIter = ArrayIter<'a>;
type ObjectIter = ObjectMapIter<'a>;
fn as_value(&self) -> ReferenceValue<'a, Self> {
match self {
Value::Null => ReferenceValue::Null,
Value::Str(val) => ReferenceValue::Str(val),
Value::PreTokStr(val) => ReferenceValue::PreTokStr(val),
Value::U64(val) => ReferenceValue::U64(*val),
Value::I64(val) => ReferenceValue::I64(*val),
Value::F64(val) => ReferenceValue::F64(*val),
Value::Bool(val) => ReferenceValue::Bool(*val),
Value::Date(val) => ReferenceValue::Date(*val),
Value::Facet(val) => ReferenceValue::Facet(val),
Value::Bytes(val) => ReferenceValue::Bytes(val),
Value::IpAddr(val) => ReferenceValue::IpAddr(*val),
Value::Array(array) => ReferenceValue::Array(ArrayIter(array.iter())),
Value::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())),
OwnedValue::Null => ReferenceValue::Null,
OwnedValue::Str(val) => ReferenceValue::Str(val),
OwnedValue::PreTokStr(val) => ReferenceValue::PreTokStr(val),
OwnedValue::U64(val) => ReferenceValue::U64(*val),
OwnedValue::I64(val) => ReferenceValue::I64(*val),
OwnedValue::F64(val) => ReferenceValue::F64(*val),
OwnedValue::Bool(val) => ReferenceValue::Bool(*val),
OwnedValue::Date(val) => ReferenceValue::Date(*val),
OwnedValue::Facet(val) => ReferenceValue::Facet(val),
OwnedValue::Bytes(val) => ReferenceValue::Bytes(val),
OwnedValue::IpAddr(val) => ReferenceValue::IpAddr(*val),
OwnedValue::Array(array) => ReferenceValue::Array(ArrayIter(array.iter())),
OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())),
}
}
}
impl ValueDeserialize for Value {
impl ValueDeserialize for OwnedValue {
fn deserialize<'de, D>(deserializer: D) -> Result<Self, DeserializeError>
where D: ValueDeserializer<'de> {
struct Visitor;
impl ValueVisitor for Visitor {
type Value = Value;
type Value = OwnedValue;
fn visit_null(&self) -> Result<Self::Value, DeserializeError> {
Ok(Value::Null)
Ok(OwnedValue::Null)
}
fn visit_string(&self, val: String) -> Result<Self::Value, DeserializeError> {
Ok(Value::Str(val))
Ok(OwnedValue::Str(val))
}
fn visit_u64(&self, val: u64) -> Result<Self::Value, DeserializeError> {
Ok(Value::U64(val))
Ok(OwnedValue::U64(val))
}
fn visit_i64(&self, val: i64) -> Result<Self::Value, DeserializeError> {
Ok(Value::I64(val))
Ok(OwnedValue::I64(val))
}
fn visit_f64(&self, val: f64) -> Result<Self::Value, DeserializeError> {
Ok(Value::F64(val))
Ok(OwnedValue::F64(val))
}
fn visit_bool(&self, val: bool) -> Result<Self::Value, DeserializeError> {
Ok(Value::Bool(val))
Ok(OwnedValue::Bool(val))
}
fn visit_datetime(&self, val: DateTime) -> Result<Self::Value, DeserializeError> {
Ok(Value::Date(val))
Ok(OwnedValue::Date(val))
}
fn visit_ip_address(&self, val: Ipv6Addr) -> Result<Self::Value, DeserializeError> {
Ok(Value::IpAddr(val))
Ok(OwnedValue::IpAddr(val))
}
fn visit_facet(&self, val: Facet) -> Result<Self::Value, DeserializeError> {
Ok(Value::Facet(val))
Ok(OwnedValue::Facet(val))
}
fn visit_bytes(&self, val: Vec<u8>) -> Result<Self::Value, DeserializeError> {
Ok(Value::Bytes(val))
Ok(OwnedValue::Bytes(val))
}
fn visit_pre_tokenized_string(
&self,
val: PreTokenizedString,
) -> Result<Self::Value, DeserializeError> {
Ok(Value::PreTokStr(val))
Ok(OwnedValue::PreTokStr(val))
}
fn visit_array<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>
@@ -135,7 +136,7 @@ impl ValueDeserialize for Value {
elements.push(value);
}
Ok(Value::Array(elements))
Ok(OwnedValue::Array(elements))
}
fn visit_object<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>
@@ -146,7 +147,7 @@ impl ValueDeserialize for Value {
elements.insert(key, value);
}
Ok(Value::Object(elements))
Ok(OwnedValue::Object(elements))
}
}
@@ -154,24 +155,26 @@ impl ValueDeserialize for Value {
}
}
impl Eq for Value {}
impl Eq for OwnedValue {}
impl serde::Serialize for Value {
impl serde::Serialize for OwnedValue {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: serde::Serializer {
match *self {
Value::Null => serializer.serialize_unit(),
Value::Str(ref v) => serializer.serialize_str(v),
Value::PreTokStr(ref v) => v.serialize(serializer),
Value::U64(u) => serializer.serialize_u64(u),
Value::I64(u) => serializer.serialize_i64(u),
Value::F64(u) => serializer.serialize_f64(u),
Value::Bool(b) => serializer.serialize_bool(b),
Value::Date(ref date) => time::serde::rfc3339::serialize(&date.into_utc(), serializer),
Value::Facet(ref facet) => facet.serialize(serializer),
Value::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)),
Value::Object(ref obj) => obj.serialize(serializer),
Value::IpAddr(ref ip_v6) => {
OwnedValue::Null => serializer.serialize_unit(),
OwnedValue::Str(ref v) => serializer.serialize_str(v),
OwnedValue::PreTokStr(ref v) => v.serialize(serializer),
OwnedValue::U64(u) => serializer.serialize_u64(u),
OwnedValue::I64(u) => serializer.serialize_i64(u),
OwnedValue::F64(u) => serializer.serialize_f64(u),
OwnedValue::Bool(b) => serializer.serialize_bool(b),
OwnedValue::Date(ref date) => {
time::serde::rfc3339::serialize(&date.into_utc(), serializer)
}
OwnedValue::Facet(ref facet) => facet.serialize(serializer),
OwnedValue::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)),
OwnedValue::Object(ref obj) => obj.serialize(serializer),
OwnedValue::IpAddr(ref ip_v6) => {
// Ensure IpV4 addresses get serialized as IpV4, but excluding IpV6 loopback.
if let Some(ip_v4) = ip_v6.to_ipv4_mapped() {
ip_v4.serialize(serializer)
@@ -179,50 +182,50 @@ impl serde::Serialize for Value {
ip_v6.serialize(serializer)
}
}
Value::Array(ref array) => array.serialize(serializer),
OwnedValue::Array(ref array) => array.serialize(serializer),
}
}
}
impl<'de> serde::Deserialize<'de> for Value {
impl<'de> serde::Deserialize<'de> for OwnedValue {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: serde::Deserializer<'de> {
struct ValueVisitor;
impl<'de> serde::de::Visitor<'de> for ValueVisitor {
type Value = Value;
type Value = OwnedValue;
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("a string or u32")
}
fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> {
Ok(Value::Bool(v))
Ok(OwnedValue::Bool(v))
}
fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> {
Ok(Value::I64(v))
Ok(OwnedValue::I64(v))
}
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> {
Ok(Value::U64(v))
Ok(OwnedValue::U64(v))
}
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> {
Ok(Value::F64(v))
Ok(OwnedValue::F64(v))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
Ok(Value::Str(v.to_owned()))
Ok(OwnedValue::Str(v.to_owned()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
Ok(Value::Str(v))
Ok(OwnedValue::Str(v))
}
fn visit_unit<E>(self) -> Result<Self::Value, E>
where E: serde::de::Error {
Ok(Value::Null)
Ok(OwnedValue::Null)
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
@@ -233,7 +236,7 @@ impl<'de> serde::Deserialize<'de> for Value {
elements.push(value);
}
Ok(Value::Array(elements))
Ok(OwnedValue::Array(elements))
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
@@ -244,7 +247,7 @@ impl<'de> serde::Deserialize<'de> for Value {
object.insert(key, value);
}
Ok(Value::Object(object))
Ok(OwnedValue::Object(object))
}
}
@@ -252,81 +255,81 @@ impl<'de> serde::Deserialize<'de> for Value {
}
}
impl From<String> for Value {
fn from(s: String) -> Value {
Value::Str(s)
impl From<String> for OwnedValue {
fn from(s: String) -> OwnedValue {
OwnedValue::Str(s)
}
}
impl From<Ipv6Addr> for Value {
fn from(v: Ipv6Addr) -> Value {
Value::IpAddr(v)
impl From<Ipv6Addr> for OwnedValue {
fn from(v: Ipv6Addr) -> OwnedValue {
OwnedValue::IpAddr(v)
}
}
impl From<u64> for Value {
fn from(v: u64) -> Value {
Value::U64(v)
impl From<u64> for OwnedValue {
fn from(v: u64) -> OwnedValue {
OwnedValue::U64(v)
}
}
impl From<i64> for Value {
fn from(v: i64) -> Value {
Value::I64(v)
impl From<i64> for OwnedValue {
fn from(v: i64) -> OwnedValue {
OwnedValue::I64(v)
}
}
impl From<f64> for Value {
fn from(v: f64) -> Value {
Value::F64(v)
impl From<f64> for OwnedValue {
fn from(v: f64) -> OwnedValue {
OwnedValue::F64(v)
}
}
impl From<bool> for Value {
impl From<bool> for OwnedValue {
fn from(b: bool) -> Self {
Value::Bool(b)
OwnedValue::Bool(b)
}
}
impl From<DateTime> for Value {
fn from(dt: DateTime) -> Value {
Value::Date(dt)
impl From<DateTime> for OwnedValue {
fn from(dt: DateTime) -> OwnedValue {
OwnedValue::Date(dt)
}
}
impl<'a> From<&'a str> for Value {
fn from(s: &'a str) -> Value {
Value::Str(s.to_string())
impl<'a> From<&'a str> for OwnedValue {
fn from(s: &'a str) -> OwnedValue {
OwnedValue::Str(s.to_string())
}
}
impl<'a> From<&'a [u8]> for Value {
fn from(bytes: &'a [u8]) -> Value {
Value::Bytes(bytes.to_vec())
impl<'a> From<&'a [u8]> for OwnedValue {
fn from(bytes: &'a [u8]) -> OwnedValue {
OwnedValue::Bytes(bytes.to_vec())
}
}
impl From<Facet> for Value {
fn from(facet: Facet) -> Value {
Value::Facet(facet)
impl From<Facet> for OwnedValue {
fn from(facet: Facet) -> OwnedValue {
OwnedValue::Facet(facet)
}
}
impl From<Vec<u8>> for Value {
fn from(bytes: Vec<u8>) -> Value {
Value::Bytes(bytes)
impl From<Vec<u8>> for OwnedValue {
fn from(bytes: Vec<u8>) -> OwnedValue {
OwnedValue::Bytes(bytes)
}
}
impl From<PreTokenizedString> for Value {
fn from(pretokenized_string: PreTokenizedString) -> Value {
Value::PreTokStr(pretokenized_string)
impl From<PreTokenizedString> for OwnedValue {
fn from(pretokenized_string: PreTokenizedString) -> OwnedValue {
OwnedValue::PreTokStr(pretokenized_string)
}
}
impl From<BTreeMap<String, Value>> for Value {
fn from(object: BTreeMap<String, Value>) -> Value {
Value::Object(object)
impl From<BTreeMap<String, OwnedValue>> for OwnedValue {
fn from(object: BTreeMap<String, OwnedValue>) -> OwnedValue {
OwnedValue::Object(object)
}
}
@@ -340,7 +343,7 @@ fn can_be_rfc3339_date_time(text: &str) -> bool {
false
}
impl From<serde_json::Value> for Value {
impl From<serde_json::Value> for OwnedValue {
fn from(value: serde_json::Value) -> Self {
match value {
serde_json::Value::Null => Self::Null,
@@ -378,23 +381,23 @@ impl From<serde_json::Value> for Value {
}
}
impl From<serde_json::Map<String, serde_json::Value>> for Value {
impl From<serde_json::Map<String, serde_json::Value>> for OwnedValue {
fn from(map: serde_json::Map<String, serde_json::Value>) -> Self {
let mut object = BTreeMap::new();
for (key, value) in map {
object.insert(key, Value::from(value));
object.insert(key, OwnedValue::from(value));
}
Value::Object(object)
OwnedValue::Object(object)
}
}
/// A wrapper type for iterating over a slice of `OwnedValue`s, producing reference values.
pub struct ArrayIter<'a>(std::slice::Iter<'a, Value>);
pub struct ArrayIter<'a>(std::slice::Iter<'a, OwnedValue>);
impl<'a> Iterator for ArrayIter<'a> {
type Item = ReferenceValue<'a, &'a Value>;
type Item = ReferenceValue<'a, &'a OwnedValue>;
fn next(&mut self) -> Option<Self::Item> {
let value = self.0.next()?;
@@ -403,10 +406,10 @@ impl<'a> Iterator for ArrayIter<'a> {
}
/// A wrapper type for iterating over an `OwnedValue` object map, producing reference values.
pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, Value>);
pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, OwnedValue>);
impl<'a> Iterator for ObjectMapIter<'a> {
type Item = (&'a str, ReferenceValue<'a, &'a Value>);
type Item = (&'a str, ReferenceValue<'a, &'a OwnedValue>);
fn next(&mut self) -> Option<Self::Item> {
let (key, value) = self.0.next()?;
@@ -416,7 +419,7 @@ impl<'a> Iterator for ObjectMapIter<'a> {
#[cfg(test)]
mod tests {
use super::Value;
use super::OwnedValue;
use crate::schema::{BytesOptions, Schema};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
@@ -466,12 +469,12 @@ mod tests {
#[test]
fn test_serialize_date() {
let value = Value::from(DateTime::from_utc(
let value = OwnedValue::from(DateTime::from_utc(
OffsetDateTime::parse("1996-12-20T00:39:57+00:00", &Rfc3339).unwrap(),
));
let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();
assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57Z""#);
let value = Value::from(DateTime::from_utc(
let value = OwnedValue::from(DateTime::from_utc(
OffsetDateTime::parse("1996-12-20T00:39:57-01:00", &Rfc3339).unwrap(),
));
let serialized_value_json = serde_json::to_string_pretty(&value).unwrap();

View File

@@ -5,7 +5,7 @@ use std::ops::Range;
use htmlescape::encode_minimal;
use crate::query::Query;
use crate::schema::document::{DocValue, Document};
use crate::schema::document::{Document, Value};
use crate::schema::Field;
use crate::tokenizer::{TextAnalyzer, Token};
use crate::{Score, Searcher, Term};

View File

@@ -59,7 +59,7 @@ pub mod tests {
use super::*;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::AliveBitSet;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{
self, Schema, TantivyDocument, TextFieldIndexing, TextOptions, STORED, TEXT,
};

View File

@@ -385,7 +385,7 @@ mod tests {
use super::*;
use crate::directory::RamDirectory;
use crate::schema::document::DocValue;
use crate::schema::document::Value;
use crate::schema::{Field, TantivyDocument};
use crate::store::tests::write_lorem_ipsum_store;
use crate::store::Compressor;