From 03a1f4076746829ab1cec76ff011321bd53dd1ee Mon Sep 17 00:00:00 2001 From: PSeitz Date: Mon, 2 Oct 2023 17:03:00 +0200 Subject: [PATCH] rename DocValue to Value (#2197) rename DocValue to Value to avoid confusion with lucene DocValues rename Value to OwnedValue --- examples/date_time_field.rs | 4 +- src/core/json_utils.rs | 8 +- src/fastfield/facet_reader.rs | 2 +- src/fastfield/writer.rs | 10 +- src/indexer/doc_id_mapping.rs | 2 +- src/indexer/index_writer.rs | 2 +- src/indexer/merger.rs | 2 +- src/indexer/merger_sorted_index_test.rs | 2 +- src/indexer/segment_writer.rs | 6 +- src/lib.rs | 6 +- src/query/more_like_this/more_like_this.rs | 8 +- src/query/more_like_this/query.rs | 11 +- src/schema/document/de.rs | 90 +++++---- src/schema/document/default_doc_type.rs | 18 +- src/schema/document/existing_type_impls.rs | 26 +-- src/schema/document/mod.rs | 28 +-- src/schema/document/se.rs | 24 +-- src/schema/field_type.rs | 65 +++--- src/schema/field_value.rs | 12 +- src/schema/mod.rs | 4 +- src/schema/named_field_document.rs | 4 +- src/schema/schema.rs | 14 +- src/schema/value.rs | 217 +++++++++++---------- src/snippet/mod.rs | 2 +- src/store/mod.rs | 2 +- src/store/reader.rs | 2 +- 26 files changed, 293 insertions(+), 278 deletions(-) diff --git a/examples/date_time_field.rs b/examples/date_time_field.rs index 57cd28e41..13d37a39f 100644 --- a/examples/date_time_field.rs +++ b/examples/date_time_field.rs @@ -4,7 +4,7 @@ use tantivy::collector::TopDocs; use tantivy::query::QueryParser; -use tantivy::schema::{DateOptions, Schema, Value, INDEXED, STORED, STRING}; +use tantivy::schema::{DateOptions, OwnedValue, Schema, INDEXED, STORED, STRING}; use tantivy::{Index, IndexWriter, TantivyDocument}; fn main() -> tantivy::Result<()> { @@ -63,7 +63,7 @@ fn main() -> tantivy::Result<()> { let retrieved_doc = searcher.doc::(doc_address)?; assert!(matches!( retrieved_doc.get_first(occurred_at), - Some(Value::Date(_)) + Some(OwnedValue::Date(_)) )); assert_eq!( retrieved_doc.to_json(&schema), diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index 8e5efc49d..ba7d8f707 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -5,7 +5,7 @@ use rustc_hash::FxHashMap; use crate::fastfield::FastValue; use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter}; -use crate::schema::document::{DocValue, ReferenceValue}; +use crate::schema::document::{ReferenceValue, Value}; use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR}; use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED}; use crate::time::format_description::well_known::Rfc3339; @@ -65,7 +65,7 @@ impl IndexingPositionsPerPath { } } -pub(crate) fn index_json_values<'a, V: DocValue<'a>>( +pub(crate) fn index_json_values<'a, V: Value<'a>>( doc: DocId, json_visitors: impl Iterator>, text_analyzer: &mut TextAnalyzer, @@ -91,7 +91,7 @@ pub(crate) fn index_json_values<'a, V: DocValue<'a>>( Ok(()) } -fn index_json_object<'a, V: DocValue<'a>>( +fn index_json_object<'a, V: Value<'a>>( doc: DocId, json_visitor: V::ObjectIter, text_analyzer: &mut TextAnalyzer, @@ -115,7 +115,7 @@ fn index_json_object<'a, V: DocValue<'a>>( } } -fn index_json_value<'a, V: DocValue<'a>>( +fn index_json_value<'a, V: Value<'a>>( doc: DocId, json_value: ReferenceValue<'a, V>, text_analyzer: &mut TextAnalyzer, diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index ec844bbed..c4e170352 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -62,7 +62,7 @@ impl FacetReader { #[cfg(test)] mod tests { - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{Facet, FacetOptions, SchemaBuilder, STORED}; use crate::{DocAddress, Index, IndexWriter, TantivyDocument}; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 4eed75588..6450b0be0 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -5,7 +5,7 @@ use common::replace_in_place; use tokenizer_api::Token; use crate::indexer::doc_id_mapping::DocIdMapping; -use crate::schema::document::{DocValue, Document, ReferenceValue}; +use crate::schema::document::{Document, ReferenceValue, Value}; use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR}; use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type}; use crate::tokenizer::{TextAnalyzer, TokenizerManager}; @@ -129,7 +129,7 @@ impl FastFieldsWriter { Ok(()) } - fn add_doc_value<'a, V: DocValue<'a>>( + fn add_doc_value<'a, V: Value<'a>>( &mut self, doc_id: DocId, field: Field, @@ -243,7 +243,7 @@ impl FastFieldsWriter { } } -fn record_json_obj_to_columnar_writer<'a, V: DocValue<'a>>( +fn record_json_obj_to_columnar_writer<'a, V: Value<'a>>( doc: DocId, json_visitor: V::ObjectIter, expand_dots: bool, @@ -282,7 +282,7 @@ fn record_json_obj_to_columnar_writer<'a, V: DocValue<'a>>( } } -fn record_json_value_to_columnar_writer<'a, V: DocValue<'a>>( +fn record_json_value_to_columnar_writer<'a, V: Value<'a>>( doc: DocId, json_val: ReferenceValue<'a, V>, expand_dots: bool, @@ -382,7 +382,7 @@ mod tests { use super::record_json_value_to_columnar_writer; use crate::fastfield::writer::JSON_DEPTH_LIMIT; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::DocId; fn test_columnar_from_jsons_aux( diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index 0bb2000cf..0fad45eb1 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -158,7 +158,7 @@ mod tests_indexsorting { use crate::indexer::doc_id_mapping::DocIdMapping; use crate::indexer::NoMergePolicy; use crate::query::QueryParser; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{Schema, *}; use crate::{DocAddress, Index, IndexSettings, IndexSortByField, Order}; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 28bb16264..4357ce04c 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -815,7 +815,7 @@ mod tests { use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN; use crate::indexer::NoMergePolicy; use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery}; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{ self, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, NumericOptions, Schema, TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 6685db1ae..6c7837a49 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -753,7 +753,7 @@ mod tests { use crate::collector::{Count, FacetCollector}; use crate::core::Index; use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery}; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{ Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term, TextFieldIndexing, INDEXED, TEXT, diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index e08598d40..045722d2d 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -4,7 +4,7 @@ mod tests { use crate::core::Index; use crate::fastfield::AliveBitSet; use crate::query::QueryParser; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{ self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions, TextFieldIndexing, TextOptions, diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 0c5f312d1..aac738d4d 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -13,7 +13,7 @@ use crate::postings::{ compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition, PerFieldPostingsWriter, PostingsWriter, }; -use crate::schema::document::{DocValue, Document, ReferenceValue}; +use crate::schema::document::{Document, ReferenceValue, Value}; use crate::schema::{FieldEntry, FieldType, Schema, Term, DATE_TIME_PRECISION_INDEXED}; use crate::store::{StoreReader, StoreWriter}; use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer}; @@ -492,7 +492,7 @@ mod tests { use crate::directory::RamDirectory; use crate::postings::TermInfo; use crate::query::PhraseQuery; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{ IndexRecordOption, Schema, TextFieldIndexing, TextOptions, Type, STORED, STRING, TEXT, }; @@ -715,7 +715,7 @@ mod tests { let json_field = schema_builder.add_json_field("json", STORED | TEXT); let schema = schema_builder.build(); let mut doc = TantivyDocument::default(); - let json_val: BTreeMap = + let json_val: BTreeMap = serde_json::from_str(r#"{"mykey": "repeated token token"}"#).unwrap(); doc.add_object(json_field, json_val); let index = Index::create_in_ram(schema); diff --git a/src/lib.rs b/src/lib.rs index f81fcb419..b24142205 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -342,7 +342,7 @@ pub mod tests { use crate::docset::{DocSet, TERMINATED}; use crate::merge_policy::NoMergePolicy; use crate::query::BooleanQuery; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::*; use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy}; @@ -986,11 +986,11 @@ pub mod tests { text_field => "some other value", other_text_field => "short"); assert_eq!(document.len(), 3); - let values: Vec<&Value> = document.get_all(text_field).collect(); + let values: Vec<&OwnedValue> = document.get_all(text_field).collect(); assert_eq!(values.len(), 2); assert_eq!(values[0].as_str(), Some("tantivy")); assert_eq!(values[1].as_str(), Some("some other value")); - let values: Vec<&Value> = document.get_all(other_text_field).collect(); + let values: Vec<&OwnedValue> = document.get_all(other_text_field).collect(); assert_eq!(values.len(), 1); assert_eq!(values[0].as_str(), Some("short")); } diff --git a/src/query/more_like_this/more_like_this.rs b/src/query/more_like_this/more_like_this.rs index bc644e28b..4fb692e9d 100644 --- a/src/query/more_like_this/more_like_this.rs +++ b/src/query/more_like_this/more_like_this.rs @@ -5,7 +5,7 @@ use tokenizer_api::Token; use crate::query::bm25::idf; use crate::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery}; -use crate::schema::document::{DocValue, Document}; +use crate::schema::document::{Document, Value}; use crate::schema::{Field, FieldType, IndexRecordOption, Term}; use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer}; use crate::{DocAddress, Result, Searcher, TantivyDocument, TantivyError}; @@ -93,7 +93,7 @@ impl MoreLikeThis { } /// Creates a [`BooleanQuery`] using a set of field values. - pub fn query_with_document_fields<'a, V: DocValue<'a>>( + pub fn query_with_document_fields<'a, V: Value<'a>>( &self, searcher: &Searcher, doc_fields: &[(Field, Vec)], @@ -137,7 +137,7 @@ impl MoreLikeThis { /// Finds terms for a more-like-this query. /// field_to_field_values is a mapping from field to possible values of that field. - fn retrieve_terms_from_doc_fields<'a, V: DocValue<'a>>( + fn retrieve_terms_from_doc_fields<'a, V: Value<'a>>( &self, searcher: &Searcher, field_to_values: &[(Field, Vec)], @@ -159,7 +159,7 @@ impl MoreLikeThis { /// Computes the frequency of values for a field while updating the term frequencies /// Note: A FieldValue can be made up of multiple terms. /// We are interested in extracting terms within FieldValue - fn add_term_frequencies<'a, V: DocValue<'a>>( + fn add_term_frequencies<'a, V: Value<'a>>( &self, searcher: &Searcher, field: Field, diff --git a/src/query/more_like_this/query.rs b/src/query/more_like_this/query.rs index c46120ce6..dd3db39da 100644 --- a/src/query/more_like_this/query.rs +++ b/src/query/more_like_this/query.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; use super::MoreLikeThis; use crate::query::{EnableScoring, Query, Weight}; -use crate::schema::{Field, Value}; +use crate::schema::{Field, OwnedValue}; use crate::DocAddress; /// A query that matches all of the documents similar to a document @@ -33,7 +33,7 @@ pub struct MoreLikeThisQuery { #[derive(Debug, Clone, PartialEq)] enum TargetDocument { DocumentAddress(DocAddress), - DocumentFields(Vec<(Field, Vec)>), + DocumentFields(Vec<(Field, Vec)>), } impl MoreLikeThisQuery { @@ -60,7 +60,7 @@ impl Query for MoreLikeThisQuery { TargetDocument::DocumentFields(doc_fields) => { let values = doc_fields .iter() - .map(|(field, values)| (*field, values.iter().collect::>())) + .map(|(field, values)| (*field, values.iter().collect::>())) .collect::>(); self.mlt @@ -175,7 +175,10 @@ impl MoreLikeThisQueryBuilder { /// that will be used to compose the resulting query. /// This interface is meant to be used when you want to provide your own set of fields /// not necessarily from a specific document. - pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec)>) -> MoreLikeThisQuery { + pub fn with_document_fields( + self, + doc_fields: Vec<(Field, Vec)>, + ) -> MoreLikeThisQuery { MoreLikeThisQuery { mlt: self.mlt, target: TargetDocument::DocumentFields(doc_fields), diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index 3ab0eef69..186b26657 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -802,52 +802,52 @@ mod tests { writer } - fn deserialize_value(buffer: Vec) -> crate::schema::Value { + fn deserialize_value(buffer: Vec) -> crate::schema::OwnedValue { let mut cursor = Cursor::new(buffer); let deserializer = BinaryValueDeserializer::from_reader(&mut cursor).unwrap(); - crate::schema::Value::deserialize(deserializer).expect("Deserialize value") + crate::schema::OwnedValue::deserialize(deserializer).expect("Deserialize value") } #[test] fn test_simple_value_serialize() { let result = serialize_value(ReferenceValue::Null); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::Null); + assert_eq!(value, crate::schema::OwnedValue::Null); let result = serialize_value(ReferenceValue::Str("hello, world")); let value = deserialize_value(result); assert_eq!( value, - crate::schema::Value::Str(String::from("hello, world")) + crate::schema::OwnedValue::Str(String::from("hello, world")) ); let result = serialize_value(ReferenceValue::U64(123)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::U64(123)); + assert_eq!(value, crate::schema::OwnedValue::U64(123)); let result = serialize_value(ReferenceValue::I64(-123)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::I64(-123)); + assert_eq!(value, crate::schema::OwnedValue::I64(-123)); let result = serialize_value(ReferenceValue::F64(123.3845)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::F64(123.3845)); + assert_eq!(value, crate::schema::OwnedValue::F64(123.3845)); let result = serialize_value(ReferenceValue::Bool(false)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::Bool(false)); + assert_eq!(value, crate::schema::OwnedValue::Bool(false)); let result = serialize_value(ReferenceValue::Date(DateTime::from_timestamp_micros(100))); let value = deserialize_value(result); assert_eq!( value, - crate::schema::Value::Date(DateTime::from_timestamp_micros(100)) + crate::schema::OwnedValue::Date(DateTime::from_timestamp_micros(100)) ); let facet = Facet::from_text("/hello/world").unwrap(); let result = serialize_value(ReferenceValue::Facet(&facet)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::Facet(facet)); + assert_eq!(value, crate::schema::OwnedValue::Facet(facet)); let pre_tok_str = PreTokenizedString { text: "hello, world".to_string(), @@ -855,7 +855,7 @@ mod tests { }; let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str)); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::PreTokStr(pre_tok_str)); + assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str)); } #[test] @@ -865,9 +865,9 @@ mod tests { let value = deserialize_value(result); assert_eq!( value, - crate::schema::Value::Array(vec![ - crate::schema::Value::Null, - crate::schema::Value::Null, + crate::schema::OwnedValue::Array(vec![ + crate::schema::OwnedValue::Null, + crate::schema::OwnedValue::Null, ]), ); @@ -879,16 +879,16 @@ mod tests { let value = deserialize_value(result); assert_eq!( value, - crate::schema::Value::Array(vec![ - crate::schema::Value::Str(String::from("Hello, world")), - crate::schema::Value::Str(String::from("Some demo")), + crate::schema::OwnedValue::Array(vec![ + crate::schema::OwnedValue::Str(String::from("Hello, world")), + crate::schema::OwnedValue::Str(String::from("Some demo")), ]), ); let elements = vec![]; let result = serialize_value(ReferenceValue::Array(JsonArrayIter(elements.iter()))); let value = deserialize_value(result); - assert_eq!(value, crate::schema::Value::Array(vec![])); + assert_eq!(value, crate::schema::OwnedValue::Array(vec![])); let elements = vec![ serde_json::Value::Null, @@ -899,10 +899,10 @@ mod tests { let value = deserialize_value(result); assert_eq!( value, - crate::schema::Value::Array(vec![ - crate::schema::Value::Null, - crate::schema::Value::Str(String::from("Hello, world")), - crate::schema::Value::U64(12345), + crate::schema::OwnedValue::Array(vec![ + crate::schema::OwnedValue::Null, + crate::schema::OwnedValue::Str(String::from("Hello, world")), + crate::schema::OwnedValue::U64(12345), ]), ); } @@ -925,17 +925,20 @@ mod tests { let mut expected_object = BTreeMap::new(); expected_object.insert( "my-first-key".to_string(), - crate::schema::Value::Str(String::from("Hello")), + crate::schema::OwnedValue::Str(String::from("Hello")), ); - expected_object.insert("my-second-key".to_string(), crate::schema::Value::Null); - expected_object.insert("my-third-key".to_string(), crate::schema::Value::F64(123.0)); - assert_eq!(value, crate::schema::Value::Object(expected_object)); + expected_object.insert("my-second-key".to_string(), crate::schema::OwnedValue::Null); + expected_object.insert( + "my-third-key".to_string(), + crate::schema::OwnedValue::F64(123.0), + ); + assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); let object = serde_json::Map::new(); let result = serialize_value(ReferenceValue::Object(JsonObjectIter(object.iter()))); let value = deserialize_value(result); let expected_object = BTreeMap::new(); - assert_eq!(value, crate::schema::Value::Object(expected_object)); + assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); let mut object = serde_json::Map::new(); object.insert("my-first-key".into(), serde_json::Value::Null); @@ -944,10 +947,10 @@ mod tests { let result = serialize_value(ReferenceValue::Object(JsonObjectIter(object.iter()))); let value = deserialize_value(result); let mut expected_object = BTreeMap::new(); - expected_object.insert("my-first-key".to_string(), crate::schema::Value::Null); - expected_object.insert("my-second-key".to_string(), crate::schema::Value::Null); - expected_object.insert("my-third-key".to_string(), crate::schema::Value::Null); - assert_eq!(value, crate::schema::Value::Object(expected_object)); + expected_object.insert("my-first-key".to_string(), crate::schema::OwnedValue::Null); + expected_object.insert("my-second-key".to_string(), crate::schema::OwnedValue::Null); + expected_object.insert("my-third-key".to_string(), crate::schema::OwnedValue::Null); + assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); } #[test] @@ -983,26 +986,29 @@ mod tests { let mut expected_object = BTreeMap::new(); expected_object.insert( "my-array".to_string(), - crate::schema::Value::Array(vec![ - crate::schema::Value::Null, - crate::schema::Value::Str(String::from("bobby of the sea")), + crate::schema::OwnedValue::Array(vec![ + crate::schema::OwnedValue::Null, + crate::schema::OwnedValue::Str(String::from("bobby of the sea")), ]), ); expected_object.insert( "my-object".to_string(), - crate::schema::Value::Object( + crate::schema::OwnedValue::Object( vec![ - ("inner-1".to_string(), crate::schema::Value::I64(-123i64)), + ( + "inner-1".to_string(), + crate::schema::OwnedValue::I64(-123i64), + ), ( "inner-2".to_string(), - crate::schema::Value::Str(String::from("bobby of the sea 2")), + crate::schema::OwnedValue::Str(String::from("bobby of the sea 2")), ), ] .into_iter() .collect(), ), ); - assert_eq!(value, crate::schema::Value::Object(expected_object)); + assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); // Some more extreme nesting that might behave weirdly let mut object = serde_json::Map::new(); @@ -1019,11 +1025,11 @@ mod tests { let mut expected_object = BTreeMap::new(); expected_object.insert( "my-array".to_string(), - crate::schema::Value::Array(vec![crate::schema::Value::Array(vec![ - crate::schema::Value::Array(vec![]), - crate::schema::Value::Array(vec![crate::schema::Value::Null]), + crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Array(vec![ + crate::schema::OwnedValue::Array(vec![]), + crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Null]), ])]), ); - assert_eq!(value, crate::schema::Value::Object(expected_object)); + assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); } } diff --git a/src/schema/document/default_doc_type.rs b/src/schema/document/default_doc_type.rs index 87647c3bd..a680c3c88 100644 --- a/src/schema/document/default_doc_type.rs +++ b/src/schema/document/default_doc_type.rs @@ -9,7 +9,7 @@ use crate::schema::document::{ }; use crate::schema::field_type::ValueParsingError; use crate::schema::field_value::FieldValueIter; -use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, Schema, Value}; +use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema}; use crate::tokenizer::PreTokenizedString; /// Tantivy's Document is the object that can be indexed and then searched for. @@ -23,7 +23,7 @@ pub struct TantivyDocument { } impl Document for TantivyDocument { - type Value<'a> = &'a Value; + type Value<'a> = &'a OwnedValue; type FieldsValuesIter<'a> = FieldValueIter<'a>; fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> { @@ -99,13 +99,13 @@ impl TantivyDocument { pub fn add_facet(&mut self, field: Field, path: F) where Facet: From { let facet = Facet::from(path); - let value = Value::Facet(facet); + let value = OwnedValue::Facet(facet); self.add_field_value(field, value); } /// Add a text field. pub fn add_text(&mut self, field: Field, text: S) { - let value = Value::Str(text.to_string()); + let value = OwnedValue::Str(text.to_string()); self.add_field_value(field, value); } @@ -150,12 +150,12 @@ impl TantivyDocument { } /// Add a dynamic object field - pub fn add_object(&mut self, field: Field, object: BTreeMap) { + pub fn add_object(&mut self, field: Field, object: BTreeMap) { self.add_field_value(field, object); } /// Add a (field, value) to the document. - pub fn add_field_value>(&mut self, field: Field, typed_val: T) { + pub fn add_field_value>(&mut self, field: Field, typed_val: T) { let value = typed_val.into(); let field_value = FieldValue { field, value }; self.field_values.push(field_value); @@ -167,7 +167,7 @@ impl TantivyDocument { } /// Returns all of the `FieldValue`s associated the given field - pub fn get_all(&self, field: Field) -> impl Iterator { + pub fn get_all(&self, field: Field) -> impl Iterator { self.field_values .iter() .filter(move |field_value| field_value.field() == field) @@ -175,7 +175,7 @@ impl TantivyDocument { } /// Returns the first `FieldValue` associated the given field - pub fn get_first(&self, field: Field) -> Option<&Value> { + pub fn get_first(&self, field: Field) -> Option<&OwnedValue> { self.get_all(field).next() } @@ -200,7 +200,7 @@ impl TantivyDocument { let mut field_map = BTreeMap::new(); for (field, field_values) in self.get_sorted_field_values() { let field_name = schema.get_field_name(field); - let values: Vec = field_values.into_iter().cloned().collect(); + let values: Vec = field_values.into_iter().cloned().collect(); field_map.insert(field_name.to_string(), values); } NamedFieldDocument(field_map) diff --git a/src/schema/document/existing_type_impls.rs b/src/schema/document/existing_type_impls.rs index fc47acf52..3937f472e 100644 --- a/src/schema/document/existing_type_impls.rs +++ b/src/schema/document/existing_type_impls.rs @@ -9,13 +9,13 @@ use std::collections::{btree_map, hash_map, BTreeMap, HashMap}; use serde_json::Number; use crate::schema::document::{ - ArrayAccess, DeserializeError, DocValue, Document, DocumentDeserialize, DocumentDeserializer, - ObjectAccess, ReferenceValue, ValueDeserialize, ValueDeserializer, ValueVisitor, + ArrayAccess, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer, + ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor, }; use crate::schema::Field; // Serde compatibility support. -impl<'a> DocValue<'a> for &'a serde_json::Value { +impl<'a> Value<'a> for &'a serde_json::Value { type ChildValue = Self; type ArrayIter = JsonArrayIter<'a>; type ObjectIter = JsonObjectIter<'a>; @@ -137,19 +137,19 @@ impl<'a> Iterator for JsonObjectIter<'a> { // Custom document types // BTreeMap based documents -impl Document for BTreeMap { - type Value<'a> = &'a crate::schema::Value; +impl Document for BTreeMap { + type Value<'a> = &'a crate::schema::OwnedValue; type FieldsValuesIter<'a> = FieldCopyingIterator< 'a, - btree_map::Iter<'a, Field, crate::schema::Value>, - crate::schema::Value, + btree_map::Iter<'a, Field, crate::schema::OwnedValue>, + crate::schema::OwnedValue, >; fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> { FieldCopyingIterator(self.iter()) } } -impl DocumentDeserialize for BTreeMap { +impl DocumentDeserialize for BTreeMap { fn deserialize<'de, D>(mut deserializer: D) -> Result where D: DocumentDeserializer<'de> { let mut document = BTreeMap::new(); @@ -163,19 +163,19 @@ impl DocumentDeserialize for BTreeMap { } // HashMap based documents -impl Document for HashMap { - type Value<'a> = &'a crate::schema::Value; +impl Document for HashMap { + type Value<'a> = &'a crate::schema::OwnedValue; type FieldsValuesIter<'a> = FieldCopyingIterator< 'a, - hash_map::Iter<'a, Field, crate::schema::Value>, - crate::schema::Value, + hash_map::Iter<'a, Field, crate::schema::OwnedValue>, + crate::schema::OwnedValue, >; fn iter_fields_and_values(&self) -> Self::FieldsValuesIter<'_> { FieldCopyingIterator(self.iter()) } } -impl DocumentDeserialize for HashMap { +impl DocumentDeserialize for HashMap { fn deserialize<'de, D>(mut deserializer: D) -> Result where D: DocumentDeserializer<'de> { let mut document = HashMap::with_capacity(deserializer.size_hint()); diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index dc23d8396..42f72ec0b 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -1,8 +1,8 @@ //! Document definition for Tantivy to index and store. //! //! A document and its values are defined by a couple core traits: -//! - [DocumentAccess] which describes your top-level document and it's fields. -//! - [DocValue] which provides tantivy with a way to access the document's values in a common way +//! - [Document] which describes your top-level document and it's fields. +//! - [Value] which provides tantivy with a way to access the document's values in a common way //! without performing any additional allocations. //! - [DocumentDeserialize] which implements the necessary code to deserialize the document from the //! doc store. @@ -24,7 +24,7 @@ //! significant amount of time when indexing by avoiding the additional allocations. //! //! ### Important Note -//! The implementor of the `DocumentAccess` trait must be `'static` and safe to send across +//! The implementor of the `Document` trait must be `'static` and safe to send across //! thread boundaries. //! //! ## Reusing existing types @@ -96,27 +96,27 @@ //! ## Implementing custom values //! Internally, Tantivy only works with `ReferenceValue` which is an enum that tries to borrow //! as much data as it can, in order to allow documents to return custom types, they must implement -//! the `DocValue` trait which provides a way for Tantivy to get a `ReferenceValue` that it can then +//! the `Value` trait which provides a way for Tantivy to get a `ReferenceValue` that it can then //! index and store. //! -//! Values can just as easily be customised as documents by implementing the `DocValue` trait. +//! Values can just as easily be customised as documents by implementing the `Value` trait. //! //! The implementor of this type should not own the data it's returning, instead it should just //! hold references of the data held by the parent [Document] which can then be passed //! on to the [ReferenceValue]. //! -//! This is why `DocValue` is implemented for `&'a serde_json::Value` and `&'a +//! This is why `Value` is implemented for `&'a serde_json::Value` and `&'a //! tantivy::schema::Value` but not for their owned counterparts, as we cannot satisfy the lifetime //! bounds necessary when indexing the documents. //! //! ### A note about returning values //! The custom value type does not have to be the type stored by the document, instead the -//! implementor of a `DocValue` can just be used as a way to convert between the owned type +//! implementor of a `Value` can just be used as a way to convert between the owned type //! kept in the parent document, and the value passed into Tantivy. //! //! ``` //! use tantivy::schema::document::ReferenceValue; -//! use tantivy::schema::{DocValue}; +//! use tantivy::schema::{Value}; //! //! #[derive(Debug)] //! /// Our custom value type which has 3 types, a string, float and bool. @@ -129,7 +129,7 @@ //! Bool(bool), //! } //! -//! impl<'a> DocValue<'a> for MyCustomValue<'a> { +//! impl<'a> Value<'a> for MyCustomValue<'a> { //! type ChildValue = Self; //! // We don't need to worry about these types here as we're not //! // working with nested types, but if we wanted to we would @@ -176,7 +176,7 @@ use crate::DateTime; /// The core trait representing a document within the index. pub trait Document: DocumentDeserialize + Send + Sync + 'static { /// The value of the field. - type Value<'a>: DocValue<'a> + Clone + type Value<'a>: Value<'a> + Clone where Self: 'a; /// The iterator over all of the fields and values within the doc. @@ -223,9 +223,9 @@ pub trait Document: DocumentDeserialize + Send + Sync + 'static { } /// A single field value. -pub trait DocValue<'a>: Send + Sync + Debug { +pub trait Value<'a>: Send + Sync + Debug { /// The child value type returned by this doc value. - type ChildValue: DocValue<'a>; + type ChildValue: Value<'a>; /// The iterator for walking through the elements within the array. type ArrayIter: Iterator>; /// The visitor walking through the key-value pairs within @@ -357,7 +357,7 @@ pub trait DocValue<'a>: Send + Sync + Debug { /// A enum representing a value for tantivy to index. pub enum ReferenceValue<'a, V> -where V: DocValue<'a> + ?Sized +where V: Value<'a> + ?Sized { /// A null value. Null, @@ -388,7 +388,7 @@ where V: DocValue<'a> + ?Sized } impl<'a, V> ReferenceValue<'a, V> -where V: DocValue<'a> +where V: Value<'a> { #[inline] /// Returns if the value is `null` or not. diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index 6b75e55f9..216015e58 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -5,7 +5,7 @@ use std::io::Write; use columnar::MonotonicallyMappableToU128; use common::{f64_to_u64, BinarySerializable, VInt}; -use crate::schema::document::{type_codes, DocValue, Document, ReferenceValue}; +use crate::schema::document::{type_codes, Document, ReferenceValue, Value}; use crate::schema::Schema; /// A serializer writing documents which implement [`Document`] to a provided writer. @@ -40,9 +40,9 @@ where W: Write let mut serializer = BinaryValueSerializer::new(self.writer); match value_access.as_value() { ReferenceValue::PreTokStr(pre_tokenized_text) => { - serializer.serialize_value(ReferenceValue::Str::<&'_ crate::schema::Value>( - &pre_tokenized_text.text, - ))?; + serializer.serialize_value(ReferenceValue::Str::< + &'_ crate::schema::OwnedValue, + >(&pre_tokenized_text.text))?; } _ => { serializer.serialize_value(value_access.as_value())?; @@ -87,7 +87,7 @@ where W: Write value: ReferenceValue<'a, V>, ) -> io::Result<()> where - V: DocValue<'a>, + V: Value<'a>, { match value { ReferenceValue::Null => self.write_type_code(type_codes::NULL_CODE), @@ -209,7 +209,7 @@ where W: Write value: ReferenceValue<'a, V>, ) -> io::Result<()> where - V: DocValue<'a>, + V: Value<'a>, { let mut serializer = BinaryValueSerializer::new(self.writer); serializer.serialize_value(value)?; @@ -265,7 +265,7 @@ where W: Write value: ReferenceValue<'a, V>, ) -> io::Result<()> where - V: DocValue<'a>, + V: Value<'a>, { // Keys and values are stored inline with one another. // Technically this isn't the *most* optimal way of storing the objects @@ -712,8 +712,8 @@ mod tests { let schema = builder.build(); let mut document = BTreeMap::new(); - document.insert(name, crate::schema::Value::Str("ChillFish8".into())); - document.insert(age, crate::schema::Value::U64(20)); + document.insert(name, crate::schema::OwnedValue::Str("ChillFish8".into())); + document.insert(age, crate::schema::OwnedValue::U64(20)); let result = serialize_doc(&document, &schema); let mut expected = expected_doc_data!(length document.len()); @@ -734,8 +734,8 @@ mod tests { let schema = builder.build(); let mut document = BTreeMap::new(); - document.insert(name, crate::schema::Value::Str("ChillFish8".into())); - document.insert(age, crate::schema::Value::U64(20)); + document.insert(name, crate::schema::OwnedValue::Str("ChillFish8".into())); + document.insert(age, crate::schema::OwnedValue::U64(20)); let result = serialize_doc(&document, &schema); let mut expected = expected_doc_data!(length 1); @@ -749,7 +749,7 @@ mod tests { let builder = Schema::builder(); let schema = builder.build(); - let document = BTreeMap::::new(); + let document = BTreeMap::::new(); let result = serialize_doc(&document, &schema); let expected = expected_doc_data!(length document.len()); assert_eq!( diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 5b8bc03e8..e4c36b8f0 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -12,8 +12,8 @@ use super::IntoIpv6Addr; use crate::schema::bytes_options::BytesOptions; use crate::schema::facet_options::FacetOptions; use crate::schema::{ - DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing, - TextOptions, Value, + DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, OwnedValue, + TextFieldIndexing, TextOptions, }; use crate::time::format_description::well_known::Rfc3339; use crate::time::OffsetDateTime; @@ -316,7 +316,7 @@ impl FieldType { /// Tantivy will not try to cast values. /// For instance, If the json value is the integer `3` and the /// target field is a `Str`, this method will return an Error. - pub fn value_from_json(&self, json: JsonValue) -> Result { + pub fn value_from_json(&self, json: JsonValue) -> Result { match json { JsonValue::String(field_text) => { match self { @@ -328,10 +328,10 @@ impl FieldType { })?; Ok(DateTime::from_utc(dt_with_fixed_tz).into()) } - FieldType::Str(_) => Ok(Value::Str(field_text)), + FieldType::Str(_) => Ok(OwnedValue::Str(field_text)), FieldType::U64(opt) => { if opt.should_coerce() { - Ok(Value::U64(field_text.parse().map_err(|_| { + Ok(OwnedValue::U64(field_text.parse().map_err(|_| { ValueParsingError::TypeError { expected: "a u64 or a u64 as string", json: JsonValue::String(field_text), @@ -346,7 +346,7 @@ impl FieldType { } FieldType::I64(opt) => { if opt.should_coerce() { - Ok(Value::I64(field_text.parse().map_err(|_| { + Ok(OwnedValue::I64(field_text.parse().map_err(|_| { ValueParsingError::TypeError { expected: "a i64 or a i64 as string", json: JsonValue::String(field_text), @@ -361,7 +361,7 @@ impl FieldType { } FieldType::F64(opt) => { if opt.should_coerce() { - Ok(Value::F64(field_text.parse().map_err(|_| { + Ok(OwnedValue::F64(field_text.parse().map_err(|_| { ValueParsingError::TypeError { expected: "a f64 or a f64 as string", json: JsonValue::String(field_text), @@ -376,7 +376,7 @@ impl FieldType { } FieldType::Bool(opt) => { if opt.should_coerce() { - Ok(Value::Bool(field_text.parse().map_err(|_| { + Ok(OwnedValue::Bool(field_text.parse().map_err(|_| { ValueParsingError::TypeError { expected: "a i64 or a bool as string", json: JsonValue::String(field_text), @@ -389,10 +389,10 @@ impl FieldType { }) } } - FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))), + FieldType::Facet(_) => Ok(OwnedValue::Facet(Facet::from(&field_text))), FieldType::Bytes(_) => BASE64 .decode(&field_text) - .map(Value::Bytes) + .map(OwnedValue::Bytes) .map_err(|_| ValueParsingError::InvalidBase64 { base64: field_text }), FieldType::JsonObject(_) => Err(ValueParsingError::TypeError { expected: "a json object", @@ -406,14 +406,14 @@ impl FieldType { } })?; - Ok(Value::IpAddr(ip_addr.into_ipv6_addr())) + Ok(OwnedValue::IpAddr(ip_addr.into_ipv6_addr())) } } } JsonValue::Number(field_val_num) => match self { FieldType::I64(_) | FieldType::Date(_) => { if let Some(field_val_i64) = field_val_num.as_i64() { - Ok(Value::I64(field_val_i64)) + Ok(OwnedValue::I64(field_val_i64)) } else { Err(ValueParsingError::OverflowError { expected: "an i64 int", @@ -423,7 +423,7 @@ impl FieldType { } FieldType::U64(_) => { if let Some(field_val_u64) = field_val_num.as_u64() { - Ok(Value::U64(field_val_u64)) + Ok(OwnedValue::U64(field_val_u64)) } else { Err(ValueParsingError::OverflowError { expected: "u64", @@ -433,7 +433,7 @@ impl FieldType { } FieldType::F64(_) => { if let Some(field_val_f64) = field_val_num.as_f64() { - Ok(Value::F64(field_val_f64)) + Ok(OwnedValue::F64(field_val_f64)) } else { Err(ValueParsingError::OverflowError { expected: "a f64", @@ -447,7 +447,7 @@ impl FieldType { }), FieldType::Str(opt) => { if opt.should_coerce() { - Ok(Value::Str(field_val_num.to_string())) + Ok(OwnedValue::Str(field_val_num.to_string())) } else { Err(ValueParsingError::TypeError { expected: "a string", @@ -473,7 +473,7 @@ impl FieldType { if let Ok(tok_str_val) = serde_json::from_value::( serde_json::Value::Object(json_map.clone()), ) { - Ok(Value::PreTokStr(tok_str_val)) + Ok(OwnedValue::PreTokStr(tok_str_val)) } else { Err(ValueParsingError::TypeError { expected: "a string or an pretokenized string", @@ -481,17 +481,17 @@ impl FieldType { }) } } - FieldType::JsonObject(_) => Ok(Value::from(json_map)), + FieldType::JsonObject(_) => Ok(OwnedValue::from(json_map)), _ => Err(ValueParsingError::TypeError { expected: self.value_type().name(), json: JsonValue::Object(json_map), }), }, JsonValue::Bool(json_bool_val) => match self { - FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)), + FieldType::Bool(_) => Ok(OwnedValue::Bool(json_bool_val)), FieldType::Str(opt) => { if opt.should_coerce() { - Ok(Value::Str(json_bool_val.to_string())) + Ok(OwnedValue::Str(json_bool_val.to_string())) } else { Err(ValueParsingError::TypeError { expected: "a string", @@ -508,7 +508,7 @@ impl FieldType { JsonValue::Null => match self { FieldType::Str(opt) => { if opt.should_coerce() { - Ok(Value::Str("null".to_string())) + Ok(OwnedValue::Str("null".to_string())) } else { Err(ValueParsingError::TypeError { expected: "a string", @@ -535,7 +535,7 @@ mod tests { use super::FieldType; use crate::schema::field_type::ValueParsingError; - use crate::schema::{NumericOptions, Schema, TextOptions, Type, Value, COERCE, INDEXED}; + use crate::schema::{NumericOptions, OwnedValue, Schema, TextOptions, Type, COERCE, INDEXED}; use crate::time::{Date, Month, PrimitiveDateTime, Time}; use crate::tokenizer::{PreTokenizedString, Token}; use crate::{DateTime, TantivyDocument}; @@ -547,20 +547,20 @@ mod tests { let schema = schema_builder.build(); let doc = TantivyDocument::parse_json(&schema, r#"{"id": 100}"#).unwrap(); assert_eq!( - &Value::Str("100".to_string()), + &OwnedValue::Str("100".to_string()), doc.get_first(text_field).unwrap() ); let doc = TantivyDocument::parse_json(&schema, r#"{"id": true}"#).unwrap(); assert_eq!( - &Value::Str("true".to_string()), + &OwnedValue::Str("true".to_string()), doc.get_first(text_field).unwrap() ); // Not sure if this null coercion is the best approach let doc = TantivyDocument::parse_json(&schema, r#"{"id": null}"#).unwrap(); assert_eq!( - &Value::Str("null".to_string()), + &OwnedValue::Str("null".to_string()), doc.get_first(text_field).unwrap() ); } @@ -574,9 +574,9 @@ mod tests { let schema = schema_builder.build(); let doc_json = r#"{"i64": "100", "u64": "100", "f64": "100"}"#; let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap(); - assert_eq!(&Value::I64(100), doc.get_first(i64_field).unwrap()); - assert_eq!(&Value::U64(100), doc.get_first(u64_field).unwrap()); - assert_eq!(&Value::F64(100.0), doc.get_first(f64_field).unwrap()); + assert_eq!(&OwnedValue::I64(100), doc.get_first(i64_field).unwrap()); + assert_eq!(&OwnedValue::U64(100), doc.get_first(u64_field).unwrap()); + assert_eq!(&OwnedValue::F64(100.0), doc.get_first(f64_field).unwrap()); } #[test] @@ -586,11 +586,11 @@ mod tests { let schema = schema_builder.build(); let doc_json = r#"{"bool": "true"}"#; let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap(); - assert_eq!(&Value::Bool(true), doc.get_first(bool_field).unwrap()); + assert_eq!(&OwnedValue::Bool(true), doc.get_first(bool_field).unwrap()); let doc_json = r#"{"bool": "false"}"#; let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap(); - assert_eq!(&Value::Bool(false), doc.get_first(bool_field).unwrap()); + assert_eq!(&OwnedValue::Bool(false), doc.get_first(bool_field).unwrap()); } #[test] @@ -647,7 +647,10 @@ mod tests { let result = FieldType::Bytes(Default::default()) .value_from_json(json!("dGhpcyBpcyBhIHRlc3Q=")) .unwrap(); - assert_eq!(result, Value::Bytes("this is a test".as_bytes().to_vec())); + assert_eq!( + result, + OwnedValue::Bytes("this is a test".as_bytes().to_vec()) + ); let result = FieldType::Bytes(Default::default()).value_from_json(json!(521)); match result { @@ -691,7 +694,7 @@ mod tests { ] }"#; - let expected_value = Value::PreTokStr(PreTokenizedString { + let expected_value = OwnedValue::PreTokStr(PreTokenizedString { text: String::from("The Old Man"), tokens: vec![ Token { diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index a03570b7c..ac5851ebd 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -1,16 +1,16 @@ -use crate::schema::{Field, Value}; +use crate::schema::{Field, OwnedValue}; /// `FieldValue` holds together a `Field` and its `Value`. #[allow(missing_docs)] #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct FieldValue { pub field: Field, - pub value: Value, + pub value: OwnedValue, } impl FieldValue { /// Constructor - pub fn new(field: Field, value: Value) -> FieldValue { + pub fn new(field: Field, value: OwnedValue) -> FieldValue { FieldValue { field, value } } @@ -20,12 +20,12 @@ impl FieldValue { } /// Value accessor - pub fn value(&self) -> &Value { + pub fn value(&self) -> &OwnedValue { &self.value } } -impl From for Value { +impl From for OwnedValue { fn from(field_value: FieldValue) -> Self { field_value.value } @@ -36,7 +36,7 @@ impl From for Value { pub struct FieldValueIter<'a>(pub(crate) std::slice::Iter<'a, FieldValue>); impl<'a> Iterator for FieldValueIter<'a> { - type Item = (Field, &'a Value); + type Item = (Field, &'a OwnedValue); fn next(&mut self) -> Option { self.0 diff --git a/src/schema/mod.rs b/src/schema/mod.rs index ffe68de42..f0073b740 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -134,7 +134,7 @@ pub use self::bytes_options::BytesOptions; #[allow(deprecated)] pub use self::date_time_options::DatePrecision; pub use self::date_time_options::{DateOptions, DateTimePrecision, DATE_TIME_PRECISION_INDEXED}; -pub use self::document::{DocParsingError, DocValue, Document, TantivyDocument}; +pub use self::document::{DocParsingError, Document, TantivyDocument, Value}; pub(crate) use self::facet::FACET_SEP_BYTE; pub use self::facet::{Facet, FacetParseError}; pub use self::facet_options::FacetOptions; @@ -153,7 +153,7 @@ pub use self::numeric_options::NumericOptions; pub use self::schema::{Schema, SchemaBuilder}; pub use self::term::{Term, ValueBytes, JSON_END_OF_PATH}; pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT}; -pub use self::value::Value; +pub use self::value::OwnedValue; /// Validator for a potential `field_name`. /// Returns true if the name can be use for a field name. diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 9f7d09fae..0aeea4c98 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use serde::{Deserialize, Serialize}; -use crate::schema::Value; +use crate::schema::OwnedValue; /// Internal representation of a document used for JSON /// serialization. @@ -10,4 +10,4 @@ use crate::schema::Value; /// A `NamedFieldDocument` is a simple representation of a document /// as a `BTreeMap>`. #[derive(Debug, Deserialize, Serialize)] -pub struct NamedFieldDocument(pub BTreeMap>); +pub struct NamedFieldDocument(pub BTreeMap>); diff --git a/src/schema/schema.rs b/src/schema/schema.rs index df57ba876..98d6de94e 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -413,7 +413,7 @@ mod tests { use pretty_assertions::assert_eq; use serde_json; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::field_type::ValueParsingError; use crate::schema::schema::DocParsingError::InvalidJson; use crate::schema::*; @@ -630,24 +630,24 @@ mod tests { let mut named_doc_map = BTreeMap::default(); named_doc_map.insert( "title".to_string(), - vec![Value::from("title1"), Value::from("title2")], + vec![OwnedValue::from("title1"), OwnedValue::from("title2")], ); named_doc_map.insert( "val".to_string(), - vec![Value::from(14u64), Value::from(-1i64)], + vec![OwnedValue::from(14u64), OwnedValue::from(-1i64)], ); let doc = TantivyDocument::convert_named_doc(&schema, NamedFieldDocument(named_doc_map)).unwrap(); assert_eq!( doc.get_all(title).collect::>(), vec![ - &Value::from("title1".to_string()), - &Value::from("title2".to_string()) + &OwnedValue::from("title1".to_string()), + &OwnedValue::from("title2".to_string()) ] ); assert_eq!( doc.get_all(val).collect::>(), - vec![&Value::from(14u64), &Value::from(-1i64)] + vec![&OwnedValue::from(14u64), &OwnedValue::from(-1i64)] ); } @@ -657,7 +657,7 @@ mod tests { let mut named_doc_map = BTreeMap::default(); named_doc_map.insert( "title".to_string(), - vec![Value::from("title1"), Value::from("title2")], + vec![OwnedValue::from("title1"), OwnedValue::from("title2")], ); TantivyDocument::convert_named_doc(&schema, NamedFieldDocument(named_doc_map)).unwrap(); } diff --git a/src/schema/value.rs b/src/schema/value.rs index 341388e42..d0813c05d 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -9,17 +9,18 @@ use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; use crate::schema::document::{ - ArrayAccess, DeserializeError, DocValue, ObjectAccess, ReferenceValue, ValueDeserialize, + ArrayAccess, DeserializeError, ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor, }; use crate::schema::Facet; use crate::tokenizer::PreTokenizedString; use crate::DateTime; -/// Value represents the value of a any field. +/// This is a owned variant of `Value`, that can be passed around without lifetimes. +/// Represents the value of a any field. /// It is an enum over all over all of the possible field type. #[derive(Debug, Clone, PartialEq)] -pub enum Value { +pub enum OwnedValue { /// A null value. Null, /// The str type is used for any text information. @@ -48,83 +49,83 @@ pub enum Value { IpAddr(Ipv6Addr), } -impl<'a> DocValue<'a> for &'a Value { +impl<'a> Value<'a> for &'a OwnedValue { type ChildValue = Self; type ArrayIter = ArrayIter<'a>; type ObjectIter = ObjectMapIter<'a>; fn as_value(&self) -> ReferenceValue<'a, Self> { match self { - Value::Null => ReferenceValue::Null, - Value::Str(val) => ReferenceValue::Str(val), - Value::PreTokStr(val) => ReferenceValue::PreTokStr(val), - Value::U64(val) => ReferenceValue::U64(*val), - Value::I64(val) => ReferenceValue::I64(*val), - Value::F64(val) => ReferenceValue::F64(*val), - Value::Bool(val) => ReferenceValue::Bool(*val), - Value::Date(val) => ReferenceValue::Date(*val), - Value::Facet(val) => ReferenceValue::Facet(val), - Value::Bytes(val) => ReferenceValue::Bytes(val), - Value::IpAddr(val) => ReferenceValue::IpAddr(*val), - Value::Array(array) => ReferenceValue::Array(ArrayIter(array.iter())), - Value::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())), + OwnedValue::Null => ReferenceValue::Null, + OwnedValue::Str(val) => ReferenceValue::Str(val), + OwnedValue::PreTokStr(val) => ReferenceValue::PreTokStr(val), + OwnedValue::U64(val) => ReferenceValue::U64(*val), + OwnedValue::I64(val) => ReferenceValue::I64(*val), + OwnedValue::F64(val) => ReferenceValue::F64(*val), + OwnedValue::Bool(val) => ReferenceValue::Bool(*val), + OwnedValue::Date(val) => ReferenceValue::Date(*val), + OwnedValue::Facet(val) => ReferenceValue::Facet(val), + OwnedValue::Bytes(val) => ReferenceValue::Bytes(val), + OwnedValue::IpAddr(val) => ReferenceValue::IpAddr(*val), + OwnedValue::Array(array) => ReferenceValue::Array(ArrayIter(array.iter())), + OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())), } } } -impl ValueDeserialize for Value { +impl ValueDeserialize for OwnedValue { fn deserialize<'de, D>(deserializer: D) -> Result where D: ValueDeserializer<'de> { struct Visitor; impl ValueVisitor for Visitor { - type Value = Value; + type Value = OwnedValue; fn visit_null(&self) -> Result { - Ok(Value::Null) + Ok(OwnedValue::Null) } fn visit_string(&self, val: String) -> Result { - Ok(Value::Str(val)) + Ok(OwnedValue::Str(val)) } fn visit_u64(&self, val: u64) -> Result { - Ok(Value::U64(val)) + Ok(OwnedValue::U64(val)) } fn visit_i64(&self, val: i64) -> Result { - Ok(Value::I64(val)) + Ok(OwnedValue::I64(val)) } fn visit_f64(&self, val: f64) -> Result { - Ok(Value::F64(val)) + Ok(OwnedValue::F64(val)) } fn visit_bool(&self, val: bool) -> Result { - Ok(Value::Bool(val)) + Ok(OwnedValue::Bool(val)) } fn visit_datetime(&self, val: DateTime) -> Result { - Ok(Value::Date(val)) + Ok(OwnedValue::Date(val)) } fn visit_ip_address(&self, val: Ipv6Addr) -> Result { - Ok(Value::IpAddr(val)) + Ok(OwnedValue::IpAddr(val)) } fn visit_facet(&self, val: Facet) -> Result { - Ok(Value::Facet(val)) + Ok(OwnedValue::Facet(val)) } fn visit_bytes(&self, val: Vec) -> Result { - Ok(Value::Bytes(val)) + Ok(OwnedValue::Bytes(val)) } fn visit_pre_tokenized_string( &self, val: PreTokenizedString, ) -> Result { - Ok(Value::PreTokStr(val)) + Ok(OwnedValue::PreTokStr(val)) } fn visit_array<'de, A>(&self, mut access: A) -> Result @@ -135,7 +136,7 @@ impl ValueDeserialize for Value { elements.push(value); } - Ok(Value::Array(elements)) + Ok(OwnedValue::Array(elements)) } fn visit_object<'de, A>(&self, mut access: A) -> Result @@ -146,7 +147,7 @@ impl ValueDeserialize for Value { elements.insert(key, value); } - Ok(Value::Object(elements)) + Ok(OwnedValue::Object(elements)) } } @@ -154,24 +155,26 @@ impl ValueDeserialize for Value { } } -impl Eq for Value {} +impl Eq for OwnedValue {} -impl serde::Serialize for Value { +impl serde::Serialize for OwnedValue { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer { match *self { - Value::Null => serializer.serialize_unit(), - Value::Str(ref v) => serializer.serialize_str(v), - Value::PreTokStr(ref v) => v.serialize(serializer), - Value::U64(u) => serializer.serialize_u64(u), - Value::I64(u) => serializer.serialize_i64(u), - Value::F64(u) => serializer.serialize_f64(u), - Value::Bool(b) => serializer.serialize_bool(b), - Value::Date(ref date) => time::serde::rfc3339::serialize(&date.into_utc(), serializer), - Value::Facet(ref facet) => facet.serialize(serializer), - Value::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)), - Value::Object(ref obj) => obj.serialize(serializer), - Value::IpAddr(ref ip_v6) => { + OwnedValue::Null => serializer.serialize_unit(), + OwnedValue::Str(ref v) => serializer.serialize_str(v), + OwnedValue::PreTokStr(ref v) => v.serialize(serializer), + OwnedValue::U64(u) => serializer.serialize_u64(u), + OwnedValue::I64(u) => serializer.serialize_i64(u), + OwnedValue::F64(u) => serializer.serialize_f64(u), + OwnedValue::Bool(b) => serializer.serialize_bool(b), + OwnedValue::Date(ref date) => { + time::serde::rfc3339::serialize(&date.into_utc(), serializer) + } + OwnedValue::Facet(ref facet) => facet.serialize(serializer), + OwnedValue::Bytes(ref bytes) => serializer.serialize_str(&BASE64.encode(bytes)), + OwnedValue::Object(ref obj) => obj.serialize(serializer), + OwnedValue::IpAddr(ref ip_v6) => { // Ensure IpV4 addresses get serialized as IpV4, but excluding IpV6 loopback. if let Some(ip_v4) = ip_v6.to_ipv4_mapped() { ip_v4.serialize(serializer) @@ -179,50 +182,50 @@ impl serde::Serialize for Value { ip_v6.serialize(serializer) } } - Value::Array(ref array) => array.serialize(serializer), + OwnedValue::Array(ref array) => array.serialize(serializer), } } } -impl<'de> serde::Deserialize<'de> for Value { +impl<'de> serde::Deserialize<'de> for OwnedValue { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de> { struct ValueVisitor; impl<'de> serde::de::Visitor<'de> for ValueVisitor { - type Value = Value; + type Value = OwnedValue; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("a string or u32") } fn visit_bool(self, v: bool) -> Result { - Ok(Value::Bool(v)) + Ok(OwnedValue::Bool(v)) } fn visit_i64(self, v: i64) -> Result { - Ok(Value::I64(v)) + Ok(OwnedValue::I64(v)) } fn visit_u64(self, v: u64) -> Result { - Ok(Value::U64(v)) + Ok(OwnedValue::U64(v)) } fn visit_f64(self, v: f64) -> Result { - Ok(Value::F64(v)) + Ok(OwnedValue::F64(v)) } fn visit_str(self, v: &str) -> Result { - Ok(Value::Str(v.to_owned())) + Ok(OwnedValue::Str(v.to_owned())) } fn visit_string(self, v: String) -> Result { - Ok(Value::Str(v)) + Ok(OwnedValue::Str(v)) } fn visit_unit(self) -> Result where E: serde::de::Error { - Ok(Value::Null) + Ok(OwnedValue::Null) } fn visit_seq(self, mut seq: A) -> Result @@ -233,7 +236,7 @@ impl<'de> serde::Deserialize<'de> for Value { elements.push(value); } - Ok(Value::Array(elements)) + Ok(OwnedValue::Array(elements)) } fn visit_map(self, mut map: A) -> Result @@ -244,7 +247,7 @@ impl<'de> serde::Deserialize<'de> for Value { object.insert(key, value); } - Ok(Value::Object(object)) + Ok(OwnedValue::Object(object)) } } @@ -252,81 +255,81 @@ impl<'de> serde::Deserialize<'de> for Value { } } -impl From for Value { - fn from(s: String) -> Value { - Value::Str(s) +impl From for OwnedValue { + fn from(s: String) -> OwnedValue { + OwnedValue::Str(s) } } -impl From for Value { - fn from(v: Ipv6Addr) -> Value { - Value::IpAddr(v) +impl From for OwnedValue { + fn from(v: Ipv6Addr) -> OwnedValue { + OwnedValue::IpAddr(v) } } -impl From for Value { - fn from(v: u64) -> Value { - Value::U64(v) +impl From for OwnedValue { + fn from(v: u64) -> OwnedValue { + OwnedValue::U64(v) } } -impl From for Value { - fn from(v: i64) -> Value { - Value::I64(v) +impl From for OwnedValue { + fn from(v: i64) -> OwnedValue { + OwnedValue::I64(v) } } -impl From for Value { - fn from(v: f64) -> Value { - Value::F64(v) +impl From for OwnedValue { + fn from(v: f64) -> OwnedValue { + OwnedValue::F64(v) } } -impl From for Value { +impl From for OwnedValue { fn from(b: bool) -> Self { - Value::Bool(b) + OwnedValue::Bool(b) } } -impl From for Value { - fn from(dt: DateTime) -> Value { - Value::Date(dt) +impl From for OwnedValue { + fn from(dt: DateTime) -> OwnedValue { + OwnedValue::Date(dt) } } -impl<'a> From<&'a str> for Value { - fn from(s: &'a str) -> Value { - Value::Str(s.to_string()) +impl<'a> From<&'a str> for OwnedValue { + fn from(s: &'a str) -> OwnedValue { + OwnedValue::Str(s.to_string()) } } -impl<'a> From<&'a [u8]> for Value { - fn from(bytes: &'a [u8]) -> Value { - Value::Bytes(bytes.to_vec()) +impl<'a> From<&'a [u8]> for OwnedValue { + fn from(bytes: &'a [u8]) -> OwnedValue { + OwnedValue::Bytes(bytes.to_vec()) } } -impl From for Value { - fn from(facet: Facet) -> Value { - Value::Facet(facet) +impl From for OwnedValue { + fn from(facet: Facet) -> OwnedValue { + OwnedValue::Facet(facet) } } -impl From> for Value { - fn from(bytes: Vec) -> Value { - Value::Bytes(bytes) +impl From> for OwnedValue { + fn from(bytes: Vec) -> OwnedValue { + OwnedValue::Bytes(bytes) } } -impl From for Value { - fn from(pretokenized_string: PreTokenizedString) -> Value { - Value::PreTokStr(pretokenized_string) +impl From for OwnedValue { + fn from(pretokenized_string: PreTokenizedString) -> OwnedValue { + OwnedValue::PreTokStr(pretokenized_string) } } -impl From> for Value { - fn from(object: BTreeMap) -> Value { - Value::Object(object) +impl From> for OwnedValue { + fn from(object: BTreeMap) -> OwnedValue { + OwnedValue::Object(object) } } @@ -340,7 +343,7 @@ fn can_be_rfc3339_date_time(text: &str) -> bool { false } -impl From for Value { +impl From for OwnedValue { fn from(value: serde_json::Value) -> Self { match value { serde_json::Value::Null => Self::Null, @@ -378,23 +381,23 @@ impl From for Value { } } -impl From> for Value { +impl From> for OwnedValue { fn from(map: serde_json::Map) -> Self { let mut object = BTreeMap::new(); for (key, value) in map { - object.insert(key, Value::from(value)); + object.insert(key, OwnedValue::from(value)); } - Value::Object(object) + OwnedValue::Object(object) } } /// A wrapper type for iterating over a serde_json array producing reference values. -pub struct ArrayIter<'a>(std::slice::Iter<'a, Value>); +pub struct ArrayIter<'a>(std::slice::Iter<'a, OwnedValue>); impl<'a> Iterator for ArrayIter<'a> { - type Item = ReferenceValue<'a, &'a Value>; + type Item = ReferenceValue<'a, &'a OwnedValue>; fn next(&mut self) -> Option { let value = self.0.next()?; @@ -403,10 +406,10 @@ impl<'a> Iterator for ArrayIter<'a> { } /// A wrapper type for iterating over a serde_json object producing reference values. -pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, Value>); +pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, OwnedValue>); impl<'a> Iterator for ObjectMapIter<'a> { - type Item = (&'a str, ReferenceValue<'a, &'a Value>); + type Item = (&'a str, ReferenceValue<'a, &'a OwnedValue>); fn next(&mut self) -> Option { let (key, value) = self.0.next()?; @@ -416,7 +419,7 @@ impl<'a> Iterator for ObjectMapIter<'a> { #[cfg(test)] mod tests { - use super::Value; + use super::OwnedValue; use crate::schema::{BytesOptions, Schema}; use crate::time::format_description::well_known::Rfc3339; use crate::time::OffsetDateTime; @@ -466,12 +469,12 @@ mod tests { #[test] fn test_serialize_date() { - let value = Value::from(DateTime::from_utc( + let value = OwnedValue::from(DateTime::from_utc( OffsetDateTime::parse("1996-12-20T00:39:57+00:00", &Rfc3339).unwrap(), )); let serialized_value_json = serde_json::to_string_pretty(&value).unwrap(); assert_eq!(serialized_value_json, r#""1996-12-20T00:39:57Z""#); - let value = Value::from(DateTime::from_utc( + let value = OwnedValue::from(DateTime::from_utc( OffsetDateTime::parse("1996-12-20T00:39:57-01:00", &Rfc3339).unwrap(), )); let serialized_value_json = serde_json::to_string_pretty(&value).unwrap(); diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index e913510d9..c5884d6ae 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -5,7 +5,7 @@ use std::ops::Range; use htmlescape::encode_minimal; use crate::query::Query; -use crate::schema::document::{DocValue, Document}; +use crate::schema::document::{Document, Value}; use crate::schema::Field; use crate::tokenizer::{TextAnalyzer, Token}; use crate::{Score, Searcher, Term}; diff --git a/src/store/mod.rs b/src/store/mod.rs index b947a84a0..7fbd8c1e5 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -59,7 +59,7 @@ pub mod tests { use super::*; use crate::directory::{Directory, RamDirectory, WritePtr}; use crate::fastfield::AliveBitSet; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{ self, Schema, TantivyDocument, TextFieldIndexing, TextOptions, STORED, TEXT, }; diff --git a/src/store/reader.rs b/src/store/reader.rs index d20a4add9..24017ef17 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -385,7 +385,7 @@ mod tests { use super::*; use crate::directory::RamDirectory; - use crate::schema::document::DocValue; + use crate::schema::document::Value; use crate::schema::{Field, TantivyDocument}; use crate::store::tests::write_lorem_ipsum_store; use crate::store::Compressor;