diff --git a/common/src/serialize.rs b/common/src/serialize.rs index 7b96316ee..b6a5ba3a3 100644 --- a/common/src/serialize.rs +++ b/common/src/serialize.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::io::{Read, Write}; use std::{fmt, io}; @@ -210,6 +211,23 @@ impl BinarySerializable for String { } } +impl<'a> BinarySerializable for Cow<'a, str> { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + let data: &[u8] = self.as_bytes(); + VInt(data.len() as u64).serialize(writer)?; + writer.write_all(data) + } + + fn deserialize(reader: &mut R) -> io::Result { + let string_length = VInt::deserialize(reader)?.val() as usize; + let mut result = String::with_capacity(string_length); + reader + .take(string_length as u64) + .read_to_string(&mut result)?; + Ok(Cow::Owned(result)) + } +} + #[cfg(test)] pub mod test { diff --git a/src/query/more_like_this/query.rs b/src/query/more_like_this/query.rs index 6d12c3272..fb5a48a28 100644 --- a/src/query/more_like_this/query.rs +++ b/src/query/more_like_this/query.rs @@ -31,7 +31,7 @@ pub struct MoreLikeThisQuery { #[derive(Debug, PartialEq, Clone)] enum TargetDocument { DocumentAdress(DocAddress), - DocumentFields(Vec<(Field, Vec)>), + DocumentFields(Vec<(Field, Vec>)>), } impl MoreLikeThisQuery { @@ -160,7 +160,10 @@ impl MoreLikeThisQueryBuilder { /// that will be used to compose the resulting query. /// This interface is meant to be used when you want to provide your own set of fields /// not necessarily from a specific document. - pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec)>) -> MoreLikeThisQuery { + pub fn with_document_fields( + self, + doc_fields: Vec<(Field, Vec>)>, + ) -> MoreLikeThisQuery { MoreLikeThisQuery { mlt: self.mlt, target: TargetDocument::DocumentFields(doc_fields), diff --git a/src/schema/document.rs b/src/schema/document.rs index 3c92405c4..14f220d4d 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::io::{self, Read, Write}; use std::mem; @@ -15,12 +16,13 @@ use crate::DateTime; /// Documents are fundamentally a collection of unordered couples `(field, value)`. /// In this list, one field may appear more than once. #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)] +#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))] pub struct Document { - field_values: Vec, + field_values: Vec>, } -impl From> for Document { - fn from(field_values: Vec) -> Self { +impl From>> for Document { + fn from(field_values: Vec>) -> Self { Document { field_values } } } @@ -49,9 +51,9 @@ impl PartialEq for Document { impl Eq for Document {} impl IntoIterator for Document { - type Item = FieldValue; + type Item = FieldValue<'static>; - type IntoIter = std::vec::IntoIter; + type IntoIter = std::vec::IntoIter>; fn into_iter(self) -> Self::IntoIter { self.field_values.into_iter() @@ -84,7 +86,7 @@ impl Document { /// Add a text field. pub fn add_text(&mut self, field: Field, text: S) { - let value = Value::Str(text.to_string()); + let value = Value::Str(Cow::Owned(text.to_string())); self.add_field_value(field, value); } @@ -138,7 +140,7 @@ impl Document { } /// Add a (field, value) to the document. - pub fn add_field_value>(&mut self, field: Field, typed_val: T) { + pub fn add_field_value>>(&mut self, field: Field, typed_val: T) { let value = typed_val.into(); let field_value = FieldValue { field, value }; self.field_values.push(field_value); @@ -216,7 +218,7 @@ impl Document { } => { let field_value = FieldValue { field: *field, - value: Value::Str(pre_tokenized_text.text.to_string()), + value: Value::Str(Cow::Owned(pre_tokenized_text.text.to_string())), }; field_value.serialize(writer)?; } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index c2cae2f1e..af537f64c 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::net::IpAddr; use std::str::FromStr; @@ -329,7 +330,7 @@ impl FieldType { /// Tantivy will not try to cast values. /// For instance, If the json value is the integer `3` and the /// target field is a `Str`, this method will return an Error. - pub fn value_from_json(&self, json: JsonValue) -> Result { + pub fn value_from_json(&self, json: JsonValue) -> Result, ValueParsingError> { match json { JsonValue::String(field_text) => { match self { @@ -341,7 +342,7 @@ impl FieldType { })?; Ok(DateTime::from_utc(dt_with_fixed_tz).into()) } - FieldType::Str(_) => Ok(Value::Str(field_text)), + FieldType::Str(_) => Ok(Value::Str(Cow::Owned(field_text))), FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => { Err(ValueParsingError::TypeError { expected: "an integer", diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index 5d3199f1b..0d2b76ad1 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -7,12 +7,13 @@ use crate::schema::{Field, Value}; /// `FieldValue` holds together a `Field` and its `Value`. #[allow(missing_docs)] #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -pub struct FieldValue { +#[serde(bound(deserialize = "'a: 'de, 'de: 'a"))] +pub struct FieldValue<'a> { pub field: Field, - pub value: Value, + pub value: Value<'a>, } -impl FieldValue { +impl<'a> FieldValue<'a> { /// Constructor pub fn new(field: Field, value: Value) -> FieldValue { FieldValue { field, value } @@ -29,13 +30,13 @@ impl FieldValue { } } -impl From for Value { - fn from(field_value: FieldValue) -> Self { +impl<'a> From> for Value<'a> { + fn from(field_value: FieldValue<'a>) -> Self { field_value.value } } -impl BinarySerializable for FieldValue { +impl<'a> BinarySerializable for FieldValue<'a> { fn serialize(&self, writer: &mut W) -> io::Result<()> { self.field.serialize(writer)?; self.value.serialize(writer) diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 9f7d09fae..c022f61eb 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -10,4 +10,5 @@ use crate::schema::Value; /// A `NamedFieldDocument` is a simple representation of a document /// as a `BTreeMap>`. #[derive(Debug, Deserialize, Serialize)] -pub struct NamedFieldDocument(pub BTreeMap>); +#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))] +pub struct NamedFieldDocument(pub BTreeMap>>); diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 0650a78f2..37722e7c4 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -308,7 +308,11 @@ impl Schema { let mut field_map = BTreeMap::new(); for (field, field_values) in doc.get_sorted_field_values() { let field_name = self.get_field_name(field); - let values: Vec = field_values.into_iter().cloned().collect(); + let values: Vec = field_values + .into_iter() + .cloned() + .map(Value::into_owned) + .collect(); field_map.insert(field_name.to_string(), values); } NamedFieldDocument(field_map) @@ -338,20 +342,21 @@ impl Schema { if let Some(field) = self.get_field(&field_name) { let field_entry = self.get_field_entry(field); let field_type = field_entry.field_type(); + // TODO rewrite this with shared allocation? match json_value { JsonValue::Array(json_items) => { for json_item in json_items { let value = field_type .value_from_json(json_item) .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?; - doc.add_field_value(field, value); + doc.add_field_value(field, value.into_owned()); } } _ => { let value = field_type .value_from_json(json_value) .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?; - doc.add_field_value(field, value); + doc.add_field_value(field, value.into_owned()); } } } diff --git a/src/schema/value.rs b/src/schema/value.rs index d3df1c46c..0766833fe 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt; use std::net::Ipv6Addr; @@ -12,9 +13,9 @@ use crate::DateTime; /// Value represents the value of a any field. /// It is an enum over all over all of the possible field type. #[derive(Debug, Clone, PartialEq)] -pub enum Value { +pub enum Value<'a> { /// The str type is used for any text information. - Str(String), + Str(Cow<'a, str>), /// Pre-tokenized str type, PreTokStr(PreTokenizedString), /// Unsigned 64-bits Integer `u64` @@ -30,16 +31,38 @@ pub enum Value { /// Facet Facet(Facet), /// Arbitrarily sized byte array + // TODO allow Cow<'a, [u8]> Bytes(Vec), /// Json object value. + // TODO allow Cow keys and borrowed values JsonObject(serde_json::Map), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. IpAddr(Ipv6Addr), } -impl Eq for Value {} +impl<'a> Value<'a> { + /// Convert a borrowing [`Value`] to an owning one. + pub fn into_owned(self) -> Value<'static> { + use Value::*; + match self { + Str(val) => Str(Cow::Owned(val.into_owned())), + PreTokStr(val) => PreTokStr(val), + U64(val) => U64(val), + I64(val) => I64(val), + F64(val) => F64(val), + Bool(val) => Bool(val), + Date(val) => Date(val), + Facet(val) => Facet(val), + Bytes(val) => Bytes(val), + JsonObject(val) => JsonObject(val), + IpAddr(val) => IpAddr(val), + } + } +} -impl Serialize for Value { +impl<'a> Eq for Value<'a> {} + +impl<'a> Serialize for Value<'a> { fn serialize(&self, serializer: S) -> Result where S: Serializer { match *self { @@ -65,13 +88,13 @@ impl Serialize for Value { } } -impl<'de> Deserialize<'de> for Value { +impl<'de> Deserialize<'de> for Value<'de> { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { - type Value = Value; + type Value = Value<'de>; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("a string or u32") @@ -93,12 +116,13 @@ impl<'de> Deserialize<'de> for Value { Ok(Value::Bool(v)) } + // TODO add visit_borrowed_str fn visit_str(self, v: &str) -> Result { - Ok(Value::Str(v.to_owned())) + Ok(Value::Str(Cow::Owned(v.to_owned()))) } fn visit_string(self, v: String) -> Result { - Ok(Value::Str(v)) + Ok(Value::Str(Cow::Owned(v))) } } @@ -106,7 +130,7 @@ impl<'de> Deserialize<'de> for Value { } } -impl Value { +impl<'a> Value<'a> { /// Returns the text value, provided the value is of the `Str` type. /// (Returns `None` if the value is not of the `Str` type). pub fn as_text(&self) -> Option<&str> { @@ -224,86 +248,87 @@ impl Value { } } -impl From for Value { - fn from(s: String) -> Value { - Value::Str(s) +impl From for Value<'static> { + fn from(s: String) -> Value<'static> { + Value::Str(Cow::Owned(s)) } } -impl From for Value { - fn from(v: Ipv6Addr) -> Value { +impl From for Value<'static> { + fn from(v: Ipv6Addr) -> Value<'static> { Value::IpAddr(v) } } -impl From for Value { - fn from(v: u64) -> Value { +impl From for Value<'static> { + fn from(v: u64) -> Value<'static> { Value::U64(v) } } -impl From for Value { - fn from(v: i64) -> Value { +impl From for Value<'static> { + fn from(v: i64) -> Value<'static> { Value::I64(v) } } -impl From for Value { - fn from(v: f64) -> Value { +impl From for Value<'static> { + fn from(v: f64) -> Value<'static> { Value::F64(v) } } -impl From for Value { +impl From for Value<'static> { fn from(b: bool) -> Self { Value::Bool(b) } } -impl From for Value { - fn from(dt: DateTime) -> Value { +impl From for Value<'static> { + fn from(dt: DateTime) -> Value<'static> { Value::Date(dt) } } -impl<'a> From<&'a str> for Value { - fn from(s: &'a str) -> Value { - Value::Str(s.to_string()) +impl<'a> From<&'a str> for Value<'a> { + fn from(s: &'a str) -> Value<'a> { + Value::Str(Cow::Borrowed(s)) } } -impl<'a> From<&'a [u8]> for Value { - fn from(bytes: &'a [u8]) -> Value { +// TODO change lifetime to 'a +impl<'a> From<&'a [u8]> for Value<'static> { + fn from(bytes: &'a [u8]) -> Value<'static> { Value::Bytes(bytes.to_vec()) } } -impl From for Value { - fn from(facet: Facet) -> Value { +impl From for Value<'static> { + fn from(facet: Facet) -> Value<'static> { Value::Facet(facet) } } -impl From> for Value { - fn from(bytes: Vec) -> Value { +impl From> for Value<'static> { + fn from(bytes: Vec) -> Value<'static> { Value::Bytes(bytes) } } -impl From for Value { - fn from(pretokenized_string: PreTokenizedString) -> Value { +impl From for Value<'static> { + fn from(pretokenized_string: PreTokenizedString) -> Value<'static> { Value::PreTokStr(pretokenized_string) } } -impl From> for Value { - fn from(json_object: serde_json::Map) -> Value { +impl From> for Value<'static> { + fn from(json_object: serde_json::Map) -> Value<'static> { Value::JsonObject(json_object) } } -impl From for Value { - fn from(json_value: serde_json::Value) -> Value { +impl From for Value<'static> { + fn from(json_value: serde_json::Value) -> Value<'static> { match json_value { serde_json::Value::Object(json_object) => Value::JsonObject(json_object), _ => { @@ -314,6 +339,7 @@ impl From for Value { } mod binary_serialize { + use std::borrow::Cow; use std::io::{self, Read, Write}; use std::net::Ipv6Addr; @@ -341,7 +367,7 @@ mod binary_serialize { const TOK_STR_CODE: u8 = 0; - impl BinarySerializable for Value { + impl<'a> BinarySerializable for Value<'a> { fn serialize(&self, writer: &mut W) -> io::Result<()> { match *self { Value::Str(ref text) => { @@ -408,7 +434,7 @@ mod binary_serialize { match type_code { TEXT_CODE => { let text = String::deserialize(reader)?; - Ok(Value::Str(text)) + Ok(Value::Str(Cow::Owned(text))) } U64_CODE => { let value = u64::deserialize(reader)?;