allow Value to borrow

This commit is contained in:
trinity-1686a
2022-12-22 15:43:13 +01:00
parent 951a898633
commit 654aa7f42c
8 changed files with 119 additions and 62 deletions

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::io::{Read, Write};
use std::{fmt, io};
@@ -210,6 +211,23 @@ impl BinarySerializable for String {
}
}
/// Binary (de)serialization for copy-on-write strings.
///
/// The encoding is the UTF-8 byte length written as a `VInt`, followed by the raw bytes.
impl<'a> BinarySerializable for Cow<'a, str> {
    /// Writes the byte length of the string as a `VInt`, then the bytes themselves.
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
    }

    /// Reads a length-prefixed string and returns it as `Cow::Owned`.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying reader fails, if `read_to_string` rejects the
    /// payload, or, with `io::ErrorKind::UnexpectedEof`, if the reader ends before the
    /// announced number of bytes has been read.
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let string_length = VInt::deserialize(reader)?.val() as usize;
        let mut result = String::with_capacity(string_length);
        let bytes_read = reader
            .take(string_length as u64)
            .read_to_string(&mut result)?;
        // `take` only caps the read at `string_length` bytes; it does not guarantee that
        // many are available. Without this check a truncated stream would silently
        // produce a shorter string than the one that was serialized.
        if bytes_read != string_length {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                format!(
                    "expected {} bytes of string data, found {}",
                    string_length, bytes_read
                ),
            ));
        }
        Ok(Cow::Owned(result))
    }
}
#[cfg(test)]
pub mod test {

View File

@@ -31,7 +31,7 @@ pub struct MoreLikeThisQuery {
#[derive(Debug, PartialEq, Clone)]
enum TargetDocument {
DocumentAdress(DocAddress),
DocumentFields(Vec<(Field, Vec<Value>)>),
DocumentFields(Vec<(Field, Vec<Value<'static>>)>),
}
impl MoreLikeThisQuery {
@@ -160,7 +160,10 @@ impl MoreLikeThisQueryBuilder {
/// that will be used to compose the resulting query.
/// This interface is meant to be used when you want to provide your own set of fields
/// not necessarily from a specific document.
pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec<Value>)>) -> MoreLikeThisQuery {
pub fn with_document_fields(
self,
doc_fields: Vec<(Field, Vec<Value<'static>>)>,
) -> MoreLikeThisQuery {
MoreLikeThisQuery {
mlt: self.mlt,
target: TargetDocument::DocumentFields(doc_fields),

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::io::{self, Read, Write};
use std::mem;
@@ -15,12 +16,13 @@ use crate::DateTime;
/// Documents are fundamentally a collection of unordered couples `(field, value)`.
/// In this list, one field may appear more than once.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
pub struct Document {
field_values: Vec<FieldValue>,
field_values: Vec<FieldValue<'static>>,
}
impl From<Vec<FieldValue>> for Document {
fn from(field_values: Vec<FieldValue>) -> Self {
impl From<Vec<FieldValue<'static>>> for Document {
fn from(field_values: Vec<FieldValue<'static>>) -> Self {
Document { field_values }
}
}
@@ -49,9 +51,9 @@ impl PartialEq for Document {
impl Eq for Document {}
impl IntoIterator for Document {
type Item = FieldValue;
type Item = FieldValue<'static>;
type IntoIter = std::vec::IntoIter<FieldValue>;
type IntoIter = std::vec::IntoIter<FieldValue<'static>>;
fn into_iter(self) -> Self::IntoIter {
self.field_values.into_iter()
@@ -84,7 +86,7 @@ impl Document {
/// Add a text field.
pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
let value = Value::Str(text.to_string());
let value = Value::Str(Cow::Owned(text.to_string()));
self.add_field_value(field, value);
}
@@ -138,7 +140,7 @@ impl Document {
}
/// Add a (field, value) to the document.
pub fn add_field_value<T: Into<Value>>(&mut self, field: Field, typed_val: T) {
pub fn add_field_value<T: Into<Value<'static>>>(&mut self, field: Field, typed_val: T) {
let value = typed_val.into();
let field_value = FieldValue { field, value };
self.field_values.push(field_value);
@@ -216,7 +218,7 @@ impl Document {
} => {
let field_value = FieldValue {
field: *field,
value: Value::Str(pre_tokenized_text.text.to_string()),
value: Value::Str(Cow::Owned(pre_tokenized_text.text.to_string())),
};
field_value.serialize(writer)?;
}

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::net::IpAddr;
use std::str::FromStr;
@@ -329,7 +330,7 @@ impl FieldType {
/// Tantivy will not try to cast values.
/// For instance, if the json value is the integer `3` and the
/// target field is a `Str`, this method will return an Error.
pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
pub fn value_from_json(&self, json: JsonValue) -> Result<Value<'static>, ValueParsingError> {
match json {
JsonValue::String(field_text) => {
match self {
@@ -341,7 +342,7 @@ impl FieldType {
})?;
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::Str(_) => Ok(Value::Str(Cow::Owned(field_text))),
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
Err(ValueParsingError::TypeError {
expected: "an integer",

View File

@@ -7,12 +7,13 @@ use crate::schema::{Field, Value};
/// `FieldValue` holds together a `Field` and its `Value`.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct FieldValue {
#[serde(bound(deserialize = "'a: 'de, 'de: 'a"))]
pub struct FieldValue<'a> {
pub field: Field,
pub value: Value,
pub value: Value<'a>,
}
impl FieldValue {
impl<'a> FieldValue<'a> {
/// Constructor
pub fn new(field: Field, value: Value) -> FieldValue {
FieldValue { field, value }
@@ -29,13 +30,13 @@ impl FieldValue {
}
}
impl From<FieldValue> for Value {
fn from(field_value: FieldValue) -> Self {
impl<'a> From<FieldValue<'a>> for Value<'a> {
fn from(field_value: FieldValue<'a>) -> Self {
field_value.value
}
}
impl BinarySerializable for FieldValue {
impl<'a> BinarySerializable for FieldValue<'a> {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
self.field.serialize(writer)?;
self.value.serialize(writer)

View File

@@ -10,4 +10,5 @@ use crate::schema::Value;
/// A `NamedFieldDocument` is a simple representation of a document
/// as a `BTreeMap<String, Vec<Value>>`.
#[derive(Debug, Deserialize, Serialize)]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value<'static>>>);

View File

@@ -308,7 +308,11 @@ impl Schema {
let mut field_map = BTreeMap::new();
for (field, field_values) in doc.get_sorted_field_values() {
let field_name = self.get_field_name(field);
let values: Vec<Value> = field_values.into_iter().cloned().collect();
let values: Vec<Value> = field_values
.into_iter()
.cloned()
.map(Value::into_owned)
.collect();
field_map.insert(field_name.to_string(), values);
}
NamedFieldDocument(field_map)
@@ -338,20 +342,21 @@ impl Schema {
if let Some(field) = self.get_field(&field_name) {
let field_entry = self.get_field_entry(field);
let field_type = field_entry.field_type();
// TODO rewrite this with shared allocation?
match json_value {
JsonValue::Array(json_items) => {
for json_item in json_items {
let value = field_type
.value_from_json(json_item)
.map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
doc.add_field_value(field, value);
doc.add_field_value(field, value.into_owned());
}
}
_ => {
let value = field_type
.value_from_json(json_value)
.map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
doc.add_field_value(field, value);
doc.add_field_value(field, value.into_owned());
}
}
}

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt;
use std::net::Ipv6Addr;
@@ -12,9 +13,9 @@ use crate::DateTime;
/// Value represents the value of any field.
/// It is an enum over all of the possible field types.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
pub enum Value<'a> {
/// The str type is used for any text information.
Str(String),
Str(Cow<'a, str>),
/// Pre-tokenized str type,
PreTokStr(PreTokenizedString),
/// Unsigned 64-bits Integer `u64`
@@ -30,16 +31,38 @@ pub enum Value {
/// Facet
Facet(Facet),
/// Arbitrarily sized byte array
// TODO allow Cow<'a, [u8]>
Bytes(Vec<u8>),
/// Json object value.
// TODO allow Cow keys and borrowed values
JsonObject(serde_json::Map<String, serde_json::Value>),
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
IpAddr(Ipv6Addr),
}
impl Eq for Value {}
impl<'a> Value<'a> {
/// Convert a borrowing [`Value`] to an owning one.
pub fn into_owned(self) -> Value<'static> {
use Value::*;
match self {
Str(val) => Str(Cow::Owned(val.into_owned())),
PreTokStr(val) => PreTokStr(val),
U64(val) => U64(val),
I64(val) => I64(val),
F64(val) => F64(val),
Bool(val) => Bool(val),
Date(val) => Date(val),
Facet(val) => Facet(val),
Bytes(val) => Bytes(val),
JsonObject(val) => JsonObject(val),
IpAddr(val) => IpAddr(val),
}
}
}
impl Serialize for Value {
impl<'a> Eq for Value<'a> {}
impl<'a> Serialize for Value<'a> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer {
match *self {
@@ -65,13 +88,13 @@ impl Serialize for Value {
}
}
impl<'de> Deserialize<'de> for Value {
impl<'de> Deserialize<'de> for Value<'de> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de> {
struct ValueVisitor;
impl<'de> Visitor<'de> for ValueVisitor {
type Value = Value;
type Value = Value<'de>;
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("a string or u32")
@@ -93,12 +116,13 @@ impl<'de> Deserialize<'de> for Value {
Ok(Value::Bool(v))
}
// TODO add visit_borrowed_str
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
Ok(Value::Str(v.to_owned()))
Ok(Value::Str(Cow::Owned(v.to_owned())))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
Ok(Value::Str(v))
Ok(Value::Str(Cow::Owned(v)))
}
}
@@ -106,7 +130,7 @@ impl<'de> Deserialize<'de> for Value {
}
}
impl Value {
impl<'a> Value<'a> {
/// Returns the text value, provided the value is of the `Str` type.
/// (Returns `None` if the value is not of the `Str` type).
pub fn as_text(&self) -> Option<&str> {
@@ -224,86 +248,87 @@ impl Value {
}
}
impl From<String> for Value {
fn from(s: String) -> Value {
Value::Str(s)
impl From<String> for Value<'static> {
fn from(s: String) -> Value<'static> {
Value::Str(Cow::Owned(s))
}
}
impl From<Ipv6Addr> for Value {
fn from(v: Ipv6Addr) -> Value {
impl From<Ipv6Addr> for Value<'static> {
fn from(v: Ipv6Addr) -> Value<'static> {
Value::IpAddr(v)
}
}
impl From<u64> for Value {
fn from(v: u64) -> Value {
impl From<u64> for Value<'static> {
fn from(v: u64) -> Value<'static> {
Value::U64(v)
}
}
impl From<i64> for Value {
fn from(v: i64) -> Value {
impl From<i64> for Value<'static> {
fn from(v: i64) -> Value<'static> {
Value::I64(v)
}
}
impl From<f64> for Value {
fn from(v: f64) -> Value {
impl From<f64> for Value<'static> {
fn from(v: f64) -> Value<'static> {
Value::F64(v)
}
}
impl From<bool> for Value {
impl From<bool> for Value<'static> {
fn from(b: bool) -> Self {
Value::Bool(b)
}
}
impl From<DateTime> for Value {
fn from(dt: DateTime) -> Value {
impl From<DateTime> for Value<'static> {
fn from(dt: DateTime) -> Value<'static> {
Value::Date(dt)
}
}
impl<'a> From<&'a str> for Value {
fn from(s: &'a str) -> Value {
Value::Str(s.to_string())
impl<'a> From<&'a str> for Value<'a> {
fn from(s: &'a str) -> Value<'a> {
Value::Str(Cow::Borrowed(s))
}
}
impl<'a> From<&'a [u8]> for Value {
fn from(bytes: &'a [u8]) -> Value {
// TODO change lifetime to 'a
impl<'a> From<&'a [u8]> for Value<'static> {
fn from(bytes: &'a [u8]) -> Value<'static> {
Value::Bytes(bytes.to_vec())
}
}
impl From<Facet> for Value {
fn from(facet: Facet) -> Value {
impl From<Facet> for Value<'static> {
fn from(facet: Facet) -> Value<'static> {
Value::Facet(facet)
}
}
impl From<Vec<u8>> for Value {
fn from(bytes: Vec<u8>) -> Value {
impl From<Vec<u8>> for Value<'static> {
fn from(bytes: Vec<u8>) -> Value<'static> {
Value::Bytes(bytes)
}
}
impl From<PreTokenizedString> for Value {
fn from(pretokenized_string: PreTokenizedString) -> Value {
impl From<PreTokenizedString> for Value<'static> {
fn from(pretokenized_string: PreTokenizedString) -> Value<'static> {
Value::PreTokStr(pretokenized_string)
}
}
impl From<serde_json::Map<String, serde_json::Value>> for Value {
fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value {
impl From<serde_json::Map<String, serde_json::Value>> for Value<'static> {
fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value<'static> {
Value::JsonObject(json_object)
}
}
impl From<serde_json::Value> for Value {
fn from(json_value: serde_json::Value) -> Value {
impl From<serde_json::Value> for Value<'static> {
fn from(json_value: serde_json::Value) -> Value<'static> {
match json_value {
serde_json::Value::Object(json_object) => Value::JsonObject(json_object),
_ => {
@@ -314,6 +339,7 @@ impl From<serde_json::Value> for Value {
}
mod binary_serialize {
use std::borrow::Cow;
use std::io::{self, Read, Write};
use std::net::Ipv6Addr;
@@ -341,7 +367,7 @@ mod binary_serialize {
const TOK_STR_CODE: u8 = 0;
impl BinarySerializable for Value {
impl<'a> BinarySerializable for Value<'a> {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
match *self {
Value::Str(ref text) => {
@@ -408,7 +434,7 @@ mod binary_serialize {
match type_code {
TEXT_CODE => {
let text = String::deserialize(reader)?;
Ok(Value::Str(text))
Ok(Value::Str(Cow::Owned(text)))
}
U64_CODE => {
let value = u64::deserialize(reader)?;