mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 00:02:55 +00:00
allow Value to borrow
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::io::{Read, Write};
|
||||
use std::{fmt, io};
|
||||
|
||||
@@ -210,6 +211,23 @@ impl BinarySerializable for String {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BinarySerializable for Cow<'a, str> {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
let data: &[u8] = self.as_bytes();
|
||||
VInt(data.len() as u64).serialize(writer)?;
|
||||
writer.write_all(data)
|
||||
}
|
||||
|
||||
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
|
||||
let string_length = VInt::deserialize(reader)?.val() as usize;
|
||||
let mut result = String::with_capacity(string_length);
|
||||
reader
|
||||
.take(string_length as u64)
|
||||
.read_to_string(&mut result)?;
|
||||
Ok(Cow::Owned(result))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ pub struct MoreLikeThisQuery {
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
enum TargetDocument {
|
||||
DocumentAdress(DocAddress),
|
||||
DocumentFields(Vec<(Field, Vec<Value>)>),
|
||||
DocumentFields(Vec<(Field, Vec<Value<'static>>)>),
|
||||
}
|
||||
|
||||
impl MoreLikeThisQuery {
|
||||
@@ -160,7 +160,10 @@ impl MoreLikeThisQueryBuilder {
|
||||
/// that will be used to compose the resulting query.
|
||||
/// This interface is meant to be used when you want to provide your own set of fields
|
||||
/// not necessarily from a specific document.
|
||||
pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec<Value>)>) -> MoreLikeThisQuery {
|
||||
pub fn with_document_fields(
|
||||
self,
|
||||
doc_fields: Vec<(Field, Vec<Value<'static>>)>,
|
||||
) -> MoreLikeThisQuery {
|
||||
MoreLikeThisQuery {
|
||||
mlt: self.mlt,
|
||||
target: TargetDocument::DocumentFields(doc_fields),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::io::{self, Read, Write};
|
||||
use std::mem;
|
||||
@@ -15,12 +16,13 @@ use crate::DateTime;
|
||||
/// Documents are fundamentally a collection of unordered couples `(field, value)`.
|
||||
/// In this list, one field may appear more than once.
|
||||
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
|
||||
#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
|
||||
pub struct Document {
|
||||
field_values: Vec<FieldValue>,
|
||||
field_values: Vec<FieldValue<'static>>,
|
||||
}
|
||||
|
||||
impl From<Vec<FieldValue>> for Document {
|
||||
fn from(field_values: Vec<FieldValue>) -> Self {
|
||||
impl From<Vec<FieldValue<'static>>> for Document {
|
||||
fn from(field_values: Vec<FieldValue<'static>>) -> Self {
|
||||
Document { field_values }
|
||||
}
|
||||
}
|
||||
@@ -49,9 +51,9 @@ impl PartialEq for Document {
|
||||
impl Eq for Document {}
|
||||
|
||||
impl IntoIterator for Document {
|
||||
type Item = FieldValue;
|
||||
type Item = FieldValue<'static>;
|
||||
|
||||
type IntoIter = std::vec::IntoIter<FieldValue>;
|
||||
type IntoIter = std::vec::IntoIter<FieldValue<'static>>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.field_values.into_iter()
|
||||
@@ -84,7 +86,7 @@ impl Document {
|
||||
|
||||
/// Add a text field.
|
||||
pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
|
||||
let value = Value::Str(text.to_string());
|
||||
let value = Value::Str(Cow::Owned(text.to_string()));
|
||||
self.add_field_value(field, value);
|
||||
}
|
||||
|
||||
@@ -138,7 +140,7 @@ impl Document {
|
||||
}
|
||||
|
||||
/// Add a (field, value) to the document.
|
||||
pub fn add_field_value<T: Into<Value>>(&mut self, field: Field, typed_val: T) {
|
||||
pub fn add_field_value<T: Into<Value<'static>>>(&mut self, field: Field, typed_val: T) {
|
||||
let value = typed_val.into();
|
||||
let field_value = FieldValue { field, value };
|
||||
self.field_values.push(field_value);
|
||||
@@ -216,7 +218,7 @@ impl Document {
|
||||
} => {
|
||||
let field_value = FieldValue {
|
||||
field: *field,
|
||||
value: Value::Str(pre_tokenized_text.text.to_string()),
|
||||
value: Value::Str(Cow::Owned(pre_tokenized_text.text.to_string())),
|
||||
};
|
||||
field_value.serialize(writer)?;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -329,7 +330,7 @@ impl FieldType {
|
||||
/// Tantivy will not try to cast values.
|
||||
/// For instance, if the json value is the integer `3` and the
|
||||
/// target field is a `Str`, this method will return an Error.
|
||||
pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
|
||||
pub fn value_from_json(&self, json: JsonValue) -> Result<Value<'static>, ValueParsingError> {
|
||||
match json {
|
||||
JsonValue::String(field_text) => {
|
||||
match self {
|
||||
@@ -341,7 +342,7 @@ impl FieldType {
|
||||
})?;
|
||||
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
|
||||
}
|
||||
FieldType::Str(_) => Ok(Value::Str(field_text)),
|
||||
FieldType::Str(_) => Ok(Value::Str(Cow::Owned(field_text))),
|
||||
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
|
||||
Err(ValueParsingError::TypeError {
|
||||
expected: "an integer",
|
||||
|
||||
@@ -7,12 +7,13 @@ use crate::schema::{Field, Value};
|
||||
/// `FieldValue` holds together a `Field` and its `Value`.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct FieldValue {
|
||||
#[serde(bound(deserialize = "'a: 'de, 'de: 'a"))]
|
||||
pub struct FieldValue<'a> {
|
||||
pub field: Field,
|
||||
pub value: Value,
|
||||
pub value: Value<'a>,
|
||||
}
|
||||
|
||||
impl FieldValue {
|
||||
impl<'a> FieldValue<'a> {
|
||||
/// Constructor
|
||||
pub fn new(field: Field, value: Value) -> FieldValue {
|
||||
FieldValue { field, value }
|
||||
@@ -29,13 +30,13 @@ impl FieldValue {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FieldValue> for Value {
|
||||
fn from(field_value: FieldValue) -> Self {
|
||||
impl<'a> From<FieldValue<'a>> for Value<'a> {
|
||||
fn from(field_value: FieldValue<'a>) -> Self {
|
||||
field_value.value
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for FieldValue {
|
||||
impl<'a> BinarySerializable for FieldValue<'a> {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
self.field.serialize(writer)?;
|
||||
self.value.serialize(writer)
|
||||
|
||||
@@ -10,4 +10,5 @@ use crate::schema::Value;
|
||||
/// A `NamedFieldDocument` is a simple representation of a document
|
||||
/// as a `BTreeMap<String, Vec<Value>>`.
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
|
||||
#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
|
||||
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value<'static>>>);
|
||||
|
||||
@@ -308,7 +308,11 @@ impl Schema {
|
||||
let mut field_map = BTreeMap::new();
|
||||
for (field, field_values) in doc.get_sorted_field_values() {
|
||||
let field_name = self.get_field_name(field);
|
||||
let values: Vec<Value> = field_values.into_iter().cloned().collect();
|
||||
let values: Vec<Value> = field_values
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.map(Value::into_owned)
|
||||
.collect();
|
||||
field_map.insert(field_name.to_string(), values);
|
||||
}
|
||||
NamedFieldDocument(field_map)
|
||||
@@ -338,20 +342,21 @@ impl Schema {
|
||||
if let Some(field) = self.get_field(&field_name) {
|
||||
let field_entry = self.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
// TODO rewrite this with shared allocation?
|
||||
match json_value {
|
||||
JsonValue::Array(json_items) => {
|
||||
for json_item in json_items {
|
||||
let value = field_type
|
||||
.value_from_json(json_item)
|
||||
.map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
|
||||
doc.add_field_value(field, value);
|
||||
doc.add_field_value(field, value.into_owned());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let value = field_type
|
||||
.value_from_json(json_value)
|
||||
.map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
|
||||
doc.add_field_value(field, value);
|
||||
doc.add_field_value(field, value.into_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
@@ -12,9 +13,9 @@ use crate::DateTime;
|
||||
/// Value represents the value of any field.
|
||||
/// It is an enum over all of the possible field types.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Value {
|
||||
pub enum Value<'a> {
|
||||
/// The str type is used for any text information.
|
||||
Str(String),
|
||||
Str(Cow<'a, str>),
|
||||
/// Pre-tokenized str type,
|
||||
PreTokStr(PreTokenizedString),
|
||||
/// Unsigned 64-bits Integer `u64`
|
||||
@@ -30,16 +31,38 @@ pub enum Value {
|
||||
/// Facet
|
||||
Facet(Facet),
|
||||
/// Arbitrarily sized byte array
|
||||
// TODO allow Cow<'a, [u8]>
|
||||
Bytes(Vec<u8>),
|
||||
/// Json object value.
|
||||
// TODO allow Cow keys and borrowed values
|
||||
JsonObject(serde_json::Map<String, serde_json::Value>),
|
||||
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
|
||||
IpAddr(Ipv6Addr),
|
||||
}
|
||||
|
||||
impl Eq for Value {}
|
||||
impl<'a> Value<'a> {
|
||||
/// Convert a borrowing [`Value`] to an owning one.
|
||||
pub fn into_owned(self) -> Value<'static> {
|
||||
use Value::*;
|
||||
match self {
|
||||
Str(val) => Str(Cow::Owned(val.into_owned())),
|
||||
PreTokStr(val) => PreTokStr(val),
|
||||
U64(val) => U64(val),
|
||||
I64(val) => I64(val),
|
||||
F64(val) => F64(val),
|
||||
Bool(val) => Bool(val),
|
||||
Date(val) => Date(val),
|
||||
Facet(val) => Facet(val),
|
||||
Bytes(val) => Bytes(val),
|
||||
JsonObject(val) => JsonObject(val),
|
||||
IpAddr(val) => IpAddr(val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Value {
|
||||
impl<'a> Eq for Value<'a> {}
|
||||
|
||||
impl<'a> Serialize for Value<'a> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where S: Serializer {
|
||||
match *self {
|
||||
@@ -65,13 +88,13 @@ impl Serialize for Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Value {
|
||||
impl<'de> Deserialize<'de> for Value<'de> {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where D: Deserializer<'de> {
|
||||
struct ValueVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for ValueVisitor {
|
||||
type Value = Value;
|
||||
type Value = Value<'de>;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
formatter.write_str("a string or u32")
|
||||
@@ -93,12 +116,13 @@ impl<'de> Deserialize<'de> for Value {
|
||||
Ok(Value::Bool(v))
|
||||
}
|
||||
|
||||
// TODO add visit_borrowed_str
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
|
||||
Ok(Value::Str(v.to_owned()))
|
||||
Ok(Value::Str(Cow::Owned(v.to_owned())))
|
||||
}
|
||||
|
||||
fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
|
||||
Ok(Value::Str(v))
|
||||
Ok(Value::Str(Cow::Owned(v)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,7 +130,7 @@ impl<'de> Deserialize<'de> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl Value {
|
||||
impl<'a> Value<'a> {
|
||||
/// Returns the text value, provided the value is of the `Str` type.
|
||||
/// (Returns `None` if the value is not of the `Str` type).
|
||||
pub fn as_text(&self) -> Option<&str> {
|
||||
@@ -224,86 +248,87 @@ impl Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(s: String) -> Value {
|
||||
Value::Str(s)
|
||||
impl From<String> for Value<'static> {
|
||||
fn from(s: String) -> Value<'static> {
|
||||
Value::Str(Cow::Owned(s))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Ipv6Addr> for Value {
|
||||
fn from(v: Ipv6Addr) -> Value {
|
||||
impl From<Ipv6Addr> for Value<'static> {
|
||||
fn from(v: Ipv6Addr) -> Value<'static> {
|
||||
Value::IpAddr(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u64> for Value {
|
||||
fn from(v: u64) -> Value {
|
||||
impl From<u64> for Value<'static> {
|
||||
fn from(v: u64) -> Value<'static> {
|
||||
Value::U64(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for Value {
|
||||
fn from(v: i64) -> Value {
|
||||
impl From<i64> for Value<'static> {
|
||||
fn from(v: i64) -> Value<'static> {
|
||||
Value::I64(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<f64> for Value {
|
||||
fn from(v: f64) -> Value {
|
||||
impl From<f64> for Value<'static> {
|
||||
fn from(v: f64) -> Value<'static> {
|
||||
Value::F64(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for Value {
|
||||
impl From<bool> for Value<'static> {
|
||||
fn from(b: bool) -> Self {
|
||||
Value::Bool(b)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DateTime> for Value {
|
||||
fn from(dt: DateTime) -> Value {
|
||||
impl From<DateTime> for Value<'static> {
|
||||
fn from(dt: DateTime) -> Value<'static> {
|
||||
Value::Date(dt)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for Value {
|
||||
fn from(s: &'a str) -> Value {
|
||||
Value::Str(s.to_string())
|
||||
impl<'a> From<&'a str> for Value<'a> {
|
||||
fn from(s: &'a str) -> Value<'a> {
|
||||
Value::Str(Cow::Borrowed(s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a [u8]> for Value {
|
||||
fn from(bytes: &'a [u8]) -> Value {
|
||||
// TODO change lifetime to 'a
|
||||
impl<'a> From<&'a [u8]> for Value<'static> {
|
||||
fn from(bytes: &'a [u8]) -> Value<'static> {
|
||||
Value::Bytes(bytes.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Facet> for Value {
|
||||
fn from(facet: Facet) -> Value {
|
||||
impl From<Facet> for Value<'static> {
|
||||
fn from(facet: Facet) -> Value<'static> {
|
||||
Value::Facet(facet)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<u8>> for Value {
|
||||
fn from(bytes: Vec<u8>) -> Value {
|
||||
impl From<Vec<u8>> for Value<'static> {
|
||||
fn from(bytes: Vec<u8>) -> Value<'static> {
|
||||
Value::Bytes(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PreTokenizedString> for Value {
|
||||
fn from(pretokenized_string: PreTokenizedString) -> Value {
|
||||
impl From<PreTokenizedString> for Value<'static> {
|
||||
fn from(pretokenized_string: PreTokenizedString) -> Value<'static> {
|
||||
Value::PreTokStr(pretokenized_string)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Map<String, serde_json::Value>> for Value {
|
||||
fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value {
|
||||
impl From<serde_json::Map<String, serde_json::Value>> for Value<'static> {
|
||||
fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value<'static> {
|
||||
Value::JsonObject(json_object)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Value> for Value {
|
||||
fn from(json_value: serde_json::Value) -> Value {
|
||||
impl From<serde_json::Value> for Value<'static> {
|
||||
fn from(json_value: serde_json::Value) -> Value<'static> {
|
||||
match json_value {
|
||||
serde_json::Value::Object(json_object) => Value::JsonObject(json_object),
|
||||
_ => {
|
||||
@@ -314,6 +339,7 @@ impl From<serde_json::Value> for Value {
|
||||
}
|
||||
|
||||
mod binary_serialize {
|
||||
use std::borrow::Cow;
|
||||
use std::io::{self, Read, Write};
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
@@ -341,7 +367,7 @@ mod binary_serialize {
|
||||
|
||||
const TOK_STR_CODE: u8 = 0;
|
||||
|
||||
impl BinarySerializable for Value {
|
||||
impl<'a> BinarySerializable for Value<'a> {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
match *self {
|
||||
Value::Str(ref text) => {
|
||||
@@ -408,7 +434,7 @@ mod binary_serialize {
|
||||
match type_code {
|
||||
TEXT_CODE => {
|
||||
let text = String::deserialize(reader)?;
|
||||
Ok(Value::Str(text))
|
||||
Ok(Value::Str(Cow::Owned(text)))
|
||||
}
|
||||
U64_CODE => {
|
||||
let value = u64::deserialize(reader)?;
|
||||
|
||||
Reference in New Issue
Block a user