Mirror of https://github.com/quickwit-oss/tantivy.git
Synced 2026-01-04 16:22:55 +00:00

Compare commits: missing-sp...trinity/yo (6 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | bcff3eb2d2 |  |
|  | 85f2588875 |  |
|  | db6cf65d53 |  |
|  | 654aa7f42c |  |
|  | 951a898633 |  |
|  | 003722d831 |  |
```diff
@@ -61,6 +61,7 @@ measure_time = "0.8.2"
 ciborium = { version = "0.2", optional = true}
 async-trait = "0.1.53"
 arc-swap = "1.5.0"
+yoke = { version = "0.6.2", features = ["derive"] }
 
 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
```
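The only dependency change is `yoke`, which the rest of this diff uses to build self-referential groups of borrowed `FieldValue`s. In short, a `Yoke<Y, Cart>` pins heap-backed storage (the "cart") and lets a value borrow from it while both move around together. A minimal sketch of the pattern, using `Cow`, which yoke supports out of the box:

```rust
use std::borrow::Cow;

use yoke::Yoke;

fn main() {
    // The cart owns the backing storage; the yoked value borrows from it.
    let cart: Box<String> = Box::new("text held by the cart".to_owned());
    let yoked: Yoke<Cow<'static, str>, Box<String>> =
        Yoke::attach_to_cart(cart, |s| Cow::Borrowed(s.as_str()));

    // get() re-narrows the lifetime, so the borrow can never outlive the cart.
    assert_eq!(yoked.get().as_ref(), "text held by the cart");
}
```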
benches/hdfs_with_array.json — new file, 100000 lines (file diff suppressed because it is too large).
```diff
@@ -1,116 +1,159 @@
 use criterion::{criterion_group, criterion_main, Criterion};
+use itertools::Itertools;
 use pprof::criterion::{Output, PProfProfiler};
-use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
-use tantivy::Index;
+use serde_json::{self, Value as JsonValue};
+use tantivy::directory::RamDirectory;
+use tantivy::schema::{
+    FieldValue, TextFieldIndexing, TextOptions, Value, INDEXED, STORED, STRING, TEXT,
+};
+use tantivy::{Document, Index, IndexBuilder};
 
 const HDFS_LOGS: &str = include_str!("hdfs.json");
-const NUM_REPEATS: usize = 2;
+const NUM_REPEATS: usize = 20;
 
 pub fn hdfs_index_benchmark(c: &mut Criterion) {
-    let schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_u64_field("timestamp", INDEXED);
-        schema_builder.add_text_field("body", TEXT);
-        schema_builder.add_text_field("severity", STRING);
-        schema_builder.build()
-    };
-    let schema_with_store = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_u64_field("timestamp", INDEXED | STORED);
-        schema_builder.add_text_field("body", TEXT | STORED);
-        schema_builder.add_text_field("severity", STRING | STORED);
-        schema_builder.build()
-    };
-    let dynamic_schema = {
-        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_json_field("json", TEXT);
-        schema_builder.build()
-    };
+    let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+    let text_indexing_options = TextFieldIndexing::default()
+        .set_tokenizer("default")
+        .set_fieldnorms(false)
+        .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
+    let mut text_options = TextOptions::default().set_indexing_options(text_indexing_options);
+    let text_field = schema_builder.add_text_field("body", text_options);
+    let schema = schema_builder.build();
+
+    // prepare doc
+    let mut documents_no_array = Vec::new();
+    let mut documents_with_array = Vec::new();
+    for doc_json in HDFS_LOGS.trim().split("\n") {
+        let json_obj: serde_json::Map<String, JsonValue> = serde_json::from_str(doc_json).unwrap();
+        let text = json_obj.get("body").unwrap().as_str().unwrap();
+        let mut doc_no_array = Document::new();
+        doc_no_array.add_text(text_field, text);
+        documents_no_array.push(doc_no_array);
+        let mut doc_with_array = Document::new();
+        doc_with_array.add_borrowed_values(text.to_owned(), |text| {
+            text.split(' ')
+                .map(|text| FieldValue::new(text_field, text.into()))
+                .collect()
+        });
+        documents_with_array.push(doc_with_array);
+    }
 
     let mut group = c.benchmark_group("index-hdfs");
     group.sample_size(20);
     group.bench_function("index-hdfs-no-commit", |b| {
         b.iter(|| {
-            let index = Index::create_in_ram(schema.clone());
-            let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
+            let ram_directory = RamDirectory::create();
+            let mut index_writer = IndexBuilder::new()
+                .schema(schema.clone())
+                .single_segment_index_writer(ram_directory, 100_000_000)
+                .unwrap();
             for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let doc = schema.parse_document(doc_json).unwrap();
+                let documents_cloned = documents_no_array.clone();
+                for doc in documents_cloned {
                     index_writer.add_document(doc).unwrap();
                 }
             }
         })
     });
-    group.bench_function("index-hdfs-with-commit", |b| {
+    group.bench_function("index-hdfs-with-array-no-commit", |b| {
         b.iter(|| {
-            let index = Index::create_in_ram(schema.clone());
-            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
+            let ram_directory = RamDirectory::create();
+            let mut index_writer = IndexBuilder::new()
+                .schema(schema.clone())
+                .single_segment_index_writer(ram_directory, 100_000_000)
+                .unwrap();
             for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let doc = schema.parse_document(doc_json).unwrap();
-                    index_writer.add_document(doc).unwrap();
-                }
-            }
-            index_writer.commit().unwrap();
-        })
-    });
-    group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
-        b.iter(|| {
-            let index = Index::create_in_ram(schema_with_store.clone());
-            let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let doc = schema.parse_document(doc_json).unwrap();
+                let documents_with_array_cloned = documents_with_array.clone();
+                for doc in documents_with_array_cloned {
                     index_writer.add_document(doc).unwrap();
                 }
             }
         })
     });
-    group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
-        b.iter(|| {
-            let index = Index::create_in_ram(schema_with_store.clone());
-            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let doc = schema.parse_document(doc_json).unwrap();
-                    index_writer.add_document(doc).unwrap();
-                }
-            }
-            index_writer.commit().unwrap();
-        })
-    });
-    group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
-        b.iter(|| {
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let json_val: serde_json::Map<String, serde_json::Value> =
-                        serde_json::from_str(doc_json).unwrap();
-                    let doc = tantivy::doc!(json_field=>json_val);
-                    index_writer.add_document(doc).unwrap();
-                }
-            }
-            index_writer.commit().unwrap();
-        })
-    });
-    group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
-        b.iter(|| {
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for _ in 0..NUM_REPEATS {
-                for doc_json in HDFS_LOGS.trim().split("\n") {
-                    let json_val: serde_json::Map<String, serde_json::Value> =
-                        serde_json::from_str(doc_json).unwrap();
-                    let doc = tantivy::doc!(json_field=>json_val);
-                    index_writer.add_document(doc).unwrap();
-                }
-            }
-            index_writer.commit().unwrap();
-        })
-    });
+    // group.bench_function("index-hdfs-with-commit", |b| {
+    //     b.iter(|| {
+    //         let ram_directory = RamDirectory::create();
+    //         let mut index_writer = IndexBuilder::new()
+    //             .schema(schema.clone())
+    //             .single_segment_index_writer(ram_directory, 100_000_000)
+    //             .unwrap();
+    //         for _ in 0..NUM_REPEATS {
+    //             for doc_json in HDFS_LOGS.trim().split("\n") {
+    //                 let doc = schema.parse_document(doc_json).unwrap();
+    //                 index_writer.add_document(doc).unwrap();
+    //             }
+    //         }
+    //         index_writer.commit().unwrap();
+    //     })
+    // });
+    // group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
+    //     b.iter(|| {
+    //         let ram_directory = RamDirectory::create();
+    //         let mut index_writer = IndexBuilder::new()
+    //             .schema(schema.clone())
+    //             .single_segment_index_writer(ram_directory, 100_000_000)
+    //             .unwrap();
+    //         for _ in 0..NUM_REPEATS {
+    //             for doc_json in HDFS_LOGS.trim().split("\n") {
+    //                 let doc = schema.parse_document(doc_json).unwrap();
+    //                 index_writer.add_document(doc).unwrap();
+    //             }
+    //         }
+    //     })
+    // });
+    // group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
+    //     b.iter(|| {
+    //         let ram_directory = RamDirectory::create();
+    //         let mut index_writer = IndexBuilder::new()
+    //             .schema(schema.clone())
+    //             .single_segment_index_writer(ram_directory, 100_000_000)
+    //             .unwrap();
+    //         for _ in 0..NUM_REPEATS {
+    //             for doc_json in HDFS_LOGS.trim().split("\n") {
+    //                 let doc = schema.parse_document(doc_json).unwrap();
+    //                 index_writer.add_document(doc).unwrap();
+    //             }
+    //         }
+    //         index_writer.commit().unwrap();
+    //     })
+    // });
+    // group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
+    //     b.iter(|| {
+    //         let ram_directory = RamDirectory::create();
+    //         let mut index_writer = IndexBuilder::new()
+    //             .schema(schema.clone())
+    //             .single_segment_index_writer(ram_directory, 100_000_000)
+    //             .unwrap();
+    //         for _ in 0..NUM_REPEATS {
+    //             for doc_json in HDFS_LOGS.trim().split("\n") {
+    //                 let json_val: serde_json::Map<String, serde_json::Value> =
+    //                     serde_json::from_str(doc_json).unwrap();
+    //                 let doc = tantivy::doc!(json_field=>json_val);
+    //                 index_writer.add_document(doc).unwrap();
+    //             }
+    //         }
+    //         index_writer.commit().unwrap();
+    //     })
+    // });
+    // group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
+    //     b.iter(|| {
+    //         let ram_directory = RamDirectory::create();
+    //         let mut index_writer = IndexBuilder::new()
+    //             .schema(schema.clone())
+    //             .single_segment_index_writer(ram_directory, 100_000_000)
+    //             .unwrap();
+    //         for _ in 0..NUM_REPEATS {
+    //             for doc_json in HDFS_LOGS.trim().split("\n") {
+    //                 let json_val: serde_json::Map<String, serde_json::Value> =
+    //                     serde_json::from_str(doc_json).unwrap();
+    //                 let doc = tantivy::doc!(json_field=>json_val);
+    //                 index_writer.add_document(doc).unwrap();
+    //             }
+    //         }
+    //         index_writer.commit().unwrap();
+    //     })
+    //});
 }
 
 criterion_group! {
```
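Note on the benchmark rewrite: documents are now parsed from JSON and materialized once, up front, so `b.iter` measures indexing work alone (plus one `Vec` clone per repeat) rather than JSON parsing plus indexing, and each iteration writes through `IndexBuilder::single_segment_index_writer` into a fresh `RamDirectory` instead of `Index::create_in_ram`. The new `index-hdfs-with-array-no-commit` variant exercises the `add_borrowed_values` path introduced below, where every token of a line borrows from one shared `String`. `NUM_REPEATS` also changed from 2 to 20, so absolute timings are not comparable with earlier runs.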
```diff
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::io::{Read, Write};
 use std::{fmt, io};
 
@@ -210,6 +211,23 @@ impl BinarySerializable for String {
     }
 }
 
+impl<'a> BinarySerializable for Cow<'a, str> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        let data: &[u8] = self.as_bytes();
+        VInt(data.len() as u64).serialize(writer)?;
+        writer.write_all(data)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let string_length = VInt::deserialize(reader)?.val() as usize;
+        let mut result = String::with_capacity(string_length);
+        reader
+            .take(string_length as u64)
+            .read_to_string(&mut result)?;
+        Ok(Cow::Owned(result))
+    }
+}
+
 #[cfg(test)]
 pub mod test {
```
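The new `Cow<str>` impl mirrors the existing `String` codec (a `VInt` length prefix followed by the raw UTF-8 bytes), so the two are wire-compatible. A minimal round-trip sketch, assuming the workspace's `common` crate is a dependency:

```rust
use std::borrow::Cow;

use common::BinarySerializable;

fn main() -> std::io::Result<()> {
    let original: Cow<str> = Cow::Borrowed("hello tantivy");

    // Serialize: VInt byte length, then the UTF-8 bytes.
    let mut buffer: Vec<u8> = Vec::new();
    original.serialize(&mut buffer)?;

    // Deserialize always produces Cow::Owned, since the bytes are read
    // out of the reader rather than borrowed from it.
    let mut reader = buffer.as_slice();
    let round_tripped = Cow::<str>::deserialize(&mut reader)?;
    assert_eq!(round_tripped, original);
    Ok(())
}
```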
```diff
@@ -158,7 +158,6 @@ impl SegmentWriter {
         let doc_id = self.max_doc;
         let vals_grouped_by_field = doc
             .field_values()
-            .iter()
             .sorted_by_key(|el| el.field())
             .group_by(|el| el.field());
         for (field, field_values) in &vals_grouped_by_field {
@@ -502,9 +501,17 @@ mod tests {
         let reader = StoreReader::open(directory.open_read(path).unwrap(), 0).unwrap();
         let doc = reader.get(0).unwrap();
-        assert_eq!(doc.field_values().len(), 2);
-        assert_eq!(doc.field_values()[0].value().as_text(), Some("A"));
-        assert_eq!(doc.field_values()[1].value().as_text(), Some("title"));
+
+        assert_eq!(doc.value_count(), 2);
+        let mut field_value_iter = doc.field_values();
+        assert_eq!(
+            field_value_iter.next().unwrap().value().as_text(),
+            Some("A")
+        );
+        assert_eq!(
+            field_value_iter.next().unwrap().value().as_text(),
+            Some("title")
+        );
+        assert!(field_value_iter.next().is_none());
     }
 
     #[test]
@@ -833,20 +840,23 @@ mod tests {
         // This is a bit of a contrived example.
         let tokens = PreTokenizedString {
             text: "contrived-example".to_string(), //< I can't think of a use case where this corner case happens in real life.
-            tokens: vec![Token { // Not the last token, yet ends after the last token.
-                offset_from: 0,
-                offset_to: 14,
-                position: 0,
-                text: "long_token".to_string(),
-                position_length: 3,
-            },
-            Token {
-                offset_from: 0,
-                offset_to: 14,
-                position: 1,
-                text: "short".to_string(),
-                position_length: 1,
-            }],
+            tokens: vec![
+                Token {
+                    // Not the last token, yet ends after the last token.
+                    offset_from: 0,
+                    offset_to: 14,
+                    position: 0,
+                    text: "long_token".to_string(),
+                    position_length: 3,
+                },
+                Token {
+                    offset_from: 0,
+                    offset_to: 14,
+                    position: 1,
+                    text: "short".to_string(),
+                    position_length: 1,
+                },
+            ],
         };
         doc.add_pre_tokenized_text(text, tokens);
         doc.add_text(text, "hello");
```
```diff
@@ -31,7 +31,7 @@ pub struct MoreLikeThisQuery {
 #[derive(Debug, PartialEq, Clone)]
 enum TargetDocument {
     DocumentAdress(DocAddress),
-    DocumentFields(Vec<(Field, Vec<Value>)>),
+    DocumentFields(Vec<(Field, Vec<Value<'static>>)>),
 }
 
 impl MoreLikeThisQuery {
@@ -160,7 +160,10 @@ impl MoreLikeThisQueryBuilder {
     /// that will be used to compose the resulting query.
     /// This interface is meant to be used when you want to provide your own set of fields
     /// not necessarily from a specific document.
-    pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec<Value>)>) -> MoreLikeThisQuery {
+    pub fn with_document_fields(
+        self,
+        doc_fields: Vec<(Field, Vec<Value<'static>>)>,
+    ) -> MoreLikeThisQuery {
         MoreLikeThisQuery {
             mlt: self.mlt,
             target: TargetDocument::DocumentFields(doc_fields),
```
```diff
@@ -1,35 +1,105 @@
 use std::collections::{HashMap, HashSet};
 use std::io::{self, Read, Write};
-use std::mem;
 use std::net::Ipv6Addr;
+use std::sync::Arc;
+use std::{fmt, mem};
 
 use common::{BinarySerializable, VInt};
+use itertools::Either;
+use yoke::erased::ErasedArcCart;
+use yoke::Yoke;
 
 use super::*;
+use crate::schema::value::MaybeOwnedString;
 use crate::tokenizer::PreTokenizedString;
 use crate::DateTime;
 
+/// A group of FieldValue sharing an underlying storage
+///
+/// Or a single owned FieldValue.
+#[derive(Clone)]
+enum FieldValueGroup {
+    Single(FieldValue<'static>),
+    Group(Yoke<VecFieldValue<'static>, ErasedArcCart>),
+}
+
+// this NewType is required to make it possible to yoke a vec with non 'static inner values.
+#[derive(yoke::Yokeable, Clone)]
+struct VecFieldValue<'a>(Vec<FieldValue<'a>>);
+
+impl<'a> std::ops::Deref for VecFieldValue<'a> {
+    type Target = Vec<FieldValue<'a>>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<'a> From<Vec<FieldValue<'a>>> for VecFieldValue<'a> {
+    fn from(field_values: Vec<FieldValue>) -> VecFieldValue {
+        VecFieldValue(field_values)
+    }
+}
+
+impl FieldValueGroup {
+    fn iter(&self) -> impl Iterator<Item = &FieldValue> {
+        match self {
+            FieldValueGroup::Single(field_value) => Either::Left(std::iter::once(field_value)),
+            FieldValueGroup::Group(field_values) => Either::Right(field_values.get().iter()),
+        }
+    }
+
+    fn count(&self) -> usize {
+        match self {
+            FieldValueGroup::Single(_) => 1,
+            FieldValueGroup::Group(field_values) => field_values.get().len(),
+        }
+    }
+}
+
+impl From<Vec<FieldValue<'static>>> for FieldValueGroup {
+    fn from(field_values: Vec<FieldValue<'static>>) -> FieldValueGroup {
+        FieldValueGroup::Group(
+            Yoke::new_always_owned(field_values.into())
+                .wrap_cart_in_arc()
+                .erase_arc_cart(),
+        )
+    }
+}
+
 /// Tantivy's Document is the object that can
 /// be indexed and then searched for.
 ///
 /// Documents are fundamentally a collection of unordered couples `(field, value)`.
 /// In this list, one field may appear more than once.
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
+#[derive(Clone, Default)]
+// TODO bring back Ser/De and Debug
+//#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
+//#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
 pub struct Document {
-    field_values: Vec<FieldValue>,
+    field_values: Vec<FieldValueGroup>,
 }
 
-impl From<Vec<FieldValue>> for Document {
-    fn from(field_values: Vec<FieldValue>) -> Self {
+impl fmt::Debug for Document {
+    fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
+        todo!()
+    }
+}
+
+impl From<Vec<FieldValue<'static>>> for Document {
+    fn from(field_values: Vec<FieldValue<'static>>) -> Self {
+        let field_values = vec![field_values.into()];
         Document { field_values }
     }
 }
 impl PartialEq for Document {
     fn eq(&self, other: &Document) -> bool {
         // super slow, but only here for tests
-        let convert_to_comparable_map = |field_values: &[FieldValue]| {
+        let convert_to_comparable_map = |field_values| {
             let mut field_value_set: HashMap<Field, HashSet<String>> = Default::default();
-            for field_value in field_values.iter() {
+            for field_value in field_values {
+                // for some reason rustc fails to guess the type
+                let field_value: &FieldValue = field_value;
                 let json_val = serde_json::to_string(field_value.value()).unwrap();
                 field_value_set
                     .entry(field_value.field())
@@ -39,9 +109,9 @@ impl PartialEq for Document {
             field_value_set
         };
         let self_field_values: HashMap<Field, HashSet<String>> =
-            convert_to_comparable_map(&self.field_values);
+            convert_to_comparable_map(self.field_values());
         let other_field_values: HashMap<Field, HashSet<String>> =
-            convert_to_comparable_map(&other.field_values);
+            convert_to_comparable_map(other.field_values());
         self_field_values.eq(&other_field_values)
     }
 }
@@ -49,12 +119,13 @@ impl PartialEq for Document {
 impl Eq for Document {}
 
 impl IntoIterator for Document {
-    type Item = FieldValue;
+    type Item = FieldValue<'static>;
 
-    type IntoIter = std::vec::IntoIter<FieldValue>;
+    type IntoIter = std::vec::IntoIter<FieldValue<'static>>;
 
     fn into_iter(self) -> Self::IntoIter {
-        self.field_values.into_iter()
+        todo!()
+        // self.field_values.into_iter()
     }
 }
 
@@ -84,7 +155,7 @@ impl Document {
 
     /// Add a text field.
     pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
-        let value = Value::Str(text.to_string());
+        let value = Value::Str(MaybeOwnedString::from_string(text.to_string()));
         self.add_field_value(field, value);
     }
 
@@ -138,15 +209,35 @@ impl Document {
     }
 
     /// Add a (field, value) to the document.
-    pub fn add_field_value<T: Into<Value>>(&mut self, field: Field, typed_val: T) {
+    pub fn add_field_value<T: Into<Value<'static>>>(&mut self, field: Field, typed_val: T) {
         let value = typed_val.into();
         let field_value = FieldValue { field, value };
-        self.field_values.push(field_value);
+        self.field_values.push(FieldValueGroup::Single(field_value));
+    }
+
+    /// Add multiple borrowed values, also taking the container they're borrowing from
+    // TODO add a try_ variant?
+    pub fn add_borrowed_values<T, F>(&mut self, storage: T, f: F)
+    where
+        T: Send + Sync + 'static,
+        F: FnOnce(&T) -> Vec<FieldValue>,
+    {
+        let yoke =
+            Yoke::attach_to_cart(Arc::new(storage), |storage| f(storage).into()).erase_arc_cart();
+
+        self.field_values.push(FieldValueGroup::Group(yoke));
     }
 
     /// field_values accessor
-    pub fn field_values(&self) -> &[FieldValue] {
-        &self.field_values
+    pub fn field_values(&self) -> impl Iterator<Item = &FieldValue> {
+        self.field_values.iter().flat_map(|group| group.iter())
+    }
+
+    /// Return the total number of values
+    ///
+    /// More efficient than calling `self.field_values().count()`
+    pub fn value_count(&self) -> usize {
+        self.field_values.iter().map(|group| group.count()).sum()
     }
 
     /// Sort and groups the field_values by field.
@@ -154,7 +245,7 @@ impl Document {
     /// The result of this method is not cached and is
     /// computed on the fly when this method is called.
     pub fn get_sorted_field_values(&self) -> Vec<(Field, Vec<&Value>)> {
-        let mut field_values: Vec<&FieldValue> = self.field_values().iter().collect();
+        let mut field_values: Vec<&FieldValue> = self.field_values().collect();
         field_values.sort_by_key(|field_value| field_value.field());
 
         let mut field_values_it = field_values.into_iter();
@@ -189,6 +280,7 @@ impl Document {
     pub fn get_all(&self, field: Field) -> impl Iterator<Item = &Value> {
         self.field_values
             .iter()
+            .flat_map(|group| group.iter())
             .filter(move |field_value| field_value.field() == field)
             .map(FieldValue::value)
     }
@@ -202,7 +294,6 @@ impl Document {
     pub fn serialize_stored<W: Write>(&self, schema: &Schema, writer: &mut W) -> io::Result<()> {
         let stored_field_values = || {
             self.field_values()
-                .iter()
                 .filter(|field_value| schema.get_field_entry(field_value.field()).is_stored())
         };
         let num_field_values = stored_field_values().count();
@@ -216,7 +307,9 @@ impl Document {
                 } => {
                     let field_value = FieldValue {
                         field: *field,
-                        value: Value::Str(pre_tokenized_text.text.to_string()),
+                        value: Value::Str(MaybeOwnedString::from_string(
+                            pre_tokenized_text.text.to_string(),
+                        )),
                     };
                     field_value.serialize(writer)?;
                 }
@@ -230,7 +323,7 @@ impl Document {
 impl BinarySerializable for Document {
     fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
         let field_values = self.field_values();
-        VInt(field_values.len() as u64).serialize(writer)?;
+        VInt(self.value_count() as u64).serialize(writer)?;
         for field_value in field_values {
             field_value.serialize(writer)?;
         }
@@ -259,7 +352,7 @@ mod tests {
         let text_field = schema_builder.add_text_field("title", TEXT);
         let mut doc = Document::default();
        doc.add_text(text_field, "My title");
-        assert_eq!(doc.field_values().len(), 1);
+        assert_eq!(doc.value_count(), 1);
     }
 
     #[test]
@@ -273,7 +366,7 @@ mod tests {
             .clone(),
         );
         doc.add_text(Field::from_field_id(1), "hello");
-        assert_eq!(doc.field_values().len(), 2);
+        assert_eq!(doc.value_count(), 2);
         let mut payload: Vec<u8> = Vec::new();
         doc.serialize(&mut payload).unwrap();
         assert_eq!(payload.len(), 26);
```
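The net effect of the `Document` changes: a document is now a list of `FieldValueGroup`s, so it can hold many values that borrow from one shared allocation while the old one-value-at-a-time API keeps working. A minimal sketch of the two paths against this branch's work-in-progress API (`add_borrowed_values`, `value_count`, and the iterator-returning `field_values`):

```rust
use tantivy::schema::{FieldValue, SchemaBuilder, TEXT};
use tantivy::Document;

fn main() {
    let mut schema_builder = SchemaBuilder::new();
    let body = schema_builder.add_text_field("body", TEXT);

    // Owned path: the value is copied into its own FieldValueGroup::Single.
    let mut doc = Document::new();
    doc.add_text(body, "hello world");

    // Borrowed path: one backing String is moved into the document, and the
    // closure builds values that borrow from it; yoke pins the self-reference.
    let line = "a few space separated tokens".to_owned();
    doc.add_borrowed_values(line, |line| {
        line.split(' ')
            .map(|token| FieldValue::new(body, token.into()))
            .collect()
    });

    // field_values() iterates over both groups: 1 owned value + 5 borrowed.
    assert_eq!(doc.value_count(), 6);
}
```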
```diff
@@ -9,6 +9,7 @@ use super::ip_options::IpAddrOptions;
 use super::{Cardinality, IntoIpv6Addr};
 use crate::schema::bytes_options::BytesOptions;
 use crate::schema::facet_options::FacetOptions;
+use crate::schema::value::MaybeOwnedString;
 use crate::schema::{
     DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing,
     TextOptions, Value,
@@ -329,7 +330,7 @@ impl FieldType {
     /// Tantivy will not try to cast values.
     /// For instance, If the json value is the integer `3` and the
     /// target field is a `Str`, this method will return an Error.
-    pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
+    pub fn value_from_json(&self, json: JsonValue) -> Result<Value<'static>, ValueParsingError> {
         match json {
             JsonValue::String(field_text) => {
                 match self {
@@ -341,7 +342,7 @@ impl FieldType {
                     })?;
                     Ok(DateTime::from_utc(dt_with_fixed_tz).into())
                 }
-                FieldType::Str(_) => Ok(Value::Str(field_text)),
+                FieldType::Str(_) => Ok(Value::Str(MaybeOwnedString::from_string(field_text))),
                 FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
                     Err(ValueParsingError::TypeError {
                         expected: "an integer",
```
```diff
@@ -7,12 +7,13 @@ use crate::schema::{Field, Value};
 /// `FieldValue` holds together a `Field` and its `Value`.
 #[allow(missing_docs)]
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct FieldValue {
+#[serde(bound(deserialize = "'a: 'de, 'de: 'a"))]
+pub struct FieldValue<'a> {
     pub field: Field,
-    pub value: Value,
+    pub value: Value<'a>,
 }
 
-impl FieldValue {
+impl<'a> FieldValue<'a> {
     /// Constructor
     pub fn new(field: Field, value: Value) -> FieldValue {
         FieldValue { field, value }
@@ -29,13 +30,13 @@ impl FieldValue {
     }
 }
 
-impl From<FieldValue> for Value {
-    fn from(field_value: FieldValue) -> Self {
+impl<'a> From<FieldValue<'a>> for Value<'a> {
+    fn from(field_value: FieldValue<'a>) -> Self {
         field_value.value
     }
 }
 
-impl BinarySerializable for FieldValue {
+impl<'a> BinarySerializable for FieldValue<'a> {
     fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
         self.field.serialize(writer)?;
         self.value.serialize(writer)
```
```diff
@@ -10,4 +10,5 @@ use crate::schema::Value;
 /// A `NamedFieldDocument` is a simple representation of a document
 /// as a `BTreeMap<String, Vec<Value>>`.
 #[derive(Debug, Deserialize, Serialize)]
-pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
+#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
+pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value<'static>>>);
```
```diff
@@ -308,7 +308,11 @@ impl Schema {
         let mut field_map = BTreeMap::new();
         for (field, field_values) in doc.get_sorted_field_values() {
             let field_name = self.get_field_name(field);
-            let values: Vec<Value> = field_values.into_iter().cloned().collect();
+            let values: Vec<Value> = field_values
+                .into_iter()
+                .cloned()
+                .map(Value::into_owned)
+                .collect();
             field_map.insert(field_name.to_string(), values);
         }
         NamedFieldDocument(field_map)
@@ -338,20 +342,21 @@ impl Schema {
         if let Some(field) = self.get_field(&field_name) {
             let field_entry = self.get_field_entry(field);
             let field_type = field_entry.field_type();
+            // TODO rewrite this with shared allocation?
             match json_value {
                 JsonValue::Array(json_items) => {
                     for json_item in json_items {
                         let value = field_type
                             .value_from_json(json_item)
                             .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
-                        doc.add_field_value(field, value);
+                        doc.add_field_value(field, value.into_owned());
                     }
                 }
                 _ => {
                     let value = field_type
                         .value_from_json(json_value)
                         .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
-                    doc.add_field_value(field, value);
+                    doc.add_field_value(field, value.into_owned());
                 }
             }
         }
@@ -706,7 +711,7 @@ mod tests {
         let schema = schema_builder.build();
         {
             let doc = schema.parse_document("{}").unwrap();
-            assert!(doc.field_values().is_empty());
+            assert_eq!(doc.value_count(), 0);
         }
         {
             let doc = schema
```
```diff
@@ -1,6 +1,7 @@
 use std::fmt;
 use std::net::Ipv6Addr;
 
+pub use not_safe::MaybeOwnedString;
 use serde::de::Visitor;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use serde_json::Map;
@@ -12,9 +13,9 @@ use crate::DateTime;
 /// Value represents the value of a any field.
 /// It is an enum over all over all of the possible field type.
 #[derive(Debug, Clone, PartialEq)]
-pub enum Value {
+pub enum Value<'a> {
     /// The str type is used for any text information.
-    Str(String),
+    Str(MaybeOwnedString<'a>),
     /// Pre-tokenized str type,
     PreTokStr(PreTokenizedString),
     /// Unsigned 64-bits Integer `u64`
@@ -30,16 +31,38 @@ pub enum Value {
     /// Facet
     Facet(Facet),
     /// Arbitrarily sized byte array
+    // TODO allow Cow<'a, [u8]>
     Bytes(Vec<u8>),
     /// Json object value.
+    // TODO allow Cow keys and borrowed values
     JsonObject(serde_json::Map<String, serde_json::Value>),
     /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
     IpAddr(Ipv6Addr),
 }
 
-impl Eq for Value {}
+impl<'a> Value<'a> {
+    /// Convert a borrowing [`Value`] to an owning one.
+    pub fn into_owned(self) -> Value<'static> {
+        use Value::*;
+        match self {
+            Str(val) => Str(MaybeOwnedString::from_string(val.into_string())),
+            PreTokStr(val) => PreTokStr(val),
+            U64(val) => U64(val),
+            I64(val) => I64(val),
+            F64(val) => F64(val),
+            Bool(val) => Bool(val),
+            Date(val) => Date(val),
+            Facet(val) => Facet(val),
+            Bytes(val) => Bytes(val),
+            JsonObject(val) => JsonObject(val),
+            IpAddr(val) => IpAddr(val),
+        }
+    }
+}
 
-impl Serialize for Value {
+impl<'a> Eq for Value<'a> {}
+
+impl<'a> Serialize for Value<'a> {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where S: Serializer {
         match *self {
@@ -65,13 +88,13 @@ impl Serialize for Value {
     }
 }
 
-impl<'de> Deserialize<'de> for Value {
+impl<'de> Deserialize<'de> for Value<'de> {
     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where D: Deserializer<'de> {
         struct ValueVisitor;
 
         impl<'de> Visitor<'de> for ValueVisitor {
-            type Value = Value;
+            type Value = Value<'de>;
 
             fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                 formatter.write_str("a string or u32")
@@ -93,12 +116,13 @@ impl<'de> Deserialize<'de> for Value {
                 Ok(Value::Bool(v))
             }
 
+            // TODO add visit_borrowed_str
             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
-                Ok(Value::Str(v.to_owned()))
+                Ok(Value::Str(MaybeOwnedString::from_string(v.to_owned())))
             }
 
             fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
-                Ok(Value::Str(v))
+                Ok(Value::Str(MaybeOwnedString::from_string(v)))
             }
         }
 
@@ -106,7 +130,7 @@ impl<'de> Deserialize<'de> for Value {
     }
 }
 
-impl Value {
+impl<'a> Value<'a> {
     /// Returns the text value, provided the value is of the `Str` type.
     /// (Returns `None` if the value is not of the `Str` type).
     pub fn as_text(&self) -> Option<&str> {
@@ -224,86 +248,87 @@ impl Value {
     }
 }
 
-impl From<String> for Value {
-    fn from(s: String) -> Value {
-        Value::Str(s)
+impl From<String> for Value<'static> {
+    fn from(s: String) -> Value<'static> {
+        Value::Str(MaybeOwnedString::from_string(s))
     }
 }
 
-impl From<Ipv6Addr> for Value {
-    fn from(v: Ipv6Addr) -> Value {
+impl From<Ipv6Addr> for Value<'static> {
+    fn from(v: Ipv6Addr) -> Value<'static> {
         Value::IpAddr(v)
     }
 }
 
-impl From<u64> for Value {
-    fn from(v: u64) -> Value {
+impl From<u64> for Value<'static> {
+    fn from(v: u64) -> Value<'static> {
         Value::U64(v)
     }
 }
 
-impl From<i64> for Value {
-    fn from(v: i64) -> Value {
+impl From<i64> for Value<'static> {
+    fn from(v: i64) -> Value<'static> {
         Value::I64(v)
     }
 }
 
-impl From<f64> for Value {
-    fn from(v: f64) -> Value {
+impl From<f64> for Value<'static> {
+    fn from(v: f64) -> Value<'static> {
         Value::F64(v)
     }
 }
 
-impl From<bool> for Value {
+impl From<bool> for Value<'static> {
     fn from(b: bool) -> Self {
         Value::Bool(b)
     }
 }
 
-impl From<DateTime> for Value {
-    fn from(dt: DateTime) -> Value {
+impl From<DateTime> for Value<'static> {
+    fn from(dt: DateTime) -> Value<'static> {
         Value::Date(dt)
     }
 }
 
-impl<'a> From<&'a str> for Value {
-    fn from(s: &'a str) -> Value {
-        Value::Str(s.to_string())
+impl<'a> From<&'a str> for Value<'a> {
+    fn from(s: &'a str) -> Value<'a> {
+        Value::Str(MaybeOwnedString::from_str(s))
    }
 }
 
-impl<'a> From<&'a [u8]> for Value {
-    fn from(bytes: &'a [u8]) -> Value {
+// TODO change lifetime to 'a
+impl<'a> From<&'a [u8]> for Value<'static> {
+    fn from(bytes: &'a [u8]) -> Value<'static> {
         Value::Bytes(bytes.to_vec())
     }
 }
 
-impl From<Facet> for Value {
-    fn from(facet: Facet) -> Value {
+impl From<Facet> for Value<'static> {
+    fn from(facet: Facet) -> Value<'static> {
         Value::Facet(facet)
     }
 }
 
-impl From<Vec<u8>> for Value {
-    fn from(bytes: Vec<u8>) -> Value {
+impl From<Vec<u8>> for Value<'static> {
+    fn from(bytes: Vec<u8>) -> Value<'static> {
         Value::Bytes(bytes)
     }
 }
 
-impl From<PreTokenizedString> for Value {
-    fn from(pretokenized_string: PreTokenizedString) -> Value {
+impl From<PreTokenizedString> for Value<'static> {
+    fn from(pretokenized_string: PreTokenizedString) -> Value<'static> {
         Value::PreTokStr(pretokenized_string)
     }
 }
 
-impl From<serde_json::Map<String, serde_json::Value>> for Value {
-    fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value {
+impl From<serde_json::Map<String, serde_json::Value>> for Value<'static> {
+    fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value<'static> {
         Value::JsonObject(json_object)
     }
 }
 
-impl From<serde_json::Value> for Value {
-    fn from(json_value: serde_json::Value) -> Value {
+impl From<serde_json::Value> for Value<'static> {
+    fn from(json_value: serde_json::Value) -> Value<'static> {
         match json_value {
             serde_json::Value::Object(json_object) => Value::JsonObject(json_object),
             _ => {
```
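The practical effect of the new lifetime parameter: `Value::from(&str)` now borrows its input instead of copying it, and `into_owned` detaches a value from its backing storage when it has to live longer. A small sketch against this branch's API:

```rust
use tantivy::schema::Value;

fn main() {
    let body = String::from("borrowed text");

    // Borrows `body`: no copy of the string data is made.
    let borrowed: Value = Value::from(body.as_str());

    // into_owned copies the data so the value can outlive `body`.
    let owned: Value<'static> = borrowed.into_owned();

    assert_eq!(owned.as_text(), Some("borrowed text"));
}
```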
```diff
@@ -320,7 +345,7 @@ mod binary_serialize {
     use common::{f64_to_u64, u64_to_f64, BinarySerializable};
     use fastfield_codecs::MonotonicallyMappableToU128;
 
-    use super::Value;
+    use super::{MaybeOwnedString, Value};
     use crate::schema::Facet;
     use crate::tokenizer::PreTokenizedString;
     use crate::DateTime;
@@ -341,12 +366,13 @@ mod binary_serialize {
 
     const TOK_STR_CODE: u8 = 0;
 
-    impl BinarySerializable for Value {
+    impl<'a> BinarySerializable for Value<'a> {
         fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
             match *self {
                 Value::Str(ref text) => {
                     TEXT_CODE.serialize(writer)?;
-                    text.serialize(writer)
+                    // TODO impl trait for MaybeOwnedString
+                    text.as_str().to_owned().serialize(writer)
                 }
                 Value::PreTokStr(ref tok_str) => {
                     EXT_CODE.serialize(writer)?;
@@ -408,7 +434,7 @@ mod binary_serialize {
             match type_code {
                 TEXT_CODE => {
                     let text = String::deserialize(reader)?;
-                    Ok(Value::Str(text))
+                    Ok(Value::Str(MaybeOwnedString::from_string(text)))
                 }
                 U64_CODE => {
                     let value = u64::deserialize(reader)?;
@@ -550,3 +576,104 @@ mod tests {
         assert_eq!(serialized_value_json, r#""1996-12-20T01:39:57Z""#);
     }
 }
+
+mod not_safe {
+    use std::ops::Deref;
+
+    union Ref<'a, T: ?Sized> {
+        shared: &'a T,
+        uniq: &'a mut T,
+    }
+
+    pub struct MaybeOwnedString<'a> {
+        string: Ref<'a, str>,
+        capacity: usize,
+    }
+
+    impl<'a> MaybeOwnedString<'a> {
+        pub fn from_str(string: &'a str) -> MaybeOwnedString<'a> {
+            MaybeOwnedString {
+                string: Ref { shared: string },
+                capacity: 0,
+            }
+        }
+
+        pub fn from_string(mut string: String) -> MaybeOwnedString<'static> {
+            string.shrink_to_fit(); // <= actually important for safety, todo use the Vec .as_ptr instead
+
+            let mut s = std::mem::ManuallyDrop::new(string);
+            let ptr = s.as_mut_ptr();
+            let len = s.len();
+            let capacity = s.capacity();
+
+            let string = unsafe {
+                std::str::from_utf8_unchecked_mut(std::slice::from_raw_parts_mut(ptr, len))
+            };
+            MaybeOwnedString {
+                string: Ref { uniq: string },
+                capacity,
+            }
+        }
+
+        pub fn into_string(mut self) -> String {
+            if self.capacity != 0 {
+                let string = unsafe { &mut self.string.uniq };
+                unsafe {
+                    return String::from_raw_parts(string.as_mut_ptr(), self.len(), self.capacity);
+                };
+            }
+            self.deref().to_owned()
+        }
+
+        pub fn as_str(&self) -> &str {
+            self.deref()
+        }
+    }
+
+    impl<'a> Deref for MaybeOwnedString<'a> {
+        type Target = str;
+
+        #[inline]
+        fn deref(&self) -> &str {
+            unsafe { self.string.shared }
+        }
+    }
+
+    impl<'a> Drop for MaybeOwnedString<'a> {
+        fn drop(&mut self) {
+            // if capacity is 0, either it's an empty String so there is no dealloc to do, or it's
+            // borrowed
+            if self.capacity != 0 {
+                let string = unsafe { &mut self.string.uniq };
+                unsafe { String::from_raw_parts(string.as_mut_ptr(), self.len(), self.capacity) };
+            }
+        }
+    }
+
+    impl<'a> Clone for MaybeOwnedString<'a> {
+        fn clone(&self) -> Self {
+            if self.capacity == 0 {
+                MaybeOwnedString {
+                    string: Ref {
+                        shared: unsafe { self.string.shared },
+                    },
+                    capacity: 0,
+                }
+            } else {
+                MaybeOwnedString::from_string(self.deref().to_owned())
+            }
+        }
+    }
+
+    impl<'a> std::fmt::Debug for MaybeOwnedString<'a> {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            f.write_str(self.deref())
+        }
+    }
+
+    impl<'a> PartialEq for MaybeOwnedString<'a> {
+        fn eq(&self, other: &Self) -> bool {
+            self.deref() == other.deref()
+        }
+    }
+}
```
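`MaybeOwnedString` is in effect a `Cow<str>` squeezed into one pointer-plus-capacity layout: `capacity == 0` marks a borrow (or the empty string), anything else marks an owned buffer that `Drop` reassembles into a `String` to deallocate. A small usage sketch; the import path is an assumption based on the `pub use not_safe::MaybeOwnedString;` re-export above:

```rust
// Path assumed from the `pub use` in the value module; adjust if the
// re-export is not surfaced at this level.
use tantivy::schema::value::MaybeOwnedString;

fn main() {
    // Borrowed: capacity stays 0, no allocation, lifetime tied to the &str.
    let borrowed = MaybeOwnedString::from_str("hello");
    assert_eq!(borrowed.as_str(), "hello");

    // Owned: takes over the String's buffer and is 'static.
    let owned = MaybeOwnedString::from_string(String::from("world"));
    assert_eq!(owned.as_str(), "world");
    assert_ne!(borrowed, owned);
}
```

One rough edge worth noting: `into_string` as written reconstructs the owned `String` but still lets `self` drop afterwards, so the owned path appears to free the buffer twice; the module's name, `not_safe`, flags this as an unfinished draft.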