Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2025-12-28 04:52:55 +00:00

Compare commits: test_parse...trinity/yo (6 commits)
| Author | SHA1 | Date |
|---|---|---|
| | bcff3eb2d2 | |
| | 85f2588875 | |
| | db6cf65d53 | |
| | 654aa7f42c | |
| | 951a898633 | |
| | 003722d831 | |
@@ -61,6 +61,7 @@ measure_time = "0.8.2"
ciborium = { version = "0.2", optional = true}
async-trait = "0.1.53"
arc-swap = "1.5.0"
yoke = { version = "0.6.2", features = ["derive"] }

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
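For context on the new dependency: `yoke` bundles a borrow together with the storage it borrows from, which is what lets the `Document` changes below carry values that reference one shared allocation. A minimal sketch of the pattern (illustrative only, not code from this branch):

```rust
use std::sync::Arc;

use yoke::Yoke;

fn main() {
    // The "cart" owns the storage; the yoke holds data borrowed from it.
    let cart: Arc<String> = Arc::new("hello world".to_string());
    let yoked: Yoke<&'static str, Arc<String>> =
        Yoke::attach_to_cart(cart, |s: &String| s.as_str());
    // Cart and borrow travel together, so the borrow can never dangle.
    assert_eq!(*yoked.get(), "hello world");
}
```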
benches/hdfs_with_array.json (new file)
File diff suppressed because it is too large
@@ -1,116 +1,159 @@
use criterion::{criterion_group, criterion_main, Criterion};
use itertools::Itertools;
use pprof::criterion::{Output, PProfProfiler};
use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
use tantivy::Index;
use serde_json::{self, Value as JsonValue};
use tantivy::directory::RamDirectory;
use tantivy::schema::{
    FieldValue, TextFieldIndexing, TextOptions, Value, INDEXED, STORED, STRING, TEXT,
};
use tantivy::{Document, Index, IndexBuilder};

const HDFS_LOGS: &str = include_str!("hdfs.json");
const NUM_REPEATS: usize = 2;
const NUM_REPEATS: usize = 20;

pub fn hdfs_index_benchmark(c: &mut Criterion) {
    let schema = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_u64_field("timestamp", INDEXED);
        schema_builder.add_text_field("body", TEXT);
        schema_builder.add_text_field("severity", STRING);
        schema_builder.build()
    };
    let schema_with_store = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_u64_field("timestamp", INDEXED | STORED);
        schema_builder.add_text_field("body", TEXT | STORED);
        schema_builder.add_text_field("severity", STRING | STORED);
        schema_builder.build()
    };
    let dynamic_schema = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_json_field("json", TEXT);
        schema_builder.build()
    };
    let mut schema_builder = tantivy::schema::SchemaBuilder::new();
    let text_indexing_options = TextFieldIndexing::default()
        .set_tokenizer("default")
        .set_fieldnorms(false)
        .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
    let mut text_options = TextOptions::default().set_indexing_options(text_indexing_options);
    let text_field = schema_builder.add_text_field("body", text_options);
    let schema = schema_builder.build();

    // prepare doc
    let mut documents_no_array = Vec::new();
    let mut documents_with_array = Vec::new();
    for doc_json in HDFS_LOGS.trim().split("\n") {
        let json_obj: serde_json::Map<String, JsonValue> = serde_json::from_str(doc_json).unwrap();
        let text = json_obj.get("body").unwrap().as_str().unwrap();
        let mut doc_no_array = Document::new();
        doc_no_array.add_text(text_field, text);
        documents_no_array.push(doc_no_array);
        let mut doc_with_array = Document::new();
        doc_with_array.add_borrowed_values(text.to_owned(), |text| {
            text.split(' ')
                .map(|text| FieldValue::new(text_field, text.into()))
                .collect()
        });
        documents_with_array.push(doc_with_array);
    }

    let mut group = c.benchmark_group("index-hdfs");
    group.sample_size(20);
    group.bench_function("index-hdfs-no-commit", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(schema.clone());
            let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            let ram_directory = RamDirectory::create();
            let mut index_writer = IndexBuilder::new()
                .schema(schema.clone())
                .single_segment_index_writer(ram_directory, 100_000_000)
                .unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let doc = schema.parse_document(doc_json).unwrap();
                    let documents_cloned = documents_no_array.clone();
                    for doc in documents_cloned {
                        index_writer.add_document(doc).unwrap();
                    }
                }
            }
        })
    });
    group.bench_function("index-hdfs-with-commit", |b| {
    group.bench_function("index-hdfs-with-array-no-commit", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(schema.clone());
            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            let ram_directory = RamDirectory::create();
            let mut index_writer = IndexBuilder::new()
                .schema(schema.clone())
                .single_segment_index_writer(ram_directory, 100_000_000)
                .unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let doc = schema.parse_document(doc_json).unwrap();
                    index_writer.add_document(doc).unwrap();
                }
            }
            index_writer.commit().unwrap();
        })
    });
    group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(schema_with_store.clone());
            let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let doc = schema.parse_document(doc_json).unwrap();
                    let documents_with_array_cloned = documents_with_array.clone();
                    for doc in documents_with_array_cloned {
                        index_writer.add_document(doc).unwrap();
                    }
                }
            }
        })
    });
    group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(schema_with_store.clone());
            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let doc = schema.parse_document(doc_json).unwrap();
                    index_writer.add_document(doc).unwrap();
                }
            }
            index_writer.commit().unwrap();
        })
    });
    group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(dynamic_schema.clone());
            let json_field = dynamic_schema.get_field("json").unwrap();
            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let json_val: serde_json::Map<String, serde_json::Value> =
                        serde_json::from_str(doc_json).unwrap();
                    let doc = tantivy::doc!(json_field=>json_val);
                    index_writer.add_document(doc).unwrap();
                }
            }
            index_writer.commit().unwrap();
        })
    });
    group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(dynamic_schema.clone());
            let json_field = dynamic_schema.get_field("json").unwrap();
            let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
            for _ in 0..NUM_REPEATS {
                for doc_json in HDFS_LOGS.trim().split("\n") {
                    let json_val: serde_json::Map<String, serde_json::Value> =
                        serde_json::from_str(doc_json).unwrap();
                    let doc = tantivy::doc!(json_field=>json_val);
                    index_writer.add_document(doc).unwrap();
                }
            }
            index_writer.commit().unwrap();
        })
    });
    // group.bench_function("index-hdfs-with-commit", |b| {
    //     b.iter(|| {
    //         let ram_directory = RamDirectory::create();
    //         let mut index_writer = IndexBuilder::new()
    //             .schema(schema.clone())
    //             .single_segment_index_writer(ram_directory, 100_000_000)
    //             .unwrap();
    //         for _ in 0..NUM_REPEATS {
    //             for doc_json in HDFS_LOGS.trim().split("\n") {
    //                 let doc = schema.parse_document(doc_json).unwrap();
    //                 index_writer.add_document(doc).unwrap();
    //             }
    //         }
    //         index_writer.commit().unwrap();
    //     })
    // });
    // group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
    //     b.iter(|| {
    //         let ram_directory = RamDirectory::create();
    //         let mut index_writer = IndexBuilder::new()
    //             .schema(schema.clone())
    //             .single_segment_index_writer(ram_directory, 100_000_000)
    //             .unwrap();
    //         for _ in 0..NUM_REPEATS {
    //             for doc_json in HDFS_LOGS.trim().split("\n") {
    //                 let doc = schema.parse_document(doc_json).unwrap();
    //                 index_writer.add_document(doc).unwrap();
    //             }
    //         }
    //     })
    // });
    // group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
    //     b.iter(|| {
    //         let ram_directory = RamDirectory::create();
    //         let mut index_writer = IndexBuilder::new()
    //             .schema(schema.clone())
    //             .single_segment_index_writer(ram_directory, 100_000_000)
    //             .unwrap();
    //         for _ in 0..NUM_REPEATS {
    //             for doc_json in HDFS_LOGS.trim().split("\n") {
    //                 let doc = schema.parse_document(doc_json).unwrap();
    //                 index_writer.add_document(doc).unwrap();
    //             }
    //         }
    //         index_writer.commit().unwrap();
    //     })
    // });
    // group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
    //     b.iter(|| {
    //         let ram_directory = RamDirectory::create();
    //         let mut index_writer = IndexBuilder::new()
    //             .schema(schema.clone())
    //             .single_segment_index_writer(ram_directory, 100_000_000)
    //             .unwrap();
    //         for _ in 0..NUM_REPEATS {
    //             for doc_json in HDFS_LOGS.trim().split("\n") {
    //                 let json_val: serde_json::Map<String, serde_json::Value> =
    //                     serde_json::from_str(doc_json).unwrap();
    //                 let doc = tantivy::doc!(json_field=>json_val);
    //                 index_writer.add_document(doc).unwrap();
    //             }
    //         }
    //         index_writer.commit().unwrap();
    //     })
    // });
    // group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
    //     b.iter(|| {
    //         let ram_directory = RamDirectory::create();
    //         let mut index_writer = IndexBuilder::new()
    //             .schema(schema.clone())
    //             .single_segment_index_writer(ram_directory, 100_000_000)
    //             .unwrap();
    //         for _ in 0..NUM_REPEATS {
    //             for doc_json in HDFS_LOGS.trim().split("\n") {
    //                 let json_val: serde_json::Map<String, serde_json::Value> =
    //                     serde_json::from_str(doc_json).unwrap();
    //                 let doc = tantivy::doc!(json_field=>json_val);
    //                 index_writer.add_document(doc).unwrap();
    //             }
    //         }
    //         index_writer.commit().unwrap();
    //     })
    //});
}

criterion_group! {
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::io::{Read, Write};
use std::{fmt, io};

@@ -210,6 +211,23 @@ impl BinarySerializable for String {
    }
}

impl<'a> BinarySerializable for Cow<'a, str> {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let string_length = VInt::deserialize(reader)?.val() as usize;
        let mut result = String::with_capacity(string_length);
        reader
            .take(string_length as u64)
            .read_to_string(&mut result)?;
        Ok(Cow::Owned(result))
    }
}

#[cfg(test)]
pub mod test {
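A quick sketch of what the new impl buys, written against the `BinarySerializable` trait shown above (hypothetical test, not part of this diff): serializing writes a `VInt` length followed by the raw UTF-8 bytes, and deserializing always yields the `Cow::Owned` variant.

```rust
use std::borrow::Cow;

use common::BinarySerializable;

#[test]
fn cow_str_roundtrip() -> std::io::Result<()> {
    let original: Cow<str> = Cow::Borrowed("hello");
    let mut buffer: Vec<u8> = Vec::new();
    original.serialize(&mut buffer)?; // VInt(5) + b"hello"
    let mut reader: &[u8] = &buffer[..];
    let decoded = <Cow<str>>::deserialize(&mut reader)?;
    // Contents match, even though the variant changed from Borrowed to Owned.
    assert_eq!(decoded, original);
    Ok(())
}
```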
@@ -158,7 +158,6 @@ impl SegmentWriter {
        let doc_id = self.max_doc;
        let vals_grouped_by_field = doc
            .field_values()
            .iter()
            .sorted_by_key(|el| el.field())
            .group_by(|el| el.field());
        for (field, field_values) in &vals_grouped_by_field {
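The dropped `.iter()` follows from `Document::field_values` now returning an iterator rather than a slice (see the document.rs hunks below). A standalone sketch of the call shape with itertools, using made-up values in place of `doc.field_values()`:

```rust
use itertools::Itertools;

// An iterator can feed sorted_by_key/group_by directly; no `.iter()` needed.
fn group_by_field(values: impl Iterator<Item = (u32, &'static str)>) {
    let grouped = values
        .sorted_by_key(|(field, _)| *field)
        .group_by(|(field, _)| *field);
    for (field, group) in &grouped {
        println!("field {}: {} values", field, group.count());
    }
}

fn main() {
    group_by_field(vec![(2, "a"), (1, "b"), (1, "c")].into_iter());
}
```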
@@ -502,9 +501,17 @@ mod tests {
        let reader = StoreReader::open(directory.open_read(path).unwrap(), 0).unwrap();
        let doc = reader.get(0).unwrap();

        assert_eq!(doc.field_values().len(), 2);
        assert_eq!(doc.field_values()[0].value().as_text(), Some("A"));
        assert_eq!(doc.field_values()[1].value().as_text(), Some("title"));
        assert_eq!(doc.value_count(), 2);
        let mut field_value_iter = doc.field_values();
        assert_eq!(
            field_value_iter.next().unwrap().value().as_text(),
            Some("A")
        );
        assert_eq!(
            field_value_iter.next().unwrap().value().as_text(),
            Some("title")
        );
        assert!(field_value_iter.next().is_none());
    }

    #[test]
@@ -833,20 +840,23 @@ mod tests {
        // This is a bit of a contrived example.
        let tokens = PreTokenizedString {
            text: "contrived-example".to_string(), //< I can't think of a use case where this corner case happens in real life.
            tokens: vec![Token { // Not the last token, yet ends after the last token.
                offset_from: 0,
                offset_to: 14,
                position: 0,
                text: "long_token".to_string(),
                position_length: 3,
            },
            Token {
                offset_from: 0,
                offset_to: 14,
                position: 1,
                text: "short".to_string(),
                position_length: 1,
            }],
            tokens: vec![
                Token {
                    // Not the last token, yet ends after the last token.
                    offset_from: 0,
                    offset_to: 14,
                    position: 0,
                    text: "long_token".to_string(),
                    position_length: 3,
                },
                Token {
                    offset_from: 0,
                    offset_to: 14,
                    position: 1,
                    text: "short".to_string(),
                    position_length: 1,
                },
            ],
        };
        doc.add_pre_tokenized_text(text, tokens);
        doc.add_text(text, "hello");
@@ -31,7 +31,7 @@ pub struct MoreLikeThisQuery {
#[derive(Debug, PartialEq, Clone)]
enum TargetDocument {
    DocumentAdress(DocAddress),
    DocumentFields(Vec<(Field, Vec<Value>)>),
    DocumentFields(Vec<(Field, Vec<Value<'static>>)>),
}

impl MoreLikeThisQuery {

@@ -160,7 +160,10 @@ impl MoreLikeThisQueryBuilder {
    /// that will be used to compose the resulting query.
    /// This interface is meant to be used when you want to provide your own set of fields
    /// not necessarily from a specific document.
    pub fn with_document_fields(self, doc_fields: Vec<(Field, Vec<Value>)>) -> MoreLikeThisQuery {
    pub fn with_document_fields(
        self,
        doc_fields: Vec<(Field, Vec<Value<'static>>)>,
    ) -> MoreLikeThisQuery {
        MoreLikeThisQuery {
            mlt: self.mlt,
            target: TargetDocument::DocumentFields(doc_fields),
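The `'static` bound follows from `Value<'a>` gaining a lifetime (see the value.rs hunks below): the query stores its target fields for later execution, so it cannot accept values borrowing from a caller's buffer. A hypothetical call-site sketch (`body_field` is illustrative, not from this diff):

```rust
use tantivy::schema::{Field, Value};

fn build_fields(body_field: Field) -> Vec<(Field, Vec<Value<'static>>)> {
    // A `String` converts into an owned Value<'static>; a `&str` borrowed
    // from a local buffer would only yield a Value<'a> and be rejected.
    vec![(body_field, vec![Value::from("query text".to_string())])]
}
```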
@@ -1,35 +1,105 @@
use std::collections::{HashMap, HashSet};
use std::io::{self, Read, Write};
use std::mem;
use std::net::Ipv6Addr;
use std::sync::Arc;
use std::{fmt, mem};

use common::{BinarySerializable, VInt};
use itertools::Either;
use yoke::erased::ErasedArcCart;
use yoke::Yoke;

use super::*;
use crate::schema::value::MaybeOwnedString;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;

/// A group of FieldValue sharing an underlying storage
///
/// Or a single owned FieldValue.
#[derive(Clone)]
enum FieldValueGroup {
    Single(FieldValue<'static>),
    Group(Yoke<VecFieldValue<'static>, ErasedArcCart>),
}

// this NewType is required to make it possible to yoke a vec with non 'static inner values.
#[derive(yoke::Yokeable, Clone)]
struct VecFieldValue<'a>(Vec<FieldValue<'a>>);

impl<'a> std::ops::Deref for VecFieldValue<'a> {
    type Target = Vec<FieldValue<'a>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'a> From<Vec<FieldValue<'a>>> for VecFieldValue<'a> {
    fn from(field_values: Vec<FieldValue>) -> VecFieldValue {
        VecFieldValue(field_values)
    }
}

impl FieldValueGroup {
    fn iter(&self) -> impl Iterator<Item = &FieldValue> {
        match self {
            FieldValueGroup::Single(field_value) => Either::Left(std::iter::once(field_value)),
            FieldValueGroup::Group(field_values) => Either::Right(field_values.get().iter()),
        }
    }

    fn count(&self) -> usize {
        match self {
            FieldValueGroup::Single(_) => 1,
            FieldValueGroup::Group(field_values) => field_values.get().len(),
        }
    }
}

impl From<Vec<FieldValue<'static>>> for FieldValueGroup {
    fn from(field_values: Vec<FieldValue<'static>>) -> FieldValueGroup {
        FieldValueGroup::Group(
            Yoke::new_always_owned(field_values.into())
                .wrap_cart_in_arc()
                .erase_arc_cart(),
        )
    }
}

/// Tantivy's Document is the object that can
/// be indexed and then searched for.
///
/// Documents are fundamentally a collection of unordered couples `(field, value)`.
/// In this list, one field may appear more than once.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
#[derive(Clone, Default)]
// TODO bring back Ser/De and Debug
//#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
//#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
pub struct Document {
    field_values: Vec<FieldValue>,
    field_values: Vec<FieldValueGroup>,
}

impl From<Vec<FieldValue>> for Document {
    fn from(field_values: Vec<FieldValue>) -> Self {
impl fmt::Debug for Document {
    fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
        todo!()
    }
}

impl From<Vec<FieldValue<'static>>> for Document {
    fn from(field_values: Vec<FieldValue<'static>>) -> Self {
        let field_values = vec![field_values.into()];
        Document { field_values }
    }
}
impl PartialEq for Document {
    fn eq(&self, other: &Document) -> bool {
        // super slow, but only here for tests
        let convert_to_comparable_map = |field_values: &[FieldValue]| {
        let convert_to_comparable_map = |field_values| {
            let mut field_value_set: HashMap<Field, HashSet<String>> = Default::default();
            for field_value in field_values.iter() {
            for field_value in field_values {
                // for some reason rustc fails to guess the type
                let field_value: &FieldValue = field_value;
                let json_val = serde_json::to_string(field_value.value()).unwrap();
                field_value_set
                    .entry(field_value.field())

@@ -39,9 +109,9 @@ impl PartialEq for Document {
            field_value_set
        };
        let self_field_values: HashMap<Field, HashSet<String>> =
            convert_to_comparable_map(&self.field_values);
            convert_to_comparable_map(self.field_values());
        let other_field_values: HashMap<Field, HashSet<String>> =
            convert_to_comparable_map(&other.field_values);
            convert_to_comparable_map(other.field_values());
        self_field_values.eq(&other_field_values)
    }
}

@@ -49,12 +119,13 @@
impl Eq for Document {}

impl IntoIterator for Document {
    type Item = FieldValue;
    type Item = FieldValue<'static>;

    type IntoIter = std::vec::IntoIter<FieldValue>;
    type IntoIter = std::vec::IntoIter<FieldValue<'static>>;

    fn into_iter(self) -> Self::IntoIter {
        self.field_values.into_iter()
        todo!()
        // self.field_values.into_iter()
    }
}

@@ -84,7 +155,7 @@ impl Document {

    /// Add a text field.
    pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
        let value = Value::Str(text.to_string());
        let value = Value::Str(MaybeOwnedString::from_string(text.to_string()));
        self.add_field_value(field, value);
    }

@@ -138,15 +209,35 @@
    }

    /// Add a (field, value) to the document.
    pub fn add_field_value<T: Into<Value>>(&mut self, field: Field, typed_val: T) {
    pub fn add_field_value<T: Into<Value<'static>>>(&mut self, field: Field, typed_val: T) {
        let value = typed_val.into();
        let field_value = FieldValue { field, value };
        self.field_values.push(field_value);
        self.field_values.push(FieldValueGroup::Single(field_value));
    }

    /// Add multiple borrowed values, also taking the container they're borrowing from
    // TODO add a try_ variant?
    pub fn add_borrowed_values<T, F>(&mut self, storage: T, f: F)
    where
        T: Send + Sync + 'static,
        F: FnOnce(&T) -> Vec<FieldValue>,
    {
        let yoke =
            Yoke::attach_to_cart(Arc::new(storage), |storage| f(storage).into()).erase_arc_cart();

        self.field_values.push(FieldValueGroup::Group(yoke));
    }

    /// field_values accessor
    pub fn field_values(&self) -> &[FieldValue] {
        &self.field_values
    pub fn field_values(&self) -> impl Iterator<Item = &FieldValue> {
        self.field_values.iter().flat_map(|group| group.iter())
    }

    /// Return the total number of values
    ///
    /// More efficient than calling `self.field_values().count()`
    pub fn value_count(&self) -> usize {
        self.field_values.iter().map(|group| group.count()).sum()
    }

    /// Sort and groups the field_values by field.

@@ -154,7 +245,7 @@
    /// The result of this method is not cached and is
    /// computed on the fly when this method is called.
    pub fn get_sorted_field_values(&self) -> Vec<(Field, Vec<&Value>)> {
        let mut field_values: Vec<&FieldValue> = self.field_values().iter().collect();
        let mut field_values: Vec<&FieldValue> = self.field_values().collect();
        field_values.sort_by_key(|field_value| field_value.field());

        let mut field_values_it = field_values.into_iter();

@@ -189,6 +280,7 @@
    pub fn get_all(&self, field: Field) -> impl Iterator<Item = &Value> {
        self.field_values
            .iter()
            .flat_map(|group| group.iter())
            .filter(move |field_value| field_value.field() == field)
            .map(FieldValue::value)
    }

@@ -202,7 +294,6 @@
    pub fn serialize_stored<W: Write>(&self, schema: &Schema, writer: &mut W) -> io::Result<()> {
        let stored_field_values = || {
            self.field_values()
                .iter()
                .filter(|field_value| schema.get_field_entry(field_value.field()).is_stored())
        };
        let num_field_values = stored_field_values().count();

@@ -216,7 +307,9 @@
            } => {
                let field_value = FieldValue {
                    field: *field,
                    value: Value::Str(pre_tokenized_text.text.to_string()),
                    value: Value::Str(MaybeOwnedString::from_string(
                        pre_tokenized_text.text.to_string(),
                    )),
                };
                field_value.serialize(writer)?;
            }

@@ -230,7 +323,7 @@
impl BinarySerializable for Document {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let field_values = self.field_values();
        VInt(field_values.len() as u64).serialize(writer)?;
        VInt(self.value_count() as u64).serialize(writer)?;
        for field_value in field_values {
            field_value.serialize(writer)?;
        }

@@ -259,7 +352,7 @@ mod tests {
        let text_field = schema_builder.add_text_field("title", TEXT);
        let mut doc = Document::default();
        doc.add_text(text_field, "My title");
        assert_eq!(doc.field_values().len(), 1);
        assert_eq!(doc.value_count(), 1);
    }

    #[test]

@@ -273,7 +366,7 @@
                .clone(),
        );
        doc.add_text(Field::from_field_id(1), "hello");
        assert_eq!(doc.field_values().len(), 2);
        assert_eq!(doc.value_count(), 2);
        let mut payload: Vec<u8> = Vec::new();
        doc.serialize(&mut payload).unwrap();
        assert_eq!(payload.len(), 26);
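Taken together, the new `Document` plumbing can be exercised like this. A sketch against this branch's API, mirroring the benchmark above (field names are illustrative):

```rust
use tantivy::schema::{FieldValue, SchemaBuilder, TEXT};
use tantivy::Document;

fn main() {
    let mut schema_builder = SchemaBuilder::new();
    let body = schema_builder.add_text_field("body", TEXT);
    let _schema = schema_builder.build();

    let mut doc = Document::new();
    // One owned storage, many values borrowing from it: the closure's
    // FieldValues live exactly as long as the Arc-wrapped String does.
    doc.add_borrowed_values("a b c".to_string(), |text| {
        text.split(' ')
            .map(|token| FieldValue::new(body, token.into()))
            .collect()
    });
    doc.add_text(body, "owned"); // still works, stored as a Single group
    assert_eq!(doc.value_count(), 4);
    assert_eq!(doc.field_values().count(), 4);
}
```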
@@ -9,6 +9,7 @@ use super::ip_options::IpAddrOptions;
use super::{Cardinality, IntoIpv6Addr};
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::value::MaybeOwnedString;
use crate::schema::{
    DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing,
    TextOptions, Value,

@@ -329,7 +330,7 @@ impl FieldType {
    /// Tantivy will not try to cast values.
    /// For instance, If the json value is the integer `3` and the
    /// target field is a `Str`, this method will return an Error.
    pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
    pub fn value_from_json(&self, json: JsonValue) -> Result<Value<'static>, ValueParsingError> {
        match json {
            JsonValue::String(field_text) => {
                match self {

@@ -341,7 +342,7 @@
                    })?;
                    Ok(DateTime::from_utc(dt_with_fixed_tz).into())
                }
                FieldType::Str(_) => Ok(Value::Str(field_text)),
                FieldType::Str(_) => Ok(Value::Str(MaybeOwnedString::from_string(field_text))),
                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
                    Err(ValueParsingError::TypeError {
                        expected: "an integer",
@@ -7,12 +7,13 @@ use crate::schema::{Field, Value};
/// `FieldValue` holds together a `Field` and its `Value`.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct FieldValue {
#[serde(bound(deserialize = "'a: 'de, 'de: 'a"))]
pub struct FieldValue<'a> {
    pub field: Field,
    pub value: Value,
    pub value: Value<'a>,
}

impl FieldValue {
impl<'a> FieldValue<'a> {
    /// Constructor
    pub fn new(field: Field, value: Value) -> FieldValue {
        FieldValue { field, value }

@@ -29,13 +30,13 @@
    }
}

impl From<FieldValue> for Value {
    fn from(field_value: FieldValue) -> Self {
impl<'a> From<FieldValue<'a>> for Value<'a> {
    fn from(field_value: FieldValue<'a>) -> Self {
        field_value.value
    }
}

impl BinarySerializable for FieldValue {
impl<'a> BinarySerializable for FieldValue<'a> {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        self.field.serialize(writer)?;
        self.value.serialize(writer)
@@ -10,4 +10,5 @@ use crate::schema::Value;
/// A `NamedFieldDocument` is a simple representation of a document
/// as a `BTreeMap<String, Vec<Value>>`.
#[derive(Debug, Deserialize, Serialize)]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value>>);
#[serde(bound(deserialize = "'static: 'de, 'de: 'static"))]
pub struct NamedFieldDocument(pub BTreeMap<String, Vec<Value<'static>>>);
@@ -308,7 +308,11 @@ impl Schema {
        let mut field_map = BTreeMap::new();
        for (field, field_values) in doc.get_sorted_field_values() {
            let field_name = self.get_field_name(field);
            let values: Vec<Value> = field_values.into_iter().cloned().collect();
            let values: Vec<Value> = field_values
                .into_iter()
                .cloned()
                .map(Value::into_owned)
                .collect();
            field_map.insert(field_name.to_string(), values);
        }
        NamedFieldDocument(field_map)

@@ -338,20 +342,21 @@
        if let Some(field) = self.get_field(&field_name) {
            let field_entry = self.get_field_entry(field);
            let field_type = field_entry.field_type();
            // TODO rewrite this with shared allocation?
            match json_value {
                JsonValue::Array(json_items) => {
                    for json_item in json_items {
                        let value = field_type
                            .value_from_json(json_item)
                            .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
                        doc.add_field_value(field, value);
                        doc.add_field_value(field, value.into_owned());
                    }
                }
                _ => {
                    let value = field_type
                        .value_from_json(json_value)
                        .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
                    doc.add_field_value(field, value);
                    doc.add_field_value(field, value.into_owned());
                }
            }
        }

@@ -706,7 +711,7 @@ mod tests {
        let schema = schema_builder.build();
        {
            let doc = schema.parse_document("{}").unwrap();
            assert!(doc.field_values().is_empty());
            assert_eq!(doc.value_count(), 0);
        }
        {
            let doc = schema
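A sketch of the round trip this hunk touches (hypothetical field name; API as in this branch): parsing borrows nothing from the input, because every parsed value is promoted with `into_owned`.

```rust
use tantivy::schema::{SchemaBuilder, TEXT};

fn main() {
    let mut schema_builder = SchemaBuilder::new();
    let _body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    // parse_document promotes each parsed value to Value<'static>,
    // so the returned Document outlives the JSON input string.
    let json = String::from(r#"{"body": ["hello", "world"]}"#);
    let doc = schema.parse_document(&json).unwrap();
    drop(json);
    assert_eq!(doc.value_count(), 2);

    // to_named_doc clones the values into owned form as well.
    let named = schema.to_named_doc(&doc);
    assert_eq!(named.0["body"].len(), 2);
}
```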
@@ -1,6 +1,7 @@
use std::fmt;
use std::net::Ipv6Addr;

pub use not_safe::MaybeOwnedString;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Map;

@@ -12,9 +13,9 @@ use crate::DateTime;
/// Value represents the value of a any field.
/// It is an enum over all over all of the possible field type.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
pub enum Value<'a> {
    /// The str type is used for any text information.
    Str(String),
    Str(MaybeOwnedString<'a>),
    /// Pre-tokenized str type,
    PreTokStr(PreTokenizedString),
    /// Unsigned 64-bits Integer `u64`

@@ -30,16 +31,38 @@
    /// Facet
    Facet(Facet),
    /// Arbitrarily sized byte array
    // TODO allow Cow<'a, [u8]>
    Bytes(Vec<u8>),
    /// Json object value.
    // TODO allow Cow keys and borrowed values
    JsonObject(serde_json::Map<String, serde_json::Value>),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
    IpAddr(Ipv6Addr),
}

impl Eq for Value {}
impl<'a> Value<'a> {
    /// Convert a borrowing [`Value`] to an owning one.
    pub fn into_owned(self) -> Value<'static> {
        use Value::*;
        match self {
            Str(val) => Str(MaybeOwnedString::from_string(val.into_string())),
            PreTokStr(val) => PreTokStr(val),
            U64(val) => U64(val),
            I64(val) => I64(val),
            F64(val) => F64(val),
            Bool(val) => Bool(val),
            Date(val) => Date(val),
            Facet(val) => Facet(val),
            Bytes(val) => Bytes(val),
            JsonObject(val) => JsonObject(val),
            IpAddr(val) => IpAddr(val),
        }
    }
}

impl Serialize for Value {
impl<'a> Eq for Value<'a> {}

impl<'a> Serialize for Value<'a> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where S: Serializer {
        match *self {

@@ -65,13 +88,13 @@
    }
}

impl<'de> Deserialize<'de> for Value {
impl<'de> Deserialize<'de> for Value<'de> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where D: Deserializer<'de> {
        struct ValueVisitor;

        impl<'de> Visitor<'de> for ValueVisitor {
            type Value = Value;
            type Value = Value<'de>;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string or u32")

@@ -93,12 +116,13 @@
                Ok(Value::Bool(v))
            }

            // TODO add visit_borrowed_str
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
                Ok(Value::Str(v.to_owned()))
                Ok(Value::Str(MaybeOwnedString::from_string(v.to_owned())))
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E> {
                Ok(Value::Str(v))
                Ok(Value::Str(MaybeOwnedString::from_string(v)))
            }
        }

@@ -106,7 +130,7 @@
    }
}

impl Value {
impl<'a> Value<'a> {
    /// Returns the text value, provided the value is of the `Str` type.
    /// (Returns `None` if the value is not of the `Str` type).
    pub fn as_text(&self) -> Option<&str> {

@@ -224,86 +248,87 @@
    }
}

impl From<String> for Value {
    fn from(s: String) -> Value {
        Value::Str(s)
impl From<String> for Value<'static> {
    fn from(s: String) -> Value<'static> {
        Value::Str(MaybeOwnedString::from_string(s))
    }
}

impl From<Ipv6Addr> for Value {
    fn from(v: Ipv6Addr) -> Value {
impl From<Ipv6Addr> for Value<'static> {
    fn from(v: Ipv6Addr) -> Value<'static> {
        Value::IpAddr(v)
    }
}

impl From<u64> for Value {
    fn from(v: u64) -> Value {
impl From<u64> for Value<'static> {
    fn from(v: u64) -> Value<'static> {
        Value::U64(v)
    }
}

impl From<i64> for Value {
    fn from(v: i64) -> Value {
impl From<i64> for Value<'static> {
    fn from(v: i64) -> Value<'static> {
        Value::I64(v)
    }
}

impl From<f64> for Value {
    fn from(v: f64) -> Value {
impl From<f64> for Value<'static> {
    fn from(v: f64) -> Value<'static> {
        Value::F64(v)
    }
}

impl From<bool> for Value {
impl From<bool> for Value<'static> {
    fn from(b: bool) -> Self {
        Value::Bool(b)
    }
}

impl From<DateTime> for Value {
    fn from(dt: DateTime) -> Value {
impl From<DateTime> for Value<'static> {
    fn from(dt: DateTime) -> Value<'static> {
        Value::Date(dt)
    }
}

impl<'a> From<&'a str> for Value {
    fn from(s: &'a str) -> Value {
        Value::Str(s.to_string())
impl<'a> From<&'a str> for Value<'a> {
    fn from(s: &'a str) -> Value<'a> {
        Value::Str(MaybeOwnedString::from_str(s))
    }
}

impl<'a> From<&'a [u8]> for Value {
    fn from(bytes: &'a [u8]) -> Value {
// TODO change lifetime to 'a
impl<'a> From<&'a [u8]> for Value<'static> {
    fn from(bytes: &'a [u8]) -> Value<'static> {
        Value::Bytes(bytes.to_vec())
    }
}

impl From<Facet> for Value {
    fn from(facet: Facet) -> Value {
impl From<Facet> for Value<'static> {
    fn from(facet: Facet) -> Value<'static> {
        Value::Facet(facet)
    }
}

impl From<Vec<u8>> for Value {
    fn from(bytes: Vec<u8>) -> Value {
impl From<Vec<u8>> for Value<'static> {
    fn from(bytes: Vec<u8>) -> Value<'static> {
        Value::Bytes(bytes)
    }
}

impl From<PreTokenizedString> for Value {
    fn from(pretokenized_string: PreTokenizedString) -> Value {
impl From<PreTokenizedString> for Value<'static> {
    fn from(pretokenized_string: PreTokenizedString) -> Value<'static> {
        Value::PreTokStr(pretokenized_string)
    }
}

impl From<serde_json::Map<String, serde_json::Value>> for Value {
    fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value {
impl From<serde_json::Map<String, serde_json::Value>> for Value<'static> {
    fn from(json_object: serde_json::Map<String, serde_json::Value>) -> Value<'static> {
        Value::JsonObject(json_object)
    }
}

impl From<serde_json::Value> for Value {
    fn from(json_value: serde_json::Value) -> Value {
impl From<serde_json::Value> for Value<'static> {
    fn from(json_value: serde_json::Value) -> Value<'static> {
        match json_value {
            serde_json::Value::Object(json_object) => Value::JsonObject(json_object),
            _ => {

@@ -320,7 +345,7 @@ mod binary_serialize {
    use common::{f64_to_u64, u64_to_f64, BinarySerializable};
    use fastfield_codecs::MonotonicallyMappableToU128;

    use super::Value;
    use super::{MaybeOwnedString, Value};
    use crate::schema::Facet;
    use crate::tokenizer::PreTokenizedString;
    use crate::DateTime;

@@ -341,12 +366,13 @@

    const TOK_STR_CODE: u8 = 0;

    impl BinarySerializable for Value {
    impl<'a> BinarySerializable for Value<'a> {
        fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
            match *self {
                Value::Str(ref text) => {
                    TEXT_CODE.serialize(writer)?;
                    text.serialize(writer)
                    // TODO impl trait for MaybeOwnedString
                    text.as_str().to_owned().serialize(writer)
                }
                Value::PreTokStr(ref tok_str) => {
                    EXT_CODE.serialize(writer)?;

@@ -408,7 +434,7 @@
            match type_code {
                TEXT_CODE => {
                    let text = String::deserialize(reader)?;
                    Ok(Value::Str(text))
                    Ok(Value::Str(MaybeOwnedString::from_string(text)))
                }
                U64_CODE => {
                    let value = u64::deserialize(reader)?;

@@ -550,3 +576,104 @@ mod tests {
        assert_eq!(serialized_value_json, r#""1996-12-20T01:39:57Z""#);
    }
}

mod not_safe {
    use std::ops::Deref;

    union Ref<'a, T: ?Sized> {
        shared: &'a T,
        uniq: &'a mut T,
    }

    pub struct MaybeOwnedString<'a> {
        string: Ref<'a, str>,
        capacity: usize,
    }

    impl<'a> MaybeOwnedString<'a> {
        pub fn from_str(string: &'a str) -> MaybeOwnedString<'a> {
            MaybeOwnedString {
                string: Ref { shared: string },
                capacity: 0,
            }
        }

        pub fn from_string(mut string: String) -> MaybeOwnedString<'static> {
            string.shrink_to_fit(); // <= actually important for safety, todo use the Vec .as_ptr instead

            let mut s = std::mem::ManuallyDrop::new(string);
            let ptr = s.as_mut_ptr();
            let len = s.len();
            let capacity = s.capacity();

            let string = unsafe {
                std::str::from_utf8_unchecked_mut(std::slice::from_raw_parts_mut(ptr, len))
            };
            MaybeOwnedString {
                string: Ref { uniq: string },
                capacity,
            }
        }

        pub fn into_string(mut self) -> String {
            if self.capacity != 0 {
                let string = unsafe { &mut self.string.uniq };
                unsafe {
                    return String::from_raw_parts(string.as_mut_ptr(), self.len(), self.capacity);
                };
            }
            self.deref().to_owned()
        }

        pub fn as_str(&self) -> &str {
            self.deref()
        }
    }

    impl<'a> Deref for MaybeOwnedString<'a> {
        type Target = str;

        #[inline]
        fn deref(&self) -> &str {
            unsafe { self.string.shared }
        }
    }

    impl<'a> Drop for MaybeOwnedString<'a> {
        fn drop(&mut self) {
            // if capacity is 0, either it's an empty String so there is no dealloc to do, or it's
            // borrowed
            if self.capacity != 0 {
                let string = unsafe { &mut self.string.uniq };
                unsafe { String::from_raw_parts(string.as_mut_ptr(), self.len(), self.capacity) };
            }
        }
    }

    impl<'a> Clone for MaybeOwnedString<'a> {
        fn clone(&self) -> Self {
            if self.capacity == 0 {
                MaybeOwnedString {
                    string: Ref {
                        shared: unsafe { self.string.shared },
                    },
                    capacity: 0,
                }
            } else {
                MaybeOwnedString::from_string(self.deref().to_owned())
            }
        }
    }

    impl<'a> std::fmt::Debug for MaybeOwnedString<'a> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.write_str(self.deref())
        }
    }

    impl<'a> PartialEq for MaybeOwnedString<'a> {
        fn eq(&self, other: &Self) -> bool {
            self.deref() == other.deref()
        }
    }
}
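The invariant the `not_safe` module leans on: `capacity == 0` marks a borrowed (or empty) string with nothing to deallocate, while a non-zero capacity means the struct owns a `String`'s buffer and must rebuild and free it in `Drop`. A hedged illustration of the intended semantics only (the module path is as introduced in this diff, and the soundness of the union-based layout and of `into_string` interacting with `Drop` is exactly what the TODOs above are about):

```rust
use tantivy::schema::value::MaybeOwnedString; // path assumed from this diff

fn main() {
    // Borrowed: no allocation is taken over, Drop is a no-op.
    let borrowed = MaybeOwnedString::from_str("borrowed");
    assert_eq!(borrowed.as_str(), "borrowed");

    // Owned: the String's buffer is adopted and freed again in Drop.
    let owned = MaybeOwnedString::from_string(String::from("owned"));
    assert_eq!(owned.as_str(), "owned");
}
```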