mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Extend DocumentDeserialize with a stateful variant DocumentDeserializeSeed
This is modelled on Serde's `DeserializeSeed`, including how the relevant API entry points gain a `_seed` variant. It can be used, for example, to obtain runtime field ID values when deserializing a struct field by field, without relying on the order in which the fields are written to or read from the document store.
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::marker::PhantomData;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::{fmt, io};
|
use std::{fmt, io};
|
||||||
|
|
||||||
@@ -6,7 +7,7 @@ use crate::collector::Collector;
|
|||||||
use crate::core::Executor;
|
use crate::core::Executor;
|
||||||
use crate::index::{SegmentId, SegmentReader};
|
use crate::index::{SegmentId, SegmentReader};
|
||||||
use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
|
use crate::query::{Bm25StatisticsProvider, EnableScoring, Query};
|
||||||
use crate::schema::document::DocumentDeserialize;
|
use crate::schema::document::{DocumentDeserialize, DocumentDeserializeSeed};
|
||||||
use crate::schema::{Schema, Term};
|
use crate::schema::{Schema, Term};
|
||||||
use crate::space_usage::SearcherSpaceUsage;
|
use crate::space_usage::SearcherSpaceUsage;
|
||||||
use crate::store::{CacheStats, StoreReader};
|
use crate::store::{CacheStats, StoreReader};
|
||||||
@@ -86,8 +87,17 @@ impl Searcher {
|
|||||||
/// The searcher uses the segment ordinal to route the
|
/// The searcher uses the segment ordinal to route the
|
||||||
/// request to the right `Segment`.
|
/// request to the right `Segment`.
|
||||||
pub fn doc<D: DocumentDeserialize>(&self, doc_address: DocAddress) -> crate::Result<D> {
|
pub fn doc<D: DocumentDeserialize>(&self, doc_address: DocAddress) -> crate::Result<D> {
|
||||||
|
self.doc_seed(doc_address, PhantomData)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A stateful variant of [`doc`][Self::doc].
|
||||||
|
pub fn doc_seed<T: DocumentDeserializeSeed>(
|
||||||
|
&self,
|
||||||
|
doc_address: DocAddress,
|
||||||
|
seed: T,
|
||||||
|
) -> crate::Result<T::Value> {
|
||||||
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
|
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
|
||||||
store_reader.get(doc_address.doc_id)
|
store_reader.get_seed(doc_address.doc_id, seed)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The cache stats for the underlying store reader.
|
/// The cache stats for the underlying store reader.
|
||||||
@@ -109,9 +119,21 @@ impl Searcher {
|
|||||||
&self,
|
&self,
|
||||||
doc_address: DocAddress,
|
doc_address: DocAddress,
|
||||||
) -> crate::Result<D> {
|
) -> crate::Result<D> {
|
||||||
|
self.doc_async_seed(doc_address, PhantomData).await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "quickwit")]
|
||||||
|
/// A stateful variant of [`doc_async`][Self::doc_async].
|
||||||
|
pub async fn doc_async_seed<T: DocumentDeserializeSeed>(
|
||||||
|
&self,
|
||||||
|
doc_address: DocAddress,
|
||||||
|
seed: T,
|
||||||
|
) -> crate::Result<T::Value> {
|
||||||
let executor = self.inner.index.search_executor();
|
let executor = self.inner.index.search_executor();
|
||||||
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
|
let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
|
||||||
store_reader.get_async(doc_address.doc_id, executor).await
|
store_reader
|
||||||
|
.get_async_seed(doc_address.doc_id, executor, seed)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Access the schema associated with the index of this searcher.
|
/// Access the schema associated with the index of this searcher.
|
||||||
|
|||||||
@@ -69,6 +69,28 @@ pub trait DocumentDeserialize: Sized {
|
|||||||
where D: DocumentDeserializer<'de>;
|
where D: DocumentDeserializer<'de>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A stateful extension of [`DocumentDeserialize`].
|
||||||
|
pub trait DocumentDeserializeSeed: Sized {
|
||||||
|
/// The type produced by using this seed.
|
||||||
|
type Value;
|
||||||
|
|
||||||
|
/// Attempts to deserialize `Self::Value` from the given `seed` and `deserializer`.
|
||||||
|
fn deserialize<'de, D>(self, deserializer: D) -> Result<Self::Value, DeserializeError>
|
||||||
|
where D: DocumentDeserializer<'de>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> DocumentDeserializeSeed for PhantomData<T>
|
||||||
|
where T: DocumentDeserialize
|
||||||
|
{
|
||||||
|
/// The type produced by using this seed.
|
||||||
|
type Value = T;
|
||||||
|
|
||||||
|
fn deserialize<'de, D>(self, deserializer: D) -> Result<Self::Value, DeserializeError>
|
||||||
|
where D: DocumentDeserializer<'de> {
|
||||||
|
<T as DocumentDeserialize>::deserialize(deserializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A deserializer that can walk through each entry in the document.
|
/// A deserializer that can walk through each entry in the document.
|
||||||
pub trait DocumentDeserializer<'de> {
|
pub trait DocumentDeserializer<'de> {
|
||||||
/// A indicator as to how many values are in the document.
|
/// A indicator as to how many values are in the document.
|
||||||
|
|||||||
@@ -603,7 +603,7 @@ impl<'a> Iterator for CompactDocObjectIter<'a> {
|
|||||||
container: self.container,
|
container: self.container,
|
||||||
value,
|
value,
|
||||||
};
|
};
|
||||||
return Some((key, value));
|
Some((key, value))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -637,7 +637,7 @@ impl<'a> Iterator for CompactDocArrayIter<'a> {
|
|||||||
container: self.container,
|
container: self.container,
|
||||||
value,
|
value,
|
||||||
};
|
};
|
||||||
return Some(value);
|
Some(value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -169,8 +169,9 @@ use std::mem;
|
|||||||
|
|
||||||
pub(crate) use self::de::BinaryDocumentDeserializer;
|
pub(crate) use self::de::BinaryDocumentDeserializer;
|
||||||
pub use self::de::{
|
pub use self::de::{
|
||||||
ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializer, ObjectAccess,
|
ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializeSeed,
|
||||||
ValueDeserialize, ValueDeserializer, ValueType, ValueVisitor,
|
DocumentDeserializer, ObjectAccess, ValueDeserialize, ValueDeserializer, ValueType,
|
||||||
|
ValueVisitor,
|
||||||
};
|
};
|
||||||
pub use self::default_document::{
|
pub use self::default_document::{
|
||||||
CompactDocArrayIter, CompactDocObjectIter, CompactDocValue, DocParsingError, TantivyDocument,
|
CompactDocArrayIter, CompactDocObjectIter, CompactDocValue, DocParsingError, TantivyDocument,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::iter::Sum;
|
use std::iter::Sum;
|
||||||
|
use std::marker::PhantomData;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
use std::ops::{AddAssign, Range};
|
use std::ops::{AddAssign, Range};
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
@@ -14,7 +15,9 @@ use super::Decompressor;
|
|||||||
use crate::directory::FileSlice;
|
use crate::directory::FileSlice;
|
||||||
use crate::error::DataCorruption;
|
use crate::error::DataCorruption;
|
||||||
use crate::fastfield::AliveBitSet;
|
use crate::fastfield::AliveBitSet;
|
||||||
use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize};
|
use crate::schema::document::{
|
||||||
|
BinaryDocumentDeserializer, DocumentDeserialize, DocumentDeserializeSeed,
|
||||||
|
};
|
||||||
use crate::space_usage::StoreSpaceUsage;
|
use crate::space_usage::StoreSpaceUsage;
|
||||||
use crate::store::index::Checkpoint;
|
use crate::store::index::Checkpoint;
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
@@ -201,11 +204,21 @@ impl StoreReader {
|
|||||||
/// It should not be called to score documents
|
/// It should not be called to score documents
|
||||||
/// for instance.
|
/// for instance.
|
||||||
pub fn get<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
|
pub fn get<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
|
||||||
|
self.get_seed(doc_id, PhantomData)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A stateful version of [`get`][Self::get].
|
||||||
|
pub fn get_seed<T: DocumentDeserializeSeed>(
|
||||||
|
&self,
|
||||||
|
doc_id: DocId,
|
||||||
|
seed: T,
|
||||||
|
) -> crate::Result<T::Value> {
|
||||||
let mut doc_bytes = self.get_document_bytes(doc_id)?;
|
let mut doc_bytes = self.get_document_bytes(doc_id)?;
|
||||||
|
|
||||||
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
||||||
.map_err(crate::TantivyError::from)?;
|
.map_err(crate::TantivyError::from)?;
|
||||||
D::deserialize(deserializer).map_err(crate::TantivyError::from)
|
seed.deserialize(deserializer)
|
||||||
|
.map_err(crate::TantivyError::from)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns raw bytes of a given document.
|
/// Returns raw bytes of a given document.
|
||||||
@@ -237,16 +250,27 @@ impl StoreReader {
|
|||||||
/// Iterator over all Documents in their order as they are stored in the doc store.
|
/// Iterator over all Documents in their order as they are stored in the doc store.
|
||||||
/// Use this, if you want to extract all Documents from the doc store.
|
/// Use this, if you want to extract all Documents from the doc store.
|
||||||
/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
|
/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
|
||||||
pub fn iter<'a: 'b, 'b, D: DocumentDeserialize>(
|
pub fn iter<'a: 'b, 'b, D: DocumentDeserialize + 'b>(
|
||||||
&'b self,
|
&'b self,
|
||||||
alive_bitset: Option<&'a AliveBitSet>,
|
alive_bitset: Option<&'a AliveBitSet>,
|
||||||
) -> impl Iterator<Item = crate::Result<D>> + 'b {
|
) -> impl Iterator<Item = crate::Result<D>> + 'b {
|
||||||
|
self.iter_seed(alive_bitset, &PhantomData)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A stateful variant of [`iter`][Self::iter].
|
||||||
|
pub fn iter_seed<'a: 'b, 'b, T: DocumentDeserializeSeed + Clone + 'b>(
|
||||||
|
&'b self,
|
||||||
|
alive_bitset: Option<&'a AliveBitSet>,
|
||||||
|
seed: &'b T,
|
||||||
|
) -> impl Iterator<Item = crate::Result<T::Value>> + 'b {
|
||||||
self.iter_raw(alive_bitset).map(|doc_bytes_res| {
|
self.iter_raw(alive_bitset).map(|doc_bytes_res| {
|
||||||
let mut doc_bytes = doc_bytes_res?;
|
let mut doc_bytes = doc_bytes_res?;
|
||||||
|
|
||||||
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
||||||
.map_err(crate::TantivyError::from)?;
|
.map_err(crate::TantivyError::from)?;
|
||||||
D::deserialize(deserializer).map_err(crate::TantivyError::from)
|
seed.clone()
|
||||||
|
.deserialize(deserializer)
|
||||||
|
.map_err(crate::TantivyError::from)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -389,11 +413,22 @@ impl StoreReader {
|
|||||||
doc_id: DocId,
|
doc_id: DocId,
|
||||||
executor: &Executor,
|
executor: &Executor,
|
||||||
) -> crate::Result<D> {
|
) -> crate::Result<D> {
|
||||||
|
self.get_async_seed(doc_id, executor, PhantomData).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A stateful variant of [`get_async`][Self::get_async].
|
||||||
|
pub async fn get_async_seed<T: DocumentDeserializeSeed>(
|
||||||
|
&self,
|
||||||
|
doc_id: DocId,
|
||||||
|
executor: &Executor,
|
||||||
|
seed: T,
|
||||||
|
) -> crate::Result<T::Value> {
|
||||||
let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;
|
let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;
|
||||||
|
|
||||||
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
|
||||||
.map_err(crate::TantivyError::from)?;
|
.map_err(crate::TantivyError::from)?;
|
||||||
D::deserialize(deserializer).map_err(crate::TantivyError::from)
|
seed.deserialize(deserializer)
|
||||||
|
.map_err(crate::TantivyError::from)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user