diff --git a/src/core/searcher.rs b/src/core/searcher.rs index f74c837c4..b9980e5b5 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::marker::PhantomData; use std::sync::Arc; use std::{fmt, io}; @@ -6,7 +7,7 @@ use crate::collector::Collector; use crate::core::Executor; use crate::index::{SegmentId, SegmentReader}; use crate::query::{Bm25StatisticsProvider, EnableScoring, Query}; -use crate::schema::document::DocumentDeserialize; +use crate::schema::document::{DocumentDeserialize, DocumentDeserializeSeed}; use crate::schema::{Schema, Term}; use crate::space_usage::SearcherSpaceUsage; use crate::store::{CacheStats, StoreReader}; @@ -86,8 +87,17 @@ impl Searcher { /// The searcher uses the segment ordinal to route the /// request to the right `Segment`. pub fn doc(&self, doc_address: DocAddress) -> crate::Result { + self.doc_seed(doc_address, PhantomData) + } + + /// A stateful variant of [`doc`][Self::doc]. + pub fn doc_seed( + &self, + doc_address: DocAddress, + seed: T, + ) -> crate::Result { let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize]; - store_reader.get(doc_address.doc_id) + store_reader.get_seed(doc_address.doc_id, seed) } /// The cache stats for the underlying store reader. @@ -109,9 +119,21 @@ impl Searcher { &self, doc_address: DocAddress, ) -> crate::Result { + self.doc_async_seed(doc_address, PhantomData).await + } + + #[cfg(feature = "quickwit")] + /// A stateful variant of [`doc_async`][Self::doc_async]. + pub async fn doc_async_seed( + &self, + doc_address: DocAddress, + seed: T, + ) -> crate::Result { let executor = self.inner.index.search_executor(); let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize]; - store_reader.get_async(doc_address.doc_id, executor).await + store_reader + .get_async_seed(doc_address.doc_id, executor, seed) + .await } /// Access the schema associated with the index of this searcher. 
diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index e80bff2c9..51d30c353 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -69,6 +69,28 @@ pub trait DocumentDeserialize: Sized { where D: DocumentDeserializer<'de>; } +/// A stateful extension of [`DocumentDeserialize`]. +pub trait DocumentDeserializeSeed: Sized { + /// The type produced by using this seed. + type Value; + + /// Attempts to deserialize `Self::Value` from the given `seed` and `deserializer`. + fn deserialize<'de, D>(self, deserializer: D) -> Result + where D: DocumentDeserializer<'de>; +} + +impl DocumentDeserializeSeed for PhantomData +where T: DocumentDeserialize +{ + /// The type produced by using this seed. + type Value = T; + + fn deserialize<'de, D>(self, deserializer: D) -> Result + where D: DocumentDeserializer<'de> { + ::deserialize(deserializer) + } +} + /// A deserializer that can walk through each entry in the document. pub trait DocumentDeserializer<'de> { /// A indicator as to how many values are in the document. 
diff --git a/src/schema/document/default_document.rs b/src/schema/document/default_document.rs index 5b65fc3eb..9abdeba2a 100644 --- a/src/schema/document/default_document.rs +++ b/src/schema/document/default_document.rs @@ -603,7 +603,7 @@ impl<'a> Iterator for CompactDocObjectIter<'a> { container: self.container, value, }; - return Some((key, value)); + Some((key, value)) } } @@ -637,7 +637,7 @@ impl<'a> Iterator for CompactDocArrayIter<'a> { container: self.container, value, }; - return Some(value); + Some(value) } } diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index 91ce894c4..aeb40c941 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -169,8 +169,9 @@ use std::mem; pub(crate) use self::de::BinaryDocumentDeserializer; pub use self::de::{ - ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializer, ObjectAccess, - ValueDeserialize, ValueDeserializer, ValueType, ValueVisitor, + ArrayAccess, DeserializeError, DocumentDeserialize, DocumentDeserializeSeed, + DocumentDeserializer, ObjectAccess, ValueDeserialize, ValueDeserializer, ValueType, + ValueVisitor, }; pub use self::default_document::{ CompactDocArrayIter, CompactDocObjectIter, CompactDocValue, DocParsingError, TantivyDocument, diff --git a/src/store/reader.rs b/src/store/reader.rs index 1e4432e5f..02e563d7a 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -1,5 +1,6 @@ use std::io; use std::iter::Sum; +use std::marker::PhantomData; use std::num::NonZeroUsize; use std::ops::{AddAssign, Range}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -14,7 +15,9 @@ use super::Decompressor; use crate::directory::FileSlice; use crate::error::DataCorruption; use crate::fastfield::AliveBitSet; -use crate::schema::document::{BinaryDocumentDeserializer, DocumentDeserialize}; +use crate::schema::document::{ + BinaryDocumentDeserializer, DocumentDeserialize, DocumentDeserializeSeed, +}; use crate::space_usage::StoreSpaceUsage; use 
crate::store::index::Checkpoint; use crate::DocId; @@ -201,11 +204,21 @@ impl StoreReader { /// It should not be called to score documents /// for instance. pub fn get(&self, doc_id: DocId) -> crate::Result { + self.get_seed(doc_id, PhantomData) + } + + /// A stateful version of [`get`][Self::get]. + pub fn get_seed( + &self, + doc_id: DocId, + seed: T, + ) -> crate::Result { let mut doc_bytes = self.get_document_bytes(doc_id)?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) .map_err(crate::TantivyError::from)?; - D::deserialize(deserializer).map_err(crate::TantivyError::from) + seed.deserialize(deserializer) + .map_err(crate::TantivyError::from) } /// Returns raw bytes of a given document. @@ -237,16 +250,27 @@ impl StoreReader { /// Iterator over all Documents in their order as they are stored in the doc store. /// Use this, if you want to extract all Documents from the doc store. /// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong. - pub fn iter<'a: 'b, 'b, D: DocumentDeserialize>( + pub fn iter<'a: 'b, 'b, D: DocumentDeserialize + 'b>( &'b self, alive_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { + self.iter_seed(alive_bitset, &PhantomData) + } + + /// A stateful variant of [`iter`][Self::iter]. 
+ pub fn iter_seed<'a: 'b, 'b, T: DocumentDeserializeSeed + Clone + 'b>( + &'b self, + alive_bitset: Option<&'a AliveBitSet>, + seed: &'b T, + ) -> impl Iterator> + 'b { self.iter_raw(alive_bitset).map(|doc_bytes_res| { let mut doc_bytes = doc_bytes_res?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) .map_err(crate::TantivyError::from)?; - D::deserialize(deserializer).map_err(crate::TantivyError::from) + seed.clone() + .deserialize(deserializer) + .map_err(crate::TantivyError::from) }) } @@ -389,11 +413,22 @@ impl StoreReader { doc_id: DocId, executor: &Executor, ) -> crate::Result { + self.get_async_seed(doc_id, executor, PhantomData).await + } + + /// A stateful variant of [`get_async`][Self::get_async]. + pub async fn get_async_seed( + &self, + doc_id: DocId, + executor: &Executor, + seed: T, + ) -> crate::Result { let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?; let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes) .map_err(crate::TantivyError::from)?; - D::deserialize(deserializer).map_err(crate::TantivyError::from) + seed.deserialize(deserializer) + .map_err(crate::TantivyError::from) } }