mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-24 04:00:40 +00:00
Abstract tantivy's data storage behind traits for pluggable backends
Extract trait interfaces from tantivy's core reader types so that alternative storage backends (e.g. Quickwit) can provide their own implementations while tantivy's query engine works through dynamic dispatch.

Reader trait extraction:
- SegmentReader is now a trait; the concrete implementation is renamed to TantivySegmentReader.
- DynInvertedIndexReader trait for object-safe dynamic dispatch, plus a typed InvertedIndexReader trait with associated Postings/DocSet types for static dispatch. The concrete reader becomes TantivyInvertedIndexReader.
- StoreReader is now a trait; the concrete implementation is renamed to TantivyStoreReader. get() returns TantivyDocument directly instead of requiring a generic DocumentDeserialize bound.

Typed downcast for performance-critical paths:
- try_downcast_and_call() + TypedInvertedIndexReaderCb allow query weights (TermWeight, PhraseWeight) to attempt a downcast to the concrete TantivyInvertedIndexReader, obtaining typed postings for zero-cost scoring, and falling back to the dynamic path otherwise.
- TermScorer<TPostings> is now generic over its postings type.
- PostingsWithBlockMax trait enables block-max WAND acceleration through the trait boundary.
- block_wand() and block_wand_single_scorer() are generic over PostingsWithBlockMax, and for_each_pruning is dispatched through the SegmentReader trait so custom backends can provide their own block-max implementations.

Searcher decoupled from Index:
- New SearcherContext holds schema, executor, and tokenizers.
- Searcher can be constructed from Vec<Arc<dyn SegmentReader>> via Searcher::from_segment_readers(), without needing an Index.
- Searcher::index() is deprecated in favor of Searcher::context().

Postings and DocSet changes:
- Postings trait gains doc_freq() -> DocFreq (Exact/Approximate) and has_freq().
- RawPostingsData struct carries raw postings bytes across the trait boundary for custom reader implementations.
- BlockSegmentPostings::open() takes OwnedBytes instead of FileSlice.
- DocSet gains fill_bitset() method.

Scorer improvements:
- Scorer trait absorbs for_each, for_each_pruning, and explain (previously free functions or on Weight).
- box_scorer() helper avoids double-boxing Box<dyn Scorer>.
- BoxedTermScorer wraps a type-erased term scorer.
- BufferedUnionScorer initialization fixed to avoid an extra advance() on construction.

Other changes:
- Document::to_json() now returns serde_json::Value; the old string serialization is renamed to to_serialized_json().
- DocumentDeserialize removed from the store reader public API.
This commit is contained in:
@@ -7,7 +7,7 @@ use arc_swap::ArcSwap;
|
||||
pub use warming::Warmer;
|
||||
|
||||
use self::warming::WarmingState;
|
||||
use crate::core::searcher::{SearcherGeneration, SearcherInner};
|
||||
use crate::core::searcher::{SearcherContext, SearcherGeneration, SearcherInner};
|
||||
use crate::directory::{Directory, WatchCallback, WatchHandle, META_LOCK};
|
||||
use crate::store::DOCSTORE_CACHE_CAPACITY;
|
||||
use crate::{Index, Inventory, Searcher, SegmentReader, TrackedObject};
|
||||
@@ -189,19 +189,28 @@ impl InnerIndexReader {
|
||||
///
|
||||
/// This function acquires a lock to prevent GC from removing files
|
||||
/// as we are opening our index.
|
||||
fn open_segment_readers(index: &Index) -> crate::Result<Vec<SegmentReader>> {
|
||||
fn open_segment_readers(index: &Index) -> crate::Result<Vec<Arc<dyn SegmentReader>>> {
|
||||
// Prevents segment files from getting deleted while we are in the process of opening them
|
||||
let _meta_lock = index.directory().acquire_lock(&META_LOCK)?;
|
||||
let searchable_segments = index.searchable_segments()?;
|
||||
let segment_readers = searchable_segments
|
||||
.iter()
|
||||
.map(SegmentReader::open)
|
||||
.map(|segment| {
|
||||
let reader =
|
||||
crate::TantivySegmentReader::open_with_custom_alive_set_from_directory(
|
||||
segment.index().directory(),
|
||||
segment.meta(),
|
||||
segment.schema(),
|
||||
None,
|
||||
)?;
|
||||
Ok(Arc::new(reader) as Arc<dyn SegmentReader>)
|
||||
})
|
||||
.collect::<crate::Result<_>>()?;
|
||||
Ok(segment_readers)
|
||||
}
|
||||
|
||||
fn track_segment_readers_in_inventory(
|
||||
segment_readers: &[SegmentReader],
|
||||
segment_readers: &[Arc<dyn SegmentReader>],
|
||||
searcher_generation_counter: &Arc<AtomicU64>,
|
||||
searcher_generation_inventory: &Inventory<SearcherGeneration>,
|
||||
) -> TrackedObject<SearcherGeneration> {
|
||||
@@ -225,10 +234,9 @@ impl InnerIndexReader {
|
||||
searcher_generation_inventory,
|
||||
);
|
||||
|
||||
let schema = index.schema();
|
||||
let context = SearcherContext::from_index(index);
|
||||
let searcher = Arc::new(SearcherInner::new(
|
||||
schema,
|
||||
index.clone(),
|
||||
context,
|
||||
segment_readers,
|
||||
searcher_generation,
|
||||
doc_store_cache_num_blocks,
|
||||
|
||||
Reference in New Issue
Block a user