From cf632673acfe65f0d33c7639a68db88569aa3bb7 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 16 Jan 2026 13:56:21 +0100 Subject: [PATCH] blop --- src/codec/mod.rs | 42 ++++++++++++++++++------------ src/codec/postings/mod.rs | 16 +++++++----- src/codec/standard/postings/mod.rs | 25 ++++++++---------- src/index/inverted_index_reader.rs | 9 ++++++- src/index/segment_reader.rs | 6 +++++ 5 files changed, 60 insertions(+), 38 deletions(-) diff --git a/src/codec/mod.rs b/src/codec/mod.rs index 77a3d6414..2e195c2ea 100644 --- a/src/codec/mod.rs +++ b/src/codec/mod.rs @@ -7,7 +7,9 @@ use common::OwnedBytes; use serde::{Deserialize, Serialize}; pub use standard::StandardCodec; -use crate::{codec::postings::PostingsCodec, postings::Postings, schema::IndexRecordOption}; +use crate::codec::postings::PostingsCodec; +use crate::postings::Postings; +use crate::schema::IndexRecordOption; pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static { type PostingsCodec: PostingsCodec; @@ -20,7 +22,6 @@ pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static { fn postings_codec(&self) -> &Self::PostingsCodec; } - #[derive(Serialize, Deserialize, Clone, Debug)] pub struct CodecConfiguration { name: Cow<'static, str>, @@ -54,8 +55,9 @@ impl Default for CodecConfiguration { } } -pub trait CodecPostingsLoader { - fn load_postings_type_erased(&self, +pub trait ObjectSafeCodec: 'static + Send + Sync { + fn load_postings_type_erased( + &self, doc_freq: u32, postings_data: OwnedBytes, record_option: IndexRecordOption, @@ -64,16 +66,24 @@ pub trait CodecPostingsLoader { ) -> crate::Result>; } -impl CodecPostingsLoader for TPostingsCodec { - fn load_postings_type_erased(&self, - doc_freq: u32, - postings_data: OwnedBytes, - record_option: IndexRecordOption, - requested_option: IndexRecordOption, - positions_data: Option, - ) -> crate::Result> { - let postings: ::Postings = self.load_postings(doc_freq, postings_data, record_option, requested_option, positions_data)?; - let boxed_postings: Box = Box::new(postings); - Ok(boxed_postings) - } +impl ObjectSafeCodec for TCodec { + fn load_postings_type_erased( + &self, + doc_freq: u32, + postings_data: OwnedBytes, + record_option: IndexRecordOption, + requested_option: IndexRecordOption, + positions_data: Option, + ) -> crate::Result> { + let postings: <::PostingsCodec as PostingsCodec>::Postings = + self.postings_codec().load_postings( + doc_freq, + postings_data, + record_option, + requested_option, + positions_data, + )?; + let boxed_postings: Box = Box::new(postings); + Ok(boxed_postings) + } } diff --git a/src/codec/postings/mod.rs b/src/codec/postings/mod.rs index 0ec7702b5..4345a9cfb 100644 --- a/src/codec/postings/mod.rs +++ b/src/codec/postings/mod.rs @@ -8,7 +8,7 @@ use crate::query::Bm25Weight; use crate::schema::IndexRecordOption; use crate::{DocId, Score}; -pub trait PostingsCodec { +pub trait PostingsCodec: Send + Sync + 'static { type PostingsSerializer: PostingsSerializer; type PostingsReader: PostingsReader; type Postings: Postings; @@ -34,12 +34,14 @@ pub trait PostingsCodec { requested_option: IndexRecordOption, ) -> std::io::Result; - fn load_postings(&self, - doc_freq: u32, - postings_data: OwnedBytes, - record_option: IndexRecordOption, - requested_option: IndexRecordOption, - positions_data: Option) -> io::Result; + fn load_postings( + &self, + doc_freq: u32, + postings_data: OwnedBytes, + record_option: IndexRecordOption, + requested_option: IndexRecordOption, + positions_data: Option, + ) -> io::Result; } pub trait PostingsSerializer { diff --git a/src/codec/standard/postings/mod.rs b/src/codec/standard/postings/mod.rs index bc9cc8afc..1f52a94fa 100644 --- a/src/codec/standard/postings/mod.rs +++ b/src/codec/standard/postings/mod.rs @@ -40,22 +40,19 @@ impl PostingsCodec for StandardPostingsCodec { StandardPostingsReader::open(doc_freq, data, record_option, requested_option) } - fn load_postings(&self, - doc_freq: u32, - postings_data: common::OwnedBytes, - record_option: IndexRecordOption, - requested_option: IndexRecordOption, - positions_data_opt: Option) -> io::Result { + fn load_postings( + &self, + doc_freq: u32, + postings_data: common::OwnedBytes, + record_option: IndexRecordOption, + requested_option: IndexRecordOption, + positions_data_opt: Option, + ) -> io::Result { // Rationalize record_option/requested_option. let record_option = requested_option.downgrade(record_option); - let block_segment_postings = StandardPostingsReader::open( - doc_freq, - postings_data, - record_option, - requested_option, - )?; - let position_reader = - positions_data_opt.map(PositionReader::open).transpose()?; + let block_segment_postings = + StandardPostingsReader::open(doc_freq, postings_data, record_option, requested_option)?; + let position_reader = positions_data_opt.map(PositionReader::open).transpose()?; Ok(SegmentPostings::from_block_postings( block_segment_postings, position_reader, diff --git a/src/index/inverted_index_reader.rs b/src/index/inverted_index_reader.rs index 5137619b5..607caa230 100644 --- a/src/index/inverted_index_reader.rs +++ b/src/index/inverted_index_reader.rs @@ -1,4 +1,5 @@ use std::io; +use std::sync::Arc; use common::json_path_writer::JSON_END_OF_PATH; use common::{BinarySerializable, ByteCount}; @@ -9,7 +10,9 @@ use itertools::Itertools; #[cfg(feature = "quickwit")] use tantivy_fst::automaton::{AlwaysMatch, Automaton}; -use crate::codec::postings::PostingsReader as _; +use crate::codec::postings::PostingsCodec; +use crate::codec::standard::postings::StandardPostingsCodec; +use crate::codec::{ObjectSafeCodec, StandardCodec}; use crate::directory::FileSlice; use crate::positions::PositionReader; use crate::postings::{BlockSegmentPostings, Postings, SegmentPostings, TermInfo}; @@ -34,6 +37,7 @@ pub struct InvertedIndexReader { positions_file_slice: FileSlice, record_option: IndexRecordOption, total_num_tokens: u64, + codec: Arc, } /// Object that records the amount of space used by a field in an inverted index. @@ -69,6 +73,7 @@ impl InvertedIndexReader { postings_file_slice: FileSlice, positions_file_slice: FileSlice, record_option: IndexRecordOption, + codec: Arc, ) -> io::Result { let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8); let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?; @@ -78,6 +83,7 @@ impl InvertedIndexReader { positions_file_slice, record_option, total_num_tokens, + codec, }) } @@ -90,6 +96,7 @@ impl InvertedIndexReader { positions_file_slice: FileSlice::empty(), record_option, total_num_tokens: 0u64, + codec: Arc::new(StandardCodec), } } diff --git a/src/index/segment_reader.rs b/src/index/segment_reader.rs index d47b0e6d6..0de9e072d 100644 --- a/src/index/segment_reader.rs +++ b/src/index/segment_reader.rs @@ -6,6 +6,7 @@ use common::{ByteCount, HasLen}; use fnv::FnvHashMap; use itertools::Itertools; +use crate::codec::ObjectSafeCodec; use crate::directory::{CompositeFile, FileSlice}; use crate::error::DataCorruption; use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders}; @@ -47,6 +48,8 @@ pub struct SegmentReader { store_file: FileSlice, alive_bitset_opt: Option, schema: Schema, + + codec: Arc, } impl SegmentReader { @@ -149,6 +152,7 @@ impl SegmentReader { segment: &Segment, custom_bitset: Option, ) -> crate::Result { + let codec: Arc = Arc::new(segment.index().codec().clone()); let termdict_file = segment.open_read(SegmentComponent::Terms)?; let termdict_composite = CompositeFile::open(&termdict_file)?; @@ -204,6 +208,7 @@ impl SegmentReader { alive_bitset_opt, positions_composite, schema, + codec, }) } @@ -273,6 +278,7 @@ impl SegmentReader { postings_file, positions_file, record_option, + self.codec.clone(), )?); // by releasing the lock in between, we may end up opening the inverting index