This commit is contained in:
Paul Masurel
2026-01-16 13:56:21 +01:00
parent 6f00d96127
commit cf632673ac
5 changed files with 60 additions and 38 deletions

View File

@@ -7,7 +7,9 @@ use common::OwnedBytes;
use serde::{Deserialize, Serialize};
pub use standard::StandardCodec;
use crate::{codec::postings::PostingsCodec, postings::Postings, schema::IndexRecordOption};
use crate::codec::postings::PostingsCodec;
use crate::postings::Postings;
use crate::schema::IndexRecordOption;
pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
type PostingsCodec: PostingsCodec;
@@ -20,7 +22,6 @@ pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
fn postings_codec(&self) -> &Self::PostingsCodec;
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CodecConfiguration {
name: Cow<'static, str>,
@@ -54,8 +55,9 @@ impl Default for CodecConfiguration {
}
}
pub trait CodecPostingsLoader {
fn load_postings_type_erased(&self,
pub trait ObjectSafeCodec: 'static + Send + Sync {
fn load_postings_type_erased(
&self,
doc_freq: u32,
postings_data: OwnedBytes,
record_option: IndexRecordOption,
@@ -64,16 +66,24 @@ pub trait CodecPostingsLoader {
) -> crate::Result<Box<dyn Postings>>;
}
impl<TPostingsCodec: PostingsCodec> CodecPostingsLoader for TPostingsCodec {
fn load_postings_type_erased(&self,
doc_freq: u32,
postings_data: OwnedBytes,
record_option: IndexRecordOption,
requested_option: IndexRecordOption,
positions_data: Option<OwnedBytes>,
) -> crate::Result<Box<dyn Postings>> {
let postings: <Self as PostingsCodec>::Postings = self.load_postings(doc_freq, postings_data, record_option, requested_option, positions_data)?;
let boxed_postings: Box<dyn Postings> = Box::new(postings);
Ok(boxed_postings)
}
/// Blanket implementation: every [`Codec`] can be used as an [`ObjectSafeCodec`].
impl<TCodec: Codec> ObjectSafeCodec for TCodec {
    /// Loads postings through this codec's postings codec and returns them
    /// behind a `Box<dyn Postings>`, hiding the concrete postings type.
    ///
    /// All arguments are forwarded unchanged to `PostingsCodec::load_postings`.
    ///
    /// # Errors
    ///
    /// Any error returned by the underlying `load_postings` call is propagated
    /// (converted into the crate error type by `?`).
    fn load_postings_type_erased(
        &self,
        doc_freq: u32,
        postings_data: OwnedBytes,
        record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
        positions_data: Option<OwnedBytes>,
    ) -> crate::Result<Box<dyn Postings>> {
        let typed_postings = self.postings_codec().load_postings(
            doc_freq,
            postings_data,
            record_option,
            requested_option,
            positions_data,
        )?;
        // The declared return type drives the unsizing coercion to `Box<dyn Postings>`.
        Ok(Box::new(typed_postings))
    }
}

View File

@@ -8,7 +8,7 @@ use crate::query::Bm25Weight;
use crate::schema::IndexRecordOption;
use crate::{DocId, Score};
pub trait PostingsCodec {
pub trait PostingsCodec: Send + Sync + 'static {
type PostingsSerializer: PostingsSerializer;
type PostingsReader: PostingsReader;
type Postings: Postings;
@@ -34,12 +34,14 @@ pub trait PostingsCodec {
requested_option: IndexRecordOption,
) -> std::io::Result<Self::PostingsReader>;
fn load_postings(&self,
doc_freq: u32,
postings_data: OwnedBytes,
record_option: IndexRecordOption,
requested_option: IndexRecordOption,
positions_data: Option<OwnedBytes>) -> io::Result<Self::Postings>;
/// Builds this codec's concrete `Self::Postings` from raw postings bytes.
///
/// * `doc_freq` - document frequency passed through to the postings reader.
/// * `postings_data` - owned bytes holding the serialized postings.
/// * `record_option` - NOTE(review): presumably the option the field was indexed with; confirm
///   against callers.
/// * `requested_option` - NOTE(review): presumably the level of detail the caller wants to read;
///   confirm against callers.
/// * `positions_data` - optional owned bytes holding positions data; `None` when no positions
///   bytes are supplied.
///
/// # Errors
///
/// Returns an `io::Error` if the postings cannot be loaded.
fn load_postings(
&self,
doc_freq: u32,
postings_data: OwnedBytes,
record_option: IndexRecordOption,
requested_option: IndexRecordOption,
positions_data: Option<OwnedBytes>,
) -> io::Result<Self::Postings>;
}
pub trait PostingsSerializer {

View File

@@ -40,22 +40,19 @@ impl PostingsCodec for StandardPostingsCodec {
StandardPostingsReader::open(doc_freq, data, record_option, requested_option)
}
fn load_postings(&self,
doc_freq: u32,
postings_data: common::OwnedBytes,
record_option: IndexRecordOption,
requested_option: IndexRecordOption,
positions_data_opt: Option<common::OwnedBytes>) -> io::Result<Self::Postings> {
fn load_postings(
&self,
doc_freq: u32,
postings_data: common::OwnedBytes,
record_option: IndexRecordOption,
requested_option: IndexRecordOption,
positions_data_opt: Option<common::OwnedBytes>,
) -> io::Result<Self::Postings> {
// Rationalize record_option/requested_option.
let record_option = requested_option.downgrade(record_option);
let block_segment_postings = StandardPostingsReader::open(
doc_freq,
postings_data,
record_option,
requested_option,
)?;
let position_reader =
positions_data_opt.map(PositionReader::open).transpose()?;
let block_segment_postings =
StandardPostingsReader::open(doc_freq, postings_data, record_option, requested_option)?;
let position_reader = positions_data_opt.map(PositionReader::open).transpose()?;
Ok(SegmentPostings::from_block_postings(
block_segment_postings,
position_reader,

View File

@@ -1,4 +1,5 @@
use std::io;
use std::sync::Arc;
use common::json_path_writer::JSON_END_OF_PATH;
use common::{BinarySerializable, ByteCount};
@@ -9,7 +10,9 @@ use itertools::Itertools;
#[cfg(feature = "quickwit")]
use tantivy_fst::automaton::{AlwaysMatch, Automaton};
use crate::codec::postings::PostingsReader as _;
use crate::codec::postings::PostingsCodec;
use crate::codec::standard::postings::StandardPostingsCodec;
use crate::codec::{ObjectSafeCodec, StandardCodec};
use crate::directory::FileSlice;
use crate::positions::PositionReader;
use crate::postings::{BlockSegmentPostings, Postings, SegmentPostings, TermInfo};
@@ -34,6 +37,7 @@ pub struct InvertedIndexReader {
positions_file_slice: FileSlice,
record_option: IndexRecordOption,
total_num_tokens: u64,
codec: Arc<dyn ObjectSafeCodec>,
}
/// Object that records the amount of space used by a field in an inverted index.
@@ -69,6 +73,7 @@ impl InvertedIndexReader {
postings_file_slice: FileSlice,
positions_file_slice: FileSlice,
record_option: IndexRecordOption,
codec: Arc<dyn ObjectSafeCodec>,
) -> io::Result<InvertedIndexReader> {
let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?;
@@ -78,6 +83,7 @@ impl InvertedIndexReader {
positions_file_slice,
record_option,
total_num_tokens,
codec,
})
}
@@ -90,6 +96,7 @@ impl InvertedIndexReader {
positions_file_slice: FileSlice::empty(),
record_option,
total_num_tokens: 0u64,
codec: Arc::new(StandardCodec),
}
}

View File

@@ -6,6 +6,7 @@ use common::{ByteCount, HasLen};
use fnv::FnvHashMap;
use itertools::Itertools;
use crate::codec::ObjectSafeCodec;
use crate::directory::{CompositeFile, FileSlice};
use crate::error::DataCorruption;
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
@@ -47,6 +48,8 @@ pub struct SegmentReader {
store_file: FileSlice,
alive_bitset_opt: Option<AliveBitSet>,
schema: Schema,
codec: Arc<dyn ObjectSafeCodec>,
}
impl SegmentReader {
@@ -149,6 +152,7 @@ impl SegmentReader {
segment: &Segment<C>,
custom_bitset: Option<AliveBitSet>,
) -> crate::Result<SegmentReader> {
let codec: Arc<dyn ObjectSafeCodec> = Arc::new(segment.index().codec().clone());
let termdict_file = segment.open_read(SegmentComponent::Terms)?;
let termdict_composite = CompositeFile::open(&termdict_file)?;
@@ -204,6 +208,7 @@ impl SegmentReader {
alive_bitset_opt,
positions_composite,
schema,
codec,
})
}
@@ -273,6 +278,7 @@ impl SegmentReader {
postings_file,
positions_file,
record_option,
self.codec.clone(),
)?);
// by releasing the lock in between, we may end up opening the inverted index