mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-01 08:00:41 +00:00
blop
This commit is contained in:
@@ -7,7 +7,9 @@ use common::OwnedBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
pub use standard::StandardCodec;
|
||||
|
||||
use crate::{codec::postings::PostingsCodec, postings::Postings, schema::IndexRecordOption};
|
||||
use crate::codec::postings::PostingsCodec;
|
||||
use crate::postings::Postings;
|
||||
use crate::schema::IndexRecordOption;
|
||||
|
||||
pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
|
||||
type PostingsCodec: PostingsCodec;
|
||||
@@ -20,7 +22,6 @@ pub trait Codec: Clone + std::fmt::Debug + Send + Sync + 'static {
|
||||
fn postings_codec(&self) -> &Self::PostingsCodec;
|
||||
}
|
||||
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct CodecConfiguration {
|
||||
name: Cow<'static, str>,
|
||||
@@ -54,8 +55,9 @@ impl Default for CodecConfiguration {
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CodecPostingsLoader {
|
||||
fn load_postings_type_erased(&self,
|
||||
pub trait ObjectSafeCodec: 'static + Send + Sync {
|
||||
fn load_postings_type_erased(
|
||||
&self,
|
||||
doc_freq: u32,
|
||||
postings_data: OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
@@ -64,16 +66,24 @@ pub trait CodecPostingsLoader {
|
||||
) -> crate::Result<Box<dyn Postings>>;
|
||||
}
|
||||
|
||||
impl<TPostingsCodec: PostingsCodec> CodecPostingsLoader for TPostingsCodec {
|
||||
fn load_postings_type_erased(&self,
|
||||
doc_freq: u32,
|
||||
postings_data: OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data: Option<OwnedBytes>,
|
||||
) -> crate::Result<Box<dyn Postings>> {
|
||||
let postings: <Self as PostingsCodec>::Postings = self.load_postings(doc_freq, postings_data, record_option, requested_option, positions_data)?;
|
||||
let boxed_postings: Box<dyn Postings> = Box::new(postings);
|
||||
Ok(boxed_postings)
|
||||
}
|
||||
impl<TCodec: Codec> ObjectSafeCodec for TCodec {
|
||||
fn load_postings_type_erased(
|
||||
&self,
|
||||
doc_freq: u32,
|
||||
postings_data: OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data: Option<OwnedBytes>,
|
||||
) -> crate::Result<Box<dyn Postings>> {
|
||||
let postings: <<Self as Codec>::PostingsCodec as PostingsCodec>::Postings =
|
||||
self.postings_codec().load_postings(
|
||||
doc_freq,
|
||||
postings_data,
|
||||
record_option,
|
||||
requested_option,
|
||||
positions_data,
|
||||
)?;
|
||||
let boxed_postings: Box<dyn Postings> = Box::new(postings);
|
||||
Ok(boxed_postings)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ use crate::query::Bm25Weight;
|
||||
use crate::schema::IndexRecordOption;
|
||||
use crate::{DocId, Score};
|
||||
|
||||
pub trait PostingsCodec {
|
||||
pub trait PostingsCodec: Send + Sync + 'static {
|
||||
type PostingsSerializer: PostingsSerializer;
|
||||
type PostingsReader: PostingsReader;
|
||||
type Postings: Postings;
|
||||
@@ -34,12 +34,14 @@ pub trait PostingsCodec {
|
||||
requested_option: IndexRecordOption,
|
||||
) -> std::io::Result<Self::PostingsReader>;
|
||||
|
||||
fn load_postings(&self,
|
||||
doc_freq: u32,
|
||||
postings_data: OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data: Option<OwnedBytes>) -> io::Result<Self::Postings>;
|
||||
fn load_postings(
|
||||
&self,
|
||||
doc_freq: u32,
|
||||
postings_data: OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data: Option<OwnedBytes>,
|
||||
) -> io::Result<Self::Postings>;
|
||||
}
|
||||
|
||||
pub trait PostingsSerializer {
|
||||
|
||||
@@ -40,22 +40,19 @@ impl PostingsCodec for StandardPostingsCodec {
|
||||
StandardPostingsReader::open(doc_freq, data, record_option, requested_option)
|
||||
}
|
||||
|
||||
fn load_postings(&self,
|
||||
doc_freq: u32,
|
||||
postings_data: common::OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data_opt: Option<common::OwnedBytes>) -> io::Result<Self::Postings> {
|
||||
fn load_postings(
|
||||
&self,
|
||||
doc_freq: u32,
|
||||
postings_data: common::OwnedBytes,
|
||||
record_option: IndexRecordOption,
|
||||
requested_option: IndexRecordOption,
|
||||
positions_data_opt: Option<common::OwnedBytes>,
|
||||
) -> io::Result<Self::Postings> {
|
||||
// Rationalize record_option/requested_option.
|
||||
let record_option = requested_option.downgrade(record_option);
|
||||
let block_segment_postings = StandardPostingsReader::open(
|
||||
doc_freq,
|
||||
postings_data,
|
||||
record_option,
|
||||
requested_option,
|
||||
)?;
|
||||
let position_reader =
|
||||
positions_data_opt.map(PositionReader::open).transpose()?;
|
||||
let block_segment_postings =
|
||||
StandardPostingsReader::open(doc_freq, postings_data, record_option, requested_option)?;
|
||||
let position_reader = positions_data_opt.map(PositionReader::open).transpose()?;
|
||||
Ok(SegmentPostings::from_block_postings(
|
||||
block_segment_postings,
|
||||
position_reader,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::{BinarySerializable, ByteCount};
|
||||
@@ -9,7 +10,9 @@ use itertools::Itertools;
|
||||
#[cfg(feature = "quickwit")]
|
||||
use tantivy_fst::automaton::{AlwaysMatch, Automaton};
|
||||
|
||||
use crate::codec::postings::PostingsReader as _;
|
||||
use crate::codec::postings::PostingsCodec;
|
||||
use crate::codec::standard::postings::StandardPostingsCodec;
|
||||
use crate::codec::{ObjectSafeCodec, StandardCodec};
|
||||
use crate::directory::FileSlice;
|
||||
use crate::positions::PositionReader;
|
||||
use crate::postings::{BlockSegmentPostings, Postings, SegmentPostings, TermInfo};
|
||||
@@ -34,6 +37,7 @@ pub struct InvertedIndexReader {
|
||||
positions_file_slice: FileSlice,
|
||||
record_option: IndexRecordOption,
|
||||
total_num_tokens: u64,
|
||||
codec: Arc<dyn ObjectSafeCodec>,
|
||||
}
|
||||
|
||||
/// Object that records the amount of space used by a field in an inverted index.
|
||||
@@ -69,6 +73,7 @@ impl InvertedIndexReader {
|
||||
postings_file_slice: FileSlice,
|
||||
positions_file_slice: FileSlice,
|
||||
record_option: IndexRecordOption,
|
||||
codec: Arc<dyn ObjectSafeCodec>,
|
||||
) -> io::Result<InvertedIndexReader> {
|
||||
let (total_num_tokens_slice, postings_body) = postings_file_slice.split(8);
|
||||
let total_num_tokens = u64::deserialize(&mut total_num_tokens_slice.read_bytes()?)?;
|
||||
@@ -78,6 +83,7 @@ impl InvertedIndexReader {
|
||||
positions_file_slice,
|
||||
record_option,
|
||||
total_num_tokens,
|
||||
codec,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -90,6 +96,7 @@ impl InvertedIndexReader {
|
||||
positions_file_slice: FileSlice::empty(),
|
||||
record_option,
|
||||
total_num_tokens: 0u64,
|
||||
codec: Arc::new(StandardCodec),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ use common::{ByteCount, HasLen};
|
||||
use fnv::FnvHashMap;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::codec::ObjectSafeCodec;
|
||||
use crate::directory::{CompositeFile, FileSlice};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
|
||||
@@ -47,6 +48,8 @@ pub struct SegmentReader {
|
||||
store_file: FileSlice,
|
||||
alive_bitset_opt: Option<AliveBitSet>,
|
||||
schema: Schema,
|
||||
|
||||
codec: Arc<dyn ObjectSafeCodec>,
|
||||
}
|
||||
|
||||
impl SegmentReader {
|
||||
@@ -149,6 +152,7 @@ impl SegmentReader {
|
||||
segment: &Segment<C>,
|
||||
custom_bitset: Option<AliveBitSet>,
|
||||
) -> crate::Result<SegmentReader> {
|
||||
let codec: Arc<dyn ObjectSafeCodec> = Arc::new(segment.index().codec().clone());
|
||||
let termdict_file = segment.open_read(SegmentComponent::Terms)?;
|
||||
let termdict_composite = CompositeFile::open(&termdict_file)?;
|
||||
|
||||
@@ -204,6 +208,7 @@ impl SegmentReader {
|
||||
alive_bitset_opt,
|
||||
positions_composite,
|
||||
schema,
|
||||
codec,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -273,6 +278,7 @@ impl SegmentReader {
|
||||
postings_file,
|
||||
positions_file,
|
||||
record_option,
|
||||
self.codec.clone(),
|
||||
)?);
|
||||
|
||||
// by releasing the lock in between, we may end up opening the inverting index
|
||||
|
||||
Reference in New Issue
Block a user