mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 23:50:41 +00:00
Removed specialized postings on SegmentPostings
This commit is contained in:
@@ -4,9 +4,7 @@ use postings::{BlockSegmentPostings, SegmentPostings};
|
||||
use postings::TermInfo;
|
||||
use schema::IndexRecordOption;
|
||||
use schema::Term;
|
||||
use std::cmp;
|
||||
use fastfield::DeleteBitSet;
|
||||
use schema::Schema;
|
||||
use compression::CompressedIntStream;
|
||||
use postings::FreqReadingOption;
|
||||
|
||||
@@ -139,7 +137,6 @@ impl InvertedIndexReader {
|
||||
/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
|
||||
/// with `DocId`s and frequencies.
|
||||
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
|
||||
let field = term.field();
|
||||
let term_info = get!(self.get_term_info(term));
|
||||
Some(self.read_postings_from_terminfo(&term_info, option))
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use fastfield::DeleteBitSet;
|
||||
use std::cell::UnsafeCell;
|
||||
use directory::{ReadOnlySource, SourceRead};
|
||||
use postings::FreqReadingOption;
|
||||
use postings::serializer::PostingsSerializer;
|
||||
|
||||
const EMPTY_POSITIONS: [u32; 0] = [0u32; 0];
|
||||
|
||||
@@ -70,6 +71,24 @@ pub struct SegmentPostings {
|
||||
}
|
||||
|
||||
impl SegmentPostings {
|
||||
|
||||
/// Builds a `SegmentPostings` from a slice of doc ids.
///
/// Each id in `docs` is written through a `PostingsSerializer` (created with
/// its boolean flag set to `false`) into an in-memory buffer. That buffer is
/// then read back as a `BlockSegmentPostings` with `FreqReadingOption::NoFreq`
/// and a doc frequency of `docs.len()`. The result is created with
/// `DeleteBitSet::empty()` and `None` as the last argument of
/// `from_block_postings`.
///
/// NOTE(review): `docs` is presumably expected to be sorted in increasing
/// order; confirm against `PostingsSerializer::write_doc`.
///
/// # Panics
///
/// Panics if the serializer returns an error from `write_doc` or `close_term`.
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
|
||||
let mut buffer = Vec::new();
|
||||
// Inner scope: the serializer mutably borrows `buffer`; the borrow ends at
// the closing brace so that `buffer` can be moved afterwards.
{
|
||||
let mut postings_serializer = PostingsSerializer::new(&mut buffer, false);
|
||||
for &doc in docs {
|
||||
// Every doc is written with a term frequency of 1.
postings_serializer.write_doc(doc, 1u32).unwrap();
|
||||
}
|
||||
postings_serializer.close_term().unwrap();
|
||||
}
|
||||
let data = ReadOnlySource::from(buffer);
|
||||
let block_segment_postings = BlockSegmentPostings::from_data(
|
||||
docs.len(),
|
||||
SourceRead::from(data),
|
||||
FreqReadingOption::NoFreq);
|
||||
SegmentPostings::from_block_postings(block_segment_postings, DeleteBitSet::empty(), None)
|
||||
}
|
||||
|
||||
/// Reads a Segment postings from an &[u8]
|
||||
///
|
||||
/// * `len` - number of documents in the posting list.
|
||||
@@ -314,7 +333,7 @@ impl BlockSegmentPostings {
|
||||
pub(crate) fn from_data(
|
||||
doc_freq: usize,
|
||||
data: SourceRead,
|
||||
freq_reading_option: FreqReadingOption,
|
||||
freq_reading_option: FreqReadingOption
|
||||
) -> BlockSegmentPostings {
|
||||
let num_bitpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE;
|
||||
let num_vint_docs = (doc_freq as usize) - COMPRESSION_BLOCK_SIZE * num_bitpacked_blocks;
|
||||
|
||||
@@ -232,7 +232,7 @@ impl<'a> FieldSerializer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
struct PostingsSerializer<W: Write> {
|
||||
pub struct PostingsSerializer<W: Write> {
|
||||
postings_write: CountingWriter<W>,
|
||||
last_doc_id_encoded: u32,
|
||||
|
||||
@@ -244,7 +244,7 @@ struct PostingsSerializer<W: Write> {
|
||||
}
|
||||
|
||||
impl<W: Write> PostingsSerializer<W> {
|
||||
fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
|
||||
pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
|
||||
PostingsSerializer {
|
||||
postings_write: CountingWriter::wrap(write),
|
||||
|
||||
@@ -257,7 +257,7 @@ impl<W: Write> PostingsSerializer<W> {
|
||||
}
|
||||
}
|
||||
|
||||
fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
|
||||
pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
|
||||
self.doc_ids.push(doc_id);
|
||||
if self.termfreq_enabled {
|
||||
self.term_freqs.push(term_freq as u32);
|
||||
@@ -282,7 +282,7 @@ impl<W: Write> PostingsSerializer<W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close_term(&mut self) -> io::Result<()> {
|
||||
pub fn close_term(&mut self) -> io::Result<()> {
|
||||
if !self.doc_ids.is_empty() {
|
||||
// we have doc ids waiting to be written
|
||||
// this happens when the number of doc ids is
|
||||
|
||||
@@ -13,7 +13,6 @@ mod tests {
|
||||
use collector::tests::TestCollector;
|
||||
use Index;
|
||||
use schema::*;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use schema::IndexRecordOption;
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -9,7 +9,7 @@ pub use self::term_scorer::TermScorer;
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use postings::{DocSet, VecPostings};
|
||||
use postings::{DocSet, SegmentPostings};
|
||||
use query::Scorer;
|
||||
use query::term_query::TermScorer;
|
||||
use query::Query;
|
||||
@@ -59,7 +59,7 @@ mod tests {
|
||||
let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
|
||||
assert_eq!(left_fieldnorms.get(0), 10);
|
||||
assert_eq!(left_fieldnorms.get(1), 4);
|
||||
let left = VecPostings::from(vec![1]);
|
||||
let left = SegmentPostings::create_from_docs(&[1]);
|
||||
let mut left_scorer = TermScorer {
|
||||
idf: 0.30685282,
|
||||
fieldnorm_reader_opt: Some(left_fieldnorms),
|
||||
|
||||
@@ -3,32 +3,24 @@ use DocId;
|
||||
use postings::SkipResult;
|
||||
use fastfield::U64FastFieldReader;
|
||||
use postings::DocSet;
|
||||
use postings::SegmentPostings;
|
||||
use query::Scorer;
|
||||
use postings::Postings;
|
||||
use fastfield::FastFieldReader;
|
||||
|
||||
pub struct TermScorer<TPostings>
|
||||
where
|
||||
TPostings: Postings,
|
||||
{
|
||||
pub struct TermScorer {
|
||||
pub idf: Score,
|
||||
pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
|
||||
pub postings: TPostings,
|
||||
pub postings: SegmentPostings,
|
||||
}
|
||||
|
||||
impl<TPostings> TermScorer<TPostings>
|
||||
where
|
||||
TPostings: Postings,
|
||||
{
|
||||
pub fn postings(&self) -> &TPostings {
|
||||
impl TermScorer {
|
||||
pub fn postings(&self) -> &SegmentPostings {
|
||||
&self.postings
|
||||
}
|
||||
}
|
||||
|
||||
impl<TPostings> DocSet for TermScorer<TPostings>
|
||||
where
|
||||
TPostings: Postings,
|
||||
{
|
||||
impl DocSet for TermScorer {
|
||||
fn advance(&mut self) -> bool {
|
||||
self.postings.advance()
|
||||
}
|
||||
@@ -46,10 +38,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<TPostings> Scorer for TermScorer<TPostings>
|
||||
where
|
||||
TPostings: Postings,
|
||||
{
|
||||
impl Scorer for TermScorer {
|
||||
fn score(&mut self) -> Score {
|
||||
let doc = self.postings.doc();
|
||||
let tf = match self.fieldnorm_reader_opt {
|
||||
|
||||
@@ -39,7 +39,7 @@ impl TermWeight {
|
||||
pub fn specialized_scorer(
|
||||
&self,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<TermScorer<SegmentPostings>> {
|
||||
) -> Result<TermScorer> {
|
||||
let field = self.term.field();
|
||||
let inverted_index = reader.inverted_index(field);
|
||||
let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);
|
||||
|
||||
Reference in New Issue
Block a user