mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-14 04:52:54 +00:00
merging positions.
This commit is contained in:
@@ -18,6 +18,7 @@ use postings::ChainedPostings;
|
||||
use postings::HasLen;
|
||||
use postings::OffsetPostings;
|
||||
use core::index::SegmentInfo;
|
||||
use compression::NUM_DOCS_PER_BLOCK;
|
||||
use std::cmp::{min, max, Ordering};
|
||||
|
||||
|
||||
@@ -121,14 +122,38 @@ impl<'a> PostingsMerger<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
const EMPTY_ARRAY: [u32; 0] = [];
|
||||
|
||||
/// State carried through a merge: a schema, a list of segment readers,
/// and a `SegmentInfo`.
pub struct IndexMerger {
|
||||
// Schema handed to `IndexMerger::open`.
// NOTE(review): presumably the schema shared by all merged segments — confirm.
schema: Schema,
|
||||
// Readers whose postings are merged; `write_postings` passes this list
// to `PostingsMerger::new`.
readers: Vec<SegmentReader>,
|
||||
// NOTE(review): presumably describes the segment produced by the merge —
// confirm against `open`, which is not fully visible here.
segment_info: SegmentInfo,
|
||||
}
|
||||
|
||||
|
||||
/// Converts the absolute term positions of one document into
/// delta-encoded positions (each value is the gap from the previous one).
///
/// The scratch buffer is owned by the computer and reused across calls,
/// so computing deltas for many documents does not allocate per call once
/// the buffer has grown large enough.
struct DeltaPositionComputer {
    // Growable scratch space. A fixed-size array is not suitable here:
    // the number of positions of a term inside a single document has no
    // fixed upper bound.
    buffer: Vec<u32>,
}

impl DeltaPositionComputer {
    /// Creates a computer with an empty scratch buffer.
    fn new() -> DeltaPositionComputer {
        DeltaPositionComputer { buffer: Vec::new() }
    }

    /// Returns the delta-encoded form of `positions`.
    ///
    /// The first output value is `positions[0]` itself (the gap from 0);
    /// every following value is the difference with the preceding position.
    /// An empty input yields an empty slice.
    ///
    /// `positions` is expected to be sorted in non-decreasing order;
    /// otherwise the subtraction underflows (a panic in debug builds).
    ///
    /// The returned slice borrows the internal buffer and is only valid
    /// until the next call.
    fn compute_delta_positions(&mut self, positions: &[u32]) -> &[u32] {
        // Drop the previous document's deltas but keep the allocation.
        self.buffer.clear();
        self.buffer.reserve(positions.len());
        let mut last_pos = 0u32;
        for &position in positions {
            self.buffer.push(position - last_pos);
            last_pos = position;
        }
        &self.buffer[..]
    }
}
|
||||
|
||||
|
||||
|
||||
impl IndexMerger {
|
||||
pub fn open(schema: Schema, segments: &Vec<Segment>) -> Result<IndexMerger> {
|
||||
let mut readers = Vec::new();
|
||||
@@ -205,12 +230,14 @@ impl IndexMerger {
|
||||
|
||||
fn write_postings(&self, postings_serializer: &mut PostingsSerializer) -> Result<()> {
|
||||
let mut postings_merger = PostingsMerger::new(&self.readers);
|
||||
let mut delta_position_computer = DeltaPositionComputer::new();
|
||||
loop {
|
||||
match postings_merger.next() {
|
||||
Some((term, mut merged_doc_ids)) => {
|
||||
try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
|
||||
while merged_doc_ids.advance() {
|
||||
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), &EMPTY_ARRAY));
|
||||
let delta_positions: &[u32] = delta_position_computer.compute_delta_positions(merged_doc_ids.positions());
|
||||
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), delta_positions));
|
||||
}
|
||||
try!(postings_serializer.close_term());
|
||||
}
|
||||
|
||||
@@ -4,7 +4,14 @@ use postings::docset::DocSet;
|
||||
|
||||
|
||||
|
||||
// Postings trait includes all the infomration
|
||||
// Postings trait defines all of the information
|
||||
// associated with a term.
|
||||
//
|
||||
// List of docids, term freqs and positions.
|
||||
//
|
||||
// Its main implementation is SegmentPostings,
|
||||
// but other implementations mocking SegmentPostings exist,
|
||||
// in order to help merging segments or for testing.
|
||||
pub trait Postings: DocSet {
|
||||
fn term_freq(&self,) -> u32;
|
||||
fn positions(&self) -> &[u32];
|
||||
|
||||
Reference in New Issue
Block a user