From b10227ea442409dceff9a330e60a508e4552847c Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 29 Aug 2016 00:51:45 +0900 Subject: [PATCH] (test not passing) --- src/indexer/index_writer.rs | 10 +- src/indexer/segment_writer.rs | 32 +-- .../{block_appender.rs => block_store.rs} | 137 +++---------- src/postings/mod.rs | 6 +- src/postings/postings_writer.rs | 126 ++++-------- src/postings/recorder.rs | 185 ++++++++++++++---- 6 files changed, 242 insertions(+), 254 deletions(-) rename src/postings/{block_appender.rs => block_store.rs} (61%) diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 814b5d071..2de45d412 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -13,6 +13,7 @@ use std::collections::HashSet; use indexer::merger::IndexMerger; use core::SegmentId; use std::mem::swap; +use postings::BlockStore; use chan; use Result; @@ -33,10 +34,12 @@ pub struct IndexWriter { const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000; -fn index_documents(segment: Segment, +fn index_documents(block_store: &mut BlockStore, + segment: Segment, schema: &Schema, document_iterator: &mut Iterator) -> Result { - let mut segment_writer = try!(SegmentWriter::for_segment(segment, &schema)); + block_store.clear(); + let mut segment_writer = try!(SegmentWriter::for_segment(block_store, segment, &schema)); for doc in document_iterator { try!(segment_writer.add_document(&doc, &schema)); } @@ -63,6 +66,7 @@ impl IndexWriter { let document_receiver_clone = self.document_receiver.clone(); let target_num_docs = self.target_num_docs; let join_handle: JoinHandle<()> = thread::spawn(move || { + let mut block_store = BlockStore::allocate(100_000); loop { let segment = index.new_segment(); let segment_id = segment.id(); @@ -75,7 +79,7 @@ impl IndexWriter { // creating a new segment's files // if no document are available. if document_iterator.peek().is_some() { - let index_result = index_documents(segment, &schema, &mut document_iterator) + let index_result = index_documents(&mut block_store, segment, &schema, &mut document_iterator) .map(|num_docs| (segment_id, num_docs)); segment_ready_sender_clone.send(index_result); } diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 0f9c82a62..96fdec8fc 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -19,8 +19,10 @@ use schema::TextIndexingOptions; use postings::SpecializedPostingsWriter; use postings::{NothingRecorder, TermFrequencyRecorder, TFAndPositionRecorder}; use indexer::segment_serializer::SegmentSerializer; +use postings::BlockStore; -pub struct SegmentWriter { +pub struct SegmentWriter<'a> { + block_store: &'a mut BlockStore, max_doc: DocId, tokenizer: SimpleTokenizer, per_field_postings_writers: Vec>, @@ -61,9 +63,9 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box { } -impl SegmentWriter { +impl<'a> SegmentWriter<'a> { - pub fn for_segment(mut segment: Segment, schema: &Schema) -> Result { + pub fn for_segment(block_store: &'a mut BlockStore, mut segment: Segment, schema: &Schema) -> Result> { let segment_serializer = try!(SegmentSerializer::for_segment(&mut segment)); let per_field_postings_writers = schema.fields() .iter() @@ -72,6 +74,7 @@ impl SegmentWriter { }) .collect(); Ok(SegmentWriter { + block_store: block_store, max_doc: 0, per_field_postings_writers: per_field_postings_writers, fieldnorms_writer: create_fieldnorms_writer(schema), @@ -80,7 +83,7 @@ impl SegmentWriter { fast_field_writers: U32FastFieldsWriter::from_schema(schema), }) } - + // Write on disk all of the stuff that // is still on RAM : // - the dictionary in an fst @@ -91,9 +94,10 @@ impl SegmentWriter { pub fn finalize(mut self,) -> Result<()> { let segment_info = self.segment_info(); for per_field_postings_writer in self.per_field_postings_writers.iter_mut() { - per_field_postings_writer.close(); + per_field_postings_writer.close(&mut self.block_store); } - write(&self.per_field_postings_writers, + write(&mut self.block_store, + &self.per_field_postings_writers, &self.fast_field_writers, &self.fieldnorms_writer, segment_info, @@ -120,7 +124,7 @@ impl SegmentWriter { match tokens.next() { Some(token) => { let term = Term::from_field_text(field, token); - field_posting_writers.suscribe(doc_id, pos, term); + field_posting_writers.suscribe(&mut self.block_store, doc_id, pos, term); pos += 1; num_tokens += 1; }, @@ -130,7 +134,7 @@ impl SegmentWriter { } else { let term = Term::from_field_text(field, field_value.value().text()); - field_posting_writers.suscribe(doc_id, 0, term); + field_posting_writers.suscribe(&mut self.block_store, doc_id, 0, term); } pos += 1; // THIS is to avoid phrase query accross field repetition. @@ -146,7 +150,7 @@ impl SegmentWriter { if u32_options.is_indexed() { for field_value in field_values { let term = Term::from_field_u32(field_value.field(), field_value.value().u32_value()); - field_posting_writers.suscribe(doc_id, 0, term); + field_posting_writers.suscribe(&mut self.block_store, doc_id, 0, term); } } } @@ -181,13 +185,14 @@ impl SegmentWriter { } -fn write(per_field_postings_writers: &Vec>, +fn write(block_store: &BlockStore, + per_field_postings_writers: &Vec>, fast_field_writers: &U32FastFieldsWriter, fieldnorms_writer: &U32FastFieldsWriter, segment_info: SegmentInfo, mut serializer: SegmentSerializer) -> Result<()> { for per_field_postings_writer in per_field_postings_writers.iter() { - try!(per_field_postings_writer.serialize(serializer.get_postings_serializer())); + try!(per_field_postings_writer.serialize(block_store, serializer.get_postings_serializer())); } try!(fast_field_writers.serialize(serializer.get_fast_field_serializer())); try!(fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer())); @@ -196,9 +201,10 @@ fn write(per_field_postings_writers: &Vec>, Ok(()) } -impl SerializableSegment for SegmentWriter { +impl<'a> SerializableSegment for SegmentWriter<'a> { fn write(&self, serializer: SegmentSerializer) -> Result<()> { - write(&self.per_field_postings_writers, + write(&self.block_store, + &self.per_field_postings_writers, &self.fast_field_writers, &self.fieldnorms_writer, self.segment_info(), diff --git a/src/postings/block_appender.rs b/src/postings/block_store.rs similarity index 61% rename from src/postings/block_appender.rs rename to src/postings/block_store.rs index 6da7c9ee5..872f695a7 100644 --- a/src/postings/block_appender.rs +++ b/src/postings/block_store.rs @@ -1,7 +1,6 @@ use compression::NUM_DOCS_PER_BLOCK; -use DocId; -const BLOCK_SIZE: u32 = NUM_DOCS_PER_BLOCK as u32; +pub const BLOCK_SIZE: u32 = NUM_DOCS_PER_BLOCK as u32; struct Block { data: [u32; BLOCK_SIZE as usize], @@ -27,36 +26,43 @@ struct ListInfo { pub struct BlockStore { lists: Vec, blocks: Vec, - free_blocks: Vec, + free_block_id: usize, } impl BlockStore { - pub fn allocate(num_blocks: usize) -> BlockStore { + pub fn allocate(num_blocks: usize) -> BlockStore { BlockStore { - lists: Vec::with_capacity(1_000_000), + lists: Vec::with_capacity(100_000), blocks: (0 .. num_blocks).map(|_| Block::new()).collect(), - free_blocks: (0u32 .. num_blocks as u32).collect() + free_block_id: 0, } } - fn new_list(&mut self, first_el: u32) -> u32 { + pub fn new_list(&mut self) -> u32 { let res = self.lists.len() as u32; let new_block_id = self.new_block().unwrap(); - self.blocks[new_block_id as usize].data[0] = first_el; self.lists.push(ListInfo { first: new_block_id, last: new_block_id, - len: 1, + len: 0, }); res } + pub fn clear(&mut self,) { + self.free_block_id = 0; + } + fn new_block(&mut self,) -> Option { - self.free_blocks.pop() - .map(|block_id| { - self.blocks[block_id as usize].next = u32::max_value(); - block_id - }) + let block_id = self.free_block_id; + self.free_block_id += 1; + if block_id >= self.blocks.len() { + None + } + else { + self.blocks[block_id].next = u32::max_value(); + Some(block_id as u32) + } } fn get_list_info(&mut self, list_id: u32) -> &mut ListInfo { @@ -66,9 +72,7 @@ impl BlockStore { fn block_id_to_append(&mut self, list_id: u32) -> u32 { let list_info: ListInfo = self.lists[list_id as usize]; - // get_list_info(list_id).len % BLOCK_SIZE == 0; - // let new_block_required: bool = self.get_list_info(list_id).len % BLOCK_SIZE == 0; - if list_info.len % BLOCK_SIZE == 0 { + if list_info.len != 0 && list_info.len % BLOCK_SIZE == 0 { // we need to add a fresh new block. let new_block_id: u32 = { self.new_block().unwrap() }; let last_block_id: usize; @@ -87,7 +91,6 @@ impl BlockStore { } pub fn push(&mut self, list_id: u32, val: u32) { - let new_block_required: bool = self.get_list_info(list_id).len % BLOCK_SIZE == 0; let block_id: u32 = self.block_id_to_append(list_id); let list_len: u32; { @@ -139,107 +142,23 @@ impl<'a> Iterator for BlockIterator<'a> { } - - - -pub struct BlockAppender { - blocks: Vec>, - doc_freq: usize, -} - -impl BlockAppender { - - pub fn new() -> BlockAppender { - BlockAppender { - blocks: Vec::new(), - doc_freq: 0, - } - } - - pub fn push(&mut self, doc_id: DocId) { - if self.doc_freq % NUM_DOCS_PER_BLOCK == 0 { - self.blocks.push(Box::new([0u32; NUM_DOCS_PER_BLOCK ])); - } - self.blocks[self.doc_freq / NUM_DOCS_PER_BLOCK][self.doc_freq % NUM_DOCS_PER_BLOCK] = doc_id; - self.doc_freq += 1; - } - - pub fn last(&self) -> Option { - if self.doc_freq == 0 { - return None - } - else { - Some(self.get(self.doc_freq - 1)) - } - } - - pub fn len(&self,) -> usize { - self.doc_freq - } - - - pub fn get(&self, cursor: usize) -> DocId { - self.blocks[cursor / NUM_DOCS_PER_BLOCK][cursor % NUM_DOCS_PER_BLOCK] - } - - - pub fn iter(&self,) -> IterBlockAppender { - IterBlockAppender { - cursor: 0, - block_appender: &self, - } - } -} - - - -pub struct IterBlockAppender<'a> { - cursor: usize, - block_appender: &'a BlockAppender, -} - - -impl<'a> Iterator for IterBlockAppender<'a> { - - type Item = DocId; - - fn next(&mut self) -> Option { - if self.cursor == self.block_appender.doc_freq { - return None - } - else { - let res = self.block_appender.get(self.cursor); - self.cursor += 1; - Some(res) - } - - } -} - - - - - - #[cfg(test)] mod tests { - use super::*; - #[test] pub fn test_block_store() { let mut block_store = BlockStore::allocate(1_000); - let list_2 = block_store.new_list(0); - let list_3 = block_store.new_list(0); - let list_4 = block_store.new_list(0); - let list_5 = block_store.new_list(0); - for i in 1 .. 2_000 { + let list_2 = block_store.new_list(); + let list_3 = block_store.new_list(); + let list_4 = block_store.new_list(); + let list_5 = block_store.new_list(); + for i in 0 .. 2_000 { block_store.push(list_2, i * 2); block_store.push(list_3, i * 3); } - for i in 1 .. 10 { + for i in 0 .. 10 { block_store.push(list_4, i * 4); block_store.push(list_5, i * 5); } @@ -262,4 +181,4 @@ mod tests { assert!(list4_iter.next().is_none()); assert!(list5_iter.next().is_none()); } -} \ No newline at end of file +} diff --git a/src/postings/mod.rs b/src/postings/mod.rs index d7d0084df..c675e6f54 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -12,7 +12,7 @@ mod freq_handler; mod docset; mod scored_docset; mod segment_postings_option; -mod block_appender; +mod block_store; pub use self::docset::{SkipResult, DocSet}; pub use self::offset_postings::OffsetPostings; @@ -31,6 +31,7 @@ pub use self::freq_handler::FreqHandler; pub use self::scored_docset::ScoredDocSet; pub use self::postings::HasLen; pub use self::segment_postings_option::SegmentPostingsOption; +pub use self::block_store::BlockStore; #[cfg(test)] mod tests { @@ -71,8 +72,9 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); let segment = index.new_segment(); + let mut block_store = BlockStore::allocate(1_000); { - let mut segment_writer = SegmentWriter::for_segment(segment.clone(), &schema).unwrap(); + let mut segment_writer = SegmentWriter::for_segment(&mut block_store, segment.clone(), &schema).unwrap(); { let mut doc = Document::new(); doc.add_text(text_field, "a b a c a d a a."); diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 933b6d3a8..3e70cb1a7 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -4,73 +4,39 @@ use schema::Term; use postings::PostingsSerializer; use std::io; use postings::Recorder; -use postings::block_appender::BlockAppender; +use postings::block_store::BlockStore; -struct TermPostingsWriter { - doc_ids: BlockAppender, - recorder: Rec, -} - -impl TermPostingsWriter { - pub fn new() -> TermPostingsWriter { - TermPostingsWriter { - doc_ids: BlockAppender::new(), - recorder: Recorder::new(), - } - } - - fn close_doc(&mut self,) { - self.recorder.close_doc(); - } - - pub fn doc_freq(&self) -> u32 { - self.doc_ids.len() as u32 - } - - pub fn suscribe(&mut self, doc: DocId, pos: u32) { - match self.doc_ids.last() { - Some(last_doc) => { - if last_doc != doc { - self.close_doc(); - self.doc_ids.push(doc); - } - }, - None => { - self.doc_ids.push(doc) - }, - } - self.recorder.record_position(pos); - } - - pub fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> { - for (i, doc) in self.doc_ids.iter().enumerate() { - let (term_freq, position_deltas) = self.recorder.get_tf_and_posdeltas(i); - try!(serializer.write_doc(doc.clone(), term_freq, position_deltas)); - } - Ok(()) - } -} - pub trait PostingsWriter { - fn close(&mut self,); + fn close(&mut self, block_store: &mut BlockStore); - fn suscribe(&mut self, doc: DocId, pos: u32, term: Term); + fn suscribe(&mut self, block_store: &mut BlockStore, doc: DocId, pos: u32, term: Term); - fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()>; + fn serialize(&self, block_store: &BlockStore, serializer: &mut PostingsSerializer) -> io::Result<()>; } pub struct SpecializedPostingsWriter { - postings: Vec>, - term_index: HashMap, + term_index: HashMap, +} + + +fn get_or_create_recorder<'a, Rec: Recorder>(term: Term, term_index: &'a mut HashMap, block_store: &mut BlockStore) -> &'a mut Rec { + if term_index.contains_key(&term) { + term_index.get_mut(&term).unwrap() + } + else { + let recorder = Rec::new(block_store); + term_index + .entry(term) + .or_insert(recorder) + } } impl SpecializedPostingsWriter { pub fn new() -> SpecializedPostingsWriter { SpecializedPostingsWriter { - postings: Vec::new(), term_index: HashMap::new(), } } @@ -78,48 +44,38 @@ impl SpecializedPostingsWriter { pub fn new_boxed() -> Box { Box::new(Self::new()) } - - fn get_term_postings(&mut self, term: Term) -> &mut TermPostingsWriter { - let unord_id: usize = { - let num_terms = self.term_index.len(); - let postings = &mut self.postings; - self.term_index - .entry(term) - .or_insert_with(|| { - let unord_id = num_terms; - postings.push(TermPostingsWriter::new()); - unord_id - }).clone() - }; - &mut self.postings[unord_id] - } - + } impl PostingsWriter for SpecializedPostingsWriter { - fn close(&mut self,) { - for term_postings_writer in self.postings.iter_mut() { - term_postings_writer.close_doc(); + fn close(&mut self, block_store: &mut BlockStore) { + for recorder in self.term_index.values_mut() { + recorder.close_doc(block_store); } } - - fn suscribe(&mut self, doc: DocId, pos: u32, term: Term) { - let doc_ids: &mut TermPostingsWriter = self.get_term_postings(term); - doc_ids.suscribe(doc, pos); + + fn suscribe(&mut self, block_store: &mut BlockStore, doc: DocId, position: u32, term: Term) { + let mut recorder = get_or_create_recorder(term, &mut self.term_index, block_store); + let current_doc = recorder.current_doc(); + if current_doc != doc { + if current_doc != u32::max_value() { + recorder.close_doc(block_store); + } + recorder.new_doc(block_store, doc); + } + recorder.record_position(block_store, position); } - - fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> { - let mut term_offsets: Vec<(&Term, usize)> = self.term_index + + fn serialize(&self, block_store: &BlockStore, serializer: &mut PostingsSerializer) -> io::Result<()> { + let mut term_offsets: Vec<(&Term, &Rec)> = self.term_index .iter() - .map(|(k,v)| (k, *v)) + .map(|(k,v)| (k, v)) .collect(); - term_offsets.sort(); - for (term, postings_id) in term_offsets { - let term_postings_writer = &self.postings[postings_id]; - let term_docfreq = term_postings_writer.doc_freq(); - try!(serializer.new_term(term, term_docfreq)); - try!(term_postings_writer.serialize(serializer)); + term_offsets.sort_by_key(|&(k, _v)| k); + for (term, recorder) in term_offsets { + try!(serializer.new_term(term, recorder.doc_freq())); + try!(recorder.serialize(serializer, block_store)); try!(serializer.close_term()); } Ok(()) diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index acb580ad0..28fd4084c 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -1,98 +1,199 @@ +use postings::block_store::BlockStore; +use DocId; +use std::io; +use postings::PostingsSerializer; +const EMPTY_ARRAY: [u32; 0] = [0u32; 0]; +const POSITION_END: u32 = 4294967295; pub trait Recorder { - fn new() -> Self; - fn record_position(&mut self, position: u32); - fn close_doc(&mut self,); - fn get_tf_and_posdeltas(&self, i: usize) -> (u32, &[u32]); + fn current_doc(&self,) -> u32; + fn new(block_store: &mut BlockStore) -> Self; + fn new_doc(&mut self, block_store: &mut BlockStore, doc: DocId); + fn record_position(&mut self, block_store: &mut BlockStore, position: u32); + fn close_doc(&mut self, block_store: &mut BlockStore); + fn doc_freq(&self,) -> u32; + + fn serialize(&self, serializer: &mut PostingsSerializer, block_store: &BlockStore) -> io::Result<()>; } -const EMPTY_ARRAY: [u32; 0] = []; - -pub struct NothingRecorder; +pub struct NothingRecorder { + list_id: u32, + current_doc: DocId, + doc_freq: u32, +} impl Recorder for NothingRecorder { - fn new() -> Self { - NothingRecorder + + fn current_doc(&self,) -> DocId { + self.current_doc } - fn record_position(&mut self, _position: u32) { + fn new(block_store: &mut BlockStore) -> Self { + NothingRecorder { + list_id: block_store.new_list(), + current_doc: u32::max_value(), + doc_freq: 0u32, + } } - fn close_doc(&mut self,) { + fn new_doc(&mut self, block_store: &mut BlockStore, doc: DocId) { + self.current_doc = doc; + block_store.push(self.list_id, doc); + self.doc_freq += 1; } - fn get_tf_and_posdeltas(&self, _: usize) -> (u32, &[u32]) { - (0u32, &EMPTY_ARRAY) + fn record_position(&mut self, _block_store: &mut BlockStore, _position: u32) { + } + + fn close_doc(&mut self, _block_store: &mut BlockStore) { + } + + fn doc_freq(&self,) -> u32 { + self.doc_freq + } + + fn serialize(&self, serializer: &mut PostingsSerializer, block_store: &BlockStore) -> io::Result<()> { + let doc_id_iter = block_store.iter_list(self.list_id); + for doc in doc_id_iter { + try!(serializer.write_doc(doc, 0u32, &EMPTY_ARRAY)); + } + Ok(()) } } pub struct TermFrequencyRecorder { - term_freqs: Vec, + list_id: u32, + current_doc: DocId, current_tf: u32, + doc_freq: u32, } impl Recorder for TermFrequencyRecorder { - fn new() -> Self { + + fn new(block_store: &mut BlockStore) -> Self { TermFrequencyRecorder { - term_freqs: Vec::new(), + list_id: block_store.new_list(), + current_doc: u32::max_value(), current_tf: 0u32, + doc_freq: 0u32, } } - fn record_position(&mut self, _position: u32) { + fn current_doc(&self,) -> DocId { + self.current_doc + } + + fn new_doc(&mut self, block_store: &mut BlockStore, doc: DocId) { + self.doc_freq += 1u32; + self.current_doc = doc; + block_store.push(self.list_id, doc); + } + + fn record_position(&mut self, _block_store: &mut BlockStore, _position: u32) { self.current_tf += 1; } - fn close_doc(&mut self,) { + fn close_doc(&mut self, block_store: &mut BlockStore) { assert!(self.current_tf > 0); - self.term_freqs.push(self.current_tf); + block_store.push(self.list_id, self.current_tf); self.current_tf = 0; } - fn get_tf_and_posdeltas(&self, i: usize) -> (u32, &[u32]) { - (self.term_freqs[i], &EMPTY_ARRAY) + fn doc_freq(&self,) -> u32 { + self.doc_freq + } + + fn serialize(&self, serializer: &mut PostingsSerializer, block_store: &BlockStore) -> io::Result<()> { + let mut doc_iter = block_store.iter_list(self.list_id); + loop { + if let Some(doc) = doc_iter.next() { + if let Some(term_freq) = doc_iter.next() { + try!(serializer.write_doc(doc, term_freq, &EMPTY_ARRAY)); + continue; + } + } + break; + } + Ok(()) } } pub struct TFAndPositionRecorder { - cumulated_tfs: Vec, - positions: Vec, - cumulated_tf: u32, - current_pos: u32, + list_id: u32, + current_doc: DocId, + doc_freq: u32, } + impl Recorder for TFAndPositionRecorder { - fn new() -> Self { + + fn new(block_store: &mut BlockStore) -> Self { TFAndPositionRecorder { - cumulated_tfs: vec!(0u32), - cumulated_tf: 0u32, - positions: Vec::new(), - current_pos: 0u32, + list_id: block_store.new_list(), + current_doc: u32::max_value(), + doc_freq: 0u32, } } - fn record_position(&mut self, position: u32) { - self.cumulated_tf += 1; - self.positions.push(position - self.current_pos); - self.current_pos = position; + fn current_doc(&self,) -> DocId { + self.current_doc } - fn close_doc(&mut self,) { - self.cumulated_tfs.push(self.cumulated_tf); - self.current_pos = 0; + fn new_doc(&mut self, block_store: &mut BlockStore, doc: DocId) { + self.doc_freq += 1; + self.current_doc = doc; + block_store.push(self.list_id, doc); } - fn get_tf_and_posdeltas(&self, i: usize) -> (u32, &[u32]) { - let tf = self.cumulated_tfs[i+1] - self.cumulated_tfs[i]; - let pos_idx = self.cumulated_tfs[i] as usize; - let posdeltas = &self.positions[pos_idx..pos_idx + tf as usize]; - (tf, posdeltas) + fn record_position(&mut self, block_store: &mut BlockStore, position: u32) { + block_store.push(self.list_id, position); + } + + fn close_doc(&mut self, block_store: &mut BlockStore) { + block_store.push(self.list_id, POSITION_END); + } + + fn doc_freq(&self,) -> u32 { + self.doc_freq + } + + + fn serialize(&self, serializer: &mut PostingsSerializer, block_store: &BlockStore) -> io::Result<()> { + let mut positions = Vec::with_capacity(100); + let mut doc_iter = block_store.iter_list(self.list_id); + loop { + if let Some(doc) = doc_iter.next() { + let mut prev_position = 0; + positions.clear(); + loop { + match doc_iter.next() { + Some(position) => { + if position == POSITION_END { + break; + } + else { + positions.push(position - prev_position); + prev_position = position; + } + } + None => { + panic!("This should never happen. Pleasee report the bug."); + } + } + } + try!(serializer.write_doc(doc, positions.len() as u32, &positions)); + } + else { + break; + } + } + Ok(()) } }