From 5b621393e2ced88c159094e49734ef981e063cd3 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 7 Apr 2016 22:35:59 +0900 Subject: [PATCH] Take into account doc_freq for the docid buffer. --- src/core/merger.rs | 2 +- src/core/reader.rs | 13 +++++++------ src/core/writer.rs | 3 +-- src/lib.rs | 2 ++ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/core/merger.rs b/src/core/merger.rs index ebcb40497..96655f7fd 100644 --- a/src/core/merger.rs +++ b/src/core/merger.rs @@ -95,7 +95,7 @@ impl<'a> PostingsMerger<'a> { { let offset = self.doc_offsets[heap_item.segment_ord]; let reader = &self.readers[heap_item.segment_ord]; - for doc_id in reader.read_postings(heap_item.term_info.postings_offset) { + for doc_id in reader.read_postings(&heap_item.term_info) { self.doc_ids.push(offset + doc_id); } } diff --git a/src/core/reader.rs b/src/core/reader.rs index 7d77e8c87..fa553d64b 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -42,10 +42,10 @@ impl SegmentPostings { } } - pub fn from_data(data: &[u8]) -> SegmentPostings { + pub fn from_data(doc_freq: DocId, data: &[u8]) -> SegmentPostings { let mut cursor = Cursor::new(data); let data: Vec = Vec::deserialize(&mut cursor).unwrap(); - let mut doc_ids: Vec = (0u32..data.len() as u32 + 1_000 as u32).collect(); + let mut doc_ids: Vec = (0u32..doc_freq).collect(); let decoder = Decoder::new(); let num_doc_ids = decoder.decode_sorted(&data, &mut doc_ids); doc_ids.truncate(num_doc_ids); @@ -153,9 +153,10 @@ impl SegmentReader { self.fast_fields_reader.get_field(u32_field) } - pub fn read_postings(&self, offset: u32) -> SegmentPostings { - let postings_data = &self.postings_data.as_slice()[(offset as usize)..]; - SegmentPostings::from_data(&postings_data) + pub fn read_postings(&self, term_info: &TermInfo) -> SegmentPostings { + let offset = term_info.postings_offset as usize; + let postings_data = &self.postings_data.as_slice()[offset..]; + SegmentPostings::from_data(term_info.doc_freq, 
&postings_data) } fn get_term<'a>(&'a self, term: &Term) -> Option { @@ -170,7 +171,7 @@ impl SegmentReader { for term in terms.iter() { match self.get_term(term) { Some(term_info) => { - let segment_posting = self.read_postings(term_info.postings_offset); + let segment_posting = self.read_postings(&term_info); segment_postings.push(segment_posting); } None => { diff --git a/src/core/writer.rs b/src/core/writer.rs index b291162e0..446dabf65 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -76,7 +76,6 @@ impl IndexWriter { segment_writer.finalize().unwrap(); index_clone.sync(&segment).unwrap(); index_clone.publish_segment(&segment).unwrap(); - // segment_writer.commit().unwrap(); } }) }).collect(); @@ -107,7 +106,7 @@ impl IndexWriter { } pub fn add_document(&mut self, doc: Document) -> io::Result<()> { - let arc_doc = ArcDoc::new(doc); + let arc_doc = ArcDoc::new(doc); try!( self.queue_input.send(arc_doc) .map_err(|e| io::Error::new(ErrorKind::Other, e)) diff --git a/src/lib.rs b/src/lib.rs index 0b1af23c9..848b43e27 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,8 @@ extern crate num_cpus; mod core; +pub use core::directory::Directory; +pub use core::searcher::Searcher; pub use core::index::Index; pub use core::schema; pub use core::schema::Term;