From 0bd52f7d8f649302945bd09fa535fd72c22ad7da Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 5 Mar 2016 13:29:40 +0900 Subject: [PATCH] blop --- src/core/fstmap.rs | 3 +-- src/core/reader.rs | 64 ++++++++++++++++++++++------------------------ src/core/store.rs | 5 ++++ 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/core/fstmap.rs b/src/core/fstmap.rs index 278910967..e0fcd3d58 100644 --- a/src/core/fstmap.rs +++ b/src/core/fstmap.rs @@ -75,12 +75,11 @@ impl<'a, V: 'static + BinarySerializable> FstMapIter<'a, V> { // type Item = (Vec, V); - fn next(&mut self) -> Option<(&[u8], V)> { + pub fn next(&mut self) -> Option<(&[u8], V)> { let next_item = self.streamer.next(); match next_item { Some((key, offset)) => { let val = self.fst_map.read_value(offset); - // let () = key; Some((key, val)) }, None => None diff --git a/src/core/reader.rs b/src/core/reader.rs index af142e312..5640c1193 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -99,6 +99,10 @@ impl SegmentReader { self.segment.id() } + pub fn num_docs(&self,) -> DocId { + self.store_reader.num_docs() + } + pub fn open(segment: Segment) -> io::Result { let source = try!(segment.open_read(SegmentComponent::TERMS)); let term_offsets = try!(FstMap::from_source(source)); @@ -116,8 +120,8 @@ impl SegmentReader { self.store_reader.get(doc_id) } - fn read_postings(&self, offset: usize) -> SegmentPostings { - let postings_data = &self.postings_data.as_slice()[offset..]; + fn read_postings(&self, offset: u32) -> SegmentPostings { + let postings_data = &self.postings_data.as_slice()[(offset as usize)..]; SegmentPostings::from_data(&postings_data) } @@ -131,7 +135,7 @@ impl SegmentReader { for term in terms.iter() { match self.get_term(term) { Some(term_info) => { - let segment_posting = self.read_postings(term_info.postings_offset as usize); + let segment_posting = self.read_postings(term_info.postings_offset); segment_postings.push(segment_posting); } None => { @@ -146,34 +150,26 @@ impl SegmentReader { } -// -// fn write_postings>(mut cursor: R, num_docs: DocId, serializer: &mut SegSer) -> io::Result<()> { -// // TODO remove allocation -// let docs = Vec::with_capacity(num_docs as usize); -// for i in 0..num_docs { -// let doc_id = u32::serialize(&mut cursor); -// try!(serializer.add_doc(doc_id)); -// } -// Ok(()) -// } -// -// impl SerializableSegment for SegmentReader { -// -// fn write>(&self, mut serializer: SegSer) -> io::Result { -// let mut term_offsets_it = self.term_offsets.stream(); -// loop { -// match term_offsets_it.next() { -// Some((term_data, offset_u64)) => { -// let term = Term::from(term_data); -// let offset = offset_u64 as usize; -// try!(serializer.new_term(&term, num_docs)); -// let segment_postings = self.read_postings(offset); -// try!(write_postings(cursor, num_docs, &mut serializer)); -// }, -// None => { break; } -// } -// } -// serializer.close() -// } -// -// } + +impl SerializableSegment for SegmentReader { + + fn write>(&self, mut serializer: SegSer) -> io::Result { + let mut term_offsets_it = self.term_offsets.stream(); + loop { + match term_offsets_it.next() { + Some((term_data, term_info)) => { + let term = Term::from(term_data); + try!(serializer.new_term(&term, term_info.doc_freq)); + let segment_postings = self.read_postings(term_info.postings_offset); + serializer.write_docs(&segment_postings.doc_ids[..]); + }, + None => { break; } + } + } + for doc_id in 0..self.num_docs() { + let doc = self.store_reader.get(&doc_id); + serializer.store_doc(&mut doc.fields()); + } + serializer.close() + } +} diff --git a/src/core/store.rs b/src/core/store.rs index bac0eea1e..e73a2731c 100644 --- a/src/core/store.rs +++ b/src/core/store.rs @@ -108,6 +108,11 @@ pub struct StoreReader { } impl StoreReader { + + pub fn num_docs(&self,) -> DocId { + self.offsets.len() as DocId + } + fn read_header(data: &ReadOnlySource) -> Vec { // todo err let mut cursor = Cursor::new(data.as_slice());