From b8e4a63d73eddb20fdf5f0342aa2a9d3c301b21d Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 13 Feb 2016 23:46:19 +0900 Subject: [PATCH] toto --- src/core/skip.rs | 171 ++++++++++++++++++++++++----------------------- tests/skip.rs | 47 ++++++++++++- 2 files changed, 134 insertions(+), 84 deletions(-) diff --git a/src/core/skip.rs b/src/core/skip.rs index f8c75036e..445cb1f8b 100644 --- a/src/core/skip.rs +++ b/src/core/skip.rs @@ -83,19 +83,21 @@ impl LayerBuilder { } } - fn insert(&mut self, doc_id: DocId, value: &S) -> InsertResult { + fn insert(&mut self, doc_id: DocId, value: &S) -> Option<(DocId, u32)> { self.remaining -= 1; self.len += 1; - let offset = self.written_size(); // TODO not sure if we want after or here - self.buffer.write_u32::(doc_id); - value.serialize(&mut self.buffer); + let offset = self.written_size() as u32; // TODO not sure if we want after or here + let mut res; if self.remaining == 0 { self.remaining = self.period; - InsertResult::SkipPointer(offset as u32) + res = Some((doc_id, offset)); } else { - InsertResult::NoNeedForSkip + res = None; } + self.buffer.write_u32::(doc_id); + value.serialize(&mut self.buffer); + res } } @@ -106,11 +108,6 @@ pub struct SkipListBuilder { } -enum InsertResult { - SkipPointer(u32), - NoNeedForSkip, -} - impl SkipListBuilder { pub fn new(period: usize) -> SkipListBuilder { @@ -131,25 +128,17 @@ impl SkipListBuilder { pub fn insert(&mut self, doc_id: DocId, dest: &S) { let mut layer_id = 0; - match self.get_layer(0).insert(doc_id, dest) { - InsertResult::SkipPointer(mut offset) => { - loop { - layer_id += 1; - let skip_result = self.get_layer(layer_id) - .insert(doc_id, &offset); - match skip_result { - InsertResult::SkipPointer(next_offset) => { - offset = next_offset; - }, - InsertResult::NoNeedForSkip => { - return; - } - } - } - }, - InsertResult::NoNeedForSkip => { - return; - } + let mut skip_pointer = self.get_layer(layer_id).insert(doc_id, dest); + loop { + layer_id += 1; + println!("skip pointer {:?}", skip_pointer); + skip_pointer = match skip_pointer { + Some((skip_doc_id, skip_offset)) => + self + .get_layer(layer_id) + .insert(skip_doc_id, &skip_offset), + None => { return; } + }; } } @@ -215,18 +204,18 @@ fn test_rebase_cursor() { struct Layer<'a, T> { cursor: Cursor<&'a [u8]>, - next_id: u32, + next_id: DocId, _phantom_: PhantomData, } + impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> { type Item = (DocId, T); fn next(&mut self,)-> Option<(DocId, T)> { - println!("next id! {}", self.next_id); - println!("datalen{}", self.cursor.get_ref().len()); + println!("eeeeee"); if self.next_id == u32::max_value() { None } @@ -238,7 +227,6 @@ impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> { Ok(val) => val, Err(_) => u32::max_value() }; - println!("next id==> {}", self.next_id); Some((cur_id, cur_val)) } } @@ -255,7 +243,6 @@ impl<'a, T: BinarySerializable> Layer<'a, T> { Ok(val) => val, Err(_) => u32::max_value(), }; - println!("next_id {:?}", next_id); Layer { cursor: cursor, next_id: next_id, @@ -266,21 +253,57 @@ impl<'a, T: BinarySerializable> Layer<'a, T> { fn empty() -> Layer<'a, T> { Layer { cursor: Cursor::new(&EMPTY), - next_id: u32::max_value(), + next_id: DocId::max_value(), _phantom_: PhantomData, } } - fn seek(doc_id: DocId) { - // while self.next_doc_id < doc_id { - // self.next_doc_id = cursor.read_u32::(); - // self.cur_val = self.next_val; - // self.next_doc_id = bincode::Deserializer::new(self.cursor, 8).read_u32(); - // self.next_val = bincode::Deserializer::new(self.cursor, 8).read_u32(); - // } + + fn seek_offset(&mut self, offset: usize) { + self.cursor.seek(SeekFrom::Start(offset as u64)); + self.next_id = match self.cursor.read_u32::() { + Ok(val) => val, + Err(_) => u32::max_value(), + }; + } + + // Returns the last element (key, val) + // such that (key < doc_id) + // + // If there is no such element anymore, + // returns None. + fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> { + let mut val = None; + while self.next_id < doc_id { + match self.next() { + None => { break; }, + v => { val = v; } + } + } + val } } + +fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) { + for it in layer { + println!(" - {:?}", it); + } +} + +pub fn display_skip_list<'a, T: BinarySerializable>(skip_list: &mut SkipList<'a, T>) { + let mut i = 0; + for mut layer in skip_list.skip_layers.iter_mut() { + println!("SkipLayer {}", i); + display_layer(&mut layer); + i += 1; + } + println!("DataLayer {}", i); + display_layer(&mut skip_list.data_layer); +} + + + pub struct SkipList<'a, T: BinarySerializable> { data_layer: Layer<'a, T>, skip_layers: Vec>, @@ -297,48 +320,32 @@ impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> { impl<'a, T: BinarySerializable> SkipList<'a, T> { - pub fn seek(&mut self, doc_id: DocId) { - // let mut next_layer_offset: u64 = 0; - // for skip_layer_id in 0..self.skip_layers.len() { - // println!("LAYER {}", skip_layer_id); - // let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id]; - // println!("seek {}", next_layer_offset); - // if next_layer_offset > 0 { - // skip_layer.cursor.seek(SeekFrom::Start(next_layer_offset)); - // next_layer_offset = 0; - // } - // println!("next id {}", skip_layer.next_id); - // while skip_layer.next_id < doc_id { - // match skip_layer.next() { - // Some((_, offset)) => { - // println!("bipoffset {}", offset); - // next_layer_offset = offset as u64; - // }, - // None => { - // break; - // } - // } - // } - // } - // for skip_layer in self.skip_layers.iter() { - // println!("{}", skip_layer.len()); - // } - // println!("last seek {}", next_layer_offset); - // if next_layer_offset > 0 { - // self.data_layer.cursor.seek(SeekFrom::Start(next_layer_offset)); - // } - while self.data_layer.next_id < doc_id { - match self.data_layer.next() { - None => { break; }, - _ => {} - } - } - + pub fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> { + let mut next_layer_skip: Option<(DocId, u32)> = None; + for skip_layer_id in 0..self.skip_layers.len() { + let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id]; + println!("\n\nLAYER {}", skip_layer_id); + println!("nextid before skip {}", skip_layer.next_id); + match next_layer_skip { + Some((_, offset)) => { skip_layer.seek_offset(offset as usize); }, + None => {} + }; + println!("nextid after skip {}", skip_layer.next_id); + next_layer_skip = skip_layer.seek(doc_id); + println!("nextid after seek {}", skip_layer.next_id); + println!("--- nextlayerskip {:?}", next_layer_skip); + } + match next_layer_skip { + Some((_, offset)) => { self.data_layer.seek_offset(offset as usize); }, + None => {} + }; + self.data_layer.seek(doc_id) } pub fn read(data: &'a [u8]) -> SkipList<'a, T> { let mut cursor = Cursor::new(data); let offsets: Vec = Vec::deserialize(&mut cursor).unwrap(); + println!("offsets {:?}", offsets); let num_layers = offsets.len(); println!("{} layers ", num_layers); @@ -357,9 +364,7 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> { skip_layers = offsets.iter() .zip(&offsets[1..]) .map(|(start, stop)| { - println!("start {} stop {}", start, stop); - let layer_data: &[u8] = &data[*start as usize..*stop as usize]; - println!("datalen2 {}", layer_data.len()); + let layer_data: &[u8] = &layers_data[*start as usize..*stop as usize]; let cursor = Cursor::new(layer_data); Layer::read(cursor) }) diff --git a/tests/skip.rs b/tests/skip.rs index cef0e6b8b..8dd0e8a75 100644 --- a/tests/skip.rs +++ b/tests/skip.rs @@ -2,7 +2,7 @@ extern crate tantivy; extern crate byteorder; use std::io::{Write, Seek}; use std::io::SeekFrom; -use tantivy::core::skip::{SkipListBuilder, SkipList}; +use tantivy::core::skip::*; use std::io::Cursor; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; @@ -91,4 +91,49 @@ fn test_skip_list_reader() { assert_eq!(skip_list.next().unwrap(), (9, ())); assert_eq!(skip_list.next(), None); } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(6, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(6); + assert_eq!(skip_list.next().unwrap(), (6, ())); + assert_eq!(skip_list.next(), None); + + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(7, &()); + skip_list_builder.insert(9, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(10); + assert_eq!(skip_list.next(), None); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); + for i in (0..1000) { + skip_list_builder.insert(i, &()); + } + skip_list_builder.insert(1004, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (0, ())); + skip_list.seek(431); + assert_eq!(skip_list.next().unwrap(), (431,()) ); + skip_list.seek(1003); + assert_eq!(skip_list.next().unwrap(), (1004,()) ); + assert_eq!(skip_list.next(), None); + } }