diff --git a/src/core/skip.rs b/src/core/skip.rs index 7f0062c4f..e2124ac96 100644 --- a/src/core/skip.rs +++ b/src/core/skip.rs @@ -4,16 +4,31 @@ use std::io::Read; use std::io::Cursor; use std::io::SeekFrom; use std::io::Seek; +use std::marker; use core::DocId; use std::ops::DerefMut; -use serde::Serialize; -use serde; use bincode; use byteorder; use core::error; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use std::fmt; +pub trait BinarySerializable : fmt::Debug + Sized { + // TODO move Result from Error. + fn serialize(&self, writer: &mut Write) -> error::Result; + fn deserialize(reader: &mut Read) -> error::Result; +} + +impl BinarySerializable for () { + fn serialize(&self, writer: &mut Write) -> error::Result { + Ok(0) + } + fn deserialize(reader: &mut Read) -> error::Result { + Ok(()) + } +} + struct LayerBuilder { period: usize, buffer: Vec, @@ -47,18 +62,17 @@ impl LayerBuilder { } } - fn insert(&mut self, doc_id: DocId, dest: S) -> InsertResult { + fn insert(&mut self, doc_id: DocId, value: &S) -> InsertResult { self.remaining -= 1; self.len += 1; + let offset = self.written_size(); // TODO not sure if we want after or here + self.buffer.write_u32::(doc_id); + value.serialize(&mut self.buffer); if self.remaining == 0 { - let offset = self.written_size(); - dest.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer)); self.remaining = self.period; - InsertResult::SkipPointer(offset) + InsertResult::SkipPointer(offset as u32) } else { - doc_id.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer)); - dest.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer)); InsertResult::NoNeedForSkip } } @@ -70,8 +84,9 @@ pub struct SkipListBuilder { layers: Vec, } + enum InsertResult { - SkipPointer(usize), + SkipPointer(u32), NoNeedForSkip, } @@ -93,14 +108,14 @@ impl SkipListBuilder { &mut self.layers[layer_id] } - pub fn insert(&mut self, doc_id: DocId, dest: S) { + pub fn insert(&mut self, doc_id: DocId, dest: &S) { let mut layer_id = 0; match self.get_layer(0).insert(doc_id, dest) { InsertResult::SkipPointer(mut offset) => { loop { layer_id += 1; let skip_result = self.get_layer(layer_id) - .insert(doc_id, offset); + .insert(doc_id, &offset); match skip_result { InsertResult::SkipPointer(next_offset) => { offset = next_offset; @@ -130,45 +145,156 @@ impl SkipListBuilder { } -// --------------------------- +// the lower layer contains only the list of doc ids. +// A docset is represented +// SkipList<'a, Void> - -struct Layer { - reader: R, +struct SkipLayer<'a, T> { + cursor: Cursor<&'a [u8]>, num_items: u32, + next_item: Option, } -impl Layer { - fn read(reader: &mut R) -> Layer { + +fn rebase_cursor<'a>(cursor: &Cursor<&'a [u8]>) -> Cursor<&'a [u8]>{ + let data: &'a[u8] = *cursor.get_ref(); + let from_idx = cursor.position() as usize; + let rebased_data = &data[from_idx..]; + Cursor::new(rebased_data) +} + + +#[test] +fn test_rebase_cursor() { + { + let a: Vec = vec!(1, 2, 3); + let mut cur: Cursor<&[u8]> = Cursor::new(&a); + assert_eq!(cur.read_u8().unwrap(), 1); + let mut rebased_cursor = rebase_cursor(&cur); + assert_eq!(cur.read_u8().unwrap(), 2); + assert_eq!(rebased_cursor.read_u8().unwrap(), 2); + assert_eq!(cur.position(), 2); + assert_eq!(rebased_cursor.position(), 1); + cur.seek(SeekFrom::Start(0)); + assert_eq!(cur.read_u8().unwrap(), 1); + rebased_cursor.seek(SeekFrom::Start(0)); + assert_eq!(rebased_cursor.read_u8().unwrap(), 2); + } +} + + +struct Layer<'a, T> { + _phantom_: marker::PhantomData, + cursor: Cursor<&'a [u8]>, + item_idx: usize, + num_items: usize, + cur_id: u32, + next_id: Option, +} + + +impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> { + + type Item = (DocId, T); + + fn next(&mut self,)-> Option<(DocId, T)> { + if self.item_idx >= self.num_items { + None + } + else { + let cur_val = T::deserialize(&mut self.cursor).unwrap(); + let cur_id = self.next_id; + self.item_idx += 1; + if self.item_idx < self.num_items - 1 { + self.next_id = Some(u32::deserialize(&mut self.cursor).unwrap()); + } + else { + self.next_id = None; + } + Some((self.cur_id.clone(), cur_val)) + } + + } +} + +impl BinarySerializable for u32 { + fn serialize(&self, writer: &mut Write) -> error::Result { + // TODO error handling + writer.write_u32::(self.clone()); + Ok(4) + } + + fn deserialize(reader: &mut Read) -> error::Result { + // TODO error handling + Ok(reader.read_u32::().unwrap()) + } +} + + + +impl<'a, T: BinarySerializable> Layer<'a, T> { + fn read(cursor: &mut Cursor<&'a [u8]>) -> Layer<'a, T> { // TODO error handling? - let num_items = reader.read_u32::().unwrap() as u32; - let num_bytes = reader.read_u32::().unwrap() as u32; - let reader_clone = reader.clone(); - reader.seek(SeekFrom::Current(num_bytes as i64)); + let num_items = cursor.read_u32::().unwrap() as u32; + println!("{} items ", num_items); + let num_bytes = cursor.read_u32::().unwrap() as u32; + println!("{} bytes ", num_bytes); + let mut rebased_cursor = rebase_cursor(cursor); + cursor.seek(SeekFrom::Current(num_bytes as i64)); + // println!("cur val {:?}", cur_val); + let next_id: Option = match rebased_cursor.read_u32::() { + Ok(val) => Some(val), + Err(_) => None + }; Layer { - reader: reader_clone, - num_items: num_items, + cursor: rebased_cursor, + item_idx: 0, + num_items: num_items as usize, + next_id: next_id, + } } + + fn seek(doc_id: DocId) { + // while self.next_doc_id < doc_id { + // self.next_doc_id = cursor.read_u32::(); + // self.cur_val = self.next_val; + // self.next_doc_id = bincode::Deserializer::new(self.cursor, 8).read_u32(); + // self.next_val = bincode::Deserializer::new(self.cursor, 8).read_u32(); + // } + } } -pub struct SkipList { - layers: Vec>, +pub struct SkipList<'a, T: BinarySerializable> { + data_layer: Layer<'a, T>, + skip_layers: Vec>, } -impl SkipList { +impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> { - pub fn read(data: &[u8]) -> SkipList> { + + type Item = (DocId, T); + + fn next(&mut self,)-> Option<(DocId, T)> { + self.data_layer.next() + } +} + +impl<'a, T: BinarySerializable> SkipList<'a, T> { + + pub fn read(data: &'a [u8]) -> SkipList<'a, T> { let mut cursor = Cursor::new(data); - // TODO error handling? let num_layers = cursor.read_u8().unwrap(); - let mut layers = Vec::new(); - for _ in (0..num_layers) { - layers.push(Layer::read(&mut cursor)); + println!("{} layers ", num_layers); + let mut skip_layers = Vec::new(); + for _ in (0..num_layers - 1) { + let skip_layer: Layer<'a, u32> = Layer::read(&mut cursor); + skip_layers.push(skip_layer); } + let data_layer: Layer<'a, T> = Layer::read(&mut cursor); SkipList { - layers: layers + skip_layers: skip_layers, + data_layer: data_layer, } } - } diff --git a/tests/skip.rs b/tests/skip.rs index 71bcbcf1c..6bee066e1 100644 --- a/tests/skip.rs +++ b/tests/skip.rs @@ -1,14 +1,17 @@ extern crate tantivy; - -use std::io::Write; -use tantivy::core::skip::SkipListBuilder; +extern crate byteorder; +use std::io::{Write, Seek}; +use std::io::SeekFrom; +use tantivy::core::skip::{SkipListBuilder, SkipList}; +use std::io::Cursor; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; #[test] fn test_skip_list_builder() { { let mut output: Vec = Vec::new(); let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); - skip_list_builder.insert(2, 3); + skip_list_builder.insert(2, &3); skip_list_builder.write::>(&mut output); assert_eq!(output.len(), 17); assert_eq!(output[0], 1); @@ -17,7 +20,7 @@ fn test_skip_list_builder() { let mut output: Vec = Vec::new(); let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); for i in (0..9) { - skip_list_builder.insert(i, i); + skip_list_builder.insert(i, &i); } skip_list_builder.write::>(&mut output); assert_eq!(output.len(), 129); @@ -28,7 +31,7 @@ fn test_skip_list_builder() { let mut output: Vec = Vec::new(); let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); for i in (0..9) { - skip_list_builder.insert(i, ()); + skip_list_builder.insert(i, &()); } skip_list_builder.write::>(&mut output); assert_eq!(output.len(), 93); @@ -36,15 +39,13 @@ fn test_skip_list_builder() { } } - #[test] -fn test_skip_list_builder() { - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); - skip_list_builder.insert(2, 3); - skip_list_builder.write::>(&mut output); - let skip_list = SkipList::read(output.as_slice()); - } - +fn test_skip_list_reader() { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); + skip_list_builder.insert(2, &3); + skip_list_builder.write::>(&mut output); + let skip_list: SkipList = SkipList::read(&mut output); + // assert_eq!(output.len(), 17); + // assert_eq!(output[0], 1); }