This commit is contained in:
Paul Masurel
2016-02-13 15:25:06 +09:00
parent e7f77d9866
commit 83c5ce8a28
2 changed files with 176 additions and 49 deletions

View File

@@ -4,16 +4,31 @@ use std::io::Read;
use std::io::Cursor;
use std::io::SeekFrom;
use std::io::Seek;
use std::marker;
use core::DocId;
use std::ops::DerefMut;
use serde::Serialize;
use serde;
use bincode;
use byteorder;
use core::error;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::fmt;
pub trait BinarySerializable : fmt::Debug + Sized {
// TODO move Result from Error.
fn serialize(&self, writer: &mut Write) -> error::Result<usize>;
fn deserialize(reader: &mut Read) -> error::Result<Self>;
}
impl BinarySerializable for () {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
Ok(0)
}
fn deserialize(reader: &mut Read) -> error::Result<Self> {
Ok(())
}
}
struct LayerBuilder {
period: usize,
buffer: Vec<u8>,
@@ -47,18 +62,17 @@ impl LayerBuilder {
}
}
fn insert<S: Serialize>(&mut self, doc_id: DocId, dest: S) -> InsertResult {
fn insert<S: BinarySerializable>(&mut self, doc_id: DocId, value: &S) -> InsertResult {
self.remaining -= 1;
self.len += 1;
let offset = self.written_size(); // TODO not sure if we want after or here
self.buffer.write_u32::<BigEndian>(doc_id);
value.serialize(&mut self.buffer);
if self.remaining == 0 {
let offset = self.written_size();
dest.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer));
self.remaining = self.period;
InsertResult::SkipPointer(offset)
InsertResult::SkipPointer(offset as u32)
}
else {
doc_id.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer));
dest.serialize(&mut bincode::serde::Serializer::new(&mut self.buffer));
InsertResult::NoNeedForSkip
}
}
@@ -70,8 +84,9 @@ pub struct SkipListBuilder {
layers: Vec<LayerBuilder>,
}
enum InsertResult {
SkipPointer(usize),
SkipPointer(u32),
NoNeedForSkip,
}
@@ -93,14 +108,14 @@ impl SkipListBuilder {
&mut self.layers[layer_id]
}
pub fn insert<S: Serialize>(&mut self, doc_id: DocId, dest: S) {
pub fn insert<S: BinarySerializable>(&mut self, doc_id: DocId, dest: &S) {
let mut layer_id = 0;
match self.get_layer(0).insert(doc_id, dest) {
InsertResult::SkipPointer(mut offset) => {
loop {
layer_id += 1;
let skip_result = self.get_layer(layer_id)
.insert(doc_id, offset);
.insert(doc_id, &offset);
match skip_result {
InsertResult::SkipPointer(next_offset) => {
offset = next_offset;
@@ -130,45 +145,156 @@ impl SkipListBuilder {
}
// ---------------------------
// the lower layer contains only the list of doc ids.
// A docset is represented
// SkipList<'a, Void>
struct Layer<R: Read + Seek> {
reader: R,
struct SkipLayer<'a, T> {
cursor: Cursor<&'a [u8]>,
num_items: u32,
next_item: Option<T>,
}
impl<R: Read + Seek + Clone> Layer<R> {
fn read(reader: &mut R) -> Layer<R> {
fn rebase_cursor<'a>(cursor: &Cursor<&'a [u8]>) -> Cursor<&'a [u8]>{
let data: &'a[u8] = *cursor.get_ref();
let from_idx = cursor.position() as usize;
let rebased_data = &data[from_idx..];
Cursor::new(rebased_data)
}
#[test]
fn test_rebase_cursor() {
{
let a: Vec<u8> = vec!(1, 2, 3);
let mut cur: Cursor<&[u8]> = Cursor::new(&a);
assert_eq!(cur.read_u8().unwrap(), 1);
let mut rebased_cursor = rebase_cursor(&cur);
assert_eq!(cur.read_u8().unwrap(), 2);
assert_eq!(rebased_cursor.read_u8().unwrap(), 2);
assert_eq!(cur.position(), 2);
assert_eq!(rebased_cursor.position(), 1);
cur.seek(SeekFrom::Start(0));
assert_eq!(cur.read_u8().unwrap(), 1);
rebased_cursor.seek(SeekFrom::Start(0));
assert_eq!(rebased_cursor.read_u8().unwrap(), 2);
}
}
struct Layer<'a, T> {
_phantom_: marker::PhantomData<T>,
cursor: Cursor<&'a [u8]>,
item_idx: usize,
num_items: usize,
cur_id: u32,
next_id: Option<u32>,
}
impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
type Item = (DocId, T);
fn next(&mut self,)-> Option<(DocId, T)> {
if self.item_idx >= self.num_items {
None
}
else {
let cur_val = T::deserialize(&mut self.cursor).unwrap();
let cur_id = self.next_id;
self.item_idx += 1;
if self.item_idx < self.num_items - 1 {
self.next_id = Some(u32::deserialize(&mut self.cursor).unwrap());
}
else {
self.next_id = None;
}
Some((self.cur_id.clone(), cur_val))
}
}
}
impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
// TODO error handling
writer.write_u32::<BigEndian>(self.clone());
Ok(4)
}
fn deserialize(reader: &mut Read) -> error::Result<Self> {
// TODO error handling
Ok(reader.read_u32::<BigEndian>().unwrap())
}
}
impl<'a, T: BinarySerializable> Layer<'a, T> {
fn read(cursor: &mut Cursor<&'a [u8]>) -> Layer<'a, T> {
// TODO error handling?
let num_items = reader.read_u32::<BigEndian>().unwrap() as u32;
let num_bytes = reader.read_u32::<BigEndian>().unwrap() as u32;
let reader_clone = reader.clone();
reader.seek(SeekFrom::Current(num_bytes as i64));
let num_items = cursor.read_u32::<BigEndian>().unwrap() as u32;
println!("{} items ", num_items);
let num_bytes = cursor.read_u32::<BigEndian>().unwrap() as u32;
println!("{} bytes ", num_bytes);
let mut rebased_cursor = rebase_cursor(cursor);
cursor.seek(SeekFrom::Current(num_bytes as i64));
// println!("cur val {:?}", cur_val);
let next_id: Option<u32> = match rebased_cursor.read_u32::<BigEndian>() {
Ok(val) => Some(val),
Err(_) => None
};
Layer {
reader: reader_clone,
num_items: num_items,
cursor: rebased_cursor,
item_idx: 0,
num_items: num_items as usize,
next_id: next_id,
}
}
fn seek(doc_id: DocId) {
// while self.next_doc_id < doc_id {
// self.next_doc_id = cursor.read_u32::<BigEndian>();
// self.cur_val = self.next_val;
// self.next_doc_id = bincode::Deserializer::new(self.cursor, 8).read_u32();
// self.next_val = bincode::Deserializer::new(self.cursor, 8).read_u32();
// }
}
}
pub struct SkipList<R: Read + Seek> {
layers: Vec<Layer<R>>,
pub struct SkipList<'a, T: BinarySerializable> {
data_layer: Layer<'a, T>,
skip_layers: Vec<Layer<'a, u32>>,
}
impl<R: Read + Seek> SkipList<R> {
impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> {
pub fn read(data: &[u8]) -> SkipList<Cursor<&[u8]>> {
type Item = (DocId, T);
fn next(&mut self,)-> Option<(DocId, T)> {
self.data_layer.next()
}
}
impl<'a, T: BinarySerializable> SkipList<'a, T> {
pub fn read(data: &'a [u8]) -> SkipList<'a, T> {
let mut cursor = Cursor::new(data);
// TODO error handling?
let num_layers = cursor.read_u8().unwrap();
let mut layers = Vec::new();
for _ in (0..num_layers) {
layers.push(Layer::read(&mut cursor));
println!("{} layers ", num_layers);
let mut skip_layers = Vec::new();
for _ in (0..num_layers - 1) {
let skip_layer: Layer<'a, u32> = Layer::read(&mut cursor);
skip_layers.push(skip_layer);
}
let data_layer: Layer<'a, T> = Layer::read(&mut cursor);
SkipList {
layers: layers
skip_layers: skip_layers,
data_layer: data_layer,
}
}
}

View File

@@ -1,14 +1,17 @@
extern crate tantivy;
use std::io::Write;
use tantivy::core::skip::SkipListBuilder;
extern crate byteorder;
use std::io::{Write, Seek};
use std::io::SeekFrom;
use tantivy::core::skip::{SkipListBuilder, SkipList};
use std::io::Cursor;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
#[test]
fn test_skip_list_builder() {
{
let mut output: Vec<u8> = Vec::new();
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10);
skip_list_builder.insert(2, 3);
skip_list_builder.insert(2, &3);
skip_list_builder.write::<Vec<u8>>(&mut output);
assert_eq!(output.len(), 17);
assert_eq!(output[0], 1);
@@ -17,7 +20,7 @@ fn test_skip_list_builder() {
let mut output: Vec<u8> = Vec::new();
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3);
for i in (0..9) {
skip_list_builder.insert(i, i);
skip_list_builder.insert(i, &i);
}
skip_list_builder.write::<Vec<u8>>(&mut output);
assert_eq!(output.len(), 129);
@@ -28,7 +31,7 @@ fn test_skip_list_builder() {
let mut output: Vec<u8> = Vec::new();
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3);
for i in (0..9) {
skip_list_builder.insert(i, ());
skip_list_builder.insert(i, &());
}
skip_list_builder.write::<Vec<u8>>(&mut output);
assert_eq!(output.len(), 93);
@@ -36,15 +39,13 @@ fn test_skip_list_builder() {
}
}
#[test]
fn test_skip_list_builder() {
{
let mut output: Vec<u8> = Vec::new();
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10);
skip_list_builder.insert(2, 3);
skip_list_builder.write::<Vec<u8>>(&mut output);
let skip_list = SkipList::read(output.as_slice());
}
fn test_skip_list_reader() {
let mut output: Vec<u8> = Vec::new();
let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10);
skip_list_builder.insert(2, &3);
skip_list_builder.write::<Vec<u8>>(&mut output);
let skip_list: SkipList<u32> = SkipList::read(&mut output);
// assert_eq!(output.len(), 17);
// assert_eq!(output[0], 1);
}