diff --git a/src/core/codec.rs b/src/core/codec.rs index c93a6d22f..c6991b1d6 100644 --- a/src/core/codec.rs +++ b/src/core/codec.rs @@ -38,8 +38,9 @@ impl SimpleSegmentSerializer { impl SegmentSerializer<()> for SimpleSegmentSerializer { - fn store_doc(&mut self, field: &mut Iterator) { - + fn store_doc(&mut self, field_values_it: &mut Iterator) { + let field_values: Vec<&FieldValue> = field_values_it.collect(); + self.store_writer.store(&field_values); } fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()> { diff --git a/src/core/error.rs b/src/core/error.rs index c820cd7c7..dd649c81a 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -1,11 +1,13 @@ use std::result; use std::io; +use byteorder; #[derive(Debug)] pub enum Error { NotImplementedYet, WriteError(String), ReadError, + BinaryReadError(byteorder::Error), IOError(io::ErrorKind, String), FileNotFound(String), LockError(String), diff --git a/src/core/schema.rs b/src/core/schema.rs index b0cd6c8c0..db8d37431 100644 --- a/src/core/schema.rs +++ b/src/core/schema.rs @@ -1,5 +1,6 @@ use core::global::*; -use std::fmt::Write; +use core::error; +use std::io::Write; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use std::string::FromUtf8Error; use std::collections::HashMap; @@ -7,6 +8,8 @@ use std::str; use std::iter; use std::slice; use std::fmt; +use std::io::Read; +use core::serialize::BinarySerializable; @@ -57,6 +60,37 @@ pub struct FieldValue { } +impl BinarySerializable for Field { + fn serialize(&self, writer: &mut Write) -> error::Result { + let Field(field_id) = *self; + field_id.serialize(writer) + } + + fn deserialize(reader: &mut Read) -> error::Result { + u8::deserialize(reader).map(Field) + } +} + + +impl BinarySerializable for FieldValue { + fn serialize(&self, writer: &mut Write) -> error::Result { + Ok( + try!(self.field.serialize(writer)) + + try!(self.text.serialize(writer)) + ) + } + fn deserialize(reader: &mut Read) -> error::Result { + let field = try!(Field::deserialize(reader)); + let text = try!(String::deserialize(reader)); + Ok(FieldValue { + field: field, + text: text, + }) + } +} + + + #[derive(Clone,PartialEq,PartialOrd,Ord,Eq,Hash)] pub struct Term { data: Vec, diff --git a/src/core/serialize.rs b/src/core/serialize.rs index f66e761cb..6b9ded357 100644 --- a/src/core/serialize.rs +++ b/src/core/serialize.rs @@ -3,6 +3,8 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use std::fmt; use std::io::Write; use core::error; +use core::error::Error; +use std::io::Cursor; use std::io::Read; pub trait BinarySerializable : fmt::Debug + Sized { @@ -22,18 +24,15 @@ impl BinarySerializable for () { impl BinarySerializable for Vec { fn serialize(&self, writer: &mut Write) -> error::Result { - let mut total_size = 0; - writer.write_u32::(self.len() as u32); - total_size += 4; + let num_elements = self.len() as u32; + let mut total_size = try!(num_elements.serialize(writer)); for it in self.iter() { - let item_size = try!(it.serialize(writer)); - total_size += item_size; + total_size += try!(it.serialize(writer)); } Ok(total_size) } fn deserialize(reader: &mut Read) -> error::Result> { - // TODO error - let num_items = reader.read_u32::().unwrap(); + let num_items = try!(u32::deserialize(reader)); let mut items: Vec = Vec::with_capacity(num_items as usize); for _ in 0..num_items { let item = try!(T::deserialize(reader)); @@ -42,3 +41,128 @@ impl BinarySerializable for Vec { Ok(items) } } + +impl BinarySerializable for u32 { + fn serialize(&self, writer: &mut Write) -> error::Result { + writer.write_u32::(self.clone()) + .map(|x| 4) + .map_err(Error::BinaryReadError) + } + fn deserialize(reader: &mut Read) -> error::Result { + reader.read_u32::() + .map_err(Error::BinaryReadError) + } +} + + +impl BinarySerializable for u8 { + fn serialize(&self, writer: &mut Write) -> error::Result { + // TODO error + writer.write_u8(self.clone()); + Ok(1) + } + fn deserialize(reader: &mut Read) -> error::Result { + reader.read_u8() + .map_err(Error::BinaryReadError) + } +} + +impl BinarySerializable for String { + fn serialize(&self, writer: &mut Write) -> error::Result { + // TODO error + let data: &[u8] = self.as_bytes(); + let mut size = try!((data.len() as u32).serialize(writer)); + size += data.len(); + writer.write_all(data); + Ok(size) + } + + fn deserialize(reader: &mut Read) -> error::Result { + // TODO error + let string_length = try!(u32::deserialize(reader)) as usize; + let mut result = String::with_capacity(string_length); + reader.take(string_length as u64).read_to_string(&mut result); + Ok(result) + } +} + + +#[test] +fn test_serialize_u8() { + let mut buffer: Vec = Vec::new(); + { + let x: u8 = 3; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 1); + } + { + let x: u8 = 5; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 2); + } + let mut cursor = Cursor::new(&buffer); + assert_eq!(3, u8::deserialize(&mut cursor).unwrap()); + assert_eq!(5, u8::deserialize(&mut cursor).unwrap()); + assert!(u8::deserialize(&mut cursor).is_err()); +} + + +#[test] +fn test_serialize_u32() { + let mut buffer: Vec = Vec::new(); + { + let x: u32 = 3; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 4); + } + { + let x: u32 = 5; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 8); + } + let mut cursor = Cursor::new(&buffer); + assert_eq!(3, u32::deserialize(&mut cursor).unwrap()); + assert_eq!(5, u32::deserialize(&mut cursor).unwrap()); + assert!(u32::deserialize(&mut cursor).is_err()); +} + + + +#[test] +fn test_serialize_string() { + let mut buffer: Vec = Vec::new(); + let first_length = 4 + 3 * 4; + let second_length = 4 + 3 * 8; + { + let x: String = String::from("ぽよぽよ"); + assert_eq!(x.serialize(&mut buffer).unwrap(), first_length); + assert_eq!(buffer.len(), first_length); + } + { + let x: String = String::from("富士さん見える。"); + assert_eq!(x.serialize(&mut buffer).unwrap(), second_length); + assert_eq!(buffer.len(), first_length + second_length); + } + let mut cursor = Cursor::new(&buffer); + assert_eq!("ぽよぽよ", String::deserialize(&mut cursor).unwrap()); + assert_eq!("富士さん見える。", String::deserialize(&mut cursor).unwrap()); + assert!(u32::deserialize(&mut cursor).is_err()); +} + + + +#[test] +fn test_serialize_vec() { + let mut buffer: Vec = Vec::new(); + let first_length = 4 + 3 * 4; + let second_length = 4 + 3 * 8; + let vec = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。")); + assert_eq!(vec.serialize(&mut buffer).unwrap(), first_length + second_length + 4); + let mut cursor = Cursor::new(&buffer); + { + let deser: Vec = Vec::deserialize(&mut cursor).unwrap(); + assert_eq!(deser.len(), 2); + assert_eq!("ぽよぽよ", deser[0]); + assert_eq!("富士さん見える。", deser[1]); + } +} diff --git a/src/core/skip.rs b/src/core/skip.rs index 65c73ef1c..a143fa6fe 100644 --- a/src/core/skip.rs +++ b/src/core/skip.rs @@ -65,7 +65,7 @@ impl LayerBuilder { } -// +// // fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) { // for it in layer { // println!(" - {:?}", it); @@ -146,20 +146,6 @@ impl SkipListBuilder { } -impl BinarySerializable for u32 { - fn serialize(&self, writer: &mut Write) -> error::Result { - // TODO error handling - writer.write_u32::(self.clone()); - Ok(4) - } - - fn deserialize(reader: &mut Read) -> error::Result { - // TODO error handling - reader.read_u32::().map_err(|err| error::Error::ReadError) - } -} - - struct Layer<'a, T> { cursor: Cursor<&'a [u8]>, next_id: DocId, diff --git a/src/core/store.rs b/src/core/store.rs index 269079a08..bba90a2fb 100644 --- a/src/core/store.rs +++ b/src/core/store.rs @@ -2,8 +2,14 @@ use std::io::BufWriter; use std::fs::File; use core::schema::Document; use core::schema::FieldValue; +use core::error; +use core::serialize::BinarySerializable; +use std::io::Write; +use std::io::Read; pub struct StoreWriter { + doc: usize, + offsets: Vec, writer: BufWriter, } @@ -11,13 +17,31 @@ impl StoreWriter { pub fn new(file: File) -> StoreWriter { StoreWriter { + doc: 0, + offsets: Vec::new(), writer: BufWriter::new(file), } } - pub fn store(&mut self, fields: &mut Iterator) { - for field in fields { - println!("{:?}", field); - } + pub fn store(&mut self, fields: &Vec<&FieldValue>) { + + self.doc += 1; } + } + + + + +// impl BinarySerializable for Vec { +// fn serialize(&self, field_values: &mut Iterator) -> error::Result { +// let mut total_size = 0; +// writer.write_u32::(self.len() as u32); +// total_size += 4; +// for it in self.iter() { +// let item_size = try!(it.serialize(writer)); +// total_size += item_size; +// } +// Ok(total_size) +// } +// } diff --git a/src/core/writer.rs b/src/core/writer.rs index 4f60a51b4..e9767fa4e 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -158,11 +158,11 @@ impl SegmentWriter { } } } - let mut stored_field_it = doc.fields().filter(|field_value| { + let mut stored_fieldvalues_it = doc.fields().filter(|field_value| { schema.get_field(&field_value.field) .is_stored() }); - self.segment_serializer.store_doc(&mut stored_field_it); + self.segment_serializer.store_doc(&mut stored_fieldvalues_it); self.max_doc += 1; }