This commit is contained in:
Paul Masurel
2016-02-20 15:41:28 +09:00
parent 488c6ede60
commit ae574aea41
7 changed files with 202 additions and 31 deletions

View File

@@ -38,8 +38,9 @@ impl SimpleSegmentSerializer {
impl SegmentSerializer<()> for SimpleSegmentSerializer {
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>) {
fn store_doc(&mut self, field_values_it: &mut Iterator<Item=&FieldValue>) {
let field_values: Vec<&FieldValue> = field_values_it.collect();
self.store_writer.store(&field_values);
}
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()> {

View File

@@ -1,11 +1,13 @@
use std::result;
use std::io;
use byteorder;
#[derive(Debug)]
pub enum Error {
NotImplementedYet,
WriteError(String),
ReadError,
BinaryReadError(byteorder::Error),
IOError(io::ErrorKind, String),
FileNotFound(String),
LockError(String),

View File

@@ -1,5 +1,6 @@
use core::global::*;
use std::fmt::Write;
use core::error;
use std::io::Write;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::string::FromUtf8Error;
use std::collections::HashMap;
@@ -7,6 +8,8 @@ use std::str;
use std::iter;
use std::slice;
use std::fmt;
use std::io::Read;
use core::serialize::BinarySerializable;
@@ -57,6 +60,37 @@ pub struct FieldValue {
}
impl BinarySerializable for Field {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
let Field(field_id) = *self;
field_id.serialize(writer)
}
fn deserialize(reader: &mut Read) -> error::Result<Field> {
u8::deserialize(reader).map(Field)
}
}
impl BinarySerializable for FieldValue {
    /// Writes the field followed by its text and returns the sum of both sizes.
    fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
        let field_size = try!(self.field.serialize(writer));
        let text_size = try!(self.text.serialize(writer));
        Ok(field_size + text_size)
    }

    /// Reads a field and then its text, in the same order `serialize` writes them.
    fn deserialize(reader: &mut Read) -> error::Result<Self> {
        Ok(FieldValue {
            field: try!(Field::deserialize(reader)),
            text: try!(String::deserialize(reader)),
        })
    }
}
#[derive(Clone,PartialEq,PartialOrd,Ord,Eq,Hash)]
pub struct Term {
data: Vec<u8>,

View File

@@ -3,6 +3,8 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::fmt;
use std::io::Write;
use core::error;
use core::error::Error;
use std::io::Cursor;
use std::io::Read;
pub trait BinarySerializable : fmt::Debug + Sized {
@@ -22,18 +24,15 @@ impl BinarySerializable for () {
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
let mut total_size = 0;
writer.write_u32::<BigEndian>(self.len() as u32);
total_size += 4;
let num_elements = self.len() as u32;
let mut total_size = try!(num_elements.serialize(writer));
for it in self.iter() {
let item_size = try!(it.serialize(writer));
total_size += item_size;
total_size += try!(it.serialize(writer));
}
Ok(total_size)
}
fn deserialize(reader: &mut Read) -> error::Result<Vec<T>> {
// TODO error
let num_items = reader.read_u32::<BigEndian>().unwrap();
let num_items = try!(u32::deserialize(reader));
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
let item = try!(T::deserialize(reader));
@@ -42,3 +41,128 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
Ok(items)
}
}
impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
writer.write_u32::<BigEndian>(self.clone())
.map(|x| 4)
.map_err(Error::BinaryReadError)
}
fn deserialize(reader: &mut Read) -> error::Result<u32> {
reader.read_u32::<BigEndian>()
.map_err(Error::BinaryReadError)
}
}
impl BinarySerializable for u8 {
    /// Writes the byte to `writer` and returns the number of bytes written (`1`).
    ///
    /// A failed write is reported as `Error::BinaryReadError` rather than being
    /// silently dropped, which is the same mapping the `u32` implementation uses.
    fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
        writer.write_u8(*self)
              .map(|_| 1)
              .map_err(Error::BinaryReadError)
    }

    /// Reads one byte from `reader`.
    ///
    /// Any read failure is returned as `Error::BinaryReadError`.
    fn deserialize(reader: &mut Read) -> error::Result<u8> {
        reader.read_u8()
              .map_err(Error::BinaryReadError)
    }
}
impl BinarySerializable for String {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
// TODO error
let data: &[u8] = self.as_bytes();
let mut size = try!((data.len() as u32).serialize(writer));
size += data.len();
writer.write_all(data);
Ok(size)
}
fn deserialize(reader: &mut Read) -> error::Result<String> {
// TODO error
let string_length = try!(u32::deserialize(reader)) as usize;
let mut result = String::with_capacity(string_length);
reader.take(string_length as u64).read_to_string(&mut result);
Ok(result)
}
}
#[test]
fn test_serialize_u8() {
    // Two bytes are written back to back, then read back in the same order.
    // The value returned by `serialize` is asserted as well, so a failed write
    // cannot go unnoticed.
    let mut buffer: Vec<u8> = Vec::new();
    {
        let x: u8 = 3;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 1);
        assert_eq!(buffer.len(), 1);
    }
    {
        let x: u8 = 5;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 1);
        assert_eq!(buffer.len(), 2);
    }
    let mut cursor = Cursor::new(&buffer);
    assert_eq!(3, u8::deserialize(&mut cursor).unwrap());
    assert_eq!(5, u8::deserialize(&mut cursor).unwrap());
    // The buffer is exhausted: a third read must fail.
    assert!(u8::deserialize(&mut cursor).is_err());
}
#[test]
fn test_serialize_u32() {
    // Two u32 values are written back to back, then read back in the same order.
    // The value returned by `serialize` is asserted as well, so a failed write
    // cannot go unnoticed.
    let mut buffer: Vec<u8> = Vec::new();
    {
        let x: u32 = 3;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 4);
        assert_eq!(buffer.len(), 4);
    }
    {
        let x: u32 = 5;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 4);
        assert_eq!(buffer.len(), 8);
    }
    let mut cursor = Cursor::new(&buffer);
    assert_eq!(3, u32::deserialize(&mut cursor).unwrap());
    assert_eq!(5, u32::deserialize(&mut cursor).unwrap());
    // The buffer is exhausted: a third read must fail.
    assert!(u32::deserialize(&mut cursor).is_err());
}
#[test]
fn test_serialize_string() {
    // Each serialized string is a 4-byte length prefix plus 3 bytes per character
    // for these particular inputs.
    let first_length = 4 + 3 * 4;
    let second_length = 4 + 3 * 8;
    let short_text: String = String::from("ぽよぽよ");
    let long_text: String = String::from("富士さん見える。");

    let mut buffer: Vec<u8> = Vec::new();

    let short_written = short_text.serialize(&mut buffer).unwrap();
    assert_eq!(short_written, first_length);
    assert_eq!(buffer.len(), first_length);

    let long_written = long_text.serialize(&mut buffer).unwrap();
    assert_eq!(long_written, second_length);
    assert_eq!(buffer.len(), first_length + second_length);

    let mut cursor = Cursor::new(&buffer);
    let short_read = String::deserialize(&mut cursor).unwrap();
    assert_eq!("ぽよぽよ", short_read);
    let long_read = String::deserialize(&mut cursor).unwrap();
    assert_eq!("富士さん見える。", long_read);
    // Nothing is left in the buffer, so reading another length prefix fails.
    assert!(u32::deserialize(&mut cursor).is_err());
}
#[test]
fn test_serialize_vec() {
    // Expected size: one 4-byte element count plus the two serialized strings.
    let first_length = 4 + 3 * 4;
    let second_length = 4 + 3 * 8;
    let expected_size = first_length + second_length + 4;

    let vec = vec![String::from("ぽよぽよ"), String::from("富士さん見える。")];
    let mut buffer: Vec<u8> = Vec::new();
    let written = vec.serialize(&mut buffer).unwrap();
    assert_eq!(written, expected_size);

    let mut cursor = Cursor::new(&buffer);
    let deser: Vec<String> = Vec::deserialize(&mut cursor).unwrap();
    assert_eq!(deser.len(), 2);
    assert_eq!("ぽよぽよ", deser[0]);
    assert_eq!("富士さん見える。", deser[1]);
}

View File

@@ -65,7 +65,7 @@ impl<T: BinarySerializable> LayerBuilder<T> {
}
//
//
// fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) {
// for it in layer {
// println!(" - {:?}", it);
@@ -146,20 +146,6 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
}
impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
// TODO error handling
writer.write_u32::<BigEndian>(self.clone());
Ok(4)
}
fn deserialize(reader: &mut Read) -> error::Result<Self> {
// TODO error handling
reader.read_u32::<BigEndian>().map_err(|err| error::Error::ReadError)
}
}
struct Layer<'a, T> {
cursor: Cursor<&'a [u8]>,
next_id: DocId,

View File

@@ -2,8 +2,14 @@ use std::io::BufWriter;
use std::fs::File;
use core::schema::Document;
use core::schema::FieldValue;
use core::error;
use core::serialize::BinarySerializable;
use std::io::Write;
use std::io::Read;
/// Writer state for storing documents into a file.
pub struct StoreWriter {
// Document counter; starts at 0 and is incremented by `store`.
doc: usize,
// Starts out empty; presumably meant to hold per-document offsets — TODO confirm.
offsets: Vec<usize>,
// Buffered writer wrapping the file handed to `new`.
writer: BufWriter<File>,
}
@@ -11,13 +17,31 @@ impl StoreWriter {
/// Builds a `StoreWriter` over `file`: the document counter starts at zero,
/// the offsets list is empty and the file is wrapped in a `BufWriter`.
pub fn new(file: File) -> StoreWriter {
    let buffered_file = BufWriter::new(file);
    let no_offsets = Vec::new();
    StoreWriter {
        doc: 0,
        offsets: no_offsets,
        writer: buffered_file,
    }
}
pub fn store(&mut self, fields: &mut Iterator<Item=&FieldValue>) {
for field in fields {
println!("{:?}", field);
}
/// Registers one more stored document.
///
/// For now only the document counter is advanced: `fields` is not read and
/// nothing is written to the underlying writer.
pub fn store(&mut self, fields: &Vec<&FieldValue>) {
self.doc += 1;
}
}
// impl<T: BinarySerializable> BinarySerializable for Vec<T> {
// fn serialize(&self, field_values: &mut Iterator<Item=&FieldValue>) -> error::Result<usize> {
// let mut total_size = 0;
// writer.write_u32::<BigEndian>(self.len() as u32);
// total_size += 4;
// for it in self.iter() {
// let item_size = try!(it.serialize(writer));
// total_size += item_size;
// }
// Ok(total_size)
// }
// }

View File

@@ -158,11 +158,11 @@ impl SegmentWriter {
}
}
}
let mut stored_field_it = doc.fields().filter(|field_value| {
let mut stored_fieldvalues_it = doc.fields().filter(|field_value| {
schema.get_field(&field_value.field)
.is_stored()
});
self.segment_serializer.store_doc(&mut stored_field_it);
self.segment_serializer.store_doc(&mut stored_fieldvalues_it);
self.max_doc += 1;
}