From c4eb2eca435d49460055dd7ff5fd42e682a340fa Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 5 Mar 2016 18:01:28 +0900 Subject: [PATCH] blop --- src/core/codec.rs | 10 ++++++++-- src/core/schema.rs | 7 +++++++ src/core/serialize.rs | 31 ++++++++++++++++++++++++++++++- src/core/store.rs | 8 ++++++-- src/core/writer.rs | 4 ---- src/lib.rs | 2 +- 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/core/codec.rs b/src/core/codec.rs index 515adf0e4..4c878afac 100644 --- a/src/core/codec.rs +++ b/src/core/codec.rs @@ -9,6 +9,7 @@ use core::index::SegmentComponent; use core::schema::Term; use core::schema::DocId; use core::fstmap::FstMapBuilder; +use core::serialize::Size; use core::store::StoreWriter; use core::serialize::BinarySerializable; use core::simdcompression; @@ -22,9 +23,14 @@ pub struct TermInfo { } impl BinarySerializable for TermInfo { + + const SIZE: Size = Size::Constant(8); + fn serialize(&self, writer: &mut Write) -> io::Result { - self.doc_freq.serialize(writer); - self.postings_offset.serialize(writer) + Ok( + try!(self.doc_freq.serialize(writer)) + + try!(self.postings_offset.serialize(writer)) + ) } fn deserialize(reader: &mut Read) -> io::Result { let doc_freq = try!(u32::deserialize(reader)); diff --git a/src/core/schema.rs b/src/core/schema.rs index 823874225..fd3efef18 100644 --- a/src/core/schema.rs +++ b/src/core/schema.rs @@ -5,6 +5,7 @@ use std::fmt; use std::io; use std::io::Read; use std::str; +use core::serialize::Size; use core::serialize::BinarySerializable; use rustc_serialize::Decodable; use rustc_serialize::Encodable; @@ -68,6 +69,9 @@ pub struct FieldValue { impl BinarySerializable for Field { + + const SIZE: Size = Size::Variable; + fn serialize(&self, writer: &mut Write) -> io::Result { let Field(field_id) = *self; field_id.serialize(writer) @@ -80,6 +84,9 @@ impl BinarySerializable for Field { impl BinarySerializable for FieldValue { + + const SIZE: Size = Size::Variable; + fn serialize(&self, writer: &mut Write) -> io::Result { Ok( try!(self.field.serialize(writer)) + diff --git a/src/core/serialize.rs b/src/core/serialize.rs index 57952cd8c..c5c27bdc8 100644 --- a/src/core/serialize.rs +++ b/src/core/serialize.rs @@ -1,3 +1,4 @@ + use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use std::fmt; use std::io::Write; @@ -13,13 +14,23 @@ fn convert_byte_order_error(byteorder_error: byteorder::Error) -> io::Error { } } +pub enum Size { + Variable, + Constant(usize) +} + + pub trait BinarySerializable : fmt::Debug + Sized { - // TODO move Result from Error. + const SIZE: Size; fn serialize(&self, writer: &mut Write) -> Result; fn deserialize(reader: &mut Read) -> Result; } + impl BinarySerializable for () { + + const SIZE: Size = Size::Constant(0); + fn serialize(&self, _: &mut Write) -> Result { Ok(0) } @@ -28,7 +39,11 @@ impl BinarySerializable for () { } } + impl BinarySerializable for Vec { + + const SIZE: Size = Size::Variable; + fn serialize(&self, writer: &mut Write) -> Result { let mut total_size = try!((self.len() as u32).serialize(writer)); for it in self.iter() { @@ -49,11 +64,15 @@ impl BinarySerializable for Vec { impl BinarySerializable for u32 { + + const SIZE: Size = Size::Constant(4); + fn serialize(&self, writer: &mut Write) -> Result { writer.write_u32::(self.clone()) .map(|_| 4) .map_err(convert_byte_order_error) } + fn deserialize(reader: &mut Read) -> Result { reader.read_u32::() .map_err(convert_byte_order_error) @@ -62,6 +81,9 @@ impl BinarySerializable for u32 { impl BinarySerializable for u64 { + + const SIZE: Size = Size::Constant(8); + fn serialize(&self, writer: &mut Write) -> Result { writer.write_u64::(self.clone()) .map(|_| 8) @@ -75,6 +97,9 @@ impl BinarySerializable for u64 { impl BinarySerializable for u8 { + + const SIZE: Size = Size::Constant(1); + fn serialize(&self, writer: &mut Write) -> Result { // TODO error writer.write_u8(self.clone()); @@ -87,6 +112,9 @@ impl BinarySerializable for u8 { } impl BinarySerializable for String { + + const SIZE: Size = Size::Variable; + fn serialize(&self, writer: &mut Write) -> Result { // TODO error let data: &[u8] = self.as_bytes(); @@ -105,6 +133,7 @@ impl BinarySerializable for String { } } + #[cfg(test)] mod test { diff --git a/src/core/store.rs b/src/core/store.rs index e73a2731c..dc9bf6f69 100644 --- a/src/core/store.rs +++ b/src/core/store.rs @@ -4,6 +4,7 @@ use core::schema::DocId; use core::schema::Document; use core::schema::FieldValue; use core::serialize::BinarySerializable; +use core::serialize::Size; use core::directory::ReadOnlySource; use std::io::Write; use std::io::Read; @@ -16,7 +17,7 @@ use lz4; // TODO cache uncompressed pages -const BLOCK_SIZE: usize = 131072; +const BLOCK_SIZE: usize = 131_072; pub struct StoreWriter { doc: DocId, @@ -31,6 +32,9 @@ pub struct StoreWriter { struct OffsetIndex(DocId, u64); impl BinarySerializable for OffsetIndex { + + const SIZE: Size = Size::Constant(4 + 8); + fn serialize(&self, writer: &mut Write) -> io::Result { let OffsetIndex(a, b) = *self; Ok(try!(a.serialize(writer)) + try!(b.serialize(writer))) @@ -112,7 +116,7 @@ impl StoreReader { pub fn num_docs(&self,) -> DocId { self.offsets.len() as DocId } - + fn read_header(data: &ReadOnlySource) -> Vec { // todo err let mut cursor = Cursor::new(data.as_slice()); diff --git a/src/core/writer.rs b/src/core/writer.rs index da04b4d4b..2fcfc2cd2 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -71,10 +71,6 @@ impl IndexWriter { Ok(segment_writer) => { let segment = segment_writer.segment(); segment_writer.finalize(); - // write(self.segment_serializer); - // try!(SimpleCodec::write(&self.segment_writer, &segment).map(|sz| (segment.clone(), sz))); - // At this point, the segment is written - // We still need to sync all of the file, as well as the parent directory. try!(self.directory.sync(segment.clone())); self.directory.publish_segment(segment.clone()); Ok(segment) diff --git a/src/lib.rs b/src/lib.rs index 0943e0528..a1cf12296 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -#![feature(test)] +#![feature(test,associated_consts)] #[allow(unused_imports)] #[macro_use]