Paul Masurel
2016-03-05 18:01:28 +09:00
parent 80b4553283
commit c4eb2eca43
6 changed files with 52 additions and 10 deletions

View File

@@ -9,6 +9,7 @@ use core::index::SegmentComponent;
use core::schema::Term;
use core::schema::DocId;
use core::fstmap::FstMapBuilder;
use core::serialize::Size;
use core::store::StoreWriter;
use core::serialize::BinarySerializable;
use core::simdcompression;
@@ -22,9 +23,14 @@ pub struct TermInfo {
}
impl BinarySerializable for TermInfo {
const SIZE: Size = Size::Constant(8);
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
self.doc_freq.serialize(writer);
self.postings_offset.serialize(writer)
Ok(
try!(self.doc_freq.serialize(writer)) +
try!(self.postings_offset.serialize(writer))
)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
let doc_freq = try!(u32::deserialize(reader));

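The serialize change above replaces a body that silently discarded both io::Result values with one that propagates errors via try! and returns the summed byte count. A minimal standalone sketch of that pattern, using a simplified stand-in trait and a hand-rolled big-endian writer (neither is the crate's actual code):

use std::io;
use std::io::Write;

// Simplified stand-in for the crate's BinarySerializable trait.
trait WriteSized {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize>;
}

struct TermInfoLike {
    doc_freq: u32,
    postings_offset: u32,
}

impl WriteSized for TermInfoLike {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
        // try! propagates the first io::Error instead of dropping it;
        // the Ok value accumulates the bytes written by each field.
        Ok(
            try!(write_be_u32(writer, self.doc_freq)) +
            try!(write_be_u32(writer, self.postings_offset))
        ) // 4 + 4 bytes, consistent with the Size::Constant(8) declared on TermInfo
    }
}

fn write_be_u32(writer: &mut Write, value: u32) -> io::Result<usize> {
    let bytes = [(value >> 24) as u8, (value >> 16) as u8, (value >> 8) as u8, value as u8];
    try!(writer.write_all(&bytes));
    Ok(4)
}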
View File

@@ -5,6 +5,7 @@ use std::fmt;
use std::io;
use std::io::Read;
use std::str;
use core::serialize::Size;
use core::serialize::BinarySerializable;
use rustc_serialize::Decodable;
use rustc_serialize::Encodable;
@@ -68,6 +69,9 @@ pub struct FieldValue {
impl BinarySerializable for Field {
const SIZE: Size = Size::Variable;
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let Field(field_id) = *self;
field_id.serialize(writer)
@@ -80,6 +84,9 @@ impl BinarySerializable for Field {
impl BinarySerializable for FieldValue {
const SIZE: Size = Size::Variable;
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
Ok(
try!(self.field.serialize(writer)) +

View File

@@ -1,3 +1,4 @@
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::fmt;
use std::io::Write;
@@ -13,13 +14,23 @@ fn convert_byte_order_error(byteorder_error: byteorder::Error) -> io::Error {
}
}
pub enum Size {
Variable,
Constant(usize)
}
pub trait BinarySerializable : fmt::Debug + Sized {
// TODO move Result from Error.
const SIZE: Size;
fn serialize(&self, writer: &mut Write) -> Result<usize>;
fn deserialize(reader: &mut Read) -> Result<Self>;
}
impl BinarySerializable for () {
const SIZE: Size = Size::Constant(0);
fn serialize(&self, _: &mut Write) -> Result<usize> {
Ok(0)
}
@@ -28,7 +39,11 @@ impl BinarySerializable for () {
}
}
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
const SIZE: Size = Size::Variable;
fn serialize(&self, writer: &mut Write) -> Result<usize> {
let mut total_size = try!((self.len() as u32).serialize(writer));
for it in self.iter() {
@@ -49,11 +64,15 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
impl BinarySerializable for u32 {
const SIZE: Size = Size::Constant(4);
fn serialize(&self, writer: &mut Write) -> Result<usize> {
writer.write_u32::<BigEndian>(self.clone())
.map(|_| 4)
.map_err(convert_byte_order_error)
}
fn deserialize(reader: &mut Read) -> Result<u32> {
reader.read_u32::<BigEndian>()
.map_err(convert_byte_order_error)
@@ -62,6 +81,9 @@ impl BinarySerializable for u32 {
impl BinarySerializable for u64 {
const SIZE: Size = Size::Constant(8);
fn serialize(&self, writer: &mut Write) -> Result<usize> {
writer.write_u64::<BigEndian>(self.clone())
.map(|_| 8)
@@ -75,6 +97,9 @@ impl BinarySerializable for u64 {
impl BinarySerializable for u8 {
const SIZE: Size = Size::Constant(1);
fn serialize(&self, writer: &mut Write) -> Result<usize> {
// TODO error
writer.write_u8(self.clone());
@@ -87,6 +112,9 @@ impl BinarySerializable for u8 {
}
impl BinarySerializable for String {
const SIZE: Size = Size::Variable;
fn serialize(&self, writer: &mut Write) -> Result<usize> {
// TODO error
let data: &[u8] = self.as_bytes();
@@ -105,6 +133,7 @@ impl BinarySerializable for String {
}
}
#[cfg(test)]
mod test {

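The new Size associated constant lets generic code distinguish fixed-width encodings (u32, u64, u8, TermInfo, OffsetIndex) from variable-width ones (String, Vec<T>) without deserializing anything. A hypothetical helper sketching one way it could be consumed (not part of this commit; it assumes the trait and enum exactly as defined above and the associated_consts feature gate enabled in lib.rs):

// Hypothetical: how many whole items of a fixed-size type fit in a raw byte slice.
fn num_items<T: BinarySerializable>(data: &[u8]) -> Option<usize> {
    match T::SIZE {
        Size::Constant(item_len) if item_len > 0 => Some(data.len() / item_len),
        // Variable-size items (String, Vec<T>, ...) have to be read to be counted.
        _ => None,
    }
}

For instance, num_items::<u64>(slice) would report slice.len() / 8, while num_items::<String>(slice) returns None because a String's length is only known after reading its prefix.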
View File

@@ -4,6 +4,7 @@ use core::schema::DocId;
use core::schema::Document;
use core::schema::FieldValue;
use core::serialize::BinarySerializable;
use core::serialize::Size;
use core::directory::ReadOnlySource;
use std::io::Write;
use std::io::Read;
@@ -16,7 +17,7 @@ use lz4;
// TODO cache uncompressed pages
const BLOCK_SIZE: usize = 131072;
const BLOCK_SIZE: usize = 131_072;
pub struct StoreWriter {
doc: DocId,
@@ -31,6 +32,9 @@ pub struct StoreWriter {
struct OffsetIndex(DocId, u64);
impl BinarySerializable for OffsetIndex {
const SIZE: Size = Size::Constant(4 + 8);
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let OffsetIndex(a, b) = *self;
Ok(try!(a.serialize(writer)) + try!(b.serialize(writer)))
@@ -112,7 +116,7 @@ impl StoreReader {
pub fn num_docs(&self,) -> DocId {
self.offsets.len() as DocId
}
fn read_header(data: &ReadOnlySource) -> Vec<OffsetIndex> {
// todo err
let mut cursor = Cursor::new(data.as_slice());

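Because OffsetIndex declares Size::Constant(4 + 8) and Vec<T> serialization (see serialize.rs above) writes a u32 length prefix followed by the items, the on-disk size of the offsets header is predictable. A small illustrative helper (the name and its existence are hypothetical, not part of the commit):

// u32 length prefix + one fixed-width (DocId, u64) entry per stored block.
fn offsets_header_len(num_entries: usize) -> usize {
    4 + num_entries * (4 + 8)
}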
View File

@@ -71,10 +71,6 @@ impl IndexWriter {
Ok(segment_writer) => {
let segment = segment_writer.segment();
segment_writer.finalize();
// write(self.segment_serializer);
// try!(SimpleCodec::write(&self.segment_writer, &segment).map(|sz| (segment.clone(), sz)));
// At this point, the segment is written
// We still need to sync all of the file, as well as the parent directory.
try!(self.directory.sync(segment.clone()));
self.directory.publish_segment(segment.clone());
Ok(segment)

View File

@@ -1,4 +1,4 @@
#![feature(test)]
#![feature(test,associated_consts)]
#[allow(unused_imports)]
#[macro_use]