storebuidler

This commit is contained in:
Paul Masurel
2016-02-20 16:37:15 +09:00
parent ae574aea41
commit 1215049adf
5 changed files with 69 additions and 9 deletions

View File

@@ -22,6 +22,7 @@ tempdir = "0.3.4"
bincode = "0.4.0"
serde = "0.6.11"
libc = "0.2.6"
lz4 = "*"
[build-dependencies]
gcc = "0.3.24"

View File

@@ -83,9 +83,10 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
Ok(())
}
fn close(self,) -> Result<()> {
fn close(mut self,) -> Result<()> {
// TODO handle errors on close
self.term_fst_builder.finish();
self.store_writer.close();
Ok(())
}
}

View File

@@ -42,6 +42,7 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
}
}
impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
writer.write_u32::<BigEndian>(self.clone())
@@ -54,6 +55,18 @@ impl BinarySerializable for u32 {
}
}
impl BinarySerializable for u64 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
writer.write_u64::<BigEndian>(self.clone())
.map(|x| 4)
.map_err(Error::BinaryReadError)
}
fn deserialize(reader: &mut Read) -> error::Result<u64> {
reader.read_u64::<BigEndian>()
.map_err(Error::BinaryReadError)
}
}
impl BinarySerializable for u8 {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
@@ -126,8 +139,6 @@ fn test_serialize_u32() {
assert!(u32::deserialize(&mut cursor).is_err());
}
#[test]
fn test_serialize_string() {
let mut buffer: Vec<u8> = Vec::new();
@@ -149,8 +160,6 @@ fn test_serialize_string() {
assert!(u32::deserialize(&mut cursor).is_err());
}
#[test]
fn test_serialize_vec() {
let mut buffer: Vec<u8> = Vec::new();

View File

@@ -1,31 +1,79 @@
use std::io::BufWriter;
use std::fs::File;
use core::global::DocId;
use core::schema::Document;
use core::schema::FieldValue;
use core::error;
use core::serialize::BinarySerializable;
use std::io::Write;
use std::io::Read;
use lz4;
const BLOCK_SIZE: usize = 262144;
pub struct StoreWriter {
doc: usize,
offsets: Vec<usize>,
doc: DocId,
offsets: Vec<OffsetIndex>,
written: u64,
writer: BufWriter<File>,
current_block: Vec<u8>,
}
#[derive(Debug)]
struct OffsetIndex(DocId, u64);
impl BinarySerializable for OffsetIndex {
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
let OffsetIndex(a, b) = *self;
Ok(try!(a.serialize(writer)) + try!(b.serialize(writer)))
}
fn deserialize(reader: &mut Read) -> error::Result<OffsetIndex> {
let a = try!(DocId::deserialize(reader));
let b = try!(u64::deserialize(reader));
Ok(OffsetIndex(a, b))
}
}
impl StoreWriter {
pub fn new(file: File) -> StoreWriter {
StoreWriter {
doc: 0,
written: 0,
offsets: Vec::new(),
writer: BufWriter::new(file),
current_block: Vec::new(),
}
}
pub fn store(&mut self, fields: &Vec<&FieldValue>) {
pub fn store(&mut self, field_values: &Vec<&FieldValue>) {
(field_values.len() as u32).serialize(&mut self.current_block);
for field_value in field_values.iter() {
(*field_value).serialize(&mut self.current_block);
}
self.doc += 1;
if self.current_block.len() > BLOCK_SIZE {
self.write_and_compress_block();
}
}
fn write_and_compress_block(&mut self,) {
// err handling
let mut encoder = lz4::EncoderBuilder::new()
.build(&mut self.writer)
.unwrap();
encoder.write_all(&self.current_block);
self.written = self.current_block.len() as u64;
self.offsets.push(OffsetIndex(self.doc, self.written));
self.current_block.clear();
}
pub fn close(&mut self,) {
if self.current_block.len() > 0 {
self.write_and_compress_block();
}
}
}

View File

@@ -20,5 +20,6 @@ extern crate tempdir;
extern crate bincode;
extern crate serde;
extern crate libc;
extern crate lz4;
pub mod core;