mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-18 01:00:40 +00:00
storebuidler
This commit is contained in:
@@ -22,6 +22,7 @@ tempdir = "0.3.4"
|
||||
bincode = "0.4.0"
|
||||
serde = "0.6.11"
|
||||
libc = "0.2.6"
|
||||
lz4 = "*"
|
||||
|
||||
[build-dependencies]
|
||||
gcc = "0.3.24"
|
||||
|
||||
@@ -83,9 +83,10 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(self,) -> Result<()> {
|
||||
fn close(mut self,) -> Result<()> {
|
||||
// TODO handle errors on close
|
||||
self.term_fst_builder.finish();
|
||||
self.store_writer.close();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for u32 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
writer.write_u32::<BigEndian>(self.clone())
|
||||
@@ -54,6 +55,18 @@ impl BinarySerializable for u32 {
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for u64 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
writer.write_u64::<BigEndian>(self.clone())
|
||||
.map(|x| 4)
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<u64> {
|
||||
reader.read_u64::<BigEndian>()
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
@@ -126,8 +139,6 @@ fn test_serialize_u32() {
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
@@ -149,8 +160,6 @@ fn test_serialize_string() {
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_serialize_vec() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
|
||||
@@ -1,31 +1,79 @@
|
||||
use std::io::BufWriter;
|
||||
use std::fs::File;
|
||||
use core::global::DocId;
|
||||
use core::schema::Document;
|
||||
use core::schema::FieldValue;
|
||||
use core::error;
|
||||
use core::serialize::BinarySerializable;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
use lz4;
|
||||
|
||||
const BLOCK_SIZE: usize = 262144;
|
||||
|
||||
pub struct StoreWriter {
|
||||
doc: usize,
|
||||
offsets: Vec<usize>,
|
||||
doc: DocId,
|
||||
offsets: Vec<OffsetIndex>,
|
||||
written: u64,
|
||||
writer: BufWriter<File>,
|
||||
current_block: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct OffsetIndex(DocId, u64);
|
||||
|
||||
impl BinarySerializable for OffsetIndex {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
let OffsetIndex(a, b) = *self;
|
||||
Ok(try!(a.serialize(writer)) + try!(b.serialize(writer)))
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<OffsetIndex> {
|
||||
let a = try!(DocId::deserialize(reader));
|
||||
let b = try!(u64::deserialize(reader));
|
||||
Ok(OffsetIndex(a, b))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
impl StoreWriter {
|
||||
|
||||
pub fn new(file: File) -> StoreWriter {
|
||||
StoreWriter {
|
||||
doc: 0,
|
||||
written: 0,
|
||||
offsets: Vec::new(),
|
||||
writer: BufWriter::new(file),
|
||||
current_block: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store(&mut self, fields: &Vec<&FieldValue>) {
|
||||
|
||||
pub fn store(&mut self, field_values: &Vec<&FieldValue>) {
|
||||
(field_values.len() as u32).serialize(&mut self.current_block);
|
||||
for field_value in field_values.iter() {
|
||||
(*field_value).serialize(&mut self.current_block);
|
||||
}
|
||||
self.doc += 1;
|
||||
if self.current_block.len() > BLOCK_SIZE {
|
||||
self.write_and_compress_block();
|
||||
}
|
||||
}
|
||||
|
||||
fn write_and_compress_block(&mut self,) {
|
||||
// err handling
|
||||
let mut encoder = lz4::EncoderBuilder::new()
|
||||
.build(&mut self.writer)
|
||||
.unwrap();
|
||||
encoder.write_all(&self.current_block);
|
||||
self.written = self.current_block.len() as u64;
|
||||
self.offsets.push(OffsetIndex(self.doc, self.written));
|
||||
self.current_block.clear();
|
||||
}
|
||||
|
||||
pub fn close(&mut self,) {
|
||||
if self.current_block.len() > 0 {
|
||||
self.write_and_compress_block();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -20,5 +20,6 @@ extern crate tempdir;
|
||||
extern crate bincode;
|
||||
extern crate serde;
|
||||
extern crate libc;
|
||||
extern crate lz4;
|
||||
|
||||
pub mod core;
|
||||
|
||||
Reference in New Issue
Block a user