mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-19 17:50:42 +00:00
blop
This commit is contained in:
@@ -38,8 +38,9 @@ impl SimpleSegmentSerializer {
|
||||
|
||||
impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
|
||||
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>) {
|
||||
|
||||
fn store_doc(&mut self, field_values_it: &mut Iterator<Item=&FieldValue>) {
|
||||
let field_values: Vec<&FieldValue> = field_values_it.collect();
|
||||
self.store_writer.store(&field_values);
|
||||
}
|
||||
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()> {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
use std::result;
|
||||
use std::io;
|
||||
use byteorder;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
NotImplementedYet,
|
||||
WriteError(String),
|
||||
ReadError,
|
||||
BinaryReadError(byteorder::Error),
|
||||
IOError(io::ErrorKind, String),
|
||||
FileNotFound(String),
|
||||
LockError(String),
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use core::global::*;
|
||||
use std::fmt::Write;
|
||||
use core::error;
|
||||
use std::io::Write;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::string::FromUtf8Error;
|
||||
use std::collections::HashMap;
|
||||
@@ -7,6 +8,8 @@ use std::str;
|
||||
use std::iter;
|
||||
use std::slice;
|
||||
use std::fmt;
|
||||
use std::io::Read;
|
||||
use core::serialize::BinarySerializable;
|
||||
|
||||
|
||||
|
||||
@@ -57,6 +60,37 @@ pub struct FieldValue {
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for Field {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
let Field(field_id) = *self;
|
||||
field_id.serialize(writer)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> error::Result<Field> {
|
||||
u8::deserialize(reader).map(Field)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for FieldValue {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
Ok(
|
||||
try!(self.field.serialize(writer)) +
|
||||
try!(self.text.serialize(writer))
|
||||
)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<Self> {
|
||||
let field = try!(Field::deserialize(reader));
|
||||
let text = try!(String::deserialize(reader));
|
||||
Ok(FieldValue {
|
||||
field: field,
|
||||
text: text,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[derive(Clone,PartialEq,PartialOrd,Ord,Eq,Hash)]
|
||||
pub struct Term {
|
||||
data: Vec<u8>,
|
||||
|
||||
@@ -3,6 +3,8 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use core::error;
|
||||
use core::error::Error;
|
||||
use std::io::Cursor;
|
||||
use std::io::Read;
|
||||
|
||||
pub trait BinarySerializable : fmt::Debug + Sized {
|
||||
@@ -22,18 +24,15 @@ impl BinarySerializable for () {
|
||||
|
||||
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
let mut total_size = 0;
|
||||
writer.write_u32::<BigEndian>(self.len() as u32);
|
||||
total_size += 4;
|
||||
let num_elements = self.len() as u32;
|
||||
let mut total_size = try!(num_elements.serialize(writer));
|
||||
for it in self.iter() {
|
||||
let item_size = try!(it.serialize(writer));
|
||||
total_size += item_size;
|
||||
total_size += try!(it.serialize(writer));
|
||||
}
|
||||
Ok(total_size)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<Vec<T>> {
|
||||
// TODO error
|
||||
let num_items = reader.read_u32::<BigEndian>().unwrap();
|
||||
let num_items = try!(u32::deserialize(reader));
|
||||
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
|
||||
for _ in 0..num_items {
|
||||
let item = try!(T::deserialize(reader));
|
||||
@@ -42,3 +41,128 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
Ok(items)
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for u32 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
writer.write_u32::<BigEndian>(self.clone())
|
||||
.map(|x| 4)
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<u32> {
|
||||
reader.read_u32::<BigEndian>()
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
// TODO error
|
||||
writer.write_u8(self.clone());
|
||||
Ok(1)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<u8> {
|
||||
reader.read_u8()
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for String {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
// TODO error
|
||||
let data: &[u8] = self.as_bytes();
|
||||
let mut size = try!((data.len() as u32).serialize(writer));
|
||||
size += data.len();
|
||||
writer.write_all(data);
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> error::Result<String> {
|
||||
// TODO error
|
||||
let string_length = try!(u32::deserialize(reader)) as usize;
|
||||
let mut result = String::with_capacity(string_length);
|
||||
reader.take(string_length as u64).read_to_string(&mut result);
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Round-trips two `u8` values through a byte buffer.
///
/// The result of every `serialize` call is unwrapped and its reported size
/// checked, so a failed write fails the test instead of being ignored.
#[test]
fn test_serialize_u8() {
    let mut buffer: Vec<u8> = Vec::new();
    {
        let x: u8 = 3;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 1);
        assert_eq!(buffer.len(), 1);
    }
    {
        let x: u8 = 5;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 1);
        assert_eq!(buffer.len(), 2);
    }
    let mut cursor = Cursor::new(&buffer);
    assert_eq!(3, u8::deserialize(&mut cursor).unwrap());
    assert_eq!(5, u8::deserialize(&mut cursor).unwrap());
    // The buffer is exhausted, so a third read must fail.
    assert!(u8::deserialize(&mut cursor).is_err());
}
|
||||
|
||||
|
||||
/// Round-trips two `u32` values through a byte buffer.
///
/// The result of every `serialize` call is unwrapped and its reported size
/// checked, so a failed write fails the test instead of being ignored.
#[test]
fn test_serialize_u32() {
    let mut buffer: Vec<u8> = Vec::new();
    {
        let x: u32 = 3;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 4);
        assert_eq!(buffer.len(), 4);
    }
    {
        let x: u32 = 5;
        assert_eq!(x.serialize(&mut buffer).unwrap(), 4);
        assert_eq!(buffer.len(), 8);
    }
    let mut cursor = Cursor::new(&buffer);
    assert_eq!(3, u32::deserialize(&mut cursor).unwrap());
    assert_eq!(5, u32::deserialize(&mut cursor).unwrap());
    // The buffer is exhausted, so a third read must fail.
    assert!(u32::deserialize(&mut cursor).is_err());
}
|
||||
|
||||
|
||||
|
||||
/// Round-trips two strings through a byte buffer and checks the reported
/// sizes.
#[test]
fn test_serialize_string() {
    let short_text = "ぽよぽよ";
    let long_text = "富士さん見える。";
    // Expected size: 4 bytes of length prefix plus 3 bytes per character.
    let short_size = 4 + 3 * 4;
    let long_size = 4 + 3 * 8;
    let mut bytes: Vec<u8> = Vec::new();

    let short_written = String::from(short_text).serialize(&mut bytes).unwrap();
    assert_eq!(short_written, short_size);
    assert_eq!(bytes.len(), short_size);

    let long_written = String::from(long_text).serialize(&mut bytes).unwrap();
    assert_eq!(long_written, long_size);
    assert_eq!(bytes.len(), short_size + long_size);

    let mut cursor = Cursor::new(&bytes);
    assert_eq!(short_text, String::deserialize(&mut cursor).unwrap());
    assert_eq!(long_text, String::deserialize(&mut cursor).unwrap());
    // Nothing is left in the buffer, so reading another value must fail.
    assert!(u32::deserialize(&mut cursor).is_err());
}
|
||||
|
||||
|
||||
|
||||
/// Round-trips a vector of two strings and checks the reported total size.
#[test]
fn test_serialize_vec() {
    let texts = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。"));
    // Per-string size: 4 bytes of length prefix plus 3 bytes per character.
    let first_size = 4 + 3 * 4;
    let second_size = 4 + 3 * 8;
    // The vector adds its own 4-byte element count ahead of the items.
    let expected_size = first_size + second_size + 4;

    let mut bytes: Vec<u8> = Vec::new();
    assert_eq!(texts.serialize(&mut bytes).unwrap(), expected_size);

    let mut cursor = Cursor::new(&bytes);
    let decoded: Vec<String> = Vec::deserialize(&mut cursor).unwrap();
    assert_eq!(decoded.len(), 2);
    assert_eq!("ぽよぽよ", decoded[0]);
    assert_eq!("富士さん見える。", decoded[1]);
}
|
||||
|
||||
@@ -65,7 +65,7 @@ impl<T: BinarySerializable> LayerBuilder<T> {
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
//
|
||||
// fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) {
|
||||
// for it in layer {
|
||||
// println!(" - {:?}", it);
|
||||
@@ -146,20 +146,6 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for u32 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
// TODO error handling
|
||||
writer.write_u32::<BigEndian>(self.clone());
|
||||
Ok(4)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> error::Result<Self> {
|
||||
// TODO error handling
|
||||
reader.read_u32::<BigEndian>().map_err(|err| error::Error::ReadError)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Layer<'a, T> {
|
||||
cursor: Cursor<&'a [u8]>,
|
||||
next_id: DocId,
|
||||
|
||||
@@ -2,8 +2,14 @@ use std::io::BufWriter;
|
||||
use std::fs::File;
|
||||
use core::schema::Document;
|
||||
use core::schema::FieldValue;
|
||||
use core::error;
|
||||
use core::serialize::BinarySerializable;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
|
||||
/// Writer for stored documents, backed by a buffered file handle.
pub struct StoreWriter {
    /// Document counter; starts at 0 in `new`.
    doc: usize,
    /// NOTE(review): presumably the byte offset of each stored document;
    /// nothing visible fills it yet, confirm the intended use.
    offsets: Vec<usize>,
    /// Buffered handle on the destination file.
    writer: BufWriter<File>,
}
|
||||
|
||||
@@ -11,13 +17,31 @@ impl StoreWriter {
|
||||
|
||||
pub fn new(file: File) -> StoreWriter {
|
||||
StoreWriter {
|
||||
doc: 0,
|
||||
offsets: Vec::new(),
|
||||
writer: BufWriter::new(file),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store(&mut self, fields: &mut Iterator<Item=&FieldValue>) {
|
||||
for field in fields {
|
||||
println!("{:?}", field);
|
||||
}
|
||||
pub fn store(&mut self, fields: &Vec<&FieldValue>) {
|
||||
|
||||
self.doc += 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
// fn serialize(&self, field_values: &mut Iterator<Item=&FieldValue>) -> error::Result<usize> {
|
||||
// let mut total_size = 0;
|
||||
// writer.write_u32::<BigEndian>(self.len() as u32);
|
||||
// total_size += 4;
|
||||
// for it in self.iter() {
|
||||
// let item_size = try!(it.serialize(writer));
|
||||
// total_size += item_size;
|
||||
// }
|
||||
// Ok(total_size)
|
||||
// }
|
||||
// }
|
||||
|
||||
@@ -158,11 +158,11 @@ impl SegmentWriter {
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut stored_field_it = doc.fields().filter(|field_value| {
|
||||
let mut stored_fieldvalues_it = doc.fields().filter(|field_value| {
|
||||
schema.get_field(&field_value.field)
|
||||
.is_stored()
|
||||
});
|
||||
self.segment_serializer.store_doc(&mut stored_field_it);
|
||||
self.segment_serializer.store_doc(&mut stored_fieldvalues_it);
|
||||
self.max_doc += 1;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user