mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-29 22:50:41 +00:00
no more warnings
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use std::io;
|
||||
use std::io::{Read, Write};
|
||||
use core::serial::{SegmentSerializer, SerializableSegment};
|
||||
use core::serial::SegmentSerializer;
|
||||
use rustc_serialize::json;
|
||||
use core::directory::WritePtr;
|
||||
use core::index::Segment;
|
||||
@@ -9,7 +9,6 @@ use core::index::SegmentComponent;
|
||||
use core::schema::Term;
|
||||
use core::schema::DocId;
|
||||
use core::fstmap::FstMapBuilder;
|
||||
use core::serialize::Size;
|
||||
use core::store::StoreWriter;
|
||||
use core::serialize::BinarySerializable;
|
||||
use core::simdcompression;
|
||||
@@ -24,9 +23,6 @@ pub struct TermInfo {
|
||||
|
||||
|
||||
impl BinarySerializable for TermInfo {
|
||||
|
||||
const SIZE: Size = Size::Constant(8);
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
Ok(
|
||||
try!(self.doc_freq.serialize(writer)) +
|
||||
@@ -63,9 +59,10 @@ impl SimpleSegmentSerializer {
|
||||
|
||||
impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
|
||||
fn store_doc(&mut self, field_values_it: &mut Iterator<Item=&FieldValue>) {
|
||||
fn store_doc(&mut self, field_values_it: &mut Iterator<Item=&FieldValue>) -> io::Result<()> {
|
||||
let field_values: Vec<&FieldValue> = field_values_it.collect();
|
||||
self.store_writer.store(&field_values);
|
||||
try!(self.store_writer.store(&field_values));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> io::Result<()> {
|
||||
@@ -73,9 +70,8 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
doc_freq: doc_freq,
|
||||
postings_offset: self.written_bytes_postings as u32,
|
||||
};
|
||||
self.term_fst_builder.insert(term.as_slice(), &term_info);
|
||||
// writing the size of the posting list
|
||||
Ok(())
|
||||
self.term_fst_builder
|
||||
.insert(term.as_slice(), &term_info)
|
||||
}
|
||||
|
||||
fn write_docs(&mut self, doc_ids: &[DocId]) -> io::Result<()> {
|
||||
@@ -88,7 +84,7 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
|
||||
fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> io::Result<()> {
|
||||
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
|
||||
let json_data = try!(json::encode(segment_info).map_err(convert_to_ioerror));
|
||||
try!(write.write_all(json_data.as_bytes()));
|
||||
@@ -105,9 +101,6 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
}
|
||||
|
||||
impl SimpleCodec {
|
||||
// TODO impl packed int
|
||||
// TODO skip lists
|
||||
// TODO make that part of the codec API
|
||||
pub fn serializer(segment: &Segment) -> io::Result<SimpleSegmentSerializer> {
|
||||
let term_write = try!(segment.open_write(SegmentComponent::TERMS));
|
||||
let postings_write = try!(segment.open_write(SegmentComponent::POSTINGS));
|
||||
@@ -123,10 +116,4 @@ impl SimpleCodec {
|
||||
encoder: simdcompression::Encoder::new(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
pub fn write<I: SerializableSegment>(index: &I, segment: &Segment) -> io::Result<()> {
|
||||
let serializer = try!(SimpleCodec::serializer(segment));
|
||||
index.write(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,8 +142,7 @@ impl Directory for MmapDirectory {
|
||||
let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
|
||||
meta_file.write(|f| {
|
||||
f.write_all(data)
|
||||
});
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
fn sync(&self, path: &Path) -> io::Result<()> {
|
||||
@@ -179,7 +178,7 @@ impl SharedVec {
|
||||
|
||||
impl Write for SharedVec {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.0.write().unwrap().write(buf);
|
||||
try!(self.0.write().unwrap().write(buf));
|
||||
Ok(buf.len())
|
||||
}
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
@@ -224,11 +223,10 @@ impl Directory for RAMDirectory {
|
||||
let meta_file = atomicwrites::AtomicFile::new(PathBuf::from(path), atomicwrites::AllowOverwrite);
|
||||
meta_file.write(|f| {
|
||||
f.write_all(data)
|
||||
});
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
fn sync(&self, path: &Path) -> io::Result<()> {
|
||||
fn sync(&self, _: &Path) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -242,8 +240,6 @@ impl Directory for RAMDirectory {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use test::Bencher;
|
||||
use core::schema::DocId;
|
||||
use std::path::Path;
|
||||
|
||||
#[test]
|
||||
@@ -261,9 +257,9 @@ mod tests {
|
||||
fn test_directory(directory: &mut Directory) {
|
||||
{
|
||||
let mut write_file = directory.open_write(Path::new("toto")).unwrap();
|
||||
write_file.write_all(&[4]);
|
||||
write_file.write_all(&[3]);
|
||||
write_file.write_all(&[7,3,5]);
|
||||
write_file.write_all(&[4]).unwrap();
|
||||
write_file.write_all(&[3]).unwrap();
|
||||
write_file.write_all(&[7,3,5]).unwrap();
|
||||
}
|
||||
let read_file = directory.open_read(Path::new("toto")).unwrap();
|
||||
let data: &[u8] = &*read_file;
|
||||
|
||||
@@ -87,7 +87,7 @@ impl Index {
|
||||
IndexWriter::open(self,)
|
||||
}
|
||||
|
||||
pub fn searcher(&self,) -> Searcher {
|
||||
pub fn searcher(&self,) -> io::Result<Searcher> {
|
||||
Searcher::for_index(self.clone())
|
||||
}
|
||||
|
||||
|
||||
@@ -150,11 +150,7 @@ mod tests {
|
||||
b.iter(|| {
|
||||
let docs = VecPostings::new((0..1_000_000).collect());
|
||||
let intersection = IntersectionPostings::from_postings(vec!(docs));
|
||||
let mut doc_sum: u32 = 0;
|
||||
for doc in intersection {
|
||||
doc_sum += 1;
|
||||
}
|
||||
doc_sum
|
||||
intersection.count()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,14 +171,14 @@ impl SerializableSegment for SegmentReader {
|
||||
let term = Term::from(term_data);
|
||||
try!(serializer.new_term(&term, term_info.doc_freq));
|
||||
let segment_postings = self.read_postings(term_info.postings_offset);
|
||||
serializer.write_docs(&segment_postings.doc_ids[..]);
|
||||
try!(serializer.write_docs(&segment_postings.doc_ids[..]));
|
||||
},
|
||||
None => { break; }
|
||||
}
|
||||
}
|
||||
for doc_id in 0..self.max_doc() {
|
||||
let doc = try!(self.store_reader.get(&doc_id));
|
||||
serializer.store_doc(&mut doc.fields());
|
||||
try!(serializer.store_doc(&mut doc.fields()));
|
||||
}
|
||||
serializer.close()
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use std::fmt;
|
||||
use std::io;
|
||||
use std::io::Read;
|
||||
use std::str;
|
||||
use core::serialize::Size;
|
||||
use core::serialize::BinarySerializable;
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Encodable;
|
||||
@@ -69,9 +68,6 @@ pub struct FieldValue {
|
||||
|
||||
|
||||
impl BinarySerializable for Field {
|
||||
|
||||
const SIZE: Size = Size::Variable;
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let Field(field_id) = *self;
|
||||
field_id.serialize(writer)
|
||||
@@ -84,9 +80,6 @@ impl BinarySerializable for Field {
|
||||
|
||||
|
||||
impl BinarySerializable for FieldValue {
|
||||
|
||||
const SIZE: Size = Size::Variable;
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
Ok(
|
||||
try!(self.field.serialize(writer)) +
|
||||
|
||||
@@ -44,13 +44,12 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn for_index(index: Index) -> Searcher {
|
||||
pub fn for_index(index: Index) -> io::Result<Searcher> {
|
||||
let mut searcher = Searcher::new();
|
||||
for segment in index.segments().into_iter() {
|
||||
println!("Segment {:?} ", segment);
|
||||
searcher.add_segment(segment);
|
||||
try!(searcher.add_segment(segment));
|
||||
}
|
||||
searcher
|
||||
Ok(searcher)
|
||||
}
|
||||
|
||||
pub fn search(&self, terms: &Vec<Term>, collector: &mut Collector) {
|
||||
|
||||
@@ -1,82 +1,92 @@
|
||||
use core::schema::*;
|
||||
use std::fmt;
|
||||
use core::schema::DocId;
|
||||
use core::schema::Term;
|
||||
use core::schema::FieldValue;
|
||||
use std::io;
|
||||
use core::index::SegmentInfo;
|
||||
|
||||
pub trait SegmentSerializer<Output> {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error>; // TODO add size
|
||||
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>);
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> io::Result<()>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> io::Result<()>; // TODO add size
|
||||
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>) -> io::Result<()>;
|
||||
fn close(self,) -> Result<Output, io::Error>;
|
||||
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()>;
|
||||
fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> io::Result<()>;
|
||||
}
|
||||
|
||||
pub trait SerializableSegment {
|
||||
fn write<Output, SegSer: SegmentSerializer<Output>>(&self, serializer: SegSer) -> io::Result<Output>;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
|
||||
pub struct DebugSegmentSerializer {
|
||||
text: String,
|
||||
num_docs: u32,
|
||||
}
|
||||
use core::schema::DocId;
|
||||
use core::schema::FieldValue;
|
||||
use core::schema::Term;
|
||||
use std::fmt;
|
||||
use super::SegmentSerializer;
|
||||
use super::SerializableSegment;
|
||||
use core::index::SegmentInfo;
|
||||
use std::io;
|
||||
|
||||
impl fmt::Debug for DebugSegmentSerializer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.text)
|
||||
}
|
||||
}
|
||||
|
||||
impl DebugSegmentSerializer {
|
||||
|
||||
pub fn debug_string<S: SerializableSegment>(index: &S) -> String {
|
||||
let serializer = DebugSegmentSerializer::new();
|
||||
index.write(serializer).unwrap()
|
||||
pub struct DebugSegmentSerializer {
|
||||
text: String,
|
||||
num_docs: u32,
|
||||
}
|
||||
|
||||
pub fn new() -> DebugSegmentSerializer {
|
||||
DebugSegmentSerializer {
|
||||
text: String::new(),
|
||||
num_docs: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error> {
|
||||
self.text.push_str(&format!("{:?} - docfreq{}\n", term, doc_freq));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn store_doc(&mut self, fields: &mut Iterator<Item=&FieldValue>) {
|
||||
if self.num_docs == 0 {
|
||||
self.text.push_str(&format!("# STORED DOC\n======\n"))
|
||||
}
|
||||
self.text.push_str(&format!("doc {}", self.num_docs));
|
||||
for field_value in fields {
|
||||
self.text.push_str(&format!("field {:?} |", field_value.field));
|
||||
self.text.push_str(&format!("value {:?}\n", field_value.text));
|
||||
impl fmt::Debug for DebugSegmentSerializer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.text)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error> {
|
||||
for doc in docs {
|
||||
self.text.push_str(&format!(" - Doc {:?}\n", doc));
|
||||
impl DebugSegmentSerializer {
|
||||
|
||||
pub fn debug_string<S: SerializableSegment>(index: &S) -> String {
|
||||
let serializer = DebugSegmentSerializer::new();
|
||||
index.write(serializer).unwrap()
|
||||
}
|
||||
|
||||
pub fn new() -> DebugSegmentSerializer {
|
||||
DebugSegmentSerializer {
|
||||
text: String::new(),
|
||||
num_docs: 0,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(self,) -> Result<String, io::Error> {
|
||||
Ok(self.text)
|
||||
impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error> {
|
||||
self.text.push_str(&format!("{:?} - docfreq{}\n", term, doc_freq));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn store_doc(&mut self, fields: &mut Iterator<Item=&FieldValue>) -> io::Result<()> {
|
||||
if self.num_docs == 0 {
|
||||
self.text.push_str(&format!("# STORED DOC\n======\n"))
|
||||
}
|
||||
self.text.push_str(&format!("doc {}", self.num_docs));
|
||||
for field_value in fields {
|
||||
self.text.push_str(&format!("field {:?} |", field_value.field));
|
||||
self.text.push_str(&format!("value {:?}\n", field_value.text));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error> {
|
||||
for doc in docs {
|
||||
self.text.push_str(&format!(" - Doc {:?}\n", doc));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(self,) -> Result<String, io::Error> {
|
||||
Ok(self.text)
|
||||
}
|
||||
|
||||
fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> io::Result<()> {
|
||||
self.text.push_str(&format!("\n segmentinfo({:?})", segment_info));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_eq<L: SerializableSegment, R: SerializableSegment>(left: &L, right: &R) -> bool{
|
||||
let str_left = DebugSegmentSerializer::debug_string(left);
|
||||
let str_right = DebugSegmentSerializer::debug_string(right);
|
||||
str_left == str_right
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
use std::io::Result;
|
||||
use std::io;
|
||||
use byteorder;
|
||||
|
||||
@@ -16,44 +15,29 @@ fn convert_byte_order_error(byteorder_error: byteorder::Error) -> io::Error {
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Size {
|
||||
Variable,
|
||||
Constant(usize)
|
||||
}
|
||||
|
||||
|
||||
pub trait BinarySerializable : fmt::Debug + Sized {
|
||||
const SIZE: Size;
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize>;
|
||||
fn deserialize(reader: &mut Read) -> Result<Self>;
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize>;
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Self>;
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for () {
|
||||
|
||||
const SIZE: Size = Size::Constant(0);
|
||||
|
||||
fn serialize(&self, _: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, _: &mut Write) -> io::Result<usize> {
|
||||
Ok(0)
|
||||
}
|
||||
fn deserialize(_: &mut Read) -> Result<Self> {
|
||||
fn deserialize(_: &mut Read) -> io::Result<Self> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
|
||||
const SIZE: Size = Size::Variable;
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut total_size = try!((self.len() as u32).serialize(writer));
|
||||
for it in self.iter() {
|
||||
total_size += try!(it.serialize(writer));
|
||||
}
|
||||
Ok(total_size)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> Result<Vec<T>> {
|
||||
fn deserialize(reader: &mut Read) -> io::Result<Vec<T>> {
|
||||
let num_items = try!(u32::deserialize(reader));
|
||||
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
|
||||
for _ in 0..num_items {
|
||||
@@ -66,16 +50,13 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
|
||||
|
||||
impl BinarySerializable for u32 {
|
||||
|
||||
const SIZE: Size = Size::Constant(4);
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
writer.write_u32::<BigEndian>(self.clone())
|
||||
.map(|_| 4)
|
||||
.map_err(convert_byte_order_error)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> Result<u32> {
|
||||
fn deserialize(reader: &mut Read) -> io::Result<u32> {
|
||||
reader.read_u32::<BigEndian>()
|
||||
.map_err(convert_byte_order_error)
|
||||
}
|
||||
@@ -83,15 +64,12 @@ impl BinarySerializable for u32 {
|
||||
|
||||
|
||||
impl BinarySerializable for u64 {
|
||||
|
||||
const SIZE: Size = Size::Constant(8);
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
writer.write_u64::<BigEndian>(self.clone())
|
||||
.map(|_| 8)
|
||||
.map_err(convert_byte_order_error)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> Result<u64> {
|
||||
fn deserialize(reader: &mut Read) -> io::Result<u64> {
|
||||
reader.read_u64::<BigEndian>()
|
||||
.map_err(convert_byte_order_error)
|
||||
}
|
||||
@@ -99,25 +77,19 @@ impl BinarySerializable for u64 {
|
||||
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
|
||||
const SIZE: Size = Size::Constant(1);
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
// TODO error
|
||||
try!(writer.write_u8(self.clone()).map_err(convert_byte_order_error));
|
||||
Ok(1)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> Result<u8> {
|
||||
fn deserialize(reader: &mut Read) -> io::Result<u8> {
|
||||
reader.read_u8()
|
||||
.map_err(convert_byte_order_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for String {
|
||||
|
||||
const SIZE: Size = Size::Variable;
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> Result<usize> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
// TODO error
|
||||
let data: &[u8] = self.as_bytes();
|
||||
let mut size = try!((data.len() as u32).serialize(writer));
|
||||
@@ -126,11 +98,11 @@ impl BinarySerializable for String {
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
fn deserialize(reader: &mut Read) -> Result<String> {
|
||||
fn deserialize(reader: &mut Read) -> io::Result<String> {
|
||||
// TODO error
|
||||
let string_length = try!(u32::deserialize(reader)) as usize;
|
||||
let mut result = String::with_capacity(string_length);
|
||||
reader.take(string_length as u64).read_to_string(&mut result);
|
||||
try!(reader.take(string_length as u64).read_to_string(&mut result));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
@@ -147,12 +119,12 @@ mod test {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u8 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
x.serialize(&mut buffer).unwrap();
|
||||
assert_eq!(buffer.len(), 1);
|
||||
}
|
||||
{
|
||||
let x: u8 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
x.serialize(&mut buffer).unwrap();
|
||||
assert_eq!(buffer.len(), 2);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
@@ -167,12 +139,12 @@ mod test {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u32 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
x.serialize(&mut buffer).unwrap();
|
||||
assert_eq!(buffer.len(), 4);
|
||||
}
|
||||
{
|
||||
let x: u32 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
x.serialize(&mut buffer).unwrap();
|
||||
assert_eq!(buffer.len(), 8);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
|
||||
@@ -4,7 +4,6 @@ use core::schema::DocId;
|
||||
use core::schema::Document;
|
||||
use core::schema::FieldValue;
|
||||
use core::serialize::BinarySerializable;
|
||||
use core::serialize::Size;
|
||||
use core::directory::ReadOnlySource;
|
||||
use std::io::Write;
|
||||
use std::io::Read;
|
||||
@@ -31,9 +30,6 @@ pub struct StoreWriter {
|
||||
struct OffsetIndex(DocId, u64);
|
||||
|
||||
impl BinarySerializable for OffsetIndex {
|
||||
|
||||
const SIZE: Size = Size::Constant(4 + 8);
|
||||
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let OffsetIndex(a, b) = *self;
|
||||
Ok(try!(a.serialize(writer)) + try!(b.serialize(writer)))
|
||||
@@ -58,18 +54,19 @@ impl StoreWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) {
|
||||
pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
|
||||
self.intermediary_buffer.clear();
|
||||
(field_values.len() as u32).serialize(&mut self.intermediary_buffer);
|
||||
try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
|
||||
for field_value in field_values.iter() {
|
||||
(*field_value).serialize(&mut self.intermediary_buffer);
|
||||
try!((*field_value).serialize(&mut self.intermediary_buffer));
|
||||
}
|
||||
(self.intermediary_buffer.len() as u32).serialize(&mut self.current_block);
|
||||
self.current_block.write_all(&self.intermediary_buffer[..]);
|
||||
try!((self.intermediary_buffer.len() as u32).serialize(&mut self.current_block));
|
||||
try!(self.current_block.write_all(&self.intermediary_buffer[..]));
|
||||
self.doc += 1;
|
||||
if self.current_block.len() > BLOCK_SIZE {
|
||||
self.write_and_compress_block();
|
||||
try!(self.write_and_compress_block());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_and_compress_block(&mut self,) -> io::Result<()> {
|
||||
@@ -94,7 +91,7 @@ impl StoreWriter {
|
||||
|
||||
pub fn close(&mut self,) -> io::Result<()> {
|
||||
if self.current_block.len() > 0 {
|
||||
self.write_and_compress_block();
|
||||
try!(self.write_and_compress_block());
|
||||
}
|
||||
let header_offset: u64 = self.written;
|
||||
try!(self.offsets.serialize(&mut self.writer));
|
||||
@@ -113,10 +110,6 @@ pub struct StoreReader {
|
||||
|
||||
impl StoreReader {
|
||||
|
||||
pub fn num_docs(&self,) -> DocId {
|
||||
self.offsets.len() as DocId
|
||||
}
|
||||
|
||||
fn read_header(data: &ReadOnlySource) -> Vec<OffsetIndex> {
|
||||
// todo err
|
||||
let mut cursor = Cursor::new(data.as_slice());
|
||||
@@ -216,9 +209,9 @@ mod tests {
|
||||
fields.push(field_value);
|
||||
}
|
||||
let fields_refs: Vec<&FieldValue> = fields.iter().collect();
|
||||
store_writer.store(&fields_refs);
|
||||
store_writer.store(&fields_refs).unwrap();
|
||||
}
|
||||
store_writer.close();
|
||||
store_writer.close().unwrap();
|
||||
}
|
||||
schema
|
||||
}
|
||||
@@ -256,7 +249,7 @@ mod tests {
|
||||
let store_source = directory.open_read(&path).unwrap();
|
||||
let store = StoreReader::new(store_source);
|
||||
b.iter(|| {
|
||||
store.get(&12);
|
||||
store.get(&12).unwrap();
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@@ -53,8 +53,8 @@ impl IndexWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, doc: Document) {
|
||||
Rc::get_mut(&mut self.segment_writer).unwrap().add(doc, &self.schema);
|
||||
pub fn add(&mut self, doc: Document) -> io::Result<()> {
|
||||
Rc::get_mut(&mut self.segment_writer).unwrap().add(doc, &self.schema)
|
||||
}
|
||||
|
||||
// TODO remove that some day
|
||||
@@ -70,9 +70,9 @@ impl IndexWriter {
|
||||
match segment_writer_res {
|
||||
Ok(segment_writer) => {
|
||||
let segment = segment_writer.segment();
|
||||
segment_writer.finalize();
|
||||
try!(segment_writer.finalize());
|
||||
try!(self.directory.sync(segment.clone()));
|
||||
self.directory.publish_segment(segment.clone());
|
||||
try!(self.directory.publish_segment(segment.clone()));
|
||||
Ok(segment)
|
||||
},
|
||||
Err(_) => {
|
||||
@@ -107,15 +107,15 @@ impl SegmentWriter {
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
let term_docfreq = doc_ids.len() as u32;
|
||||
self.segment_serializer.new_term(&term, term_docfreq);
|
||||
self.segment_serializer.write_docs(&doc_ids);
|
||||
try!(self.segment_serializer.new_term(&term, term_docfreq));
|
||||
try!(self.segment_serializer.write_docs(&doc_ids));
|
||||
}
|
||||
}
|
||||
{
|
||||
let segment_info = SegmentInfo {
|
||||
max_doc: self.max_doc
|
||||
};
|
||||
self.segment_serializer.write_segment_info(&segment_info);
|
||||
try!(self.segment_serializer.write_segment_info(&segment_info));
|
||||
}
|
||||
self.segment_serializer.close()
|
||||
}
|
||||
@@ -141,7 +141,7 @@ impl SegmentWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, doc: Document, schema: &Schema) {
|
||||
pub fn add(&mut self, doc: Document, schema: &Schema) -> io::Result<()> {
|
||||
let doc_id = self.max_doc;
|
||||
for field_value in doc.fields() {
|
||||
let field_options = schema.field_options(&field_value.field);
|
||||
@@ -162,8 +162,9 @@ impl SegmentWriter {
|
||||
let mut stored_fieldvalues_it = doc.fields().filter(|field_value| {
|
||||
schema.field_options(&field_value.field).is_stored()
|
||||
});
|
||||
self.segment_serializer.store_doc(&mut stored_fieldvalues_it);
|
||||
try!(self.segment_serializer.store_doc(&mut stored_fieldvalues_it));
|
||||
self.max_doc += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_postings_writer(&mut self, term: Term) -> &mut PostingsWriter {
|
||||
@@ -189,8 +190,8 @@ impl SerializableSegment for SegmentWriter {
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
let term_docfreq = doc_ids.len() as u32;
|
||||
serializer.new_term(&term, term_docfreq);
|
||||
serializer.write_docs(&doc_ids);
|
||||
try!(serializer.new_term(&term, term_docfreq));
|
||||
try!(serializer.write_docs(&doc_ids));
|
||||
}
|
||||
serializer.close()
|
||||
}
|
||||
|
||||
22
src/lib.rs
22
src/lib.rs
@@ -1,6 +1,9 @@
|
||||
#![feature(test,associated_consts)]
|
||||
//#![feature(test,associated_consts)]
|
||||
#![cfg_attr(test, feature(test))]
|
||||
|
||||
#[allow(unused_imports)]
|
||||
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
@@ -43,7 +46,7 @@ pub use core::reader::SegmentReader;
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use core::serial::DebugSegmentSerializer;
|
||||
use core::serial::tests::DebugSegmentSerializer;
|
||||
use collector::Collector;
|
||||
|
||||
// only make sense for a single segment
|
||||
@@ -87,17 +90,17 @@ mod tests {
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "af b");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "a b c");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "a b c d");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
|
||||
let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
|
||||
@@ -127,25 +130,24 @@ mod tests {
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "af b");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "a b c");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(&text_field, "a b c d");
|
||||
index_writer.add(doc);
|
||||
index_writer.add(doc).unwrap();
|
||||
}
|
||||
let commit_result = index_writer.commit();
|
||||
commit_result.unwrap();
|
||||
}
|
||||
println!("index {:?}", index.schema());
|
||||
{
|
||||
|
||||
let searcher = index.searcher();
|
||||
let searcher = index.searcher().unwrap();
|
||||
let get_doc_ids = |terms: Vec<Term>| {
|
||||
let mut collector = TestCollector::new();
|
||||
searcher.search(&terms, &mut collector);
|
||||
|
||||
Reference in New Issue
Block a user