NOBUG TODO hunt, and cleanups

This commit is contained in:
Paul Masurel
2017-02-22 21:59:53 +09:00
parent 4a8eb3cb05
commit df9090cb0b
10 changed files with 98 additions and 46 deletions

View File

@@ -1,7 +1,7 @@
use Result;
use collector::Collector;
use SegmentLocalId;
use SegmentReader;
use std::io;
use DocId;
use Score;
@@ -12,7 +12,7 @@ use Score;
pub struct DoNothingCollector;
impl Collector for DoNothingCollector {
#[inline]
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
Ok(())
}
#[inline]
@@ -38,7 +38,7 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
}
impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
try!(self.left.set_segment(segment_local_id, segment));
try!(self.right.set_segment(segment_local_id, segment));
Ok(())

View File

@@ -1,7 +1,7 @@
use std::io;
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use SegmentLocalId;
@@ -28,7 +28,7 @@ impl Default for CountCollector {
impl Collector for CountCollector {
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
Ok(())
}

View File

@@ -2,7 +2,7 @@ use SegmentReader;
use SegmentLocalId;
use DocId;
use Score;
use std::io;
use Result;
mod count_collector;
pub use self::count_collector::CountCollector;
@@ -48,14 +48,14 @@ pub use self::chained_collector::chain;
pub trait Collector {
/// `set_segment` is called before beginning to enumerate
/// on this segment.
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>;
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()>;
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
}
impl<'a, C: Collector> Collector for &'a mut C {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
(*self).set_segment(segment_local_id, segment)
}
/// The query pushes the scored document to the collector via this method.
@@ -73,7 +73,6 @@ pub mod tests {
use DocId;
use Score;
use core::SegmentReader;
use std::io;
use SegmentLocalId;
use fastfield::U32FastFieldReader;
use schema::Field;
@@ -107,7 +106,7 @@ pub mod tests {
impl Collector for TestCollector {
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
self.offset += self.segment_max_doc;
self.segment_max_doc = reader.max_doc();
Ok(())
@@ -146,7 +145,7 @@ pub mod tests {
}
impl Collector for FastFieldTestCollector {
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
self.ff_reader = Some(try!(reader.get_fast_field_reader(self.field)));
Ok(())
}

View File

@@ -1,7 +1,7 @@
use std::io;
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use SegmentLocalId;
@@ -25,7 +25,7 @@ impl<'a> MultiCollector<'a> {
impl<'a> Collector for MultiCollector<'a> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
for collector in &mut self.collectors {
try!(collector.set_segment(segment_local_id, segment));
}

View File

@@ -1,8 +1,8 @@
use std::io;
use super::Collector;
use SegmentReader;
use SegmentLocalId;
use DocAddress;
use Result;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use DocId;
@@ -105,7 +105,7 @@ impl TopCollector {
impl Collector for TopCollector {
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
self.segment_id = segment_id;
Ok(())
}

View File

@@ -74,7 +74,6 @@ impl BinarySerializable for u64 {
impl BinarySerializable for u8 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
// TODO error
try!(writer.write_u8(*self));
Ok(1)
}

View File

@@ -26,7 +26,6 @@ use directory::error::FileError;
const NUM_SEARCHERS: usize = 12;
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
let meta_data = directory.atomic_read(&META_FILEPATH)?;
let meta_string = String::from_utf8_lossy(&meta_data);
@@ -117,7 +116,6 @@ impl Index {
fn create_from_metas(directory: Box<Directory>, metas: IndexMeta) -> Result<Index> {
let schema = metas.schema.clone();
let opstamp = metas.opstamp;
// TODO log somethings is uncommitted is not empty.
let index = Index {
directory: directory,
schema: schema,
@@ -137,7 +135,7 @@ impl Index {
/// Opens a new directory from an index path.
pub fn open(directory_path: &Path) -> Result<Index> {
let directory = try!(MmapDirectory::open(directory_path));
let metas = try!(load_metas(&directory)); //< TODO does the directory already exists?
let metas = try!(load_metas(&directory));
Index::create_from_metas(directory.box_clone(), metas)
}

View File

@@ -74,16 +74,19 @@ impl SegmentReader {
}
/// Accessor to a segment's fast field reader given a field.
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
pub fn get_fast_field_reader(&self, field: Field) -> Result<U32FastFieldReader> {
let field_entry = self.schema.get_field_entry(field);
match *field_entry.field_type() {
FieldType::Str(_) => {
Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
match field_entry.field_type() {
&FieldType::Str(_) => {
Err(Error::InvalidArgument(format!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name())))
},
FieldType::U32(_) => {
// TODO check that the schema allows that
//Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
self.fast_fields_reader.get_field(field)
&FieldType::U32(ref u32_options) => {
if u32_options.is_fast() {
Ok(self.fast_fields_reader.get_field(field)?)
}
else {
Err(Error::InvalidArgument(format!("Field <{}> is not defined as a fast field.", field_entry.name())))
}
},
}
}

View File

@@ -18,7 +18,7 @@ use store::StoreWriter;
use core::SegmentInfo;
use std::cmp::{min, max};
use std::iter;
use std::io;
pub struct IndexMerger {
schema: Schema,
@@ -70,11 +70,11 @@ fn compute_min_max_val(u32_reader: &U32FastFieldReader, max_doc: DocId, delete_b
}
}
fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> io::Result<U32FastFieldReader> {
segment_reader.get_fieldnorms_reader(field)
fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> Result<U32FastFieldReader> {
Ok(segment_reader.get_fieldnorms_reader(field)?)
}
fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> io::Result<U32FastFieldReader> {
fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> Result<U32FastFieldReader> {
segment_reader.get_fast_field_reader(field)
}
@@ -121,7 +121,7 @@ impl IndexMerger {
// used both to merge field norms and regular u32 fast fields.
fn generic_write_fast_field(&self,
fields: Vec<Field>,
field_reader_extractor: &Fn(&SegmentReader, Field) -> io::Result<U32FastFieldReader>,
field_reader_extractor: &Fn(&SegmentReader, Field) -> Result<U32FastFieldReader>,
fast_field_serializer: &mut FastFieldSerializer) -> Result<()> {
for field in fields {
@@ -509,14 +509,6 @@ mod tests {
index_writer.merge(&segment_ids)
.wait()
.expect("Merging failed");
index_writer.wait_merging_threads().unwrap();
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer.merge(&segment_ids)
.wait()
.expect("Merging failed");
index_writer.wait_merging_threads().unwrap();
index.load_searchers().unwrap();
let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
@@ -534,6 +526,65 @@ mod tests {
assert_eq!(score_field_reader.min_val(), 3);
assert_eq!(score_field_reader.max_val(), 7000);
}
{
// test a commit with only deletes
index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.commit().unwrap();
index.load_searchers().unwrap();
let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
assert_eq!(score_field_reader.min_val(), 3);
assert_eq!(score_field_reader.max_val(), 7000);
}
{ // Test merging a single segment in order to remove deletes.
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
index_writer.merge(&segment_ids)
.wait()
.expect("Merging failed");
index.load_searchers().unwrap();
let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].max_doc(), 2);
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
assert_eq!(score_field_reader.min_val(), 6000);
assert_eq!(score_field_reader.max_val(), 7000);
}
{ // Test removing all docs
index_writer.delete_term(Term::from_field_text(text_field, "g"));
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
index_writer.merge(&segment_ids)
.wait()
.expect("Merging failed");
index.load_searchers().unwrap();
let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 0);
}
}
}

View File

@@ -117,14 +117,16 @@ pub use postings::SegmentPostingsOption;
pub use core::TermIterator;
#[cfg(feature="simdcompression")]
pub fn version() -> &'static str {
concat!(version!(), "-simd")
}
#[cfg(not(feature="simdcompression"))]
/// Expose the current version of tantivy, as well as
/// whether it was compiled with the simd compression.
pub fn version() -> &'static str {
concat!(version!(), "-nosimd")
if cfg!(feature="simdcompression") {
concat!(version!(), "-simd")
}
else {
concat!(version!(), "-nosimd")
}
}
/// Tantivy makes it possible to personalize when