diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs index 5840eb775..524ffec58 100644 --- a/src/collector/chained_collector.rs +++ b/src/collector/chained_collector.rs @@ -1,7 +1,7 @@ +use Result; use collector::Collector; use SegmentLocalId; use SegmentReader; -use std::io; use DocId; use Score; @@ -12,7 +12,7 @@ use Score; pub struct DoNothingCollector; impl Collector for DoNothingCollector { #[inline] - fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> { Ok(()) } #[inline] @@ -38,7 +38,7 @@ impl ChainedCollector { } impl Collector for ChainedCollector { - fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> { try!(self.left.set_segment(segment_local_id, segment)); try!(self.right.set_segment(segment_local_id, segment)); Ok(()) diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 8a9014a25..ff15abd73 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -1,7 +1,7 @@ -use std::io; use super::Collector; use DocId; use Score; +use Result; use SegmentReader; use SegmentLocalId; @@ -28,7 +28,7 @@ impl Default for CountCollector { impl Collector for CountCollector { - fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> { Ok(()) } diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 584b16714..ff856ad08 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -2,7 +2,7 @@ use SegmentReader; use SegmentLocalId; use DocId; use Score; -use std::io; +use Result; mod count_collector; pub use self::count_collector::CountCollector; @@ -48,14 +48,14 @@ pub use self::chained_collector::chain; pub trait Collector { /// `set_segment` is called before beginning to enumerate /// on this segment. - fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>; + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()>; /// The query pushes the scored document to the collector via this method. fn collect(&mut self, doc: DocId, score: Score); } impl<'a, C: Collector> Collector for &'a mut C { - fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> { (*self).set_segment(segment_local_id, segment) } /// The query pushes the scored document to the collector via this method. @@ -73,7 +73,6 @@ pub mod tests { use DocId; use Score; use core::SegmentReader; - use std::io; use SegmentLocalId; use fastfield::U32FastFieldReader; use schema::Field; @@ -107,7 +106,7 @@ pub mod tests { impl Collector for TestCollector { - fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> { self.offset += self.segment_max_doc; self.segment_max_doc = reader.max_doc(); Ok(()) @@ -146,7 +145,7 @@ pub mod tests { } impl Collector for FastFieldTestCollector { - fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> { self.ff_reader = Some(try!(reader.get_fast_field_reader(self.field))); Ok(()) } diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 6ce999e80..e5eddc7f4 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -1,7 +1,7 @@ -use std::io; use super::Collector; use DocId; use Score; +use Result; use SegmentReader; use SegmentLocalId; @@ -25,7 +25,7 @@ impl<'a> MultiCollector<'a> { impl<'a> Collector for MultiCollector<'a> { - fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> { for collector in &mut self.collectors { try!(collector.set_segment(segment_local_id, segment)); } diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 21c023caf..6425eb300 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -1,8 +1,8 @@ -use std::io; use super::Collector; use SegmentReader; use SegmentLocalId; use DocAddress; +use Result; use std::collections::BinaryHeap; use std::cmp::Ordering; use DocId; @@ -105,7 +105,7 @@ impl TopCollector { impl Collector for TopCollector { - fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> io::Result<()> { + fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> { self.segment_id = segment_id; Ok(()) } diff --git a/src/common/serialize.rs b/src/common/serialize.rs index b1ffab6cd..6bd1426fe 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -74,7 +74,6 @@ impl BinarySerializable for u64 { impl BinarySerializable for u8 { fn serialize(&self, writer: &mut Write) -> io::Result { - // TODO error try!(writer.write_u8(*self)); Ok(1) } diff --git a/src/core/index.rs b/src/core/index.rs index b4e04d0c8..06a6bc744 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -26,7 +26,6 @@ use directory::error::FileError; const NUM_SEARCHERS: usize = 12; - fn load_metas(directory: &Directory) -> Result { let meta_data = directory.atomic_read(&META_FILEPATH)?; let meta_string = String::from_utf8_lossy(&meta_data); @@ -117,7 +116,6 @@ impl Index { fn create_from_metas(directory: Box, metas: IndexMeta) -> Result { let schema = metas.schema.clone(); let opstamp = metas.opstamp; - // TODO log somethings is uncommitted is not empty. let index = Index { directory: directory, schema: schema, @@ -137,7 +135,7 @@ impl Index { /// Opens a new directory from an index path. pub fn open(directory_path: &Path) -> Result { let directory = try!(MmapDirectory::open(directory_path)); - let metas = try!(load_metas(&directory)); //< TODO does the directory already exists? + let metas = try!(load_metas(&directory)); Index::create_from_metas(directory.box_clone(), metas) } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 2795d87e3..5e8676882 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -74,16 +74,19 @@ impl SegmentReader { } /// Accessor to a segment's fast field reader given a field. - pub fn get_fast_field_reader(&self, field: Field) -> io::Result { + pub fn get_fast_field_reader(&self, field: Field) -> Result { let field_entry = self.schema.get_field_entry(field); - match *field_entry.field_type() { - FieldType::Str(_) => { - Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields.")) + match field_entry.field_type() { + &FieldType::Str(_) => { + Err(Error::InvalidArgument(format!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name()))) }, - FieldType::U32(_) => { - // TODO check that the schema allows that - //Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields.")) - self.fast_fields_reader.get_field(field) + &FieldType::U32(ref u32_options) => { + if u32_options.is_fast() { + Ok(self.fast_fields_reader.get_field(field)?) + } + else { + Err(Error::InvalidArgument(format!("Field <{}> is not defined as a fast field.", field_entry.name()))) + } }, } } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 071d362d3..879b278e0 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -18,7 +18,7 @@ use store::StoreWriter; use core::SegmentInfo; use std::cmp::{min, max}; use std::iter; -use std::io; + pub struct IndexMerger { schema: Schema, @@ -70,11 +70,11 @@ fn compute_min_max_val(u32_reader: &U32FastFieldReader, max_doc: DocId, delete_b } } -fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> io::Result { - segment_reader.get_fieldnorms_reader(field) +fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> Result { + Ok(segment_reader.get_fieldnorms_reader(field)?) } -fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> io::Result { +fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> Result { segment_reader.get_fast_field_reader(field) } @@ -121,7 +121,7 @@ impl IndexMerger { // used both to merge field norms and regular u32 fast fields. fn generic_write_fast_field(&self, fields: Vec, - field_reader_extractor: &Fn(&SegmentReader, Field) -> io::Result, + field_reader_extractor: &Fn(&SegmentReader, Field) -> Result, fast_field_serializer: &mut FastFieldSerializer) -> Result<()> { for field in fields { @@ -509,14 +509,6 @@ mod tests { index_writer.merge(&segment_ids) .wait() .expect("Merging failed"); - index_writer.wait_merging_threads().unwrap(); - - let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); - index_writer.merge(&segment_ids) - .wait() - .expect("Merging failed"); - index_writer.wait_merging_threads().unwrap(); index.load_searchers().unwrap(); let ref searcher = *index.searcher(); assert_eq!(searcher.segment_readers().len(), 1); @@ -534,6 +526,65 @@ mod tests { assert_eq!(score_field_reader.min_val(), 3); assert_eq!(score_field_reader.max_val(), 7000); } + { + // test a commit with only deletes + index_writer.delete_term(Term::from_field_text(text_field, "c")); + index_writer.commit().unwrap(); + + index.load_searchers().unwrap(); + let ref searcher = *index.searcher(); + assert_eq!(searcher.segment_readers().len(), 1); + assert_eq!(searcher.num_docs(), 2); + assert_eq!(searcher.segment_readers()[0].num_docs(), 2); + assert_eq!(searcher.segment_readers()[0].max_doc(), 3); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); + let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap(); + assert_eq!(score_field_reader.min_val(), 3); + assert_eq!(score_field_reader.max_val(), 7000); + } + { // Test merging a single segment in order to remove deletes. + let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed."); + index_writer.merge(&segment_ids) + .wait() + .expect("Merging failed"); + index.load_searchers().unwrap(); + + let ref searcher = *index.searcher(); + assert_eq!(searcher.segment_readers().len(), 1); + assert_eq!(searcher.num_docs(), 2); + assert_eq!(searcher.segment_readers()[0].num_docs(), 2); + assert_eq!(searcher.segment_readers()[0].max_doc(), 2); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!()); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000)); + assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000)); + let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap(); + assert_eq!(score_field_reader.min_val(), 6000); + assert_eq!(score_field_reader.max_val(), 7000); + } + + { // Test removing all docs + index_writer.delete_term(Term::from_field_text(text_field, "g")); + let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed."); + index_writer.merge(&segment_ids) + .wait() + .expect("Merging failed"); + index.load_searchers().unwrap(); + + let ref searcher = *index.searcher(); + assert_eq!(searcher.segment_readers().len(), 1); + assert_eq!(searcher.num_docs(), 0); + } + } } diff --git a/src/lib.rs b/src/lib.rs index 45d9ca4fc..2333987e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,14 +117,16 @@ pub use postings::SegmentPostingsOption; pub use core::TermIterator; -#[cfg(feature="simdcompression")] -pub fn version() -> &'static str { - concat!(version!(), "-simd") -} -#[cfg(not(feature="simdcompression"))] +/// Expose the current version of tantivy, as well +/// whether it was compiled with the simd compression. pub fn version() -> &'static str { - concat!(version!(), "-nosimd") + if cfg!(feature="simdcompression") { + concat!(version!(), "-simd") + } + else { + concat!(version!(), "-nosimd") + } } /// Tantivy's makes it possible to personalize when