mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-27 21:50:41 +00:00
NOBUG TODO hunt, and cleanups
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use Result;
|
||||
use collector::Collector;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
use std::io;
|
||||
use DocId;
|
||||
use Score;
|
||||
|
||||
@@ -12,7 +12,7 @@ use Score;
|
||||
pub struct DoNothingCollector;
|
||||
impl Collector for DoNothingCollector {
|
||||
#[inline]
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
#[inline]
|
||||
@@ -38,7 +38,7 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
|
||||
}
|
||||
|
||||
impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
|
||||
try!(self.left.set_segment(segment_local_id, segment));
|
||||
try!(self.right.set_segment(segment_local_id, segment));
|
||||
Ok(())
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use DocId;
|
||||
use Score;
|
||||
use Result;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
|
||||
@@ -28,7 +28,7 @@ impl Default for CountCollector {
|
||||
|
||||
impl Collector for CountCollector {
|
||||
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use DocId;
|
||||
use Score;
|
||||
use std::io;
|
||||
use Result;
|
||||
|
||||
mod count_collector;
|
||||
pub use self::count_collector::CountCollector;
|
||||
@@ -48,14 +48,14 @@ pub use self::chained_collector::chain;
|
||||
pub trait Collector {
|
||||
/// `set_segment` is called before beginning to enumerate
|
||||
/// on this segment.
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>;
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()>;
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, doc: DocId, score: Score);
|
||||
}
|
||||
|
||||
|
||||
impl<'a, C: Collector> Collector for &'a mut C {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
|
||||
(*self).set_segment(segment_local_id, segment)
|
||||
}
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
@@ -73,7 +73,6 @@ pub mod tests {
|
||||
use DocId;
|
||||
use Score;
|
||||
use core::SegmentReader;
|
||||
use std::io;
|
||||
use SegmentLocalId;
|
||||
use fastfield::U32FastFieldReader;
|
||||
use schema::Field;
|
||||
@@ -107,7 +106,7 @@ pub mod tests {
|
||||
|
||||
impl Collector for TestCollector {
|
||||
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.offset += self.segment_max_doc;
|
||||
self.segment_max_doc = reader.max_doc();
|
||||
Ok(())
|
||||
@@ -146,7 +145,7 @@ pub mod tests {
|
||||
}
|
||||
|
||||
impl Collector for FastFieldTestCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.ff_reader = Some(try!(reader.get_fast_field_reader(self.field)));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use DocId;
|
||||
use Score;
|
||||
use Result;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
|
||||
@@ -25,7 +25,7 @@ impl<'a> MultiCollector<'a> {
|
||||
|
||||
|
||||
impl<'a> Collector for MultiCollector<'a> {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
|
||||
for collector in &mut self.collectors {
|
||||
try!(collector.set_segment(segment_local_id, segment));
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use DocAddress;
|
||||
use Result;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
@@ -105,7 +105,7 @@ impl TopCollector {
|
||||
|
||||
impl Collector for TopCollector {
|
||||
|
||||
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
|
||||
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
self.segment_id = segment_id;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -74,7 +74,6 @@ impl BinarySerializable for u64 {
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
// TODO error
|
||||
try!(writer.write_u8(*self));
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@ use directory::error::FileError;
|
||||
const NUM_SEARCHERS: usize = 12;
|
||||
|
||||
|
||||
|
||||
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
|
||||
let meta_data = directory.atomic_read(&META_FILEPATH)?;
|
||||
let meta_string = String::from_utf8_lossy(&meta_data);
|
||||
@@ -117,7 +116,6 @@ impl Index {
|
||||
fn create_from_metas(directory: Box<Directory>, metas: IndexMeta) -> Result<Index> {
|
||||
let schema = metas.schema.clone();
|
||||
let opstamp = metas.opstamp;
|
||||
// TODO log somethings is uncommitted is not empty.
|
||||
let index = Index {
|
||||
directory: directory,
|
||||
schema: schema,
|
||||
@@ -137,7 +135,7 @@ impl Index {
|
||||
/// Opens a new directory from an index path.
|
||||
pub fn open(directory_path: &Path) -> Result<Index> {
|
||||
let directory = try!(MmapDirectory::open(directory_path));
|
||||
let metas = try!(load_metas(&directory)); //< TODO does the directory already exists?
|
||||
let metas = try!(load_metas(&directory));
|
||||
Index::create_from_metas(directory.box_clone(), metas)
|
||||
}
|
||||
|
||||
|
||||
@@ -74,16 +74,19 @@ impl SegmentReader {
|
||||
}
|
||||
|
||||
/// Accessor to a segment's fast field reader given a field.
|
||||
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
|
||||
pub fn get_fast_field_reader(&self, field: Field) -> Result<U32FastFieldReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
match *field_entry.field_type() {
|
||||
FieldType::Str(_) => {
|
||||
Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
|
||||
match field_entry.field_type() {
|
||||
&FieldType::Str(_) => {
|
||||
Err(Error::InvalidArgument(format!("Field <{}> is not a fast field. It is a text field, and fast text fields are not supported yet.", field_entry.name())))
|
||||
},
|
||||
FieldType::U32(_) => {
|
||||
// TODO check that the schema allows that
|
||||
//Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
|
||||
self.fast_fields_reader.get_field(field)
|
||||
&FieldType::U32(ref u32_options) => {
|
||||
if u32_options.is_fast() {
|
||||
Ok(self.fast_fields_reader.get_field(field)?)
|
||||
}
|
||||
else {
|
||||
Err(Error::InvalidArgument(format!("Field <{}> is not defined as a fast field.", field_entry.name())))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use store::StoreWriter;
|
||||
use core::SegmentInfo;
|
||||
use std::cmp::{min, max};
|
||||
use std::iter;
|
||||
use std::io;
|
||||
|
||||
|
||||
pub struct IndexMerger {
|
||||
schema: Schema,
|
||||
@@ -70,11 +70,11 @@ fn compute_min_max_val(u32_reader: &U32FastFieldReader, max_doc: DocId, delete_b
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> io::Result<U32FastFieldReader> {
|
||||
segment_reader.get_fieldnorms_reader(field)
|
||||
fn extract_fieldnorm_reader(segment_reader: &SegmentReader, field: Field) -> Result<U32FastFieldReader> {
|
||||
Ok(segment_reader.get_fieldnorms_reader(field)?)
|
||||
}
|
||||
|
||||
fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> io::Result<U32FastFieldReader> {
|
||||
fn extract_fast_field_reader(segment_reader: &SegmentReader, field: Field) -> Result<U32FastFieldReader> {
|
||||
segment_reader.get_fast_field_reader(field)
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ impl IndexMerger {
|
||||
// used both to merge field norms and regular u32 fast fields.
|
||||
fn generic_write_fast_field(&self,
|
||||
fields: Vec<Field>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> io::Result<U32FastFieldReader>,
|
||||
field_reader_extractor: &Fn(&SegmentReader, Field) -> Result<U32FastFieldReader>,
|
||||
fast_field_serializer: &mut FastFieldSerializer) -> Result<()> {
|
||||
|
||||
for field in fields {
|
||||
@@ -509,14 +509,6 @@ mod tests {
|
||||
index_writer.merge(&segment_ids)
|
||||
.wait()
|
||||
.expect("Merging failed");
|
||||
index_writer.wait_merging_threads().unwrap();
|
||||
|
||||
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
|
||||
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
|
||||
index_writer.merge(&segment_ids)
|
||||
.wait()
|
||||
.expect("Merging failed");
|
||||
index_writer.wait_merging_threads().unwrap();
|
||||
index.load_searchers().unwrap();
|
||||
let ref searcher = *index.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
@@ -534,6 +526,65 @@ mod tests {
|
||||
assert_eq!(score_field_reader.min_val(), 3);
|
||||
assert_eq!(score_field_reader.max_val(), 7000);
|
||||
}
|
||||
{
|
||||
// test a commit with only deletes
|
||||
index_writer.delete_term(Term::from_field_text(text_field, "c"));
|
||||
index_writer.commit().unwrap();
|
||||
|
||||
index.load_searchers().unwrap();
|
||||
let ref searcher = *index.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
assert_eq!(searcher.num_docs(), 2);
|
||||
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
|
||||
assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
|
||||
let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
|
||||
assert_eq!(score_field_reader.min_val(), 3);
|
||||
assert_eq!(score_field_reader.max_val(), 7000);
|
||||
}
|
||||
{ // Test merging a single segment in order to remove deletes.
|
||||
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
|
||||
index_writer.merge(&segment_ids)
|
||||
.wait()
|
||||
.expect("Merging failed");
|
||||
index.load_searchers().unwrap();
|
||||
|
||||
let ref searcher = *index.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
assert_eq!(searcher.num_docs(), 2);
|
||||
assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
|
||||
assert_eq!(searcher.segment_readers()[0].max_doc(), 2);
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), vec!());
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), vec!(6_000));
|
||||
assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), vec!(6_000, 7_000));
|
||||
let score_field_reader = searcher.segment_reader(0).get_fast_field_reader(score_field).unwrap();
|
||||
assert_eq!(score_field_reader.min_val(), 6000);
|
||||
assert_eq!(score_field_reader.max_val(), 7000);
|
||||
}
|
||||
|
||||
{ // Test removing all docs
|
||||
index_writer.delete_term(Term::from_field_text(text_field, "g"));
|
||||
let segment_ids = index.searchable_segment_ids().expect("Searchable segments failed.");
|
||||
index_writer.merge(&segment_ids)
|
||||
.wait()
|
||||
.expect("Merging failed");
|
||||
index.load_searchers().unwrap();
|
||||
|
||||
let ref searcher = *index.searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
assert_eq!(searcher.num_docs(), 0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
14
src/lib.rs
14
src/lib.rs
@@ -117,14 +117,16 @@ pub use postings::SegmentPostingsOption;
|
||||
|
||||
pub use core::TermIterator;
|
||||
|
||||
#[cfg(feature="simdcompression")]
|
||||
pub fn version() -> &'static str {
|
||||
concat!(version!(), "-simd")
|
||||
}
|
||||
|
||||
#[cfg(not(feature="simdcompression"))]
|
||||
/// Expose the current version of tantivy, as well
|
||||
/// whether it was compiled with the simd compression.
|
||||
pub fn version() -> &'static str {
|
||||
concat!(version!(), "-nosimd")
|
||||
if cfg!(feature="simdcompression") {
|
||||
concat!(version!(), "-simd")
|
||||
}
|
||||
else {
|
||||
concat!(version!(), "-nosimd")
|
||||
}
|
||||
}
|
||||
|
||||
/// Tantivy's makes it possible to personalize when
|
||||
|
||||
Reference in New Issue
Block a user