diff --git a/src/compression/mod.rs b/src/compression/mod.rs index b00ad5097..31b9a9a7c 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -271,10 +271,24 @@ mod bench { use test::Bencher; use tests; + + fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec { + let seed: &[u32; 4] = &[1, 2, 3, seed_val]; + let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); + (0..u32::max_value()) + .filter(|_| rng.next_f32() < ratio) + .take(n) + .collect() + } + + pub fn generate_array(n: usize, ratio: f32) -> Vec { + generate_array_with_seed(n, ratio, 4) + } + #[bench] fn bench_compress(b: &mut Bencher) { let mut encoder = BlockEncoder::new(); - let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1); + let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1); b.iter(|| { encoder.compress_block_sorted(&data, 0u32); }); @@ -283,7 +297,7 @@ mod bench { #[bench] fn bench_uncompress(b: &mut Bencher) { let mut encoder = BlockEncoder::new(); - let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1); + let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1); let compressed = encoder.compress_block_sorted(&data, 0u32); let mut decoder = BlockDecoder::new(); b.iter(|| { @@ -310,7 +324,7 @@ mod bench { #[bench] fn bench_compress_vint(b: &mut Bencher) { let mut encoder = BlockEncoder::new(); - let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001); + let data = generate_array(NUM_INTS_BENCH_VINT, 0.001); b.iter(|| { encoder.compress_vint_sorted(&data, 0u32); }); @@ -319,7 +333,7 @@ mod bench { #[bench] fn bench_uncompress_vint(b: &mut Bencher) { let mut encoder = BlockEncoder::new(); - let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001); + let data = generate_array(NUM_INTS_BENCH_VINT, 0.001); let compressed = encoder.compress_vint_sorted(&data, 0u32); let mut decoder = BlockDecoder::new(); b.iter(|| { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 5d80ec3ca..7be2565f6 100644 --- 
a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -76,6 +76,11 @@ impl SegmentReader { self.segment_meta.num_docs() } + /// Returns the schema of the index this segment belongs to. + pub fn schema(&self) -> &Schema { + &self.schema + } + /// Return the number of documents that have been /// deleted in the segment. pub fn num_deleted_docs(&self) -> DocId { diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 83c68c953..501a7813f 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -361,8 +361,9 @@ impl SegmentUpdater { let committed_merge_candidates = merge_policy.compute_merge_candidates(&committed_segments); merge_candidates.extend_from_slice(&committed_merge_candidates[..]); for MergeCandidate(segment_metas) in merge_candidates { - // TODO what do we do with the future here - self.start_merge(&segment_metas); + if let Err(e) = self.start_merge(&segment_metas).fuse().poll() { + error!("The merge task failed quickly after starting: {:?}", e); + } } } diff --git a/src/indexer/stamper.rs b/src/indexer/stamper.rs index f9eee3136..479f5874b 100644 --- a/src/indexer/stamper.rs +++ b/src/indexer/stamper.rs @@ -23,8 +23,6 @@ mod archicture_impl { } - - #[cfg(not(target="x86_64"))] mod archicture_impl { diff --git a/src/lib.rs b/src/lib.rs index 9502da460..daed21331 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -293,15 +293,6 @@ mod tests { pub fn nearly_equals(a: f32, b: f32) -> bool { (a - b).abs() < 0.0005 * (a + b).abs() } - - fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec { - let seed: &[u32; 4] = &[1, 2, 3, seed_val]; - let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); - (0..u32::max_value()) - .filter(|_| rng.next_f32() < ratio) - .take(n) - .collect() - } pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec { let seed: &[u32; 4] = &[1, 2, 3, 4]; @@ -312,10 +303,6 @@ mod tests { .collect::>() } - pub fn generate_array(n: usize, ratio: f32) 
-> Vec { - generate_array_with_seed(n, ratio, 4) - } - pub fn sample_with_seed(n: u32, ratio: f32, seed_val: u32) -> Vec { let seed: &[u32; 4] = &[1, 2, 3, seed_val]; let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 9c85449ad..b94097955 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -8,6 +8,7 @@ use core::Searcher; use query::BitSetDocSet; use query::ConstScorer; use std::ops::Range; +use schema::Type; use std::collections::Bound; fn map_bound Vec>( @@ -81,13 +82,17 @@ fn map_bound Vec>( #[derive(Debug)] pub struct RangeQuery { field: Field, + value_type: Type, left_bound: Bound>, right_bound: Bound>, } impl RangeQuery { - + /// Creates a new `RangeQuery` over a `i64` field. + /// + /// If the field is not of the type `i64`, tantivy + /// will panic when the `Weight` object is created. pub fn new_i64( field: Field, range: Range @@ -99,6 +104,9 @@ impl RangeQuery { /// /// The two `Bound` arguments make it possible to create more complex /// ranges than semi-inclusive range. + /// + /// If the field is not of the type `i64`, tantivy + /// will panic when the `Weight` object is created. pub fn new_i64_bounds( field: Field, left_bound: Bound, @@ -107,6 +115,7 @@ impl RangeQuery { let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned(); RangeQuery { field, + value_type: Type::I64, left_bound: map_bound(left_bound, &make_term_val), right_bound: map_bound(right_bound, &make_term_val), } @@ -116,6 +125,9 @@ impl RangeQuery { /// /// The two `Bound` arguments make it possible to create more complex /// ranges than semi-inclusive range. + /// + /// If the field is not of the type `u64`, tantivy + /// will panic when the `Weight` object is created. 
pub fn new_u64_bounds( field: Field, left_bound: Bound, @@ -124,12 +136,16 @@ impl RangeQuery { let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned(); RangeQuery { field, + value_type: Type::U64, left_bound: map_bound(left_bound, &make_term_val), right_bound: map_bound(right_bound, &make_term_val), } } /// Create a new `RangeQuery` over a `u64` field. + /// + /// If the field is not of the type `u64`, tantivy + /// will panic when the `Weight` object is created. pub fn new_u64( field: Field, range: Range @@ -141,6 +157,9 @@ impl RangeQuery { /// /// The two `Bound` arguments make it possible to create more complex /// ranges than semi-inclusive range. + /// + /// If the field is not of the type `Str`, tantivy + /// will panic when the `Weight` object is created. pub fn new_str_bounds<'b>( field: Field, left: Bound<&'b str>, @@ -149,12 +168,16 @@ impl RangeQuery { let make_term_val = |val: &str| val.as_bytes().to_vec(); RangeQuery { field, + value_type: Type::Str, left_bound: map_bound(left, &make_term_val), right_bound: map_bound(right, &make_term_val), } } /// Create a new `RangeQuery` over a `Str` field. + /// + /// If the field is not of the type `Str`, tantivy + /// will panic when the `Weight` object is created. 
pub fn new_str<'b>( field: Field, range: Range<&'b str> @@ -164,7 +187,14 @@ impl RangeQuery { } impl Query for RangeQuery { - fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result> { + fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result> { + if let Some(first_segment_reader) = searcher.segment_readers().iter().next() { + let value_type = first_segment_reader.schema().get_field_entry(self.field).field_type().value_type(); + assert_eq!( + value_type, self.value_type, + "Create a range query of the type {:?}, when the field given was of type {:?}", + self.value_type, value_type); + } Ok(Box::new(RangeWeight { field: self.field, left_bound: self.left_bound.clone(), diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 3069a3b37..a619535cb 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -17,6 +17,18 @@ pub enum ValueParsingError { TypeError(String), } +/// Type of the value that a field can take. +/// +/// Contrary to FieldType, this does +/// not include the way the field must be indexed. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Type { + Str, + U64, + I64, + HierarchicalFacet +} + /// A `FieldType` describes the type (text, u64) of a field as well as /// how it should be handled by tantivy. #[derive(Clone, Debug, Eq, PartialEq)] @@ -32,6 +44,21 @@ pub enum FieldType { } impl FieldType { + + /// Returns the value type associated with this field. + pub fn value_type(&self) -> Type { + match *self { + FieldType::Str(_) => + Type::Str, + FieldType::U64(_) => + Type::U64, + FieldType::I64(_) => + Type::I64, + FieldType::HierarchicalFacet => + Type::HierarchicalFacet, + } + } + /// returns true iff the field is indexed. 
pub fn is_indexed(&self) -> bool { match *self { diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 6e4b0f51d..7f3f637b1 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -128,7 +128,7 @@ pub use self::document::Document; pub use self::field::Field; pub use self::term::Term; -pub use self::field_type::FieldType; +pub use self::field_type::{Type, FieldType}; pub use self::field_entry::FieldEntry; pub use self::field_value::FieldValue;