Checking the type of range queries

2026-06-05 01:50:42 +00:00 · 2018-04-16 09:56:13 +09:00
parent 8083bc6eef
commit 0804b42afa
8 changed files with 86 additions and 24 deletions
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -271,10 +271,24 @@ mod bench {
    use test::Bencher;
    use tests;

+
+    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
+        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
+        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
+        (0..u32::max_value())
+            .filter(|_| rng.next_f32() < ratio)
+            .take(n)
+            .collect()
+    }
+
+    pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
+        generate_array_with_seed(n, ratio, 4)
+    }
+
    #[bench]
    fn bench_compress(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
        b.iter(|| {
            encoder.compress_block_sorted(&data, 0u32);
        });
@@ -283,7 +297,7 @@ mod bench {
    #[bench]
    fn bench_uncompress(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
        let compressed = encoder.compress_block_sorted(&data, 0u32);
        let mut decoder = BlockDecoder::new();
        b.iter(|| {
@@ -310,7 +324,7 @@ mod bench {
    #[bench]
    fn bench_compress_vint(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
        b.iter(|| {
            encoder.compress_vint_sorted(&data, 0u32);
        });
@@ -319,7 +333,7 @@ mod bench {
    #[bench]
    fn bench_uncompress_vint(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
        let compressed = encoder.compress_vint_sorted(&data, 0u32);
        let mut decoder = BlockDecoder::new();
        b.iter(|| {
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -76,6 +76,11 @@ impl SegmentReader {
        self.segment_meta.num_docs()
    }

+    /// Returns the schema of the index this segment belongs to.
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
    /// Return the number of documents that have been
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -361,8 +361,9 @@ impl SegmentUpdater {
        let committed_merge_candidates = merge_policy.compute_merge_candidates(&committed_segments);
        merge_candidates.extend_from_slice(&committed_merge_candidates[..]);
        for MergeCandidate(segment_metas) in merge_candidates {
-            // TODO what do we do with the future here
-            self.start_merge(&segment_metas);
+            if let Err(e) = self.start_merge(&segment_metas).fuse().poll() {
+                error!("The merge task failed quickly after starting: {:?}", e);
+            }
        }
    }

--- a/src/indexer/stamper.rs
+++ b/src/indexer/stamper.rs
@@ -23,8 +23,6 @@ mod archicture_impl {
 }


-
-
 #[cfg(not(target="x86_64"))]
 mod archicture_impl {

--- a/src/lib.rs
+++ b/src/lib.rs
@@ -293,15 +293,6 @@ mod tests {
    pub fn nearly_equals(a: f32, b: f32) -> bool {
        (a - b).abs() < 0.0005 * (a + b).abs()
    }
-    
-    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
-        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
-        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
-        (0..u32::max_value())
-            .filter(|_| rng.next_f32() < ratio)
-            .take(n)
-            .collect()
-    }

    pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
        let seed: &[u32; 4] = &[1, 2, 3, 4];
@@ -312,10 +303,6 @@ mod tests {
            .collect::<Vec<u32>>()
    }

-    pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
-        generate_array_with_seed(n, ratio, 4)
-    }
-
    pub fn sample_with_seed(n: u32, ratio: f32, seed_val: u32) -> Vec<u32> {
        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -8,6 +8,7 @@ use core::Searcher;
 use query::BitSetDocSet;
 use query::ConstScorer;
 use std::ops::Range;
+use schema::Type;
 use std::collections::Bound;

 fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
@@ -81,13 +82,17 @@ fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
 #[derive(Debug)]
 pub struct RangeQuery {
    field: Field,
+    value_type: Type,
    left_bound: Bound<Vec<u8>>,
    right_bound: Bound<Vec<u8>>,
 }

 impl RangeQuery {

-
+    /// Creates a new `RangeQuery` over an `i64` field.
+    ///
+    /// If the field is not of the type `i64`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_i64(
        field: Field,
        range: Range<i64>
@@ -99,6 +104,9 @@ impl RangeQuery {
    ///
    /// The two `Bound` arguments make it possible to create more complex
    /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `i64`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_i64_bounds(
        field: Field,
        left_bound: Bound<i64>,
@@ -107,6 +115,7 @@ impl RangeQuery {
        let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
        RangeQuery {
            field,
+            value_type: Type::I64,
            left_bound: map_bound(left_bound, &make_term_val),
            right_bound: map_bound(right_bound, &make_term_val),
        }
@@ -116,6 +125,9 @@ impl RangeQuery {
    ///
    /// The two `Bound` arguments make it possible to create more complex
    /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `u64`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_u64_bounds(
        field: Field,
        left_bound: Bound<u64>,
@@ -124,12 +136,16 @@ impl RangeQuery {
        let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
        RangeQuery {
            field,
+            value_type: Type::U64,
            left_bound: map_bound(left_bound, &make_term_val),
            right_bound: map_bound(right_bound, &make_term_val),
        }
    }

    /// Create a new `RangeQuery` over a `u64` field.
+    ///
+    /// If the field is not of the type `u64`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_u64(
        field: Field,
        range: Range<u64>
@@ -141,6 +157,9 @@ impl RangeQuery {
    ///
    /// The two `Bound` arguments make it possible to create more complex
    /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `Str`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_str_bounds<'b>(
        field: Field,
        left: Bound<&'b str>,
@@ -149,12 +168,16 @@ impl RangeQuery {
        let make_term_val = |val: &str| val.as_bytes().to_vec();
        RangeQuery {
            field,
+            value_type: Type::Str,
            left_bound: map_bound(left, &make_term_val),
            right_bound: map_bound(right, &make_term_val),
        }
    }

    /// Create a new `RangeQuery` over a `Str` field.
+    ///
+    /// If the field is not of the type `Str`, tantivy
+    /// will panic when the `Weight` object is created.
    pub fn new_str<'b>(
        field: Field,
        range: Range<&'b str>
@@ -164,7 +187,14 @@ impl RangeQuery {
 }

 impl Query for RangeQuery {
-    fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
+    fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
+        if let Some(first_segment_reader) = searcher.segment_readers().iter().next() {
+            let value_type = first_segment_reader.schema().get_field_entry(self.field).field_type().value_type();
+            assert_eq!(
+                value_type, self.value_type,
+                "Create a range query of the type {:?}, when the field given was of type {:?}",
+                self.value_type, value_type);
+        }
        Ok(Box::new(RangeWeight {
            field: self.field,
            left_bound: self.left_bound.clone(),
--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -17,6 +17,18 @@ pub enum ValueParsingError {
    TypeError(String),
 }

+/// Type of the value that a field can take.
+///
+/// Contrary to `FieldType`, this does
+/// not include the way the field must be indexed.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum Type {
+    Str,
+    U64,
+    I64,
+    HierarchicalFacet
+}
+
 /// A `FieldType` describes the type (text, u64) of a field as well as
 /// how it should be handled by tantivy.
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -32,6 +44,21 @@ pub enum FieldType {
 }

 impl FieldType {
+
+    /// Returns the value type associated with this field.
+    pub fn value_type(&self) -> Type {
+        match *self {
+            FieldType::Str(_) =>
+                Type::Str,
+            FieldType::U64(_) =>
+                Type::U64,
+            FieldType::I64(_) =>
+                Type::I64,
+            FieldType::HierarchicalFacet =>
+                Type::HierarchicalFacet,
+        }
+    }
+
    /// returns true iff the field is indexed.
    pub fn is_indexed(&self) -> bool {
        match *self {
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -128,7 +128,7 @@ pub use self::document::Document;
 pub use self::field::Field;
 pub use self::term::Term;

-pub use self::field_type::FieldType;
+pub use self::field_type::{Type, FieldType};
 pub use self::field_entry::FieldEntry;
 pub use self::field_value::FieldValue;