mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
Checking the type of range queries
This commit is contained in:
@@ -271,10 +271,24 @@ mod bench {
|
||||
use test::Bencher;
|
||||
use tests;
|
||||
|
||||
|
||||
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
|
||||
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
|
||||
(0..u32::max_value())
|
||||
.filter(|_| rng.next_f32() < ratio)
|
||||
.take(n)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Builds a `Vec<u32>` by delegating to `generate_array_with_seed`
/// with the seed value fixed to `4`.
///
/// `n` and `ratio` are forwarded unchanged.
pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
|
||||
generate_array_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_compress(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
b.iter(|| {
|
||||
encoder.compress_block_sorted(&data, 0u32);
|
||||
});
|
||||
@@ -283,7 +297,7 @@ mod bench {
|
||||
#[bench]
|
||||
fn bench_uncompress(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
let compressed = encoder.compress_block_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
b.iter(|| {
|
||||
@@ -310,7 +324,7 @@ mod bench {
|
||||
#[bench]
|
||||
fn bench_compress_vint(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
b.iter(|| {
|
||||
encoder.compress_vint_sorted(&data, 0u32);
|
||||
});
|
||||
@@ -319,7 +333,7 @@ mod bench {
|
||||
#[bench]
|
||||
fn bench_uncompress_vint(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
b.iter(|| {
|
||||
|
||||
@@ -76,6 +76,11 @@ impl SegmentReader {
|
||||
self.segment_meta.num_docs()
|
||||
}
|
||||
|
||||
/// Returns the schema of the index this segment belongs to.
///
/// The returned reference borrows this reader's `schema` field;
/// no copy of the schema is made.
|
||||
pub fn schema(&self) -> &Schema {
|
||||
&self.schema
|
||||
}
|
||||
|
||||
/// Return the number of documents that have been
|
||||
/// deleted in the segment.
|
||||
pub fn num_deleted_docs(&self) -> DocId {
|
||||
|
||||
@@ -361,8 +361,9 @@ impl SegmentUpdater {
|
||||
let committed_merge_candidates = merge_policy.compute_merge_candidates(&committed_segments);
|
||||
merge_candidates.extend_from_slice(&committed_merge_candidates[..]);
|
||||
for MergeCandidate(segment_metas) in merge_candidates {
|
||||
// TODO what do we do with the future here
|
||||
self.start_merge(&segment_metas);
|
||||
if let Err(e) = self.start_merge(&segment_metas).fuse().poll() {
|
||||
error!("The merge task failed quickly after starting: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,8 +23,6 @@ mod archicture_impl {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(not(target="x86_64"))]
|
||||
mod archicture_impl {
|
||||
|
||||
|
||||
13
src/lib.rs
13
src/lib.rs
@@ -293,15 +293,6 @@ mod tests {
|
||||
/// Returns `true` when `a` and `b` are equal up to a small relative tolerance.
///
/// Two values are considered nearly equal when they compare equal, or when
/// their absolute difference is strictly below `0.0005 * |a + b|`.
///
/// The explicit equality test is required because the tolerance collapses
/// to `0.0` when `a + b == 0.0`: without it, `nearly_equals(0.0, 0.0)`
/// would evaluate `0.0 < 0.0` and wrongly report a mismatch.
pub fn nearly_equals(a: f32, b: f32) -> bool {
    a == b || (a - b).abs() < 0.0005 * (a + b).abs()
}
|
||||
|
||||
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
|
||||
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
|
||||
(0..u32::max_value())
|
||||
.filter(|_| rng.next_f32() < ratio)
|
||||
.take(n)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, 4];
|
||||
@@ -312,10 +303,6 @@ mod tests {
|
||||
.collect::<Vec<u32>>()
|
||||
}
|
||||
|
||||
/// Builds a `Vec<u32>` by delegating to `generate_array_with_seed`
/// with the seed value fixed to `4`.
///
/// `n` and `ratio` are forwarded unchanged.
pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
|
||||
generate_array_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
pub fn sample_with_seed(n: u32, ratio: f32, seed_val: u32) -> Vec<u32> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
|
||||
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
|
||||
|
||||
@@ -8,6 +8,7 @@ use core::Searcher;
|
||||
use query::BitSetDocSet;
|
||||
use query::ConstScorer;
|
||||
use std::ops::Range;
|
||||
use schema::Type;
|
||||
use std::collections::Bound;
|
||||
|
||||
fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
|
||||
@@ -81,13 +82,17 @@ fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
|
||||
#[derive(Debug)]
|
||||
pub struct RangeQuery {
|
||||
// Field the range applies to.
field: Field,
|
||||
// Value type this query was built for; each constructor sets it
// (`Type::I64`, `Type::U64` or `Type::Str`), and `weight` asserts that it
// matches the value type the schema declares for `field`.
value_type: Type,
|
||||
// Left bound of the range, stored as the bytes produced by mapping the
// caller's bound through `map_bound`.
left_bound: Bound<Vec<u8>>,
|
||||
// Right bound of the range, stored the same way as `left_bound`.
right_bound: Bound<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl RangeQuery {
|
||||
|
||||
|
||||
/// Creates a new `RangeQuery` over a `i64` field.
|
||||
///
|
||||
/// If the field is not of the type `i64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_i64(
|
||||
field: Field,
|
||||
range: Range<i64>
|
||||
@@ -99,6 +104,9 @@ impl RangeQuery {
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
/// ranges than semi-inclusive range.
|
||||
///
|
||||
/// If the field is not of the type `i64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_i64_bounds(
|
||||
field: Field,
|
||||
left_bound: Bound<i64>,
|
||||
@@ -107,6 +115,7 @@ impl RangeQuery {
|
||||
let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type: Type::I64,
|
||||
left_bound: map_bound(left_bound, &make_term_val),
|
||||
right_bound: map_bound(right_bound, &make_term_val),
|
||||
}
|
||||
@@ -116,6 +125,9 @@ impl RangeQuery {
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
/// ranges than semi-inclusive range.
|
||||
///
|
||||
/// If the field is not of the type `u64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_u64_bounds(
|
||||
field: Field,
|
||||
left_bound: Bound<u64>,
|
||||
@@ -124,12 +136,16 @@ impl RangeQuery {
|
||||
let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type: Type::U64,
|
||||
left_bound: map_bound(left_bound, &make_term_val),
|
||||
right_bound: map_bound(right_bound, &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `u64` field.
|
||||
///
|
||||
/// If the field is not of the type `u64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_u64(
|
||||
field: Field,
|
||||
range: Range<u64>
|
||||
@@ -141,6 +157,9 @@ impl RangeQuery {
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
/// ranges than semi-inclusive range.
|
||||
///
|
||||
/// If the field is not of the type `Str`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_str_bounds<'b>(
|
||||
field: Field,
|
||||
left: Bound<&'b str>,
|
||||
@@ -149,12 +168,16 @@ impl RangeQuery {
|
||||
let make_term_val = |val: &str| val.as_bytes().to_vec();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type: Type::Str,
|
||||
left_bound: map_bound(left, &make_term_val),
|
||||
right_bound: map_bound(right, &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `Str` field.
|
||||
///
|
||||
/// If the field is not of the type `Str`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_str<'b>(
|
||||
field: Field,
|
||||
range: Range<&'b str>
|
||||
@@ -164,7 +187,14 @@ impl RangeQuery {
|
||||
}
|
||||
|
||||
impl Query for RangeQuery {
|
||||
fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
|
||||
fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
|
||||
if let Some(first_segment_reader) = searcher.segment_readers().iter().next() {
|
||||
let value_type = first_segment_reader.schema().get_field_entry(self.field).field_type().value_type();
|
||||
assert_eq!(
|
||||
value_type, self.value_type,
|
||||
"Create a range query of the type {:?}, when the field given was of type {:?}",
|
||||
self.value_type, value_type);
|
||||
}
|
||||
Ok(Box::new(RangeWeight {
|
||||
field: self.field,
|
||||
left_bound: self.left_bound.clone(),
|
||||
|
||||
@@ -17,6 +17,18 @@ pub enum ValueParsingError {
|
||||
TypeError(String),
|
||||
}
|
||||
|
||||
/// Type of the value that a field can take.
|
||||
///
|
||||
/// Contrary to FieldType, this does
|
||||
/// not include the way the field must be indexed.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Type {
|
||||
/// Value type reported for `FieldType::Str` fields.
Str,
|
||||
/// Value type reported for `FieldType::U64` fields.
U64,
|
||||
/// Value type reported for `FieldType::I64` fields.
I64,
|
||||
/// Value type reported for `FieldType::HierarchicalFacet` fields.
HierarchicalFacet
|
||||
}
|
||||
|
||||
/// A `FieldType` describes the type (text, u64) of a field as well as
|
||||
/// how it should be handled by tantivy.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
@@ -32,6 +44,21 @@ pub enum FieldType {
|
||||
}
|
||||
|
||||
impl FieldType {
|
||||
|
||||
/// Returns the value type associated for this field.
|
||||
pub fn value_type(&self) -> Type {
|
||||
match *self {
|
||||
FieldType::Str(_) =>
|
||||
Type::Str,
|
||||
FieldType::U64(_) =>
|
||||
Type::U64,
|
||||
FieldType::I64(_) =>
|
||||
Type::I64,
|
||||
FieldType::HierarchicalFacet =>
|
||||
Type::HierarchicalFacet,
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true iff the field is indexed.
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
match *self {
|
||||
|
||||
@@ -128,7 +128,7 @@ pub use self::document::Document;
|
||||
pub use self::field::Field;
|
||||
pub use self::term::Term;
|
||||
|
||||
pub use self::field_type::FieldType;
|
||||
pub use self::field_type::{Type, FieldType};
|
||||
pub use self::field_entry::FieldEntry;
|
||||
pub use self::field_value::FieldValue;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user