mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
add support for str fast field range query (#2453)
* add support for str fast field range query: add support for range queries on str fast fields by converting term bounds to term ordinal bounds. Closes https://github.com/quickwit-oss/tantivy/issues/2023 * extend tests, rename * update comment * update comment
This commit is contained in:
@@ -12,9 +12,9 @@ pub use self::range_query_u64_fastfield::FastFieldRangeWeight;
|
||||
// TODO is this correct?
|
||||
pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
|
||||
match typ {
|
||||
Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
|
||||
Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
|
||||
Type::IpAddr => true,
|
||||
Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
|
||||
Type::Facet | Type::Bytes | Type::Json => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
use std::net::Ipv6Addr;
|
||||
use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use columnar::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use columnar::{Column, MonotonicallyMappableToU128, MonotonicallyMappableToU64, StrColumn};
|
||||
use common::BinarySerializable;
|
||||
|
||||
use super::fast_field_range_doc_set::RangeDocSet;
|
||||
@@ -51,16 +51,22 @@ impl Weight for FastFieldRangeWeight {
|
||||
}
|
||||
let field_name = reader.schema().get_field_name(self.field);
|
||||
let field_type = reader.schema().get_field_entry(self.field).field_type();
|
||||
|
||||
let term = inner_bound(&self.lower_bound)
|
||||
.or(inner_bound(&self.upper_bound))
|
||||
.expect("At least one bound must be set");
|
||||
assert_eq!(
|
||||
term.typ(),
|
||||
field_type.value_type(),
|
||||
"Field is of type {:?}, but got term of type {:?}",
|
||||
field_type,
|
||||
term.typ()
|
||||
);
|
||||
if field_type.is_ip_addr() {
|
||||
let parse_ip_from_bytes = |term: &Term| {
|
||||
let ip_u128_bytes: [u8; 16] =
|
||||
term.serialized_value_bytes().try_into().map_err(|_| {
|
||||
crate::TantivyError::InvalidArgument(
|
||||
"Expected 8 bytes for ip address".to_string(),
|
||||
)
|
||||
})?;
|
||||
let ip_u128 = u128::from_be_bytes(ip_u128_bytes);
|
||||
crate::Result::<Ipv6Addr>::Ok(Ipv6Addr::from_u128(ip_u128))
|
||||
term.value().as_ip_addr().ok_or_else(|| {
|
||||
crate::TantivyError::InvalidArgument("Expected ip address".to_string())
|
||||
})
|
||||
};
|
||||
let lower_bound = map_bound_res(&self.lower_bound, parse_ip_from_bytes)?;
|
||||
let upper_bound = map_bound_res(&self.upper_bound, parse_ip_from_bytes)?;
|
||||
@@ -79,33 +85,42 @@ impl Weight for FastFieldRangeWeight {
|
||||
let docset = RangeDocSet::new(value_range, ip_addr_column);
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
} else {
|
||||
assert!(
|
||||
maps_to_u64_fastfield(field_type.value_type()),
|
||||
"{:?}",
|
||||
field_type
|
||||
);
|
||||
let (lower_bound, upper_bound) = if field_type.is_str() {
|
||||
let Some(str_dict_column): Option<StrColumn> =
|
||||
reader.fast_fields().str(field_name)?
|
||||
else {
|
||||
return Ok(Box::new(EmptyScorer));
|
||||
};
|
||||
let dict = str_dict_column.dictionary();
|
||||
|
||||
let term = inner_bound(&self.lower_bound)
|
||||
.or(inner_bound(&self.upper_bound))
|
||||
.expect("At least one bound must be set");
|
||||
assert_eq!(
|
||||
term.typ(),
|
||||
field_type.value_type(),
|
||||
"Field is of type {:?}, but got term of type {:?}",
|
||||
field_type,
|
||||
term.typ()
|
||||
);
|
||||
let lower_bound = map_bound(&self.lower_bound, |term| {
|
||||
term.serialized_value_bytes().to_vec()
|
||||
});
|
||||
let upper_bound = map_bound(&self.upper_bound, |term| {
|
||||
term.serialized_value_bytes().to_vec()
|
||||
});
|
||||
// Get term ids for terms
|
||||
let (lower_bound, upper_bound) =
|
||||
dict.term_bounds_to_ord(lower_bound, upper_bound)?;
|
||||
(lower_bound, upper_bound)
|
||||
} else {
|
||||
assert!(
|
||||
maps_to_u64_fastfield(field_type.value_type()),
|
||||
"{:?}",
|
||||
field_type
|
||||
);
|
||||
let parse_from_bytes = |term: &Term| {
|
||||
u64::from_be(
|
||||
BinarySerializable::deserialize(&mut &term.serialized_value_bytes()[..])
|
||||
.unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
let parse_from_bytes = |term: &Term| {
|
||||
u64::from_be(
|
||||
BinarySerializable::deserialize(&mut &term.serialized_value_bytes()[..])
|
||||
.unwrap(),
|
||||
)
|
||||
let lower_bound = map_bound(&self.lower_bound, parse_from_bytes);
|
||||
let upper_bound = map_bound(&self.upper_bound, parse_from_bytes);
|
||||
(lower_bound, upper_bound)
|
||||
};
|
||||
|
||||
let lower_bound = map_bound(&self.lower_bound, parse_from_bytes);
|
||||
let upper_bound = map_bound(&self.upper_bound, parse_from_bytes);
|
||||
|
||||
let fast_field_reader = reader.fast_fields();
|
||||
let Some((column, _)) = fast_field_reader.u64_lenient_for_type(None, field_name)?
|
||||
else {
|
||||
@@ -202,12 +217,73 @@ pub mod tests {
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::SeedableRng;
|
||||
|
||||
use crate::collector::Count;
|
||||
use crate::collector::{Count, TopDocs};
|
||||
use crate::query::range_query::range_query_u64_fastfield::FastFieldRangeWeight;
|
||||
use crate::query::{QueryParser, Weight};
|
||||
use crate::schema::{NumericOptions, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING};
|
||||
use crate::schema::{
|
||||
NumericOptions, Schema, SchemaBuilder, FAST, INDEXED, STORED, STRING, TEXT,
|
||||
};
|
||||
use crate::{Index, IndexWriter, Term, TERMINATED};
|
||||
|
||||
#[test]
fn test_text_field_ff_range_query() -> crate::Result<()> {
    // Index two documents ("bbb" and "ddd") in a `TEXT | FAST` field and check the hit count
    // of range queries whose bounds are, or are not, terms present in the index.
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT | FAST);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());
    let mut index_writer = index.writer_for_tests()?;
    let title = schema.get_field("title").unwrap();
    index_writer.add_document(doc!(
        title => "bbb"
    ))?;
    index_writer.add_document(doc!(
        title => "ddd"
    ))?;
    index_writer.commit()?;
    let reader = index.reader()?;
    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![title]);

    let test_query = |query, num_hits| {
        let query = query_parser.parse_query(query).unwrap();
        let top_docs = searcher.search(&query, &TopDocs::with_limit(10)).unwrap();
        assert_eq!(top_docs.len(), num_hits);
    };

    // Both bounds set, inclusive.
    test_query("title:[aaa TO ccc]", 1);
    test_query("title:[aaa TO bbb]", 1);
    test_query("title:[bbb TO bbb]", 1);
    test_query("title:[bbb TO ddd]", 2);
    test_query("title:[bbb TO eee]", 2);
    test_query("title:[bb TO eee]", 2);
    test_query("title:[ccc TO ccc]", 0);
    test_query("title:[ccc TO ddd]", 1);
    test_query("title:[ccc TO eee]", 1);

    // Inclusive lower bound, open upper bound.
    test_query("title:[aaa TO *}", 2);
    test_query("title:[bbb TO *]", 2);
    test_query("title:[bb TO *]", 2);
    test_query("title:[ccc TO *]", 1);
    test_query("title:[ddd TO *]", 1);
    test_query("title:[dddd TO *]", 0);

    // Exclusive lower bound, open upper bound.
    test_query("title:{aaa TO *}", 2);
    test_query("title:{bbb TO *]", 1);
    test_query("title:{bb TO *]", 2);
    test_query("title:{ccc TO *]", 1);
    test_query("title:{ddd TO *]", 0);
    test_query("title:{dddd TO *]", 0);

    // Open lower bound.
    test_query("title:[* TO bb]", 0);
    test_query("title:[* TO bbb]", 1);
    test_query("title:[* TO ccc]", 1);
    test_query("title:[* TO ddd]", 2);
    test_query("title:[* TO ddd}", 1);
    test_query("title:[* TO eee]", 2);

    Ok(())
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Doc {
|
||||
pub id_name: String,
|
||||
@@ -224,14 +300,14 @@ pub mod tests {
|
||||
fn doc_from_id_1(id: u64) -> Doc {
|
||||
let id = id * 1000;
|
||||
Doc {
|
||||
id_name: id.to_string(),
|
||||
id_name: format!("id_name{:010}", id),
|
||||
id,
|
||||
}
|
||||
}
|
||||
fn doc_from_id_2(id: u64) -> Doc {
|
||||
let id = id * 1000;
|
||||
Doc {
|
||||
id_name: (id - 1).to_string(),
|
||||
id_name: format!("id_name{:010}", id - 1),
|
||||
id,
|
||||
}
|
||||
}
|
||||
@@ -319,7 +395,8 @@ pub mod tests {
|
||||
NumericOptions::default().set_fast().set_indexed(),
|
||||
);
|
||||
|
||||
let text_field = schema_builder.add_text_field("id_name", STRING | STORED);
|
||||
let text_field = schema_builder.add_text_field("id_name", STRING | STORED | FAST);
|
||||
let text_field2 = schema_builder.add_text_field("id_name_fast", STRING | STORED | FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
@@ -338,6 +415,7 @@ pub mod tests {
|
||||
id_f64_field => doc.id as f64,
|
||||
id_i64_field => doc.id as i64,
|
||||
text_field => doc.id_name.to_string(),
|
||||
text_field2 => doc.id_name.to_string(),
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
@@ -382,6 +460,24 @@ pub mod tests {
|
||||
let query = gen_query_inclusive("ids", ids[0]..=ids[1]);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
|
||||
// Text query
|
||||
{
|
||||
let test_text_query = |field_name: &str| {
|
||||
let mut id_names: Vec<&str> =
|
||||
sample_docs.iter().map(|doc| doc.id_name.as_str()).collect();
|
||||
id_names.sort();
|
||||
let expected_num_hits = docs
|
||||
.iter()
|
||||
.filter(|doc| (id_names[0]..=id_names[1]).contains(&doc.id_name.as_str()))
|
||||
.count();
|
||||
let query = format!("{}:[{} TO {}]", field_name, id_names[0], id_names[1]);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
};
|
||||
|
||||
test_text_query("id_name");
|
||||
test_text_query("id_name_fast");
|
||||
}
|
||||
|
||||
// Exclusive range
|
||||
let expected_num_hits = docs
|
||||
.iter()
|
||||
|
||||
@@ -201,6 +201,11 @@ impl FieldType {
|
||||
matches!(self, FieldType::IpAddr(_))
|
||||
}
|
||||
|
||||
/// returns true if this is an str field
|
||||
pub fn is_str(&self) -> bool {
|
||||
matches!(self, FieldType::Str(_))
|
||||
}
|
||||
|
||||
/// returns true if this is a date field
|
||||
pub fn is_date(&self) -> bool {
|
||||
matches!(self, FieldType::Date(_))
|
||||
|
||||
@@ -56,6 +56,53 @@ impl Dictionary<VoidSSTable> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `transform` to the value carried by `bound`, keeping the bound variant unchanged.
///
/// `Included(v)` becomes `Included(transform(v))`, `Excluded(v)` becomes
/// `Excluded(transform(v))`, and `Unbounded` stays `Unbounded` without calling `transform`.
fn map_bound<TFrom, TTo>(bound: &Bound<TFrom>, transform: impl Fn(&TFrom) -> TTo) -> Bound<TTo> {
    match bound {
        Bound::Excluded(from_val) => Bound::Excluded(transform(from_val)),
        Bound::Included(from_val) => Bound::Included(transform(from_val)),
        Bound::Unbounded => Bound::Unbounded,
    }
}
|
||||
|
||||
/// Takes a bound and turns its inner value into a new bound via a closure.
///
/// The closure returns a complete `Bound`, so the resulting variant may differ from the
/// variant of the input (e.g. an `Excluded` input can yield an `Included` output). An
/// `Unbounded` input is returned as `Unbounded` without calling the closure.
///
/// # Errors
///
/// Propagates any `io::Error` returned by `transform`.
fn transform_bound_inner<TFrom, TTo>(
    bound: &Bound<TFrom>,
    transform: impl Fn(&TFrom) -> io::Result<Bound<TTo>>,
) -> io::Result<Bound<TTo>> {
    match bound {
        // The input variant is deliberately ignored: the closure decides the output variant.
        Bound::Excluded(from_val) | Bound::Included(from_val) => transform(from_val),
        Bound::Unbounded => Ok(Bound::Unbounded),
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum TermOrdHit {
|
||||
/// Exact term ord hit
|
||||
Exact(TermOrdinal),
|
||||
/// Next best term ordinal
|
||||
Next(TermOrdinal),
|
||||
}
|
||||
|
||||
impl TermOrdHit {
|
||||
fn into_exact(self) -> Option<TermOrdinal> {
|
||||
match self {
|
||||
TermOrdHit::Exact(ord) => Some(ord),
|
||||
TermOrdHit::Next(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn map<F: FnOnce(TermOrdinal) -> TermOrdinal>(self, f: F) -> Self {
|
||||
match self {
|
||||
TermOrdHit::Exact(ord) => TermOrdHit::Exact(f(ord)),
|
||||
TermOrdHit::Next(ord) => TermOrdHit::Next(f(ord)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
pub fn builder<W: io::Write>(wrt: W) -> io::Result<crate::Writer<W, TSSTable::ValueWriter>> {
|
||||
Ok(TSSTable::writer(wrt))
|
||||
@@ -257,6 +304,17 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
key: K,
|
||||
sstable_delta_reader: &mut DeltaReader<TSSTable::ValueReader>,
|
||||
) -> io::Result<Option<TermOrdinal>> {
|
||||
self.decode_up_to_or_next(key, sstable_delta_reader)
|
||||
.map(|hit| hit.into_exact())
|
||||
}
|
||||
/// Decode a DeltaReader up to key, returning the number of terms traversed
|
||||
///
|
||||
/// If the key is not found, returns the ordinal of the next term instead.
|
||||
fn decode_up_to_or_next<K: AsRef<[u8]>>(
|
||||
&self,
|
||||
key: K,
|
||||
sstable_delta_reader: &mut DeltaReader<TSSTable::ValueReader>,
|
||||
) -> io::Result<TermOrdHit> {
|
||||
let mut term_ord = 0;
|
||||
let key_bytes = key.as_ref();
|
||||
let mut ok_bytes = 0;
|
||||
@@ -265,7 +323,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
let suffix = sstable_delta_reader.suffix();
|
||||
|
||||
match prefix_len.cmp(&ok_bytes) {
|
||||
Ordering::Less => return Ok(None), // popped bytes already matched => too far
|
||||
Ordering::Less => return Ok(TermOrdHit::Next(term_ord)), /* popped bytes already matched => too far */
|
||||
Ordering::Equal => (),
|
||||
Ordering::Greater => {
|
||||
// the ok prefix is less than current entry prefix => continue to next elem
|
||||
@@ -277,25 +335,26 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
// we have ok_bytes byte of common prefix, check if this key adds more
|
||||
for (key_byte, suffix_byte) in key_bytes[ok_bytes..].iter().zip(suffix) {
|
||||
match suffix_byte.cmp(key_byte) {
|
||||
Ordering::Less => break, // byte too small
|
||||
Ordering::Equal => ok_bytes += 1, // new matching byte
|
||||
Ordering::Greater => return Ok(None), // too far
|
||||
Ordering::Less => break, // byte too small
|
||||
Ordering::Equal => ok_bytes += 1, // new matching
|
||||
// byte
|
||||
Ordering::Greater => return Ok(TermOrdHit::Next(term_ord)), // too far
|
||||
}
|
||||
}
|
||||
|
||||
if ok_bytes == key_bytes.len() {
|
||||
if prefix_len + suffix.len() == ok_bytes {
|
||||
return Ok(Some(term_ord));
|
||||
return Ok(TermOrdHit::Exact(term_ord));
|
||||
} else {
|
||||
// current key is a prefix of current element, not a match
|
||||
return Ok(None);
|
||||
return Ok(TermOrdHit::Next(term_ord));
|
||||
}
|
||||
}
|
||||
|
||||
term_ord += 1;
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
Ok(TermOrdHit::Next(term_ord))
|
||||
}
|
||||
|
||||
/// Returns the ordinal associated with a given term.
|
||||
@@ -312,6 +371,61 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
.map(|opt| opt.map(|ord| ord + first_ordinal))
|
||||
}
|
||||
|
||||
/// Returns the ordinal associated with a given term or its closest next term_id
|
||||
/// The closest next term_id may not exist.
|
||||
pub fn term_ord_or_next<K: AsRef<[u8]>>(&self, key: K) -> io::Result<TermOrdHit> {
|
||||
let key_bytes = key.as_ref();
|
||||
|
||||
let Some(block_addr) = self.sstable_index.get_block_with_key(key_bytes) else {
|
||||
// TODO: Would be more consistent to return last_term id + 1
|
||||
return Ok(TermOrdHit::Next(u64::MAX));
|
||||
};
|
||||
|
||||
let first_ordinal = block_addr.first_ordinal;
|
||||
let mut sstable_delta_reader = self.sstable_delta_reader_block(block_addr)?;
|
||||
self.decode_up_to_or_next(key_bytes, &mut sstable_delta_reader)
|
||||
.map(|opt| opt.map(|ord| ord + first_ordinal))
|
||||
}
|
||||
|
||||
/// Converts strings into a Bound range.
|
||||
/// This does handle several special cases if the term is not exactly in the dictionary.
|
||||
/// e.g. [bbb, ddd]
|
||||
/// lower_bound: Bound::Included(aaa) => Included(0) // "Next" term id
|
||||
/// lower_bound: Bound::Excluded(aaa) => Included(0) // "Next" term id + Change the Bounds
|
||||
/// lower_bound: Bound::Included(ccc) => Included(1) // "Next" term id
|
||||
/// lower_bound: Bound::Excluded(ccc) => Included(1) // "Next" term id + Change the Bounds
|
||||
/// lower_bound: Bound::Included(zzz) => Included(2) // "Next" term id
|
||||
/// lower_bound: Bound::Excluded(zzz) => Included(2) // "Next" term id + Change the Bounds
|
||||
/// For zzz we should have some post processing to return an empty query`
|
||||
///
|
||||
/// upper_bound: Bound::Included(aaa) => Excluded(0) // "Next" term id + Change the bounds
|
||||
/// upper_bound: Bound::Excluded(aaa) => Excluded(0) // "Next" term id
|
||||
/// upper_bound: Bound::Included(ccc) => Excluded(1) // Next term id + Change the bounds
|
||||
/// upper_bound: Bound::Excluded(ccc) => Excluded(1) // Next term id
|
||||
/// upper_bound: Bound::Included(zzz) => Excluded(2) // Next term id + Change the bounds
|
||||
/// upper_bound: Bound::Excluded(zzz) => Excluded(2) // Next term id
|
||||
pub fn term_bounds_to_ord<K: AsRef<[u8]>>(
|
||||
&self,
|
||||
lower_bound: Bound<K>,
|
||||
upper_bound: Bound<K>,
|
||||
) -> io::Result<(Bound<TermOrdinal>, Bound<TermOrdinal>)> {
|
||||
let lower_bound = transform_bound_inner(&lower_bound, |start_bound_bytes| {
|
||||
let ord = self.term_ord_or_next(start_bound_bytes)?;
|
||||
match ord {
|
||||
TermOrdHit::Exact(ord) => Ok(map_bound(&lower_bound, |_| ord)),
|
||||
TermOrdHit::Next(ord) => Ok(Bound::Included(ord)), // Change bounds to included
|
||||
}
|
||||
})?;
|
||||
let upper_bound = transform_bound_inner(&upper_bound, |end_bound_bytes| {
|
||||
let ord = self.term_ord_or_next(end_bound_bytes)?;
|
||||
match ord {
|
||||
TermOrdHit::Exact(ord) => Ok(map_bound(&upper_bound, |_| ord)),
|
||||
TermOrdHit::Next(ord) => Ok(Bound::Excluded(ord)), // Change bounds to excluded
|
||||
}
|
||||
})?;
|
||||
Ok((lower_bound, upper_bound))
|
||||
}
|
||||
|
||||
/// Returns the term associated with a given term ordinal.
|
||||
///
|
||||
/// Term ordinals are defined as the position of the term in
|
||||
@@ -455,12 +569,13 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Range;
|
||||
use std::ops::{Bound, Range};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common::OwnedBytes;
|
||||
|
||||
use super::Dictionary;
|
||||
use crate::dictionary::TermOrdHit;
|
||||
use crate::MonotonicU64SSTable;
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -524,6 +639,140 @@ mod tests {
|
||||
(dictionary, table)
|
||||
}
|
||||
|
||||
#[test]
fn test_term_to_ord_or_next() {
    // Two-term dictionary: "bbb" has ordinal 0 and "ddd" has ordinal 1.
    let dict = {
        let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();

        builder.insert(b"bbb", &1).unwrap();
        builder.insert(b"ddd", &2).unwrap();

        let table = builder.finish().unwrap();
        let table = Arc::new(PermissionedHandle::new(table));
        let slice = common::file_slice::FileSlice::new(table.clone());

        Dictionary::<MonotonicU64SSTable>::open(slice).unwrap()
    };

    assert_eq!(dict.term_ord_or_next(b"aaa").unwrap(), TermOrdHit::Next(0));
    assert_eq!(dict.term_ord_or_next(b"bbb").unwrap(), TermOrdHit::Exact(0));
    assert_eq!(dict.term_ord_or_next(b"bb").unwrap(), TermOrdHit::Next(0));
    assert_eq!(dict.term_ord_or_next(b"bbbb").unwrap(), TermOrdHit::Next(1));
    assert_eq!(dict.term_ord_or_next(b"dd").unwrap(), TermOrdHit::Next(1));
    assert_eq!(dict.term_ord_or_next(b"ddd").unwrap(), TermOrdHit::Exact(1));
    assert_eq!(dict.term_ord_or_next(b"dddd").unwrap(), TermOrdHit::Next(2));

    // This is not u64::MAX because for very small sstables (only one block),
    // we don't store an index, and the pseudo-index always replies that the
    // answer lies in block number 0.
    assert_eq!(
        dict.term_ord_or_next(b"zzzzzzz").unwrap(),
        TermOrdHit::Next(2)
    );
}
|
||||
#[test]
fn test_term_to_ord_or_next_2() {
    // Same checks as the small-dictionary test, but with 50_000 filler terms between the
    // first and last term.
    let dict = {
        let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();

        let mut term_ord = 0;
        builder.insert(b"bbb", &term_ord).unwrap();

        // Fill blocks in between
        for elem in 0..50_000 {
            term_ord += 1;
            let key = format!("ccccc{elem:05X}").into_bytes();
            builder.insert(&key, &term_ord).unwrap();
        }

        term_ord += 1;
        builder.insert(b"eee", &term_ord).unwrap();

        let table = builder.finish().unwrap();
        let table = Arc::new(PermissionedHandle::new(table));
        let slice = common::file_slice::FileSlice::new(table.clone());

        Dictionary::<MonotonicU64SSTable>::open(slice).unwrap()
    };

    assert_eq!(dict.term_ord(b"bbb").unwrap(), Some(0));
    assert_eq!(dict.term_ord_or_next(b"bbb").unwrap(), TermOrdHit::Exact(0));
    assert_eq!(dict.term_ord_or_next(b"aaa").unwrap(), TermOrdHit::Next(0));
    assert_eq!(dict.term_ord_or_next(b"bb").unwrap(), TermOrdHit::Next(0));
    assert_eq!(dict.term_ord_or_next(b"bbbb").unwrap(), TermOrdHit::Next(1));
    assert_eq!(
        dict.term_ord_or_next(b"ee").unwrap(),
        TermOrdHit::Next(50001)
    );
    assert_eq!(
        dict.term_ord_or_next(b"eee").unwrap(),
        TermOrdHit::Exact(50001)
    );
    // Keys past the last term are expected to yield the `u64::MAX` sentinel here.
    assert_eq!(
        dict.term_ord_or_next(b"eeee").unwrap(),
        TermOrdHit::Next(u64::MAX)
    );

    assert_eq!(
        dict.term_ord_or_next(b"zzzzzzz").unwrap(),
        TermOrdHit::Next(u64::MAX)
    );
}
|
||||
|
||||
#[test]
fn test_term_bounds_to_ord() {
    // Two-term dictionary: "bbb" has ordinal 0 and "ddd" has ordinal 1.
    let dict = {
        let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();

        builder.insert(b"bbb", &1).unwrap();
        builder.insert(b"ddd", &2).unwrap();

        let table = builder.finish().unwrap();
        let table = Arc::new(PermissionedHandle::new(table));
        let slice = common::file_slice::FileSlice::new(table.clone());

        Dictionary::<MonotonicU64SSTable>::open(slice).unwrap()
    };

    // Test cases for lower_bound
    let test_lower_bound = |bound, expected| {
        assert_eq!(
            dict.term_bounds_to_ord::<&[u8]>(bound, Bound::Included(b"ignored"))
                .unwrap()
                .0,
            expected
        );
    };

    test_lower_bound(Bound::Included(b"aaa".as_slice()), Bound::Included(0));
    test_lower_bound(Bound::Excluded(b"aaa".as_slice()), Bound::Included(0));

    test_lower_bound(Bound::Included(b"bbb".as_slice()), Bound::Included(0));
    test_lower_bound(Bound::Excluded(b"bbb".as_slice()), Bound::Excluded(0));

    test_lower_bound(Bound::Included(b"ccc".as_slice()), Bound::Included(1));
    test_lower_bound(Bound::Excluded(b"ccc".as_slice()), Bound::Included(1));

    test_lower_bound(Bound::Included(b"zzz".as_slice()), Bound::Included(2));
    test_lower_bound(Bound::Excluded(b"zzz".as_slice()), Bound::Included(2));

    // Test cases for upper_bound
    let test_upper_bound = |bound, expected| {
        assert_eq!(
            dict.term_bounds_to_ord::<&[u8]>(Bound::Included(b"ignored"), bound)
                .unwrap()
                .1,
            expected
        );
    };
    test_upper_bound(Bound::Included(b"ccc".as_slice()), Bound::Excluded(1));
    test_upper_bound(Bound::Excluded(b"ccc".as_slice()), Bound::Excluded(1));
    test_upper_bound(Bound::Included(b"zzz".as_slice()), Bound::Excluded(2));
    test_upper_bound(Bound::Excluded(b"zzz".as_slice()), Bound::Excluded(2));
    test_upper_bound(Bound::Included(b"ddd".as_slice()), Bound::Included(1));
    test_upper_bound(Bound::Excluded(b"ddd".as_slice()), Bound::Excluded(1));
}
|
||||
|
||||
#[test]
|
||||
fn test_ord_term_conversion() {
|
||||
let (dic, slice) = make_test_sstable();
|
||||
|
||||
Reference in New Issue
Block a user