mirror of https://github.com/quickwit-oss/tantivy.git
synced 2025-12-27 20:42:54 +00:00

Compare commits

6 Commits

columnar-c ... 0.19.2

| Author | SHA1 | Date |
|---|---|---|
| | 6761237ec7 | |
| | 3da08e92c7 | |
| | 6c4b8d97ed | |
| | dc5f503c9a | |
| | 4ffcf3a933 | |
| | 079f542f97 | |
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.19.0"
+version = "0.19.2"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -60,7 +60,7 @@ arc-swap = "1.5.0"
 tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
 tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
 common = { version= "0.4", path = "./common/", package = "tantivy-common" }
-fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
+fastfield_codecs = { version= "0.3.1", path="./fastfield_codecs", default-features = false }
 ownedbytes = { version= "0.4", path="./ownedbytes" }
 
 [target.'cfg(windows)'.dependencies]
@@ -1,6 +1,6 @@
 [package]
 name = "fastfield_codecs"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
@@ -1,3 +1,4 @@
+use std::fmt::{self, Debug};
 use std::marker::PhantomData;
 use std::ops::{Range, RangeInclusive};
 
@@ -6,7 +7,7 @@ use tantivy_bitpacker::minmax;
 use crate::monotonic_mapping::StrictlyMonotonicFn;
 
 /// `Column` provides columnar access on a field.
-pub trait Column<T: PartialOrd = u64>: Send + Sync {
+pub trait Column<T: PartialOrd + Debug = u64>: Send + Sync {
     /// Return the value associated with the given idx.
     ///
     /// This accessor should return as fast as possible.
@@ -83,7 +84,7 @@ pub struct VecColumn<'a, T = u64> {
     max_value: T,
 }
 
-impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
+impl<'a, C: Column<T>, T: Copy + PartialOrd + fmt::Debug> Column<T> for &'a C {
     fn get_val(&self, idx: u32) -> T {
         (*self).get_val(idx)
     }
@@ -109,7 +110,7 @@ impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
     }
 }
 
-impl<'a, T: Copy + PartialOrd + Send + Sync> Column<T> for VecColumn<'a, T> {
+impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> Column<T> for VecColumn<'a, T> {
     fn get_val(&self, position: u32) -> T {
         self.values[position as usize]
     }
@@ -177,8 +178,8 @@ pub fn monotonic_map_column<C, T, Input, Output>(
 where
     C: Column<Input>,
     T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
-    Input: PartialOrd + Send + Sync + Clone,
-    Output: PartialOrd + Send + Sync + Clone,
+    Input: PartialOrd + Send + Sync + Copy + Debug,
+    Output: PartialOrd + Send + Sync + Copy + Debug,
 {
     MonotonicMappingColumn {
         from_column,
@@ -191,8 +192,8 @@ impl<C, T, Input, Output> Column<Output> for MonotonicMappingColumn<C, T, Input>
 where
     C: Column<Input>,
     T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
-    Input: PartialOrd + Send + Sync + Clone,
-    Output: PartialOrd + Send + Sync + Clone,
+    Input: PartialOrd + Send + Sync + Copy + Debug,
+    Output: PartialOrd + Send + Sync + Copy + Debug,
 {
     #[inline]
     fn get_val(&self, idx: u32) -> Output {
@@ -228,12 +229,15 @@ where
         doc_id_range: Range<u32>,
         positions: &mut Vec<u32>,
     ) {
-        self.from_column.get_docids_for_value_range(
-            self.monotonic_mapping.inverse(range.start().clone())
-                ..=self.monotonic_mapping.inverse(range.end().clone()),
-            doc_id_range,
-            positions,
-        )
+        if range.start() > &self.max_value() || range.end() < &self.min_value() {
+            return;
+        }
+        let range = self.monotonic_mapping.inverse_coerce(range);
+        if range.start() > range.end() {
+            return;
+        }
+        self.from_column
+            .get_docids_for_value_range(range, doc_id_range, positions)
     }
 
     // We voluntarily do not implement get_range as it yields a regression,
@@ -254,7 +258,7 @@ where T: Iterator + Clone + ExactSizeIterator
 impl<T> Column<T::Item> for IterColumn<T>
 where
     T: Iterator + Clone + ExactSizeIterator + Send + Sync,
-    T::Item: PartialOrd,
+    T::Item: PartialOrd + fmt::Debug,
 {
     fn get_val(&self, idx: u32) -> T::Item {
         self.0.clone().nth(idx as usize).unwrap()
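The recurring change in this file is the new `Debug` bound on `Column<T>` and on every type that implements it. A minimal sketch of what the bound buys generic callers (the `describe` helper below is hypothetical, not part of the diff; it only relies on `get_val`, `min_value`, and `max_value`, which appear in the hunks above):

```rust
use std::fmt::Debug;

use fastfield_codecs::Column;

// Hypothetical helper: because `Column<T>` now guarantees `T: Debug`, generic code
// can debug-print whatever it reads from a column, e.g. in test assertions.
fn describe<T: PartialOrd + Debug>(column: &dyn Column<T>) {
    println!(
        "first value: {:?} (min: {:?}, max: {:?})",
        column.get_val(0),
        column.min_value(),
        column.max_value()
    );
}
```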
@@ -455,6 +455,8 @@ impl CompactSpaceDecompressor
 #[cfg(test)]
 mod tests {
 
+    use std::fmt;
+
     use super::*;
     use crate::format_version::read_format_version;
     use crate::null_index_footer::read_null_index_footer;
@@ -708,7 +710,7 @@ mod tests {
         );
     }
 
-    fn get_positions_for_value_range_helper<C: Column<T> + ?Sized, T: PartialOrd>(
+    fn get_positions_for_value_range_helper<C: Column<T> + ?Sized, T: PartialOrd + fmt::Debug>(
         column: &C,
         value_range: RangeInclusive<T>,
        doc_id_range: Range<u32>,
@@ -14,9 +14,9 @@ extern crate more_asserts;
 #[cfg(all(test, feature = "unstable"))]
 extern crate test;
 
-use std::io;
 use std::io::Write;
 use std::sync::Arc;
+use std::{fmt, io};
 
 use common::BinarySerializable;
 use compact_space::CompactSpaceDecompressor;
@@ -132,7 +132,7 @@ impl U128FastFieldCodecType {
 }
 
 /// Returns the correct codec reader wrapped in the `Arc` for the data.
-pub fn open_u128<Item: MonotonicallyMappableToU128>(
+pub fn open_u128<Item: MonotonicallyMappableToU128 + fmt::Debug>(
     bytes: OwnedBytes,
 ) -> io::Result<Arc<dyn Column<Item>>> {
     let (bytes, _format_version) = read_format_version(bytes)?;
@@ -146,7 +146,9 @@ pub fn open_u128<Item: MonotonicallyMappableToU128>(
 }
 
 /// Returns the correct codec reader wrapped in the `Arc` for the data.
-pub fn open<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Arc<dyn Column<T>>> {
+pub fn open<T: MonotonicallyMappableToU64 + fmt::Debug>(
+    bytes: OwnedBytes,
+) -> io::Result<Arc<dyn Column<T>>> {
     let (bytes, _format_version) = read_format_version(bytes)?;
     let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
     let header = Header::deserialize(&mut bytes)?;
@@ -159,7 +161,7 @@ pub fn open<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Arc<
     }
 }
 
-fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
+fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64 + fmt::Debug>(
     bytes: OwnedBytes,
     header: &Header,
 ) -> io::Result<Arc<dyn Column<Item>>> {
@@ -320,6 +322,9 @@ mod tests {
     pub fn get_codec_test_datasets() -> Vec<(Vec<u64>, &'static str)> {
         let mut data_and_names = vec![];
 
+        let data = vec![10];
+        data_and_names.push((data, "minimal test"));
+
         let data = (10..=10_000_u64).collect::<Vec<_>>();
         data_and_names.push((data, "simple monotonically increasing"));
 
@@ -327,6 +332,9 @@ mod tests {
             vec![5, 6, 7, 8, 9, 10, 99, 100],
             "offset in linear interpol",
         ));
+
+        data_and_names.push((vec![3, 18446744073709551613, 5], "docid range regression"));
+
         data_and_names.push((vec![5, 50, 3, 13, 1, 1000, 35], "rand small"));
         data_and_names.push((vec![10], "single value"));
 
@@ -1,4 +1,6 @@
+use std::fmt;
 use std::marker::PhantomData;
+use std::ops::RangeInclusive;
 
 use fastdivide::DividerU64;
 
@@ -6,7 +8,9 @@ use crate::MonotonicallyMappableToU128;
 
 /// Monotonic maps a value to u64 value space.
 /// Monotonic mapping enables `PartialOrd` on u64 space without conversion to original space.
-pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy + Send + Sync {
+pub trait MonotonicallyMappableToU64:
+    'static + PartialOrd + Copy + Send + Sync + fmt::Debug
+{
     /// Converts a value to u64.
     ///
     /// Internally all fast field values are encoded as u64.
@@ -29,11 +33,29 @@ pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy + Send + Sync
 /// mapping from their range to their domain. The `inverse` method is required when opening a codec,
 /// so a value can be converted back to its original domain (e.g. ip address or f64) from its
 /// internal representation.
-pub trait StrictlyMonotonicFn<External, Internal> {
+pub trait StrictlyMonotonicFn<External: Copy, Internal: Copy> {
     /// Strictly monotonically maps the value from External to Internal.
     fn mapping(&self, inp: External) -> Internal;
     /// Inverse of `mapping`. Maps the value from Internal to External.
     fn inverse(&self, out: Internal) -> External;
+
+    /// Maps a user provided value from External to Internal.
+    /// It may be necessary to coerce the value if it is outside the value space.
+    /// In that case it tries to find the next greater value in the value space.
+    ///
+    /// Returns a bool to mark if a value was outside the value space and had to be coerced _up_.
+    /// With that information we can detect if two values in a range both map outside the same value
+    /// space.
+    ///
+    /// coerce_up means the next valid upper value in the value space will be chosen if the value
+    /// has to be coerced.
+    fn mapping_coerce(&self, inp: RangeInclusive<External>) -> RangeInclusive<Internal> {
+        self.mapping(*inp.start())..=self.mapping(*inp.end())
+    }
+    /// Inverse of `mapping_coerce`.
+    fn inverse_coerce(&self, out: RangeInclusive<Internal>) -> RangeInclusive<External> {
+        self.inverse(*out.start())..=self.inverse(*out.end())
+    }
 }
 
 /// Inverts a strictly monotonic mapping from `StrictlyMonotonicFn<A, B>` to
@@ -54,7 +76,10 @@ impl<T> From<T> for StrictlyMonotonicMappingInverter<T> {
 }
 
 impl<From, To, T> StrictlyMonotonicFn<To, From> for StrictlyMonotonicMappingInverter<T>
-where T: StrictlyMonotonicFn<From, To>
+where
+    T: StrictlyMonotonicFn<From, To>,
+    From: Copy,
+    To: Copy,
 {
     fn mapping(&self, val: To) -> From {
         self.orig_mapping.inverse(val)
@@ -63,6 +88,15 @@ where T: StrictlyMonotonicFn<From, To>
     fn inverse(&self, val: From) -> To {
         self.orig_mapping.mapping(val)
     }
+
+    #[inline]
+    fn mapping_coerce(&self, inp: RangeInclusive<To>) -> RangeInclusive<From> {
+        self.orig_mapping.inverse_coerce(inp)
+    }
+    #[inline]
+    fn inverse_coerce(&self, out: RangeInclusive<From>) -> RangeInclusive<To> {
+        self.orig_mapping.mapping_coerce(out)
+    }
 }
 
 /// Applies the strictly monotonic mapping from `T` without any additional changes.
@@ -134,6 +168,31 @@ impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
     fn inverse(&self, out: u64) -> External {
         External::from_u64(self.min_value + out * self.gcd)
     }
+
+    #[inline]
+    #[allow(clippy::reversed_empty_ranges)]
+    fn mapping_coerce(&self, inp: RangeInclusive<External>) -> RangeInclusive<u64> {
+        let end = External::to_u64(*inp.end());
+        if end < self.min_value || inp.end() < inp.start() {
+            return 1..=0;
+        }
+        let map_coerce = |mut inp, coerce_up| {
+            let inp_lower_bound = self.inverse(0);
+            if inp < inp_lower_bound {
+                inp = inp_lower_bound;
+            }
+            let val = External::to_u64(inp);
+            let need_coercion = coerce_up && (val - self.min_value) % self.gcd != 0;
+            let mut mapped_val = self.mapping(inp);
+            if need_coercion {
+                mapped_val += 1;
+            }
+            mapped_val
+        };
+        let start = map_coerce(*inp.start(), true);
+        let end = map_coerce(*inp.end(), false);
+        start..=end
+    }
 }
 
 /// Strictly monotonic mapping with a base value.
@@ -149,6 +208,17 @@ impl StrictlyMonotonicMappingToInternalBaseval {
 impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
     for StrictlyMonotonicMappingToInternalBaseval
 {
+    #[inline]
+    #[allow(clippy::reversed_empty_ranges)]
+    fn mapping_coerce(&self, inp: RangeInclusive<External>) -> RangeInclusive<u64> {
+        if External::to_u64(*inp.end()) < self.min_value {
+            return 1..=0;
+        }
+        let start = self.mapping(External::to_u64(*inp.start()).max(self.min_value));
+        let end = self.mapping(External::to_u64(*inp.end()));
+        start..=end
+    }
+
     fn mapping(&self, val: External) -> u64 {
         External::to_u64(val) - self.min_value
     }
@@ -224,7 +294,7 @@ mod tests {
         test_round_trip::<_, _, u64>(&mapping, 100u64);
     }
 
-    fn test_round_trip<T: StrictlyMonotonicFn<K, L>, K: std::fmt::Debug + Eq + Copy, L>(
+    fn test_round_trip<T: StrictlyMonotonicFn<K, L>, K: std::fmt::Debug + Eq + Copy, L: Copy>(
        mapping: &T,
        test_val: K,
    ) {
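To see what the GCD variant of `mapping_coerce` above is doing, here is a standalone arithmetic sketch (hypothetical code, not part of the diff) for a column stored with `min_value = 100` and `gcd = 100`, the shape exercised by `test_gcd_bug_regression_1757` further down: internal values are `(external - min_value) / gcd`, and a lower bound that is not exactly representable gets coerced up.

```rust
// Hypothetical standalone model of the GCD coercion above.
fn coerce_range(start: u64, end: u64, min: u64, gcd: u64) -> std::ops::RangeInclusive<u64> {
    if end < min || end < start {
        return 1..=0; // canonical empty range, as in the implementation above
    }
    let start = start.max(min); // clamp values below the value space
    let mut mapped_start = (start - min) / gcd;
    if (start - min) % gcd != 0 {
        mapped_start += 1; // coerce the lower bound up to the next encodable value
    }
    let mapped_end = (end - min) / gcd; // upper bound rounds down
    mapped_start..=mapped_end
}

fn main() {
    // Stored values 100, 200, 300 become 0, 1, 2 internally (min = 100, gcd = 100).
    assert_eq!(coerce_range(100, 250, 100, 100), 0..=1); // matches 100 and 200
    let r = coerce_range(101, 199, 100, 100); // no encodable value lies in 101..=199
    assert!(r.start() > r.end()); // coerces to the empty range 1..=0
}
```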
@@ -1,8 +1,11 @@
+use std::fmt;
 use std::net::Ipv6Addr;
 
 /// Monotonic maps a value to u128 value space
 /// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
-pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Send + Sync {
+pub trait MonotonicallyMappableToU128:
+    'static + PartialOrd + Copy + Send + Sync + fmt::Debug
+{
     /// Converts a value to u128.
     ///
     /// Internally all fast field values are encoded as u64.
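Any type mapped into the u128 value space now has to be `Debug` as well. A hypothetical implementor is sketched below (not part of the diff; the `to_u128`/`from_u128` method names follow the trait's "converts a value to u128" contract and are an assumption here, as is the crate-root re-export):

```rust
use fastfield_codecs::MonotonicallyMappableToU128;

// `Debug` is now mandatory alongside PartialOrd + Copy + Send + Sync.
#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)]
struct PackedIp(u128);

impl MonotonicallyMappableToU128 for PackedIp {
    fn to_u128(self) -> u128 {
        self.0
    }
    fn from_u128(val: u128) -> Self {
        PackedIp(val)
    }
}
```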
@@ -17,9 +17,9 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-use std::io;
 use std::num::NonZeroU64;
 use std::sync::Arc;
+use std::{fmt, io};
 
 use common::{BinarySerializable, VInt};
 use log::warn;
@@ -168,7 +168,7 @@ impl BinarySerializable for Header {
 
 /// Return estimated compression for given codec in the value range [0.0..1.0], where 1.0 means no
 /// compression.
-pub fn estimate<T: MonotonicallyMappableToU64>(
+pub fn estimate<T: MonotonicallyMappableToU64 + fmt::Debug>(
     typed_column: impl Column<T>,
     codec_type: FastFieldCodecType,
 ) -> Option<f32> {
@@ -214,7 +214,7 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
 }
 
 /// Serializes the column with the codec with the best estimate on the data.
-pub fn serialize<T: MonotonicallyMappableToU64>(
+pub fn serialize<T: MonotonicallyMappableToU64 + fmt::Debug>(
     typed_column: impl Column<T>,
     output: &mut impl io::Write,
     codecs: &[FastFieldCodecType],
@@ -294,7 +294,7 @@ fn serialize_given_codec(
 }
 
 /// Helper function to serialize a column (autodetect from all codecs) and then open it
-pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default + fmt::Debug>(
     column: &[T],
 ) -> Arc<dyn Column<T>> {
     let mut buffer = Vec::new();
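The `Debug` bound also reaches the public serialization entry points above. A minimal end-to-end sketch (hypothetical, not part of the diff; it assumes `serialize_and_load` and `Column` are available at the fastfield_codecs crate root) of the behaviour the coercion fix targets:

```rust
use fastfield_codecs::{serialize_and_load, Column};

fn main() {
    // `u64` satisfies `MonotonicallyMappableToU64 + Ord + Default + fmt::Debug`.
    // The values share a GCD of 100 with min_value 100, the case the fix above targets.
    let column = serialize_and_load(&[100u64, 200, 300]);

    // A range containing no stored value must yield no doc ids instead of
    // accidentally matching doc 1 (value 200) through a sloppy inverse mapping.
    let mut docids = Vec::new();
    column.get_docids_for_value_range(101..=199, 0..u32::MAX, &mut docids);
    assert!(docids.is_empty());

    docids.clear();
    column.get_docids_for_value_range(100..=250, 0..u32::MAX, &mut docids);
    assert_eq!(docids.len(), 2); // the docs holding 100 and 200
}
```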
@@ -145,7 +145,7 @@ impl FastFieldType {
 mod tests {
 
     use std::collections::HashMap;
-    use std::ops::Range;
+    use std::ops::{Range, RangeInclusive};
     use std::path::Path;
     use std::sync::Arc;
 
@@ -159,7 +159,9 @@ mod tests {
     use super::*;
     use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
     use crate::merge_policy::NoMergePolicy;
-    use crate::schema::{Cardinality, Document, Field, Schema, SchemaBuilder, FAST, STRING, TEXT};
+    use crate::schema::{
+        Cardinality, Document, Field, Schema, SchemaBuilder, FAST, INDEXED, STRING, TEXT,
+    };
     use crate::time::OffsetDateTime;
     use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};
 
@@ -969,4 +971,117 @@ mod tests {
         }
         Ok(len)
     }
 
+    #[test]
+    fn test_gcd_bug_regression_1757() {
+        let mut schema_builder = Schema::builder();
+        let num_field = schema_builder.add_u64_field("url_norm_hash", FAST | INDEXED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        {
+            let mut writer = index.writer_for_tests().unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 100u64,
+                })
+                .unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 200u64,
+                })
+                .unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 300u64,
+                })
+                .unwrap();
+
+            writer.commit().unwrap();
+        }
+
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        let segment = &searcher.segment_readers()[0];
+        let field = segment.fast_fields().u64(num_field).unwrap();
+
+        let numbers = vec![100, 200, 300];
+        let test_range = |range: RangeInclusive<u64>| {
+            let expexted_count = numbers.iter().filter(|num| range.contains(num)).count();
+            let mut vec = vec![];
+            field.get_docids_for_value_range(range, 0..u32::MAX, &mut vec);
+            assert_eq!(vec.len(), expexted_count);
+        };
+        test_range(50..=50);
+        test_range(150..=150);
+        test_range(350..=350);
+        test_range(100..=250);
+        test_range(101..=200);
+        test_range(101..=199);
+        test_range(100..=300);
+        test_range(100..=299);
+    }
+
+    #[test]
+    fn test_mapping_bug_docids_for_value_range() {
+        let mut schema_builder = Schema::builder();
+        let num_field = schema_builder.add_u64_field("url_norm_hash", FAST | INDEXED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        {
+            // Values without gcd, but with min_value
+            let mut writer = index.writer_for_tests().unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 1000u64,
+                })
+                .unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 1001u64,
+                })
+                .unwrap();
+            writer
+                .add_document(doc! {
+                    num_field => 1003u64,
+                })
+                .unwrap();
+            writer.commit().unwrap();
+        }
+
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        let segment = &searcher.segment_readers()[0];
+        let field = segment.fast_fields().u64(num_field).unwrap();
+
+        let numbers = vec![1000, 1001, 1003];
+        let test_range = |range: RangeInclusive<u64>| {
+            let expexted_count = numbers.iter().filter(|num| range.contains(num)).count();
+            let mut vec = vec![];
+            field.get_docids_for_value_range(range, 0..u32::MAX, &mut vec);
+            assert_eq!(vec.len(), expexted_count);
+        };
+        let test_range_variant = |start, stop| {
+            let start_range = start..=stop;
+            test_range(start_range);
+            let start_range = start..=(stop - 1);
+            test_range(start_range);
+            let start_range = start..=(stop + 1);
+            test_range(start_range);
+            let start_range = (start - 1)..=stop;
+            test_range(start_range);
+            let start_range = (start - 1)..=(stop - 1);
+            test_range(start_range);
+            let start_range = (start - 1)..=(stop + 1);
+            test_range(start_range);
+            let start_range = (start + 1)..=stop;
+            test_range(start_range);
+            let start_range = (start + 1)..=(stop - 1);
+            test_range(start_range);
+            let start_range = (start + 1)..=(stop + 1);
+            test_range(start_range);
+        };
+        test_range_variant(50, 50);
+        test_range_variant(1000, 1000);
+        test_range_variant(1000, 1002);
+    }
 }
@@ -1,3 +1,4 @@
+use std::fmt;
 use std::io::{self, Write};
 
 pub use fastfield_codecs::Column;
@@ -49,7 +50,7 @@ impl CompositeFastFieldSerializer {
 
     /// Serialize data into a new u64 fast field. The best compression codec will be chosen
     /// automatically.
-    pub fn create_auto_detect_u64_fast_field<T: MonotonicallyMappableToU64>(
+    pub fn create_auto_detect_u64_fast_field<T: MonotonicallyMappableToU64 + fmt::Debug>(
        &mut self,
        field: Field,
        fastfield_accessor: impl Column<T>,
@@ -59,7 +60,9 @@ impl CompositeFastFieldSerializer {
 
    /// Serialize data into a new u64 fast field. The best compression codec will be chosen
    /// automatically.
-    pub fn create_auto_detect_u64_fast_field_with_idx<T: MonotonicallyMappableToU64>(
+    pub fn create_auto_detect_u64_fast_field_with_idx<
+        T: MonotonicallyMappableToU64 + fmt::Debug,
+    >(
        &mut self,
        field: Field,
        fastfield_accessor: impl Column<T>,
@@ -72,7 +75,9 @@ impl CompositeFastFieldSerializer {
 
    /// Serialize data into a new u64 fast field. The best compression codec of the provided
    /// will be chosen.
-    pub fn create_auto_detect_u64_fast_field_with_idx_and_codecs<T: MonotonicallyMappableToU64>(
+    pub fn create_auto_detect_u64_fast_field_with_idx_and_codecs<
+        T: MonotonicallyMappableToU64 + fmt::Debug,
+    >(
        &mut self,
        field: Field,
        fastfield_accessor: impl Column<T>,
@@ -90,7 +90,7 @@ impl CheckpointBlock {
             return Ok(());
         }
         let mut doc = read_u32_vint(data);
-        let mut start_offset = read_u32_vint(data) as usize;
+        let mut start_offset = VInt::deserialize_u64(data)? as usize;
         for _ in 0..len {
             let num_docs = read_u32_vint(data);
             let block_num_bytes = read_u32_vint(data) as usize;
@@ -147,6 +147,15 @@ mod tests {
         test_aux_ser_deser(&checkpoints)
     }
 
+    #[test]
+    fn test_block_serialize_large_byte_range() -> io::Result<()> {
+        let checkpoints = vec![Checkpoint {
+            doc_range: 10..12,
+            byte_range: 8_000_000_000..9_000_000_000,
+        }];
+        test_aux_ser_deser(&checkpoints)
+    }
+
     #[test]
     fn test_block_serialize() -> io::Result<()> {
         let offsets: Vec<usize> = (0..11).map(|i| i * i * i).collect();
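The one-line change above switches the checkpoint start offset from a u32 varint to a u64 varint. A standalone illustration (not from the diff) of why that matters: store byte offsets can exceed `u32::MAX` once a segment's doc store grows past 4 GiB, which is exactly the case the new `test_block_serialize_large_byte_range` test covers.

```rust
fn main() {
    // The byte offset used in the new test above does not fit in a u32.
    let start_offset: u64 = 8_000_000_000;
    assert!(start_offset > u32::MAX as u64);
    // Reading it back through a u32 varint would silently truncate it.
    assert_eq!(start_offset as u32 as u64, 3_705_032_704);
}
```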