mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-27 12:32:55 +00:00
update edition to 2024 (#2620)
* update common to edition 2024 * update bitpacker to edition 2024 * update stacker to edition 2024 * update query-grammar to edition 2024 * update sstable to edition 2024 + fmt * fmt * update columnar to edition 2024 * cargo fmt * use None instead of _
This commit is contained in:
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
keywords = ["search", "information", "retrieval"]
|
keywords = ["search", "information", "retrieval"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
rust-version = "1.75"
|
rust-version = "1.85"
|
||||||
exclude = ["benches/*.json", "benches/*.txt"]
|
exclude = ["benches/*.json", "benches/*.txt"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-bitpacker"
|
name = "tantivy-bitpacker"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = []
|
categories = []
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use super::bitpacker::BitPacker;
|
use super::bitpacker::BitPacker;
|
||||||
use super::compute_num_bits;
|
use super::compute_num_bits;
|
||||||
use crate::{minmax, BitUnpacker};
|
use crate::{BitUnpacker, minmax};
|
||||||
|
|
||||||
const BLOCK_SIZE: usize = 128;
|
const BLOCK_SIZE: usize = 128;
|
||||||
|
|
||||||
|
|||||||
@@ -33,11 +33,7 @@ pub use crate::blocked_bitpacker::BlockedBitpacker;
|
|||||||
/// number of bits.
|
/// number of bits.
|
||||||
pub fn compute_num_bits(n: u64) -> u8 {
|
pub fn compute_num_bits(n: u64) -> u8 {
|
||||||
let amplitude = (64u32 - n.leading_zeros()) as u8;
|
let amplitude = (64u32 - n.leading_zeros()) as u8;
|
||||||
if amplitude <= 64 - 8 {
|
if amplitude <= 64 - 8 { amplitude } else { 64 }
|
||||||
amplitude
|
|
||||||
} else {
|
|
||||||
64
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes the (min, max) of an iterator of `PartialOrd` values.
|
/// Computes the (min, max) of an iterator of `PartialOrd` values.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-columnar"
|
name = "tantivy-columnar"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
repository = "https://github.com/quickwit-oss/tantivy"
|
repository = "https://github.com/quickwit-oss/tantivy"
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use binggan::{black_box, InputGroup};
|
use binggan::{InputGroup, black_box};
|
||||||
use common::*;
|
use common::*;
|
||||||
use tantivy_columnar::Column;
|
use tantivy_columnar::Column;
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ extern crate test;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||||
use tantivy_columnar::*;
|
use tantivy_columnar::*;
|
||||||
use test::{black_box, Bencher};
|
use test::{Bencher, black_box};
|
||||||
|
|
||||||
struct Columns {
|
struct Columns {
|
||||||
pub optional: Column,
|
pub optional: Column,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
pub mod common;
|
pub mod common;
|
||||||
|
|
||||||
use binggan::BenchRunner;
|
use binggan::BenchRunner;
|
||||||
use common::{generate_columnar_with_name, Card};
|
use common::{Card, generate_columnar_with_name};
|
||||||
use tantivy_columnar::*;
|
use tantivy_columnar::*;
|
||||||
|
|
||||||
const NUM_DOCS: u32 = 100_000;
|
const NUM_DOCS: u32 = 100_000;
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
|||||||
use common::OwnedBytes;
|
use common::OwnedBytes;
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
use rand::{random, Rng, SeedableRng};
|
use rand::{Rng, SeedableRng, random};
|
||||||
use tantivy_columnar::ColumnValues;
|
use tantivy_columnar::ColumnValues;
|
||||||
use test::Bencher;
|
use test::Bencher;
|
||||||
extern crate test;
|
extern crate test;
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use std::ops::RangeInclusive;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||||
use tantivy_columnar::*;
|
use tantivy_columnar::*;
|
||||||
use test::Bencher;
|
use test::Bencher;
|
||||||
|
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
|
|||||||
&'a self,
|
&'a self,
|
||||||
docs: &'a [u32],
|
docs: &'a [u32],
|
||||||
accessor: &Column<T>,
|
accessor: &Column<T>,
|
||||||
) -> impl Iterator<Item = (DocId, T)> + 'a {
|
) -> impl Iterator<Item = (DocId, T)> + 'a + use<'a, T> {
|
||||||
if accessor.index.get_cardinality().is_full() {
|
if accessor.index.get_cardinality().is_full() {
|
||||||
docs.iter().cloned().zip(self.val_cache.iter().cloned())
|
docs.iter().cloned().zip(self.val_cache.iter().cloned())
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ use std::{fmt, io};
|
|||||||
|
|
||||||
use sstable::{Dictionary, VoidSSTable};
|
use sstable::{Dictionary, VoidSSTable};
|
||||||
|
|
||||||
use crate::column::Column;
|
|
||||||
use crate::RowId;
|
use crate::RowId;
|
||||||
|
use crate::column::Column;
|
||||||
|
|
||||||
/// Dictionary encoded column.
|
/// Dictionary encoded column.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -9,13 +9,14 @@ use std::sync::Arc;
|
|||||||
use common::BinarySerializable;
|
use common::BinarySerializable;
|
||||||
pub use dictionary_encoded::{BytesColumn, StrColumn};
|
pub use dictionary_encoded::{BytesColumn, StrColumn};
|
||||||
pub use serialize::{
|
pub use serialize::{
|
||||||
open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64,
|
open_column_bytes, open_column_str, open_column_u64, open_column_u128,
|
||||||
open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
|
open_column_u128_as_compact_u64, serialize_column_mappable_to_u64,
|
||||||
|
serialize_column_mappable_to_u128,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::column_index::{ColumnIndex, Set};
|
use crate::column_index::{ColumnIndex, Set};
|
||||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||||
use crate::column_values::{monotonic_map_column, ColumnValues};
|
use crate::column_values::{ColumnValues, monotonic_map_column};
|
||||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ use common::OwnedBytes;
|
|||||||
use sstable::Dictionary;
|
use sstable::Dictionary;
|
||||||
|
|
||||||
use crate::column::{BytesColumn, Column};
|
use crate::column::{BytesColumn, Column};
|
||||||
use crate::column_index::{serialize_column_index, SerializableColumnIndex};
|
use crate::column_index::{SerializableColumnIndex, serialize_column_index};
|
||||||
use crate::column_values::{
|
use crate::column_values::{
|
||||||
|
CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||||
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
|
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
|
||||||
CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
|
||||||
};
|
};
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::{StrColumn, Version};
|
use crate::{StrColumn, Version};
|
||||||
|
|||||||
@@ -99,9 +99,9 @@ mod tests {
|
|||||||
|
|
||||||
use crate::column_index::merge::detect_cardinality;
|
use crate::column_index::merge::detect_cardinality;
|
||||||
use crate::column_index::multivalued_index::{
|
use crate::column_index::multivalued_index::{
|
||||||
open_multivalued_index, serialize_multivalued_index, MultiValueIndex,
|
MultiValueIndex, open_multivalued_index, serialize_multivalued_index,
|
||||||
};
|
};
|
||||||
use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
|
use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index};
|
||||||
use crate::{
|
use crate::{
|
||||||
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
|
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -137,8 +137,8 @@ impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::column_index::OptionalIndex;
|
|
||||||
use crate::RowAddr;
|
use crate::RowAddr;
|
||||||
|
use crate::column_index::OptionalIndex;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_integrate_num_vals_empty() {
|
fn test_integrate_num_vals_empty() {
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use crate::column_index::SerializableColumnIndex;
|
||||||
use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
|
use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
|
||||||
use crate::column_index::serialize::SerializableOptionalIndex;
|
use crate::column_index::serialize::SerializableOptionalIndex;
|
||||||
use crate::column_index::SerializableColumnIndex;
|
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
|
use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ pub use merge::merge_column_index;
|
|||||||
pub(crate) use multivalued_index::SerializableMultivalueIndex;
|
pub(crate) use multivalued_index::SerializableMultivalueIndex;
|
||||||
pub use optional_index::{OptionalIndex, Set};
|
pub use optional_index::{OptionalIndex, Set};
|
||||||
pub use serialize::{
|
pub use serialize::{
|
||||||
open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex,
|
SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::column_index::multivalued_index::MultiValueIndex;
|
use crate::column_index::multivalued_index::MultiValueIndex;
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes};
|
|||||||
use super::optional_index::{open_optional_index, serialize_optional_index};
|
use super::optional_index::{open_optional_index, serialize_optional_index};
|
||||||
use super::{OptionalIndex, SerializableOptionalIndex, Set};
|
use super::{OptionalIndex, SerializableOptionalIndex, Set};
|
||||||
use crate::column_values::{
|
use crate::column_values::{
|
||||||
load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues,
|
CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values,
|
||||||
};
|
};
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::{DocId, RowId, Version};
|
use crate::{DocId, RowId, Version};
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ mod set_block;
|
|||||||
use common::{BinarySerializable, OwnedBytes, VInt};
|
use common::{BinarySerializable, OwnedBytes, VInt};
|
||||||
pub use set::{SelectCursor, Set, SetCodec};
|
pub use set::{SelectCursor, Set, SetCodec};
|
||||||
use set_block::{
|
use set_block::{
|
||||||
DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
|
DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
@@ -259,11 +259,13 @@ impl Set<RowId> for OptionalIndex {
|
|||||||
|
|
||||||
impl OptionalIndex {
|
impl OptionalIndex {
|
||||||
pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
|
pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
|
||||||
assert!(row_ids
|
assert!(
|
||||||
.last()
|
row_ids
|
||||||
.copied()
|
.last()
|
||||||
.map(|last_row_id| last_row_id < num_rows)
|
.copied()
|
||||||
.unwrap_or(true));
|
.map(|last_row_id| last_row_id < num_rows)
|
||||||
|
.unwrap_or(true)
|
||||||
|
);
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
|
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
|
||||||
let bytes = OwnedBytes::new(buffer);
|
let bytes = OwnedBytes::new(buffer);
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::io::{self, Write};
|
|||||||
|
|
||||||
use common::BinarySerializable;
|
use common::BinarySerializable;
|
||||||
|
|
||||||
use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK};
|
use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec};
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn get_bit_at(input: u64, n: u16) -> bool {
|
fn get_bit_at(input: u64, n: u16) -> bool {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
mod dense;
|
mod dense;
|
||||||
mod sparse;
|
mod sparse;
|
||||||
|
|
||||||
pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES};
|
pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};
|
||||||
pub use sparse::{SparseBlock, SparseBlockCodec};
|
pub use sparse::{SparseBlock, SparseBlockCodec};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -254,11 +254,7 @@ mod bench {
|
|||||||
let mut current = start;
|
let mut current = start;
|
||||||
std::iter::from_fn(move || {
|
std::iter::from_fn(move || {
|
||||||
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
|
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
|
||||||
if current >= end {
|
if current >= end { None } else { Some(current) }
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(current)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
|||||||
|
|
||||||
use common::{CountingWriter, OwnedBytes};
|
use common::{CountingWriter, OwnedBytes};
|
||||||
|
|
||||||
use super::multivalued_index::SerializableMultivalueIndex;
|
|
||||||
use super::OptionalIndex;
|
use super::OptionalIndex;
|
||||||
|
use super::multivalued_index::SerializableMultivalueIndex;
|
||||||
|
use crate::column_index::ColumnIndex;
|
||||||
use crate::column_index::multivalued_index::serialize_multivalued_index;
|
use crate::column_index::multivalued_index::serialize_multivalued_index;
|
||||||
use crate::column_index::optional_index::serialize_optional_index;
|
use crate::column_index::optional_index::serialize_optional_index;
|
||||||
use crate::column_index::ColumnIndex;
|
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::{Cardinality, RowId, Version};
|
use crate::{Cardinality, RowId, Version};
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ use crate::column_values::u64_based::*;
|
|||||||
fn get_data() -> Vec<u64> {
|
fn get_data() -> Vec<u64> {
|
||||||
let mut rng = StdRng::seed_from_u64(2u64);
|
let mut rng = StdRng::seed_from_u64(2u64);
|
||||||
let mut data: Vec<_> = (100..55000_u64)
|
let mut data: Vec<_> = (100..55000_u64)
|
||||||
.map(|num| num + rng.gen::<u8>() as u64)
|
.map(|num| num + rng.r#gen::<u8>() as u64)
|
||||||
.collect();
|
.collect();
|
||||||
data.push(99_000);
|
data.push(99_000);
|
||||||
data.insert(1000, 2000);
|
data.insert(1000, 2000);
|
||||||
|
|||||||
@@ -26,13 +26,13 @@ mod monotonic_column;
|
|||||||
|
|
||||||
pub(crate) use merge::MergedColumnValues;
|
pub(crate) use merge::MergedColumnValues;
|
||||||
pub use stats::ColumnStats;
|
pub use stats::ColumnStats;
|
||||||
pub use u128_based::{
|
|
||||||
open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
|
|
||||||
CompactSpaceU64Accessor,
|
|
||||||
};
|
|
||||||
pub use u64_based::{
|
pub use u64_based::{
|
||||||
load_u64_based_column_values, serialize_and_load_u64_based_column_values,
|
ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
|
||||||
serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
|
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
||||||
|
};
|
||||||
|
pub use u128_based::{
|
||||||
|
CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
|
||||||
|
serialize_column_values_u128,
|
||||||
};
|
};
|
||||||
pub use vec_column::VecColumn;
|
pub use vec_column::VecColumn;
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ use std::fmt::Debug;
|
|||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::ops::{Range, RangeInclusive};
|
use std::ops::{Range, RangeInclusive};
|
||||||
|
|
||||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
|
||||||
use crate::ColumnValues;
|
use crate::ColumnValues;
|
||||||
|
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||||
|
|
||||||
struct MonotonicMappingColumn<C, T, Input> {
|
struct MonotonicMappingColumn<C, T, Input> {
|
||||||
from_column: C,
|
from_column: C,
|
||||||
@@ -99,10 +99,10 @@ where
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::column_values::VecColumn;
|
||||||
use crate::column_values::monotonic_mapping::{
|
use crate::column_values::monotonic_mapping::{
|
||||||
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
|
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
|
||||||
};
|
};
|
||||||
use crate::column_values::VecColumn;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_monotonic_mapping_iter() {
|
fn test_monotonic_mapping_iter() {
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ use build_compact_space::get_compact_space;
|
|||||||
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
||||||
use tantivy_bitpacker::{BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{BitPacker, BitUnpacker};
|
||||||
|
|
||||||
use crate::column_values::ColumnValues;
|
|
||||||
use crate::RowId;
|
use crate::RowId;
|
||||||
|
use crate::column_values::ColumnValues;
|
||||||
|
|
||||||
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
|
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
|
||||||
/// blanks depends on the number of blanks.
|
/// blanks depends on the number of blanks.
|
||||||
@@ -653,12 +653,14 @@ mod tests {
|
|||||||
),
|
),
|
||||||
&[3]
|
&[3]
|
||||||
);
|
);
|
||||||
assert!(get_positions_for_value_range_helper(
|
assert!(
|
||||||
&decomp,
|
get_positions_for_value_range_helper(
|
||||||
99998u128..=99998u128,
|
&decomp,
|
||||||
complete_range.clone()
|
99998u128..=99998u128,
|
||||||
)
|
complete_range.clone()
|
||||||
.is_empty());
|
)
|
||||||
|
.is_empty()
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&get_positions_for_value_range_helper(
|
&get_positions_for_value_range_helper(
|
||||||
&decomp,
|
&decomp,
|
||||||
|
|||||||
@@ -130,11 +130,11 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod tests {
|
pub(crate) mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::column_values::u64_based::{
|
|
||||||
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
|
||||||
ALL_U64_CODEC_TYPES,
|
|
||||||
};
|
|
||||||
use crate::column_values::CodecType;
|
use crate::column_values::CodecType;
|
||||||
|
use crate::column_values::u64_based::{
|
||||||
|
ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
|
||||||
|
serialize_u64_based_column_values,
|
||||||
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_serialize_deserialize_u128_header() {
|
fn test_serialize_deserialize_u128_header() {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::ops::{Range, RangeInclusive};
|
|||||||
|
|
||||||
use common::{BinarySerializable, OwnedBytes};
|
use common::{BinarySerializable, OwnedBytes};
|
||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||||
|
|
||||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||||
use crate::{ColumnValues, RowId};
|
use crate::{ColumnValues, RowId};
|
||||||
@@ -23,11 +23,7 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
|
|||||||
// copied from unstable rust standard library.
|
// copied from unstable rust standard library.
|
||||||
let d = n / q.get();
|
let d = n / q.get();
|
||||||
let r = n % q.get();
|
let r = n % q.get();
|
||||||
if r > 0 {
|
if r > 0 { d + 1 } else { d }
|
||||||
d + 1
|
|
||||||
} else {
|
|
||||||
d
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
|
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
|
||||||
|
|||||||
@@ -4,12 +4,12 @@ use std::{io, iter};
|
|||||||
|
|
||||||
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
|
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
|
||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||||
|
|
||||||
|
use crate::MonotonicallyMappableToU64;
|
||||||
use crate::column_values::u64_based::line::Line;
|
use crate::column_values::u64_based::line::Line;
|
||||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||||
use crate::column_values::{ColumnValues, VecColumn};
|
use crate::column_values::{ColumnValues, VecColumn};
|
||||||
use crate::MonotonicallyMappableToU64;
|
|
||||||
|
|
||||||
const BLOCK_SIZE: u32 = 512u32;
|
const BLOCK_SIZE: u32 = 512u32;
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use common::{BinarySerializable, OwnedBytes};
|
use common::{BinarySerializable, OwnedBytes};
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||||
|
|
||||||
use super::line::Line;
|
|
||||||
use super::ColumnValues;
|
use super::ColumnValues;
|
||||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
use super::line::Line;
|
||||||
use crate::column_values::VecColumn;
|
|
||||||
use crate::RowId;
|
use crate::RowId;
|
||||||
|
use crate::column_values::VecColumn;
|
||||||
|
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||||
|
|
||||||
const HALF_SPACE: u64 = u64::MAX / 2;
|
const HALF_SPACE: u64 = u64::MAX / 2;
|
||||||
const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
|
const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec;
|
|||||||
pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
|
pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
|
||||||
pub use crate::column_values::u64_based::linear::LinearCodec;
|
pub use crate::column_values::u64_based::linear::LinearCodec;
|
||||||
pub use crate::column_values::u64_based::stats_collector::StatsCollector;
|
pub use crate::column_values::u64_based::stats_collector::StatsCollector;
|
||||||
use crate::column_values::{monotonic_map_column, ColumnStats};
|
use crate::column_values::{ColumnStats, monotonic_map_column};
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::{ColumnValues, MonotonicallyMappableToU64};
|
use crate::{ColumnValues, MonotonicallyMappableToU64};
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ use std::num::NonZeroU64;
|
|||||||
|
|
||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
|
|
||||||
use crate::column_values::ColumnStats;
|
|
||||||
use crate::RowId;
|
use crate::RowId;
|
||||||
|
use crate::column_values::ColumnStats;
|
||||||
|
|
||||||
/// Compute the gcd of two non null numbers.
|
/// Compute the gcd of two non null numbers.
|
||||||
///
|
///
|
||||||
@@ -96,8 +96,8 @@ impl StatsCollector {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::num::NonZeroU64;
|
use std::num::NonZeroU64;
|
||||||
|
|
||||||
use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
|
|
||||||
use crate::column_values::u64_based::ColumnStats;
|
use crate::column_values::u64_based::ColumnStats;
|
||||||
|
use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd};
|
||||||
|
|
||||||
fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
|
fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
|
||||||
let mut stats_collector = StatsCollector::default();
|
let mut stats_collector = StatsCollector::default();
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ use std::net::Ipv6Addr;
|
|||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::value::NumericalType;
|
|
||||||
use crate::InvalidData;
|
use crate::InvalidData;
|
||||||
|
use crate::value::NumericalType;
|
||||||
|
|
||||||
/// The column type represents the column type.
|
/// The column type represents the column type.
|
||||||
/// Any changes need to be propagated to `COLUMN_TYPES`.
|
/// Any changes need to be propagated to `COLUMN_TYPES`.
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ use std::sync::Arc;
|
|||||||
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||||
|
|
||||||
use super::writer::ColumnarSerializer;
|
use super::writer::ColumnarSerializer;
|
||||||
use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64};
|
use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128};
|
||||||
use crate::column_values::MergedColumnValues;
|
use crate::column_values::MergedColumnValues;
|
||||||
|
use crate::columnar::ColumnarReader;
|
||||||
use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
|
use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
|
||||||
use crate::columnar::writer::CompatibleNumericalTypes;
|
use crate::columnar::writer::CompatibleNumericalTypes;
|
||||||
use crate::columnar::ColumnarReader;
|
|
||||||
use crate::dynamic_column::DynamicColumn;
|
use crate::dynamic_column::DynamicColumn;
|
||||||
use crate::{
|
use crate::{
|
||||||
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
|
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
|
||||||
@@ -144,16 +144,17 @@ fn merge_column(
|
|||||||
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
|
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
|
||||||
Vec::with_capacity(columns_to_merge.len());
|
Vec::with_capacity(columns_to_merge.len());
|
||||||
for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
|
for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
|
||||||
if let Some(Column { index: idx, values }) =
|
match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) {
|
||||||
dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
|
Some(Column { index: idx, values }) => {
|
||||||
{
|
column_indexes.push(idx);
|
||||||
column_indexes.push(idx);
|
column_values.push(Some(values));
|
||||||
column_values.push(Some(values));
|
}
|
||||||
} else {
|
None => {
|
||||||
column_indexes.push(ColumnIndex::Empty {
|
column_indexes.push(ColumnIndex::Empty {
|
||||||
num_docs: num_docs_per_column[i],
|
num_docs: num_docs_per_column[i],
|
||||||
});
|
});
|
||||||
column_values.push(None);
|
column_values.push(None);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let merged_column_index =
|
let merged_column_index =
|
||||||
@@ -253,11 +254,13 @@ impl GroupedColumns {
|
|||||||
}
|
}
|
||||||
// At the moment, only the numerical column type category has more than one possible
|
// At the moment, only the numerical column type category has more than one possible
|
||||||
// column type.
|
// column type.
|
||||||
assert!(self
|
assert!(
|
||||||
.columns
|
self.columns
|
||||||
.iter()
|
.iter()
|
||||||
.flatten()
|
.flatten()
|
||||||
.all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical));
|
.all(|el| ColumnTypeCategory::from(el.column_type())
|
||||||
|
== ColumnTypeCategory::Numerical)
|
||||||
|
);
|
||||||
merged_numerical_columns_type(self.columns.iter().flatten()).into()
|
merged_numerical_columns_type(self.columns.iter().flatten()).into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,18 +74,19 @@ impl<'a> TermMerger<'a> {
|
|||||||
/// False if there is none.
|
/// False if there is none.
|
||||||
pub fn advance(&mut self) -> bool {
|
pub fn advance(&mut self) -> bool {
|
||||||
self.advance_segments();
|
self.advance_segments();
|
||||||
if let Some(head) = self.heap.pop() {
|
match self.heap.pop() {
|
||||||
self.term_streams_with_segment.push(head);
|
Some(head) => {
|
||||||
while let Some(next_streamer) = self.heap.peek() {
|
self.term_streams_with_segment.push(head);
|
||||||
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
|
while let Some(next_streamer) = self.heap.peek() {
|
||||||
break;
|
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
||||||
|
self.term_streams_with_segment.push(next_heap_it);
|
||||||
}
|
}
|
||||||
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
true
|
||||||
self.term_streams_with_segment.push(next_heap_it);
|
|
||||||
}
|
}
|
||||||
true
|
_ => false,
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use proptest::collection::vec;
|
|||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::columnar::{merge_columnar, ColumnarReader, MergeRowOrder, StackMergeOrder};
|
use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar};
|
||||||
use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
|
use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
|
||||||
|
|
||||||
fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
|
fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ mod reader;
|
|||||||
mod writer;
|
mod writer;
|
||||||
|
|
||||||
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
||||||
pub use format_version::{Version, CURRENT_VERSION};
|
pub use format_version::{CURRENT_VERSION, Version};
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) use merge::ColumnTypeCategory;
|
pub(crate) use merge::ColumnTypeCategory;
|
||||||
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
|
||||||
pub use reader::ColumnarReader;
|
pub use reader::ColumnarReader;
|
||||||
pub use writer::ColumnarWriter;
|
pub use writer::ColumnarWriter;
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
use std::{fmt, io, mem};
|
use std::{fmt, io, mem};
|
||||||
|
|
||||||
|
use common::BinarySerializable;
|
||||||
use common::file_slice::FileSlice;
|
use common::file_slice::FileSlice;
|
||||||
use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
|
use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
|
||||||
use common::BinarySerializable;
|
|
||||||
use sstable::{Dictionary, RangeSSTable};
|
use sstable::{Dictionary, RangeSSTable};
|
||||||
|
|
||||||
use crate::columnar::{format_version, ColumnType};
|
use crate::columnar::{ColumnType, format_version};
|
||||||
use crate::dynamic_column::DynamicColumnHandle;
|
use crate::dynamic_column::DynamicColumnHandle;
|
||||||
use crate::{RowId, Version};
|
use crate::{RowId, Version};
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ impl ColumnWriter {
|
|||||||
&self,
|
&self,
|
||||||
arena: &MemoryArena,
|
arena: &MemoryArena,
|
||||||
buffer: &'a mut Vec<u8>,
|
buffer: &'a mut Vec<u8>,
|
||||||
) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
|
) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
self.values.read_to_end(arena, buffer);
|
self.values.read_to_end(arena, buffer);
|
||||||
let mut cursor: &[u8] = &buffer[..];
|
let mut cursor: &[u8] = &buffer[..];
|
||||||
@@ -104,9 +104,10 @@ pub(crate) struct NumericalColumnWriter {
|
|||||||
|
|
||||||
impl NumericalColumnWriter {
|
impl NumericalColumnWriter {
|
||||||
pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
|
pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
|
||||||
assert!(self
|
assert!(
|
||||||
.compatible_numerical_types
|
self.compatible_numerical_types
|
||||||
.is_type_accepted(numerical_type));
|
.is_type_accepted(numerical_type)
|
||||||
|
);
|
||||||
self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
|
self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -211,7 +212,7 @@ impl NumericalColumnWriter {
|
|||||||
self,
|
self,
|
||||||
arena: &MemoryArena,
|
arena: &MemoryArena,
|
||||||
buffer: &'a mut Vec<u8>,
|
buffer: &'a mut Vec<u8>,
|
||||||
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a {
|
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
|
||||||
self.column_writer.operation_iterator(arena, buffer)
|
self.column_writer.operation_iterator(arena, buffer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -255,7 +256,7 @@ impl StrOrBytesColumnWriter {
|
|||||||
&self,
|
&self,
|
||||||
arena: &MemoryArena,
|
arena: &MemoryArena,
|
||||||
byte_buffer: &'a mut Vec<u8>,
|
byte_buffer: &'a mut Vec<u8>,
|
||||||
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a {
|
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
|
||||||
self.column_writer.operation_iterator(arena, byte_buffer)
|
self.column_writer.operation_iterator(arena, byte_buffer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,13 +8,13 @@ use std::net::Ipv6Addr;
|
|||||||
|
|
||||||
use column_operation::ColumnOperation;
|
use column_operation::ColumnOperation;
|
||||||
pub(crate) use column_writers::CompatibleNumericalTypes;
|
pub(crate) use column_writers::CompatibleNumericalTypes;
|
||||||
use common::json_path_writer::JSON_END_OF_PATH;
|
|
||||||
use common::CountingWriter;
|
use common::CountingWriter;
|
||||||
|
use common::json_path_writer::JSON_END_OF_PATH;
|
||||||
pub(crate) use serializer::ColumnarSerializer;
|
pub(crate) use serializer::ColumnarSerializer;
|
||||||
use stacker::{Addr, ArenaHashMap, MemoryArena};
|
use stacker::{Addr, ArenaHashMap, MemoryArena};
|
||||||
|
|
||||||
use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
|
use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
|
||||||
use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128};
|
||||||
use crate::columnar::column_type::ColumnType;
|
use crate::columnar::column_type::ColumnType;
|
||||||
use crate::columnar::writer::column_writers::{
|
use crate::columnar::writer::column_writers::{
|
||||||
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
|
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
|||||||
|
|
||||||
use common::json_path_writer::JSON_END_OF_PATH;
|
use common::json_path_writer::JSON_END_OF_PATH;
|
||||||
use common::{BinarySerializable, CountingWriter};
|
use common::{BinarySerializable, CountingWriter};
|
||||||
use sstable::value::RangeValueWriter;
|
|
||||||
use sstable::RangeSSTable;
|
use sstable::RangeSSTable;
|
||||||
|
use sstable::value::RangeValueWriter;
|
||||||
|
|
||||||
use crate::columnar::ColumnType;
|
|
||||||
use crate::RowId;
|
use crate::RowId;
|
||||||
|
use crate::columnar::ColumnType;
|
||||||
|
|
||||||
pub struct ColumnarSerializer<W: io::Write> {
|
pub struct ColumnarSerializer<W: io::Write> {
|
||||||
wrt: CountingWriter<W>,
|
wrt: CountingWriter<W>,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use crate::RowId;
|
||||||
use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
|
use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
|
||||||
use crate::iterable::Iterable;
|
use crate::iterable::Iterable;
|
||||||
use crate::RowId;
|
|
||||||
|
|
||||||
/// The `IndexBuilder` interprets a sequence of
|
/// The `IndexBuilder` interprets a sequence of
|
||||||
/// calls of the form:
|
/// calls of the form:
|
||||||
@@ -31,12 +31,13 @@ pub struct OptionalIndexBuilder {
|
|||||||
|
|
||||||
impl OptionalIndexBuilder {
|
impl OptionalIndexBuilder {
|
||||||
pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
|
pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
|
||||||
debug_assert!(self
|
debug_assert!(
|
||||||
.docs
|
self.docs
|
||||||
.last()
|
.last()
|
||||||
.copied()
|
.copied()
|
||||||
.map(|last_doc| last_doc < num_rows)
|
.map(|last_doc| last_doc < num_rows)
|
||||||
.unwrap_or(true));
|
.unwrap_or(true)
|
||||||
|
);
|
||||||
&self.docs[..]
|
&self.docs[..]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -48,12 +49,13 @@ impl OptionalIndexBuilder {
|
|||||||
impl IndexBuilder for OptionalIndexBuilder {
|
impl IndexBuilder for OptionalIndexBuilder {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn record_row(&mut self, doc: RowId) {
|
fn record_row(&mut self, doc: RowId) {
|
||||||
debug_assert!(self
|
debug_assert!(
|
||||||
.docs
|
self.docs
|
||||||
.last()
|
.last()
|
||||||
.copied()
|
.copied()
|
||||||
.map(|prev_doc| doc > prev_doc)
|
.map(|prev_doc| doc > prev_doc)
|
||||||
.unwrap_or(true));
|
.unwrap_or(true)
|
||||||
|
);
|
||||||
self.docs.push(doc);
|
self.docs.push(doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,8 +3,8 @@ use std::path::PathBuf;
|
|||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
||||||
CURRENT_VERSION,
|
merge_columnar,
|
||||||
};
|
};
|
||||||
|
|
||||||
const NUM_DOCS: u32 = u16::MAX as u32;
|
const NUM_DOCS: u32 = u16::MAX as u32;
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use common::file_slice::FileSlice;
|
|||||||
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
||||||
|
|
||||||
use crate::column::{BytesColumn, Column, StrColumn};
|
use crate::column::{BytesColumn, Column, StrColumn};
|
||||||
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
|
use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
|
||||||
use crate::columnar::ColumnType;
|
use crate::columnar::ColumnType;
|
||||||
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};
|
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};
|
||||||
|
|
||||||
|
|||||||
@@ -44,11 +44,11 @@ pub use block_accessor::ColumnBlockAccessor;
|
|||||||
pub use column::{BytesColumn, Column, StrColumn};
|
pub use column::{BytesColumn, Column, StrColumn};
|
||||||
pub use column_index::ColumnIndex;
|
pub use column_index::ColumnIndex;
|
||||||
pub use column_values::{
|
pub use column_values::{
|
||||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||||
};
|
};
|
||||||
pub use columnar::{
|
pub use columnar::{
|
||||||
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
|
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
|
||||||
};
|
};
|
||||||
use sstable::VoidSSTable;
|
use sstable::VoidSSTable;
|
||||||
pub use value::{NumericalType, NumericalValue};
|
pub use value::{NumericalType, NumericalValue};
|
||||||
|
|||||||
@@ -716,8 +716,8 @@ fn test_columnar_merging_number_columns() {
|
|||||||
// TODO document edge case: required_columns incompatible with values.
|
// TODO document edge case: required_columns incompatible with values.
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
fn columnar_docs_and_remap(
|
fn columnar_docs_and_remap()
|
||||||
) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
-> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
||||||
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|
||||||
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
|
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
|
||||||
let row_addrs: Vec<RowAddr> = columnars_docs
|
let row_addrs: Vec<RowAddr> = columnars_docs
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "tantivy-common"
|
|||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
description = "common traits and utility functions used by multiple tantivy subcrates"
|
description = "common traits and utility functions used by multiple tantivy subcrates"
|
||||||
documentation = "https://docs.rs/tantivy_common/"
|
documentation = "https://docs.rs/tantivy_common/"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use binggan::{black_box, BenchRunner};
|
use binggan::{BenchRunner, black_box};
|
||||||
use rand::seq::IteratorRandom;
|
use rand::seq::IteratorRandom;
|
||||||
use rand::thread_rng;
|
use rand::thread_rng;
|
||||||
use tantivy_common::{serialize_vint_u32, BitSet, TinySet};
|
use tantivy_common::{BitSet, TinySet, serialize_vint_u32};
|
||||||
|
|
||||||
fn bench_vint() {
|
fn bench_vint() {
|
||||||
let mut runner = BenchRunner::new();
|
let mut runner = BenchRunner::new();
|
||||||
|
|||||||
@@ -65,11 +65,11 @@ pub fn transform_bound_inner_res<TFrom, TTo>(
|
|||||||
) -> io::Result<Bound<TTo>> {
|
) -> io::Result<Bound<TTo>> {
|
||||||
use self::Bound::*;
|
use self::Bound::*;
|
||||||
Ok(match bound {
|
Ok(match bound {
|
||||||
Excluded(ref from_val) => match transform(from_val)? {
|
Excluded(from_val) => match transform(from_val)? {
|
||||||
TransformBound::NewBound(new_val) => new_val,
|
TransformBound::NewBound(new_val) => new_val,
|
||||||
TransformBound::Existing(new_val) => Excluded(new_val),
|
TransformBound::Existing(new_val) => Excluded(new_val),
|
||||||
},
|
},
|
||||||
Included(ref from_val) => match transform(from_val)? {
|
Included(from_val) => match transform(from_val)? {
|
||||||
TransformBound::NewBound(new_val) => new_val,
|
TransformBound::NewBound(new_val) => new_val,
|
||||||
TransformBound::Existing(new_val) => Included(new_val),
|
TransformBound::Existing(new_val) => Included(new_val),
|
||||||
},
|
},
|
||||||
@@ -85,11 +85,11 @@ pub fn transform_bound_inner<TFrom, TTo>(
|
|||||||
) -> Bound<TTo> {
|
) -> Bound<TTo> {
|
||||||
use self::Bound::*;
|
use self::Bound::*;
|
||||||
match bound {
|
match bound {
|
||||||
Excluded(ref from_val) => match transform(from_val) {
|
Excluded(from_val) => match transform(from_val) {
|
||||||
TransformBound::NewBound(new_val) => new_val,
|
TransformBound::NewBound(new_val) => new_val,
|
||||||
TransformBound::Existing(new_val) => Excluded(new_val),
|
TransformBound::Existing(new_val) => Excluded(new_val),
|
||||||
},
|
},
|
||||||
Included(ref from_val) => match transform(from_val) {
|
Included(from_val) => match transform(from_val) {
|
||||||
TransformBound::NewBound(new_val) => new_val,
|
TransformBound::NewBound(new_val) => new_val,
|
||||||
TransformBound::Existing(new_val) => Included(new_val),
|
TransformBound::Existing(new_val) => Included(new_val),
|
||||||
},
|
},
|
||||||
@@ -111,8 +111,8 @@ pub fn map_bound<TFrom, TTo>(
|
|||||||
) -> Bound<TTo> {
|
) -> Bound<TTo> {
|
||||||
use self::Bound::*;
|
use self::Bound::*;
|
||||||
match bound {
|
match bound {
|
||||||
Excluded(ref from_val) => Bound::Excluded(transform(from_val)),
|
Excluded(from_val) => Bound::Excluded(transform(from_val)),
|
||||||
Included(ref from_val) => Bound::Included(transform(from_val)),
|
Included(from_val) => Bound::Included(transform(from_val)),
|
||||||
Unbounded => Unbounded,
|
Unbounded => Unbounded,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -123,8 +123,8 @@ pub fn map_bound_res<TFrom, TTo, Err>(
|
|||||||
) -> Result<Bound<TTo>, Err> {
|
) -> Result<Bound<TTo>, Err> {
|
||||||
use self::Bound::*;
|
use self::Bound::*;
|
||||||
Ok(match bound {
|
Ok(match bound {
|
||||||
Excluded(ref from_val) => Excluded(transform(from_val)?),
|
Excluded(from_val) => Excluded(transform(from_val)?),
|
||||||
Included(ref from_val) => Included(transform(from_val)?),
|
Included(from_val) => Included(transform(from_val)?),
|
||||||
Unbounded => Unbounded,
|
Unbounded => Unbounded,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ impl FileHandle for WrapFile {
|
|||||||
{
|
{
|
||||||
use std::io::{Read, Seek};
|
use std::io::{Read, Seek};
|
||||||
let mut file = self.file.try_clone()?; // Clone the file to read from it separately
|
let mut file = self.file.try_clone()?; // Clone the file to read from it separately
|
||||||
// Seek to the start position in the file
|
// Seek to the start position in the file
|
||||||
file.seek(io::SeekFrom::Start(start as u64))?;
|
file.seek(io::SeekFrom::Start(start as u64))?;
|
||||||
// Read the data into the buffer
|
// Read the data into the buffer
|
||||||
file.read_exact(&mut buffer)?;
|
file.read_exact(&mut buffer)?;
|
||||||
@@ -346,8 +346,8 @@ mod tests {
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use super::{FileHandle, FileSlice};
|
use super::{FileHandle, FileSlice};
|
||||||
use crate::file_slice::combine_ranges;
|
|
||||||
use crate::HasLen;
|
use crate::HasLen;
|
||||||
|
use crate::file_slice::combine_ranges;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_slice() -> io::Result<()> {
|
fn test_file_slice() -> io::Result<()> {
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ pub use json_path_writer::JsonPathWriter;
|
|||||||
pub use ownedbytes::{OwnedBytes, StableDeref};
|
pub use ownedbytes::{OwnedBytes, StableDeref};
|
||||||
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
||||||
pub use vint::{
|
pub use vint::{
|
||||||
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
|
VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint,
|
||||||
};
|
};
|
||||||
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
|
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
|
||||||
|
|
||||||
@@ -177,8 +177,10 @@ pub(crate) mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_f64_order() {
|
fn test_f64_order() {
|
||||||
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
|
assert!(
|
||||||
.contains(&f64_to_u64(f64::NAN))); // nan is not a number
|
!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
|
||||||
|
.contains(&f64_to_u64(f64::NAN))
|
||||||
|
); // nan is not a number
|
||||||
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
|
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
|
||||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
|
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
|
||||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
|
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
|
||||||
|
|||||||
@@ -222,7 +222,7 @@ impl BinarySerializable for VInt {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
use super::{serialize_vint_u32, BinarySerializable, VInt};
|
use super::{BinarySerializable, VInt, serialize_vint_u32};
|
||||||
|
|
||||||
fn aux_test_vint(val: u64) {
|
fn aux_test_vint(val: u64) {
|
||||||
let mut v = [14u8; 10];
|
let mut v = [14u8; 10];
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
|
|||||||
repository = "https://github.com/quickwit-oss/tantivy"
|
repository = "https://github.com/quickwit-oss/tantivy"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
keywords = ["search", "information", "retrieval"]
|
keywords = ["search", "information", "retrieval"]
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
nom = "7"
|
nom = "7"
|
||||||
|
|||||||
@@ -186,19 +186,19 @@ macro_rules! tuple_trait_impl(
|
|||||||
);
|
);
|
||||||
|
|
||||||
macro_rules! tuple_trait_inner(
|
macro_rules! tuple_trait_inner(
|
||||||
($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({
|
($it:tt, $self:expr_2021, $input:expr_2021, (), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
|
||||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||||
$error_list.append(&mut err);
|
$error_list.append(&mut err);
|
||||||
|
|
||||||
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
|
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
|
||||||
});
|
});
|
||||||
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({
|
($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
|
||||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||||
$error_list.append(&mut err);
|
$error_list.append(&mut err);
|
||||||
|
|
||||||
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
|
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
|
||||||
});
|
});
|
||||||
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({
|
($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident) => ({
|
||||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||||
$error_list.append(&mut err);
|
$error_list.append(&mut err);
|
||||||
|
|
||||||
@@ -328,13 +328,13 @@ macro_rules! alt_trait_impl(
|
|||||||
);
|
);
|
||||||
|
|
||||||
macro_rules! alt_trait_inner(
|
macro_rules! alt_trait_inner(
|
||||||
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
|
($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
|
||||||
match $self.$it.0.parse($input.clone()) {
|
match $self.$it.0.parse($input.clone()) {
|
||||||
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
|
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
|
||||||
Ok((input_left, _)) => Some($self.$it.1.parse(input_left)),
|
Ok((input_left, _)) => Some($self.$it.1.parse(input_left)),
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => (
|
($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident) => (
|
||||||
None
|
None
|
||||||
);
|
);
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::iter::once;
|
use std::iter::once;
|
||||||
|
|
||||||
|
use nom::IResult;
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::character::complete::{
|
use nom::character::complete::{
|
||||||
@@ -10,12 +11,11 @@ use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify};
|
|||||||
use nom::error::{Error, ErrorKind};
|
use nom::error::{Error, ErrorKind};
|
||||||
use nom::multi::{many0, many1, separated_list0};
|
use nom::multi::{many0, many1, separated_list0};
|
||||||
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
|
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
|
||||||
use nom::IResult;
|
|
||||||
|
|
||||||
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
|
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
|
||||||
|
use crate::Occur;
|
||||||
use crate::infallible::*;
|
use crate::infallible::*;
|
||||||
use crate::user_input_ast::Delimiter;
|
use crate::user_input_ast::Delimiter;
|
||||||
use crate::Occur;
|
|
||||||
|
|
||||||
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
|
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
|
||||||
// special characters.
|
// special characters.
|
||||||
@@ -1030,7 +1030,7 @@ fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
|
|||||||
|
|
||||||
fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
|
fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
|
||||||
match input {
|
match input {
|
||||||
(None, UserInputAst::Clause(ref mut clauses)) if clauses.len() == 1 => {
|
(None, UserInputAst::Clause(clauses)) if clauses.len() == 1 => {
|
||||||
*input = clauses.pop().unwrap(); // safe because clauses.len() == 1
|
*input = clauses.pop().unwrap(); // safe because clauses.len() == 1
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
@@ -1376,7 +1376,7 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_range_parser_lenient() {
|
fn test_range_parser_lenient() {
|
||||||
let literal = |query| literal_infallible(query).unwrap().1 .0.unwrap();
|
let literal = |query| literal_infallible(query).unwrap().1.0.unwrap();
|
||||||
|
|
||||||
// same tests as non-lenient
|
// same tests as non-lenient
|
||||||
let res = literal("title: <hello");
|
let res = literal("title: <hello");
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ impl UserInputLeaf {
|
|||||||
|
|
||||||
pub(crate) fn set_default_field(&mut self, default_field: String) {
|
pub(crate) fn set_default_field(&mut self, default_field: String) {
|
||||||
match self {
|
match self {
|
||||||
UserInputLeaf::Literal(ref mut literal) if literal.field_name.is_none() => {
|
UserInputLeaf::Literal(literal) if literal.field_name.is_none() => {
|
||||||
literal.field_name = Some(default_field)
|
literal.field_name = Some(default_field)
|
||||||
}
|
}
|
||||||
UserInputLeaf::All => {
|
UserInputLeaf::All => {
|
||||||
@@ -59,12 +59,8 @@ impl UserInputLeaf {
|
|||||||
field: default_field,
|
field: default_field,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
UserInputLeaf::Range { ref mut field, .. } if field.is_none() => {
|
UserInputLeaf::Range { field, .. } if field.is_none() => *field = Some(default_field),
|
||||||
*field = Some(default_field)
|
UserInputLeaf::Set { field, .. } if field.is_none() => *field = Some(default_field),
|
||||||
}
|
|
||||||
UserInputLeaf::Set { ref mut field, .. } if field.is_none() => {
|
|
||||||
*field = Some(default_field)
|
|
||||||
}
|
|
||||||
_ => (), // field was already set, do nothing
|
_ => (), // field was already set, do nothing
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -75,11 +71,11 @@ impl Debug for UserInputLeaf {
|
|||||||
match self {
|
match self {
|
||||||
UserInputLeaf::Literal(literal) => literal.fmt(formatter),
|
UserInputLeaf::Literal(literal) => literal.fmt(formatter),
|
||||||
UserInputLeaf::Range {
|
UserInputLeaf::Range {
|
||||||
ref field,
|
field,
|
||||||
ref lower,
|
lower,
|
||||||
ref upper,
|
upper,
|
||||||
} => {
|
} => {
|
||||||
if let Some(ref field) = field {
|
if let Some(field) = field {
|
||||||
// TODO properly escape field (in case of \")
|
// TODO properly escape field (in case of \")
|
||||||
write!(formatter, "\"{field}\":")?;
|
write!(formatter, "\"{field}\":")?;
|
||||||
}
|
}
|
||||||
@@ -89,7 +85,7 @@ impl Debug for UserInputLeaf {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
UserInputLeaf::Set { field, elements } => {
|
UserInputLeaf::Set { field, elements } => {
|
||||||
if let Some(ref field) = field {
|
if let Some(field) = field {
|
||||||
// TODO properly escape field (in case of \")
|
// TODO properly escape field (in case of \")
|
||||||
write!(formatter, "\"{field}\": ")?;
|
write!(formatter, "\"{field}\": ")?;
|
||||||
}
|
}
|
||||||
@@ -267,7 +263,7 @@ impl UserInputAst {
|
|||||||
.iter_mut()
|
.iter_mut()
|
||||||
.for_each(|(_, ast)| ast.set_default_field(field.clone())),
|
.for_each(|(_, ast)| ast.set_default_field(field.clone())),
|
||||||
UserInputAst::Leaf(leaf) => leaf.set_default_field(field),
|
UserInputAst::Leaf(leaf) => leaf.set_default_field(field),
|
||||||
UserInputAst::Boost(ref mut ast, _) => ast.set_default_field(field),
|
UserInputAst::Boost(ast, _) => ast.set_default_field(field),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-sstable"
|
name = "tantivy-sstable"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
repository = "https://github.com/quickwit-oss/tantivy"
|
repository = "https://github.com/quickwit-oss/tantivy"
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common::file_slice::FileSlice;
|
|
||||||
use common::OwnedBytes;
|
use common::OwnedBytes;
|
||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use common::file_slice::FileSlice;
|
||||||
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
||||||
|
|
||||||
fn make_test_sstable(suffix: &str) -> FileSlice {
|
fn make_test_sstable(suffix: &str) -> FileSlice {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::collections::BTreeSet;
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use common::file_slice::FileSlice;
|
use common::file_slice::FileSlice;
|
||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
||||||
|
|||||||
@@ -51,18 +51,21 @@ impl BlockReader {
|
|||||||
let block_len = match self.reader.len() {
|
let block_len = match self.reader.len() {
|
||||||
0 => {
|
0 => {
|
||||||
// we are out of data for this block. Check if we have another block after
|
// we are out of data for this block. Check if we have another block after
|
||||||
if let Some(new_reader) = self.next_readers.next() {
|
match self.next_readers.next() {
|
||||||
self.reader = new_reader;
|
Some(new_reader) => {
|
||||||
continue;
|
self.reader = new_reader;
|
||||||
} else {
|
continue;
|
||||||
return Ok(false);
|
}
|
||||||
|
_ => {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
1..=3 => {
|
1..=3 => {
|
||||||
return Err(io::Error::new(
|
return Err(io::Error::new(
|
||||||
io::ErrorKind::UnexpectedEof,
|
io::ErrorKind::UnexpectedEof,
|
||||||
"failed to read block_len",
|
"failed to read block_len",
|
||||||
))
|
));
|
||||||
}
|
}
|
||||||
_ => self.reader.read_u32() as usize,
|
_ => self.reader.read_u32() as usize,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use common::{CountingWriter, OwnedBytes};
|
|||||||
use zstd::bulk::Compressor;
|
use zstd::bulk::Compressor;
|
||||||
|
|
||||||
use super::value::ValueWriter;
|
use super::value::ValueWriter;
|
||||||
use super::{value, vint, BlockReader};
|
use super::{BlockReader, value, vint};
|
||||||
|
|
||||||
const FOUR_BIT_LIMITS: usize = 1 << 4;
|
const FOUR_BIT_LIMITS: usize = 1 << 4;
|
||||||
const VINT_MODE: u8 = 1u8;
|
const VINT_MODE: u8 = 1u8;
|
||||||
|
|||||||
@@ -6,13 +6,13 @@ use std::marker::PhantomData;
|
|||||||
use std::ops::{Bound, RangeBounds};
|
use std::ops::{Bound, RangeBounds};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common::bounds::{transform_bound_inner_res, TransformBound};
|
use common::bounds::{TransformBound, transform_bound_inner_res};
|
||||||
use common::file_slice::FileSlice;
|
use common::file_slice::FileSlice;
|
||||||
use common::{BinarySerializable, OwnedBytes};
|
use common::{BinarySerializable, OwnedBytes};
|
||||||
use futures_util::{stream, StreamExt, TryStreamExt};
|
use futures_util::{StreamExt, TryStreamExt, stream};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use tantivy_fst::automaton::AlwaysMatch;
|
|
||||||
use tantivy_fst::Automaton;
|
use tantivy_fst::Automaton;
|
||||||
|
use tantivy_fst::automaton::AlwaysMatch;
|
||||||
|
|
||||||
use crate::sstable_index_v3::SSTableIndexV3Empty;
|
use crate::sstable_index_v3::SSTableIndexV3Empty;
|
||||||
use crate::streamer::{Streamer, StreamerBuilder};
|
use crate::streamer::{Streamer, StreamerBuilder};
|
||||||
@@ -311,7 +311,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
|||||||
return Err(io::Error::new(
|
return Err(io::Error::new(
|
||||||
io::ErrorKind::Other,
|
io::ErrorKind::Other,
|
||||||
format!("Unsupported sstable version, expected one of [2, 3], found {version}"),
|
format!("Unsupported sstable version, expected one of [2, 3], found {version}"),
|
||||||
))
|
));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -644,8 +644,8 @@ mod tests {
|
|||||||
use common::OwnedBytes;
|
use common::OwnedBytes;
|
||||||
|
|
||||||
use super::Dictionary;
|
use super::Dictionary;
|
||||||
use crate::dictionary::TermOrdHit;
|
|
||||||
use crate::MonotonicU64SSTable;
|
use crate::MonotonicU64SSTable;
|
||||||
|
use crate::dictionary::TermOrdHit;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct PermissionedHandle {
|
struct PermissionedHandle {
|
||||||
@@ -914,30 +914,33 @@ mod tests {
|
|||||||
|
|
||||||
// Single term
|
// Single term
|
||||||
let mut terms = Vec::new();
|
let mut terms = Vec::new();
|
||||||
assert!(dic
|
assert!(
|
||||||
.sorted_ords_to_term_cb(100_000..100_001, |term| {
|
dic.sorted_ords_to_term_cb(100_000..100_001, |term| {
|
||||||
terms.push(term.to_vec());
|
terms.push(term.to_vec());
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.unwrap());
|
.unwrap()
|
||||||
|
);
|
||||||
assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]);
|
assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]);
|
||||||
// Single term
|
// Single term
|
||||||
let mut terms = Vec::new();
|
let mut terms = Vec::new();
|
||||||
assert!(dic
|
assert!(
|
||||||
.sorted_ords_to_term_cb(100_001..100_002, |term| {
|
dic.sorted_ords_to_term_cb(100_001..100_002, |term| {
|
||||||
terms.push(term.to_vec());
|
terms.push(term.to_vec());
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.unwrap());
|
.unwrap()
|
||||||
|
);
|
||||||
assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]);
|
assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]);
|
||||||
// both terms
|
// both terms
|
||||||
let mut terms = Vec::new();
|
let mut terms = Vec::new();
|
||||||
assert!(dic
|
assert!(
|
||||||
.sorted_ords_to_term_cb(100_000..100_002, |term| {
|
dic.sorted_ords_to_term_cb(100_000..100_002, |term| {
|
||||||
terms.push(term.to_vec());
|
terms.push(term.to_vec());
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.unwrap());
|
.unwrap()
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
terms,
|
terms,
|
||||||
vec![
|
vec![
|
||||||
@@ -947,12 +950,13 @@ mod tests {
|
|||||||
);
|
);
|
||||||
// Test cross block
|
// Test cross block
|
||||||
let mut terms = Vec::new();
|
let mut terms = Vec::new();
|
||||||
assert!(dic
|
assert!(
|
||||||
.sorted_ords_to_term_cb(98653..=98655, |term| {
|
dic.sorted_ords_to_term_cb(98653..=98655, |term| {
|
||||||
terms.push(term.to_vec());
|
terms.push(term.to_vec());
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.unwrap());
|
.unwrap()
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
terms,
|
terms,
|
||||||
vec![
|
vec![
|
||||||
|
|||||||
@@ -322,7 +322,7 @@ mod test {
|
|||||||
|
|
||||||
use common::OwnedBytes;
|
use common::OwnedBytes;
|
||||||
|
|
||||||
use super::{common_prefix_len, MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable};
|
use super::{MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable, common_prefix_len};
|
||||||
|
|
||||||
fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) {
|
fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::collections::binary_heap::PeekMut;
|
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
|
use std::collections::binary_heap::PeekMut;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use super::{SingleValueMerger, ValueMerger};
|
use super::{SingleValueMerger, ValueMerger};
|
||||||
@@ -41,14 +41,17 @@ pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
|
|||||||
loop {
|
loop {
|
||||||
let len = heap.len();
|
let len = heap.len();
|
||||||
let mut value_merger;
|
let mut value_merger;
|
||||||
if let Some(mut head) = heap.peek_mut() {
|
match heap.peek_mut() {
|
||||||
writer.insert_key(head.0.key()).unwrap();
|
Some(mut head) => {
|
||||||
value_merger = merger.new_value(head.0.value());
|
writer.insert_key(head.0.key()).unwrap();
|
||||||
if !head.0.advance()? {
|
value_merger = merger.new_value(head.0.value());
|
||||||
PeekMut::pop(head);
|
if !head.0.advance()? {
|
||||||
|
PeekMut::pop(head);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
for _ in 0..len - 1 {
|
for _ in 0..len - 1 {
|
||||||
if let Some(mut head) = heap.peek_mut() {
|
if let Some(mut head) = heap.peek_mut() {
|
||||||
|
|||||||
@@ -3,12 +3,12 @@ use std::ops::Range;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common::{BinarySerializable, FixedSize, OwnedBytes};
|
use common::{BinarySerializable, FixedSize, OwnedBytes};
|
||||||
use tantivy_bitpacker::{compute_num_bits, BitPacker};
|
use tantivy_bitpacker::{BitPacker, compute_num_bits};
|
||||||
use tantivy_fst::raw::Fst;
|
use tantivy_fst::raw::Fst;
|
||||||
use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer};
|
use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer};
|
||||||
|
|
||||||
use crate::block_match_automaton::can_block_match_automaton;
|
use crate::block_match_automaton::can_block_match_automaton;
|
||||||
use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
|
use crate::{SSTableDataCorruption, TermOrdinal, common_prefix_len};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum SSTableIndex {
|
pub enum SSTableIndex {
|
||||||
@@ -824,8 +824,8 @@ mod tests {
|
|||||||
use common::OwnedBytes;
|
use common::OwnedBytes;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::block_match_automaton::tests::EqBuffer;
|
|
||||||
use crate::SSTableDataCorruption;
|
use crate::SSTableDataCorruption;
|
||||||
|
use crate::block_match_automaton::tests::EqBuffer;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_sstable_index() {
|
fn test_sstable_index() {
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
|
|
||||||
use tantivy_fst::automaton::AlwaysMatch;
|
|
||||||
use tantivy_fst::Automaton;
|
use tantivy_fst::Automaton;
|
||||||
|
use tantivy_fst::automaton::AlwaysMatch;
|
||||||
|
|
||||||
use crate::dictionary::Dictionary;
|
use crate::dictionary::Dictionary;
|
||||||
use crate::{DeltaReader, SSTable, TermOrdinal};
|
use crate::{DeltaReader, SSTable, TermOrdinal};
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||||
use crate::{vint, BlockAddr};
|
use crate::{BlockAddr, vint};
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub(crate) struct IndexValueReader {
|
pub(crate) struct IndexValueReader {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||||
|
|
||||||
/// See module comment.
|
/// See module comment.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||||
use crate::vint;
|
use crate::vint;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-stacker"
|
name = "tantivy-stacker"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
repository = "https://github.com/quickwit-oss/tantivy"
|
repository = "https://github.com/quickwit-oss/tantivy"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use binggan::plugins::PeakMemAllocPlugin;
|
use binggan::plugins::PeakMemAllocPlugin;
|
||||||
use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
|
use binggan::{BenchRunner, INSTRUMENTED_SYSTEM, PeakMemAlloc, black_box};
|
||||||
use rand::SeedableRng;
|
use rand::SeedableRng;
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena};
|
use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena};
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ mod shared_arena_hashmap;
|
|||||||
pub use self::arena_hashmap::ArenaHashMap;
|
pub use self::arena_hashmap::ArenaHashMap;
|
||||||
pub use self::expull::ExpUnrolledLinkedList;
|
pub use self::expull::ExpUnrolledLinkedList;
|
||||||
pub use self::memory_arena::{Addr, MemoryArena};
|
pub use self::memory_arena::{Addr, MemoryArena};
|
||||||
pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap};
|
pub use self::shared_arena_hashmap::{SharedArenaHashMap, compute_table_memory_size};
|
||||||
|
|
||||||
/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
|
/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
|
||||||
pub type UnorderedId = u32;
|
pub type UnorderedId = u32;
|
||||||
|
|||||||
@@ -356,7 +356,7 @@ mod tests {
|
|||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use super::{compute_previous_power_of_two, SharedArenaHashMap};
|
use super::{SharedArenaHashMap, compute_previous_power_of_two};
|
||||||
use crate::MemoryArena;
|
use crate::MemoryArena;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user