Compare commits

..

12 Commits

Author SHA1 Message Date
ChillFish8
1e50f96fb0 Disable GC and merge checker. 2022-12-11 14:04:20 +00:00
PSeitz
a05a0035f8 Merge pull request #1711 from quickwit-oss/sparse_dense_index
add dense codec
2022-12-09 08:48:43 +01:00
Pascal Seitz
976128a412 extend benchmarks 2022-12-09 15:21:25 +08:00
PSeitz
f27b3e312d Apply suggestions from code review
Co-authored-by: Paul Masurel <paul@quickwit.io>
2022-12-09 08:01:56 +01:00
PSeitz
56dea6f08d Apply suggestions from code review
Co-authored-by: Paul Masurel <paul@quickwit.io>
2022-12-09 08:01:02 +01:00
Pascal Seitz
789d29cf45 move code to DenseIndexBlock
improve benchmark
2022-12-09 14:18:26 +08:00
Paul Masurel
a36b50d825 benchmark fix and important optimisation 2022-12-08 18:55:20 +09:00
PSeitz
09f65e5467 Merge pull request #1707 from quickwit-oss/bump_version
bump version
2022-12-08 09:03:47 +01:00
Pascal Seitz
11b01e4141 chore: Release 2022-12-02 16:45:18 +08:00
Pascal Seitz
3e8852c606 revert tant version 2022-12-02 16:44:34 +08:00
Pascal Seitz
725f1ecb80 update cargo.toml 2022-12-02 16:43:17 +08:00
Pascal Seitz
afa27afe7d group workspace deps 2022-12-02 16:31:30 +08:00
10 changed files with 210 additions and 107 deletions

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy" name = "tantivy"
version = "0.19.0-dev" version = "0.19.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = ["database-implementations", "data-structures"] categories = ["database-implementations", "data-structures"]
@@ -36,11 +36,6 @@ fs2 = { version = "0.4.3", optional = true }
levenshtein_automata = "0.2.1" levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] } uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4" crossbeam-channel = "0.5.4"
tantivy-query-grammar = { version="0.18.0", path="./query-grammar" }
tantivy-bitpacker = { version="0.2", path="./bitpacker" }
common = { version = "0.3", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version="0.2", path="./fastfield_codecs", default-features = false }
ownedbytes = { version="0.3", path="./ownedbytes" }
stable_deref_trait = "1.2.0" stable_deref_trait = "1.2.0"
rust-stemmers = "1.2.0" rust-stemmers = "1.2.0"
downcast-rs = "1.2.0" downcast-rs = "1.2.0"
@@ -62,6 +57,12 @@ ciborium = { version = "0.2", optional = true}
async-trait = "0.1.53" async-trait = "0.1.53"
arc-swap = "1.5.0" arc-swap = "1.5.0"
tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
common = { version= "0.4", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
ownedbytes = { version= "0.4", path="./ownedbytes" }
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
winapi = "0.3.9" winapi = "0.3.9"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy-bitpacker" name = "tantivy-bitpacker"
version = "0.2.0" version = "0.3.0"
edition = "2021" edition = "2021"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
@@ -8,6 +8,8 @@ categories = []
description = """Tantivy-sub crate: bitpacking""" description = """Tantivy-sub crate: bitpacking"""
repository = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy"
keywords = [] keywords = []
documentation = "https://docs.rs/tantivy-bitpacker/latest/tantivy_bitpacker"
homepage = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -1,16 +1,20 @@
[package] [package]
name = "tantivy-common" name = "tantivy-common"
version = "0.3.0" version = "0.4.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"] authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT" license = "MIT"
edition = "2021" edition = "2021"
description = "common traits and utility functions used by multiple tantivy subcrates" description = "common traits and utility functions used by multiple tantivy subcrates"
documentation = "https://docs.rs/tantivy_common/"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
byteorder = "1.4.3" byteorder = "1.4.3"
ownedbytes = { version="0.3", path="../ownedbytes" } ownedbytes = { version= "0.4", path="../ownedbytes" }
[dev-dependencies] [dev-dependencies]
proptest = "1.0.0" proptest = "1.0.0"

View File

@@ -1,17 +1,20 @@
[package] [package]
name = "fastfield_codecs" name = "fastfield_codecs"
version = "0.2.0" version = "0.3.0"
authors = ["Pascal Seitz <pascal@quickwit.io>"] authors = ["Pascal Seitz <pascal@quickwit.io>"]
license = "MIT" license = "MIT"
edition = "2021" edition = "2021"
description = "Fast field codecs used by tantivy" description = "Fast field codecs used by tantivy"
documentation = "https://docs.rs/fastfield_codecs/"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
common = { version = "0.3", path = "../common/", package = "tantivy-common" } common = { version = "0.4", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version="0.2", path = "../bitpacker/" } tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
ownedbytes = { version = "0.3.0", path = "../ownedbytes" } ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
prettytable-rs = {version="0.9.0", optional= true} prettytable-rs = {version="0.9.0", optional= true}
rand = {version="0.8.3", optional= true} rand = {version="0.8.3", optional= true}
fastdivide = "0.4" fastdivide = "0.4"

View File

@@ -44,6 +44,7 @@ mod column;
mod gcd; mod gcd;
mod serialize; mod serialize;
/// TODO: remove when codec is used
pub use null_index::*; pub use null_index::*;
use self::bitpacked::BitpackedCodec; use self::bitpacked::BitpackedCodec;

View File

@@ -1,3 +1,4 @@
use std::convert::TryInto;
use std::io::{self, Write}; use std::io::{self, Write};
use common::BinarySerializable; use common::BinarySerializable;
@@ -23,71 +24,74 @@ pub struct DenseCodec {
// u32 is the offset of the block, the number of set bits so far. // u32 is the offset of the block, the number of set bits so far.
// //
// At the end one block is appended, to store the number of values in the index in offset. // At the end one block is appended, to store the number of values in the index in offset.
data: Vec<IndexBlock>, data: OwnedBytes,
} }
const ELEMENTS_PER_BLOCK: u32 = 32; const ELEMENTS_PER_BLOCK: u32 = 64;
const BLOCK_BITVEC_SIZE: usize = 8;
const BLOCK_OFFSET_SIZE: usize = 4;
const SERIALIZED_BLOCK_SIZE: usize = BLOCK_BITVEC_SIZE + BLOCK_OFFSET_SIZE;
#[inline] #[inline]
fn count_ones(block: u32, pos_in_block: u32) -> u32 { fn count_ones(bitvec: u64, pos_in_bitvec: u32) -> u32 {
unsafe { core::arch::x86_64::_bzhi_u32(block, pos_in_block + 1) }.count_ones() if pos_in_bitvec == 63 {
// if pos_in_block == 31 { bitvec.count_ones()
// block.count_ones() } else {
// } else { let mask = (1u64 << (pos_in_bitvec + 1)) - 1;
// let mask = (1u32 << (pos_in_block + 1)) - 1; let masked_bitvec = bitvec & mask;
// let masked_block = block & mask; masked_bitvec.count_ones()
// masked_block.count_ones() }
// }
} }
#[derive(Copy, Clone, Debug)] #[derive(Clone, Copy)]
pub struct IndexBlock { struct DenseIndexBlock {
bitvec: u32, bitvec: u64,
offset: u32, offset: u32,
} }
impl From<[u8; SERIALIZED_BLOCK_SIZE]> for DenseIndexBlock {
fn from(data: [u8; SERIALIZED_BLOCK_SIZE]) -> Self {
let bitvec = u64::from_le_bytes(data[..BLOCK_BITVEC_SIZE].try_into().unwrap());
let offset = u32::from_le_bytes(data[BLOCK_BITVEC_SIZE..].try_into().unwrap());
Self { bitvec, offset }
}
}
impl DenseCodec { impl DenseCodec {
/// Open the DenseCodec from OwnedBytes /// Open the DenseCodec from OwnedBytes
pub fn open(data: Vec<IndexBlock>) -> Self { pub fn open(data: OwnedBytes) -> Self {
Self { data } Self { data }
} }
#[inline] #[inline]
/// Check if value at position is not null. /// Check if value at position is not null.
pub fn exists(&self, idx: u32) -> bool { pub fn exists(&self, idx: u32) -> bool {
let block_pos = idx / ELEMENTS_PER_BLOCK; let block_pos = idx / ELEMENTS_PER_BLOCK;
let bitvec: u32 = self.block(block_pos); let bitvec = self.dense_index_block(block_pos).bitvec;
let pos_in_block = idx % ELEMENTS_PER_BLOCK;
get_bit_at(bitvec, pos_in_block)
}
#[inline] let pos_in_bitvec = idx % ELEMENTS_PER_BLOCK;
pub(crate) fn block(&self, block_pos: u32) -> u32 {
self.block_and_offset(block_pos).bitvec
}
get_bit_at(bitvec, pos_in_bitvec)
}
#[inline] #[inline]
/// Returns (bitvec, offset) fn dense_index_block(&self, block_pos: u32) -> DenseIndexBlock {
/// dense_index_block(&self.data, block_pos)
/// offset is the start offset of actual docids in the block.
pub(crate) fn block_and_offset(&self, block_pos: u32) -> IndexBlock {
self.data[block_pos as usize]
} }
/// Return the number of non-null values in an index /// Return the number of non-null values in an index
pub fn num_non_null_vals(&self) -> u32 { pub fn num_non_null_vals(&self) -> u32 {
let last_block = self.data.len() - 1; let last_block = (self.data.len() / SERIALIZED_BLOCK_SIZE) - 1;
self.block_and_offset(last_block as u32).offset self.dense_index_block(last_block as u32).offset
} }
#[inline] #[inline]
/// Translate from the original index to the codec index. /// Translate from the original index to the codec index.
pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> { pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> {
let block_pos = idx / ELEMENTS_PER_BLOCK; let block_pos = idx / ELEMENTS_PER_BLOCK;
let IndexBlock { bitvec: block, offset } = self.block_and_offset(block_pos); let index_block = self.dense_index_block(block_pos);
let pos_in_block = idx % ELEMENTS_PER_BLOCK; let pos_in_block_bit_vec = idx % ELEMENTS_PER_BLOCK;
let ones_in_block = count_ones(block, pos_in_block); let ones_in_block = count_ones(index_block.bitvec, pos_in_block_bit_vec);
if get_bit_at(block, pos_in_block) { if get_bit_at(index_block.bitvec, pos_in_block_bit_vec) {
Some(offset + ones_in_block - 1) // -1 is ok, since idx does exist, so there's at least // -1 is ok, since idx does exist, so there's at least one
// one Some(index_block.offset + ones_in_block - 1)
} else { } else {
None None
} }
@@ -106,16 +110,17 @@ impl DenseCodec {
iter.map(move |dense_idx| { iter.map(move |dense_idx| {
// update block_pos to limit search scope // update block_pos to limit search scope
block_pos = find_block(dense_idx, block_pos, &self.data); block_pos = find_block(dense_idx, block_pos, &self.data);
let IndexBlock { bitvec, offset} = self.block_and_offset(block_pos); let index_block = self.dense_index_block(block_pos);
// The next offset is higher than dense_idx and therefore: // The next offset is higher than dense_idx and therefore:
// dense_idx <= offset + num_set_bits in block // dense_idx <= offset + num_set_bits in block
let mut num_set_bits = 0; let mut num_set_bits = 0;
for idx_in_block in 0..ELEMENTS_PER_BLOCK { for idx_in_bitvec in 0..ELEMENTS_PER_BLOCK {
if get_bit_at(bitvec, idx_in_block) { if get_bit_at(index_block.bitvec, idx_in_bitvec) {
num_set_bits += 1; num_set_bits += 1;
} }
if num_set_bits == (dense_idx - offset + 1) { if num_set_bits == (dense_idx - index_block.offset + 1) {
let orig_idx = block_pos * ELEMENTS_PER_BLOCK + idx_in_block as u32; let orig_idx = block_pos * ELEMENTS_PER_BLOCK + idx_in_bitvec as u32;
return orig_idx; return orig_idx;
} }
} }
@@ -124,6 +129,15 @@ impl DenseCodec {
} }
} }
#[inline]
fn dense_index_block(data: &[u8], block_pos: u32) -> DenseIndexBlock {
let data_start_pos = block_pos as usize * SERIALIZED_BLOCK_SIZE;
let block_data: [u8; SERIALIZED_BLOCK_SIZE] = data[data_start_pos..][..SERIALIZED_BLOCK_SIZE]
.try_into()
.unwrap();
block_data.into()
}
#[inline] #[inline]
/// Finds the block position containing the dense_idx. /// Finds the block position containing the dense_idx.
/// ///
@@ -131,36 +145,40 @@ impl DenseCodec {
/// dense_idx needs to be smaller than the number of values in the index /// dense_idx needs to be smaller than the number of values in the index
/// ///
/// The last offset number is equal to the number of values in the index. /// The last offset number is equal to the number of values in the index.
fn find_block(dense_idx: u32, mut block_pos: u32, data: &[IndexBlock]) -> u32 { fn find_block(dense_idx: u32, mut block_pos: u32, data: &[u8]) -> u32 {
for i in block_pos.. { loop {
let index_block = &data[i as usize]; let offset = dense_index_block(data, block_pos).offset;
if index_block.offset > dense_idx { if offset > dense_idx {
// offset return block_pos - 1;
return i - 1;
} }
block_pos += 1;
} }
unreachable!()
} }
/// Iterator over all values, true if set, otherwise false /// Iterator over all values, true if set, otherwise false
pub fn serialize_dense_codec( pub fn serialize_dense_codec(
iter: impl Iterator<Item = bool>, iter: impl Iterator<Item = bool>,
out: &mut Vec<IndexBlock>, mut out: impl Write,
) -> io::Result<()> { ) -> io::Result<()> {
let mut offset: u32 = 0; let mut offset: u32 = 0;
for chunk in &iter.chunks(ELEMENTS_PER_BLOCK as usize) { for chunk in &iter.chunks(ELEMENTS_PER_BLOCK as usize) {
let mut bitvec: u32 = 0; let mut block: u64 = 0;
for (pos, is_bit_set) in chunk.enumerate() { for (pos, is_bit_set) in chunk.enumerate() {
if is_bit_set { if is_bit_set {
set_bit_at(&mut bitvec, pos as u32); set_bit_at(&mut block, pos as u64);
} }
} }
out.push(IndexBlock { bitvec, offset});
offset += bitvec.count_ones() as u32; block.serialize(&mut out)?;
offset.serialize(&mut out)?;
offset += block.count_ones() as u32;
} }
// Add sentinel block for the offset // Add sentinel block for the offset
out.push(IndexBlock { bitvec: 0, offset }); let block: u64 = 0;
block.serialize(&mut out)?;
offset.serialize(&mut out)?;
Ok(()) Ok(())
} }
@@ -185,15 +203,6 @@ mod tests {
.boxed() .boxed()
} }
#[test]
fn test_with_random_bitvecs_simple() {
let mut bitvec = Vec::new();
bitvec.extend_from_slice(&[]);
bitvec.extend_from_slice(&[]);
bitvec.extend_from_slice(&[true]);
test_null_index(bitvec);
}
proptest! { proptest! {
#![proptest_config(ProptestConfig::with_cases(500))] #![proptest_config(ProptestConfig::with_cases(500))]
#[test] #[test]
@@ -217,8 +226,7 @@ mod tests {
let mut out = vec![]; let mut out = vec![];
serialize_dense_codec(data.iter().cloned(), &mut out).unwrap(); serialize_dense_codec(data.iter().cloned(), &mut out).unwrap();
dbg!(&out); let null_index = DenseCodec::open(OwnedBytes::new(out));
let null_index = DenseCodec::open(out);
let orig_idx_with_value: Vec<u32> = data let orig_idx_with_value: Vec<u32> = data
.iter() .iter()
@@ -252,7 +260,7 @@ mod tests {
let iter = ([true, false, true, false]).iter().cloned(); let iter = ([true, false, true, false]).iter().cloned();
serialize_dense_codec(iter, &mut out).unwrap(); serialize_dense_codec(iter, &mut out).unwrap();
let null_index = DenseCodec::open(out); let null_index = DenseCodec::open(OwnedBytes::new(out));
assert_eq!( assert_eq!(
null_index null_index
@@ -268,7 +276,7 @@ mod tests {
let iter = ([true, false, true, false]).iter().cloned(); let iter = ([true, false, true, false]).iter().cloned();
serialize_dense_codec(iter, &mut out).unwrap(); serialize_dense_codec(iter, &mut out).unwrap();
let null_index = DenseCodec::open(out); let null_index = DenseCodec::open(OwnedBytes::new(out));
assert_eq!(null_index.translate_to_codec_idx(0), Some(0)); assert_eq!(null_index.translate_to_codec_idx(0), Some(0));
assert_eq!(null_index.translate_to_codec_idx(2), Some(1)); assert_eq!(null_index.translate_to_codec_idx(2), Some(1));
} }
@@ -279,7 +287,7 @@ mod tests {
let iter = ([true, false, true, false]).iter().cloned(); let iter = ([true, false, true, false]).iter().cloned();
serialize_dense_codec(iter, &mut out).unwrap(); serialize_dense_codec(iter, &mut out).unwrap();
let null_index = DenseCodec::open(out); let null_index = DenseCodec::open(OwnedBytes::new(out));
assert!(null_index.exists(0)); assert!(null_index.exists(0));
assert!(!null_index.exists(1)); assert!(!null_index.exists(1));
assert!(null_index.exists(2)); assert!(null_index.exists(2));
@@ -295,7 +303,7 @@ mod tests {
let iter = docs.iter().cloned(); let iter = docs.iter().cloned();
let mut out = vec![]; let mut out = vec![];
serialize_dense_codec(iter, &mut out).unwrap(); serialize_dense_codec(iter, &mut out).unwrap();
let null_index = DenseCodec::open(out); let null_index = DenseCodec::open(OwnedBytes::new(out));
assert!(!null_index.exists(0)); assert!(!null_index.exists(0));
assert!(!null_index.exists(100)); assert!(!null_index.exists(100));
assert!(!null_index.exists(999)); assert!(!null_index.exists(999));
@@ -320,39 +328,121 @@ mod tests {
#[cfg(all(test, feature = "unstable"))] #[cfg(all(test, feature = "unstable"))]
mod bench { mod bench {
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use test::Bencher; use test::Bencher;
use super::*; use super::*;
fn gen_bools() -> DenseCodec { const TOTAL_NUM_VALUES: u32 = 1_000_000;
fn gen_bools(fill_ratio: f64) -> DenseCodec {
let mut out = Vec::new(); let mut out = Vec::new();
let mut rng: StdRng = StdRng::from_seed([1u8; 32]); let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
// 80% of values are set let bools: Vec<_> = (0..TOTAL_NUM_VALUES)
let bools: Vec<_> = (0..100_000).map(|_| rng.gen_bool(8f64 / 10f64)).collect(); .map(|_| rng.gen_bool(fill_ratio))
.collect();
serialize_dense_codec(bools.into_iter(), &mut out).unwrap(); serialize_dense_codec(bools.into_iter(), &mut out).unwrap();
let codec = DenseCodec::open(out); let codec = DenseCodec::open(OwnedBytes::new(out));
codec codec
} }
#[bench] fn random_range_iterator(start: u32, end: u32, step_size: u32) -> impl Iterator<Item = u32> {
fn bench_dense_codec_translate_orig_to_dense(bench: &mut Bencher) { let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
let codec = gen_bools(); let mut current = start;
bench.iter(|| { std::iter::from_fn(move || {
let mut dense_idx: Option<u32> = None; current += rng.gen_range(1..step_size + 1);
for idx in 0..100_000 { if current >= end {
dense_idx = dense_idx.or(codec.translate_to_codec_idx(idx)); None
} else {
Some(current)
} }
dense_idx })
}
fn walk_over_data(codec: &DenseCodec, max_step_size: u32) -> Option<u32> {
walk_over_data_from_positions(
codec,
random_range_iterator(0, TOTAL_NUM_VALUES, max_step_size),
)
}
fn walk_over_data_from_positions(
codec: &DenseCodec,
positions: impl Iterator<Item = u32>,
) -> Option<u32> {
let mut dense_idx: Option<u32> = None;
for idx in positions {
dense_idx = dense_idx.or(codec.translate_to_codec_idx(idx));
}
dense_idx
}
#[bench]
fn bench_dense_codec_translate_orig_to_dense_90percent_filled_random_stride(
bench: &mut Bencher,
) {
let codec = gen_bools(0.9f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_dense_codec_translate_orig_to_dense_50percent_filled_random_stride(
bench: &mut Bencher,
) {
let codec = gen_bools(0.5f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_dense_codec_translate_orig_to_dense_full_scan_10percent(bench: &mut Bencher) {
let codec = gen_bools(0.1f64);
bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
}
#[bench]
fn bench_dense_codec_translate_orig_to_dense_full_scan_90percent(bench: &mut Bencher) {
let codec = gen_bools(0.9f64);
bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
}
#[bench]
fn bench_dense_codec_translate_orig_to_dense_10percent_filled_random_stride(
bench: &mut Bencher,
) {
let codec = gen_bools(0.1f64);
bench.iter(|| walk_over_data(&codec, 100));
}
#[bench]
fn bench_dense_codec_translate_dense_to_orig_90percent_filled_random_stride_big_step(
bench: &mut Bencher,
) {
let codec = gen_bools(0.9f64);
let num_vals = codec.num_non_null_vals();
bench.iter(|| {
codec
.translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 50_000))
.last()
}); });
} }
#[bench] #[bench]
fn bench_dense_codec_translate_dense_to_orig(bench: &mut Bencher) { fn bench_dense_codec_translate_dense_to_orig_90percent_filled_random_stride(
let codec = gen_bools(); bench: &mut Bencher,
) {
let codec = gen_bools(0.9f64);
let num_vals = codec.num_non_null_vals();
bench.iter(|| {
codec
.translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 100))
.last()
});
}
#[bench]
fn bench_dense_codec_translate_dense_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
let codec = gen_bools(0.9f64);
let num_vals = codec.num_non_null_vals(); let num_vals = codec.num_non_null_vals();
bench.iter(|| { bench.iter(|| {
codec codec

View File

@@ -2,14 +2,12 @@ pub use dense::{serialize_dense_codec, DenseCodec};
mod dense; mod dense;
#[inline]
fn get_bit_at(input: u64, n: u32) -> bool {
#[inline(always)]
fn get_bit_at(input: u32, n: u32) -> bool {
input & (1 << n) != 0 input & (1 << n) != 0
} }
#[inline(always)] #[inline]
fn set_bit_at(input: &mut u32, n: u32) { fn set_bit_at(input: &mut u64, n: u64) {
*input |= 1 << n; *input |= 1 << n;
} }

View File

@@ -1,10 +1,14 @@
[package] [package]
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"] authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
name = "ownedbytes" name = "ownedbytes"
version = "0.3.0" version = "0.4.0"
edition = "2021" edition = "2021"
description = "Expose data as static slice" description = "Expose data as static slice"
license = "MIT" license = "MIT"
documentation = "https://docs.rs/ownedbytes/"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy-query-grammar" name = "tantivy-query-grammar"
version = "0.18.0" version = "0.19.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = ["database-implementations", "data-structures"] categories = ["database-implementations", "data-structures"]

View File

@@ -447,8 +447,8 @@ impl SegmentUpdater {
let segment_entries = segment_updater.purge_deletes(opstamp)?; let segment_entries = segment_updater.purge_deletes(opstamp)?;
segment_updater.segment_manager.commit(segment_entries); segment_updater.segment_manager.commit(segment_entries);
segment_updater.save_metas(opstamp, payload)?; segment_updater.save_metas(opstamp, payload)?;
let _ = garbage_collect_files(segment_updater.clone()); // let _ = garbage_collect_files(segment_updater.clone());
segment_updater.consider_merge_options(); // segment_updater.consider_merge_options();
Ok(opstamp) Ok(opstamp)
}) })
} }