use std::io::{self, Write};
use std::sync::Arc;

mod set;
mod set_block;

use common::{BinarySerializable, OwnedBytes, VInt};
pub use set::{SelectCursor, Set, SetCodec};
use set_block::{
    DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
};

use crate::iterable::Iterable;
use crate::{DocId, InvalidData, RowId};

/// The threshold for the number of elements after which we switch to dense block encoding.
///
/// We simply pick the value that minimizes the size of the blocks.
const DENSE_BLOCK_THRESHOLD: u32 =
    set_block::DENSE_BLOCK_NUM_BYTES / std::mem::size_of::<u16>() as u32; //< 5_120

const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;

const BLOCK_SIZE: RowId = 1 << 16;

#[derive(Copy, Clone, Debug)]
struct BlockMeta {
    non_null_rows_before_block: u32,
    start_byte_offset: u32,
    block_variant: BlockVariant,
}

#[derive(Clone, Copy, Debug)]
enum BlockVariant {
    Dense,
    Sparse { num_vals: u16 },
}

impl BlockVariant {
    pub fn empty() -> Self {
        Self::Sparse { num_vals: 0 }
    }
    pub fn num_bytes_in_block(&self) -> u32 {
        match *self {
            BlockVariant::Dense => set_block::DENSE_BLOCK_NUM_BYTES,
            BlockVariant::Sparse { num_vals } => num_vals as u32 * 2,
        }
    }
}
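
// Size sanity check (derived from the constants above): a sparse block costs 2 bytes
// per value, so at DENSE_BLOCK_THRESHOLD == DENSE_BLOCK_NUM_BYTES / 2 values the
// sparse encoding (2 * 5_120 == 10_240 bytes) would match the fixed dense footprint;
// beyond that point the dense encoding is never larger.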

/// This codec is inspired by roaring bitmaps.
/// In the dense blocks, however, in order to accelerate `select`
/// we interleave an offset over two bytes (more on this below).
///
/// The lower 16 bits of doc ids are stored as u16 while the upper 16 bits are given by the block
/// id. Each block contains 1 << 16 doc ids.
///
/// # Serialized Data Layout
/// The data starts with the block data. Each block is either dense or sparse encoded, depending on
/// the number of values in the block. A block is sparse when it contains fewer than
/// DENSE_BLOCK_THRESHOLD (5_120) values.
/// [Sparse data block | dense data block, .. #repeat*; Desc: Either a sparse or dense encoded
/// block]
/// ### Sparse block data
/// [u16 LE, .. #repeat*; Desc: Positions with values in a block]
/// ### Dense block data
/// [Dense codec for the whole block; Desc: Similar to a bitvec(0..ELEMENTS_PER_BLOCK) + Metadata
/// for faster lookups. See dense.rs]
///
/// The block data is followed by block metadata, so that we know which area of the raw block data
/// belongs to which block. Only metadata for blocks with elements is recorded, to keep the
/// overhead low for scenarios with many very sparse columns. A block metadata entry consists
/// of the block index and the number of values in the block. Since we don't store empty blocks,
/// the stored num_vals is decremented by 1, e.g. a stored 0 means 1 value.
///
/// The last u16 stores the number of metadata blocks.
/// [u16 LE, .. #repeat*; Desc: Positions with values in a block][(u16 LE, u16 LE), .. #repeat*;
/// Desc: (Block Id u16, Num Elements u16)][u16 LE; Desc: num blocks with values u16]
///
/// # Opening
/// When opening the data layout, the data is expanded to a `Vec<BlockMeta>`, where the
/// index is the block index. For each block, `start_byte_offset` and
/// `non_null_rows_before_block` are computed.
#[derive(Clone)]
pub struct OptionalIndex {
    num_rows: RowId,
    num_non_null_rows: RowId,
    block_data: OwnedBytes,
    block_metas: Arc<[BlockMeta]>,
}

impl std::fmt::Debug for OptionalIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OptionalIndex")
            .field("num_rows", &self.num_rows)
            .field("num_non_null_rows", &self.num_non_null_rows)
            .finish_non_exhaustive()
    }
}

/// Splits a value address into lower and upper 16 bits.
/// The lower 16 bits are the value in the block.
/// The upper 16 bits are the block index.
#[derive(Copy, Debug, Clone)]
struct RowAddr {
    block_id: u16,
    in_block_row_id: u16,
}

#[inline(always)]
fn row_addr_from_row_id(row_id: RowId) -> RowAddr {
    RowAddr {
        block_id: (row_id / BLOCK_SIZE) as u16,
        in_block_row_id: (row_id % BLOCK_SIZE) as u16,
    }
}
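
// For example (worked by hand from the definitions above): row_id 70_000 ==
// 1 * 65_536 + 4_464, so it maps to block_id == 1 and in_block_row_id == 4_464.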

enum BlockSelectCursor<'a> {
    Dense(<DenseBlock<'a> as Set<u16>>::SelectCursor<'a>),
    Sparse(<SparseBlock<'a> as Set<u16>>::SelectCursor<'a>),
}

impl<'a> BlockSelectCursor<'a> {
    fn select(&mut self, rank: u16) -> u16 {
        match self {
            BlockSelectCursor::Dense(dense_select_cursor) => dense_select_cursor.select(rank),
            BlockSelectCursor::Sparse(sparse_select_cursor) => sparse_select_cursor.select(rank),
        }
    }
}

pub struct OptionalIndexSelectCursor<'a> {
    current_block_cursor: BlockSelectCursor<'a>,
    current_block_id: u16,
    // The current block is guaranteed to contain ranks < end_rank.
    current_block_end_rank: RowId,
    optional_index: &'a OptionalIndex,
    block_doc_idx_start: RowId,
    num_non_null_rows_before_block: RowId,
}

impl<'a> OptionalIndexSelectCursor<'a> {
    fn search_and_load_block(&mut self, rank: RowId) {
        if rank < self.current_block_end_rank {
            // We are already in the right block.
            return;
        }
        self.current_block_id = self.optional_index.find_block(rank, self.current_block_id);
        self.current_block_end_rank = self
            .optional_index
            .block_metas
            .get(self.current_block_id as usize + 1)
            .map(|block_meta| block_meta.non_null_rows_before_block)
            .unwrap_or(u32::MAX);
        self.block_doc_idx_start = (self.current_block_id as u32) * ELEMENTS_PER_BLOCK;
        let block_meta = self.optional_index.block_metas[self.current_block_id as usize];
        self.num_non_null_rows_before_block = block_meta.non_null_rows_before_block;
        let block: Block<'_> = self.optional_index.block(block_meta);
        self.current_block_cursor = match block {
            Block::Dense(dense_block) => BlockSelectCursor::Dense(dense_block.select_cursor()),
            Block::Sparse(sparse_block) => BlockSelectCursor::Sparse(sparse_block.select_cursor()),
        };
    }
}

impl<'a> SelectCursor<RowId> for OptionalIndexSelectCursor<'a> {
    fn select(&mut self, rank: RowId) -> RowId {
        self.search_and_load_block(rank);
        let index_in_block = (rank - self.num_non_null_rows_before_block) as u16;
        self.current_block_cursor.select(index_in_block) as RowId + self.block_doc_idx_start
    }
}
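
// Note: this cursor only moves forward. `search_and_load_block` resumes its block
// scan from the current block, so ranks should be queried in non-decreasing order.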

impl Set<RowId> for OptionalIndex {
    type SelectCursor<'b> = OptionalIndexSelectCursor<'b> where Self: 'b;

    // Check if the value at the position is not null.
    #[inline]
    fn contains(&self, row_id: RowId) -> bool {
        let RowAddr {
            block_id,
            in_block_row_id,
        } = row_addr_from_row_id(row_id);
        let block_meta = self.block_metas[block_id as usize];
        match self.block(block_meta) {
            Block::Dense(dense_block) => dense_block.contains(in_block_row_id),
            Block::Sparse(sparse_block) => sparse_block.contains(in_block_row_id),
        }
    }

    #[inline]
    fn rank(&self, doc_id: DocId) -> RowId {
        let RowAddr {
            block_id,
            in_block_row_id,
        } = row_addr_from_row_id(doc_id);
        let block_meta = self.block_metas[block_id as usize];
        let block = self.block(block_meta);
        let block_offset_row_id = match block {
            Block::Dense(dense_block) => dense_block.rank(in_block_row_id),
            Block::Sparse(sparse_block) => sparse_block.rank(in_block_row_id),
        } as u32;
        block_meta.non_null_rows_before_block + block_offset_row_id
    }

    #[inline]
    fn rank_if_exists(&self, doc_id: DocId) -> Option<RowId> {
        let RowAddr {
            block_id,
            in_block_row_id,
        } = row_addr_from_row_id(doc_id);
        let block_meta = self.block_metas[block_id as usize];
        let block = self.block(block_meta);
        let block_offset_row_id = match block {
            Block::Dense(dense_block) => dense_block.rank_if_exists(in_block_row_id),
            Block::Sparse(sparse_block) => sparse_block.rank_if_exists(in_block_row_id),
        }? as u32;
        Some(block_meta.non_null_rows_before_block + block_offset_row_id)
    }

    #[inline]
    fn select(&self, rank: RowId) -> RowId {
        let block_pos = self.find_block(rank, 0);
        let block_doc_idx_start = (block_pos as u32) * ELEMENTS_PER_BLOCK;
        let block_meta = self.block_metas[block_pos as usize];
        let block: Block<'_> = self.block(block_meta);
        let index_in_block = (rank - block_meta.non_null_rows_before_block) as u16;
        let in_block_rank = match block {
            Block::Dense(dense_block) => dense_block.select(index_in_block),
            Block::Sparse(sparse_block) => sparse_block.select(index_in_block),
        };
        block_doc_idx_start + in_block_rank as u32
    }

    fn select_cursor(&self) -> OptionalIndexSelectCursor<'_> {
        OptionalIndexSelectCursor {
            current_block_cursor: BlockSelectCursor::Sparse(
                SparseBlockCodec::open(b"").select_cursor(),
            ),
            current_block_id: 0u16,
            current_block_end_rank: 0u32, //< this is sufficient to force the first load
            optional_index: self,
            block_doc_idx_start: 0u32,
            num_non_null_rows_before_block: 0u32,
        }
    }
}
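
// Illustration (worked by hand, hypothetical values): with non-null rows {2, 5, 7},
// `rank_if_exists(5)` returns Some(1) (row 5 is the second non-null row) and
// `select(1)` returns 5; the two operations are inverses of each other.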

impl OptionalIndex {
    pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
        assert!(row_ids
            .last()
            .copied()
            .map(|last_row_id| last_row_id < num_rows)
            .unwrap_or(true));
        let mut buffer = Vec::new();
        serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
        let bytes = OwnedBytes::new(buffer);
        open_optional_index(bytes).unwrap()
    }

    pub fn num_docs(&self) -> RowId {
        self.num_rows
    }

    pub fn num_non_nulls(&self) -> RowId {
        self.num_non_null_rows
    }

    pub fn iter_rows(&self) -> impl Iterator<Item = RowId> + '_ {
        // TODO optimize
        let mut select_batch = self.select_cursor();
        (0..self.num_non_null_rows).map(move |rank| select_batch.select(rank))
    }

    pub fn select_batch(&self, ranks: &mut [RowId]) {
        let mut select_cursor = self.select_cursor();
        for rank in ranks.iter_mut() {
            *rank = select_cursor.select(*rank);
        }
    }
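
    // For example (worked by hand): with non-null rows {2, 5, 7}, calling
    // `select_batch(&mut [0, 2])` rewrites the slice in place to `[2, 7]`.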

    #[inline]
    fn block(&self, block_meta: BlockMeta) -> Block<'_> {
        let BlockMeta {
            start_byte_offset,
            block_variant,
            ..
        } = block_meta;
        let start_byte_offset = start_byte_offset as usize;
        let bytes = self.block_data.as_slice();
        match block_variant {
            BlockVariant::Dense => Block::Dense(DenseBlockCodec::open(
                &bytes[start_byte_offset..start_byte_offset + DENSE_BLOCK_NUM_BYTES as usize],
            )),
            BlockVariant::Sparse { num_vals } => {
                let end_byte_offset = start_byte_offset + num_vals as usize * 2;
                let sparse_bytes = &bytes[start_byte_offset..end_byte_offset];
                Block::Sparse(SparseBlockCodec::open(sparse_bytes))
            }
        }
    }

    #[inline]
    fn find_block(&self, dense_idx: u32, start_block_pos: u16) -> u16 {
        for block_pos in start_block_pos..self.block_metas.len() as u16 {
            let offset = self.block_metas[block_pos as usize].non_null_rows_before_block;
            if offset > dense_idx {
                return block_pos - 1u16;
            }
        }
        self.block_metas.len() as u16 - 1u16
    }

    // TODO: Add a good API for the codec_idx to original_idx translation.
    // The Iterator API is probably a bad idea.
}

#[derive(Copy, Clone)]
enum Block<'a> {
    Dense(DenseBlock<'a>),
    Sparse(SparseBlock<'a>),
}

#[derive(Debug, Copy, Clone)]
enum OptionalIndexCodec {
    Dense = 0,
    Sparse = 1,
}

impl OptionalIndexCodec {
    fn to_code(self) -> u8 {
        self as u8
    }

    fn try_from_code(code: u8) -> Result<Self, InvalidData> {
        match code {
            0 => Ok(Self::Dense),
            1 => Ok(Self::Sparse),
            _ => Err(InvalidData),
        }
    }
}

impl BinarySerializable for OptionalIndexCodec {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(&[self.to_code()])
    }

    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let optional_codec_code = u8::deserialize(reader)?;
        let optional_codec = Self::try_from_code(optional_codec_code)?;
        Ok(optional_codec)
    }
}

fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -> io::Result<()> {
    let is_sparse = is_sparse(block_els.len() as u32);
    if is_sparse {
        SparseBlockCodec::serialize(block_els.iter().copied(), out)?;
    } else {
        DenseBlockCodec::serialize(block_els.iter().copied(), out)?;
    }
    Ok(())
}

pub fn serialize_optional_index<W: io::Write>(
    non_null_rows: &dyn Iterable<RowId>,
    num_rows: RowId,
    output: &mut W,
) -> io::Result<()> {
    VInt(num_rows as u64).serialize(output)?;

    let mut rows_it = non_null_rows.boxed_iter();
    let mut block_metadata: Vec<SerializedBlockMeta> = Vec::new();
    let mut current_block = Vec::new();

    // Handling the first element separately ensures that
    // `block_metadata` is not empty in the loop below.
    let Some(idx) = rows_it.next() else {
        output.write_all(&0u16.to_le_bytes())?;
        return Ok(());
    };

    let row_addr = row_addr_from_row_id(idx);

    let mut current_block_id = row_addr.block_id;
    current_block.push(row_addr.in_block_row_id);

    for idx in rows_it {
        let value_addr = row_addr_from_row_id(idx);
        if current_block_id != value_addr.block_id {
            serialize_optional_index_block(&current_block[..], output)?;
            block_metadata.push(SerializedBlockMeta {
                block_id: current_block_id,
                num_non_null_rows: current_block.len() as u32,
            });
            current_block.clear();
            current_block_id = value_addr.block_id;
        }
        current_block.push(value_addr.in_block_row_id);
    }

    // Handle the last block.
    serialize_optional_index_block(&current_block[..], output)?;

    block_metadata.push(SerializedBlockMeta {
        block_id: current_block_id,
        num_non_null_rows: current_block.len() as u32,
    });

    for block in &block_metadata {
        output.write_all(&block.to_bytes())?;
    }

    output.write_all((block_metadata.len() as u16).to_le_bytes().as_ref())?;

    Ok(())
}
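
// A hand-worked example of the layout (illustrative, not from the original source):
// serializing num_rows == 3 with a single non-null row id 1 writes, after the VInt
// header, the sparse block [0x01, 0x00] (in-block position 1 as u16 LE), one metadata
// entry [0x00, 0x00, 0x00, 0x00] (block id 0, num_vals - 1 == 0), and finally the
// block count [0x01, 0x00].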

const SERIALIZED_BLOCK_META_NUM_BYTES: usize = 4;

#[derive(Clone, Copy, Debug)]
struct SerializedBlockMeta {
    block_id: u16,
    num_non_null_rows: u32, //< takes values in 1..=u16::MAX + 1
}

// TODO unit tests
impl SerializedBlockMeta {
    #[inline]
    fn from_bytes(bytes: [u8; SERIALIZED_BLOCK_META_NUM_BYTES]) -> SerializedBlockMeta {
        let block_id = u16::from_le_bytes(bytes[0..2].try_into().unwrap());
        let num_non_null_rows: u32 =
            u16::from_le_bytes(bytes[2..4].try_into().unwrap()) as u32 + 1u32;
        SerializedBlockMeta {
            block_id,
            num_non_null_rows,
        }
    }

    #[inline]
    fn to_bytes(self) -> [u8; SERIALIZED_BLOCK_META_NUM_BYTES] {
        assert!(self.num_non_null_rows > 0);
        let mut bytes = [0u8; SERIALIZED_BLOCK_META_NUM_BYTES];
        bytes[0..2].copy_from_slice(&self.block_id.to_le_bytes());
        // We don't store empty blocks, therefore we can subtract 1.
        // This way a full block of 1 << 16 (u16::MAX + 1) elements still fits in a u16.
        bytes[2..4].copy_from_slice(&((self.num_non_null_rows - 1u32) as u16).to_le_bytes());
        bytes
    }
}
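
// For instance (worked by hand): a full block of 65_536 values is stored as
// 0xFFFF by `to_bytes` and decoded back as 0xFFFF + 1 == 65_536 by `from_bytes`.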

#[inline]
fn is_sparse(num_rows_in_block: u32) -> bool {
    num_rows_in_block < DENSE_BLOCK_THRESHOLD
}

fn deserialize_optional_index_block_metadatas(
    data: &[u8],
    num_rows: u32,
) -> (Box<[BlockMeta]>, u32) {
    let num_blocks = data.len() / SERIALIZED_BLOCK_META_NUM_BYTES;
    let mut block_metas = Vec::with_capacity(num_blocks + 1);
    let mut start_byte_offset = 0;
    let mut non_null_rows_before_block = 0;
    for block_meta_bytes in data.chunks_exact(SERIALIZED_BLOCK_META_NUM_BYTES) {
        let block_meta_bytes: [u8; SERIALIZED_BLOCK_META_NUM_BYTES] =
            block_meta_bytes.try_into().unwrap();
        let SerializedBlockMeta {
            block_id,
            num_non_null_rows,
        } = SerializedBlockMeta::from_bytes(block_meta_bytes);
        // Fill in metadata for the empty blocks preceding this one.
        block_metas.resize(
            block_id as usize,
            BlockMeta {
                non_null_rows_before_block,
                start_byte_offset,
                block_variant: BlockVariant::empty(),
            },
        );
        let block_variant = if is_sparse(num_non_null_rows) {
            BlockVariant::Sparse {
                num_vals: num_non_null_rows as u16,
            }
        } else {
            BlockVariant::Dense
        };
        block_metas.push(BlockMeta {
            non_null_rows_before_block,
            start_byte_offset,
            block_variant,
        });
        start_byte_offset += block_variant.num_bytes_in_block();
        non_null_rows_before_block += num_non_null_rows;
    }
    // Pad with trailing empty blocks up to the total number of blocks.
    block_metas.resize(
        ((num_rows + BLOCK_SIZE - 1) / BLOCK_SIZE) as usize,
        BlockMeta {
            non_null_rows_before_block,
            start_byte_offset,
            block_variant: BlockVariant::empty(),
        },
    );
    (block_metas.into_boxed_slice(), non_null_rows_before_block)
}
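
// Worked example (hypothetical blocks): if block 0 holds 3 values (sparse) and block 2
// holds 6_000 values (dense), the expanded metadata is: block 0 -> {before: 0, start: 0,
// sparse(3)}, block 1 (empty) -> {before: 3, start: 6, sparse(0)}, block 2 ->
// {before: 3, start: 6, dense}; the returned total of non-null rows is 6_003.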

pub fn open_optional_index(bytes: OwnedBytes) -> io::Result<OptionalIndex> {
    let (mut bytes, num_non_empty_blocks_bytes) = bytes.rsplit(2);
    let num_non_empty_blocks =
        u16::from_le_bytes(num_non_empty_blocks_bytes.as_slice().try_into().unwrap());
    let num_rows = VInt::deserialize_u64(&mut bytes)? as u32;
    let block_metas_num_bytes = num_non_empty_blocks as usize * SERIALIZED_BLOCK_META_NUM_BYTES;
    let (block_data, block_metas) = bytes.rsplit(block_metas_num_bytes);
    let (block_metas, num_non_null_rows) =
        deserialize_optional_index_block_metadatas(block_metas.as_slice(), num_rows);
    let optional_index = OptionalIndex {
        num_rows,
        num_non_null_rows,
        block_data,
        block_metas: block_metas.into(),
    };
    Ok(optional_index)
}

#[cfg(test)]
mod tests;
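
// A minimal usage sketch: `for_test` round-trips through `serialize_optional_index`
// and `open_optional_index`, so this exercises the full codec on a toy column. This
// module is an illustrative addition, separate from the test suite in `tests`.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn rank_select_roundtrip() {
        // Build a tiny index: 100 rows, of which rows 2, 5 and 20 are non-null.
        let index = OptionalIndex::for_test(100, &[2, 5, 20]);
        assert_eq!(index.num_docs(), 100);
        assert_eq!(index.num_non_nulls(), 3);
        assert!(index.contains(5));
        assert!(!index.contains(6));
        // `rank_if_exists` maps a row id to its ordinal among the non-null rows...
        assert_eq!(index.rank_if_exists(5), Some(1));
        // ...and `select` is the inverse mapping.
        assert_eq!(index.select(1), 5);
        // The row iterator yields the non-null row ids in order.
        assert_eq!(index.iter_rows().collect::<Vec<_>>(), vec![2, 5, 20]);
    }
}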