mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
add docs/example and Vec<u32> values to sstable (#2660)
This commit is contained in:
@@ -1,3 +1,40 @@
|
||||
//! `tantivy_sstable` is a crate that provides a sorted string table data structure.
|
||||
//!
|
||||
//! It is used in `tantivy` to store the term dictionary.
|
||||
//!
|
||||
//! An `sstable` is a sorted map from `&[u8]` keys to values.
|
||||
//! The keys are encoded using incremental encoding.
|
||||
//!
|
||||
//! Values and keys are compressed using zstd when the `zstd-compression` feature flag is enabled, which is the default.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! Here is an example of how to create and search an `sstable`:
|
||||
//!
|
||||
//! ```rust
|
||||
//! use common::OwnedBytes;
|
||||
//! use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
||||
//!
|
||||
//! // Create a new sstable in memory.
|
||||
//! let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
|
||||
//! builder.insert(b"apple", &1).unwrap();
|
||||
//! builder.insert(b"banana", &2).unwrap();
|
||||
//! builder.insert(b"orange", &3).unwrap();
|
||||
//! let sstable_bytes = builder.finish().unwrap();
|
||||
//!
|
||||
//! // Open the sstable.
|
||||
//! let sstable =
|
||||
//! Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
|
||||
//!
|
||||
//! // Search for a key.
|
||||
//! let value = sstable.get(b"banana").unwrap();
|
||||
//! assert_eq!(value, Some(2));
|
||||
//!
|
||||
//! // Search for a non-existent key.
|
||||
//! let value = sstable.get(b"grape").unwrap();
|
||||
//! assert_eq!(value, None);
|
||||
//! ```
|
||||
|
||||
use std::io::{self, Write};
|
||||
use std::ops::Range;
|
||||
|
||||
@@ -19,6 +56,7 @@ pub use streamer::{Streamer, StreamerBuilder};
|
||||
|
||||
mod block_reader;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use value::{VecU32ValueReader, VecU32ValueWriter};
|
||||
|
||||
pub use self::block_reader::BlockReader;
|
||||
pub use self::delta::{DeltaReader, DeltaWriter};
|
||||
@@ -130,6 +168,15 @@ impl SSTable for RangeSSTable {
|
||||
type ValueWriter = RangeValueWriter;
|
||||
}
|
||||
|
||||
/// SSTable associating keys to Vec<u32>.
|
||||
pub struct VecU32ValueSSTable;
|
||||
|
||||
impl SSTable for VecU32ValueSSTable {
|
||||
type Value = Vec<u32>;
|
||||
type ValueReader = VecU32ValueReader;
|
||||
type ValueWriter = VecU32ValueWriter;
|
||||
}
|
||||
|
||||
/// SSTable reader.
|
||||
pub struct Reader<TValueReader> {
|
||||
key: Vec<u8>,
|
||||
|
||||
@@ -1,10 +1,16 @@
|
||||
pub(crate) mod index;
|
||||
mod range;
|
||||
mod u64_monotonic;
|
||||
mod vec_u32;
|
||||
mod void;
|
||||
|
||||
use std::io;
|
||||
|
||||
pub use range::{RangeValueReader, RangeValueWriter};
|
||||
pub use u64_monotonic::{U64MonotonicValueReader, U64MonotonicValueWriter};
|
||||
pub use vec_u32::{VecU32ValueReader, VecU32ValueWriter};
|
||||
pub use void::{VoidValueReader, VoidValueWriter};
|
||||
|
||||
/// `ValueReader` is a trait describing the contract of something
|
||||
/// reading blocks of values, and offering random access within these values.
|
||||
pub trait ValueReader: Default {
|
||||
@@ -40,10 +46,6 @@ pub trait ValueWriter: Default {
|
||||
fn clear(&mut self);
|
||||
}
|
||||
|
||||
pub use range::{RangeValueReader, RangeValueWriter};
|
||||
pub use u64_monotonic::{U64MonotonicValueReader, U64MonotonicValueWriter};
|
||||
pub use void::{VoidValueReader, VoidValueWriter};
|
||||
|
||||
fn deserialize_vint_u64(data: &mut &[u8]) -> u64 {
|
||||
let (num_bytes, val) = super::vint::deserialize_read(data);
|
||||
*data = &data[num_bytes..];
|
||||
|
||||
73
sstable/src/value/vec_u32.rs
Normal file
73
sstable/src/value/vec_u32.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use std::io;
|
||||
|
||||
use super::{ValueReader, ValueWriter};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VecU32ValueReader {
|
||||
vals: Vec<Vec<u32>>,
|
||||
}
|
||||
|
||||
impl ValueReader for VecU32ValueReader {
|
||||
type Value = Vec<u32>;
|
||||
|
||||
#[inline(always)]
|
||||
fn value(&self, idx: usize) -> &Self::Value {
|
||||
&self.vals[idx]
|
||||
}
|
||||
|
||||
fn load(&mut self, mut data: &[u8]) -> io::Result<usize> {
|
||||
let original_num_bytes = data.len();
|
||||
self.vals.clear();
|
||||
|
||||
// The first 4 bytes are the number of blocks
|
||||
let num_blocks = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
|
||||
data = &data[4..];
|
||||
|
||||
for _ in 0..num_blocks {
|
||||
// Each block starts with a 4-byte length
|
||||
let segment_len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
|
||||
data = &data[4..];
|
||||
|
||||
// Read the segment IDs for this block
|
||||
let mut segment_ids = Vec::with_capacity(segment_len);
|
||||
for _ in 0..segment_len {
|
||||
let segment_id = u32::from_le_bytes(data[..4].try_into().unwrap());
|
||||
segment_ids.push(segment_id);
|
||||
data = &data[4..];
|
||||
}
|
||||
self.vals.push(segment_ids);
|
||||
}
|
||||
|
||||
// Return the number of bytes consumed
|
||||
Ok(original_num_bytes - data.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VecU32ValueWriter {
|
||||
vals: Vec<Vec<u32>>,
|
||||
}
|
||||
|
||||
impl ValueWriter for VecU32ValueWriter {
|
||||
type Value = Vec<u32>;
|
||||
|
||||
fn write(&mut self, val: &Self::Value) {
|
||||
self.vals.push(val.to_vec());
|
||||
}
|
||||
|
||||
fn serialize_block(&self, output: &mut Vec<u8>) {
|
||||
let num_blocks = self.vals.len() as u32;
|
||||
output.extend_from_slice(&num_blocks.to_le_bytes());
|
||||
for vals in &self.vals {
|
||||
let len = vals.len() as u32;
|
||||
output.extend_from_slice(&len.to_le_bytes());
|
||||
for &segment_id in vals.iter() {
|
||||
output.extend_from_slice(&segment_id.to_le_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn clear(&mut self) {
|
||||
self.vals.clear();
|
||||
}
|
||||
}
|
||||
49
sstable/tests/sstable_test.rs
Normal file
49
sstable/tests/sstable_test.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use common::OwnedBytes;
|
||||
use tantivy_sstable::{Dictionary, MonotonicU64SSTable, VecU32ValueSSTable};
|
||||
|
||||
/// Builds a `MonotonicU64SSTable` dictionary in memory, reopens it from the
/// produced bytes, and checks point lookups for a stored and a missing key.
#[test]
fn test_create_and_search_sstable() {
    // Write three entries, in ascending key order, into an in-memory buffer.
    let mut writer = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
    writer.insert(b"apple", &1).unwrap();
    writer.insert(b"banana", &2).unwrap();
    writer.insert(b"orange", &3).unwrap();
    let buffer = writer.finish().unwrap();

    // Reopen the dictionary from the serialized bytes.
    let dict = Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(buffer)).unwrap();

    // A key that was inserted yields its value.
    assert_eq!(dict.get(b"banana").unwrap(), Some(2));

    // A key that was never inserted yields nothing.
    assert_eq!(dict.get(b"blub").unwrap(), None);
}
|
||||
|
||||
/// Round-trips `Vec<u32>` values through a `VecU32ValueSSTable` dictionary
/// and checks that streaming yields every entry, in key order, exactly once.
#[test]
fn test_custom_value_sstable() {
    // Write two entries with vector values into an in-memory buffer.
    let mut writer = Dictionary::<VecU32ValueSSTable>::builder(Vec::new()).unwrap();
    writer.set_block_len(4096); // Ensure both values are in the same block
    writer.insert(b"first", &vec![1, 2, 3]).unwrap();
    writer.insert(b"second", &vec![4, 5]).unwrap();
    let buffer = writer.finish().unwrap();

    // Reopen the dictionary from the serialized bytes.
    let dict = Dictionary::<VecU32ValueSSTable>::from_bytes(OwnedBytes::new(buffer)).unwrap();

    let mut cursor = dict.stream().unwrap();

    // First entry.
    assert!(cursor.advance());
    assert_eq!(cursor.key(), b"first");
    assert_eq!(cursor.value(), &vec![1, 2, 3]);

    // Second entry.
    assert!(cursor.advance());
    assert_eq!(cursor.key(), b"second");
    assert_eq!(cursor.value(), &vec![4, 5]);

    // The stream is exhausted after the two inserted entries.
    assert!(!cursor.advance());
}
|
||||
Reference in New Issue
Block a user