mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Refactoring to prepare for the addition of dynamic fast field
This commit is contained in:
@@ -60,9 +60,9 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona
|
||||
stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
|
||||
tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
|
||||
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
|
||||
common = { version= "0.4", path = "./common/", package = "tantivy-common" }
|
||||
common = { version= "0.5", path = "./common/", package = "tantivy-common" }
|
||||
fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
|
||||
ownedbytes = { version= "0.4", path="./ownedbytes" }
|
||||
ownedbytes = { version= "0.5", path="./ownedbytes" }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = "0.3.9"
|
||||
|
||||
@@ -101,7 +101,7 @@ impl BitUnpacker {
|
||||
.try_into()
|
||||
.unwrap();
|
||||
let val_unshifted_unmasked: u64 = u64::from_le_bytes(bytes);
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift);
|
||||
let val_shifted: u64 = val_unshifted_unmasked >> bit_shift;
|
||||
val_shifted & self.mask
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-common"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
@@ -14,7 +14,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
[dependencies]
|
||||
byteorder = "1.4.3"
|
||||
ownedbytes = { version= "0.4", path="../ownedbytes" }
|
||||
ownedbytes = { version= "0.5", path="../ownedbytes" }
|
||||
async-trait = "0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
proptest = "1.0.0"
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
use std::ops::{Deref, Range};
|
||||
use std::ops::{Deref, Range, RangeBounds};
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common::HasLen;
|
||||
use stable_deref_trait::StableDeref;
|
||||
use ownedbytes::{OwnedBytes, StableDeref};
|
||||
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::HasLen;
|
||||
|
||||
/// Objects that represents files sections in tantivy.
|
||||
///
|
||||
/// By contract, whatever happens to the directory file, as long as a FileHandle
|
||||
/// is alive, the data associated with it cannot be altered or destroyed.
|
||||
///
|
||||
/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
|
||||
/// The underlying behavior is therefore specific to the `Directory` that
|
||||
/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
|
||||
/// on the filesystem.
|
||||
|
||||
@@ -24,13 +23,9 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + fmt::Debug {
|
||||
/// This method may panic if the range requested is invalid.
|
||||
fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes>;
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[doc(hidden)]
|
||||
async fn read_bytes_async(
|
||||
&self,
|
||||
_byte_range: Range<usize>,
|
||||
) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
Err(crate::error::AsyncIoError::AsyncUnsupported)
|
||||
async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
self.read_bytes(byte_range)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +37,7 @@ impl FileHandle for &'static [u8] {
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
async fn read_bytes_async(&self, byte_range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
Ok(self.read_bytes(byte_range)?)
|
||||
}
|
||||
}
|
||||
@@ -70,6 +65,25 @@ impl fmt::Debug for FileSlice {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
|
||||
let start: usize = orig_range.start
|
||||
+ match rel_range.start_bound().cloned() {
|
||||
std::ops::Bound::Included(rel_start) => rel_start,
|
||||
std::ops::Bound::Excluded(rel_start) => rel_start + 1,
|
||||
std::ops::Bound::Unbounded => 0,
|
||||
};
|
||||
assert!(start <= orig_range.end);
|
||||
let end: usize = match rel_range.end_bound().cloned() {
|
||||
std::ops::Bound::Included(rel_end) => orig_range.start + rel_end + 1,
|
||||
std::ops::Bound::Excluded(rel_end) => orig_range.start + rel_end,
|
||||
std::ops::Bound::Unbounded => orig_range.end,
|
||||
};
|
||||
assert!(end >= start);
|
||||
assert!(end <= orig_range.end);
|
||||
start..end
|
||||
}
|
||||
|
||||
impl FileSlice {
|
||||
/// Wraps a FileHandle.
|
||||
pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
|
||||
@@ -93,11 +107,11 @@ impl FileSlice {
|
||||
///
|
||||
/// Panics if `byte_range.end` exceeds the filesize.
|
||||
#[must_use]
|
||||
pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
|
||||
assert!(byte_range.end <= self.len());
|
||||
#[inline]
|
||||
pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
|
||||
FileSlice {
|
||||
data: self.data.clone(),
|
||||
range: self.range.start + byte_range.start..self.range.start + byte_range.end,
|
||||
range: combine_ranges(self.range.clone(), byte_range),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,9 +131,8 @@ impl FileSlice {
|
||||
self.data.read_bytes(self.range.clone())
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[doc(hidden)]
|
||||
pub async fn read_bytes_async(&self) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
pub async fn read_bytes_async(&self) -> io::Result<OwnedBytes> {
|
||||
self.data.read_bytes_async(self.range.clone()).await
|
||||
}
|
||||
|
||||
@@ -137,12 +150,8 @@ impl FileSlice {
|
||||
.read_bytes(self.range.start + range.start..self.range.start + range.end)
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[doc(hidden)]
|
||||
pub async fn read_bytes_slice_async(
|
||||
&self,
|
||||
byte_range: Range<usize>,
|
||||
) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
assert!(
|
||||
self.range.start + byte_range.end <= self.range.end,
|
||||
"`to` exceeds the fileslice length"
|
||||
@@ -205,7 +214,7 @@ impl FileHandle for FileSlice {
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
async fn read_bytes_async(&self, byte_range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
self.read_bytes_slice_async(byte_range).await
|
||||
}
|
||||
}
|
||||
@@ -223,7 +232,7 @@ impl FileHandle for OwnedBytes {
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
async fn read_bytes_async(&self, range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
|
||||
async fn read_bytes_async(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
let bytes = self.read_bytes(range)?;
|
||||
Ok(bytes)
|
||||
}
|
||||
@@ -234,9 +243,9 @@ mod tests {
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::HasLen;
|
||||
|
||||
use super::{FileHandle, FileSlice};
|
||||
use crate::file_slice::combine_ranges;
|
||||
use crate::HasLen;
|
||||
|
||||
#[test]
|
||||
fn test_file_slice() -> io::Result<()> {
|
||||
@@ -307,4 +316,18 @@ mod tests {
|
||||
b"bcd"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_combine_range() {
|
||||
assert_eq!(combine_ranges(1..3, 0..1), 1..2);
|
||||
assert_eq!(combine_ranges(1..3, 1..), 2..3);
|
||||
assert_eq!(combine_ranges(1..4, ..2), 1..3);
|
||||
assert_eq!(combine_ranges(3..10, 2..5), 5..8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_combine_range_panics() {
|
||||
let _ = combine_ranges(3..5, 1..4);
|
||||
}
|
||||
}
|
||||
@@ -5,11 +5,12 @@ use std::ops::Deref;
|
||||
pub use byteorder::LittleEndian as Endianness;
|
||||
|
||||
mod bitset;
|
||||
pub mod file_slice;
|
||||
mod serialize;
|
||||
mod vint;
|
||||
mod writer;
|
||||
|
||||
pub use bitset::*;
|
||||
pub use ownedbytes::OwnedBytes;
|
||||
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
||||
pub use vint::{
|
||||
deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
|
||||
|
||||
@@ -12,9 +12,9 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
|
||||
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
|
||||
ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
|
||||
ownedbytes = { version = "0.5", path = "../ownedbytes" }
|
||||
prettytable-rs = {version="0.9.0", optional= true}
|
||||
rand = {version="0.8.3", optional= true}
|
||||
fastdivide = "0.4"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
name = "ownedbytes"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
description = "Expose data as static slice"
|
||||
license = "MIT"
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::ops::{Deref, Range};
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io, mem};
|
||||
|
||||
use stable_deref_trait::StableDeref;
|
||||
pub use stable_deref_trait::StableDeref;
|
||||
|
||||
/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
|
||||
/// this data as a slice.
|
||||
|
||||
@@ -200,10 +200,7 @@ impl InvertedIndexReader {
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
impl InvertedIndexReader {
|
||||
pub(crate) async fn get_term_info_async(
|
||||
&self,
|
||||
term: &Term,
|
||||
) -> crate::AsyncIoResult<Option<TermInfo>> {
|
||||
pub(crate) async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
|
||||
self.termdict.get_async(term.value_bytes()).await
|
||||
}
|
||||
|
||||
@@ -211,12 +208,8 @@ impl InvertedIndexReader {
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using [`Self::read_postings()`] instead.
|
||||
pub async fn warm_postings(
|
||||
&self,
|
||||
term: &Term,
|
||||
with_positions: bool,
|
||||
) -> crate::AsyncIoResult<()> {
|
||||
let term_info_opt = self.get_term_info_async(term).await?;
|
||||
pub async fn warm_postings(&self, term: &Term, with_positions: bool) -> io::Result<()> {
|
||||
let term_info_opt: Option<TermInfo> = self.get_term_info_async(term).await?;
|
||||
if let Some(term_info) = term_info_opt {
|
||||
self.postings_file_slice
|
||||
.read_bytes_slice_async(term_info.postings_range.clone())
|
||||
@@ -234,7 +227,7 @@ impl InvertedIndexReader {
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// If you know which terms to pre-load, prefer using [`Self::warm_postings`] instead.
|
||||
pub async fn warm_postings_full(&self, with_positions: bool) -> crate::AsyncIoResult<()> {
|
||||
pub async fn warm_postings_full(&self, with_positions: bool) -> io::Result<()> {
|
||||
self.postings_file_slice.read_bytes_async().await?;
|
||||
if with_positions {
|
||||
self.positions_file_slice.read_bytes_async().await?;
|
||||
@@ -243,7 +236,7 @@ impl InvertedIndexReader {
|
||||
}
|
||||
|
||||
/// Returns the number of documents containing the term asynchronously.
|
||||
pub async fn doc_freq_async(&self, term: &Term) -> crate::AsyncIoResult<u32> {
|
||||
pub async fn doc_freq_async(&self, term: &Term) -> io::Result<u32> {
|
||||
Ok(self
|
||||
.get_term_info_async(term)
|
||||
.await?
|
||||
|
||||
@@ -5,7 +5,6 @@ mod mmap_directory;
|
||||
|
||||
mod directory;
|
||||
mod directory_lock;
|
||||
mod file_slice;
|
||||
mod file_watcher;
|
||||
mod footer;
|
||||
mod managed_directory;
|
||||
@@ -20,13 +19,13 @@ mod composite_file;
|
||||
use std::io::BufWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub use common::file_slice::{FileHandle, FileSlice};
|
||||
pub use common::{AntiCallToken, TerminatingWrite};
|
||||
pub use ownedbytes::OwnedBytes;
|
||||
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
|
||||
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
|
||||
pub use self::file_slice::{FileHandle, FileSlice};
|
||||
pub use self::ram_directory::RamDirectory;
|
||||
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
|
||||
|
||||
|
||||
22
src/error.rs
22
src/error.rs
@@ -104,28 +104,6 @@ pub enum TantivyError {
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[derive(Error, Debug)]
|
||||
#[doc(hidden)]
|
||||
pub enum AsyncIoError {
|
||||
#[error("io::Error `{0}`")]
|
||||
Io(#[from] io::Error),
|
||||
#[error("Asynchronous API is unsupported by this directory")]
|
||||
AsyncUnsupported,
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
impl From<AsyncIoError> for TantivyError {
|
||||
fn from(async_io_err: AsyncIoError) -> Self {
|
||||
match async_io_err {
|
||||
AsyncIoError::Io(io_err) => TantivyError::from(io_err),
|
||||
AsyncIoError::AsyncUnsupported => {
|
||||
TantivyError::SystemError(format!("{:?}", async_io_err))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for TantivyError {
|
||||
fn from(io_err: io::Error) -> TantivyError {
|
||||
TantivyError::IoError(Arc::new(io_err))
|
||||
|
||||
@@ -259,10 +259,6 @@ pub use crate::future_result::FutureResult;
|
||||
/// and instead, refer to this as `crate::Result<T>`.
|
||||
pub type Result<T> = std::result::Result<T, TantivyError>;
|
||||
|
||||
/// Result for an Async io operation.
|
||||
#[cfg(feature = "quickwit")]
|
||||
pub type AsyncIoResult<T> = std::result::Result<T, crate::error::AsyncIoError>;
|
||||
|
||||
mod core;
|
||||
mod indexer;
|
||||
|
||||
|
||||
@@ -319,7 +319,7 @@ impl StoreReader {
|
||||
/// In most cases use [`get_async`](Self::get_async)
|
||||
///
|
||||
/// Loads and decompresses a block asynchronously.
|
||||
async fn read_block_async(&self, checkpoint: &Checkpoint) -> crate::AsyncIoResult<Block> {
|
||||
async fn read_block_async(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
|
||||
let cache_key = checkpoint.byte_range.start;
|
||||
if let Some(block) = self.cache.get_from_cache(checkpoint.byte_range.start) {
|
||||
return Ok(block);
|
||||
|
||||
@@ -121,7 +121,7 @@ fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
|
||||
}
|
||||
|
||||
impl TermInfoStore {
|
||||
pub fn open(term_info_store_file: FileSlice) -> crate::Result<TermInfoStore> {
|
||||
pub fn open(term_info_store_file: FileSlice) -> io::Result<TermInfoStore> {
|
||||
let (len_slice, main_slice) = term_info_store_file.split(16);
|
||||
let mut bytes = len_slice.read_bytes()?;
|
||||
let len = u64::deserialize(&mut bytes)? as usize;
|
||||
|
||||
@@ -8,7 +8,6 @@ use tantivy_fst::Automaton;
|
||||
use super::term_info_store::{TermInfoStore, TermInfoStoreWriter};
|
||||
use super::{TermStreamer, TermStreamerBuilder};
|
||||
use crate::directory::{FileSlice, OwnedBytes};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::termdict::TermOrdinal;
|
||||
|
||||
@@ -55,7 +54,7 @@ where W: Write
|
||||
/// to insert_key and insert_value.
|
||||
///
|
||||
/// Prefer using `.insert(key, value)`
|
||||
pub(crate) fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
|
||||
pub fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
|
||||
self.fst_builder
|
||||
.insert(key, self.term_ord)
|
||||
.map_err(convert_fst_error)?;
|
||||
@@ -66,7 +65,7 @@ where W: Write
|
||||
/// # Warning
|
||||
///
|
||||
/// Horribly dangerous internal API. See `.insert_key(...)`.
|
||||
pub(crate) fn insert_value(&mut self, term_info: &TermInfo) -> io::Result<()> {
|
||||
pub fn insert_value(&mut self, term_info: &TermInfo) -> io::Result<()> {
|
||||
self.term_info_store_writer.write_term_info(term_info)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -86,10 +85,14 @@ where W: Write
|
||||
}
|
||||
}
|
||||
|
||||
fn open_fst_index(fst_file: FileSlice) -> crate::Result<tantivy_fst::Map<OwnedBytes>> {
|
||||
fn open_fst_index(fst_file: FileSlice) -> io::Result<tantivy_fst::Map<OwnedBytes>> {
|
||||
let bytes = fst_file.read_bytes()?;
|
||||
let fst = Fst::new(bytes)
|
||||
.map_err(|err| DataCorruption::comment_only(format!("Fst data is corrupted: {:?}", err)))?;
|
||||
let fst = Fst::new(bytes).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!("Fst data is corrupted: {:?}", err),
|
||||
)
|
||||
})?;
|
||||
Ok(tantivy_fst::Map::from(fst))
|
||||
}
|
||||
|
||||
@@ -114,7 +117,7 @@ pub struct TermDictionary {
|
||||
|
||||
impl TermDictionary {
|
||||
/// Opens a `TermDictionary`.
|
||||
pub fn open(file: FileSlice) -> crate::Result<Self> {
|
||||
pub fn open(file: FileSlice) -> io::Result<Self> {
|
||||
let (main_slice, footer_len_slice) = file.split_from_end(8);
|
||||
let mut footer_len_bytes = footer_len_slice.read_bytes()?;
|
||||
let footer_size = u64::deserialize(&mut footer_len_bytes)?;
|
||||
|
||||
@@ -1,26 +1,28 @@
|
||||
use std::io;
|
||||
|
||||
mod merger;
|
||||
mod streamer;
|
||||
mod termdict;
|
||||
|
||||
use std::iter::ExactSizeIterator;
|
||||
|
||||
use common::VInt;
|
||||
use sstable::value::{ValueReader, ValueWriter};
|
||||
use sstable::{BlockReader, SSTable};
|
||||
use sstable::SSTable;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
|
||||
pub use self::merger::TermMerger;
|
||||
pub use self::streamer::{TermStreamer, TermStreamerBuilder};
|
||||
pub use self::termdict::{TermDictionary, TermDictionaryBuilder};
|
||||
use crate::postings::TermInfo;
|
||||
|
||||
pub type TermDictionary = sstable::Dictionary<TermSSTable>;
|
||||
pub type TermDictionaryBuilder<W> = sstable::Writer<W, TermInfoWriter>;
|
||||
pub type TermStreamer<'a, A = AlwaysMatch> = sstable::Streamer<'a, TermSSTable, A>;
|
||||
|
||||
pub struct TermSSTable;
|
||||
|
||||
impl SSTable for TermSSTable {
|
||||
type Value = TermInfo;
|
||||
type Reader = TermInfoReader;
|
||||
type Writer = TermInfoWriter;
|
||||
type ValueReader = TermInfoReader;
|
||||
type ValueWriter = TermInfoWriter;
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -35,15 +37,16 @@ impl ValueReader for TermInfoReader {
|
||||
&self.term_infos[idx]
|
||||
}
|
||||
|
||||
fn read(&mut self, reader: &mut BlockReader) -> io::Result<()> {
|
||||
fn load(&mut self, mut data: &[u8]) -> io::Result<usize> {
|
||||
let len_before = data.len();
|
||||
self.term_infos.clear();
|
||||
let num_els = VInt::deserialize_u64(reader)?;
|
||||
let mut postings_start = VInt::deserialize_u64(reader)? as usize;
|
||||
let mut positions_start = VInt::deserialize_u64(reader)? as usize;
|
||||
let num_els = VInt::deserialize_u64(&mut data)?;
|
||||
let mut postings_start = VInt::deserialize_u64(&mut data)? as usize;
|
||||
let mut positions_start = VInt::deserialize_u64(&mut data)? as usize;
|
||||
for _ in 0..num_els {
|
||||
let doc_freq = VInt::deserialize_u64(reader)? as u32;
|
||||
let postings_num_bytes = VInt::deserialize_u64(reader)?;
|
||||
let positions_num_bytes = VInt::deserialize_u64(reader)?;
|
||||
let doc_freq = VInt::deserialize_u64(&mut data)? as u32;
|
||||
let postings_num_bytes = VInt::deserialize_u64(&mut data)?;
|
||||
let positions_num_bytes = VInt::deserialize_u64(&mut data)?;
|
||||
let postings_end = postings_start + postings_num_bytes as usize;
|
||||
let positions_end = positions_start + positions_num_bytes as usize;
|
||||
let term_info = TermInfo {
|
||||
@@ -55,7 +58,8 @@ impl ValueReader for TermInfoReader {
|
||||
postings_start = postings_end;
|
||||
positions_start = positions_end;
|
||||
}
|
||||
Ok(())
|
||||
let consumed_len = len_before - data.len();
|
||||
Ok(consumed_len)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +75,7 @@ impl ValueWriter for TermInfoWriter {
|
||||
self.term_infos.push(term_info.clone());
|
||||
}
|
||||
|
||||
fn write_block(&mut self, buffer: &mut Vec<u8>) {
|
||||
fn serialize_block(&mut self, buffer: &mut Vec<u8>) {
|
||||
VInt(self.term_infos.len() as u64).serialize_into_vec(buffer);
|
||||
if self.term_infos.is_empty() {
|
||||
return;
|
||||
@@ -89,17 +93,13 @@ impl ValueWriter for TermInfoWriter {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io;
|
||||
|
||||
use sstable::value::{ValueReader, ValueWriter};
|
||||
|
||||
use super::BlockReader;
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::termdict::sstable_termdict::TermInfoReader;
|
||||
|
||||
#[test]
|
||||
fn test_block_terminfos() -> io::Result<()> {
|
||||
fn test_block_terminfos() {
|
||||
let mut term_info_writer = super::TermInfoWriter::default();
|
||||
term_info_writer.write(&TermInfo {
|
||||
doc_freq: 120u32,
|
||||
@@ -117,10 +117,10 @@ mod tests {
|
||||
positions_range: 1100..1302,
|
||||
});
|
||||
let mut buffer = Vec::new();
|
||||
term_info_writer.write_block(&mut buffer);
|
||||
let mut block_reader = make_block_reader(&buffer[..]);
|
||||
term_info_writer.serialize_block(&mut buffer);
|
||||
// let mut block_reader = make_block_reader(&buffer[..]);
|
||||
let mut term_info_reader = TermInfoReader::default();
|
||||
term_info_reader.read(&mut block_reader)?;
|
||||
let num_bytes: usize = term_info_reader.load(&buffer[..]).unwrap();
|
||||
assert_eq!(
|
||||
term_info_reader.value(0),
|
||||
&TermInfo {
|
||||
@@ -129,16 +129,6 @@ mod tests {
|
||||
positions_range: 10..122
|
||||
}
|
||||
);
|
||||
assert!(block_reader.buffer().is_empty());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_block_reader(data: &[u8]) -> BlockReader {
|
||||
let mut buffer = (data.len() as u32).to_le_bytes().to_vec();
|
||||
buffer.extend_from_slice(data);
|
||||
let owned_bytes = OwnedBytes::new(buffer);
|
||||
let mut block_reader = BlockReader::new(Box::new(owned_bytes));
|
||||
block_reader.read_block().unwrap();
|
||||
block_reader
|
||||
assert_eq!(buffer.len(), num_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,256 +1,11 @@
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
use sstable::SSTable;
|
||||
|
||||
use common::BinarySerializable;
|
||||
use once_cell::sync::Lazy;
|
||||
use sstable::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, Writer};
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
use crate::directory::{FileSlice, OwnedBytes};
|
||||
use crate::postings::TermInfo;
|
||||
use crate::termdict::sstable_termdict::{
|
||||
TermInfoReader, TermInfoWriter, TermSSTable, TermStreamer, TermStreamerBuilder,
|
||||
};
|
||||
use crate::termdict::TermOrdinal;
|
||||
use crate::AsyncIoResult;
|
||||
use crate::termdict::sstable_termdict::{TermInfoReader, TermInfoWriter};
|
||||
|
||||
pub struct TermInfoSSTable;
|
||||
impl SSTable for TermInfoSSTable {
|
||||
type Value = TermInfo;
|
||||
type Reader = TermInfoReader;
|
||||
type Writer = TermInfoWriter;
|
||||
}
|
||||
|
||||
/// Builder for the new term dictionary.
|
||||
pub struct TermDictionaryBuilder<W: io::Write> {
|
||||
sstable_writer: Writer<W, TermInfoWriter>,
|
||||
}
|
||||
|
||||
impl<W: io::Write> TermDictionaryBuilder<W> {
|
||||
/// Creates a new `TermDictionaryBuilder`
|
||||
pub fn create(w: W) -> io::Result<Self> {
|
||||
let sstable_writer = TermSSTable::writer(w);
|
||||
Ok(TermDictionaryBuilder { sstable_writer })
|
||||
}
|
||||
|
||||
/// Inserts a `(key, value)` pair in the term dictionary.
|
||||
///
|
||||
/// *Keys have to be inserted in order.*
|
||||
pub fn insert<K: AsRef<[u8]>>(&mut self, key_ref: K, value: &TermInfo) -> io::Result<()> {
|
||||
let key = key_ref.as_ref();
|
||||
self.insert_key(key)?;
|
||||
self.insert_value(value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// # Warning
|
||||
/// Horribly dangerous internal API
|
||||
///
|
||||
/// If used, it must be used by systematically alternating calls
|
||||
/// to insert_key and insert_value.
|
||||
///
|
||||
/// Prefer using `.insert(key, value)`
|
||||
#[allow(clippy::unnecessary_wraps)]
|
||||
pub(crate) fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
|
||||
self.sstable_writer.write_key(key);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// # Warning
|
||||
///
|
||||
/// Horribly dangerous internal API. See `.insert_key(...)`.
|
||||
pub(crate) fn insert_value(&mut self, term_info: &TermInfo) -> io::Result<()> {
|
||||
self.sstable_writer.write_value(term_info)
|
||||
}
|
||||
|
||||
/// Finalize writing the builder, and returns the underlying
|
||||
/// `Write` object.
|
||||
pub fn finish(self) -> io::Result<W> {
|
||||
self.sstable_writer.finalize()
|
||||
}
|
||||
}
|
||||
|
||||
static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
|
||||
let term_dictionary_data: Vec<u8> = TermDictionaryBuilder::create(Vec::<u8>::new())
|
||||
.expect("Creating a TermDictionaryBuilder in a Vec<u8> should never fail")
|
||||
.finish()
|
||||
.expect("Writing in a Vec<u8> should never fail");
|
||||
FileSlice::from(term_dictionary_data)
|
||||
});
|
||||
|
||||
/// The term dictionary contains all of the terms in
|
||||
/// `tantivy index` in a sorted manner.
|
||||
///
|
||||
/// The `Fst` crate is used to associate terms to their
|
||||
/// respective `TermOrdinal`. The `TermInfoStore` then makes it
|
||||
/// possible to fetch the associated `TermInfo`.
|
||||
pub struct TermDictionary {
|
||||
sstable_slice: FileSlice,
|
||||
sstable_index: SSTableIndex,
|
||||
num_terms: u64,
|
||||
}
|
||||
|
||||
impl TermDictionary {
|
||||
pub(crate) fn sstable_reader(&self) -> io::Result<Reader<'static, TermInfoReader>> {
|
||||
let data = self.sstable_slice.read_bytes()?;
|
||||
Ok(TermInfoSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) fn sstable_reader_block(
|
||||
&self,
|
||||
block_addr: BlockAddr,
|
||||
) -> io::Result<Reader<'static, TermInfoReader>> {
|
||||
let data = self.sstable_slice.read_bytes_slice(block_addr.byte_range)?;
|
||||
Ok(TermInfoSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) async fn sstable_reader_block_async(
|
||||
&self,
|
||||
block_addr: BlockAddr,
|
||||
) -> AsyncIoResult<Reader<'static, TermInfoReader>> {
|
||||
let data = self
|
||||
.sstable_slice
|
||||
.read_bytes_slice_async(block_addr.byte_range)
|
||||
.await?;
|
||||
Ok(TermInfoSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) fn sstable_delta_reader(&self) -> io::Result<DeltaReader<'static, TermInfoReader>> {
|
||||
let data = self.sstable_slice.read_bytes()?;
|
||||
Ok(TermInfoSSTable::delta_reader(data))
|
||||
}
|
||||
|
||||
/// Opens a `TermDictionary`.
|
||||
pub fn open(term_dictionary_file: FileSlice) -> crate::Result<Self> {
|
||||
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(16);
|
||||
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
|
||||
let index_offset = u64::deserialize(&mut footer_len_bytes)?;
|
||||
let num_terms = u64::deserialize(&mut footer_len_bytes)?;
|
||||
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
|
||||
let sstable_index_bytes = index_slice.read_bytes()?;
|
||||
let sstable_index = SSTableIndex::load(sstable_index_bytes.as_slice())
|
||||
.map_err(|_| crate::error::DataCorruption::comment_only("SSTable corruption"))?;
|
||||
Ok(TermDictionary {
|
||||
sstable_slice,
|
||||
sstable_index,
|
||||
num_terms,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a term dictionary from the supplied bytes.
|
||||
pub fn from_bytes(owned_bytes: OwnedBytes) -> crate::Result<TermDictionary> {
|
||||
TermDictionary::open(FileSlice::new(Arc::new(owned_bytes)))
|
||||
}
|
||||
|
||||
/// Creates an empty term dictionary which contains no terms.
|
||||
pub fn empty() -> Self {
|
||||
TermDictionary::open(EMPTY_TERM_DICT_FILE.clone()).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the number of terms in the dictionary.
|
||||
/// Term ordinals range from 0 to `num_terms() - 1`.
|
||||
pub fn num_terms(&self) -> usize {
|
||||
self.num_terms as usize
|
||||
}
|
||||
|
||||
/// Returns the ordinal associated with a given term.
|
||||
pub fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermOrdinal>> {
|
||||
let mut term_ord = 0u64;
|
||||
let key_bytes = key.as_ref();
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
return Ok(Some(term_ord));
|
||||
}
|
||||
term_ord += 1;
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns the term associated with a given term ordinal.
|
||||
///
|
||||
/// Term ordinals are defined as the position of the term in
|
||||
/// the sorted list of terms.
|
||||
///
|
||||
/// Returns true if and only if the term has been found.
|
||||
///
|
||||
/// Regardless of whether the term is found or not,
|
||||
/// the buffer may be modified.
|
||||
pub fn ord_to_term(&self, ord: TermOrdinal, bytes: &mut Vec<u8>) -> io::Result<bool> {
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
bytes.clear();
|
||||
for _ in 0..(ord + 1) {
|
||||
if !sstable_reader.advance().unwrap_or(false) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
bytes.extend_from_slice(sstable_reader.key());
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Returns the number of terms in the dictionary.
|
||||
pub fn term_info_from_ord(&self, term_ord: TermOrdinal) -> io::Result<TermInfo> {
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
for _ in 0..(term_ord + 1) {
|
||||
if !sstable_reader.advance().unwrap_or(false) {
|
||||
return Ok(TermInfo::default());
|
||||
}
|
||||
}
|
||||
Ok(sstable_reader.value().clone())
|
||||
}
|
||||
|
||||
/// Lookups the value corresponding to the key.
|
||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermInfo>> {
|
||||
if let Some(block_addr) = self.sstable_index.search(key.as_ref()) {
|
||||
let mut sstable_reader = self.sstable_reader_block(block_addr)?;
|
||||
let key_bytes = key.as_ref();
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
let term_info = sstable_reader.value().clone();
|
||||
return Ok(Some(term_info));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Lookups the value corresponding to the key.
|
||||
pub async fn get_async<K: AsRef<[u8]>>(&self, key: K) -> AsyncIoResult<Option<TermInfo>> {
|
||||
if let Some(block_addr) = self.sstable_index.search(key.as_ref()) {
|
||||
let mut sstable_reader = self.sstable_reader_block_async(block_addr).await?;
|
||||
let key_bytes = key.as_ref();
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
let term_info = sstable_reader.value().clone();
|
||||
return Ok(Some(term_info));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns a range builder, to stream all of the terms
|
||||
/// within an interval.
|
||||
pub fn range(&self) -> TermStreamerBuilder<'_> {
|
||||
TermStreamerBuilder::new(self, AlwaysMatch)
|
||||
}
|
||||
|
||||
/// A stream of all the sorted terms.
|
||||
pub fn stream(&self) -> io::Result<TermStreamer<'_>> {
|
||||
self.range().into_stream()
|
||||
}
|
||||
|
||||
/// Returns a search builder, to stream all of the terms
|
||||
/// within the Automaton
|
||||
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
|
||||
where A::State: Clone {
|
||||
TermStreamerBuilder::<A>::new(self, automaton)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub async fn warm_up_dictionary(&self) -> AsyncIoResult<()> {
|
||||
self.sstable_slice.read_bytes_async().await?;
|
||||
Ok(())
|
||||
}
|
||||
type ValueReader = TermInfoReader;
|
||||
type ValueWriter = TermInfoWriter;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
use std::str;
|
||||
use std::{io, str};
|
||||
|
||||
use super::{TermDictionary, TermDictionaryBuilder, TermStreamer};
|
||||
use crate::directory::{Directory, FileSlice, RamDirectory, TerminatingWrite};
|
||||
@@ -247,7 +247,7 @@ fn test_empty_string() -> crate::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn stream_range_test_dict() -> crate::Result<TermDictionary> {
|
||||
fn stream_range_test_dict() -> io::Result<TermDictionary> {
|
||||
let buffer: Vec<u8> = {
|
||||
let mut term_dictionary_builder = TermDictionaryBuilder::create(Vec::new())?;
|
||||
for i in 0u8..10u8 {
|
||||
|
||||
@@ -6,8 +6,8 @@ edition = "2021"
|
||||
[dependencies]
|
||||
common = {path="../common", package="tantivy-common"}
|
||||
ciborium = "0.2"
|
||||
byteorder = "1"
|
||||
serde = "1"
|
||||
tantivy-fst = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
proptest = "1"
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use std::io::{self, Read};
|
||||
|
||||
use byteorder::{LittleEndian, ReadBytesExt};
|
||||
use std::io;
|
||||
|
||||
pub struct BlockReader<'a> {
|
||||
buffer: Vec<u8>,
|
||||
@@ -8,6 +6,13 @@ pub struct BlockReader<'a> {
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read_u32(read: &mut dyn io::Read) -> io::Result<u32> {
|
||||
let mut buf = [0u8; 4];
|
||||
read.read_exact(&mut buf)?;
|
||||
Ok(u32::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
impl<'a> BlockReader<'a> {
|
||||
pub fn new(reader: Box<dyn io::Read + 'a>) -> BlockReader<'a> {
|
||||
BlockReader {
|
||||
@@ -30,7 +35,7 @@ impl<'a> BlockReader<'a> {
|
||||
|
||||
pub fn read_block(&mut self) -> io::Result<bool> {
|
||||
self.offset = 0;
|
||||
let block_len_res = self.reader.read_u32::<LittleEndian>();
|
||||
let block_len_res = read_u32(self.reader.as_mut());
|
||||
if let Err(err) = &block_len_res {
|
||||
if err.kind() == io::ErrorKind::UnexpectedEof {
|
||||
return Ok(false);
|
||||
|
||||
@@ -44,7 +44,7 @@ where
|
||||
let start_offset = self.write.written_bytes() as usize;
|
||||
// TODO avoid buffer allocation
|
||||
let mut buffer = Vec::new();
|
||||
self.value_writer.write_block(&mut buffer);
|
||||
self.value_writer.serialize_block(&mut buffer);
|
||||
let block_len = buffer.len() + self.block.len();
|
||||
self.write.write_all(&(block_len as u32).to_le_bytes())?;
|
||||
self.write.write_all(&buffer[..])?;
|
||||
@@ -84,7 +84,7 @@ where
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn finalize(self) -> CountingWriter<BufWriter<W>> {
|
||||
pub fn finish(self) -> CountingWriter<BufWriter<W>> {
|
||||
self.write
|
||||
}
|
||||
}
|
||||
@@ -112,6 +112,10 @@ where TValueReader: value::ValueReader
|
||||
}
|
||||
}
|
||||
|
||||
pub fn empty() -> Self {
|
||||
DeltaReader::new(&b""[..])
|
||||
}
|
||||
|
||||
fn deserialize_vint(&mut self) -> u64 {
|
||||
self.block_reader.deserialize_u64()
|
||||
}
|
||||
@@ -156,7 +160,8 @@ where TValueReader: value::ValueReader
|
||||
if !self.block_reader.read_block()? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.value_reader.read(&mut self.block_reader)?;
|
||||
let consumed_len = self.value_reader.load(self.block_reader.buffer())?;
|
||||
self.block_reader.advance(consumed_len);
|
||||
self.idx = 0;
|
||||
} else {
|
||||
self.idx += 1;
|
||||
@@ -180,3 +185,15 @@ where TValueReader: value::ValueReader
|
||||
self.value_reader.value(self.idx)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DeltaReader;
|
||||
use crate::value::U64MonotonicReader;
|
||||
|
||||
#[test]
|
||||
fn test_empty() {
|
||||
let mut delta_reader: DeltaReader<U64MonotonicReader> = DeltaReader::empty();
|
||||
assert!(!delta_reader.advance().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
231
sstable/src/dictionary.rs
Normal file
231
sstable/src/dictionary.rs
Normal file
@@ -0,0 +1,231 @@
|
||||
use std::io;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
use crate::streamer::{Streamer, StreamerBuilder};
|
||||
use crate::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, TermOrdinal};
|
||||
|
||||
/// The term dictionary contains all of the terms in
|
||||
/// `tantivy index` in a sorted manner.
|
||||
///
|
||||
/// The `Fst` crate is used to associate terms to their
|
||||
/// respective `TermOrdinal`. The `TermInfoStore` then makes it
|
||||
/// possible to fetch the associated `TermInfo`.
|
||||
pub struct Dictionary<TSSTable: SSTable> {
|
||||
pub sstable_slice: FileSlice,
|
||||
pub sstable_index: SSTableIndex,
|
||||
num_terms: u64,
|
||||
phantom_data: PhantomData<TSSTable>,
|
||||
}
|
||||
|
||||
impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
pub fn builder<W: io::Write>(wrt: W) -> io::Result<crate::Writer<W, TSSTable::ValueWriter>> {
|
||||
Ok(TSSTable::writer(wrt))
|
||||
}
|
||||
|
||||
pub(crate) fn sstable_reader(&self) -> io::Result<Reader<'static, TSSTable::ValueReader>> {
|
||||
let data = self.sstable_slice.read_bytes()?;
|
||||
Ok(TSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) fn sstable_reader_block(
|
||||
&self,
|
||||
block_addr: BlockAddr,
|
||||
) -> io::Result<Reader<'static, TSSTable::ValueReader>> {
|
||||
let data = self.sstable_slice.read_bytes_slice(block_addr.byte_range)?;
|
||||
Ok(TSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) async fn sstable_reader_block_async(
|
||||
&self,
|
||||
block_addr: BlockAddr,
|
||||
) -> io::Result<Reader<'static, TSSTable::ValueReader>> {
|
||||
let data = self
|
||||
.sstable_slice
|
||||
.read_bytes_slice_async(block_addr.byte_range)
|
||||
.await?;
|
||||
Ok(TSSTable::reader(data))
|
||||
}
|
||||
|
||||
pub(crate) fn sstable_delta_reader_for_key_range(
|
||||
&self,
|
||||
key_range: impl RangeBounds<[u8]>,
|
||||
) -> io::Result<DeltaReader<'static, TSSTable::ValueReader>> {
|
||||
let slice = self.file_slice_for_range(key_range);
|
||||
let data = slice.read_bytes()?;
|
||||
Ok(TSSTable::delta_reader(data))
|
||||
}
|
||||
|
||||
fn file_slice_for_range(&self, key_range: impl RangeBounds<[u8]>) -> FileSlice {
|
||||
let start_bound: Bound<usize> = match key_range.start_bound() {
|
||||
Bound::Included(key) | Bound::Excluded(key) => {
|
||||
let Some(first_block_addr) = self.sstable_index.search_block(key) else {
|
||||
return FileSlice::empty();
|
||||
};
|
||||
Bound::Included(first_block_addr.byte_range.start)
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let end_bound: Bound<usize> = match key_range.end_bound() {
|
||||
Bound::Included(key) | Bound::Excluded(key) => {
|
||||
if let Some(block_addr) = self.sstable_index.search_block(key) {
|
||||
Bound::Excluded(block_addr.byte_range.end)
|
||||
} else {
|
||||
Bound::Unbounded
|
||||
}
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
self.sstable_slice.slice((start_bound, end_bound))
|
||||
}
|
||||
|
||||
/// Opens a `TermDictionary`.
|
||||
pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> {
|
||||
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(16);
|
||||
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
|
||||
let index_offset = u64::deserialize(&mut footer_len_bytes)?;
|
||||
let num_terms = u64::deserialize(&mut footer_len_bytes)?;
|
||||
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
|
||||
let sstable_index_bytes = index_slice.read_bytes()?;
|
||||
let sstable_index = SSTableIndex::load(sstable_index_bytes.as_slice())
|
||||
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption"))?;
|
||||
Ok(Dictionary {
|
||||
sstable_slice,
|
||||
sstable_index,
|
||||
num_terms,
|
||||
phantom_data: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a term dictionary from the supplied bytes.
|
||||
pub fn from_bytes(owned_bytes: OwnedBytes) -> io::Result<Self> {
|
||||
Dictionary::open(FileSlice::new(Arc::new(owned_bytes)))
|
||||
}
|
||||
|
||||
/// Creates an empty term dictionary which contains no terms.
|
||||
pub fn empty() -> Self {
|
||||
let term_dictionary_data: Vec<u8> = Self::builder(Vec::<u8>::new())
|
||||
.expect("Creating a TermDictionaryBuilder in a Vec<u8> should never fail")
|
||||
.finish()
|
||||
.expect("Writing in a Vec<u8> should never fail");
|
||||
let empty_dict_file = FileSlice::from(term_dictionary_data);
|
||||
Dictionary::open(empty_dict_file).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the number of terms in the dictionary.
|
||||
/// Term ordinals range from 0 to `num_terms() - 1`.
|
||||
pub fn num_terms(&self) -> usize {
|
||||
self.num_terms as usize
|
||||
}
|
||||
|
||||
/// Returns the ordinal associated with a given term.
|
||||
pub fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermOrdinal>> {
|
||||
let mut term_ord = 0u64;
|
||||
let key_bytes = key.as_ref();
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
return Ok(Some(term_ord));
|
||||
}
|
||||
term_ord += 1;
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns the term associated with a given term ordinal.
|
||||
///
|
||||
/// Term ordinals are defined as the position of the term in
|
||||
/// the sorted list of terms.
|
||||
///
|
||||
/// Returns true if and only if the term has been found.
|
||||
///
|
||||
/// Regardless of whether the term is found or not,
|
||||
/// the buffer may be modified.
|
||||
pub fn ord_to_term(&self, ord: TermOrdinal, bytes: &mut Vec<u8>) -> io::Result<bool> {
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
bytes.clear();
|
||||
for _ in 0..(ord + 1) {
|
||||
if !sstable_reader.advance().unwrap_or(false) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
bytes.extend_from_slice(sstable_reader.key());
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Returns the number of terms in the dictionary.
|
||||
pub fn term_info_from_ord(&self, term_ord: TermOrdinal) -> io::Result<Option<TSSTable::Value>> {
|
||||
let mut sstable_reader = self.sstable_reader()?;
|
||||
for _ in 0..(term_ord + 1) {
|
||||
if !sstable_reader.advance().unwrap_or(false) {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
Ok(Some(sstable_reader.value().clone()))
|
||||
}
|
||||
|
||||
/// Lookups the value corresponding to the key.
|
||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TSSTable::Value>> {
|
||||
if let Some(block_addr) = self.sstable_index.search_block(key.as_ref()) {
|
||||
let mut sstable_reader = self.sstable_reader_block(block_addr)?;
|
||||
let key_bytes = key.as_ref();
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
let value = sstable_reader.value().clone();
|
||||
return Ok(Some(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Lookups the value corresponding to the key.
|
||||
pub async fn get_async<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TSSTable::Value>> {
|
||||
if let Some(block_addr) = self.sstable_index.search_block(key.as_ref()) {
|
||||
let mut sstable_reader = self.sstable_reader_block_async(block_addr).await?;
|
||||
let key_bytes = key.as_ref();
|
||||
while sstable_reader.advance().unwrap_or(false) {
|
||||
if sstable_reader.key() == key_bytes {
|
||||
let value = sstable_reader.value().clone();
|
||||
return Ok(Some(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns a range builder, to stream all of the terms
|
||||
/// within an interval.
|
||||
pub fn range(&self) -> StreamerBuilder<'_, TSSTable> {
|
||||
StreamerBuilder::new(self, AlwaysMatch)
|
||||
}
|
||||
|
||||
/// A stream of all the sorted terms.
|
||||
pub fn stream(&self) -> io::Result<Streamer<'_, TSSTable>> {
|
||||
self.range().into_stream()
|
||||
}
|
||||
|
||||
/// Returns a search builder, to stream all of the terms
|
||||
/// within the Automaton
|
||||
pub fn search<'a, A: Automaton + 'a>(
|
||||
&'a self,
|
||||
automaton: A,
|
||||
) -> StreamerBuilder<'a, TSSTable, A>
|
||||
where
|
||||
A::State: Clone,
|
||||
{
|
||||
StreamerBuilder::<TSSTable, A>::new(self, automaton)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub async fn warm_up_dictionary(&self) -> io::Result<()> {
|
||||
self.sstable_slice.read_bytes_async().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,21 +1,29 @@
|
||||
use std::io::{self, Write};
|
||||
use std::ops::Range;
|
||||
use std::usize;
|
||||
|
||||
use merge::ValueMerger;
|
||||
|
||||
mod delta;
|
||||
mod dictionary;
|
||||
pub mod merge;
|
||||
mod streamer;
|
||||
pub mod value;
|
||||
|
||||
mod sstable_index;
|
||||
pub use sstable_index::{BlockAddr, SSTableIndex, SSTableIndexBuilder};
|
||||
pub(crate) mod vint;
|
||||
pub use dictionary::Dictionary;
|
||||
pub use streamer::{Streamer, StreamerBuilder};
|
||||
|
||||
mod block_reader;
|
||||
pub use self::block_reader::BlockReader;
|
||||
pub use self::delta::{DeltaReader, DeltaWriter};
|
||||
pub use self::merge::VoidMerge;
|
||||
use self::value::{U64MonotonicReader, U64MonotonicWriter, ValueReader, ValueWriter};
|
||||
use crate::value::{RangeReader, RangeWriter};
|
||||
|
||||
pub type TermOrdinal = u64;
|
||||
|
||||
const DEFAULT_KEY_CAPACITY: usize = 50;
|
||||
|
||||
@@ -31,15 +39,15 @@ fn common_prefix_len(left: &[u8], right: &[u8]) -> usize {
|
||||
pub struct SSTableDataCorruption;
|
||||
|
||||
pub trait SSTable: Sized {
|
||||
type Value;
|
||||
type Reader: ValueReader<Value = Self::Value>;
|
||||
type Writer: ValueWriter<Value = Self::Value>;
|
||||
type Value: Clone;
|
||||
type ValueReader: ValueReader<Value = Self::Value>;
|
||||
type ValueWriter: ValueWriter<Value = Self::Value>;
|
||||
|
||||
fn delta_writer<W: io::Write>(write: W) -> DeltaWriter<W, Self::Writer> {
|
||||
fn delta_writer<W: io::Write>(write: W) -> DeltaWriter<W, Self::ValueWriter> {
|
||||
DeltaWriter::new(write)
|
||||
}
|
||||
|
||||
fn writer<W: io::Write>(write: W) -> Writer<W, Self::Writer> {
|
||||
fn writer<W: io::Write>(write: W) -> Writer<W, Self::ValueWriter> {
|
||||
Writer {
|
||||
previous_key: Vec::with_capacity(DEFAULT_KEY_CAPACITY),
|
||||
num_terms: 0u64,
|
||||
@@ -49,17 +57,22 @@ pub trait SSTable: Sized {
|
||||
}
|
||||
}
|
||||
|
||||
fn delta_reader<'a, R: io::Read + 'a>(reader: R) -> DeltaReader<'a, Self::Reader> {
|
||||
fn delta_reader<'a, R: io::Read + 'a>(reader: R) -> DeltaReader<'a, Self::ValueReader> {
|
||||
DeltaReader::new(reader)
|
||||
}
|
||||
|
||||
fn reader<'a, R: io::Read + 'a>(reader: R) -> Reader<'a, Self::Reader> {
|
||||
fn reader<'a, R: io::Read + 'a>(reader: R) -> Reader<'a, Self::ValueReader> {
|
||||
Reader {
|
||||
key: Vec::with_capacity(DEFAULT_KEY_CAPACITY),
|
||||
delta_reader: Self::delta_reader(reader),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an empty static reader.
|
||||
fn create_empty_reader() -> Reader<'static, Self::ValueReader> {
|
||||
Self::reader(&b""[..])
|
||||
}
|
||||
|
||||
fn merge<R: io::Read, W: io::Write, M: ValueMerger<Self::Value>>(
|
||||
io_readers: Vec<R>,
|
||||
w: W,
|
||||
@@ -76,8 +89,8 @@ pub struct VoidSSTable;
|
||||
|
||||
impl SSTable for VoidSSTable {
|
||||
type Value = ();
|
||||
type Reader = value::VoidReader;
|
||||
type Writer = value::VoidWriter;
|
||||
type ValueReader = value::VoidReader;
|
||||
type ValueWriter = value::VoidWriter;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -86,9 +99,20 @@ pub struct SSTableMonotonicU64;
|
||||
impl SSTable for SSTableMonotonicU64 {
|
||||
type Value = u64;
|
||||
|
||||
type Reader = U64MonotonicReader;
|
||||
type ValueReader = U64MonotonicReader;
|
||||
|
||||
type Writer = U64MonotonicWriter;
|
||||
type ValueWriter = U64MonotonicWriter;
|
||||
}
|
||||
|
||||
/// Retpresent
|
||||
pub struct SSTableRange;
|
||||
|
||||
impl SSTable for SSTableRange {
|
||||
type Value = Range<u64>;
|
||||
|
||||
type ValueReader = RangeReader;
|
||||
|
||||
type ValueWriter = RangeWriter;
|
||||
}
|
||||
|
||||
pub struct Reader<'a, TValueReader> {
|
||||
@@ -141,11 +165,23 @@ where
|
||||
W: io::Write,
|
||||
TValueWriter: value::ValueWriter,
|
||||
{
|
||||
pub fn create(wrt: W) -> io::Result<Self> {
|
||||
Ok(Writer {
|
||||
previous_key: Vec::with_capacity(DEFAULT_KEY_CAPACITY),
|
||||
num_terms: 0u64,
|
||||
index_builder: SSTableIndexBuilder::default(),
|
||||
delta_writer: DeltaWriter::new(wrt),
|
||||
first_ordinal_of_the_block: 0u64,
|
||||
})
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn current_key(&self) -> &[u8] {
|
||||
&self.previous_key[..]
|
||||
}
|
||||
|
||||
pub fn write_key(&mut self, key: &[u8]) {
|
||||
#[inline]
|
||||
pub fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
|
||||
// If this is the first key in the block, we use it to
|
||||
// shorten the last term in the last block.
|
||||
if self.first_ordinal_of_the_block == self.num_terms {
|
||||
@@ -165,16 +201,22 @@ where
|
||||
self.previous_key.resize(key.len(), 0u8);
|
||||
self.previous_key[keep_len..].copy_from_slice(&key[keep_len..]);
|
||||
self.delta_writer.write_suffix(keep_len, &key[keep_len..]);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn write(&mut self, key: &[u8], value: &TValueWriter::Value) -> io::Result<()> {
|
||||
self.write_key(key);
|
||||
self.write_value(value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_value(&mut self, value: &TValueWriter::Value) -> io::Result<()> {
|
||||
#[inline]
|
||||
pub fn insert<K: AsRef<[u8]>>(
|
||||
&mut self,
|
||||
key: K,
|
||||
value: &TValueWriter::Value,
|
||||
) -> io::Result<()> {
|
||||
self.insert_key(key.as_ref())?;
|
||||
self.insert_value(value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn insert_value(&mut self, value: &TValueWriter::Value) -> io::Result<()> {
|
||||
self.delta_writer.write_value(value);
|
||||
self.num_terms += 1u64;
|
||||
self.flush_block_if_required()
|
||||
@@ -193,7 +235,7 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn finalize(mut self) -> io::Result<W> {
|
||||
pub fn finish(mut self) -> io::Result<W> {
|
||||
if let Some(byte_range) = self.delta_writer.flush_block()? {
|
||||
self.index_builder.add_block(
|
||||
&self.previous_key[..],
|
||||
@@ -202,7 +244,7 @@ where
|
||||
);
|
||||
self.first_ordinal_of_the_block = self.num_terms;
|
||||
}
|
||||
let mut wrt = self.delta_writer.finalize();
|
||||
let mut wrt = self.delta_writer.finish();
|
||||
wrt.write_all(&0u32.to_le_bytes())?;
|
||||
|
||||
let offset = wrt.written_bytes();
|
||||
@@ -246,10 +288,10 @@ mod test {
|
||||
let mut buffer = vec![];
|
||||
{
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.write(&long_key[..], &()).is_ok());
|
||||
assert!(sstable_writer.write(&[0, 3, 4], &()).is_ok());
|
||||
assert!(sstable_writer.write(&long_key2[..], &()).is_ok());
|
||||
assert!(sstable_writer.finalize().is_ok());
|
||||
assert!(sstable_writer.insert(&long_key[..], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[0, 3, 4], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&long_key2[..], &()).is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
let mut sstable_reader = VoidSSTable::reader(&buffer[..]);
|
||||
assert!(sstable_reader.advance().unwrap());
|
||||
@@ -266,10 +308,10 @@ mod test {
|
||||
let mut buffer = vec![];
|
||||
{
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.write(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.write(&[17u8, 18u8, 19u8], &()).is_ok());
|
||||
assert!(sstable_writer.write(&[17u8, 20u8], &()).is_ok());
|
||||
assert!(sstable_writer.finalize().is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8, 18u8, 19u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8, 20u8], &()).is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
assert_eq!(
|
||||
&buffer,
|
||||
@@ -304,8 +346,8 @@ mod test {
|
||||
fn test_simple_sstable_non_increasing_key() {
|
||||
let mut buffer = vec![];
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
assert!(sstable_writer.write(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.write(&[16u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[17u8], &()).is_ok());
|
||||
assert!(sstable_writer.insert(&[16u8], &()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -313,9 +355,9 @@ mod test {
|
||||
let mut buffer = Vec::new();
|
||||
{
|
||||
let mut writer = VoidSSTable::writer(&mut buffer);
|
||||
writer.write(b"abcd", &()).unwrap();
|
||||
writer.write(b"abe", &()).unwrap();
|
||||
writer.finalize().unwrap();
|
||||
writer.insert(b"abcd", &()).unwrap();
|
||||
writer.insert(b"abe", &()).unwrap();
|
||||
writer.finish().unwrap();
|
||||
}
|
||||
let mut output = Vec::new();
|
||||
assert!(VoidSSTable::merge(vec![&buffer[..], &buffer[..]], &mut output, VoidMerge).is_ok());
|
||||
@@ -327,9 +369,9 @@ mod test {
|
||||
let mut buffer = Vec::new();
|
||||
{
|
||||
let mut writer = VoidSSTable::writer(&mut buffer);
|
||||
writer.write(b"abcd", &()).unwrap();
|
||||
writer.write(b"abe", &()).unwrap();
|
||||
writer.finalize().unwrap();
|
||||
writer.insert(b"abcd", &()).unwrap();
|
||||
writer.insert(b"abe", &()).unwrap();
|
||||
writer.finish().unwrap();
|
||||
}
|
||||
let mut output = Vec::new();
|
||||
assert!(VoidSSTable::merge(vec![&buffer[..], &buffer[..]], &mut output, VoidMerge).is_ok());
|
||||
@@ -340,10 +382,10 @@ mod test {
|
||||
fn test_sstable_u64() -> io::Result<()> {
|
||||
let mut buffer = Vec::new();
|
||||
let mut writer = SSTableMonotonicU64::writer(&mut buffer);
|
||||
writer.write(b"abcd", &1u64)?;
|
||||
writer.write(b"abe", &4u64)?;
|
||||
writer.write(b"gogo", &4324234234234234u64)?;
|
||||
writer.finalize()?;
|
||||
writer.insert(b"abcd", &1u64)?;
|
||||
writer.insert(b"abe", &4u64)?;
|
||||
writer.insert(b"gogo", &4324234234234234u64)?;
|
||||
writer.finish()?;
|
||||
let mut reader = SSTableMonotonicU64::reader(&buffer[..]);
|
||||
assert!(reader.advance()?);
|
||||
assert_eq!(reader.key(), b"abcd");
|
||||
@@ -357,4 +399,10 @@ mod test {
|
||||
assert!(!reader.advance()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sstable_empty() {
|
||||
let mut sstable_range_empty = crate::SSTableRange::create_empty_reader();
|
||||
assert!(!sstable_range_empty.advance().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,11 +28,11 @@ impl<B: AsRef<[u8]>> PartialEq for HeapItem<B> {
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
|
||||
readers: Vec<Reader<SST::Reader>>,
|
||||
mut writer: Writer<W, SST::Writer>,
|
||||
readers: Vec<Reader<SST::ValueReader>>,
|
||||
mut writer: Writer<W, SST::ValueWriter>,
|
||||
mut merger: M,
|
||||
) -> io::Result<()> {
|
||||
let mut heap: BinaryHeap<HeapItem<Reader<SST::Reader>>> =
|
||||
let mut heap: BinaryHeap<HeapItem<Reader<SST::ValueReader>>> =
|
||||
BinaryHeap::with_capacity(readers.len());
|
||||
for mut reader in readers {
|
||||
if reader.advance()? {
|
||||
@@ -43,7 +43,7 @@ pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
|
||||
let len = heap.len();
|
||||
let mut value_merger;
|
||||
if let Some(mut head) = heap.peek_mut() {
|
||||
writer.write_key(head.0.key());
|
||||
writer.insert_key(head.0.key()).unwrap();
|
||||
value_merger = merger.new_value(head.0.value());
|
||||
if !head.0.advance()? {
|
||||
PeekMut::pop(head);
|
||||
@@ -64,9 +64,9 @@ pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
|
||||
break;
|
||||
}
|
||||
let value = value_merger.finish();
|
||||
writer.write_value(&value)?;
|
||||
writer.insert_value(&value)?;
|
||||
writer.flush_block_if_required()?;
|
||||
}
|
||||
writer.finalize()?;
|
||||
writer.finish()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -79,9 +79,9 @@ mod tests {
|
||||
{
|
||||
let mut sstable_writer = VoidSSTable::writer(&mut buffer);
|
||||
for &key in keys {
|
||||
assert!(sstable_writer.write(key.as_bytes(), &()).is_ok());
|
||||
assert!(sstable_writer.insert(key.as_bytes(), &()).is_ok());
|
||||
}
|
||||
assert!(sstable_writer.finalize().is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
buffer
|
||||
}
|
||||
@@ -91,9 +91,9 @@ mod tests {
|
||||
{
|
||||
let mut sstable_writer = SSTableMonotonicU64::writer(&mut buffer);
|
||||
for (key, val) in keys {
|
||||
assert!(sstable_writer.write(key.as_bytes(), val).is_ok());
|
||||
assert!(sstable_writer.insert(key.as_bytes(), val).is_ok());
|
||||
}
|
||||
assert!(sstable_writer.finalize().is_ok());
|
||||
assert!(sstable_writer.finish().is_ok());
|
||||
}
|
||||
buffer
|
||||
}
|
||||
|
||||
@@ -15,10 +15,17 @@ impl SSTableIndex {
|
||||
ciborium::de::from_reader(data).map_err(|_| SSTableDataCorruption)
|
||||
}
|
||||
|
||||
pub fn search(&self, key: &[u8]) -> Option<BlockAddr> {
|
||||
pub fn search_block(&self, key: &[u8]) -> Option<BlockAddr> {
|
||||
self.search_block_from(key).next()
|
||||
}
|
||||
|
||||
pub fn search_block_from<'key, 'slf: 'key>(
|
||||
&'slf self,
|
||||
key: &'key [u8],
|
||||
) -> impl Iterator<Item = BlockAddr> + Clone + 'key {
|
||||
self.blocks
|
||||
.iter()
|
||||
.find(|block| &block.last_key_or_greater[..] >= key)
|
||||
.skip_while(|block| &block.last_key_or_greater[..] < key)
|
||||
.map(|block| block.block_addr.clone())
|
||||
}
|
||||
}
|
||||
@@ -105,7 +112,7 @@ mod tests {
|
||||
sstable_builder.serialize(&mut buffer).unwrap();
|
||||
let sstable_index = SSTableIndex::load(&buffer[..]).unwrap();
|
||||
assert_eq!(
|
||||
sstable_index.search(b"bbbde"),
|
||||
sstable_index.search_block(b"bbbde"),
|
||||
Some(BlockAddr {
|
||||
first_ordinal: 10u64,
|
||||
byte_range: 30..40
|
||||
|
||||
@@ -4,31 +4,39 @@ use std::ops::Bound;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
use super::TermDictionary;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::termdict::sstable_termdict::TermInfoReader;
|
||||
use crate::termdict::TermOrdinal;
|
||||
use crate::dictionary::Dictionary;
|
||||
use crate::{SSTable, TermOrdinal};
|
||||
|
||||
/// `TermStreamerBuilder` is a helper object used to define
|
||||
/// `StreamerBuilder` is a helper object used to define
|
||||
/// a range of terms that should be streamed.
|
||||
pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
|
||||
pub struct StreamerBuilder<'a, TSSTable, A = AlwaysMatch>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
TSSTable: SSTable,
|
||||
{
|
||||
term_dict: &'a TermDictionary,
|
||||
term_dict: &'a Dictionary<TSSTable>,
|
||||
automaton: A,
|
||||
lower: Bound<Vec<u8>>,
|
||||
upper: Bound<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl<'a, A> TermStreamerBuilder<'a, A>
|
||||
fn bound_as_byte_slice(bound: &Bound<Vec<u8>>) -> Bound<&[u8]> {
|
||||
match bound.as_ref() {
|
||||
Bound::Included(key) => Bound::Included(key.as_slice()),
|
||||
Bound::Excluded(key) => Bound::Excluded(key.as_slice()),
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, TSSTable, A> StreamerBuilder<'a, TSSTable, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
TSSTable: SSTable,
|
||||
{
|
||||
pub(crate) fn new(term_dict: &'a TermDictionary, automaton: A) -> Self {
|
||||
TermStreamerBuilder {
|
||||
pub(crate) fn new(term_dict: &'a Dictionary<TSSTable>, automaton: A) -> Self {
|
||||
StreamerBuilder {
|
||||
term_dict,
|
||||
automaton,
|
||||
lower: Bound::Unbounded,
|
||||
@@ -61,12 +69,18 @@ where
|
||||
}
|
||||
|
||||
/// Creates the stream corresponding to the range
|
||||
/// of terms defined using the `TermStreamerBuilder`.
|
||||
pub fn into_stream(self) -> io::Result<TermStreamer<'a, A>> {
|
||||
/// of terms defined using the `StreamerBuilder`.
|
||||
pub fn into_stream(self) -> io::Result<Streamer<'a, TSSTable, A>> {
|
||||
// TODO Optimize by skipping to the right first block.
|
||||
let start_state = self.automaton.start();
|
||||
let delta_reader = self.term_dict.sstable_delta_reader()?;
|
||||
Ok(TermStreamer {
|
||||
let key_range = (
|
||||
bound_as_byte_slice(&self.lower),
|
||||
bound_as_byte_slice(&self.upper),
|
||||
);
|
||||
let delta_reader = self
|
||||
.term_dict
|
||||
.sstable_delta_reader_for_key_range(key_range)?;
|
||||
Ok(Streamer {
|
||||
automaton: self.automaton,
|
||||
states: vec![start_state],
|
||||
delta_reader,
|
||||
@@ -78,26 +92,28 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
|
||||
/// `Streamer` acts as a cursor over a range of terms of a segment.
|
||||
/// Terms are guaranteed to be sorted.
|
||||
pub struct TermStreamer<'a, A = AlwaysMatch>
|
||||
pub struct Streamer<'a, TSSTable, A = AlwaysMatch>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
TSSTable: SSTable,
|
||||
{
|
||||
automaton: A,
|
||||
states: Vec<A::State>,
|
||||
delta_reader: sstable::DeltaReader<'a, TermInfoReader>,
|
||||
delta_reader: crate::DeltaReader<'a, TSSTable::ValueReader>,
|
||||
key: Vec<u8>,
|
||||
term_ord: Option<TermOrdinal>,
|
||||
lower_bound: Bound<Vec<u8>>,
|
||||
upper_bound: Bound<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl<'a, A> TermStreamer<'a, A>
|
||||
impl<'a, TSSTable, A> Streamer<'a, TSSTable, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
TSSTable: SSTable,
|
||||
{
|
||||
/// Advance position the stream on the next item.
|
||||
/// Before the first call to `.advance()`, the stream
|
||||
@@ -174,13 +190,13 @@ where
|
||||
///
|
||||
/// Calling `.value()` before the first call to `.advance()` returns
|
||||
/// `V::default()`.
|
||||
pub fn value(&self) -> &TermInfo {
|
||||
pub fn value(&self) -> &TSSTable::Value {
|
||||
self.delta_reader.value()
|
||||
}
|
||||
|
||||
/// Return the next `(key, value)` pair.
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> Option<(&[u8], &TermInfo)> {
|
||||
pub fn next(&mut self) -> Option<(&[u8], &TSSTable::Value)> {
|
||||
if self.advance() {
|
||||
Some((self.key(), self.value()))
|
||||
} else {
|
||||
@@ -191,60 +207,54 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::super::TermDictionary;
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::postings::TermInfo;
|
||||
use std::io;
|
||||
|
||||
fn make_term_info(i: usize) -> TermInfo {
|
||||
TermInfo {
|
||||
doc_freq: 1000u32 + i as u32,
|
||||
postings_range: (i + 10) * (i * 10)..((i + 1) + 10) * ((i + 1) * 10),
|
||||
positions_range: i * 500..(i + 1) * 500,
|
||||
}
|
||||
}
|
||||
use common::OwnedBytes;
|
||||
|
||||
fn create_test_term_dictionary() -> crate::Result<TermDictionary> {
|
||||
let mut term_dict_builder = super::super::TermDictionaryBuilder::create(Vec::new())?;
|
||||
term_dict_builder.insert(b"abaisance", &make_term_info(0))?;
|
||||
term_dict_builder.insert(b"abalation", &make_term_info(1))?;
|
||||
term_dict_builder.insert(b"abalienate", &make_term_info(2))?;
|
||||
term_dict_builder.insert(b"abandon", &make_term_info(3))?;
|
||||
let buffer = term_dict_builder.finish()?;
|
||||
use crate::{Dictionary, SSTableMonotonicU64};
|
||||
|
||||
fn create_test_dictionary() -> io::Result<Dictionary<SSTableMonotonicU64>> {
|
||||
let mut dict_builder = Dictionary::<SSTableMonotonicU64>::builder(Vec::new())?;
|
||||
dict_builder.insert(b"abaisance", &0)?;
|
||||
dict_builder.insert(b"abalation", &1)?;
|
||||
dict_builder.insert(b"abalienate", &2)?;
|
||||
dict_builder.insert(b"abandon", &3)?;
|
||||
let buffer = dict_builder.finish()?;
|
||||
let owned_bytes = OwnedBytes::new(buffer);
|
||||
TermDictionary::from_bytes(owned_bytes)
|
||||
Dictionary::from_bytes(owned_bytes)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sstable_stream() -> crate::Result<()> {
|
||||
let term_dict = create_test_term_dictionary()?;
|
||||
let mut term_streamer = term_dict.stream()?;
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abaisance");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1000u32);
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abalation");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1001u32);
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abalienate");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1002u32);
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abandon");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1003u32);
|
||||
assert!(!term_streamer.advance());
|
||||
fn test_sstable_stream() -> io::Result<()> {
|
||||
let dict = create_test_dictionary()?;
|
||||
let mut streamer = dict.stream()?;
|
||||
assert!(streamer.advance());
|
||||
assert_eq!(streamer.key(), b"abaisance");
|
||||
assert_eq!(streamer.value(), &0);
|
||||
assert!(streamer.advance());
|
||||
assert_eq!(streamer.key(), b"abalation");
|
||||
assert_eq!(streamer.value(), &1);
|
||||
assert!(streamer.advance());
|
||||
assert_eq!(streamer.key(), b"abalienate");
|
||||
assert_eq!(streamer.value(), &2);
|
||||
assert!(streamer.advance());
|
||||
assert_eq!(streamer.key(), b"abandon");
|
||||
assert_eq!(streamer.value(), &3);
|
||||
assert!(!streamer.advance());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sstable_search() -> crate::Result<()> {
|
||||
let term_dict = create_test_term_dictionary()?;
|
||||
fn test_sstable_search() -> io::Result<()> {
|
||||
let term_dict = create_test_dictionary()?;
|
||||
let ptn = tantivy_fst::Regex::new("ab.*t.*").unwrap();
|
||||
let mut term_streamer = term_dict.search(ptn).into_stream()?;
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abalation");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1001u32);
|
||||
assert_eq!(term_streamer.value(), &1u64);
|
||||
assert!(term_streamer.advance());
|
||||
assert_eq!(term_streamer.key(), b"abalienate");
|
||||
assert_eq!(term_streamer.value().doc_freq, 1002u32);
|
||||
assert_eq!(term_streamer.value(), &2u64);
|
||||
assert!(!term_streamer.advance());
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
use std::io;
|
||||
|
||||
use super::{vint, BlockReader};
|
||||
|
||||
pub trait ValueReader: Default {
|
||||
type Value;
|
||||
|
||||
fn value(&self, idx: usize) -> &Self::Value;
|
||||
|
||||
fn read(&mut self, reader: &mut BlockReader) -> io::Result<()>;
|
||||
}
|
||||
|
||||
pub trait ValueWriter: Default {
|
||||
type Value;
|
||||
|
||||
fn write(&mut self, val: &Self::Value);
|
||||
|
||||
fn write_block(&mut self, writer: &mut Vec<u8>);
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VoidReader;
|
||||
|
||||
impl ValueReader for VoidReader {
|
||||
type Value = ();
|
||||
|
||||
fn value(&self, _idx: usize) -> &() {
|
||||
&()
|
||||
}
|
||||
|
||||
fn read(&mut self, _reader: &mut BlockReader) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VoidWriter;
|
||||
|
||||
impl ValueWriter for VoidWriter {
|
||||
type Value = ();
|
||||
|
||||
fn write(&mut self, _val: &()) {}
|
||||
|
||||
fn write_block(&mut self, _writer: &mut Vec<u8>) {}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct U64MonotonicWriter {
|
||||
vals: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ValueWriter for U64MonotonicWriter {
|
||||
type Value = u64;
|
||||
|
||||
fn write(&mut self, val: &Self::Value) {
|
||||
self.vals.push(*val);
|
||||
}
|
||||
|
||||
fn write_block(&mut self, writer: &mut Vec<u8>) {
|
||||
let mut prev_val = 0u64;
|
||||
vint::serialize_into_vec(self.vals.len() as u64, writer);
|
||||
for &val in &self.vals {
|
||||
let delta = val - prev_val;
|
||||
vint::serialize_into_vec(delta, writer);
|
||||
prev_val = val;
|
||||
}
|
||||
self.vals.clear();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct U64MonotonicReader {
|
||||
vals: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ValueReader for U64MonotonicReader {
|
||||
type Value = u64;
|
||||
|
||||
fn value(&self, idx: usize) -> &Self::Value {
|
||||
&self.vals[idx]
|
||||
}
|
||||
|
||||
fn read(&mut self, reader: &mut BlockReader) -> io::Result<()> {
|
||||
let len = reader.deserialize_u64() as usize;
|
||||
self.vals.clear();
|
||||
let mut prev_val = 0u64;
|
||||
for _ in 0..len {
|
||||
let delta = reader.deserialize_u64();
|
||||
let val = prev_val + delta;
|
||||
self.vals.push(val);
|
||||
prev_val = val;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
75
sstable/src/value/mod.rs
Normal file
75
sstable/src/value/mod.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
mod range;
|
||||
mod u64_monotonic;
|
||||
mod void;
|
||||
|
||||
use std::io;
|
||||
|
||||
/// `ValueReader` is a trait describing the contract of something
|
||||
/// reading blocks of value, and offering random access within this values.
|
||||
pub trait ValueReader: Default {
|
||||
/// Type of the value being read.
|
||||
type Value;
|
||||
|
||||
/// Access the value at index `idx`, in the last block that was read
|
||||
/// via a call to `ValueReader::read`.
|
||||
fn value(&self, idx: usize) -> &Self::Value;
|
||||
|
||||
/// Loads a block.
|
||||
///
|
||||
/// Returns the number of bytes that were written.
|
||||
fn load(&mut self, data: &[u8]) -> io::Result<usize>;
|
||||
}
|
||||
|
||||
pub trait ValueWriter: Default {
|
||||
/// Type of the value being written.
|
||||
type Value;
|
||||
|
||||
/// Records a new value.
|
||||
/// This method usually just accumulates data in a `Vec`,
|
||||
/// only to be serialized on the call to `ValueWriter::write_block`.
|
||||
fn write(&mut self, val: &Self::Value);
|
||||
|
||||
/// Serializes the accumulated values into the output buffer.
|
||||
fn serialize_block(&mut self, output: &mut Vec<u8>);
|
||||
}
|
||||
|
||||
pub use range::{RangeReader, RangeWriter};
|
||||
pub use u64_monotonic::{U64MonotonicReader, U64MonotonicWriter};
|
||||
pub use void::{VoidReader, VoidWriter};
|
||||
|
||||
fn deserialize_u64(data: &mut &[u8]) -> u64 {
|
||||
let (num_bytes, val) = super::vint::deserialize_read(data);
|
||||
*data = &data[num_bytes..];
|
||||
val
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use std::fmt;
|
||||
|
||||
use super::{ValueReader, ValueWriter};
|
||||
|
||||
pub(crate) fn test_value_reader_writer<
|
||||
V: Eq + fmt::Debug,
|
||||
TReader: ValueReader<Value = V>,
|
||||
TWriter: ValueWriter<Value = V>,
|
||||
>(
|
||||
value_block: &[V],
|
||||
) {
|
||||
let mut buffer = Vec::new();
|
||||
{
|
||||
let mut writer = TWriter::default();
|
||||
for value in value_block {
|
||||
writer.write(value);
|
||||
}
|
||||
writer.serialize_block(&mut buffer);
|
||||
}
|
||||
let data_len = buffer.len();
|
||||
buffer.extend_from_slice(&b"extradata"[..]);
|
||||
let mut reader = TReader::default();
|
||||
assert_eq!(reader.load(&buffer[..]).unwrap(), data_len);
|
||||
for (i, val) in value_block.iter().enumerate() {
|
||||
assert_eq!(reader.value(i), val);
|
||||
}
|
||||
}
|
||||
}
|
||||
95
sstable/src/value/range.rs
Normal file
95
sstable/src/value/range.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::value::{deserialize_u64, ValueReader, ValueWriter};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct RangeReader {
|
||||
vals: Vec<Range<u64>>,
|
||||
}
|
||||
|
||||
impl ValueReader for RangeReader {
|
||||
type Value = Range<u64>;
|
||||
|
||||
fn value(&self, idx: usize) -> &Range<u64> {
|
||||
&self.vals[idx]
|
||||
}
|
||||
|
||||
fn load(&mut self, mut data: &[u8]) -> io::Result<usize> {
|
||||
self.vals.clear();
|
||||
let original_num_bytes = data.len();
|
||||
let len = deserialize_u64(&mut data) as usize;
|
||||
if len != 0 {
|
||||
let mut prev_val = deserialize_u64(&mut data);
|
||||
for _ in 1..len {
|
||||
let next_val = prev_val + deserialize_u64(&mut data);
|
||||
self.vals.push(prev_val..next_val);
|
||||
prev_val = next_val;
|
||||
}
|
||||
}
|
||||
Ok(original_num_bytes - data.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct RangeWriter {
|
||||
vals: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ValueWriter for RangeWriter {
|
||||
type Value = Range<u64>;
|
||||
|
||||
fn write(&mut self, val: &Range<u64>) {
|
||||
if let Some(previous_offset) = self.vals.last().copied() {
|
||||
assert_eq!(previous_offset, val.start);
|
||||
self.vals.push(val.end);
|
||||
} else {
|
||||
self.vals.push(val.start);
|
||||
self.vals.push(val.end)
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_block(&mut self, writer: &mut Vec<u8>) {
|
||||
let mut prev_val = 0u64;
|
||||
crate::vint::serialize_into_vec(self.vals.len() as u64, writer);
|
||||
for &val in &self.vals {
|
||||
let delta = val - prev_val;
|
||||
crate::vint::serialize_into_vec(delta, writer);
|
||||
prev_val = val;
|
||||
}
|
||||
self.vals.clear();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_range_reader_writer() {
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[]);
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[0..3]);
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[
|
||||
0..3,
|
||||
3..10,
|
||||
]);
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[
|
||||
0..0,
|
||||
0..10,
|
||||
]);
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[
|
||||
100..110,
|
||||
110..121,
|
||||
121..1250,
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_range_reader_writer_panics() {
|
||||
crate::value::tests::test_value_reader_writer::<_, RangeReader, RangeWriter>(&[
|
||||
1..3,
|
||||
4..10,
|
||||
]);
|
||||
}
|
||||
}
|
||||
73
sstable/src/value/u64_monotonic.rs
Normal file
73
sstable/src/value/u64_monotonic.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use std::io;
|
||||
|
||||
use crate::value::{deserialize_u64, ValueReader, ValueWriter};
|
||||
use crate::vint;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct U64MonotonicReader {
|
||||
vals: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ValueReader for U64MonotonicReader {
|
||||
type Value = u64;
|
||||
|
||||
fn value(&self, idx: usize) -> &Self::Value {
|
||||
&self.vals[idx]
|
||||
}
|
||||
|
||||
fn load(&mut self, mut data: &[u8]) -> io::Result<usize> {
|
||||
let original_num_bytes = data.len();
|
||||
let num_vals = deserialize_u64(&mut data) as usize;
|
||||
self.vals.clear();
|
||||
let mut prev_val = 0u64;
|
||||
for _ in 0..num_vals {
|
||||
let delta = deserialize_u64(&mut data);
|
||||
let val = prev_val + delta;
|
||||
self.vals.push(val);
|
||||
prev_val = val;
|
||||
}
|
||||
Ok(original_num_bytes - data.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct U64MonotonicWriter {
|
||||
vals: Vec<u64>,
|
||||
}
|
||||
|
||||
impl ValueWriter for U64MonotonicWriter {
|
||||
type Value = u64;
|
||||
|
||||
fn write(&mut self, val: &Self::Value) {
|
||||
self.vals.push(*val);
|
||||
}
|
||||
|
||||
fn serialize_block(&mut self, output: &mut Vec<u8>) {
|
||||
let mut prev_val = 0u64;
|
||||
vint::serialize_into_vec(self.vals.len() as u64, output);
|
||||
for &val in &self.vals {
|
||||
let delta = val - prev_val;
|
||||
vint::serialize_into_vec(delta, output);
|
||||
prev_val = val;
|
||||
}
|
||||
self.vals.clear();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_u64_monotonic_reader_writer() {
|
||||
crate::value::tests::test_value_reader_writer::<_, U64MonotonicReader, U64MonotonicWriter>(
|
||||
&[],
|
||||
);
|
||||
crate::value::tests::test_value_reader_writer::<_, U64MonotonicReader, U64MonotonicWriter>(
|
||||
&[5],
|
||||
);
|
||||
crate::value::tests::test_value_reader_writer::<_, U64MonotonicReader, U64MonotonicWriter>(
|
||||
&[1u64, 30u64],
|
||||
);
|
||||
}
|
||||
}
|
||||
41
sstable/src/value/void.rs
Normal file
41
sstable/src/value/void.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use std::io;
|
||||
|
||||
use crate::value::{ValueReader, ValueWriter};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VoidReader;
|
||||
|
||||
impl ValueReader for VoidReader {
|
||||
type Value = ();
|
||||
|
||||
fn value(&self, _idx: usize) -> &() {
|
||||
&()
|
||||
}
|
||||
|
||||
fn load(&mut self, _data: &[u8]) -> io::Result<usize> {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VoidWriter;
|
||||
|
||||
impl ValueWriter for VoidWriter {
|
||||
type Value = ();
|
||||
|
||||
fn write(&mut self, _val: &()) {}
|
||||
|
||||
fn serialize_block(&mut self, _output: &mut Vec<u8>) {}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_range_reader_writer() {
|
||||
crate::value::tests::test_value_reader_writer::<_, VoidReader, VoidWriter>(&[]);
|
||||
crate::value::tests::test_value_reader_writer::<_, VoidReader, VoidWriter>(&[()]);
|
||||
crate::value::tests::test_value_reader_writer::<_, VoidReader, VoidWriter>(&[(), (), ()]);
|
||||
}
|
||||
}
|
||||
@@ -6,4 +6,4 @@ edition = "2021"
|
||||
[dependencies]
|
||||
murmurhash32 = "0.2"
|
||||
byteorder = "1"
|
||||
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
|
||||
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
|
||||
|
||||
Reference in New Issue
Block a user