mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-02 16:40:43 +00:00
add lz4 block format compressor as default docstore compressor
add lz4 block compressor using lz4_flex; add lz4-block-compression feature flag; add snappy-compression feature flag for the snap compressor; make the snap crate optional; set lz4-block-compression as the default feature flag
This commit is contained in:
@@ -20,9 +20,10 @@ once_cell = "1"
|
||||
regex ={version = "1", default-features = false, features = ["std"]}
|
||||
tantivy-fst = "0.3"
|
||||
memmap = {version = "0.7", optional=true}
|
||||
lz4_flex = { version = "0.7", default-features = false, features = ["checked-decode"], optional=true }
|
||||
lz4 = {version="1", optional=true}
|
||||
brotli = {version="3.3.0", optional=true}
|
||||
snap = "1"
|
||||
snap = {version="1.0", optional=true}
|
||||
tempfile = {version="3", optional=true}
|
||||
log = "0.4"
|
||||
serde = {version="1", features=["derive"]}
|
||||
@@ -74,10 +75,12 @@ debug-assertions = true
|
||||
overflow-checks = true
|
||||
|
||||
[features]
|
||||
default = ["mmap"]
|
||||
default = ["mmap", "lz4-block-compression" ]
|
||||
mmap = ["fs2", "tempfile", "memmap"]
|
||||
brotli-compression = ["brotli"]
|
||||
lz4-compression = ["lz4"]
|
||||
lz4-block-compression = ["lz4_flex"]
|
||||
snappy-compression = ["snap"]
|
||||
failpoints = ["fail/failpoints"]
|
||||
unstable = [] # useful for benches.
|
||||
wasm-bindgen = ["uuid/wasm-bindgen"]
|
||||
|
||||
@@ -297,8 +297,10 @@ mod tests {
|
||||
assert!(footer_proxy.terminate().is_ok());
|
||||
if crate::store::COMPRESSION == "lz4" {
|
||||
assert_eq!(vec.len(), 158);
|
||||
} else {
|
||||
} else if crate::store::COMPRESSION == "snappy" {
|
||||
assert_eq!(vec.len(), 167);
|
||||
} else if crate::store::COMPRESSION == "lz4_block" {
|
||||
assert_eq!(vec.len(), 176);
|
||||
}
|
||||
let footer = Footer::deserialize(&mut &vec[..]).unwrap();
|
||||
assert!(matches!(
|
||||
|
||||
38
src/store/compression_lz4_block.rs
Normal file
38
src/store/compression_lz4_block.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use std::io::{self};
|
||||
|
||||
use core::convert::TryInto;
|
||||
use lz4_flex::{compress_into, decompress_into};
|
||||
/// Name of the compression scheme used in the doc store.
|
||||
///
|
||||
/// This name is appended to the version string of tantivy.
|
||||
pub const COMPRESSION: &str = "lz4_block";
|
||||
|
||||
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
compressed.clear();
|
||||
|
||||
compressed.extend_from_slice(&[0, 0, 0, 0]);
|
||||
compress_into(uncompressed, compressed);
|
||||
let size = uncompressed.len() as u32;
|
||||
compressed[0] = size as u8;
|
||||
compressed[1] = (size >> 8) as u8;
|
||||
compressed[2] = (size >> 16) as u8;
|
||||
compressed[3] = (size >> 24) as u8;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
decompressed.clear();
|
||||
//next lz4_flex version will support slice as input parameter.
|
||||
//this will make the usage much less ugly
|
||||
let size = compressed.get(..4).ok_or(io::ErrorKind::InvalidData)?;
|
||||
let size: &[u8; 4] = size.try_into().unwrap();
|
||||
let uncompressed_size = u32::from_le_bytes(*size) as usize;
|
||||
// reserve more than required, because blocked writes may write out of bounds, will be improved
|
||||
// with lz4_flex 1.0
|
||||
decompressed.reserve(uncompressed_size + 4 + 24);
|
||||
unsafe {
|
||||
decompressed.set_len(uncompressed_size);
|
||||
}
|
||||
decompress_into(&compressed[4..], decompressed).map_err(|_err| io::ErrorKind::InvalidData)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -39,9 +39,40 @@ mod writer;
|
||||
pub use self::reader::StoreReader;
|
||||
pub use self::writer::StoreWriter;
|
||||
|
||||
// compile_error doesn't scale very well, enum like feature flags would be great to have in Rust
|
||||
#[cfg(all(feature = "lz4", feature = "brotli"))]
|
||||
compile_error!("feature `lz4` or `brotli` must not be enabled together.");
|
||||
|
||||
#[cfg(all(feature = "lz4_block", feature = "brotli"))]
|
||||
compile_error!("feature `lz4_block` or `brotli` must not be enabled together.");
|
||||
|
||||
#[cfg(all(feature = "lz4_block", feature = "lz4"))]
|
||||
compile_error!("feature `lz4_block` or `lz4` must not be enabled together.");
|
||||
|
||||
#[cfg(all(feature = "lz4_block", feature = "snap"))]
|
||||
compile_error!("feature `lz4_block` or `snap` must not be enabled together.");
|
||||
|
||||
#[cfg(all(feature = "lz4", feature = "snap"))]
|
||||
compile_error!("feature `lz4` or `snap` must not be enabled together.");
|
||||
|
||||
#[cfg(all(feature = "brotli", feature = "snap"))]
|
||||
compile_error!("feature `brotli` or `snap` must not be enabled together.");
|
||||
|
||||
#[cfg(not(any(
|
||||
feature = "lz4",
|
||||
feature = "brotli",
|
||||
feature = "lz4_flex",
|
||||
feature = "snap"
|
||||
)))]
|
||||
compile_error!("all compressors are deactivated via feature-flags, check Cargo.toml for available decompressors.");
|
||||
|
||||
#[cfg(feature = "lz4_flex")]
|
||||
mod compression_lz4_block;
|
||||
#[cfg(feature = "lz4_flex")]
|
||||
pub use self::compression_lz4_block::COMPRESSION;
|
||||
#[cfg(feature = "lz4_flex")]
|
||||
use self::compression_lz4_block::{compress, decompress};
|
||||
|
||||
#[cfg(feature = "lz4")]
|
||||
mod compression_lz4;
|
||||
#[cfg(feature = "lz4")]
|
||||
@@ -56,11 +87,11 @@ pub use self::compression_brotli::COMPRESSION;
|
||||
#[cfg(feature = "brotli")]
|
||||
use self::compression_brotli::{compress, decompress};
|
||||
|
||||
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
|
||||
#[cfg(feature = "snap")]
|
||||
mod compression_snap;
|
||||
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
|
||||
#[cfg(feature = "snap")]
|
||||
pub use self::compression_snap::COMPRESSION;
|
||||
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
|
||||
#[cfg(feature = "snap")]
|
||||
use self::compression_snap::{compress, decompress};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -191,7 +191,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.peek_lru()
|
||||
.map(|(&k, _)| k as usize),
|
||||
Some(18806)
|
||||
Some(9249)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user