add lz4 block format compressor as default docstore compressor

add lz4 block compressor using lz4_flex, add lz4-block-compression feature flag
add snappy-compression feature flag for snap compressor, make snap crate optional
set lz4-block-compression as default feature flag
This commit is contained in:
Pascal Seitz
2021-04-16 15:13:46 +02:00
parent ba4bc6d7c3
commit a00049b879
5 changed files with 81 additions and 7 deletions

View File

@@ -20,9 +20,10 @@ once_cell = "1"
regex ={version = "1", default-features = false, features = ["std"]}
tantivy-fst = "0.3"
memmap = {version = "0.7", optional=true}
lz4_flex = { version = "0.7", default-features = false, features = ["checked-decode"], optional=true }
lz4 = {version="1", optional=true}
brotli = {version="3.3.0", optional=true}
snap = "1"
snap = {version="1.0", optional=true}
tempfile = {version="3", optional=true}
log = "0.4"
serde = {version="1", features=["derive"]}
@@ -74,10 +75,12 @@ debug-assertions = true
overflow-checks = true
[features]
default = ["mmap"]
default = ["mmap", "lz4-block-compression" ]
mmap = ["fs2", "tempfile", "memmap"]
brotli-compression = ["brotli"]
lz4-compression = ["lz4"]
lz4-block-compression = ["lz4_flex"]
snappy-compression = ["snap"]
failpoints = ["fail/failpoints"]
unstable = [] # useful for benches.
wasm-bindgen = ["uuid/wasm-bindgen"]

View File

@@ -297,8 +297,10 @@ mod tests {
assert!(footer_proxy.terminate().is_ok());
if crate::store::COMPRESSION == "lz4" {
assert_eq!(vec.len(), 158);
} else {
} else if crate::store::COMPRESSION == "snappy" {
assert_eq!(vec.len(), 167);
} else if crate::store::COMPRESSION == "lz4_block" {
assert_eq!(vec.len(), 176);
}
let footer = Footer::deserialize(&mut &vec[..]).unwrap();
assert!(matches!(

View File

@@ -0,0 +1,38 @@
use std::io::{self};
use core::convert::TryInto;
use lz4_flex::{compress_into, decompress_into};
/// Name of the compression scheme used in the doc store.
///
/// This name is appended to the version string of tantivy.
pub const COMPRESSION: &str = "lz4_block";
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
compressed.clear();
compressed.extend_from_slice(&[0, 0, 0, 0]);
compress_into(uncompressed, compressed);
let size = uncompressed.len() as u32;
compressed[0] = size as u8;
compressed[1] = (size >> 8) as u8;
compressed[2] = (size >> 16) as u8;
compressed[3] = (size >> 24) as u8;
Ok(())
}
/// Decompresses a block written by `compress` into `decompressed`.
///
/// `compressed` must start with a 4-byte little-endian prefix holding the
/// uncompressed length, followed by the LZ4 block payload. `decompressed` is
/// cleared first and, on success, has exactly the length given by the prefix.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidData` error if the input is shorter than
/// the 4-byte prefix, if the required buffer size overflows `usize`, or if
/// `lz4_flex::decompress_into` reports a failure. On error `decompressed` is
/// left empty.
pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
    decompressed.clear();
    // The next lz4_flex version will support a slice as the output parameter,
    // which will make this usage much less awkward.
    let size_prefix: [u8; 4] = compressed
        .get(..4)
        .and_then(|prefix| prefix.try_into().ok())
        .ok_or(io::ErrorKind::InvalidData)?;
    let uncompressed_size = u32::from_le_bytes(size_prefix) as usize;
    // Reserve more than required, because blocked writes may write out of
    // bounds; this will be improved with lz4_flex 1.0. The addition is checked
    // so a large prefix cannot wrap around on 32-bit targets.
    let required_capacity = uncompressed_size
        .checked_add(4 + 24)
        .ok_or(io::ErrorKind::InvalidData)?;
    decompressed.reserve(required_capacity);
    // SAFETY: the vector is empty and `reserve` above guarantees a capacity of
    // at least `uncompressed_size + 28`, so the new length is within capacity.
    // The bytes are uninitialized until `decompress_into` writes them.
    // NOTE(review): this assumes a successful `decompress_into` fills all
    // `uncompressed_size` bytes -- confirm against the lz4_flex version in use.
    unsafe {
        decompressed.set_len(uncompressed_size);
    }
    if decompress_into(&compressed[4..], decompressed).is_err() {
        // Do not hand a buffer of (partially) uninitialized bytes to the caller.
        decompressed.clear();
        return Err(io::ErrorKind::InvalidData.into());
    }
    Ok(())
}

View File

@@ -39,9 +39,40 @@ mod writer;
pub use self::reader::StoreReader;
pub use self::writer::StoreWriter;
// Exactly one docstore compressor must be selected at compile time.
//
// The feature names tested here are the implicit features created by the
// optional dependencies (`lz4`, `lz4_flex`, `brotli`, `snap`), which the
// user-facing `*-compression` features enable.
//
// compile_error doesn't scale very well, enum like feature flags would be great to have in Rust
#[cfg(all(feature = "lz4", feature = "brotli"))]
compile_error!("feature `lz4` or `brotli` must not be enabled together.");
#[cfg(all(feature = "lz4_flex", feature = "brotli"))]
compile_error!("feature `lz4_flex` or `brotli` must not be enabled together.");
#[cfg(all(feature = "lz4_flex", feature = "lz4"))]
compile_error!("feature `lz4_flex` or `lz4` must not be enabled together.");
#[cfg(all(feature = "lz4_flex", feature = "snap"))]
compile_error!("feature `lz4_flex` or `snap` must not be enabled together.");
#[cfg(all(feature = "lz4", feature = "snap"))]
compile_error!("feature `lz4` or `snap` must not be enabled together.");
#[cfg(all(feature = "brotli", feature = "snap"))]
compile_error!("feature `brotli` or `snap` must not be enabled together.");
#[cfg(not(any(
    feature = "lz4",
    feature = "brotli",
    feature = "lz4_flex",
    feature = "snap"
)))]
compile_error!("all compressors are deactivated via feature-flags, check Cargo.toml for available decompressors.");
#[cfg(feature = "lz4_flex")]
mod compression_lz4_block;
#[cfg(feature = "lz4_flex")]
pub use self::compression_lz4_block::COMPRESSION;
#[cfg(feature = "lz4_flex")]
use self::compression_lz4_block::{compress, decompress};
#[cfg(feature = "lz4")]
mod compression_lz4;
#[cfg(feature = "lz4")]
@@ -56,11 +87,11 @@ pub use self::compression_brotli::COMPRESSION;
#[cfg(feature = "brotli")]
use self::compression_brotli::{compress, decompress};
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
#[cfg(feature = "snap")]
mod compression_snap;
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
#[cfg(feature = "snap")]
pub use self::compression_snap::COMPRESSION;
#[cfg(not(any(feature = "lz4", feature = "brotli")))]
#[cfg(feature = "snap")]
use self::compression_snap::{compress, decompress};
#[cfg(test)]

View File

@@ -191,7 +191,7 @@ mod tests {
.unwrap()
.peek_lru()
.map(|(&k, _)| k as usize),
Some(18806)
Some(9249)
);
Ok(())