mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 00:50:41 +00:00
upgrade lz4_flex to 0.8 (#1049)
* upgrade lz4_flex to 0.8
* fix set_len
This commit is contained in:
@@ -20,7 +20,7 @@ once_cell = "1.7.2"
|
||||
regex ={ version = "1.5.4", default-features = false, features = ["std"] }
|
||||
tantivy-fst = "0.3"
|
||||
memmap = {version = "0.7", optional=true}
|
||||
lz4_flex = { version = "0.7.5", default-features = false, features = ["checked-decode"], optional = true }
|
||||
lz4_flex = { version = "0.8.0", default-features = false, features = ["checked-decode"], optional = true }
|
||||
lz4 = { version = "1.23.2", optional = true }
|
||||
brotli = { version = "3.3", optional = true }
|
||||
snap = { version = "1.0.5", optional = true }
|
||||
|
||||
@@ -9,31 +9,42 @@ pub const COMPRESSION: &str = "lz4_block";
|
||||
|
||||
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
compressed.clear();
|
||||
let maximum_ouput_size = lz4_flex::block::get_maximum_output_size(uncompressed.len());
|
||||
compressed.reserve(maximum_ouput_size);
|
||||
|
||||
compressed.extend_from_slice(&[0, 0, 0, 0]);
|
||||
compress_into(uncompressed, compressed);
|
||||
unsafe {
|
||||
compressed.set_len(maximum_ouput_size + 4);
|
||||
}
|
||||
let bytes_written = compress_into(uncompressed, compressed, 4)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
|
||||
let num_bytes = uncompressed.len() as u32;
|
||||
compressed[0..4].copy_from_slice(&num_bytes.to_le_bytes());
|
||||
unsafe {
|
||||
compressed.set_len(bytes_written + 4);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
decompressed.clear();
|
||||
//next lz4_flex version will support slice as input parameter.
|
||||
//this will make the usage much less ugly
|
||||
let uncompressed_size_bytes: &[u8; 4] = compressed
|
||||
.get(..4)
|
||||
.ok_or(io::ErrorKind::InvalidData)?
|
||||
.try_into()
|
||||
.unwrap();
|
||||
let uncompressed_size = u32::from_le_bytes(*uncompressed_size_bytes) as usize;
|
||||
// reserve more than required, because blocked writes may write out of bounds, will be improved
|
||||
// with lz4_flex 1.0
|
||||
decompressed.reserve(uncompressed_size + 4 + 24);
|
||||
decompressed.reserve(uncompressed_size);
|
||||
unsafe {
|
||||
decompressed.set_len(uncompressed_size);
|
||||
}
|
||||
decompress_into(&compressed[4..], decompressed)
|
||||
let bytes_written = decompress_into(&compressed[4..], decompressed, 0)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
|
||||
if bytes_written != uncompressed_size {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"doc store block not completely decompressed, data corruption".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -310,7 +310,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.peek_lru()
|
||||
.map(|(&k, _)| k as usize),
|
||||
Some(9249)
|
||||
Some(9210)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user