diff --git a/Cargo.toml b/Cargo.toml
index 79fcfc141..d83fc7f1e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,7 +20,7 @@ once_cell = "1.7.2"
 regex ={ version = "1.5.4", default-features = false, features = ["std"] }
 tantivy-fst = "0.3"
 memmap = {version = "0.7", optional=true}
-lz4_flex = { version = "0.7.5", default-features = false, features = ["checked-decode"], optional = true }
+lz4_flex = { version = "0.8.0", default-features = false, features = ["checked-decode"], optional = true }
 lz4 = { version = "1.23.2", optional = true }
 brotli = { version = "3.3", optional = true }
 snap = { version = "1.0.5", optional = true }
diff --git a/src/store/compression_lz4_block.rs b/src/store/compression_lz4_block.rs
index 41d6583e9..dfce75e66 100644
--- a/src/store/compression_lz4_block.rs
+++ b/src/store/compression_lz4_block.rs
@@ -9,31 +9,67 @@ pub const COMPRESSION: &str = "lz4_block";
 
+/// Compresses `uncompressed` into `compressed` as an lz4 block.
+///
+/// `compressed` is cleared first. The output is the uncompressed length as a
+/// little-endian `u32`, followed by the lz4 block.
+///
+/// # Errors
+///
+/// Returns an `io::ErrorKind::InvalidData` error if `compress_into` fails.
 pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
     compressed.clear();
+    let maximum_output_size = lz4_flex::block::get_maximum_output_size(uncompressed.len());
+    // Reserve room for the 4-byte length prefix as well as the worst-case block.
+    compressed.reserve(maximum_output_size + 4);
     compressed.extend_from_slice(&[0, 0, 0, 0]);
-    compress_into(uncompressed, compressed);
+    // SAFETY: the vector was cleared and `maximum_output_size + 4` bytes were
+    // reserved above, so the new length does not exceed the capacity. The bytes
+    // after the prefix are uninitialized and serve only as the output buffer of
+    // `compress_into`; on success the vector is truncated to what was written.
+    unsafe {
+        compressed.set_len(maximum_output_size + 4);
+    }
+    let bytes_written = compress_into(uncompressed, compressed, 4)
+        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
     let num_bytes = uncompressed.len() as u32;
     compressed[0..4].copy_from_slice(&num_bytes.to_le_bytes());
+    // Drop the unused tail of the worst-case sized buffer.
+    compressed.truncate(bytes_written + 4);
     Ok(())
 }
 
+/// Decompresses a block written by `compress` into `decompressed`.
+///
+/// `decompressed` is cleared first. The first four bytes of `compressed` are
+/// read as the little-endian `u32` uncompressed length.
+///
+/// # Errors
+///
+/// Returns an `io::ErrorKind::InvalidData` error if `compressed` is shorter
+/// than four bytes, if `decompress_into` fails, or if fewer bytes than the
+/// stored length were produced.
 pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
     decompressed.clear();
-    //next lz4_flex version will support slice as input parameter.
-    //this will make the usage much less ugly
     let uncompressed_size_bytes: &[u8; 4] = compressed
         .get(..4)
         .ok_or(io::ErrorKind::InvalidData)?
         .try_into()
         .unwrap();
     let uncompressed_size = u32::from_le_bytes(*uncompressed_size_bytes) as usize;
-    // reserve more than required, because blocked writes may write out of bounds, will be improved
-    // with lz4_flex 1.0
-    decompressed.reserve(uncompressed_size + 4 + 24);
+    decompressed.reserve(uncompressed_size);
+    // SAFETY: the vector was cleared and `uncompressed_size` bytes were reserved
+    // above, so the new length does not exceed the capacity. The bytes are
+    // uninitialized and serve only as the output buffer of `decompress_into`.
     unsafe {
         decompressed.set_len(uncompressed_size);
     }
-    decompress_into(&compressed[4..], decompressed)
+    let bytes_written = decompress_into(&compressed[4..], decompressed, 0)
         .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
+    if bytes_written != uncompressed_size {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            "doc store block not completely decompressed, data corruption",
+        ));
+    }
     Ok(())
 }
 
diff --git a/src/store/reader.rs b/src/store/reader.rs
index f451a70bc..8cddc7150 100644
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -310,7 +310,7 @@ mod tests {
                 .unwrap()
                 .peek_lru()
                 .map(|(&k, _)| k as usize),
-            Some(9249)
+            Some(9210)
         );
 
         Ok(())