From 62e7638c6919ca4ba051c04425f30a779fb98f6b Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Fri, 8 Dec 2023 16:24:51 +0200 Subject: [PATCH] Store compression algorithm in image layer metadata --- pageserver/src/lib.rs | 9 +++++- .../src/tenant/storage_layer/image_layer.rs | 31 +++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 02a690d4e1..4a688d36d4 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -40,7 +40,14 @@ use tracing::info; /// format, bump this! /// Note that TimelineMetadata uses its own version number to track /// backwards-compatible changes to the metadata format. -pub const STORAGE_FORMAT_VERSION: u16 = 3; +pub const STORAGE_FORMAT_VERSION: u16 = 4; + +/// Minimal sorage format version with compression support +pub const COMPRESSED_STORAGE_FORMAT_VERSION: u16 = 4; + +/// Page image compression algorithm +pub const NO_COMPRESSION: u8 = 0; +pub const LZ4_COMPRESSION: u8 = 0; pub const DEFAULT_PG_VERSION: u32 = 15; diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index cd226c2e20..f9bff850e7 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -40,7 +40,7 @@ use crate::tenant::vectored_blob_io::{ }; use crate::tenant::{PageReconstructError, Timeline}; use crate::virtual_file::{self, VirtualFile}; -use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; +use crate::{IMAGE_FILE_MAGIC, LZ4_COMPRESSION, NO_COMPRESSION, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; use anyhow::{anyhow, bail, ensure, Context, Result}; use bytes::{Bytes, BytesMut}; use camino::{Utf8Path, Utf8PathBuf}; @@ -92,6 +92,10 @@ pub struct Summary { pub index_start_blk: u32, /// Block within the 'index', where the B-tree root page is stored pub index_root_blk: u32, + /// Compression algorithm used for relation pages. Compressed size should be always + /// smaller than original size, otherwise original image is stored instead of conpressed image. + /// Old storage versions (format_version < 4) do not have this fields and it is deseriealized as 0=NO+COMPRESSION + pub compression_alg: u8, // the 'values' part starts after the summary header, on block 1. } @@ -123,6 +127,7 @@ impl Summary { index_start_blk: 0, index_root_blk: 0, + compression_alg: LZ4_COMPRESSION, } } } @@ -157,6 +162,8 @@ pub struct ImageLayerInner { // values copied from summary index_start_blk: u32, index_root_blk: u32, + format_version: u16, + compression_alg: u8, lsn: Lsn, @@ -171,6 +178,8 @@ impl std::fmt::Debug for ImageLayerInner { f.debug_struct("ImageLayerInner") .field("index_start_blk", &self.index_start_blk) .field("index_root_blk", &self.index_root_blk) + .field("format_version", &self.format_version) + .field("compression_alg", &self.compression_alg) .finish() } } @@ -395,10 +404,20 @@ impl ImageLayerInner { let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context("deserialize first block")?; + if actual_summary.compression_alg != LZ4_COMPRESSION + && actual_summary.compression_alg != NO_COMPRESSION + { + bail!( + "Unsupported compression algorithm: {}", + actual_summary.compression_alg + ); + } + if let Some(mut expected_summary) = summary { // production code path expected_summary.index_start_blk = actual_summary.index_start_blk; expected_summary.index_root_blk = actual_summary.index_root_blk; + expected_summary.compression_alg = actual_summary.compression_alg; if actual_summary != expected_summary { bail!( @@ -412,6 +431,8 @@ impl ImageLayerInner { Ok(Ok(ImageLayerInner { index_start_blk: actual_summary.index_start_blk, index_root_blk: actual_summary.index_root_blk, + format_version: actual_summary.format_version, + compression_alg: actual_summary.compression_alg, lsn, file, file_id, @@ -450,7 +471,11 @@ impl ImageLayerInner { ) .await .with_context(|| format!("failed to read value from offset {}", offset))?; - let value = if is_rel_data_key(key) && blob.len() < BLCKSZ as usize { + + let value = if self.compression_alg == LZ4_COMPRESSION + && is_rel_data_key(key) + && blob.len() < BLCKSZ as usize + { let decompressed = lz4_flex::block::decompress(&blob, BLCKSZ as usize)?; Bytes::from(decompressed) } else { @@ -667,6 +692,7 @@ impl ImageLayerWriterInner { async fn put_image(&mut self, key: Key, img: Bytes) -> anyhow::Result<()> { ensure!(self.key_range.contains(&key)); let (_img, res) = if is_rel_data_key(key) { + assert_eq!(img.len(), BLCKSZ as usize); let compressed = lz4_flex::block::compress(img); if compressed.len() < img.len() { self.blob_writer.write_blob(&compressed).await; @@ -713,6 +739,7 @@ impl ImageLayerWriterInner { lsn: self.lsn, index_start_blk, index_root_blk, + compression_alg: LZ4_COMPRESSION, }; let mut buf = Vec::with_capacity(PAGE_SZ);