From 8ea907b66cc425f261d41f4fdb791cd897a49053 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 14 May 2022 11:55:59 +0300 Subject: [PATCH] Minor refactoring --- .../src/layered_repository/delta_layer.rs | 51 ++++++++++--------- .../src/layered_repository/image_layer.rs | 3 +- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/pageserver/src/layered_repository/delta_layer.rs b/pageserver/src/layered_repository/delta_layer.rs index cf4f33d4c8..28071e0caa 100644 --- a/pageserver/src/layered_repository/delta_layer.rs +++ b/pageserver/src/layered_repository/delta_layer.rs @@ -27,7 +27,7 @@ //! //! Each value is stored as a Blob, which can optionally be compressed. Compression //! is done by ZStandard, in dictionary mode, which gives pretty good compression -//! ratio even for small inputs, like WAL records. +//! ratio even for small inputs like WAL records. //! //! The dictionary is built separately for each delta layer file, and stored in //! the file itself. @@ -94,6 +94,7 @@ struct Summary { /// Block within the 'index', where the B-tree root page is stored index_root_blk: u32, + /// Byte offset of the compression dictionary, or 0 if no compression dictionary_offset: u64, } @@ -117,11 +118,11 @@ impl From<&DeltaLayer> for Summary { } /// -/// Struct representing reference to BLOB in the file. The reference -/// contains the offset to the BLOB within the file, a flag indicating -/// if it's compressed or not, and also the `will_init` flag. The flag -/// helps to determine the range of records that needs to be applied, -/// without reading/deserializing records themselves. +/// Struct representing reference to BLOB in the file. The reference contains +/// the offset to the BLOB within the file, a flag indicating if it's +/// compressed or not, and also the `will_init` flag. The `will_init` flag +/// helps to determine the range of records that needs to be applied, without +/// reading/deserializing records themselves. /// #[derive(Debug, Serialize, Deserialize, Copy, Clone)] struct BlobRef(u64); @@ -227,15 +228,15 @@ pub struct DeltaLayerInner { /// Reader object for reading blocks from the file. (None if not loaded yet) file: Option>, - /// Compression dictionary. - dictionary: Vec, // empty if not loaded - prepared_dictionary: Option>, // None if not loaded + /// Compression dictionary, as raw bytes, and in prepared format ready for use + /// for decompression. None if there is no dictionary, or if 'loaded' is false. + dictionary: Option<(Vec, zstd::dict::DecoderDictionary<'static>)>, } impl DeltaLayerInner { // Create a new Decompressor, using the prepared dictionary fn create_decompressor(&self) -> Result>> { - if let Some(dict) = &self.prepared_dictionary { + if let Some((_, dict)) = &self.dictionary { let decompressor = zstd::bulk::Decompressor::with_prepared_dictionary(dict)?; Ok(Some(decompressor)) } else { @@ -250,8 +251,8 @@ impl DeltaLayerInner { fn create_decompressor_not_prepared( &self, ) -> Result>> { - if !self.dictionary.is_empty() { - let decompressor = zstd::bulk::Decompressor::with_dictionary(&self.dictionary)?; + if let Some((dict, _)) = &self.dictionary { + let decompressor = zstd::bulk::Decompressor::with_dictionary(dict)?; Ok(Some(decompressor)) } else { Ok(None) @@ -535,7 +536,7 @@ impl DeltaLayer { /// Open the underlying file and read the metadata into memory, if it's /// not loaded already. /// - fn load(&self) -> Result>{ + fn load(&self) -> Result> { loop { // Quick exit if already loaded let inner = self.inner.read().unwrap(); @@ -601,11 +602,12 @@ impl DeltaLayer { } } - let mut cursor = file.block_cursor(); + // Load and prepare the dictionary, if any if actual_summary.dictionary_offset != 0 { - inner.dictionary = cursor.read_blob(actual_summary.dictionary_offset)?; - inner.prepared_dictionary = - Some(zstd::dict::DecoderDictionary::copy(&inner.dictionary)); + let mut cursor = file.block_cursor(); + let dict = cursor.read_blob(actual_summary.dictionary_offset)?; + let prepared_dict = zstd::dict::DecoderDictionary::copy(&dict); + inner.dictionary = Some((dict, prepared_dict)); } inner.index_start_blk = actual_summary.index_start_blk; inner.index_root_blk = actual_summary.index_root_blk; @@ -632,8 +634,7 @@ impl DeltaLayer { inner: RwLock::new(DeltaLayerInner { loaded: false, file: None, - dictionary: Vec::new(), - prepared_dictionary: None, + dictionary: None, index_start_blk: 0, index_root_blk: 0, }), @@ -661,8 +662,7 @@ impl DeltaLayer { inner: RwLock::new(DeltaLayerInner { loaded: false, file: None, - dictionary: Vec::new(), - prepared_dictionary: None, + dictionary: None, index_start_blk: 0, index_root_blk: 0, }), @@ -819,6 +819,12 @@ impl DeltaLayerWriter { assert!(self.sample_sizes.len() == self.sample_key_lsn_willinit.len()); // Create the dictionary, if we had enough samples for it. + // + // If there weren't enough samples, we don't do any compression at + // all. Possibly we could still benefit from compression; for example + // if you have only one gigantic value in a single layer, it would + // still be good to compress that, without a dictionary. But we don't + // do that currently. if self.sample_sizes.len() >= config::ZSTD_MIN_SAMPLES { let dictionary = zstd::dict::from_continuous( &self.sample_data, @@ -948,8 +954,7 @@ impl DeltaLayerWriter { inner: RwLock::new(DeltaLayerInner { loaded: false, file: None, - dictionary: Vec::new(), - prepared_dictionary: None, + dictionary: None, index_start_blk, index_root_blk, }), diff --git a/pageserver/src/layered_repository/image_layer.rs b/pageserver/src/layered_repository/image_layer.rs index df44015365..1692915dfb 100644 --- a/pageserver/src/layered_repository/image_layer.rs +++ b/pageserver/src/layered_repository/image_layer.rs @@ -22,7 +22,7 @@ //! //! Each page image is compressed with ZStandard. See Compression section //! in the delta_layer.rs for more discussion. Difference from a delta -//! layer is that we don't currently use a dictionary. +//! layer is that we don't currently use a dictionary for image layers. use crate::config; use crate::config::PageServerConf; use crate::layered_repository::blob_io::{BlobCursor, BlobWriter, WriteBlobWriter}; @@ -540,7 +540,6 @@ impl ImageLayerWriter { // Try to compress the blob let compressed_bytes; if let Some(ref mut compressor) = self.compressor { - // Try to compress it compressed_bytes = compressor.compress(blob_content)?; // If compressed version is not any smaller than the original,