diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 451d2a1d69..11b8e98f57 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -2,7 +2,7 @@ //! //! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data. -use anyhow::Result; +use anyhow::{anyhow, Result}; use camino::{Utf8Path, Utf8PathBuf}; use pageserver::context::{DownloadBehavior, RequestContext}; use pageserver::task_mgr::TaskKind; @@ -11,13 +11,14 @@ use pageserver::virtual_file::api::IoMode; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::ops::Range; +use std::str::FromStr; use std::{fs, str}; use pageserver::page_cache::{self, PAGE_SZ}; use pageserver::tenant::block_io::FileBlockReader; use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection}; use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE}; -use pageserver::tenant::storage_layer::range_overlaps; +use pageserver::tenant::storage_layer::{range_overlaps, LayerName}; use pageserver::virtual_file::{self, VirtualFile}; use pageserver_api::key::{Key, KEY_SIZE}; @@ -74,35 +75,15 @@ impl LayerFile { } } -pub(crate) fn parse_filename(name: &str) -> Option { - let split: Vec<&str> = name.split("__").collect(); - if split.len() != 2 { - return None; - } - let keys: Vec<&str> = split[0].split('-').collect(); - let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect(); - let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect(); - let the_lsns: [&str; 2]; +pub(crate) fn parse_filename(name: &str) -> anyhow::Result { + let layer_name = + LayerName::from_str(name).map_err(|e| anyhow!("failed to parse layer name: {e}"))?; - /* - * Generations add a -vX-XXXXXX postfix, which causes issues when we try to - * parse 'vX' as an LSN. - */ - let is_delta = if lsns.len() == 1 || lsns[1].is_empty() { - the_lsns = [lsns[0], lsns[0]]; - false - } else { - the_lsns = [lsns[0], lsns[1]]; - true - }; - - let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap(); - let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap(); let holes = Vec::new(); - Some(LayerFile { - key_range, - lsn_range, - is_delta, + Ok(LayerFile { + key_range: layer_name.key_range().clone(), + lsn_range: layer_name.lsn_as_range(), + is_delta: layer_name.is_delta(), holes, }) } @@ -179,7 +160,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> { for layer in fs::read_dir(timeline.path())? { let layer = layer?; - if let Some(mut layer_file) = + if let Ok(mut layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) { if layer_file.is_delta { diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 22627d72c8..6f543dcaa9 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -5,24 +5,12 @@ use camino::{Utf8Path, Utf8PathBuf}; use clap::Subcommand; use pageserver::context::{DownloadBehavior, RequestContext}; use pageserver::task_mgr::TaskKind; -use pageserver::tenant::block_io::BlockCursor; -use pageserver::tenant::disk_btree::DiskBtreeReader; -use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary}; use pageserver::tenant::storage_layer::{delta_layer, image_layer}; use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer}; use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use pageserver::virtual_file::api::IoMode; use pageserver::{page_cache, virtual_file}; -use pageserver::{ - tenant::{ - block_io::FileBlockReader, disk_btree::VisitDirection, - storage_layer::delta_layer::DELTA_KEY_SIZE, - }, - virtual_file::VirtualFile, -}; -use pageserver_api::key::{Key, KEY_SIZE}; -use std::fs; -use utils::bin_ser::BeSer; +use std::fs::{self, File}; use utils::id::{TenantId, TimelineId}; use crate::layer_map_analyzer::parse_filename; @@ -59,44 +47,30 @@ pub(crate) enum LayerCmd { } async fn read_delta_file(path: impl AsRef, ctx: &RequestContext) -> Result<()> { - let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); virtual_file::init( 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), ); page_cache::init(100); - let file = VirtualFile::open(path, ctx).await?; - let file_id = page_cache::next_file_id(); - let block_reader = FileBlockReader::new(&file, file_id); - let summary_blk = block_reader.read_blk(0, ctx).await?; - let actual_summary = Summary::des_prefix(summary_blk.as_ref())?; - let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new( - actual_summary.index_start_blk, - actual_summary.index_root_blk, - &block_reader, + let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); + let file = File::open(path)?; + let delta_layer = DeltaLayer::new_for_path(path, file)?; + delta_layer.dump(true, ctx).await?; + Ok(()) +} + +async fn read_image_file(path: impl AsRef, ctx: &RequestContext) -> Result<()> { + virtual_file::init( + 10, + virtual_file::api::IoEngineKind::StdFs, + IoMode::preferred(), ); - // TODO(chi): dedup w/ `delta_layer.rs` by exposing the API. - let mut all = vec![]; - tree_reader - .visit( - &[0u8; DELTA_KEY_SIZE], - VisitDirection::Forwards, - |key, value_offset| { - let curr = Key::from_slice(&key[..KEY_SIZE]); - all.push((curr, BlobRef(value_offset))); - true - }, - ctx, - ) - .await?; - let cursor = BlockCursor::new_fileblockreader(&block_reader); - for (k, v) in all { - let value = cursor.read_blob(v.pos(), ctx).await?; - println!("key:{} value_len:{}", k, value.len()); - assert!(k.is_i128_representable(), "invalid key: "); - } - // TODO(chi): special handling for last key? + page_cache::init(100); + let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); + let file = File::open(path)?; + let image_layer = ImageLayer::new_for_path(path, file)?; + image_layer.dump(true, ctx).await?; Ok(()) } @@ -133,8 +107,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { let mut idx = 0; for layer in fs::read_dir(timeline_path)? { let layer = layer?; - if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) - { + if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) { println!( "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}", idx, @@ -163,8 +136,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { let mut idx = 0; for layer in fs::read_dir(timeline_path)? { let layer = layer?; - if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) - { + if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) { if *id == idx { // TODO(chi): dedup code println!( @@ -180,7 +152,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { if layer_file.is_delta { read_delta_file(layer.path(), &ctx).await?; } else { - anyhow::bail!("not supported yet :("); + read_image_file(layer.path(), &ctx).await?; } break; diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 10165b1d06..664c00a6b1 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -270,7 +270,7 @@ impl AsLayerDesc for DeltaLayer { } impl DeltaLayer { - pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { + pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { self.desc.dump(); if !verbose { @@ -1438,7 +1438,7 @@ impl DeltaLayerInner { offset } - pub(crate) fn iter<'a>(&'a self, ctx: &'a RequestContext) -> DeltaLayerIterator<'a> { + pub fn iter<'a>(&'a self, ctx: &'a RequestContext) -> DeltaLayerIterator<'a> { let block_reader = FileBlockReader::new(&self.file, self.file_id); let tree_reader = DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader); diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index c0d183dc08..834d1931d0 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -231,7 +231,7 @@ impl AsLayerDesc for ImageLayer { } impl ImageLayer { - pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { + pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { self.desc.dump(); if !verbose { diff --git a/pageserver/src/tenant/storage_layer/layer_name.rs b/pageserver/src/tenant/storage_layer/layer_name.rs index 2b98d74f9f..addf3b85d9 100644 --- a/pageserver/src/tenant/storage_layer/layer_name.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -2,13 +2,11 @@ //! Helper functions for dealing with filenames of the image and delta layer files. //! use pageserver_api::key::Key; -use std::borrow::Cow; use std::cmp::Ordering; use std::fmt; use std::ops::Range; use std::str::FromStr; -use regex::Regex; use utils::lsn::Lsn; use super::PersistentLayerDesc; @@ -60,32 +58,31 @@ impl Ord for DeltaLayerName { /// Represents the region of the LSN-Key space covered by a DeltaLayer /// /// ```text -/// -__- +/// -__-- /// ``` impl DeltaLayerName { /// Parse the part of a delta layer's file name that represents the LayerName. Returns None /// if the filename does not match the expected pattern. pub fn parse_str(fname: &str) -> Option { - let mut parts = fname.split("__"); - let mut key_parts = parts.next()?.split('-'); - let mut lsn_parts = parts.next()?.split('-'); - - let key_start_str = key_parts.next()?; - let key_end_str = key_parts.next()?; - let lsn_start_str = lsn_parts.next()?; - let lsn_end_str = lsn_parts.next()?; - - if parts.next().is_some() || key_parts.next().is_some() || key_parts.next().is_some() { - return None; - } - - if key_start_str.len() != 36 - || key_end_str.len() != 36 - || lsn_start_str.len() != 16 - || lsn_end_str.len() != 16 + let (key_parts, lsn_generation_parts) = fname.split_once("__")?; + let (key_start_str, key_end_str) = key_parts.split_once('-')?; + let (lsn_start_str, lsn_end_generation_parts) = lsn_generation_parts.split_once('-')?; + let lsn_end_str = if let Some((lsn_end_str, maybe_generation)) = + lsn_end_generation_parts.split_once('-') { - return None; - } + if maybe_generation.starts_with("v") { + // vY-XXXXXXXX + lsn_end_str + } else if maybe_generation.len() == 8 { + // XXXXXXXX + lsn_end_str + } else { + // no idea what this is + return None; + } + } else { + lsn_end_generation_parts + }; let key_start = Key::from_hex(key_start_str).ok()?; let key_end = Key::from_hex(key_end_str).ok()?; @@ -173,25 +170,29 @@ impl ImageLayerName { /// Represents the part of the Key-LSN space covered by an ImageLayer /// /// ```text -/// -__ +/// -__- /// ``` impl ImageLayerName { /// Parse a string as then LayerName part of an image layer file name. Returns None if the /// filename does not match the expected pattern. pub fn parse_str(fname: &str) -> Option { - let mut parts = fname.split("__"); - let mut key_parts = parts.next()?.split('-'); - - let key_start_str = key_parts.next()?; - let key_end_str = key_parts.next()?; - let lsn_str = parts.next()?; - if parts.next().is_some() || key_parts.next().is_some() { - return None; - } - - if key_start_str.len() != 36 || key_end_str.len() != 36 || lsn_str.len() != 16 { - return None; - } + let (key_parts, lsn_generation_parts) = fname.split_once("__")?; + let (key_start_str, key_end_str) = key_parts.split_once('-')?; + let lsn_str = + if let Some((lsn_str, maybe_generation)) = lsn_generation_parts.split_once('-') { + if maybe_generation.starts_with("v") { + // vY-XXXXXXXX + lsn_str + } else if maybe_generation.len() == 8 { + // XXXXXXXX + lsn_str + } else { + // likely a delta layer + return None; + } + } else { + lsn_generation_parts + }; let key_start = Key::from_hex(key_start_str).ok()?; let key_end = Key::from_hex(key_end_str).ok()?; @@ -258,6 +259,14 @@ impl LayerName { } } + /// Gets the LSN range encoded in the layer name. + pub fn lsn_as_range(&self) -> Range { + match &self { + LayerName::Image(layer) => layer.lsn_as_range(), + LayerName::Delta(layer) => layer.lsn_range.clone(), + } + } + pub fn is_delta(&self) -> bool { matches!(self, LayerName::Delta(_)) } @@ -290,18 +299,8 @@ impl FromStr for LayerName { /// Self. When loading a physical layer filename, we drop any extra information /// not needed to build Self. fn from_str(value: &str) -> Result { - let gen_suffix_regex = Regex::new("^(?.+)(?-v1-[0-9a-f]{8})$").unwrap(); - let file_name: Cow = match gen_suffix_regex.captures(value) { - Some(captures) => captures - .name("base") - .expect("Non-optional group") - .as_str() - .into(), - None => value.into(), - }; - - let delta = DeltaLayerName::parse_str(&file_name); - let image = ImageLayerName::parse_str(&file_name); + let delta = DeltaLayerName::parse_str(value); + let image = ImageLayerName::parse_str(value); let ok = match (delta, image) { (None, None) => { return Err(format!( @@ -367,11 +366,14 @@ mod test { lsn: Lsn::from_hex("00000000014FED58").unwrap(), }); let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001").unwrap(); - assert_eq!(parsed, expected,); + assert_eq!(parsed, expected); + + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").unwrap(); + assert_eq!(parsed, expected); // Omitting generation suffix is valid let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").unwrap(); - assert_eq!(parsed, expected,); + assert_eq!(parsed, expected); } #[test] @@ -385,6 +387,9 @@ mod test { let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001").unwrap(); assert_eq!(parsed, expected); + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").unwrap(); + assert_eq!(parsed, expected); + // Omitting generation suffix is valid let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").unwrap(); assert_eq!(parsed, expected);