From fc136eec8f638bb50f04add5566dd8e822ad9625 Mon Sep 17 00:00:00 2001 From: Trung Dinh Date: Mon, 16 Jun 2025 03:29:42 -0700 Subject: [PATCH] pagectl: add dump layer local (#12245) ## Problem In our environment, we don't always have access to the pagectl tool on the pageserver. We have to download the page files to local env to introspect them. Hence, it'll be useful to be able to parse the local files using `pagectl`. ## Summary of changes * Add `dump-layer-local` to `pagectl` that takes a local path as argument and returns the layer content: ``` cargo run -p pagectl layer dump-layer-local ~/Desktop/000000067F000040490002800000FFFFFFFF-030000000000000000000000000000000002__00003E7A53EDE611-00003E7AF27BFD19-v1-00000001 ``` * Bonus: Fix a bug in `pageserver/ctl/src/draw_timeline_dir.rs` in which we don't filter out temporary files. --- pageserver/ctl/src/draw_timeline_dir.rs | 8 +++- pageserver/ctl/src/layers.rs | 49 +++++++++++++++---------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/pageserver/ctl/src/draw_timeline_dir.rs b/pageserver/ctl/src/draw_timeline_dir.rs index 80ca414543..881ebd49a7 100644 --- a/pageserver/ctl/src/draw_timeline_dir.rs +++ b/pageserver/ctl/src/draw_timeline_dir.rs @@ -20,7 +20,7 @@ //! //! # local timeline dir //! ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \ -//! grep "__" | cargo run --release --bin pagectl draw-timeline-dir > out.svg +//! grep "__" | cargo run --release --bin pagectl draw-timeline > out.svg //! //! # Layer map dump from `/v1/tenant/$TENANT/timeline/$TIMELINE/layer` //! (jq -r '.historic_layers[] | .layer_file_name' | cargo run -p pagectl draw-timeline) < layer-map.json > out.svg @@ -81,7 +81,11 @@ fn build_coordinate_compression_map(coords: Vec) -> BTreeMap (Range, Range) { let split: Vec<&str> = name.split("__").collect(); let keys: Vec<&str> = split[0].split('-').collect(); - let mut lsns: Vec<&str> = split[1].split('-').collect(); + + // Remove the temporary file extension, e.g., remove the `.d20a.___temp` part from the following filename: + // 000000067F000040490000404A00441B0000-000000067F000040490000404A00441B4000__000043483A34CE00.d20a.___temp + let lsns = split[1].split('.').collect::>()[0]; + let mut lsns: Vec<&str> = lsns.split('-').collect(); // The current format of the layer file name: 000000067F0000000400000B150100000000-000000067F0000000400000D350100000000__00000000014B7AC8-v1-00000001 diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 79f56a5a51..42b3e4a9ba 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -13,7 +13,7 @@ use pageserver::{page_cache, virtual_file}; use pageserver_api::key::Key; use utils::id::{TenantId, TimelineId}; -use crate::layer_map_analyzer::parse_filename; +use crate::layer_map_analyzer::{LayerFile, parse_filename}; #[derive(Subcommand)] pub(crate) enum LayerCmd { @@ -38,6 +38,8 @@ pub(crate) enum LayerCmd { /// The id from list-layer command id: usize, }, + /// Dump all information of a layer file locally + DumpLayerLocal { path: PathBuf }, RewriteSummary { layer_file_path: Utf8PathBuf, #[clap(long)] @@ -131,15 +133,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { } for (idx, layer_file) in to_print { - println!( - "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}", - idx, - layer_file.key_range.start, - layer_file.key_range.end, - layer_file.lsn_range.start, - layer_file.lsn_range.end, - layer_file.is_delta, - ); + print_layer_file(idx, &layer_file); } Ok(()) } @@ -159,16 +153,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { let layer = layer?; if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) { if *id == idx { - // TODO(chi): dedup code - println!( - "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}", - idx, - layer_file.key_range.start, - layer_file.key_range.end, - layer_file.lsn_range.start, - layer_file.lsn_range.end, - layer_file.is_delta, - ); + print_layer_file(idx, &layer_file); if layer_file.is_delta { read_delta_file(layer.path(), &ctx).await?; @@ -183,6 +168,18 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { } Ok(()) } + LayerCmd::DumpLayerLocal { path } => { + if let Ok(layer_file) = parse_filename(path.file_name().unwrap().to_str().unwrap()) { + print_layer_file(0, &layer_file); + + if layer_file.is_delta { + read_delta_file(path, &ctx).await?; + } else { + read_image_file(path, &ctx).await?; + } + } + Ok(()) + } LayerCmd::RewriteSummary { layer_file_path, new_tenant_id, @@ -247,3 +244,15 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { } } } + +fn print_layer_file(idx: usize, layer_file: &LayerFile) { + println!( + "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}", + idx, + layer_file.key_range.start, + layer_file.key_range.end, + layer_file.lsn_range.start, + layer_file.lsn_range.end, + layer_file.is_delta, + ); +}