mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 11:40:38 +00:00
fix(pagectl): layer parsing and image layer dump (#9571)
This patch contains various improvements for the pagectl tool.

## Summary of changes

* Rewrite layer name parsing: `LayerName` now supports all layer file name variants currently in use.
* Drop pagectl's own layer parsing function and use `LayerName` from the pageserver crate instead.
* Support image layer dumping in the layer dump command using `ImageLayer::dump`, and drop the original implementation.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
//!
|
||||
//! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data.
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, Result};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||
use pageserver::task_mgr::TaskKind;
|
||||
@@ -11,13 +11,14 @@ use pageserver::virtual_file::api::IoMode;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::ops::Range;
|
||||
use std::str::FromStr;
|
||||
use std::{fs, str};
|
||||
|
||||
use pageserver::page_cache::{self, PAGE_SZ};
|
||||
use pageserver::tenant::block_io::FileBlockReader;
|
||||
use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection};
|
||||
use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE};
|
||||
use pageserver::tenant::storage_layer::range_overlaps;
|
||||
use pageserver::tenant::storage_layer::{range_overlaps, LayerName};
|
||||
use pageserver::virtual_file::{self, VirtualFile};
|
||||
use pageserver_api::key::{Key, KEY_SIZE};
|
||||
|
||||
@@ -74,35 +75,15 @@ impl LayerFile {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
|
||||
let split: Vec<&str> = name.split("__").collect();
|
||||
if split.len() != 2 {
|
||||
return None;
|
||||
}
|
||||
let keys: Vec<&str> = split[0].split('-').collect();
|
||||
let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
|
||||
let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
|
||||
let the_lsns: [&str; 2];
|
||||
pub(crate) fn parse_filename(name: &str) -> anyhow::Result<LayerFile> {
|
||||
let layer_name =
|
||||
LayerName::from_str(name).map_err(|e| anyhow!("failed to parse layer name: {e}"))?;
|
||||
|
||||
/*
|
||||
* Generations add a -vX-XXXXXX postfix, which causes issues when we try to
|
||||
* parse 'vX' as an LSN.
|
||||
*/
|
||||
let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
|
||||
the_lsns = [lsns[0], lsns[0]];
|
||||
false
|
||||
} else {
|
||||
the_lsns = [lsns[0], lsns[1]];
|
||||
true
|
||||
};
|
||||
|
||||
let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
|
||||
let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
|
||||
let holes = Vec::new();
|
||||
Some(LayerFile {
|
||||
key_range,
|
||||
lsn_range,
|
||||
is_delta,
|
||||
Ok(LayerFile {
|
||||
key_range: layer_name.key_range().clone(),
|
||||
lsn_range: layer_name.lsn_as_range(),
|
||||
is_delta: layer_name.is_delta(),
|
||||
holes,
|
||||
})
|
||||
}
|
||||
@@ -179,7 +160,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
|
||||
|
||||
for layer in fs::read_dir(timeline.path())? {
|
||||
let layer = layer?;
|
||||
if let Some(mut layer_file) =
|
||||
if let Ok(mut layer_file) =
|
||||
parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if layer_file.is_delta {
|
||||
|
||||
@@ -5,24 +5,12 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Subcommand;
|
||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||
use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::block_io::BlockCursor;
|
||||
use pageserver::tenant::disk_btree::DiskBtreeReader;
|
||||
use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
|
||||
use pageserver::tenant::storage_layer::{delta_layer, image_layer};
|
||||
use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
|
||||
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||
use pageserver::virtual_file::api::IoMode;
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver::{
|
||||
tenant::{
|
||||
block_io::FileBlockReader, disk_btree::VisitDirection,
|
||||
storage_layer::delta_layer::DELTA_KEY_SIZE,
|
||||
},
|
||||
virtual_file::VirtualFile,
|
||||
};
|
||||
use pageserver_api::key::{Key, KEY_SIZE};
|
||||
use std::fs;
|
||||
use utils::bin_ser::BeSer;
|
||||
use std::fs::{self, File};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use crate::layer_map_analyzer::parse_filename;
|
||||
@@ -59,44 +47,30 @@ pub(crate) enum LayerCmd {
|
||||
}
|
||||
|
||||
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
page_cache::init(100);
|
||||
let file = VirtualFile::open(path, ctx).await?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||
actual_summary.index_start_blk,
|
||||
actual_summary.index_root_blk,
|
||||
&block_reader,
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
let file = File::open(path)?;
|
||||
let delta_layer = DeltaLayer::new_for_path(path, file)?;
|
||||
delta_layer.dump(true, ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
// TODO(chi): dedup w/ `delta_layer.rs` by exposing the API.
|
||||
let mut all = vec![];
|
||||
tree_reader
|
||||
.visit(
|
||||
&[0u8; DELTA_KEY_SIZE],
|
||||
VisitDirection::Forwards,
|
||||
|key, value_offset| {
|
||||
let curr = Key::from_slice(&key[..KEY_SIZE]);
|
||||
all.push((curr, BlobRef(value_offset)));
|
||||
true
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let cursor = BlockCursor::new_fileblockreader(&block_reader);
|
||||
for (k, v) in all {
|
||||
let value = cursor.read_blob(v.pos(), ctx).await?;
|
||||
println!("key:{} value_len:{}", k, value.len());
|
||||
assert!(k.is_i128_representable(), "invalid key: ");
|
||||
}
|
||||
// TODO(chi): special handling for last key?
|
||||
page_cache::init(100);
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
let file = File::open(path)?;
|
||||
let image_layer = ImageLayer::new_for_path(path, file)?;
|
||||
image_layer.dump(true, ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -133,8 +107,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let mut idx = 0;
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
@@ -163,8 +136,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let mut idx = 0;
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
if *id == idx {
|
||||
// TODO(chi): dedup code
|
||||
println!(
|
||||
@@ -180,7 +152,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
if layer_file.is_delta {
|
||||
read_delta_file(layer.path(), &ctx).await?;
|
||||
} else {
|
||||
anyhow::bail!("not supported yet :(");
|
||||
read_image_file(layer.path(), &ctx).await?;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
@@ -270,7 +270,7 @@ impl AsLayerDesc for DeltaLayer {
|
||||
}
|
||||
|
||||
impl DeltaLayer {
|
||||
pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
self.desc.dump();
|
||||
|
||||
if !verbose {
|
||||
@@ -1438,7 +1438,7 @@ impl DeltaLayerInner {
|
||||
offset
|
||||
}
|
||||
|
||||
pub(crate) fn iter<'a>(&'a self, ctx: &'a RequestContext) -> DeltaLayerIterator<'a> {
|
||||
pub fn iter<'a>(&'a self, ctx: &'a RequestContext) -> DeltaLayerIterator<'a> {
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let tree_reader =
|
||||
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);
|
||||
|
||||
@@ -231,7 +231,7 @@ impl AsLayerDesc for ImageLayer {
|
||||
}
|
||||
|
||||
impl ImageLayer {
|
||||
pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
self.desc.dump();
|
||||
|
||||
if !verbose {
|
||||
|
||||
@@ -2,13 +2,11 @@
|
||||
//! Helper functions for dealing with filenames of the image and delta layer files.
|
||||
//!
|
||||
use pageserver_api::key::Key;
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::ops::Range;
|
||||
use std::str::FromStr;
|
||||
|
||||
use regex::Regex;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::PersistentLayerDesc;
|
||||
@@ -60,32 +58,31 @@ impl Ord for DeltaLayerName {
|
||||
/// Represents the region of the LSN-Key space covered by a DeltaLayer
|
||||
///
|
||||
/// ```text
|
||||
/// <key start>-<key end>__<LSN start>-<LSN end>
|
||||
/// <key start>-<key end>__<LSN start>-<LSN end>-<generation>
|
||||
/// ```
|
||||
impl DeltaLayerName {
|
||||
/// Parse the part of a delta layer's file name that represents the LayerName. Returns None
|
||||
/// if the filename does not match the expected pattern.
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
let mut parts = fname.split("__");
|
||||
let mut key_parts = parts.next()?.split('-');
|
||||
let mut lsn_parts = parts.next()?.split('-');
|
||||
|
||||
let key_start_str = key_parts.next()?;
|
||||
let key_end_str = key_parts.next()?;
|
||||
let lsn_start_str = lsn_parts.next()?;
|
||||
let lsn_end_str = lsn_parts.next()?;
|
||||
|
||||
if parts.next().is_some() || key_parts.next().is_some() || key_parts.next().is_some() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if key_start_str.len() != 36
|
||||
|| key_end_str.len() != 36
|
||||
|| lsn_start_str.len() != 16
|
||||
|| lsn_end_str.len() != 16
|
||||
let (key_parts, lsn_generation_parts) = fname.split_once("__")?;
|
||||
let (key_start_str, key_end_str) = key_parts.split_once('-')?;
|
||||
let (lsn_start_str, lsn_end_generation_parts) = lsn_generation_parts.split_once('-')?;
|
||||
let lsn_end_str = if let Some((lsn_end_str, maybe_generation)) =
|
||||
lsn_end_generation_parts.split_once('-')
|
||||
{
|
||||
return None;
|
||||
}
|
||||
if maybe_generation.starts_with("v") {
|
||||
// vY-XXXXXXXX
|
||||
lsn_end_str
|
||||
} else if maybe_generation.len() == 8 {
|
||||
// XXXXXXXX
|
||||
lsn_end_str
|
||||
} else {
|
||||
// no idea what this is
|
||||
return None;
|
||||
}
|
||||
} else {
|
||||
lsn_end_generation_parts
|
||||
};
|
||||
|
||||
let key_start = Key::from_hex(key_start_str).ok()?;
|
||||
let key_end = Key::from_hex(key_end_str).ok()?;
|
||||
@@ -173,25 +170,29 @@ impl ImageLayerName {
|
||||
/// Represents the part of the Key-LSN space covered by an ImageLayer
|
||||
///
|
||||
/// ```text
|
||||
/// <key start>-<key end>__<LSN>
|
||||
/// <key start>-<key end>__<LSN>-<generation>
|
||||
/// ```
|
||||
impl ImageLayerName {
|
||||
/// Parse a string as the LayerName part of an image layer file name. Returns None if the
|
||||
/// filename does not match the expected pattern.
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
let mut parts = fname.split("__");
|
||||
let mut key_parts = parts.next()?.split('-');
|
||||
|
||||
let key_start_str = key_parts.next()?;
|
||||
let key_end_str = key_parts.next()?;
|
||||
let lsn_str = parts.next()?;
|
||||
if parts.next().is_some() || key_parts.next().is_some() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if key_start_str.len() != 36 || key_end_str.len() != 36 || lsn_str.len() != 16 {
|
||||
return None;
|
||||
}
|
||||
let (key_parts, lsn_generation_parts) = fname.split_once("__")?;
|
||||
let (key_start_str, key_end_str) = key_parts.split_once('-')?;
|
||||
let lsn_str =
|
||||
if let Some((lsn_str, maybe_generation)) = lsn_generation_parts.split_once('-') {
|
||||
if maybe_generation.starts_with("v") {
|
||||
// vY-XXXXXXXX
|
||||
lsn_str
|
||||
} else if maybe_generation.len() == 8 {
|
||||
// XXXXXXXX
|
||||
lsn_str
|
||||
} else {
|
||||
// likely a delta layer
|
||||
return None;
|
||||
}
|
||||
} else {
|
||||
lsn_generation_parts
|
||||
};
|
||||
|
||||
let key_start = Key::from_hex(key_start_str).ok()?;
|
||||
let key_end = Key::from_hex(key_end_str).ok()?;
|
||||
@@ -258,6 +259,14 @@ impl LayerName {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the LSN range encoded in the layer name.
|
||||
pub fn lsn_as_range(&self) -> Range<Lsn> {
|
||||
match &self {
|
||||
LayerName::Image(layer) => layer.lsn_as_range(),
|
||||
LayerName::Delta(layer) => layer.lsn_range.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_delta(&self) -> bool {
|
||||
matches!(self, LayerName::Delta(_))
|
||||
}
|
||||
@@ -290,18 +299,8 @@ impl FromStr for LayerName {
|
||||
/// Self. When loading a physical layer filename, we drop any extra information
|
||||
/// not needed to build Self.
|
||||
fn from_str(value: &str) -> Result<Self, Self::Err> {
|
||||
let gen_suffix_regex = Regex::new("^(?<base>.+)(?<gen>-v1-[0-9a-f]{8})$").unwrap();
|
||||
let file_name: Cow<str> = match gen_suffix_regex.captures(value) {
|
||||
Some(captures) => captures
|
||||
.name("base")
|
||||
.expect("Non-optional group")
|
||||
.as_str()
|
||||
.into(),
|
||||
None => value.into(),
|
||||
};
|
||||
|
||||
let delta = DeltaLayerName::parse_str(&file_name);
|
||||
let image = ImageLayerName::parse_str(&file_name);
|
||||
let delta = DeltaLayerName::parse_str(value);
|
||||
let image = ImageLayerName::parse_str(value);
|
||||
let ok = match (delta, image) {
|
||||
(None, None) => {
|
||||
return Err(format!(
|
||||
@@ -367,11 +366,14 @@ mod test {
|
||||
lsn: Lsn::from_hex("00000000014FED58").unwrap(),
|
||||
});
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001").unwrap();
|
||||
assert_eq!(parsed, expected,);
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").unwrap();
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
// Omitting generation suffix is valid
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").unwrap();
|
||||
assert_eq!(parsed, expected,);
|
||||
assert_eq!(parsed, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -385,6 +387,9 @@ mod test {
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001").unwrap();
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").unwrap();
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
// Omitting generation suffix is valid
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").unwrap();
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
Reference in New Issue
Block a user