pagectl: key command for dumping what we know about the key (#7890)

What we know about the key via added `pagectl key $key` command:
- debug formatting
- shard placement when `--shard-count` is specified
- different boolean queries in `key.rs`
- aux files v2

Example:

```
$ cargo run -qp pagectl -- key 000000063F00004005000060270000100E2C
parsed from hex: 000000063F00004005000060270000100E2C:

Key { field1: 0, field2: 1599, field3: 16389, field4: 24615, field5: 0, field6: 1052204 }
rel_block:         true
rel_vm_block:      false
rel_fsm_block:     false
slru_block:        false
inherited:         true
rel_size:          false
slru_segment_size: false
recognized kind:   None
```
This commit is contained in:
Joonas Koivunen
2024-05-31 21:19:41 +03:00
committed by GitHub
parent 9fda85b486
commit ef83f31e77
8 changed files with 608 additions and 8 deletions

1
Cargo.lock generated
View File

@@ -3570,6 +3570,7 @@ dependencies = [
"serde",
"serde_json",
"svg_fmt",
"thiserror",
"tokio",
"tokio-util",
"toml_edit",

View File

@@ -381,10 +381,15 @@ pub fn rel_size_to_key(rel: RelTag) -> Key {
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
field6: 0xffff_ffff,
}
}
#[inline(always)]
pub fn is_rel_size_key(key: &Key) -> bool {
key.field1 == 0 && key.field6 == u32::MAX
}
#[inline(always)]
pub fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
@@ -422,6 +427,25 @@ pub fn slru_dir_to_key(kind: SlruKind) -> Key {
}
}
#[inline(always)]
pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
if key.field1 == 0x01
&& key.field3 == 0
&& key.field4 == 0
&& key.field5 == 0
&& key.field6 == 0
{
match key.field2 {
0 => Some(Ok(SlruKind::Clog)),
1 => Some(Ok(SlruKind::MultiXactMembers)),
2 => Some(Ok(SlruKind::MultiXactOffsets)),
x => Some(Err(x)),
}
} else {
None
}
}
#[inline(always)]
pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
@@ -450,10 +474,18 @@ pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
field6: 0xffff_ffff,
}
}
pub fn is_slru_segment_size_key(key: &Key) -> bool {
key.field1 == 0x01
&& key.field2 < 0x03
&& key.field3 == 0x01
&& key.field5 == 0
&& key.field6 == u32::MAX
}
#[inline(always)]
pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {

View File

@@ -3,7 +3,7 @@ use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
use postgres_ffi::relfile_utils::forknumber_to_name;
use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM};
use postgres_ffi::Oid;
///
@@ -68,6 +68,57 @@ impl fmt::Display for RelTag {
}
}
#[derive(Debug, thiserror::Error)]
pub enum ParseRelTagError {
#[error("invalid forknum")]
InvalidForknum(#[source] std::num::ParseIntError),
#[error("missing triplet member {}", .0)]
MissingTripletMember(usize),
#[error("invalid triplet member {}", .0)]
InvalidTripletMember(usize, #[source] std::num::ParseIntError),
}
impl std::str::FromStr for RelTag {
type Err = ParseRelTagError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use ParseRelTagError::*;
// FIXME: in postgres logs this separator is dot
// Example:
// could not read block 2 in rel 1663/208101/2620.1 from page server at lsn 0/2431E6F0
// with a regex we could get this more painlessly
let (triplet, forknum) = match s.split_once('_').or_else(|| s.split_once('.')) {
Some((t, f)) => {
let forknum = forkname_to_number(Some(f));
let forknum = if let Ok(f) = forknum {
f
} else {
f.parse::<u8>().map_err(InvalidForknum)?
};
(t, Some(forknum))
}
None => (s, None),
};
let mut split = triplet
.splitn(3, '/')
.enumerate()
.map(|(i, s)| s.parse::<u32>().map_err(|e| InvalidTripletMember(i, e)));
let spcnode = split.next().ok_or(MissingTripletMember(0))??;
let dbnode = split.next().ok_or(MissingTripletMember(1))??;
let relnode = split.next().ok_or(MissingTripletMember(2))??;
Ok(RelTag {
spcnode,
forknum: forknum.unwrap_or(MAIN_FORKNUM),
dbnode,
relnode,
})
}
}
impl RelTag {
pub fn to_segfile_name(&self, segno: u32) -> String {
let mut name = if self.spcnode == GLOBALTABLESPACE_OID {

View File

@@ -428,6 +428,12 @@ impl<'de> Deserialize<'de> for TenantShardId {
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardStripeSize(pub u32);
impl Default for ShardStripeSize {
fn default() -> Self {
DEFAULT_STRIPE_SIZE
}
}
/// Layout version: for future upgrades where we might change how the key->shard mapping works
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardLayout(u8);
@@ -713,6 +719,25 @@ fn key_to_shard_number(count: ShardCount, stripe_size: ShardStripeSize, key: &Ke
ShardNumber((hash % count.0 as u32) as u8)
}
/// For debugging, while not exposing the internals.
#[derive(Debug)]
#[allow(unused)] // used by debug formatting by pagectl
struct KeyShardingInfo {
shard0: bool,
shard_number: ShardNumber,
}
pub fn describe(
key: &Key,
shard_count: ShardCount,
stripe_size: ShardStripeSize,
) -> impl std::fmt::Debug {
KeyShardingInfo {
shard0: key_is_shard0(key),
shard_number: key_to_shard_number(shard_count, stripe_size, key),
}
}
#[cfg(test)]
mod tests {
use utils::Hex;

View File

@@ -19,13 +19,13 @@
/// // right: [0x68; 1]
/// # fn serialize_something() -> Vec<u8> { "hello world".as_bytes().to_vec() }
/// ```
#[derive(PartialEq)]
pub struct Hex<'a>(pub &'a [u8]);
pub struct Hex<S>(pub S);
impl std::fmt::Debug for Hex<'_> {
impl<S: AsRef<[u8]>> std::fmt::Debug for Hex<S> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "[")?;
for (i, c) in self.0.chunks(16).enumerate() {
let chunks = self.0.as_ref().chunks(16);
for (i, c) in chunks.enumerate() {
if i > 0 && !c.is_empty() {
writeln!(f, ", ")?;
}
@@ -36,6 +36,15 @@ impl std::fmt::Debug for Hex<'_> {
write!(f, "0x{b:02x}")?;
}
}
write!(f, "; {}]", self.0.len())
write!(f, "; {}]", self.0.as_ref().len())
}
}
impl<R: AsRef<[u8]>, L: AsRef<[u8]>> PartialEq<Hex<R>> for Hex<L> {
fn eq(&self, other: &Hex<R>) -> bool {
let left = self.0.as_ref();
let right = other.0.as_ref();
left == right
}
}

View File

@@ -17,6 +17,7 @@ pageserver = { path = ".." }
pageserver_api.workspace = true
remote_storage = { path = "../../libs/remote_storage" }
postgres_ffi.workspace = true
thiserror.workspace = true
tokio.workspace = true
tokio-util.workspace = true
toml_edit.workspace = true

477
pageserver/ctl/src/key.rs Normal file
View File

@@ -0,0 +1,477 @@
use anyhow::Context;
use clap::Parser;
use pageserver_api::{
key::Key,
reltag::{BlockNumber, RelTag, SlruKind},
shard::{ShardCount, ShardStripeSize},
};
use std::str::FromStr;
#[derive(Parser)]
pub(super) struct DescribeKeyCommand {
/// Key material in one of the forms: hex, span attributes captured from log, reltag blocknum
input: Vec<String>,
/// The number of shards to calculate what Keys placement would be.
#[arg(long)]
shard_count: Option<CustomShardCount>,
/// The sharding stripe size.
///
/// The default is hardcoded. It makes no sense to provide this without providing
/// `--shard-count`.
#[arg(long, requires = "shard_count")]
stripe_size: Option<u32>,
}
/// Sharded shard count without unsharded count, which the actual ShardCount supports.
#[derive(Clone, Copy)]
pub(super) struct CustomShardCount(std::num::NonZeroU8);
#[derive(Debug, thiserror::Error)]
pub(super) enum InvalidShardCount {
#[error(transparent)]
ParsingFailed(#[from] std::num::ParseIntError),
#[error("too few shards")]
TooFewShards,
}
impl FromStr for CustomShardCount {
type Err = InvalidShardCount;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let inner: std::num::NonZeroU8 = s.parse()?;
if inner.get() < 2 {
Err(InvalidShardCount::TooFewShards)
} else {
Ok(CustomShardCount(inner))
}
}
}
impl From<CustomShardCount> for ShardCount {
fn from(value: CustomShardCount) -> Self {
ShardCount::new(value.0.get())
}
}
impl DescribeKeyCommand {
pub(super) fn execute(self) {
let DescribeKeyCommand {
input,
shard_count,
stripe_size,
} = self;
let material = KeyMaterial::try_from(input.as_slice()).unwrap();
let kind = material.kind();
let key = Key::from(material);
println!("parsed from {kind}: {key}:");
println!();
println!("{key:?}");
macro_rules! kind_query {
($name:ident) => {{
let s: &'static str = stringify!($name);
let s = s.strip_prefix("is_").unwrap_or(s);
let s = s.strip_suffix("_key").unwrap_or(s);
#[allow(clippy::needless_borrow)]
(s, pageserver_api::key::$name(key))
}};
}
// the current characterization is a mess of these boolean queries and separate
// "recognization". I think it accurately represents how strictly we model the Key
// right now, but could of course be made less confusing.
let queries = [
("rel_block", pageserver_api::key::is_rel_block_key(&key)),
kind_query!(is_rel_vm_block_key),
kind_query!(is_rel_fsm_block_key),
kind_query!(is_slru_block_key),
kind_query!(is_inherited_key),
("rel_size", pageserver_api::key::is_rel_size_key(&key)),
(
"slru_segment_size",
pageserver_api::key::is_slru_segment_size_key(&key),
),
];
let recognized_kind = "recognized kind";
let metadata_key = "metadata key";
let shard_placement = "shard placement";
let longest = queries
.iter()
.map(|t| t.0)
.chain([recognized_kind, metadata_key, shard_placement])
.map(|s| s.len())
.max()
.unwrap();
let colon = 1;
let padding = 1;
for (name, is) in queries {
let width = longest - name.len() + colon + padding;
println!("{}{:width$}{}", name, ":", is);
}
let width = longest - recognized_kind.len() + colon + padding;
println!(
"{}{:width$}{:?}",
recognized_kind,
":",
RecognizedKeyKind::new(key),
);
if let Some(shard_count) = shard_count {
// seeing the sharding placement might be confusing, so leave it out unless shard
// count was given.
let stripe_size = stripe_size.map(ShardStripeSize).unwrap_or_default();
println!(
"# placement with shard_count: {} and stripe_size: {}:",
shard_count.0, stripe_size.0
);
let width = longest - shard_placement.len() + colon + padding;
println!(
"{}{:width$}{:?}",
shard_placement,
":",
pageserver_api::shard::describe(&key, shard_count.into(), stripe_size)
);
}
}
}
/// Hand-wavy "inputs we accept" for a key.
#[derive(Debug)]
pub(super) enum KeyMaterial {
Hex(Key),
String(SpanAttributesFromLogs),
Split(RelTag, BlockNumber),
}
impl KeyMaterial {
fn kind(&self) -> &'static str {
match self {
KeyMaterial::Hex(_) => "hex",
KeyMaterial::String(_) | KeyMaterial::Split(_, _) => "split",
}
}
}
impl From<KeyMaterial> for Key {
fn from(value: KeyMaterial) -> Self {
match value {
KeyMaterial::Hex(key) => key,
KeyMaterial::String(SpanAttributesFromLogs(rt, blocknum))
| KeyMaterial::Split(rt, blocknum) => {
pageserver_api::key::rel_block_to_key(rt, blocknum)
}
}
}
}
impl<S: AsRef<str>> TryFrom<&[S]> for KeyMaterial {
type Error = anyhow::Error;
fn try_from(value: &[S]) -> Result<Self, Self::Error> {
match value {
[] => anyhow::bail!(
"need 1..N positional arguments describing the key, try hex or a log line"
),
[one] => {
let one = one.as_ref();
let key = Key::from_hex(one).map(KeyMaterial::Hex);
let attrs = SpanAttributesFromLogs::from_str(one).map(KeyMaterial::String);
match (key, attrs) {
(Ok(key), _) => Ok(key),
(_, Ok(s)) => Ok(s),
(Err(e1), Err(e2)) => anyhow::bail!(
"failed to parse {one:?} as hex or span attributes:\n- {e1:#}\n- {e2:#}"
),
}
}
more => {
// assume going left to right one of these is a reltag and then we find a blocknum
// this works, because we don't have plain numbers at least right after reltag in
// logs. for some definition of "works".
let Some((reltag_at, reltag)) = more
.iter()
.map(AsRef::as_ref)
.enumerate()
.find_map(|(i, s)| {
s.split_once("rel=")
.map(|(_garbage, actual)| actual)
.unwrap_or(s)
.parse::<RelTag>()
.ok()
.map(|rt| (i, rt))
})
else {
anyhow::bail!("found no RelTag in arguments");
};
let Some(blocknum) = more
.iter()
.map(AsRef::as_ref)
.skip(reltag_at)
.find_map(|s| {
s.split_once("blkno=")
.map(|(_garbage, actual)| actual)
.unwrap_or(s)
.parse::<BlockNumber>()
.ok()
})
else {
anyhow::bail!("found no blocknum in arguments");
};
Ok(KeyMaterial::Split(reltag, blocknum))
}
}
}
}
#[derive(Debug)]
pub(super) struct SpanAttributesFromLogs(RelTag, BlockNumber);
impl std::str::FromStr for SpanAttributesFromLogs {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
// accept the span separator but do not require or fail if either is missing
// "whatever{rel=1663/16389/24615 blkno=1052204 req_lsn=FFFFFFFF/FFFFFFFF}"
let (_, reltag) = s
.split_once("rel=")
.ok_or_else(|| anyhow::anyhow!("cannot find 'rel='"))?;
let reltag = reltag.split_whitespace().next().unwrap();
let (_, blocknum) = s
.split_once("blkno=")
.ok_or_else(|| anyhow::anyhow!("cannot find 'blkno='"))?;
let blocknum = blocknum.split_whitespace().next().unwrap();
let reltag = reltag
.parse()
.with_context(|| format!("parse reltag from {reltag:?}"))?;
let blocknum = blocknum
.parse()
.with_context(|| format!("parse blocknum from {blocknum:?}"))?;
Ok(Self(reltag, blocknum))
}
}
#[derive(Debug)]
#[allow(dead_code)] // debug print is used
enum RecognizedKeyKind {
DbDir,
ControlFile,
Checkpoint,
AuxFilesV1,
SlruDir(Result<SlruKind, u32>),
RelMap(RelTagish<2>),
RelDir(RelTagish<2>),
AuxFileV2(Result<AuxFileV2, utils::Hex<[u8; 16]>>),
}
#[derive(Debug, PartialEq)]
#[allow(unused)]
enum AuxFileV2 {
Recognized(&'static str, utils::Hex<[u8; 13]>),
OtherWithPrefix(&'static str, utils::Hex<[u8; 13]>),
Other(utils::Hex<[u8; 13]>),
}
impl RecognizedKeyKind {
fn new(key: Key) -> Option<Self> {
use RecognizedKeyKind::{
AuxFilesV1, Checkpoint, ControlFile, DbDir, RelDir, RelMap, SlruDir,
};
let slru_dir_kind = pageserver_api::key::slru_dir_kind(&key);
Some(match key {
pageserver_api::key::DBDIR_KEY => DbDir,
pageserver_api::key::CONTROLFILE_KEY => ControlFile,
pageserver_api::key::CHECKPOINT_KEY => Checkpoint,
pageserver_api::key::AUX_FILES_KEY => AuxFilesV1,
_ if slru_dir_kind.is_some() => SlruDir(slru_dir_kind.unwrap()),
_ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 0 => {
RelMap([key.field2, key.field3].into())
}
_ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 1 => {
RelDir([key.field2, key.field3].into())
}
_ if key.is_metadata_key() => RecognizedKeyKind::AuxFileV2(
AuxFileV2::new(key).ok_or_else(|| utils::Hex(key.to_i128().to_be_bytes())),
),
_ => return None,
})
}
}
impl AuxFileV2 {
fn new(key: Key) -> Option<AuxFileV2> {
const EMPTY_HASH: [u8; 13] = {
let mut out = [0u8; 13];
let hash = pageserver::aux_file::fnv_hash(b"").to_be_bytes();
let mut i = 3;
while i < 16 {
out[i - 3] = hash[i];
i += 1;
}
out
};
let bytes = key.to_i128().to_be_bytes();
let hash = utils::Hex(<[u8; 13]>::try_from(&bytes[3..]).unwrap());
assert_eq!(EMPTY_HASH.len(), hash.0.len());
// TODO: we could probably find the preimages for the hashes
Some(match (bytes[1], bytes[2]) {
(1, 1) => AuxFileV2::Recognized("pg_logical/mappings/", hash),
(1, 2) => AuxFileV2::Recognized("pg_logical/snapshots/", hash),
(1, 3) if hash.0 == EMPTY_HASH => {
AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash)
}
(2, 1) => AuxFileV2::Recognized("pg_replslot/", hash),
(1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash),
(0xff, 0xff) => AuxFileV2::Other(hash),
_ => return None,
})
}
}
/// Prefix of RelTag, currently only known use cases are the two item versions.
///
/// Renders like a reltag with `/`, nothing else.
struct RelTagish<const N: usize>([u32; N]);
impl<const N: usize> From<[u32; N]> for RelTagish<N> {
fn from(val: [u32; N]) -> Self {
RelTagish(val)
}
}
impl<const N: usize> std::fmt::Debug for RelTagish<N> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use std::fmt::Write as _;
let mut first = true;
self.0.iter().try_for_each(|x| {
if !first {
f.write_char('/')?;
}
first = false;
write!(f, "{}", x)
})
}
}
#[cfg(test)]
mod tests {
use pageserver::aux_file::encode_aux_file_key;
use super::*;
#[test]
fn hex_is_key_material() {
let m = KeyMaterial::try_from(&["000000067F0000400200DF927900FFFFFFFF"][..]).unwrap();
assert!(matches!(m, KeyMaterial::Hex(_)), "{m:?}");
}
#[test]
fn single_positional_spanalike_is_key_material() {
// why is this needed? if you are checking many, then copypaste starts to appeal
let strings = [
(line!(), "2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0"),
(line!(), "rel=1663/208101/2620_fsm blkno=2"),
(line!(), "rel=1663/208101/2620.1 blkno=2"),
];
let mut first: Option<Key> = None;
for (line, example) in strings {
let m = KeyMaterial::try_from(&[example][..])
.unwrap_or_else(|e| panic!("failed to parse example from line {line}: {e:?}"));
let key = Key::from(m);
if let Some(first) = first {
assert_eq!(first, key);
} else {
first = Some(key);
}
}
// not supporting this is rather accidential, but I think the input parsing is lenient
// enough already
KeyMaterial::try_from(&["1663/208101/2620_fsm 2"][..]).unwrap_err();
}
#[test]
fn multiple_spanlike_args() {
let strings = [
(line!(), &["process_query{tenant_id=C", "timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm", "blkno=2", "req_lsn=0/238D98C8}"][..]),
(line!(), &["rel=1663/208101/2620_fsm", "blkno=2"][..]),
(line!(), &["1663/208101/2620_fsm", "2"][..]),
];
let mut first: Option<Key> = None;
for (line, example) in strings {
let m = KeyMaterial::try_from(example)
.unwrap_or_else(|e| panic!("failed to parse example from line {line}: {e:?}"));
let key = Key::from(m);
if let Some(first) = first {
assert_eq!(first, key);
} else {
first = Some(key);
}
}
}
#[test]
fn recognized_auxfiles() {
use AuxFileV2::*;
let empty = [
0x2e, 0x07, 0xbb, 0x01, 0x42, 0x62, 0xb8, 0x21, 0x75, 0x62, 0x95, 0xc5, 0x8d,
];
let foobar = [
0x62, 0x79, 0x3c, 0x64, 0xbf, 0x6f, 0x0d, 0x35, 0x97, 0xba, 0x44, 0x6f, 0x18,
];
#[rustfmt::skip]
let examples = [
(line!(), "pg_logical/mappings/foobar", Recognized("pg_logical/mappings/", utils::Hex(foobar))),
(line!(), "pg_logical/snapshots/foobar", Recognized("pg_logical/snapshots/", utils::Hex(foobar))),
(line!(), "pg_logical/replorigin_checkpoint", Recognized("pg_logical/replorigin_checkpoint", utils::Hex(empty))),
(line!(), "pg_logical/foobar", OtherWithPrefix("pg_logical/", utils::Hex(foobar))),
(line!(), "pg_replslot/foobar", Recognized("pg_replslot/", utils::Hex(foobar))),
(line!(), "foobar", Other(utils::Hex(foobar))),
];
for (line, path, expected) in examples {
let key = encode_aux_file_key(path);
let recognized =
AuxFileV2::new(key).unwrap_or_else(|| panic!("line {line} example failed"));
assert_eq!(recognized, expected);
}
assert_eq!(
AuxFileV2::new(Key::from_hex("600000102000000000000000000000000000").unwrap()),
None,
"example key has one too few 0 after 6 before 1"
);
}
}

View File

@@ -6,6 +6,7 @@
mod draw_timeline_dir;
mod index_part;
mod key;
mod layer_map_analyzer;
mod layers;
@@ -61,6 +62,8 @@ enum Commands {
AnalyzeLayerMap(AnalyzeLayerMapCmd),
#[command(subcommand)]
Layer(LayerCmd),
/// Debug print a hex key found from logs
Key(key::DescribeKeyCommand),
}
/// Read and update pageserver metadata file
@@ -183,6 +186,7 @@ async fn main() -> anyhow::Result<()> {
.time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel)
.await?;
}
Commands::Key(dkc) => dkc.execute(),
};
Ok(())
}