Files
neon/trace/src/main.rs
Heikki Linnakangas 4917f52c88 Server support for new pagestream protocol version (#7377)
In the old protocol version, the client sent with each request:

- latest: bool. If true, the client requested the latest page
  version, and the 'lsn' was just a hint of when the page was last
  modified
- lsn: Lsn, the page version to return

This protocol didn't allow requesting a page at a particular
non-latest LSN and *also* sending a hint on when the page was last
modified. That put a read only compute into an awkward position where
it had to either request each page at the replay-LSN, which could be
very close to the last LSN written in the primary and therefore
require the pageserver to wait for it to arrive, or an older LSN which
could already be garbage collected in the pageserver, resulting in an
error. The new protocol version fixes that by allowing a read only
compute to send both LSNs.

To use the new protocol version, use "pagestream_v2" command instead
of just "pagestream". The old protocol version is still supported, for
compatibility with old computes (and in fact there is no client
support yet, it is added by the next commit).
2024-04-25 20:45:37 +03:00

176 lines
5.3 KiB
Rust

//! A tool for working with read traces generated by the pageserver.
use std::collections::HashMap;
use std::path::PathBuf;
use std::str::FromStr;
use std::{
fs::{read_dir, File},
io::BufReader,
};
use pageserver_api::models::{
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamProtocolVersion,
};
use utils::id::{ConnectionId, TenantId, TimelineId};
use clap::{Parser, Subcommand};
/// Utils for working with pageserver read traces. For generating
/// traces, see the `trace_read_requests` tenant config option.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Path of trace directory
#[arg(short, long)]
path: PathBuf,
#[command(subcommand)]
command: Command,
}
/// What to do with the read trace
#[derive(Subcommand, Debug)]
enum Command {
/// List traces in the directory
List,
/// Print the traces in text format
Dump,
/// Print stats and anomalies about the traces
Analyze,
/// Draw the traces in svg format
Draw,
/// Send the read requests to a pageserver
Replay,
}
// HACK This function will change and improve as we see what kind of analysis is useful.
// Currently it collects the difference in blkno of consecutive GetPage requests,
// and counts the frequency of each value. This information is useful in order to:
// - see how sequential a workload is by seeing how often the delta is 1
// - detect any prefetching anomalies by looking for negative deltas during seqscan
fn analyze_trace<R: std::io::Read>(mut reader: R) {
let mut total = 0; // Total requests traced
let mut cross_rel = 0; // Requests that ask for different rel than previous request
let mut deltas = HashMap::<i32, u32>::new(); // Consecutive blkno differences
let mut prev: Option<PagestreamGetPageRequest> = None;
// Compute stats
while let Ok(msg) = PagestreamFeMessage::parse(&mut reader, PagestreamProtocolVersion::V2) {
match msg {
PagestreamFeMessage::Exists(_) => {}
PagestreamFeMessage::Nblocks(_) => {}
PagestreamFeMessage::GetSlruSegment(_) => {}
PagestreamFeMessage::GetPage(req) => {
total += 1;
if let Some(prev) = prev {
if prev.rel == req.rel {
let delta = (req.blkno as i32) - (prev.blkno as i32);
deltas.entry(delta).and_modify(|c| *c += 1).or_insert(1);
} else {
cross_rel += 1;
}
}
prev = Some(req);
}
PagestreamFeMessage::DbSize(_) => {}
};
}
// Print stats.
let mut other = deltas.len();
deltas.retain(|_, count| *count > 300);
other -= deltas.len();
dbg!(total);
dbg!(cross_rel);
dbg!(other);
dbg!(deltas);
}
fn dump_trace<R: std::io::Read>(mut reader: R) {
while let Ok(msg) = PagestreamFeMessage::parse(&mut reader, PagestreamProtocolVersion::V2) {
println!("{msg:?}");
}
}
#[derive(Debug)]
struct TraceFile {
#[allow(dead_code)]
pub tenant_id: TenantId,
#[allow(dead_code)]
pub timeline_id: TimelineId,
#[allow(dead_code)]
pub connection_id: ConnectionId,
pub path: PathBuf,
}
fn get_trace_files(traces_dir: &PathBuf) -> anyhow::Result<Vec<TraceFile>> {
let mut trace_files = Vec::<TraceFile>::new();
// Trace files are organized as {tenant_id}/{timeline_id}/{connection_id}
for tenant_dir in read_dir(traces_dir)? {
let entry = tenant_dir?;
let path = entry.path();
let tenant_id = TenantId::from_str(path.file_name().unwrap().to_str().unwrap())?;
for timeline_dir in read_dir(path)? {
let entry = timeline_dir?;
let path = entry.path();
let timeline_id = TimelineId::from_str(path.file_name().unwrap().to_str().unwrap())?;
for trace_dir in read_dir(path)? {
let entry = trace_dir?;
let path = entry.path();
let connection_id =
ConnectionId::from_str(path.file_name().unwrap().to_str().unwrap())?;
trace_files.push(TraceFile {
tenant_id,
timeline_id,
connection_id,
path,
});
}
}
}
Ok(trace_files)
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
match args.command {
Command::List => {
for trace_file in get_trace_files(&args.path)? {
println!("{trace_file:?}");
}
}
Command::Dump => {
for trace_file in get_trace_files(&args.path)? {
let file = File::open(trace_file.path.clone())?;
let reader = BufReader::new(file);
dump_trace(reader);
}
}
Command::Analyze => {
for trace_file in get_trace_files(&args.path)? {
println!("analyzing {trace_file:?}");
let file = File::open(trace_file.path.clone())?;
let reader = BufReader::new(file);
analyze_trace(reader);
}
}
Command::Draw => todo!(),
Command::Replay => todo!(),
}
Ok(())
}