diff --git a/Cargo.lock b/Cargo.lock index 13774f7fe6..b39ca6e5a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2170,6 +2170,7 @@ dependencies = [ "serde_json", "serde_with", "signal-hook", + "svg_fmt", "tar", "tempfile", "thiserror", @@ -3461,6 +3462,12 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +[[package]] +name = "svg_fmt" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" + [[package]] name = "symbolic-common" version = "8.8.0" diff --git a/Dockerfile b/Dockerfile index cb4e213687..b0d934d480 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,7 @@ COPY . . # Show build caching stats to check if it was used in the end. # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats. RUN set -e \ -&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin safekeeper --bin proxy --locked --release \ +&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \ && cachepot -s # Build final image @@ -65,6 +65,7 @@ RUN set -e \ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin +COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 2139e24ee2..b075b86aa1 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -67,6 +67,7 @@ remote_storage = { path = "../libs/remote_storage" } workspace_hack = { version = "0.1", path = "../workspace_hack" } close_fds = "0.3.2" walkdir = "2.3.2" +svg_fmt = "0.4.1" [dev-dependencies] criterion = "0.4" diff --git a/pageserver/src/bin/draw_timeline_dir.rs b/pageserver/src/bin/draw_timeline_dir.rs new file mode 100644 index 0000000000..ea1ff7f3c7 --- /dev/null +++ b/pageserver/src/bin/draw_timeline_dir.rs @@ -0,0 +1,150 @@ +//! A tool for visualizing the arrangement of layerfiles within a timeline. +//! +//! It reads filenames from stdin and prints a svg on stdout. The image is a plot in +//! page-lsn space, where every delta layer is a rectangle and every image layer is a +//! thick line. Legend: +//! - The x axis (left to right) represents page index. +//! - The y axis represents LSN, growing upwards. +//! +//! Coordinates in both axis are compressed for better readability. +//! (see https://medium.com/algorithms-digest/coordinate-compression-2fff95326fb) +//! +//! Example use: +//! ``` +//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE +//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg +//! $ firefox out.svg +//! ``` +//! +//! This API was chosen so that we can easily work with filenames extracted from ssh, +//! or from pageserver log files. +//! +//! TODO Consider shipping this as a grafana panel plugin: +//! https://grafana.com/tutorials/build-a-panel-plugin/ +use anyhow::Result; +use pageserver::repository::Key; +use std::cmp::Ordering; +use std::io::{self, BufRead}; +use std::{ + collections::{BTreeMap, BTreeSet}, + ops::Range, +}; +use svg_fmt::{rectangle, rgb, BeginSvg, EndSvg, Fill, Stroke}; +use utils::{lsn::Lsn, project_git_version}; + +project_git_version!(GIT_VERSION); + +// Map values to their compressed coordinate - the index the value +// would have in a sorted and deduplicated list of all values. +fn build_coordinate_compression_map(coords: Vec) -> BTreeMap { + let set: BTreeSet = coords.into_iter().collect(); + + let mut map: BTreeMap = BTreeMap::new(); + for (i, e) in set.iter().enumerate() { + map.insert(*e, i); + } + + map +} + +fn parse_filename(name: &str) -> (Range, Range) { + let split: Vec<&str> = name.split("__").collect(); + let keys: Vec<&str> = split[0].split('-').collect(); + let mut lsns: Vec<&str> = split[1].split('-').collect(); + if lsns.len() == 1 { + lsns.push(lsns[0]); + } + + let keys = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap(); + let lsns = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap(); + (keys, lsns) +} + +fn main() -> Result<()> { + // Parse layer filenames from stdin + let mut ranges: Vec<(Range, Range)> = vec![]; + let stdin = io::stdin(); + for line in stdin.lock().lines() { + let range = parse_filename(&line.unwrap()); + ranges.push(range); + } + + // Collect all coordinates + let mut keys: Vec = vec![]; + let mut lsns: Vec = vec![]; + for (keyr, lsnr) in &ranges { + keys.push(keyr.start); + keys.push(keyr.end); + lsns.push(lsnr.start); + lsns.push(lsnr.end); + } + + // Analyze + let key_map = build_coordinate_compression_map(keys); + let lsn_map = build_coordinate_compression_map(lsns); + + // Initialize stats + let mut num_deltas = 0; + let mut num_images = 0; + + // Draw + let stretch = 3.0; // Stretch out vertically for better visibility + println!( + "{}", + BeginSvg { + w: key_map.len() as f32, + h: stretch * lsn_map.len() as f32 + } + ); + for (keyr, lsnr) in &ranges { + let key_start = *key_map.get(&keyr.start).unwrap(); + let key_end = *key_map.get(&keyr.end).unwrap(); + let key_diff = key_end - key_start; + let lsn_max = lsn_map.len(); + + if key_start >= key_end { + panic!("Invalid key range {}-{}", key_start, key_end); + } + + let lsn_start = *lsn_map.get(&lsnr.start).unwrap(); + let lsn_end = *lsn_map.get(&lsnr.end).unwrap(); + + let mut lsn_diff = (lsn_end - lsn_start) as f32; + let mut fill = Fill::None; + let mut margin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas + let mut lsn_offset = 0.0; + + // Fill in and thicken rectangle if it's an + // image layer so that we can see it. + match lsn_start.cmp(&lsn_end) { + Ordering::Less => num_deltas += 1, + Ordering::Equal => { + num_images += 1; + lsn_diff = 0.3; + lsn_offset = -lsn_diff / 2.0; + margin = 0.05; + fill = Fill::Color(rgb(0, 0, 0)); + } + Ordering::Greater => panic!("Invalid lsn range {}-{}", lsn_start, lsn_end), + } + + println!( + " {}", + rectangle( + key_start as f32 + stretch * margin, + stretch * (lsn_max as f32 - (lsn_end as f32 - margin - lsn_offset)), + key_diff as f32 - stretch * 2.0 * margin, + stretch * (lsn_diff - 2.0 * margin) + ) + .fill(fill) + .stroke(Stroke::Color(rgb(0, 0, 0), 0.1)) + .border_radius(0.4) + ); + } + println!("{}", EndSvg); + + eprintln!("num_images: {}", num_images); + eprintln!("num_deltas: {}", num_deltas); + + Ok(()) +}