Add size metric and test

This commit is contained in:
Bojan Serafimov
2023-06-12 18:25:11 -04:00
parent 4936ab6842
commit 45b71fecec
3 changed files with 68 additions and 1 deletion

View File

@@ -1,4 +1,5 @@
 use std::fs;
+use std::io::Read;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -133,6 +134,50 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
     }
 }
 
+/// Wrapper for a reader that counts bytes and reports metrics.
+///
+/// HACK The interface of this struct is a little funny, mostly because we want
+/// to use it as input for tar::Archive::new(reader), which for some reason
+/// takes ownership of the reader instead of just &mut. So we can't access
+/// the reader to read the byte count because we lose ownership. Instead we
+/// pass the ComputeNode inside the struct and update metrics on Drop.
+struct ByteCounter<'a, R: Read> {
+    inner: R,
+    byte_count: usize,
+    compute_node: &'a ComputeNode,
+}
+
+impl<'a, R: Read> ByteCounter<'a, R> {
+    fn new(reader: R, compute_node: &'a ComputeNode) -> Self {
+        Self {
+            inner: reader,
+            byte_count: 0,
+            compute_node,
+        }
+    }
+}
+
+impl<R: Read> Read for ByteCounter<'_, R> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let result = self.inner.read(buf);
+        if let Ok(n_bytes) = result {
+            self.byte_count += n_bytes;
+        }
+        result
+    }
+}
+
+impl<R: Read> Drop for ByteCounter<'_, R> {
+    fn drop(&mut self) {
+        self.compute_node
+            .state
+            .lock()
+            .unwrap()
+            .metrics
+            .basebackup_bytes = self.byte_count as u64;
+    }
+}
+
 impl ComputeNode {
     pub fn set_status(&self, status: ComputeStatus) {
         let mut state = self.state.lock().unwrap();
@@ -179,13 +224,14 @@ impl ComputeNode {
             _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
         };
 
         let copyreader = client.copy_out(basebackup_cmd.as_str())?;
+        let read_counter = ByteCounter::new(copyreader, self);
 
         // Read the archive directly from the `CopyOutReader`
         //
         // Set `ignore_zeros` so that unpack() reads all the Copy data and
         // doesn't stop at the end-of-archive marker. Otherwise, if the server
         // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
+        let mut ar = tar::Archive::new(read_counter);
         ar.set_ignore_zeros(true);
         ar.unpack(&self.pgdata)?;
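
The Drop-based reporting above is a general workaround for APIs that consume a reader by value, as tar::Archive::new does: stash a shared slot outside the struct and publish the count when the consumer drops the reader. A minimal standalone sketch of the same pattern, with an Arc<AtomicU64> standing in for the ComputeNode reference (names here are illustrative, not from this commit):

use std::io::Read;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

/// Counts bytes flowing through an inner reader; publishes the total on Drop.
struct CountingReader<R: Read> {
    inner: R,
    bytes: u64,
    total: Arc<AtomicU64>, // shared slot the caller keeps after giving the reader away
}

impl<R: Read> Read for CountingReader<R> {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let result = self.inner.read(buf);
        if let Ok(n) = result {
            self.bytes += n as u64;
        }
        result
    }
}

impl<R: Read> Drop for CountingReader<R> {
    fn drop(&mut self) {
        // Runs when the consumer (tar::Archive in the commit) drops the reader.
        self.total.store(self.bytes, Ordering::Relaxed);
    }
}

fn main() -> std::io::Result<()> {
    let total = Arc::new(AtomicU64::new(0));
    let mut reader = CountingReader {
        inner: &b"hello world"[..],
        bytes: 0,
        total: Arc::clone(&total),
    };
    let mut sink = Vec::new();
    reader.read_to_end(&mut sink)?; // stands in for tar::Archive consuming the reader
    drop(reader); // the count is published here, on Drop
    assert_eq!(total.load(Ordering::Relaxed), 11);
    Ok(())
}

One caveat the sketch shares with the commit's version: the total is only published on Drop, so if unpacking fails partway through, the metric reflects however many bytes were read before the error.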

View File

@@ -71,6 +71,7 @@ pub struct ComputeMetrics {
     pub wait_for_spec_ms: u64,
     pub sync_safekeepers_ms: u64,
     pub basebackup_ms: u64,
+    pub basebackup_bytes: u64,
     pub config_ms: u64,
     pub total_startup_ms: u64,
 }
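
The Python test in the next file reads this value by key from the compute's /metrics.json endpoint. A hedged sketch of how the new field could surface there, assuming the endpoint serializes ComputeMetrics via serde with default field naming (the HTTP handler and derives are not shown in this diff, so the Serialize derive is an assumption):

use serde::Serialize;

// Hypothetical mirror of the struct above, with an assumed Serialize derive.
#[derive(Serialize, Default)]
pub struct ComputeMetrics {
    pub wait_for_spec_ms: u64,
    pub sync_safekeepers_ms: u64,
    pub basebackup_ms: u64,
    pub basebackup_bytes: u64,
    pub config_ms: u64,
    pub total_startup_ms: u64,
}

fn main() {
    let metrics = ComputeMetrics {
        basebackup_bytes: 65_536,
        ..Default::default()
    };
    // Prints {"wait_for_spec_ms":0,...,"basebackup_bytes":65536,...},
    // i.e. the key the test reads as metrics["basebackup_bytes"].
    println!("{}", serde_json::to_string(&metrics).unwrap());
}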

View File

@@ -6,6 +6,26 @@ from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
 from fixtures.neon_fixtures import NeonEnvBuilder
 
 
+@pytest.mark.xfail  # We currently pass a 16MB pg_wal dir instead of creating it client-side
+def test_basebackup_size(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    # Start
+    env.neon_cli.create_branch("test_startup")
+    endpoint = env.endpoints.create_start("test_startup")
+
+    # Get metrics
+    metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
+    basebackup_bytes = metrics["basebackup_bytes"]
+    zenbenchmark.record(
+        "basebackup_size", basebackup_bytes, "bytes", report=MetricReport.LOWER_IS_BETTER
+    )
+
+    # Seems like a reasonable limit, but increase it if it becomes impossible to meet
+    assert basebackup_bytes < 70 * 1024
+
+
 # Just start and measure duration.
 #
 # This test runs pretty quickly and can be informative when used in combination