mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-13 13:20:38 +00:00
Compare commits
10 Commits
feature-be
...
layer-stat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a4bd82cb81 | ||
|
|
c64798956d | ||
|
|
39bbaecb03 | ||
|
|
26dca374eb | ||
|
|
9787227c35 | ||
|
|
ef80a902c8 | ||
|
|
66cdba990a | ||
|
|
82484e8241 | ||
|
|
36fee50f4d | ||
|
|
330083638f |
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -2530,6 +2530,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"clap 4.3.0",
|
||||
"git-version",
|
||||
"itertools",
|
||||
"pageserver",
|
||||
"postgres_ffi",
|
||||
"svg_fmt",
|
||||
|
||||
@@ -33,5 +33,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
|
||||
.init();
|
||||
tracing::info!("logging and tracing started");
|
||||
|
||||
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -52,9 +52,7 @@ completion, or shield the rest of the code from surprise cancellations
|
||||
by spawning a separate task. The code that handles incoming HTTP
|
||||
requests, for example, spawns a separate task for each request,
|
||||
because Hyper will drop the request-handling Future if the HTTP
|
||||
connection is lost. (FIXME: our HTTP handlers do not do that
|
||||
currently, but we should fix that. See [issue
|
||||
3478](https://github.com/neondatabase/neon/issues/3478)).
|
||||
connection is lost.
|
||||
|
||||
|
||||
#### How to cancel, then?
|
||||
|
||||
@@ -40,6 +40,12 @@ struct RequestId(String);
|
||||
///
|
||||
/// This also handles errors, logging them and converting them to an HTTP error response.
|
||||
///
|
||||
/// NB: If the client disconnects, Hyper will drop the Future, without polling it to
|
||||
/// completion. In other words, the handler must be async cancellation safe! request_span
|
||||
/// prints a warning to the log when that happens, so that you have some trace of it in
|
||||
/// the log.
|
||||
///
|
||||
///
|
||||
/// There could be other ways to implement similar functionality:
|
||||
///
|
||||
/// * procmacros placed on top of all handler methods
|
||||
|
||||
@@ -9,9 +9,6 @@ default = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints"]
|
||||
# Just a marker that compiles mock structs that are used in both tests and benchmarks. We
|
||||
# hide them behind a feature flag so that we can apply stronger lints to prod-only code.
|
||||
bench = []
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
# How to run
|
||||
|
||||
To run all benchmarks:
|
||||
`cargo bench --features bench`
|
||||
`cargo bench`
|
||||
|
||||
To run a specific file:
|
||||
`cargo bench --features bench --bench bench_layer_map`
|
||||
`cargo bench --bench bench_layer_map`
|
||||
|
||||
To run a specific function:
|
||||
`cargo bench --features bench --bench bench_layer_map -- real_map_uniform_queries`
|
||||
`cargo bench --bench bench_layer_map -- real_map_uniform_queries`
|
||||
|
||||
@@ -1,264 +1,245 @@
|
||||
// Hiding this code under a compilation flag allows us to lint it differently than prod code
|
||||
#[cfg(feature = "bench")]
|
||||
pub mod bench {
|
||||
use pageserver::keyspace::{KeyPartitioning, KeySpace};
|
||||
use pageserver::repository::Key;
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::storage_layer::mock::LayerDescriptor;
|
||||
use pageserver::tenant::storage_layer::{Layer, LayerFileName};
|
||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||
use std::cmp::{max, min};
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use pageserver::keyspace::{KeyPartitioning, KeySpace};
|
||||
use pageserver::repository::Key;
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
|
||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||
use std::cmp::{max, min};
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
|
||||
fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
|
||||
let mut layer_map = LayerMap::<LayerDescriptor>::default();
|
||||
fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
|
||||
let mut layer_map = LayerMap::<LayerDescriptor>::default();
|
||||
|
||||
let mut min_lsn = Lsn(u64::MAX);
|
||||
let mut max_lsn = Lsn(0);
|
||||
let mut min_lsn = Lsn(u64::MAX);
|
||||
let mut max_lsn = Lsn(0);
|
||||
|
||||
let filenames = BufReader::new(File::open(filename_dump).unwrap()).lines();
|
||||
let filenames = BufReader::new(File::open(filename_dump).unwrap()).lines();
|
||||
|
||||
let mut updates = layer_map.batch_update();
|
||||
for fname in filenames {
|
||||
let fname = fname.unwrap();
|
||||
let fname = LayerFileName::from_str(&fname).unwrap();
|
||||
let layer = LayerDescriptor::from(fname);
|
||||
let mut updates = layer_map.batch_update();
|
||||
for fname in filenames {
|
||||
let fname = fname.unwrap();
|
||||
let fname = LayerFileName::from_str(&fname).unwrap();
|
||||
let layer = LayerDescriptor::from(fname);
|
||||
|
||||
let lsn_range = layer.get_lsn_range();
|
||||
min_lsn = min(min_lsn, lsn_range.start);
|
||||
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
|
||||
let lsn_range = layer.get_lsn_range();
|
||||
min_lsn = min(min_lsn, lsn_range.start);
|
||||
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
|
||||
|
||||
updates.insert_historic(Arc::new(layer));
|
||||
}
|
||||
|
||||
println!("min: {min_lsn}, max: {max_lsn}");
|
||||
|
||||
updates.flush();
|
||||
layer_map
|
||||
updates.insert_historic(Arc::new(layer));
|
||||
}
|
||||
|
||||
/// Construct a layer map query pattern for benchmarks
|
||||
fn uniform_query_pattern(layer_map: &LayerMap<LayerDescriptor>) -> Vec<(Key, Lsn)> {
|
||||
// For each image layer we query one of the pages contained, at LSN right
|
||||
// before the image layer was created. This gives us a somewhat uniform
|
||||
// coverage of both the lsn and key space because image layers have
|
||||
// approximately equal sizes and cover approximately equal WAL since
|
||||
// last image.
|
||||
layer_map
|
||||
.iter_historic_layers()
|
||||
.filter_map(|l| {
|
||||
if l.is_incremental() {
|
||||
None
|
||||
} else {
|
||||
let kr = l.get_key_range();
|
||||
let lr = l.get_lsn_range();
|
||||
println!("min: {min_lsn}, max: {max_lsn}");
|
||||
|
||||
let key_inside = kr.start.next();
|
||||
let lsn_before = Lsn(lr.start.0 - 1);
|
||||
updates.flush();
|
||||
layer_map
|
||||
}
|
||||
|
||||
Some((key_inside, lsn_before))
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
/// Construct a layer map query pattern for benchmarks
|
||||
fn uniform_query_pattern(layer_map: &LayerMap<LayerDescriptor>) -> Vec<(Key, Lsn)> {
|
||||
// For each image layer we query one of the pages contained, at LSN right
|
||||
// before the image layer was created. This gives us a somewhat uniform
|
||||
// coverage of both the lsn and key space because image layers have
|
||||
// approximately equal sizes and cover approximately equal WAL since
|
||||
// last image.
|
||||
layer_map
|
||||
.iter_historic_layers()
|
||||
.filter_map(|l| {
|
||||
if l.is_incremental() {
|
||||
None
|
||||
} else {
|
||||
let kr = l.get_key_range();
|
||||
let lr = l.get_lsn_range();
|
||||
|
||||
// Construct a partitioning for testing get_difficulty map when we
|
||||
// don't have an exact result of `collect_keyspace` to work with.
|
||||
fn uniform_key_partitioning(
|
||||
layer_map: &LayerMap<LayerDescriptor>,
|
||||
_lsn: Lsn,
|
||||
) -> KeyPartitioning {
|
||||
let mut parts = Vec::new();
|
||||
let key_inside = kr.start.next();
|
||||
let lsn_before = Lsn(lr.start.0 - 1);
|
||||
|
||||
// We add a partition boundary at the start of each image layer,
|
||||
// no matter what lsn range it covers. This is just the easiest
|
||||
// thing to do. A better thing to do would be to get a real
|
||||
// partitioning from some database. Even better, remove the need
|
||||
// for key partitions by deciding where to create image layers
|
||||
// directly based on a coverage-based difficulty map.
|
||||
let mut keys: Vec<_> = layer_map
|
||||
.iter_historic_layers()
|
||||
.filter_map(|l| {
|
||||
if l.is_incremental() {
|
||||
None
|
||||
} else {
|
||||
let kr = l.get_key_range();
|
||||
Some(kr.start.next())
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
keys.sort();
|
||||
|
||||
let mut current_key = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
for key in keys {
|
||||
parts.push(KeySpace {
|
||||
ranges: vec![current_key..key],
|
||||
});
|
||||
current_key = key;
|
||||
}
|
||||
|
||||
KeyPartitioning { parts }
|
||||
}
|
||||
|
||||
// Benchmark using metadata extracted from our performance test environment, from
|
||||
// a project where we have run pgbench many timmes. The pgbench database was initialized
|
||||
// between each test run.
|
||||
fn bench_from_captest_env(c: &mut Criterion) {
|
||||
// TODO consider compressing this file
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Test with uniform query pattern
|
||||
c.bench_function("captest_uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// test with a key that corresponds to the RelDir entry. See pgdatadir_mapping.rs.
|
||||
c.bench_function("captest_rel_dir_query", |b| {
|
||||
b.iter(|| {
|
||||
let result = black_box(layer_map.search(
|
||||
Key::from_hex("000000067F00008000000000000000000001").unwrap(),
|
||||
// This LSN is higher than any of the LSNs in the tree
|
||||
Lsn::from_str("D0/80208AE1").unwrap(),
|
||||
));
|
||||
result.unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Benchmark using metadata extracted from a real project that was taknig
|
||||
// too long processing layer map queries.
|
||||
fn bench_from_real_project(c: &mut Criterion) {
|
||||
// Init layer map
|
||||
let now = Instant::now();
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Choose uniformly distributed queries
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Choose inputs for get_difficulty_map
|
||||
let latest_lsn = layer_map
|
||||
.iter_historic_layers()
|
||||
.map(|l| l.get_lsn_range().end)
|
||||
.max()
|
||||
.unwrap();
|
||||
let partitioning = uniform_key_partitioning(&layer_map, latest_lsn);
|
||||
|
||||
// Check correctness of get_difficulty_map
|
||||
// TODO put this in a dedicated test outside of this mod
|
||||
{
|
||||
println!("running correctness check");
|
||||
|
||||
let now = Instant::now();
|
||||
let result_bruteforce =
|
||||
layer_map.get_difficulty_map_bruteforce(latest_lsn, &partitioning);
|
||||
assert!(result_bruteforce.len() == partitioning.parts.len());
|
||||
println!("Finished bruteforce in {:?}", now.elapsed());
|
||||
|
||||
let now = Instant::now();
|
||||
let result_fast = layer_map.get_difficulty_map(latest_lsn, &partitioning, None);
|
||||
assert!(result_fast.len() == partitioning.parts.len());
|
||||
println!("Finished fast in {:?}", now.elapsed());
|
||||
|
||||
// Assert results are equal. Manually iterate for easier debugging.
|
||||
let zip = std::iter::zip(
|
||||
&partitioning.parts,
|
||||
std::iter::zip(result_bruteforce, result_fast),
|
||||
);
|
||||
for (_part, (bruteforce, fast)) in zip {
|
||||
assert_eq!(bruteforce, fast);
|
||||
Some((key_inside, lsn_before))
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
println!("No issues found");
|
||||
}
|
||||
// Construct a partitioning for testing get_difficulty map when we
|
||||
// don't have an exact result of `collect_keyspace` to work with.
|
||||
fn uniform_key_partitioning(layer_map: &LayerMap<LayerDescriptor>, _lsn: Lsn) -> KeyPartitioning {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("real_map");
|
||||
group.bench_function("uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
// We add a partition boundary at the start of each image layer,
|
||||
// no matter what lsn range it covers. This is just the easiest
|
||||
// thing to do. A better thing to do would be to get a real
|
||||
// partitioning from some database. Even better, remove the need
|
||||
// for key partitions by deciding where to create image layers
|
||||
// directly based on a coverage-based difficulty map.
|
||||
let mut keys: Vec<_> = layer_map
|
||||
.iter_historic_layers()
|
||||
.filter_map(|l| {
|
||||
if l.is_incremental() {
|
||||
None
|
||||
} else {
|
||||
let kr = l.get_key_range();
|
||||
Some(kr.start.next())
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
keys.sort();
|
||||
|
||||
let mut current_key = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
for key in keys {
|
||||
parts.push(KeySpace {
|
||||
ranges: vec![current_key..key],
|
||||
});
|
||||
group.bench_function("get_difficulty_map", |b| {
|
||||
b.iter(|| {
|
||||
layer_map.get_difficulty_map(latest_lsn, &partitioning, Some(3));
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
current_key = key;
|
||||
}
|
||||
|
||||
// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
|
||||
fn bench_sequential(c: &mut Criterion) {
|
||||
// Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
|
||||
//
|
||||
// TODO This code is pretty slow and runs even if we're only running other
|
||||
// benchmarks. It needs to be somewhere else, but it's not clear where.
|
||||
// Putting it inside the `bench_function` closure is not a solution
|
||||
// because then it runs multiple times during warmup.
|
||||
KeyPartitioning { parts }
|
||||
}
|
||||
|
||||
// Benchmark using metadata extracted from our performance test environment, from
|
||||
// a project where we have run pgbench many timmes. The pgbench database was initialized
|
||||
// between each test run.
|
||||
fn bench_from_captest_env(c: &mut Criterion) {
|
||||
// TODO consider compressing this file
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Test with uniform query pattern
|
||||
c.bench_function("captest_uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// test with a key that corresponds to the RelDir entry. See pgdatadir_mapping.rs.
|
||||
c.bench_function("captest_rel_dir_query", |b| {
|
||||
b.iter(|| {
|
||||
let result = black_box(layer_map.search(
|
||||
Key::from_hex("000000067F00008000000000000000000001").unwrap(),
|
||||
// This LSN is higher than any of the LSNs in the tree
|
||||
Lsn::from_str("D0/80208AE1").unwrap(),
|
||||
));
|
||||
result.unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Benchmark using metadata extracted from a real project that was taknig
|
||||
// too long processing layer map queries.
|
||||
fn bench_from_real_project(c: &mut Criterion) {
|
||||
// Init layer map
|
||||
let now = Instant::now();
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Choose uniformly distributed queries
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Choose inputs for get_difficulty_map
|
||||
let latest_lsn = layer_map
|
||||
.iter_historic_layers()
|
||||
.map(|l| l.get_lsn_range().end)
|
||||
.max()
|
||||
.unwrap();
|
||||
let partitioning = uniform_key_partitioning(&layer_map, latest_lsn);
|
||||
|
||||
// Check correctness of get_difficulty_map
|
||||
// TODO put this in a dedicated test outside of this mod
|
||||
{
|
||||
println!("running correctness check");
|
||||
|
||||
let now = Instant::now();
|
||||
let mut layer_map = LayerMap::default();
|
||||
let mut updates = layer_map.batch_update();
|
||||
for i in 0..100_000 {
|
||||
let i32 = (i as u32) % 100;
|
||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
let layer = LayerDescriptor {
|
||||
key: zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||
lsn: Lsn(i)..Lsn(i + 1),
|
||||
is_incremental: false,
|
||||
short_id: format!("Layer {}", i),
|
||||
};
|
||||
updates.insert_historic(Arc::new(layer));
|
||||
let result_bruteforce = layer_map.get_difficulty_map_bruteforce(latest_lsn, &partitioning);
|
||||
assert!(result_bruteforce.len() == partitioning.parts.len());
|
||||
println!("Finished bruteforce in {:?}", now.elapsed());
|
||||
|
||||
let now = Instant::now();
|
||||
let result_fast = layer_map.get_difficulty_map(latest_lsn, &partitioning, None);
|
||||
assert!(result_fast.len() == partitioning.parts.len());
|
||||
println!("Finished fast in {:?}", now.elapsed());
|
||||
|
||||
// Assert results are equal. Manually iterate for easier debugging.
|
||||
let zip = std::iter::zip(
|
||||
&partitioning.parts,
|
||||
std::iter::zip(result_bruteforce, result_fast),
|
||||
);
|
||||
for (_part, (bruteforce, fast)) in zip {
|
||||
assert_eq!(bruteforce, fast);
|
||||
}
|
||||
updates.flush();
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Choose 100 uniformly random queries
|
||||
let rng = &mut StdRng::seed_from_u64(1);
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map)
|
||||
.choose_multiple(rng, 100)
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("sequential");
|
||||
group.bench_function("uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
println!("No issues found");
|
||||
}
|
||||
|
||||
criterion_group!(group_1, bench_from_captest_env);
|
||||
criterion_group!(group_2, bench_from_real_project);
|
||||
criterion_group!(group_3, bench_sequential);
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("real_map");
|
||||
group.bench_function("uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("get_difficulty_map", |b| {
|
||||
b.iter(|| {
|
||||
layer_map.get_difficulty_map(latest_lsn, &partitioning, Some(3));
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
#[cfg(feature = "bench")]
|
||||
use criterion::criterion_main;
|
||||
// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
|
||||
fn bench_sequential(c: &mut Criterion) {
|
||||
// Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
|
||||
//
|
||||
// TODO This code is pretty slow and runs even if we're only running other
|
||||
// benchmarks. It needs to be somewhere else, but it's not clear where.
|
||||
// Putting it inside the `bench_function` closure is not a solution
|
||||
// because then it runs multiple times during warmup.
|
||||
let now = Instant::now();
|
||||
let mut layer_map = LayerMap::default();
|
||||
let mut updates = layer_map.batch_update();
|
||||
for i in 0..100_000 {
|
||||
let i32 = (i as u32) % 100;
|
||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
let layer = LayerDescriptor {
|
||||
key: zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||
lsn: Lsn(i)..Lsn(i + 1),
|
||||
is_incremental: false,
|
||||
short_id: format!("Layer {}", i),
|
||||
};
|
||||
updates.insert_historic(Arc::new(layer));
|
||||
}
|
||||
updates.flush();
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
#[cfg(feature = "bench")]
|
||||
criterion_main!(bench::group_1, bench::group_2, bench::group_3);
|
||||
// Choose 100 uniformly random queries
|
||||
let rng = &mut StdRng::seed_from_u64(1);
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map)
|
||||
.choose_multiple(rng, 100)
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
#[cfg(not(feature = "bench"))]
|
||||
fn main() {
|
||||
panic!("Use `--features bench` to run benchmarks")
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("sequential");
|
||||
group.bench_function("uniform_queries", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
black_box(layer_map.search(q.0, q.1));
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(group_1, bench_from_captest_env);
|
||||
criterion_group!(group_2, bench_from_real_project);
|
||||
criterion_group!(group_3, bench_sequential);
|
||||
criterion_main!(group_1, group_2, group_3);
|
||||
|
||||
@@ -16,3 +16,4 @@ postgres_ffi.workspace = true
|
||||
utils.workspace = true
|
||||
svg_fmt.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
itertools.workspace = true
|
||||
|
||||
@@ -41,9 +41,18 @@ pub(crate) enum LayerCmd {
|
||||
/// The id from list-layer command
|
||||
id: usize,
|
||||
},
|
||||
/// Output layer statistics
|
||||
GetStats {
|
||||
path: PathBuf,
|
||||
tenant: String,
|
||||
timeline: String,
|
||||
/// The id from list-layer command
|
||||
id: usize,
|
||||
},
|
||||
}
|
||||
|
||||
fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
|
||||
// Return (key, value.len) for all keys, sorted by key.
|
||||
fn read_delta_file(path: impl AsRef<Path>) -> Result<Vec<(Key, usize)>> {
|
||||
use pageserver::tenant::blob_io::BlobCursor;
|
||||
use pageserver::tenant::block_io::BlockReader;
|
||||
|
||||
@@ -70,11 +79,48 @@ fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
|
||||
},
|
||||
)?;
|
||||
let mut cursor = BlockCursor::new(&file);
|
||||
|
||||
let mut result = vec![];
|
||||
for (k, v) in all {
|
||||
let value = cursor.read_blob(v.pos())?;
|
||||
println!("key:{} value_len:{}", k, value.len());
|
||||
result.push((k, value.len()));
|
||||
}
|
||||
// TODO(chi): special handling for last key?
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// We divide the entire i128 keyspace into pre-assigned fixed segments,
|
||||
// 8MB each. Group keys by segment, and report segment size for each.
|
||||
//
|
||||
// 8MB is chosen as the segment size because we're unlikely to make
|
||||
// s3 partial downloads smaller than 8MB (due to cost). So summarizing
|
||||
// layer metadata in 8MB segments could be enough to generate good test
|
||||
// data for write amplification tests.
|
||||
//
|
||||
// Note that the segments are fixed, and don't depend on what keyspace
|
||||
// is actually in use.
|
||||
fn read_delta_segments(path: impl AsRef<Path>) -> Result<Vec<(i128, usize)>> {
|
||||
fn key_to_segment(key: &Key) -> i128 {
|
||||
// A page is 8KB. So 1024 pages are 8MB.
|
||||
key.to_i128() >> 10
|
||||
}
|
||||
|
||||
use itertools::Itertools;
|
||||
let delta_metadata = read_delta_file(path)?;
|
||||
let group_iter = delta_metadata.iter().group_by(|(k, _)| key_to_segment(k));
|
||||
let group_sizes = group_iter.into_iter().map(|(segment, lengths_group)| {
|
||||
let sum: usize = lengths_group.map(|(_k, len)| len).sum();
|
||||
(segment, sum)
|
||||
});
|
||||
Ok(group_sizes.collect())
|
||||
}
|
||||
|
||||
fn summarize_delta_file(path: impl AsRef<Path>) -> Result<()> {
|
||||
// TODO write in some compressed binary format
|
||||
for (segment, size) in read_delta_segments(path)? {
|
||||
println!("segment:{} size:{}", segment, size);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -153,7 +199,38 @@ pub(crate) fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
);
|
||||
|
||||
if layer_file.is_delta {
|
||||
read_delta_file(layer.path())?;
|
||||
for (k, len) in read_delta_file(layer.path())? {
|
||||
println!("key:{} value_len:{}", k, len);
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("not supported yet :(");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
LayerCmd::GetStats {
|
||||
path,
|
||||
tenant,
|
||||
timeline,
|
||||
id,
|
||||
} => {
|
||||
let timeline_path = path
|
||||
.join("tenants")
|
||||
.join(tenant)
|
||||
.join("timelines")
|
||||
.join(timeline);
|
||||
let mut idx = 0;
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if *id == idx {
|
||||
if layer_file.is_delta {
|
||||
summarize_delta_file(layer.path())?;
|
||||
} else {
|
||||
anyhow::bail!("not supported yet :(");
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
//!
|
||||
//! Management HTTP API
|
||||
//!
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -46,7 +49,6 @@ use utils::{
|
||||
};
|
||||
|
||||
// Imports only used for testing APIs
|
||||
#[cfg(feature = "testing")]
|
||||
use super::models::ConfigureFailpointsRequest;
|
||||
|
||||
struct State {
|
||||
@@ -290,13 +292,19 @@ fn build_timeline_info_common(
|
||||
}
|
||||
|
||||
// healthcheck handler
|
||||
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn status_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
let config = get_config(&request);
|
||||
json_response(StatusCode::OK, StatusResponse { id: config.id })
|
||||
}
|
||||
|
||||
async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_create_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -332,7 +340,10 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
.await
|
||||
}
|
||||
|
||||
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let include_non_incremental_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
@@ -366,7 +377,10 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
|
||||
async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_detail_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let include_non_incremental_logical_size: Option<bool> =
|
||||
@@ -400,7 +414,10 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
json_response(StatusCode::OK, timeline_info)
|
||||
}
|
||||
|
||||
async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn get_lsn_by_timestamp_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -424,7 +441,10 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
|
||||
async fn tenant_attach_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_attach_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -460,7 +480,10 @@ async fn tenant_attach_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_delete_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -474,7 +497,10 @@ async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_detach_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let detach_ignored: Option<bool> = parse_query_param(&request, "detach_ignored")?;
|
||||
@@ -488,7 +514,10 @@ async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_load_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -508,7 +537,10 @@ async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn tenant_ignore_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_ignore_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -521,7 +553,10 @@ async fn tenant_ignore_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let response_data = mgr::list_tenants()
|
||||
@@ -541,7 +576,10 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
|
||||
async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_status(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -581,7 +619,10 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
/// Note: we don't update the cached size and prometheus metric here.
|
||||
/// The retention period might be different, and it's nice to have a method to just calculate it
|
||||
/// without modifying anything anyway.
|
||||
async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_size_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
|
||||
@@ -646,7 +687,10 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
)
|
||||
}
|
||||
|
||||
async fn layer_map_info_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn layer_map_info_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let reset: LayerAccessStatsReset =
|
||||
@@ -660,7 +704,10 @@ async fn layer_map_info_handler(request: Request<Body>) -> Result<Response<Body>
|
||||
json_response(StatusCode::OK, layer_map_info)
|
||||
}
|
||||
|
||||
async fn layer_download_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn layer_download_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
@@ -683,7 +730,10 @@ async fn layer_download_handler(request: Request<Body>) -> Result<Response<Body>
|
||||
}
|
||||
}
|
||||
|
||||
async fn evict_timeline_layer_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn evict_timeline_layer_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
@@ -761,7 +811,10 @@ pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>,
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn tenant_create_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let target_tenant_id = request_data.new_tenant_id;
|
||||
check_permission(&request, None)?;
|
||||
@@ -808,7 +861,10 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
)
|
||||
}
|
||||
|
||||
async fn get_tenant_config_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn get_tenant_config_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
@@ -834,6 +890,7 @@ async fn get_tenant_config_handler(request: Request<Body>) -> Result<Response<Bo
|
||||
|
||||
async fn update_tenant_config_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TenantConfigRequest = json_request(&mut request).await?;
|
||||
let tenant_id = request_data.tenant_id;
|
||||
@@ -851,8 +908,10 @@ async fn update_tenant_config_handler(
|
||||
}
|
||||
|
||||
/// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`].
|
||||
#[cfg(feature = "testing")]
|
||||
async fn handle_tenant_break(r: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn handle_tenant_break(
|
||||
r: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
|
||||
|
||||
let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
|
||||
@@ -864,8 +923,10 @@ async fn handle_tenant_break(r: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn failpoints_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
if !fail::has_failpoints() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"Cannot manage failpoints because pageserver was compiled without failpoints support"
|
||||
@@ -898,7 +959,10 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
|
||||
}
|
||||
|
||||
// Run GC immediately on given timeline.
|
||||
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_gc_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -917,8 +981,10 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_compact_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -939,8 +1005,10 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
|
||||
}
|
||||
|
||||
// Run checkpoint immediately on given timeline.
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn timeline_checkpoint_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -964,6 +1032,7 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
|
||||
|
||||
async fn timeline_download_remote_layers_handler_post(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
@@ -979,6 +1048,7 @@ async fn timeline_download_remote_layers_handler_post(
|
||||
|
||||
async fn timeline_download_remote_layers_handler_get(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -1002,7 +1072,10 @@ async fn active_timeline_of_active_tenant(
|
||||
.map_err(ApiError::NotFound)
|
||||
}
|
||||
|
||||
async fn always_panic_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn always_panic_handler(
|
||||
req: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
// Deliberately cause a panic to exercise the panic hook registered via std::panic::set_hook().
|
||||
// For pageserver, the relevant panic hook is `tracing_panic_hook` , and the `sentry` crate's wrapper around it.
|
||||
// Use catch_unwind to ensure that tokio nor hyper are distracted by our panic.
|
||||
@@ -1013,7 +1086,10 @@ async fn always_panic_handler(req: Request<Body>) -> Result<Response<Body>, ApiE
|
||||
json_response(StatusCode::NO_CONTENT, ())
|
||||
}
|
||||
|
||||
async fn disk_usage_eviction_run(mut r: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn disk_usage_eviction_run(
|
||||
mut r: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&r, None)?;
|
||||
|
||||
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
|
||||
@@ -1103,8 +1179,10 @@ async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
async fn post_tracing_event_handler(mut r: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
async fn post_tracing_event_handler(
|
||||
mut r: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum Level {
|
||||
@@ -1134,6 +1212,85 @@ async fn post_tracing_event_handler(mut r: Request<Body>) -> Result<Response<Bod
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
/// Common functionality of all the HTTP API handlers.
|
||||
///
|
||||
/// - Adds a tracing span to each request (by `request_span`)
|
||||
/// - Logs the request depending on the request method (by `request_span`)
|
||||
/// - Logs the response if it was not successful (by `request_span`
|
||||
/// - Shields the handler function from async cancellations. Hyper can drop the handler
|
||||
/// Future if the connection to the client is lost, but most of the pageserver code is
|
||||
/// not async cancellation safe. This converts the dropped future into a graceful cancellation
|
||||
/// request with a CancellationToken.
|
||||
async fn api_handler<R, H>(request: Request<Body>, handler: H) -> Result<Response<Body>, ApiError>
|
||||
where
|
||||
R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
|
||||
H: FnOnce(Request<Body>, CancellationToken) -> R + Send + Sync + 'static,
|
||||
{
|
||||
// Spawn a new task to handle the request, to protect the handler from unexpected
|
||||
// async cancellations. Most pageserver functions are not async cancellation safe.
|
||||
// We arm a drop-guard, so that if Hyper drops the Future, we signal the task
|
||||
// with the cancellation token.
|
||||
let token = CancellationToken::new();
|
||||
let cancel_guard = token.clone().drop_guard();
|
||||
let result = request_span(request, move |r| async {
|
||||
let handle = tokio::spawn(
|
||||
async {
|
||||
let token_cloned = token.clone();
|
||||
let result = handler(r, token).await;
|
||||
if token_cloned.is_cancelled() {
|
||||
info!("Cancelled request finished");
|
||||
}
|
||||
result
|
||||
}
|
||||
.in_current_span(),
|
||||
);
|
||||
|
||||
match handle.await {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
// The handler task panicked. We have a global panic handler that logs the
|
||||
// panic with its backtrace, so no need to log that here. Only log a brief
|
||||
// message to make it clear that we returned the error to the client.
|
||||
error!("HTTP request handler task panicked: {e:#}");
|
||||
|
||||
// Don't return an Error here, because then fallback error handler that was
|
||||
// installed in make_router() will print the error. Instead, construct the
|
||||
// HTTP error response and return that.
|
||||
Ok(
|
||||
ApiError::InternalServerError(anyhow!("HTTP request handler task panicked"))
|
||||
.into_response(),
|
||||
)
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
cancel_guard.disarm();
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Like api_handler, but returns an error response if the server is built without
|
||||
/// the 'testing' feature.
|
||||
async fn testing_api_handler<R, H>(
|
||||
desc: &str,
|
||||
request: Request<Body>,
|
||||
handler: H,
|
||||
) -> Result<Response<Body>, ApiError>
|
||||
where
|
||||
R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
|
||||
H: FnOnce(Request<Body>, CancellationToken) -> R + Send + Sync + 'static,
|
||||
{
|
||||
if cfg!(feature = "testing") {
|
||||
api_handler(request, handler).await
|
||||
} else {
|
||||
std::future::ready(Err(ApiError::BadRequest(anyhow!(
|
||||
"Cannot {desc} because pageserver was compiled without testing APIs",
|
||||
))))
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_router(
|
||||
conf: &'static PageServerConf,
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
@@ -1163,26 +1320,6 @@ pub fn make_router(
|
||||
.expect("construct launch timestamp header middleware"),
|
||||
);
|
||||
|
||||
macro_rules! testing_api {
|
||||
($handler_desc:literal, $handler:path $(,)?) => {{
|
||||
#[cfg(not(feature = "testing"))]
|
||||
async fn cfg_disabled(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Err(ApiError::BadRequest(anyhow!(concat!(
|
||||
"Cannot ",
|
||||
$handler_desc,
|
||||
" because pageserver was compiled without testing APIs",
|
||||
))))
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
let handler = $handler;
|
||||
#[cfg(not(feature = "testing"))]
|
||||
let handler = cfg_disabled;
|
||||
|
||||
move |r| request_span(r, handler)
|
||||
}};
|
||||
}
|
||||
|
||||
Ok(router
|
||||
.data(Arc::new(
|
||||
State::new(
|
||||
@@ -1194,92 +1331,88 @@ pub fn make_router(
|
||||
)
|
||||
.context("Failed to initialize router state")?,
|
||||
))
|
||||
.get("/v1/status", |r| request_span(r, status_handler))
|
||||
.put(
|
||||
"/v1/failpoints",
|
||||
testing_api!("manage failpoints", failpoints_handler),
|
||||
)
|
||||
.get("/v1/tenant", |r| request_span(r, tenant_list_handler))
|
||||
.post("/v1/tenant", |r| request_span(r, tenant_create_handler))
|
||||
.get("/v1/tenant/:tenant_id", |r| request_span(r, tenant_status))
|
||||
.get("/v1/status", |r| api_handler(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
})
|
||||
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
|
||||
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
|
||||
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
|
||||
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
|
||||
request_span(r, tenant_size_handler)
|
||||
api_handler(r, tenant_size_handler)
|
||||
})
|
||||
.put("/v1/tenant/config", |r| {
|
||||
request_span(r, update_tenant_config_handler)
|
||||
api_handler(r, update_tenant_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/config", |r| {
|
||||
request_span(r, get_tenant_config_handler)
|
||||
api_handler(r, get_tenant_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline", |r| {
|
||||
request_span(r, timeline_list_handler)
|
||||
api_handler(r, timeline_list_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/timeline", |r| {
|
||||
request_span(r, timeline_create_handler)
|
||||
api_handler(r, timeline_create_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/attach", |r| {
|
||||
request_span(r, tenant_attach_handler)
|
||||
api_handler(r, tenant_attach_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/detach", |r| {
|
||||
request_span(r, tenant_detach_handler)
|
||||
api_handler(r, tenant_detach_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/load", |r| {
|
||||
request_span(r, tenant_load_handler)
|
||||
api_handler(r, tenant_load_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
request_span(r, tenant_ignore_handler)
|
||||
api_handler(r, tenant_ignore_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
request_span(r, timeline_detail_handler)
|
||||
api_handler(r, timeline_detail_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
|
||||
|r| request_span(r, get_lsn_by_timestamp_handler),
|
||||
|r| api_handler(r, get_lsn_by_timestamp_handler),
|
||||
)
|
||||
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
|
||||
request_span(r, timeline_gc_handler)
|
||||
api_handler(r, timeline_gc_handler)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
|
||||
testing_api_handler("run timeline compaction", r, timeline_compact_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/compact",
|
||||
testing_api!("run timeline compaction", timeline_compact_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
|
||||
testing_api!("run timeline checkpoint", timeline_checkpoint_handler),
|
||||
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| request_span(r, timeline_download_remote_layers_handler_post),
|
||||
|r| api_handler(r, timeline_download_remote_layers_handler_post),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| request_span(r, timeline_download_remote_layers_handler_get),
|
||||
|r| api_handler(r, timeline_download_remote_layers_handler_get),
|
||||
)
|
||||
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
request_span(r, timeline_delete_handler)
|
||||
api_handler(r, timeline_delete_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
|
||||
request_span(r, layer_map_info_handler)
|
||||
api_handler(r, layer_map_info_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| request_span(r, layer_download_handler),
|
||||
|r| api_handler(r, layer_download_handler),
|
||||
)
|
||||
.delete(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| request_span(r, evict_timeline_layer_handler),
|
||||
|r| api_handler(r, evict_timeline_layer_handler),
|
||||
)
|
||||
.put("/v1/disk_usage_eviction/run", |r| {
|
||||
request_span(r, disk_usage_eviction_run)
|
||||
api_handler(r, disk_usage_eviction_run)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_id/break", |r| {
|
||||
testing_api_handler("set tenant state to broken", r, handle_tenant_break)
|
||||
})
|
||||
.get("/v1/panic", |r| api_handler(r, always_panic_handler))
|
||||
.post("/v1/tracing/event", |r| {
|
||||
testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/break",
|
||||
testing_api!("set tenant state to broken", handle_tenant_break),
|
||||
)
|
||||
.get("/v1/panic", |r| request_span(r, always_panic_handler))
|
||||
.post(
|
||||
"/v1/tracing/event",
|
||||
testing_api!("emit a tracing event", post_tracing_event_handler),
|
||||
)
|
||||
.any(handler_404))
|
||||
}
|
||||
|
||||
@@ -84,6 +84,16 @@ pub static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static READ_NUM_FS_LAYERS: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_read_num_fs_layers",
|
||||
"Number of persistent layers accessed for processing a read request, including those in the cache",
|
||||
&["tenant_id", "timeline_id"],
|
||||
vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Metrics collected on operations on the storage repository.
|
||||
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
@@ -95,6 +105,25 @@ static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_materialized_cache_hits_direct_total",
|
||||
"Number of cache hits from materialized page cache without redo",
|
||||
&["tenant_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static GET_RECONSTRUCT_DATA_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_getpage_get_reconstruct_data_seconds",
|
||||
"Time spent in get_reconstruct_value_data",
|
||||
&["tenant_id", "timeline_id"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_materialized_cache_hits_total",
|
||||
@@ -354,6 +383,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
|
||||
0.001000, // 1000 usec
|
||||
0.030, // 30 ms
|
||||
1.000, // 1000 ms
|
||||
30.000, // 30000 ms
|
||||
];
|
||||
|
||||
const STORAGE_IO_TIME_OPERATIONS: &[&str] = &[
|
||||
@@ -622,7 +652,7 @@ pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
pub static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_wait_seconds",
|
||||
"Time spent waiting for access to the WAL redo process",
|
||||
"Time spent waiting for access to the Postgres WAL redo process",
|
||||
redo_histogram_time_buckets!(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
@@ -631,7 +661,7 @@ pub static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_records_histogram",
|
||||
"Histogram of number of records replayed per redo",
|
||||
"Histogram of number of records replayed per redo in the Postgres WAL redo process",
|
||||
redo_histogram_count_buckets!(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
@@ -640,7 +670,7 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
|
||||
pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_bytes_histogram",
|
||||
"Histogram of number of records replayed per redo",
|
||||
"Histogram of number of records replayed per redo sent to Postgres",
|
||||
redo_bytes_histogram_count_buckets!(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
@@ -723,7 +753,9 @@ pub struct TimelineMetrics {
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
pub reconstruct_time_histo: Histogram,
|
||||
pub get_reconstruct_data_time_histo: Histogram,
|
||||
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
|
||||
pub materialized_page_cache_hit_upon_request_counter: GenericCounter<AtomicU64>,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
@@ -734,6 +766,7 @@ pub struct TimelineMetrics {
|
||||
pub last_record_gauge: IntGauge,
|
||||
pub wait_lsn_time_histo: Histogram,
|
||||
pub resident_physical_size_gauge: UIntGauge,
|
||||
pub read_num_fs_layers: Histogram,
|
||||
/// copy of LayeredTimeline.current_logical_size
|
||||
pub current_logical_size_gauge: UIntGauge,
|
||||
pub num_persistent_files_created: IntCounter,
|
||||
@@ -753,6 +786,9 @@ impl TimelineMetrics {
|
||||
let reconstruct_time_histo = RECONSTRUCT_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
@@ -794,6 +830,12 @@ impl TimelineMetrics {
|
||||
let evictions = EVICTIONS
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let read_num_fs_layers = READ_NUM_FS_LAYERS
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let evictions_with_low_residence_duration =
|
||||
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
||||
|
||||
@@ -801,7 +843,9 @@ impl TimelineMetrics {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
reconstruct_time_histo,
|
||||
get_reconstruct_data_time_histo,
|
||||
materialized_page_cache_hit_counter,
|
||||
materialized_page_cache_hit_upon_request_counter,
|
||||
flush_time_histo,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
@@ -819,6 +863,7 @@ impl TimelineMetrics {
|
||||
evictions_with_low_residence_duration: std::sync::RwLock::new(
|
||||
evictions_with_low_residence_duration,
|
||||
),
|
||||
read_num_fs_layers,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -828,7 +873,9 @@ impl Drop for TimelineMetrics {
|
||||
let tenant_id = &self.tenant_id;
|
||||
let timeline_id = &self.timeline_id;
|
||||
let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||
@@ -836,6 +883,8 @@ impl Drop for TimelineMetrics {
|
||||
let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = READ_NUM_FS_LAYERS.remove_label_values(&[tenant_id, timeline_id]);
|
||||
|
||||
self.evictions_with_low_residence_duration
|
||||
.write()
|
||||
.unwrap()
|
||||
|
||||
@@ -762,8 +762,7 @@ where
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{LayerMap, Replacement};
|
||||
use crate::tenant::storage_layer::mock::LayerDescriptor;
|
||||
use crate::tenant::storage_layer::{Layer, LayerFileName};
|
||||
use crate::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::ops::Range;
|
||||
|
||||
// TODO the `im` crate has 20x more downloads and also has
|
||||
// persistent/immutable BTree. It also runs a bit faster but
|
||||
// results are not the same on some tests.
|
||||
// NOTE the `im` crate has 20x more downloads and also has
|
||||
// persistent/immutable BTree. But it's bugged so rpds is a
|
||||
// better choice https://github.com/neondatabase/neon/issues/3395
|
||||
use rpds::RedBlackTreeMapSync;
|
||||
|
||||
/// Data structure that can efficiently:
|
||||
|
||||
@@ -779,7 +779,6 @@ pub async fn immediate_gc(
|
||||
Ok(wait_task_done)
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub async fn immediate_compact(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
|
||||
@@ -4,6 +4,7 @@ pub mod delta_layer;
|
||||
mod filename;
|
||||
mod image_layer;
|
||||
mod inmemory_layer;
|
||||
mod layer_desc;
|
||||
mod remote_layer;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
@@ -37,6 +38,7 @@ pub use delta_layer::{DeltaLayer, DeltaLayerWriter};
|
||||
pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
pub use image_layer::{ImageLayer, ImageLayerWriter};
|
||||
pub use inmemory_layer::InMemoryLayer;
|
||||
pub use layer_desc::PersistentLayerDesc;
|
||||
pub use remote_layer::RemoteLayer;
|
||||
|
||||
use super::layer_map::BatchedUpdates;
|
||||
@@ -406,14 +408,23 @@ pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i>;
|
||||
/// An image layer is a snapshot of all the data in a key-range, at a single
|
||||
/// LSN.
|
||||
pub trait PersistentLayer: Layer {
|
||||
fn get_tenant_id(&self) -> TenantId;
|
||||
/// Get the layer descriptor.
|
||||
fn layer_desc(&self) -> &PersistentLayerDesc;
|
||||
|
||||
fn get_tenant_id(&self) -> TenantId {
|
||||
self.layer_desc().tenant_id
|
||||
}
|
||||
|
||||
/// Identify the timeline this layer belongs to
|
||||
fn get_timeline_id(&self) -> TimelineId;
|
||||
fn get_timeline_id(&self) -> TimelineId {
|
||||
self.layer_desc().timeline_id
|
||||
}
|
||||
|
||||
/// File name used for this layer, both in the pageserver's local filesystem
|
||||
/// state as well as in the remote storage.
|
||||
fn filename(&self) -> LayerFileName;
|
||||
fn filename(&self) -> LayerFileName {
|
||||
self.layer_desc().filename()
|
||||
}
|
||||
|
||||
// Path to the layer file in the local filesystem.
|
||||
// `None` for `RemoteLayer`.
|
||||
@@ -460,86 +471,80 @@ pub fn downcast_remote_layer(
|
||||
}
|
||||
}
|
||||
|
||||
// Hiding this code under a compilation flag allows us to lint it differently than prod code
|
||||
#[cfg(any(test, feature = "bench"))]
|
||||
pub mod mock {
|
||||
use super::*;
|
||||
/// Holds metadata about a layer without any content. Used mostly for testing.
|
||||
///
|
||||
/// To use filenames as fixtures, parse them as [`LayerFileName`] then convert from that to a
|
||||
/// LayerDescriptor.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LayerDescriptor {
|
||||
pub key: Range<Key>,
|
||||
pub lsn: Range<Lsn>,
|
||||
pub is_incremental: bool,
|
||||
pub short_id: String,
|
||||
}
|
||||
|
||||
/// Holds metadata about a layer without any content. Used mostly for testing.
|
||||
///
|
||||
/// To use filenames as fixtures, parse them as [`LayerFileName`] then convert from that to a
|
||||
/// LayerDescriptor.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LayerDescriptor {
|
||||
pub key: Range<Key>,
|
||||
pub lsn: Range<Lsn>,
|
||||
pub is_incremental: bool,
|
||||
pub short_id: String,
|
||||
impl Layer for LayerDescriptor {
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.key.clone()
|
||||
}
|
||||
|
||||
impl Layer for LayerDescriptor {
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.key.clone()
|
||||
}
|
||||
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.lsn.clone()
|
||||
}
|
||||
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.is_incremental
|
||||
}
|
||||
|
||||
fn get_value_reconstruct_data(
|
||||
&self,
|
||||
_key: Key,
|
||||
_lsn_range: Range<Lsn>,
|
||||
_reconstruct_data: &mut ValueReconstructState,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<ValueReconstructResult> {
|
||||
todo!("This method shouldn't be part of the Layer trait")
|
||||
}
|
||||
|
||||
fn short_id(&self) -> String {
|
||||
self.short_id.clone()
|
||||
}
|
||||
|
||||
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.lsn.clone()
|
||||
}
|
||||
|
||||
impl From<DeltaFileName> for LayerDescriptor {
|
||||
fn from(value: DeltaFileName) -> Self {
|
||||
let short_id = value.to_string();
|
||||
LayerDescriptor {
|
||||
key: value.key_range,
|
||||
lsn: value.lsn_range,
|
||||
is_incremental: true,
|
||||
short_id,
|
||||
}
|
||||
}
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.is_incremental
|
||||
}
|
||||
|
||||
impl From<ImageFileName> for LayerDescriptor {
|
||||
fn from(value: ImageFileName) -> Self {
|
||||
let short_id = value.to_string();
|
||||
let lsn = value.lsn_as_range();
|
||||
LayerDescriptor {
|
||||
key: value.key_range,
|
||||
lsn,
|
||||
is_incremental: false,
|
||||
short_id,
|
||||
}
|
||||
}
|
||||
fn get_value_reconstruct_data(
|
||||
&self,
|
||||
_key: Key,
|
||||
_lsn_range: Range<Lsn>,
|
||||
_reconstruct_data: &mut ValueReconstructState,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<ValueReconstructResult> {
|
||||
todo!("This method shouldn't be part of the Layer trait")
|
||||
}
|
||||
|
||||
impl From<LayerFileName> for LayerDescriptor {
|
||||
fn from(value: LayerFileName) -> Self {
|
||||
match value {
|
||||
LayerFileName::Delta(d) => Self::from(d),
|
||||
LayerFileName::Image(i) => Self::from(i),
|
||||
}
|
||||
fn short_id(&self) -> String {
|
||||
self.short_id.clone()
|
||||
}
|
||||
|
||||
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DeltaFileName> for LayerDescriptor {
|
||||
fn from(value: DeltaFileName) -> Self {
|
||||
let short_id = value.to_string();
|
||||
LayerDescriptor {
|
||||
key: value.key_range,
|
||||
lsn: value.lsn_range,
|
||||
is_incremental: true,
|
||||
short_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImageFileName> for LayerDescriptor {
|
||||
fn from(value: ImageFileName) -> Self {
|
||||
let short_id = value.to_string();
|
||||
let lsn = value.lsn_as_range();
|
||||
LayerDescriptor {
|
||||
key: value.key_range,
|
||||
lsn,
|
||||
is_incremental: false,
|
||||
short_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LayerFileName> for LayerDescriptor {
|
||||
fn from(value: LayerFileName) -> Self {
|
||||
match value {
|
||||
LayerFileName::Delta(d) => Self::from(d),
|
||||
LayerFileName::Image(i) => Self::from(i),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,8 +56,8 @@ use utils::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerFileName, LayerIter,
|
||||
LayerKeyIter, PathOrConf,
|
||||
DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter,
|
||||
PathOrConf, PersistentLayerDesc,
|
||||
};
|
||||
|
||||
///
|
||||
@@ -89,10 +89,10 @@ impl From<&DeltaLayer> for Summary {
|
||||
magic: DELTA_FILE_MAGIC,
|
||||
format_version: STORAGE_FORMAT_VERSION,
|
||||
|
||||
tenant_id: layer.tenant_id,
|
||||
timeline_id: layer.timeline_id,
|
||||
key_range: layer.key_range.clone(),
|
||||
lsn_range: layer.lsn_range.clone(),
|
||||
tenant_id: layer.desc.tenant_id,
|
||||
timeline_id: layer.desc.timeline_id,
|
||||
key_range: layer.desc.key_range.clone(),
|
||||
lsn_range: layer.desc.lsn_range.clone(),
|
||||
|
||||
index_start_blk: 0,
|
||||
index_root_blk: 0,
|
||||
@@ -180,10 +180,7 @@ impl DeltaKey {
|
||||
pub struct DeltaLayer {
|
||||
path_or_conf: PathOrConf,
|
||||
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub key_range: Range<Key>,
|
||||
pub lsn_range: Range<Lsn>,
|
||||
pub desc: PersistentLayerDesc,
|
||||
|
||||
pub file_size: u64,
|
||||
|
||||
@@ -197,8 +194,8 @@ impl std::fmt::Debug for DeltaLayer {
|
||||
use super::RangeDisplayDebug;
|
||||
|
||||
f.debug_struct("DeltaLayer")
|
||||
.field("key_range", &RangeDisplayDebug(&self.key_range))
|
||||
.field("lsn_range", &self.lsn_range)
|
||||
.field("key_range", &RangeDisplayDebug(&self.desc.key_range))
|
||||
.field("lsn_range", &self.desc.lsn_range)
|
||||
.field("file_size", &self.file_size)
|
||||
.field("inner", &self.inner)
|
||||
.finish()
|
||||
@@ -228,30 +225,16 @@ impl std::fmt::Debug for DeltaLayerInner {
|
||||
}
|
||||
|
||||
impl Layer for DeltaLayer {
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.lsn_range.clone()
|
||||
}
|
||||
fn is_incremental(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn short_id(&self) -> String {
|
||||
self.filename().file_name()
|
||||
}
|
||||
/// debugging function to print out the contents of the layer
|
||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
println!(
|
||||
"----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} ----",
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
self.key_range.start,
|
||||
self.key_range.end,
|
||||
self.lsn_range.start,
|
||||
self.lsn_range.end
|
||||
self.desc.tenant_id,
|
||||
self.desc.timeline_id,
|
||||
self.desc.key_range.start,
|
||||
self.desc.key_range.end,
|
||||
self.desc.lsn_range.start,
|
||||
self.desc.lsn_range.end
|
||||
);
|
||||
|
||||
if !verbose {
|
||||
@@ -324,10 +307,10 @@ impl Layer for DeltaLayer {
|
||||
reconstruct_state: &mut ValueReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ValueReconstructResult> {
|
||||
ensure!(lsn_range.start >= self.lsn_range.start);
|
||||
ensure!(lsn_range.start >= self.desc.lsn_range.start);
|
||||
let mut need_image = true;
|
||||
|
||||
ensure!(self.key_range.contains(&key));
|
||||
ensure!(self.desc.key_range.contains(&key));
|
||||
|
||||
{
|
||||
// Open the file and lock the metadata in memory
|
||||
@@ -402,19 +385,31 @@ impl Layer for DeltaLayer {
|
||||
Ok(ValueReconstructResult::Complete)
|
||||
}
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.layer_desc().key_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.layer_desc().lsn_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.layer_desc().is_incremental
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn short_id(&self) -> String {
|
||||
self.layer_desc().short_id()
|
||||
}
|
||||
}
|
||||
|
||||
impl PersistentLayer for DeltaLayer {
|
||||
fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
}
|
||||
|
||||
fn get_timeline_id(&self) -> TimelineId {
|
||||
self.timeline_id
|
||||
}
|
||||
|
||||
fn filename(&self) -> LayerFileName {
|
||||
self.layer_name().into()
|
||||
fn layer_desc(&self) -> &PersistentLayerDesc {
|
||||
&self.desc
|
||||
}
|
||||
|
||||
fn local_path(&self) -> Option<PathBuf> {
|
||||
@@ -602,10 +597,12 @@ impl DeltaLayer {
|
||||
) -> DeltaLayer {
|
||||
DeltaLayer {
|
||||
path_or_conf: PathOrConf::Conf(conf),
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
key_range: filename.key_range.clone(),
|
||||
lsn_range: filename.lsn_range.clone(),
|
||||
desc: PersistentLayerDesc::new_delta(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
filename.key_range.clone(),
|
||||
filename.lsn_range.clone(),
|
||||
),
|
||||
file_size,
|
||||
access_stats,
|
||||
inner: RwLock::new(DeltaLayerInner {
|
||||
@@ -632,10 +629,12 @@ impl DeltaLayer {
|
||||
|
||||
Ok(DeltaLayer {
|
||||
path_or_conf: PathOrConf::Path(path.to_path_buf()),
|
||||
timeline_id: summary.timeline_id,
|
||||
tenant_id: summary.tenant_id,
|
||||
key_range: summary.key_range,
|
||||
lsn_range: summary.lsn_range,
|
||||
desc: PersistentLayerDesc::new_delta(
|
||||
summary.tenant_id,
|
||||
summary.timeline_id,
|
||||
summary.key_range,
|
||||
summary.lsn_range,
|
||||
),
|
||||
file_size: metadata.len(),
|
||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||
inner: RwLock::new(DeltaLayerInner {
|
||||
@@ -648,18 +647,14 @@ impl DeltaLayer {
|
||||
}
|
||||
|
||||
fn layer_name(&self) -> DeltaFileName {
|
||||
DeltaFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
}
|
||||
self.desc.delta_file_name()
|
||||
}
|
||||
|
||||
/// Path to the layer file in pageserver workdir.
|
||||
pub fn path(&self) -> PathBuf {
|
||||
Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.desc.timeline_id,
|
||||
self.desc.tenant_id,
|
||||
&self.layer_name(),
|
||||
)
|
||||
}
|
||||
@@ -803,10 +798,12 @@ impl DeltaLayerWriterInner {
|
||||
// set inner.file here. The first read will have to re-open it.
|
||||
let layer = DeltaLayer {
|
||||
path_or_conf: PathOrConf::Conf(self.conf),
|
||||
tenant_id: self.tenant_id,
|
||||
timeline_id: self.timeline_id,
|
||||
key_range: self.key_start..key_end,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
desc: PersistentLayerDesc::new_delta(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
self.key_start..key_end,
|
||||
self.lsn_range.clone(),
|
||||
),
|
||||
file_size: metadata.len(),
|
||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||
inner: RwLock::new(DeltaLayerInner {
|
||||
|
||||
@@ -9,6 +9,8 @@ use std::str::FromStr;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::PersistentLayerDesc;
|
||||
|
||||
// Note: Timeline::load_layer_map() relies on this sort order
|
||||
#[derive(PartialEq, Eq, Clone, Hash)]
|
||||
pub struct DeltaFileName {
|
||||
@@ -153,7 +155,7 @@ impl Ord for ImageFileName {
|
||||
impl ImageFileName {
|
||||
pub fn lsn_as_range(&self) -> Range<Lsn> {
|
||||
// Saves from having to copypaste this all over
|
||||
self.lsn..(self.lsn + 1)
|
||||
PersistentLayerDesc::image_layer_lsn_range(self.lsn)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -52,8 +52,8 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use super::filename::{ImageFileName, LayerFileName};
|
||||
use super::{Layer, LayerAccessStatsReset, LayerIter, PathOrConf};
|
||||
use super::filename::ImageFileName;
|
||||
use super::{Layer, LayerAccessStatsReset, LayerIter, PathOrConf, PersistentLayerDesc};
|
||||
|
||||
///
|
||||
/// Header stored in the beginning of the file
|
||||
@@ -84,9 +84,9 @@ impl From<&ImageLayer> for Summary {
|
||||
Self {
|
||||
magic: IMAGE_FILE_MAGIC,
|
||||
format_version: STORAGE_FORMAT_VERSION,
|
||||
tenant_id: layer.tenant_id,
|
||||
timeline_id: layer.timeline_id,
|
||||
key_range: layer.key_range.clone(),
|
||||
tenant_id: layer.desc.tenant_id,
|
||||
timeline_id: layer.desc.timeline_id,
|
||||
key_range: layer.desc.key_range.clone(),
|
||||
lsn: layer.lsn,
|
||||
|
||||
index_start_blk: 0,
|
||||
@@ -104,14 +104,13 @@ impl From<&ImageLayer> for Summary {
|
||||
/// and it needs to be loaded before using it in queries.
|
||||
pub struct ImageLayer {
|
||||
path_or_conf: PathOrConf,
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub key_range: Range<Key>,
|
||||
pub file_size: u64,
|
||||
|
||||
// This entry contains an image of all pages as of this LSN
|
||||
pub desc: PersistentLayerDesc,
|
||||
// This entry contains an image of all pages as of this LSN, should be the same as desc.lsn
|
||||
pub lsn: Lsn,
|
||||
|
||||
pub file_size: u64,
|
||||
|
||||
access_stats: LayerAccessStats,
|
||||
|
||||
inner: RwLock<ImageLayerInner>,
|
||||
@@ -122,7 +121,7 @@ impl std::fmt::Debug for ImageLayer {
|
||||
use super::RangeDisplayDebug;
|
||||
|
||||
f.debug_struct("ImageLayer")
|
||||
.field("key_range", &RangeDisplayDebug(&self.key_range))
|
||||
.field("key_range", &RangeDisplayDebug(&self.desc.key_range))
|
||||
.field("file_size", &self.file_size)
|
||||
.field("lsn", &self.lsn)
|
||||
.field("inner", &self.inner)
|
||||
@@ -153,27 +152,15 @@ impl std::fmt::Debug for ImageLayerInner {
|
||||
}
|
||||
|
||||
impl Layer for ImageLayer {
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
// End-bound is exclusive
|
||||
self.lsn..(self.lsn + 1)
|
||||
}
|
||||
fn is_incremental(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn short_id(&self) -> String {
|
||||
self.filename().file_name()
|
||||
}
|
||||
|
||||
/// debugging function to print out the contents of the layer
|
||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
println!(
|
||||
"----- image layer for ten {} tli {} key {}-{} at {} ----",
|
||||
self.tenant_id, self.timeline_id, self.key_range.start, self.key_range.end, self.lsn
|
||||
self.desc.tenant_id,
|
||||
self.desc.timeline_id,
|
||||
self.desc.key_range.start,
|
||||
self.desc.key_range.end,
|
||||
self.lsn
|
||||
);
|
||||
|
||||
if !verbose {
|
||||
@@ -203,7 +190,7 @@ impl Layer for ImageLayer {
|
||||
reconstruct_state: &mut ValueReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ValueReconstructResult> {
|
||||
assert!(self.key_range.contains(&key));
|
||||
assert!(self.desc.key_range.contains(&key));
|
||||
assert!(lsn_range.start >= self.lsn);
|
||||
assert!(lsn_range.end >= self.lsn);
|
||||
|
||||
@@ -230,24 +217,37 @@ impl Layer for ImageLayer {
|
||||
Ok(ValueReconstructResult::Missing)
|
||||
}
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.layer_desc().key_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.layer_desc().lsn_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.layer_desc().is_incremental
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn short_id(&self) -> String {
|
||||
self.layer_desc().short_id()
|
||||
}
|
||||
}
|
||||
|
||||
impl PersistentLayer for ImageLayer {
|
||||
fn filename(&self) -> LayerFileName {
|
||||
self.layer_name().into()
|
||||
fn layer_desc(&self) -> &PersistentLayerDesc {
|
||||
&self.desc
|
||||
}
|
||||
|
||||
fn local_path(&self) -> Option<PathBuf> {
|
||||
Some(self.path())
|
||||
}
|
||||
|
||||
fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
}
|
||||
|
||||
fn get_timeline_id(&self) -> TimelineId {
|
||||
self.timeline_id
|
||||
}
|
||||
fn iter(&self, _ctx: &RequestContext) -> Result<LayerIter<'_>> {
|
||||
unimplemented!();
|
||||
}
|
||||
@@ -405,9 +405,13 @@ impl ImageLayer {
|
||||
) -> ImageLayer {
|
||||
ImageLayer {
|
||||
path_or_conf: PathOrConf::Conf(conf),
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
key_range: filename.key_range.clone(),
|
||||
desc: PersistentLayerDesc::new_img(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
filename.key_range.clone(),
|
||||
filename.lsn,
|
||||
false,
|
||||
), // Now we assume image layer ALWAYS covers the full range. This may change in the future.
|
||||
lsn: filename.lsn,
|
||||
file_size,
|
||||
access_stats,
|
||||
@@ -433,9 +437,13 @@ impl ImageLayer {
|
||||
.context("get file metadata to determine size")?;
|
||||
Ok(ImageLayer {
|
||||
path_or_conf: PathOrConf::Path(path.to_path_buf()),
|
||||
timeline_id: summary.timeline_id,
|
||||
tenant_id: summary.tenant_id,
|
||||
key_range: summary.key_range,
|
||||
desc: PersistentLayerDesc::new_img(
|
||||
summary.tenant_id,
|
||||
summary.timeline_id,
|
||||
summary.key_range,
|
||||
summary.lsn,
|
||||
false,
|
||||
), // Now we assume image layer ALWAYS covers the full range. This may change in the future.
|
||||
lsn: summary.lsn,
|
||||
file_size: metadata.len(),
|
||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||
@@ -449,18 +457,15 @@ impl ImageLayer {
|
||||
}
|
||||
|
||||
fn layer_name(&self) -> ImageFileName {
|
||||
ImageFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn,
|
||||
}
|
||||
self.desc.image_file_name()
|
||||
}
|
||||
|
||||
/// Path to the layer file in pageserver workdir.
|
||||
pub fn path(&self) -> PathBuf {
|
||||
Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.desc.timeline_id,
|
||||
self.desc.tenant_id,
|
||||
&self.layer_name(),
|
||||
)
|
||||
}
|
||||
@@ -484,6 +489,7 @@ struct ImageLayerWriterInner {
|
||||
tenant_id: TenantId,
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
is_incremental: bool,
|
||||
|
||||
blob_writer: WriteBlobWriter<VirtualFile>,
|
||||
tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>,
|
||||
@@ -499,6 +505,7 @@ impl ImageLayerWriterInner {
|
||||
tenant_id: TenantId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
is_incremental: bool,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Create the file initially with a temporary filename.
|
||||
// We'll atomically rename it to the final name when we're done.
|
||||
@@ -533,6 +540,7 @@ impl ImageLayerWriterInner {
|
||||
lsn,
|
||||
tree: tree_builder,
|
||||
blob_writer,
|
||||
is_incremental,
|
||||
};
|
||||
|
||||
Ok(writer)
|
||||
@@ -570,6 +578,14 @@ impl ImageLayerWriterInner {
|
||||
file.write_all(buf.as_ref())?;
|
||||
}
|
||||
|
||||
let desc = PersistentLayerDesc::new_img(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
self.key_range.clone(),
|
||||
self.lsn,
|
||||
self.is_incremental, // for now, image layer ALWAYS covers the full range
|
||||
);
|
||||
|
||||
// Fill in the summary on blk 0
|
||||
let summary = Summary {
|
||||
magic: IMAGE_FILE_MAGIC,
|
||||
@@ -593,9 +609,7 @@ impl ImageLayerWriterInner {
|
||||
// set inner.file here. The first read will have to re-open it.
|
||||
let layer = ImageLayer {
|
||||
path_or_conf: PathOrConf::Conf(self.conf),
|
||||
timeline_id: self.timeline_id,
|
||||
tenant_id: self.tenant_id,
|
||||
key_range: self.key_range.clone(),
|
||||
desc,
|
||||
lsn: self.lsn,
|
||||
file_size: metadata.len(),
|
||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||
@@ -667,6 +681,7 @@ impl ImageLayerWriter {
|
||||
tenant_id: TenantId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
is_incremental: bool,
|
||||
) -> anyhow::Result<ImageLayerWriter> {
|
||||
Ok(Self {
|
||||
inner: Some(ImageLayerWriterInner::new(
|
||||
@@ -675,6 +690,7 @@ impl ImageLayerWriter {
|
||||
tenant_id,
|
||||
key_range,
|
||||
lsn,
|
||||
is_incremental,
|
||||
)?),
|
||||
})
|
||||
}
|
||||
|
||||
109
pageserver/src/tenant/storage_layer/layer_desc.rs
Normal file
109
pageserver/src/tenant/storage_layer/layer_desc.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use std::ops::Range;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::repository::Key;
|
||||
|
||||
use super::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
|
||||
/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
|
||||
/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
|
||||
/// a unified way to generate layer information like file name.
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PersistentLayerDesc {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub key_range: Range<Key>,
|
||||
/// For image layer, this is `[lsn, lsn+1)`.
|
||||
pub lsn_range: Range<Lsn>,
|
||||
/// Whether this is a delta layer.
|
||||
pub is_delta: bool,
|
||||
/// Whether this layer only contains page images for part of the keys in the range. In the current implementation, this should
|
||||
/// always be equal to `is_delta`. If we land the partial image layer PR someday, image layer could also be
|
||||
/// incremental.
|
||||
pub is_incremental: bool,
|
||||
}
|
||||
|
||||
impl PersistentLayerDesc {
|
||||
pub fn short_id(&self) -> String {
|
||||
self.filename().file_name()
|
||||
}
|
||||
|
||||
pub fn new_img(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
is_incremental: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
key_range,
|
||||
lsn_range: Self::image_layer_lsn_range(lsn),
|
||||
is_delta: false,
|
||||
is_incremental,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_delta(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
key_range: Range<Key>,
|
||||
lsn_range: Range<Lsn>,
|
||||
) -> Self {
|
||||
Self {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
key_range,
|
||||
lsn_range,
|
||||
is_delta: true,
|
||||
is_incremental: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the LSN that the image layer covers.
|
||||
pub fn image_layer_lsn(&self) -> Lsn {
|
||||
assert!(!self.is_delta);
|
||||
assert!(self.lsn_range.start + 1 == self.lsn_range.end);
|
||||
self.lsn_range.start
|
||||
}
|
||||
|
||||
/// Get the LSN range corresponding to a single image layer LSN.
|
||||
pub fn image_layer_lsn_range(lsn: Lsn) -> Range<Lsn> {
|
||||
lsn..(lsn + 1)
|
||||
}
|
||||
|
||||
/// Get a delta file name for this layer.
|
||||
///
|
||||
/// Panic: if this is not a delta layer.
|
||||
pub fn delta_file_name(&self) -> DeltaFileName {
|
||||
assert!(self.is_delta);
|
||||
DeltaFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a delta file name for this layer.
|
||||
///
|
||||
/// Panic: if this is not an image layer, or the lsn range is invalid
|
||||
pub fn image_file_name(&self) -> ImageFileName {
|
||||
assert!(!self.is_delta);
|
||||
assert!(self.lsn_range.start + 1 == self.lsn_range.end);
|
||||
ImageFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn_range.start,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn filename(&self) -> LayerFileName {
|
||||
if self.is_delta {
|
||||
self.delta_file_name().into()
|
||||
} else {
|
||||
self.image_file_name().into()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,11 +18,10 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use super::filename::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
use super::image_layer::ImageLayer;
|
||||
use super::filename::{DeltaFileName, ImageFileName};
|
||||
use super::{
|
||||
DeltaLayer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter,
|
||||
LayerResidenceStatus, PersistentLayer,
|
||||
DeltaLayer, ImageLayer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter,
|
||||
LayerResidenceStatus, PersistentLayer, PersistentLayerDesc,
|
||||
};
|
||||
|
||||
/// RemoteLayer is a not yet downloaded [`ImageLayer`] or
|
||||
@@ -34,19 +33,10 @@ use super::{
|
||||
///
|
||||
/// See: [`crate::context::RequestContext`] for authorization to download
|
||||
pub struct RemoteLayer {
|
||||
tenantid: TenantId,
|
||||
timelineid: TimelineId,
|
||||
key_range: Range<Key>,
|
||||
lsn_range: Range<Lsn>,
|
||||
|
||||
pub file_name: LayerFileName,
|
||||
pub desc: PersistentLayerDesc,
|
||||
|
||||
pub layer_metadata: LayerFileMetadata,
|
||||
|
||||
is_delta: bool,
|
||||
|
||||
is_incremental: bool,
|
||||
|
||||
access_stats: LayerAccessStats,
|
||||
|
||||
pub(crate) ongoing_download: Arc<tokio::sync::Semaphore>,
|
||||
@@ -66,22 +56,14 @@ pub struct RemoteLayer {
|
||||
impl std::fmt::Debug for RemoteLayer {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RemoteLayer")
|
||||
.field("file_name", &self.file_name)
|
||||
.field("file_name", &self.desc.filename())
|
||||
.field("layer_metadata", &self.layer_metadata)
|
||||
.field("is_incremental", &self.is_incremental)
|
||||
.field("is_incremental", &self.desc.is_incremental)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Layer for RemoteLayer {
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.lsn_range.clone()
|
||||
}
|
||||
|
||||
fn get_value_reconstruct_data(
|
||||
&self,
|
||||
_key: Key,
|
||||
@@ -95,53 +77,45 @@ impl Layer for RemoteLayer {
|
||||
);
|
||||
}
|
||||
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.is_incremental
|
||||
}
|
||||
|
||||
/// debugging function to print out the contents of the layer
|
||||
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||
println!(
|
||||
"----- remote layer for ten {} tli {} keys {}-{} lsn {}-{} ----",
|
||||
self.tenantid,
|
||||
self.timelineid,
|
||||
self.key_range.start,
|
||||
self.key_range.end,
|
||||
self.lsn_range.start,
|
||||
self.lsn_range.end
|
||||
self.desc.tenant_id,
|
||||
self.desc.timeline_id,
|
||||
self.desc.key_range.start,
|
||||
self.desc.key_range.end,
|
||||
self.desc.lsn_range.start,
|
||||
self.desc.lsn_range.end
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_key_range(&self) -> Range<Key> {
|
||||
self.layer_desc().key_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||
self.layer_desc().lsn_range.clone()
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn is_incremental(&self) -> bool {
|
||||
self.layer_desc().is_incremental
|
||||
}
|
||||
|
||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||
fn short_id(&self) -> String {
|
||||
self.filename().file_name()
|
||||
self.layer_desc().short_id()
|
||||
}
|
||||
}
|
||||
|
||||
impl PersistentLayer for RemoteLayer {
|
||||
fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenantid
|
||||
}
|
||||
|
||||
fn get_timeline_id(&self) -> TimelineId {
|
||||
self.timelineid
|
||||
}
|
||||
|
||||
fn filename(&self) -> LayerFileName {
|
||||
if self.is_delta {
|
||||
DeltaFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
}
|
||||
.into()
|
||||
} else {
|
||||
ImageFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn_range.start,
|
||||
}
|
||||
.into()
|
||||
}
|
||||
fn layer_desc(&self) -> &PersistentLayerDesc {
|
||||
&self.desc
|
||||
}
|
||||
|
||||
fn local_path(&self) -> Option<PathBuf> {
|
||||
@@ -176,7 +150,7 @@ impl PersistentLayer for RemoteLayer {
|
||||
let layer_file_name = self.filename().file_name();
|
||||
let lsn_range = self.get_lsn_range();
|
||||
|
||||
if self.is_delta {
|
||||
if self.desc.is_delta {
|
||||
HistoricLayerInfo::Delta {
|
||||
layer_file_name,
|
||||
layer_file_size: self.layer_metadata.file_size(),
|
||||
@@ -210,13 +184,13 @@ impl RemoteLayer {
|
||||
access_stats: LayerAccessStats,
|
||||
) -> RemoteLayer {
|
||||
RemoteLayer {
|
||||
tenantid,
|
||||
timelineid,
|
||||
key_range: fname.key_range.clone(),
|
||||
lsn_range: fname.lsn_as_range(),
|
||||
is_delta: false,
|
||||
is_incremental: false,
|
||||
file_name: fname.to_owned().into(),
|
||||
desc: PersistentLayerDesc::new_img(
|
||||
tenantid,
|
||||
timelineid,
|
||||
fname.key_range.clone(),
|
||||
fname.lsn,
|
||||
false,
|
||||
),
|
||||
layer_metadata: layer_metadata.clone(),
|
||||
ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
|
||||
download_replacement_failure: std::sync::atomic::AtomicBool::default(),
|
||||
@@ -232,13 +206,12 @@ impl RemoteLayer {
|
||||
access_stats: LayerAccessStats,
|
||||
) -> RemoteLayer {
|
||||
RemoteLayer {
|
||||
tenantid,
|
||||
timelineid,
|
||||
key_range: fname.key_range.clone(),
|
||||
lsn_range: fname.lsn_range.clone(),
|
||||
is_delta: true,
|
||||
is_incremental: true,
|
||||
file_name: fname.to_owned().into(),
|
||||
desc: PersistentLayerDesc::new_delta(
|
||||
tenantid,
|
||||
timelineid,
|
||||
fname.key_range.clone(),
|
||||
fname.lsn_range.clone(),
|
||||
),
|
||||
layer_metadata: layer_metadata.clone(),
|
||||
ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
|
||||
download_replacement_failure: std::sync::atomic::AtomicBool::default(),
|
||||
@@ -256,15 +229,12 @@ impl RemoteLayer {
|
||||
where
|
||||
L: ?Sized + Layer,
|
||||
{
|
||||
if self.is_delta {
|
||||
let fname = DeltaFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
};
|
||||
if self.desc.is_delta {
|
||||
let fname = self.desc.delta_file_name();
|
||||
Arc::new(DeltaLayer::new(
|
||||
conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
self.desc.timeline_id,
|
||||
self.desc.tenant_id,
|
||||
&fname,
|
||||
file_size,
|
||||
self.access_stats.clone_for_residence_change(
|
||||
@@ -273,14 +243,11 @@ impl RemoteLayer {
|
||||
),
|
||||
))
|
||||
} else {
|
||||
let fname = ImageFileName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn_range.start,
|
||||
};
|
||||
let fname = self.desc.image_file_name();
|
||||
Arc::new(ImageLayer::new(
|
||||
conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
self.desc.timeline_id,
|
||||
self.desc.tenant_id,
|
||||
&fname,
|
||||
file_size,
|
||||
self.access_stats.clone_for_residence_change(
|
||||
|
||||
@@ -525,7 +525,12 @@ impl Timeline {
|
||||
Some((cached_lsn, cached_img)) => {
|
||||
match cached_lsn.cmp(&lsn) {
|
||||
Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check
|
||||
Ordering::Equal => return Ok(cached_img), // exact LSN match, return the image
|
||||
Ordering::Equal => {
|
||||
self.metrics
|
||||
.materialized_page_cache_hit_upon_request_counter
|
||||
.inc();
|
||||
return Ok(cached_img); // exact LSN match, return the image
|
||||
}
|
||||
Ordering::Greater => {
|
||||
unreachable!("the returned lsn should never be after the requested lsn")
|
||||
}
|
||||
@@ -540,8 +545,10 @@ impl Timeline {
|
||||
img: cached_page_img,
|
||||
};
|
||||
|
||||
let timer = self.metrics.get_reconstruct_data_time_histo.start_timer();
|
||||
self.get_reconstruct_data(key, lsn, &mut reconstruct_state, ctx)
|
||||
.await?;
|
||||
timer.stop_and_record();
|
||||
|
||||
self.metrics
|
||||
.reconstruct_time_histo
|
||||
@@ -2261,6 +2268,9 @@ impl Timeline {
|
||||
let mut timeline_owned;
|
||||
let mut timeline = self;
|
||||
|
||||
let mut read_count =
|
||||
scopeguard::guard(0, |cnt| self.metrics.read_num_fs_layers.observe(cnt as f64));
|
||||
|
||||
// For debugging purposes, collect the path of layers that we traversed
|
||||
// through. It's included in the error message if we fail to find the key.
|
||||
let mut traversal_path = Vec::<TraversalPathItem>::new();
|
||||
@@ -2395,6 +2405,7 @@ impl Timeline {
|
||||
Err(e) => return Err(PageReconstructError::from(e)),
|
||||
};
|
||||
cont_lsn = lsn_floor;
|
||||
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
|
||||
traversal_path.push((
|
||||
result,
|
||||
cont_lsn,
|
||||
@@ -2421,6 +2432,7 @@ impl Timeline {
|
||||
Err(e) => return Err(PageReconstructError::from(e)),
|
||||
};
|
||||
cont_lsn = lsn_floor;
|
||||
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
|
||||
traversal_path.push((
|
||||
result,
|
||||
cont_lsn,
|
||||
@@ -2455,6 +2467,7 @@ impl Timeline {
|
||||
Err(e) => return Err(PageReconstructError::from(e)),
|
||||
};
|
||||
cont_lsn = lsn_floor;
|
||||
*read_count += 1;
|
||||
traversal_path.push((
|
||||
result,
|
||||
cont_lsn,
|
||||
@@ -2520,7 +2533,7 @@ impl Timeline {
|
||||
(DownloadBehavior::Error, false) => {
|
||||
return Err(PageReconstructError::NeedsDownload(
|
||||
TenantTimelineId::new(self.tenant_id, self.timeline_id),
|
||||
remote_layer.file_name.clone(),
|
||||
remote_layer.filename(),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -3053,6 +3066,7 @@ impl Timeline {
|
||||
self.tenant_id,
|
||||
&img_range,
|
||||
lsn,
|
||||
false, // image layer always covers the full range
|
||||
)?;
|
||||
|
||||
fail_point!("image-layer-writer-fail-before-finish", |_| {
|
||||
@@ -4113,7 +4127,7 @@ impl Timeline {
|
||||
// Does retries + exponential back-off internally.
|
||||
// When this fails, don't layer further retry attempts here.
|
||||
let result = remote_client
|
||||
.download_layer_file(&remote_layer.file_name, &remote_layer.layer_metadata)
|
||||
.download_layer_file(&remote_layer.filename(), &remote_layer.layer_metadata)
|
||||
.await;
|
||||
|
||||
if let Ok(size) = &result {
|
||||
|
||||
@@ -17,7 +17,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{project_git_version, sentry_init::init_sentry};
|
||||
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{error, info, warn, Instrument};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
@@ -141,7 +141,6 @@ async fn task_main(
|
||||
tokio::select! {
|
||||
accept_result = listener.accept() => {
|
||||
let (socket, peer_addr) = accept_result?;
|
||||
info!("accepted postgres client connection from {peer_addr}");
|
||||
|
||||
let session_id = uuid::Uuid::new_v4();
|
||||
let tls_config = Arc::clone(&tls_config);
|
||||
@@ -149,18 +148,18 @@ async fn task_main(
|
||||
|
||||
connections.spawn(
|
||||
async move {
|
||||
info!("spawned a task for {peer_addr}");
|
||||
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("failed to set socket option")?;
|
||||
|
||||
handle_client(dest_suffix, tls_config, session_id, socket).await
|
||||
info!(%peer_addr, "serving");
|
||||
handle_client(dest_suffix, tls_config, socket).await
|
||||
}
|
||||
.unwrap_or_else(|e| {
|
||||
// Acknowledge that the task has finished with an error.
|
||||
error!("per-client task finished with an error: {e:#}");
|
||||
}),
|
||||
})
|
||||
.instrument(tracing::info_span!("handle_client", ?session_id))
|
||||
);
|
||||
}
|
||||
_ = cancellation_token.cancelled() => {
|
||||
@@ -192,7 +191,6 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let mut stream = PqStream::new(Stream::from_raw(raw_stream));
|
||||
|
||||
let msg = stream.read_startup_packet().await?;
|
||||
info!("received {msg:?}");
|
||||
use pq_proto::FeStartupPacket::*;
|
||||
|
||||
match msg {
|
||||
@@ -215,15 +213,19 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
}
|
||||
Ok(raw.upgrade(tls_config).await?)
|
||||
}
|
||||
_ => stream.throw_error_str(ERR_INSECURE_CONNECTION).await?,
|
||||
unexpected => {
|
||||
info!(
|
||||
?unexpected,
|
||||
"unexpected startup packet, rejecting connection"
|
||||
);
|
||||
stream.throw_error_str(ERR_INSECURE_CONNECTION).await?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(fields(session_id = ?session_id), skip_all)]
|
||||
async fn handle_client(
|
||||
dest_suffix: Arc<String>,
|
||||
tls_config: Arc<rustls::ServerConfig>,
|
||||
session_id: uuid::Uuid,
|
||||
stream: impl AsyncRead + AsyncWrite + Unpin,
|
||||
) -> anyhow::Result<()> {
|
||||
let tls_stream = ssl_handshake(stream, tls_config).await?;
|
||||
|
||||
@@ -65,12 +65,19 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_getpage_reconstruct_seconds_bucket",
|
||||
"pageserver_getpage_reconstruct_seconds_count",
|
||||
"pageserver_getpage_reconstruct_seconds_sum",
|
||||
"pageserver_getpage_get_reconstruct_data_seconds_bucket",
|
||||
"pageserver_getpage_get_reconstruct_data_seconds_count",
|
||||
"pageserver_getpage_get_reconstruct_data_seconds_sum",
|
||||
"pageserver_io_operations_bytes_total",
|
||||
"pageserver_io_operations_seconds_bucket",
|
||||
"pageserver_io_operations_seconds_count",
|
||||
"pageserver_io_operations_seconds_sum",
|
||||
"pageserver_last_record_lsn",
|
||||
"pageserver_materialized_cache_hits_total",
|
||||
"pageserver_materialized_cache_hits_direct_total",
|
||||
"pageserver_read_num_fs_layers_bucket",
|
||||
"pageserver_read_num_fs_layers_count",
|
||||
"pageserver_read_num_fs_layers_sum",
|
||||
"pageserver_smgr_query_seconds_bucket",
|
||||
"pageserver_smgr_query_seconds_count",
|
||||
"pageserver_smgr_query_seconds_sum",
|
||||
|
||||
@@ -37,6 +37,7 @@ class PgSniRouter(PgProtocol):
|
||||
destination: str,
|
||||
tls_cert: Path,
|
||||
tls_key: Path,
|
||||
test_output_dir: Path,
|
||||
):
|
||||
# Must use a hostname rather than IP here, for SNI to work
|
||||
host = "localhost"
|
||||
@@ -49,6 +50,7 @@ class PgSniRouter(PgProtocol):
|
||||
self.tls_cert = tls_cert
|
||||
self.tls_key = tls_key
|
||||
self._popen: Optional[subprocess.Popen[bytes]] = None
|
||||
self.test_output_dir = test_output_dir
|
||||
|
||||
def start(self) -> "PgSniRouter":
|
||||
assert self._popen is None
|
||||
@@ -60,8 +62,12 @@ class PgSniRouter(PgProtocol):
|
||||
*["--destination", self.destination],
|
||||
]
|
||||
|
||||
self._popen = subprocess.Popen(args)
|
||||
router_log_path = self.test_output_dir / "pg_sni_router.log"
|
||||
router_log = open(router_log_path, "w")
|
||||
|
||||
self._popen = subprocess.Popen(args, stderr=router_log)
|
||||
self._wait_until_ready()
|
||||
log.info(f"pg_sni_router started, log file: {router_log_path}")
|
||||
return self
|
||||
|
||||
@backoff.on_exception(backoff.expo, OSError, max_time=10)
|
||||
@@ -121,6 +127,7 @@ def test_pg_sni_router(
|
||||
destination="localtest.me",
|
||||
tls_cert=test_output_dir / "router.crt",
|
||||
tls_key=test_output_dir / "router.key",
|
||||
test_output_dir=test_output_dir,
|
||||
) as router:
|
||||
router.start()
|
||||
|
||||
|
||||
@@ -437,12 +437,22 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
wait_until(50, 0.1, got_hangup_log_message)
|
||||
|
||||
# ok, retry without failpoint, it should succeed
|
||||
# check that the timeline is still present
|
||||
ps_http.timeline_detail(env.initial_tenant, child_timeline_id)
|
||||
|
||||
# ok, disable the failpoint to let the deletion finish
|
||||
ps_http.configure_failpoints((failpoint_name, "off"))
|
||||
|
||||
# this should succeed
|
||||
ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2)
|
||||
# the second call will try to transition the timeline into Stopping state, but it's already in that state
|
||||
env.pageserver.allowed_errors.append(
|
||||
f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
|
||||
)
|
||||
def first_request_finished():
|
||||
message = f".*DELETE.*{child_timeline_id}.*Cancelled request finished"
|
||||
assert env.pageserver.log_contains(message)
|
||||
|
||||
wait_until(50, 0.1, first_request_finished)
|
||||
|
||||
# check that the timeline is gone
|
||||
notfound_message = f"Timeline {env.initial_tenant}/{child_timeline_id} was not found"
|
||||
env.pageserver.allowed_errors.append(".*" + notfound_message)
|
||||
with pytest.raises(PageserverApiException, match=notfound_message) as exc:
|
||||
ps_http.timeline_detail(env.initial_tenant, child_timeline_id)
|
||||
|
||||
assert exc.value.status_code == 404
|
||||
|
||||
Reference in New Issue
Block a user