Files
neon/pageserver/src/tenant/storage_layer/layer/tests.rs
Vlad Lazar 07b974480c pageserver: move things around to prepare for decoding logic (#9504)
## Problem

We wish to have high level WAL decoding logic in `wal_decoder::decoder`
module.

## Summary of Changes

For this we need the `Value` and `NeonWalRecord` types accessible there, so:
1. Move `Value` and `NeonWalRecord` to `pageserver::value` and
`pageserver::record` respectively.
2. Get rid of `pageserver::repository` (follow up from (1))
3. Move PG specific WAL record types to `postgres_ffi::walrecord`. In
theory they could live in `wal_decoder`, but it would create a circular
dependency between `wal_decoder` and `postgres_ffi`. Long term it makes
sense for those types to be PG version specific, so that will work out nicely.
4. Move higher level WAL record types (to be ingested by pageserver)
into `wal_decoder::models`

Related: https://github.com/neondatabase/neon/issues/9335
Epic: https://github.com/neondatabase/neon/issues/9329
2024-10-29 10:00:34 +00:00

1049 lines
36 KiB
Rust

use std::time::UNIX_EPOCH;
use pageserver_api::key::CONTROLFILE_KEY;
use tokio::task::JoinSet;
use utils::{
completion::{self, Completion},
id::TimelineId,
};
use super::failpoints::{Failpoint, FailpointKind};
use super::*;
use crate::{context::DownloadBehavior, tenant::storage_layer::LayerVisibilityHint};
use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness};
/// Used in tests to advance a future to wanted await point, and not futher.
const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600);
/// Used in tests to indicate forever long timeout; has to be longer than the amount of ADVANCE
/// timeout uses to advance futures.
const FOREVER: std::time::Duration = std::time::Duration::from_secs(ADVANCE.as_secs() * 24 * 7);
/// Demonstrate the API and resident -> evicted -> resident -> deleted transitions.
#[tokio::test]
async fn smoke_test() {
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("smoke_test").await.unwrap();
let span = h.span();
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
let (tenant, _) = h.load().await;
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// all layers created at pageserver are like `layer`, initialized with strong
// Arc<DownloadedLayer>.
let controlfile_keyspace = KeySpace {
ranges: vec![CONTROLFILE_KEY..CONTROLFILE_KEY.next()],
};
let img_before = {
let mut data = ValuesReconstructState::default();
layer
.get_values_reconstruct_data(
controlfile_keyspace.clone(),
Lsn(0x10)..Lsn(0x11),
&mut data,
&ctx,
)
.await
.unwrap();
data.keys
.remove(&CONTROLFILE_KEY)
.expect("must be present")
.expect("should not error")
.img
.take()
.expect("tenant harness writes the control file")
};
// important part is evicting the layer, which can be done when there are no more ResidentLayer
// instances -- there currently are none, only two `Layer` values, one in the layermap and on
// in scope.
layer.evict_and_wait(FOREVER).await.unwrap();
// double-evict returns an error, which is valid if both eviction_task and disk usage based
// eviction would both evict the same layer at the same time.
let e = layer.evict_and_wait(FOREVER).await.unwrap_err();
assert!(matches!(e, EvictionError::NotFound));
// on accesses when the layer is evicted, it will automatically be downloaded.
let img_after = {
let mut data = ValuesReconstructState::default();
layer
.get_values_reconstruct_data(
controlfile_keyspace.clone(),
Lsn(0x10)..Lsn(0x11),
&mut data,
&ctx,
)
.instrument(download_span.clone())
.await
.unwrap();
data.keys
.remove(&CONTROLFILE_KEY)
.expect("must be present")
.expect("should not error")
.img
.take()
.expect("tenant harness writes the control file")
};
assert_eq!(img_before, img_after);
// evict_and_wait can timeout, but it doesn't cancel the evicting itself
//
// ZERO for timeout does not work reliably, so first take up all spawn_blocking slots to
// artificially slow it down.
let helper = SpawnBlockingPoolHelper::consume_all_spawn_blocking_threads(&handle).await;
match layer
.evict_and_wait(std::time::Duration::ZERO)
.await
.unwrap_err()
{
EvictionError::Timeout => {
// expected, but note that the eviction is "still ongoing"
helper.release().await;
// exhaust spawn_blocking pool to ensure it is now complete
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle)
.await;
}
other => unreachable!("{other:?}"),
}
// only way to query if a layer is resident is to acquire a ResidentLayer instance.
// Layer::keep_resident never downloads, but it might initialize if the layer file is found
// downloaded locally.
let none = layer.keep_resident().await;
assert!(
none.is_none(),
"Expected none, because eviction removed the local file, found: {none:?}"
);
// plain downloading is rarely needed
layer
.download_and_keep_resident()
.instrument(download_span)
.await
.unwrap();
// last important part is deletion on drop: gc and compaction use it for compacted L0 layers
// or fully garbage collected layers. deletion means deleting the local file, and scheduling a
// deletion of the already unlinked from index_part.json remote file.
//
// marking a layer to be deleted on drop is irreversible; there is no technical reason against
// reversiblity, but currently it is not needed so it is not provided.
layer.delete_on_drop();
let path = layer.local_path().to_owned();
// wait_drop produces an unconnected to Layer future which will resolve when the
// LayerInner::drop has completed.
let mut wait_drop = std::pin::pin!(layer.wait_drop());
// paused time doesn't really work well with timeouts and evict_and_wait, so delay pausing
// until here
tokio::time::pause();
tokio::time::timeout(ADVANCE, &mut wait_drop)
.await
.expect_err("should had timed out because two strong references exist");
tokio::fs::metadata(&path)
.await
.expect("the local layer file still exists");
let rtc = &timeline.remote_client;
{
let layers = &[layer];
let mut g = timeline.layers.write().await;
g.open_mut().unwrap().finish_gc_timeline(layers);
// this just updates the remote_physical_size for demonstration purposes
rtc.schedule_gc_update(layers).unwrap();
}
// when strong references are dropped, the file is deleted and remote deletion is scheduled
wait_drop.await;
let e = tokio::fs::metadata(&path)
.await
.expect_err("the local file is deleted");
assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
rtc.wait_completion().await.unwrap();
assert_eq!(rtc.get_remote_physical_size(), 0);
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
}
/// This test demonstrates a previous hang when a eviction and deletion were requested at the same
/// time. Now both of them complete per Arc drop semantics.
#[tokio::test(start_paused = true)]
async fn evict_and_wait_on_wanted_deleted() {
// this is the runtime on which Layer spawns the blocking tasks on
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("evict_and_wait_on_wanted_deleted")
.await
.unwrap();
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
let (tenant, ctx) = h.load().await;
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// setup done
let resident = layer.keep_resident().await.unwrap();
{
let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));
// drive the future to await on the status channel
tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect_err("should had been a timeout since we are holding the layer resident");
layer.delete_on_drop();
drop(resident);
// make sure the eviction task gets to run
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
let resident = layer.keep_resident().await;
assert!(
resident.is_none(),
"keep_resident should not have re-initialized: {resident:?}"
);
evict_and_wait
.await
.expect("evict_and_wait should had succeeded");
// works as intended
}
// assert that once we remove the `layer` from the layer map and drop our reference,
// the deletion of the layer in remote_storage happens.
{
let mut layers = timeline.layers.write().await;
layers.open_mut().unwrap().finish_gc_timeline(&[layer]);
}
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
assert_eq!(1, LAYER_IMPL_METRICS.started_deletes.get());
assert_eq!(1, LAYER_IMPL_METRICS.completed_deletes.get());
assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());
assert_eq!(1, LAYER_IMPL_METRICS.completed_evictions.get());
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
}
/// This test ensures we are able to read the layer while the layer eviction has been
/// started but not completed.
#[test]
fn read_wins_pending_eviction() {
let rt = tokio::runtime::Builder::new_current_thread()
.max_blocking_threads(1)
.enable_all()
.start_paused(true)
.build()
.unwrap();
rt.block_on(async move {
// this is the runtime on which Layer spawns the blocking tasks on
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("read_wins_pending_eviction")
.await
.unwrap();
let (tenant, ctx) = h.load().await;
let span = h.span();
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// setup done
let resident = layer.keep_resident().await.unwrap();
let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));
// drive the future to await on the status channel
tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect_err("should had been a timeout since we are holding the layer resident");
assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());
let (completion, barrier) = utils::completion::channel();
let (arrival, arrived_at_barrier) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(
Some(arrival),
barrier,
));
// now the eviction cannot proceed because the threads are consumed while completion exists
drop(resident);
arrived_at_barrier.wait().await;
assert!(!layer.is_likely_resident());
// because no actual eviction happened, we get to just reinitialize the DownloadedLayer
layer
.0
.get_or_maybe_download(false, None)
.instrument(download_span)
.await
.expect("should had reinitialized without downloading");
assert!(layer.is_likely_resident());
// reinitialization notifies of new resident status, which should error out all evict_and_wait
let e = tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect("no timeout, because get_or_maybe_download re-initialized")
.expect_err("eviction should not have succeeded because re-initialized");
// works as intended: evictions lose to "downloads"
assert!(matches!(e, EvictionError::Downloaded), "{e:?}");
assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());
// this is not wrong: the eviction is technically still "on the way" as it's still queued
// because of a failpoint
assert_eq!(
0,
LAYER_IMPL_METRICS
.cancelled_evictions
.values()
.map(|ctr| ctr.get())
.sum::<u64>()
);
drop(completion);
tokio::time::sleep(ADVANCE).await;
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(&handle, 1)
.await;
assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());
// now we finally can observe the original eviction failing
// it would had been possible to observe it earlier, but here it is guaranteed to have
// happened.
assert_eq!(
1,
LAYER_IMPL_METRICS
.cancelled_evictions
.values()
.map(|ctr| ctr.get())
.sum::<u64>()
);
assert_eq!(
1,
LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::AlreadyReinitialized].get()
);
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
});
}
/// Use failpoint to delay an eviction starting to get a VersionCheckFailed.
#[test]
fn multiple_pending_evictions_in_order() {
let name = "multiple_pending_evictions_in_order";
let in_order = true;
multiple_pending_evictions_scenario(name, in_order);
}
/// Use failpoint to reorder later eviction before first to get a UnexpectedEvictedState.
#[test]
fn multiple_pending_evictions_out_of_order() {
let name = "multiple_pending_evictions_out_of_order";
let in_order = false;
multiple_pending_evictions_scenario(name, in_order);
}
fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) {
let rt = tokio::runtime::Builder::new_current_thread()
.max_blocking_threads(1)
.enable_all()
.start_paused(true)
.build()
.unwrap();
rt.block_on(async move {
// this is the runtime on which Layer spawns the blocking tasks on
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create(name).await.unwrap();
let (tenant, ctx) = h.load().await;
let span = h.span();
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// setup done
let resident = layer.keep_resident().await.unwrap();
let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));
// drive the future to await on the status channel
tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect_err("should had been a timeout since we are holding the layer resident");
assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());
let (completion1, barrier) = utils::completion::channel();
let mut completion1 = Some(completion1);
let (arrival, arrived_at_barrier) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(
Some(arrival),
barrier,
));
// now the eviction cannot proceed because we are simulating arbitrary long delay for the
// eviction task start.
drop(resident);
assert!(!layer.is_likely_resident());
arrived_at_barrier.wait().await;
// because no actual eviction happened, we get to just reinitialize the DownloadedLayer
layer
.0
.get_or_maybe_download(false, None)
.instrument(download_span)
.await
.expect("should had reinitialized without downloading");
assert!(layer.is_likely_resident());
// reinitialization notifies of new resident status, which should error out all evict_and_wait
let e = tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect("no timeout, because get_or_maybe_download re-initialized")
.expect_err("eviction should not have succeeded because re-initialized");
// works as intended: evictions lose to "downloads"
assert!(matches!(e, EvictionError::Downloaded), "{e:?}");
assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());
// this is not wrong: the eviction is technically still "on the way" as it's still queued
// because of a failpoint
assert_eq!(
0,
LAYER_IMPL_METRICS
.cancelled_evictions
.values()
.map(|ctr| ctr.get())
.sum::<u64>()
);
assert_eq!(0, LAYER_IMPL_METRICS.completed_evictions.get());
// configure another failpoint for the second eviction -- evictions are per initialization,
// so now that we've reinitialized the inner, we get to run two of them at the same time.
let (completion2, barrier) = utils::completion::channel();
let (arrival, arrived_at_barrier) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(
Some(arrival),
barrier,
));
let mut second_eviction = std::pin::pin!(layer.evict_and_wait(FOREVER));
// advance to the wait on the queue
tokio::time::timeout(ADVANCE, &mut second_eviction)
.await
.expect_err("timeout because failpoint is blocking");
arrived_at_barrier.wait().await;
assert_eq!(2, LAYER_IMPL_METRICS.started_evictions.get());
let mut release_earlier_eviction = |expected_reason| {
assert_eq!(
0,
LAYER_IMPL_METRICS.cancelled_evictions[expected_reason].get(),
);
drop(completion1.take().unwrap());
let handle = &handle;
async move {
tokio::time::sleep(ADVANCE).await;
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(
handle, 1,
)
.await;
assert_eq!(
1,
LAYER_IMPL_METRICS.cancelled_evictions[expected_reason].get(),
);
}
};
if in_order {
release_earlier_eviction(EvictionCancelled::VersionCheckFailed).await;
}
// release the later eviction which is for the current version
drop(completion2);
tokio::time::sleep(ADVANCE).await;
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads0(&handle, 1)
.await;
if !in_order {
release_earlier_eviction(EvictionCancelled::UnexpectedEvictedState).await;
}
tokio::time::timeout(ADVANCE, &mut second_eviction)
.await
.expect("eviction goes through now that spawn_blocking is unclogged")
.expect("eviction should succeed, because version matches");
assert_eq!(1, LAYER_IMPL_METRICS.completed_evictions.get());
// ensure the cancelled are unchanged
assert_eq!(
1,
LAYER_IMPL_METRICS
.cancelled_evictions
.values()
.map(|ctr| ctr.get())
.sum::<u64>()
);
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
});
}
/// The test ensures with a failpoint that a pending eviction is not cancelled by what is currently
/// a `Layer::keep_resident` call.
///
/// This matters because cancelling the eviction would leave us in a state where the file is on
/// disk but the layer internal state says it has not been initialized. Futhermore, it allows us to
/// have non-repairing `Layer::is_likely_resident`.
#[tokio::test(start_paused = true)]
async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("cancelled_get_or_maybe_download_does_not_cancel_eviction")
.await
.unwrap();
let (tenant, ctx) = h.load().await;
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// this failpoint will simulate the `get_or_maybe_download` becoming cancelled (by returning an
// Err) at the right time as in "during" the `LayerInner::needs_download`.
layer.enable_failpoint(Failpoint::AfterDeterminingLayerNeedsNoDownload);
let (completion, barrier) = utils::completion::channel();
let (arrival, arrived_at_barrier) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(
Some(arrival),
barrier,
));
tokio::time::timeout(ADVANCE, layer.evict_and_wait(FOREVER))
.await
.expect_err("should had advanced to waiting on channel");
arrived_at_barrier.wait().await;
// simulate a cancelled read which is cancelled before it gets to re-initialize
let e = layer
.0
.get_or_maybe_download(false, None)
.await
.unwrap_err();
assert!(
matches!(
e,
DownloadError::Failpoint(FailpointKind::AfterDeterminingLayerNeedsNoDownload)
),
"{e:?}"
);
assert!(
layer.0.needs_download().await.unwrap().is_none(),
"file is still on disk"
);
// release the eviction task
drop(completion);
tokio::time::sleep(ADVANCE).await;
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
// failpoint is still enabled, but it is not hit
let e = layer
.0
.get_or_maybe_download(false, None)
.await
.unwrap_err();
assert!(matches!(e, DownloadError::DownloadRequired), "{e:?}");
// failpoint is not counted as cancellation either
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
}
#[tokio::test(start_paused = true)]
async fn evict_and_wait_does_not_wait_for_download() {
// let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("evict_and_wait_does_not_wait_for_download")
.await
.unwrap();
let (tenant, ctx) = h.load().await;
let span = h.span();
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
let layer = {
let mut layers = {
let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
};
assert_eq!(layers.len(), 1);
layers.swap_remove(0)
};
// kind of forced setup: start an eviction but do not allow it progress until we are
// downloading
let (eviction_can_continue, barrier) = utils::completion::channel();
let (arrival, eviction_arrived) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeStartingEvicting(
Some(arrival),
barrier,
));
let mut evict_and_wait = std::pin::pin!(layer.evict_and_wait(FOREVER));
// use this once-awaited other_evict to synchronize with the eviction
let other_evict = layer.evict_and_wait(FOREVER);
tokio::time::timeout(ADVANCE, &mut evict_and_wait)
.await
.expect_err("should had advanced");
eviction_arrived.wait().await;
drop(eviction_can_continue);
other_evict.await.unwrap();
// now the layer is evicted, and the "evict_and_wait" is waiting on the receiver
assert!(!layer.is_likely_resident());
// following new evict_and_wait will fail until we've completed the download
let e = layer.evict_and_wait(FOREVER).await.unwrap_err();
assert!(matches!(e, EvictionError::NotFound), "{e:?}");
let (download_can_continue, barrier) = utils::completion::channel();
let (arrival, _download_arrived) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier));
let mut download = std::pin::pin!(layer
.0
.get_or_maybe_download(true, None)
.instrument(download_span));
assert!(
!layer.is_likely_resident(),
"during download layer is evicted"
);
tokio::time::timeout(ADVANCE, &mut download)
.await
.expect_err("should had timed out because of failpoint");
// now we finally get to continue, and because the latest state is downloading, we deduce that
// original eviction succeeded
evict_and_wait.await.unwrap();
// however a new evict_and_wait will fail
let e = layer.evict_and_wait(FOREVER).await.unwrap_err();
assert!(matches!(e, EvictionError::NotFound), "{e:?}");
assert!(!layer.is_likely_resident());
drop(download_can_continue);
download.await.expect("download should had succeeded");
assert!(layer.is_likely_resident());
// only now can we evict
layer.evict_and_wait(FOREVER).await.unwrap();
}
/// Asserts that there is no miscalculation when Layer is dropped while it is being kept resident,
/// which is the last value.
///
/// Also checks that the same does not happen on a non-evicted layer (regression test).
#[tokio::test(start_paused = true)]
async fn eviction_cancellation_on_drop() {
use bytes::Bytes;
use pageserver_api::value::Value;
// this is the runtime on which Layer spawns the blocking tasks on
let handle = tokio::runtime::Handle::current();
let h = TenantHarness::create("eviction_cancellation_on_drop")
.await
.unwrap();
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
let (tenant, ctx) = h.load().await;
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
.unwrap();
{
// create_test_timeline wrote us one layer, write another
let mut writer = timeline.writer().await;
writer
.put(
pageserver_api::key::Key::from_i128(5),
Lsn(0x20),
&Value::Image(Bytes::from_static(b"this does not matter either")),
&ctx,
)
.await
.unwrap();
writer.finish_write(Lsn(0x20));
}
timeline.freeze_and_flush().await.unwrap();
// wait for the upload to complete so our Arc::strong_count assertion holds
timeline.remote_client.wait_completion().await.unwrap();
let (evicted_layer, not_evicted) = {
let mut layers = {
let mut guard = timeline.layers.write().await;
let layers = guard.likely_resident_layers().cloned().collect::<Vec<_>>();
// remove the layers from layermap
guard.open_mut().unwrap().finish_gc_timeline(&layers);
layers
};
assert_eq!(layers.len(), 2);
(layers.pop().unwrap(), layers.pop().unwrap())
};
let victims = [(evicted_layer, true), (not_evicted, false)];
for (victim, evict) in victims {
let resident = victim.keep_resident().await.unwrap();
drop(victim);
assert_eq!(Arc::strong_count(&resident.owner.0), 1);
if evict {
let evict_and_wait = resident.owner.evict_and_wait(FOREVER);
// drive the future to await on the status channel, and then drop it
tokio::time::timeout(ADVANCE, evict_and_wait)
.await
.expect_err("should had been a timeout since we are holding the layer resident");
}
// 1 == we only evict one of the layers
assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());
drop(resident);
// run any spawned
tokio::time::sleep(ADVANCE).await;
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
assert_eq!(
1,
LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::LayerGone].get()
);
}
}
/// A test case to remind you the cost of these structures. You can bump the size limit
/// below if it is really necessary to add more fields to the structures.
#[test]
#[cfg(target_arch = "x86_64")]
fn layer_size() {
assert_eq!(size_of::<LayerAccessStats>(), 8);
assert_eq!(size_of::<PersistentLayerDesc>(), 104);
assert_eq!(size_of::<LayerInner>(), 296);
// it also has the utf8 path
}
struct SpawnBlockingPoolHelper {
awaited_by_spawn_blocking_tasks: Completion,
blocking_tasks: JoinSet<()>,
}
impl SpawnBlockingPoolHelper {
/// All `crate::task_mgr::BACKGROUND_RUNTIME` spawn_blocking threads will be consumed until
/// release is called.
///
/// In the tests this can be used to ensure something cannot be started on the target runtimes
/// spawn_blocking pool.
///
/// This should be no issue nowdays, because nextest runs each test in it's own process.
async fn consume_all_spawn_blocking_threads(handle: &tokio::runtime::Handle) -> Self {
let default_max_blocking_threads = 512;
Self::consume_all_spawn_blocking_threads0(handle, default_max_blocking_threads).await
}
async fn consume_all_spawn_blocking_threads0(
handle: &tokio::runtime::Handle,
threads: usize,
) -> Self {
assert_ne!(threads, 0);
let (completion, barrier) = completion::channel();
let (started, starts_completed) = completion::channel();
let mut blocking_tasks = JoinSet::new();
for _ in 0..threads {
let barrier = barrier.clone();
let started = started.clone();
blocking_tasks.spawn_blocking_on(
move || {
drop(started);
tokio::runtime::Handle::current().block_on(barrier.wait());
},
handle,
);
}
drop(started);
starts_completed.wait().await;
drop(barrier);
tracing::trace!("consumed all threads");
SpawnBlockingPoolHelper {
awaited_by_spawn_blocking_tasks: completion,
blocking_tasks,
}
}
/// Release all previously blocked spawn_blocking threads
async fn release(self) {
let SpawnBlockingPoolHelper {
awaited_by_spawn_blocking_tasks,
mut blocking_tasks,
} = self;
drop(awaited_by_spawn_blocking_tasks);
while let Some(res) = blocking_tasks.join_next().await {
res.expect("none of the tasks should had panicked");
}
tracing::trace!("released all threads");
}
/// In the tests it is used as an easy way of making sure something scheduled on the target
/// runtimes `spawn_blocking` has completed, because it must've been scheduled and completed
/// before our tasks have a chance to schedule and complete.
async fn consume_and_release_all_of_spawn_blocking_threads(handle: &tokio::runtime::Handle) {
Self::consume_and_release_all_of_spawn_blocking_threads0(handle, 512).await
}
async fn consume_and_release_all_of_spawn_blocking_threads0(
handle: &tokio::runtime::Handle,
threads: usize,
) {
Self::consume_all_spawn_blocking_threads0(handle, threads)
.await
.release()
.await
}
}
#[test]
fn spawn_blocking_pool_helper_actually_works() {
// create a custom runtime for which we know and control how many blocking threads it has
//
// because the amount is not configurable for our helper, expect the same amount as
// BACKGROUND_RUNTIME using the tokio defaults would have.
let rt = tokio::runtime::Builder::new_current_thread()
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
let handle = rt.handle();
rt.block_on(async move {
// this will not return until all threads are spun up and actually executing the code
// waiting on `consumed` to be `SpawnBlockingPoolHelper::release`'d.
let consumed =
SpawnBlockingPoolHelper::consume_all_spawn_blocking_threads0(handle, 1).await;
println!("consumed");
let mut jh = std::pin::pin!(tokio::task::spawn_blocking(move || {
// this will not get to run before we release
}));
println!("spawned");
tokio::time::timeout(std::time::Duration::from_secs(1), &mut jh)
.await
.expect_err("the task should not have gotten to run yet");
println!("tried to join");
consumed.release().await;
println!("released");
tokio::time::timeout(std::time::Duration::from_secs(1), jh)
.await
.expect("no timeout")
.expect("no join error");
println!("joined");
});
}
/// Drop the low bits from a time, to emulate the precision loss in LayerAccessStats
fn lowres_time(hires: SystemTime) -> SystemTime {
let ts = hires.duration_since(UNIX_EPOCH).unwrap().as_secs();
UNIX_EPOCH + Duration::from_secs(ts)
}
#[test]
fn access_stats() {
let access_stats = LayerAccessStats::default();
// Default is visible
assert_eq!(access_stats.visibility(), LayerVisibilityHint::Visible);
access_stats.set_visibility(LayerVisibilityHint::Covered);
assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered);
access_stats.set_visibility(LayerVisibilityHint::Visible);
assert_eq!(access_stats.visibility(), LayerVisibilityHint::Visible);
let rtime = UNIX_EPOCH + Duration::from_secs(2000000000);
access_stats.record_residence_event_at(rtime);
assert_eq!(access_stats.latest_activity(), lowres_time(rtime));
let atime = UNIX_EPOCH + Duration::from_secs(2100000000);
access_stats.record_access_at(atime);
assert_eq!(access_stats.latest_activity(), lowres_time(atime));
// Setting visibility doesn't clobber access time
access_stats.set_visibility(LayerVisibilityHint::Covered);
assert_eq!(access_stats.latest_activity(), lowres_time(atime));
access_stats.set_visibility(LayerVisibilityHint::Visible);
assert_eq!(access_stats.latest_activity(), lowres_time(atime));
// Recording access implicitly makes layer visible, if it wasn't already
let atime = UNIX_EPOCH + Duration::from_secs(2200000000);
access_stats.set_visibility(LayerVisibilityHint::Covered);
assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered);
assert!(access_stats.record_access_at(atime));
access_stats.set_visibility(LayerVisibilityHint::Visible);
assert!(!access_stats.record_access_at(atime));
access_stats.set_visibility(LayerVisibilityHint::Visible);
}
#[test]
fn access_stats_2038() {
// The access stats structure uses a timestamp representation that will run out
// of bits in 2038. One year before that, this unit test will start failing.
let one_year_from_now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap()
+ Duration::from_secs(3600 * 24 * 365);
assert!(one_year_from_now.as_secs() < (2 << 31));
}