virtual_file: distinguish first and subsequent open() syscalls

This helps with identifying thrashing.

While reading the code, I also found a case where we waste
work in a cache pressure situation: https://github.com/neondatabase/neon/issues/6065

refs https://github.com/neondatabase/cloud/issues/8351
This commit is contained in:
Christian Schwarz
2023-12-07 14:52:01 +00:00
parent 6f7d248419
commit db0a35176e
2 changed files with 16 additions and 3 deletions

View File

@@ -774,6 +774,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
)]
pub(crate) enum StorageIoOperation {
Open,
OpenAfterReplace,
Close,
CloseByReplace,
Read,
@@ -787,6 +788,7 @@ impl StorageIoOperation {
pub fn as_str(&self) -> &'static str {
match self {
StorageIoOperation::Open => "open",
StorageIoOperation::OpenAfterReplace => "open-after-replace",
StorageIoOperation::Close => "close",
StorageIoOperation::CloseByReplace => "close-by-replace",
StorageIoOperation::Read => "read",
@@ -840,7 +842,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
)
.expect("failed to define a metric")
});
#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,

View File

@@ -312,6 +312,9 @@ impl VirtualFile {
}
let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;
// NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
// where our caller doesn't get to use the returned VirtualFile before its
// slot gets re-used by someone else.
let file = observe_duration!(StorageIoOperation::Open, open_options.open(path))?;
// Strip all options other than read and write.
@@ -333,6 +336,9 @@ impl VirtualFile {
timeline_id,
};
// TODO: Under pressure, it's likely the slot will get re-used and
// the underlying file closed before they get around to using it.
// => https://github.com/neondatabase/neon/issues/6065
slot_guard.file.replace(file);
Ok(vfile)
@@ -441,8 +447,14 @@ impl VirtualFile {
// now locked in write-mode. Find a free slot to put it in.
let (handle, mut slot_guard) = open_files.find_victim_slot().await;
// Open the physical file
let file = observe_duration!(StorageIoOperation::Open, self.open_options.open(&self.path))?;
// Re-open the physical file.
// NB: we use StorageIoOperation::OpenAferReplace for this to distinguish this
// case from StorageIoOperation::Open. This helps with identifying thrashing
// of the virtual file descriptor cache.
let file = observe_duration!(
StorageIoOperation::OpenAfterReplace,
self.open_options.open(&self.path)
)?;
// Store the File in the slot and update the handle in the VirtualFile
// to point to it.