mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 00:42:54 +00:00
refs https://github.com/neondatabase/neon/issues/7136 Problem ------- Before this PR, we were using `tokio_epoll_uring::thread_local_system()`, which panics on tokio_epoll_uring::System::launch() failure. As we've learned in [the past](https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391), some older Linux kernels account io_uring instances as locked memory. And while we've raised the limit in prod considerably, we did hit it once on 2024-03-11 16:30 UTC. That was after we enabled tokio-epoll-uring fleet-wide, but before we had shipped release-5090 (c6ed86d3d0), which did away with the last mass-creation of tokio-epoll-uring instances as per commit 3da410c8fe (Author: Christian Schwarz <christian@neon.tech>, Date: Tue Mar 5 10:03:54 2024 +0100): "tokio-epoll-uring: use it on the layer-creating code paths (#6378)". Nonetheless, it highlighted that panicking in this situation is probably not ideal, as it can leave the pageserver process in a semi-broken state. Further, due to the low sampling rate of Prometheus metrics, we don't know much about the circumstances of this failure instance. Solution -------- This PR implements a custom thread_local_system() that is pageserver-aware and will do the following on failure: - dump relevant stats to `tracing!`, hopefully they will be useful to understand the circumstances better - if it's the locked memory failure (or any other ENOMEM): abort() the process - if it's ENOMEM, retry with exponential back-off, capped at 3s. - add metric counters so we can create an alert This makes sense in the production environment where we know that _usually_, there's ample locked memory allowance available, and we know the failure is rare.
140 lines
4.1 KiB
Rust
140 lines
4.1 KiB
Rust
//! Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`].
|
|
|
|
use super::io_engine::IoEngine;
|
|
use std::{os::fd::OwnedFd, path::Path};
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum OpenOptions {
|
|
StdFs(std::fs::OpenOptions),
|
|
#[cfg(target_os = "linux")]
|
|
TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions),
|
|
}
|
|
|
|
impl Default for OpenOptions {
|
|
fn default() -> Self {
|
|
match super::io_engine::get() {
|
|
IoEngine::NotSet => panic!("io engine not set"),
|
|
IoEngine::StdFs => Self::StdFs(std::fs::OpenOptions::new()),
|
|
#[cfg(target_os = "linux")]
|
|
IoEngine::TokioEpollUring => {
|
|
Self::TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions::new())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl OpenOptions {
|
|
pub fn new() -> OpenOptions {
|
|
Self::default()
|
|
}
|
|
|
|
pub fn read(&mut self, read: bool) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.read(read);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.read(read);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
pub fn write(&mut self, write: bool) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.write(write);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.write(write);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
pub fn create(&mut self, create: bool) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.create(create);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.create(create);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.create_new(create_new);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.create_new(create_new);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.truncate(truncate);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.truncate(truncate);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
pub(in crate::virtual_file) async fn open(&self, path: &Path) -> std::io::Result<OwnedFd> {
|
|
match self {
|
|
OpenOptions::StdFs(x) => x.open(path).map(|file| file.into()),
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let system = super::io_engine::tokio_epoll_uring_ext::thread_local_system().await;
|
|
system.open(path, x).await.map_err(|e| match e {
|
|
tokio_epoll_uring::Error::Op(e) => e,
|
|
tokio_epoll_uring::Error::System(system) => {
|
|
std::io::Error::new(std::io::ErrorKind::Other, system)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
|
|
fn mode(&mut self, mode: u32) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.mode(mode);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.mode(mode);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
|
|
fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions {
|
|
match self {
|
|
OpenOptions::StdFs(x) => {
|
|
let _ = x.custom_flags(flags);
|
|
}
|
|
#[cfg(target_os = "linux")]
|
|
OpenOptions::TokioEpollUring(x) => {
|
|
let _ = x.custom_flags(flags);
|
|
}
|
|
}
|
|
self
|
|
}
|
|
}
|