attachment_service: use atomic overwrite to persist attachments.json (#6444)

The pagebench integration PR (#6214) is the first to SIGQUIT & then
restart attachment_service.

With many tenants (100), we have found frequent failures on restart in
the CI[^1].

[^1]:
[Allure](https://neon-github-public-dev.s3.amazonaws.com/reports/pr-6214/7615750160/index.html#suites/e26265675583c610f99af77084ae58f1/851ff709578c4452/)

```
2024-01-22T19:07:57.932021Z  INFO request{method=POST path=/attach-hook request_id=2697503c-7b3e-4529-b8c1-d12ef912d3eb}: Request handled, status: 200 OK
2024-01-22T19:07:58.898213Z  INFO Got SIGQUIT. Terminating
2024-01-22T19:08:02.176588Z  INFO version: git-env:d56f31639356ed8e8ce832097f132f27ee19ac8a, launch_timestamp: 2024-01-22 19:08:02.174634554 UTC, build_tag build_tag-env:7615750160, state at /tmp/test_output/test_pageserver_max_throughput_getpage_at_latest_lsn[10-13-30]/repo/attachments.json, listening on 127.0.0.1:15048
thread 'main' panicked at /__w/neon/neon/control_plane/attachment_service/src/persistence.rs:95:17:
Failed to load state from '/tmp/test_output/test_pageserver_max_throughput_getpage_at_latest_lsn[10-13-30]/repo/attachments.json': trailing characters at line 1 column 8957 (maybe your .neon/ dir was written by an older version?)
stack backtrace:
   0: rust_begin_unwind
             at /rustc/82e1608dfa6e0b5569232559e3d385fea5a93112/library/std/src/panicking.rs:645:5
   1: core::panicking::panic_fmt
             at /rustc/82e1608dfa6e0b5569232559e3d385fea5a93112/library/core/src/panicking.rs:72:14
   2: attachment_service::persistence::PersistentState::load_or_new::{{closure}}
             at ./control_plane/attachment_service/src/persistence.rs:95:17
   3: attachment_service::persistence::Persistence:🆕:{{closure}}
             at ./control_plane/attachment_service/src/persistence.rs:103:56
   4: attachment_service::main::{{closure}}
             at ./control_plane/attachment_service/src/main.rs:69:61
   5: tokio::runtime::park::CachedParkThread::block_on::{{closure}}
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/park.rs:282:63
   6: tokio::runtime::coop::with_budget
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/coop.rs:107:5
   7: tokio::runtime::coop::budget
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/coop.rs:73:5
   8: tokio::runtime::park::CachedParkThread::block_on
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/park.rs:282:31
   9: tokio::runtime::context::blocking::BlockingRegionGuard::block_on
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/context/blocking.rs:66:9
  10: tokio::runtime::scheduler::multi_thread::MultiThread::block_on::{{closure}}
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/scheduler/multi_thread/mod.rs:87:13
  11: tokio::runtime::context::runtime::enter_runtime
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/context/runtime.rs:65:16
  12: tokio::runtime::scheduler::multi_thread::MultiThread::block_on
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/scheduler/multi_thread/mod.rs:86:9
  13: tokio::runtime::runtime::Runtime::block_on
             at ./.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.34.0/src/runtime/runtime.rs:350:50
  14: attachment_service::main
             at ./control_plane/attachment_service/src/main.rs:99:5
  15: core::ops::function::FnOnce::call_once
             at /rustc/82e1608dfa6e0b5569232559e3d385fea5a93112/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
```

The attachment_service handles SIGQUIT by just exiting the process.
In theory, the SIGQUIT could come in while we're writing out the
`attachments.json`.

Now, in above log output, there's a 1 second gap between the last
request completing
and the SIGQUIT coming in. So, there must be some other issue.

But, let's have this change anyways, maybe it helps uncover the real
cause for the test failure.
This commit is contained in:
Christian Schwarz
2024-01-23 17:21:06 +01:00
committed by GitHub
parent 37638fce79
commit 42c17a6fc6
3 changed files with 53 additions and 11 deletions

View File

@@ -1,5 +1,6 @@
use std::{collections::HashMap, str::FromStr};
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use control_plane::{
attachment_service::{NodeAvailability, NodeSchedulingPolicy},
@@ -41,10 +42,14 @@ struct PendingWrite {
}
impl PendingWrite {
async fn commit(&self) -> anyhow::Result<()> {
tokio::fs::write(&self.path, &self.bytes).await?;
Ok(())
async fn commit(self) -> anyhow::Result<()> {
tokio::task::spawn_blocking(move || {
let tmp_path = utils::crashsafe::path_with_suffix_extension(&self.path, "___new");
utils::crashsafe::overwrite(&self.path, &tmp_path, &self.bytes)
})
.await
.context("spawn_blocking")?
.context("write file")
}
}

View File

@@ -1,7 +1,7 @@
use std::{
borrow::Cow,
fs::{self, File},
io,
io::{self, Write},
};
use camino::{Utf8Path, Utf8PathBuf};
@@ -112,6 +112,48 @@ pub async fn fsync_async(path: impl AsRef<Utf8Path>) -> Result<(), std::io::Erro
tokio::fs::File::open(path.as_ref()).await?.sync_all().await
}
/// Writes a file to the specified `final_path` in a crash safe fasion
///
/// The file is first written to the specified tmp_path, and in a second
/// step, the tmp path is renamed to the final path. As renames are
/// atomic, a crash during the write operation will never leave behind a
/// partially written file.
///
/// NB: an async variant of this code exists in Pageserver's VirtualFile.
pub fn overwrite(
final_path: &Utf8Path,
tmp_path: &Utf8Path,
content: &[u8],
) -> std::io::Result<()> {
let Some(final_path_parent) = final_path.parent() else {
return Err(std::io::Error::from_raw_os_error(
nix::errno::Errno::EINVAL as i32,
));
};
std::fs::remove_file(tmp_path).or_else(crate::fs_ext::ignore_not_found)?;
let mut file = std::fs::OpenOptions::new()
.write(true)
// Use `create_new` so that, if we race with ourselves or something else,
// we bail out instead of causing damage.
.create_new(true)
.open(tmp_path)?;
file.write_all(content)?;
file.sync_all()?;
drop(file); // before the rename, that's important!
// renames are atomic
std::fs::rename(tmp_path, final_path)?;
// Only open final path parent dirfd now, so that this operation only
// ever holds one VirtualFile fd at a time. That's important because
// the current `find_victim_slot` impl might pick the same slot for both
// VirtualFile., and it eventually does a blocking write lock instead of
// try_lock.
let final_parent_dirfd = std::fs::OpenOptions::new()
.read(true)
.open(final_path_parent)?;
final_parent_dirfd.sync_all()?;
Ok(())
}
#[cfg(test)]
mod tests {

View File

@@ -356,12 +356,7 @@ impl VirtualFile {
Ok(vfile)
}
/// Writes a file to the specified `final_path` in a crash safe fasion
///
/// The file is first written to the specified tmp_path, and in a second
/// step, the tmp path is renamed to the final path. As renames are
/// atomic, a crash during the write operation will never leave behind a
/// partially written file.
/// Async & [`VirtualFile`]-enabled version of [`::utils::crashsafe::overwrite`].
pub async fn crashsafe_overwrite(
final_path: &Utf8Path,
tmp_path: &Utf8Path,