mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-30 03:20:36 +00:00
Make VirtualFile::crashsafe_overwrite async fn (#5189)
## Problem The `VirtualFile::crashsafe_overwrite` function was introduced by #5186 but it was not turned `async fn` yet. We want to make these functions async fn as part of #4743. ## Summary of changes Make `VirtualFile::crashsafe_overwrite` async fn, as well as all the functions calling it. Don't make anything inside `crashsafe_overwrite` use async functionalities, as per #4743 instructions. Also, add rustdoc to `crashsafe_overwrite`. Part of #4743.
This commit is contained in:
@@ -469,7 +469,9 @@ impl PageServerHandler {
|
||||
// Create empty timeline
|
||||
info!("creating new timeline");
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
|
||||
let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)?;
|
||||
let timeline = tenant
|
||||
.create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)
|
||||
.await?;
|
||||
|
||||
// TODO mark timeline as not ready until it reaches end_lsn.
|
||||
// We might have some wal to import as well, and we should prevent compute
|
||||
|
||||
@@ -438,6 +438,7 @@ impl Tenant {
|
||||
// Save the metadata file to local disk.
|
||||
if !picked_local {
|
||||
save_metadata(self.conf, &tenant_id, &timeline_id, up_to_date_metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
}
|
||||
|
||||
@@ -1438,7 +1439,7 @@ impl Tenant {
|
||||
/// For tests, use `DatadirModification::init_empty_test_timeline` + `commit` to setup the
|
||||
/// minimum amount of keys required to get a writable timeline.
|
||||
/// (Without it, `put` might fail due to `repartition` failing.)
|
||||
pub fn create_empty_timeline(
|
||||
pub async fn create_empty_timeline(
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
initdb_lsn: Lsn,
|
||||
@@ -1450,10 +1451,10 @@ impl Tenant {
|
||||
"Cannot create empty timelines on inactive tenant"
|
||||
);
|
||||
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
let timeline_uninit_mark = self.create_timeline_uninit_mark(new_timeline_id, &timelines)?;
|
||||
drop(timelines);
|
||||
|
||||
let timeline_uninit_mark = {
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
self.create_timeline_uninit_mark(new_timeline_id, &timelines)?
|
||||
};
|
||||
let new_metadata = TimelineMetadata::new(
|
||||
// Initialize disk_consistent LSN to 0, The caller must import some data to
|
||||
// make it valid, before calling finish_creation()
|
||||
@@ -1472,6 +1473,7 @@ impl Tenant {
|
||||
initdb_lsn,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Helper for unit tests to create an empty timeline.
|
||||
@@ -1487,7 +1489,9 @@ impl Tenant {
|
||||
pg_version: u32,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Timeline>> {
|
||||
let uninit_tl = self.create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)?;
|
||||
let uninit_tl = self
|
||||
.create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)
|
||||
.await?;
|
||||
let tline = uninit_tl.raw_timeline().expect("we just created it");
|
||||
assert_eq!(tline.get_last_record_lsn(), Lsn(0));
|
||||
|
||||
@@ -2362,13 +2366,12 @@ impl Tenant {
|
||||
Ok(tenant_conf)
|
||||
}
|
||||
|
||||
pub(super) fn persist_tenant_config(
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id))]
|
||||
pub(super) async fn persist_tenant_config(
|
||||
tenant_id: &TenantId,
|
||||
target_config_path: &Path,
|
||||
tenant_conf: TenantConfOpt,
|
||||
) -> anyhow::Result<()> {
|
||||
let _enter = info_span!("saving tenantconf").entered();
|
||||
|
||||
// imitate a try-block with a closure
|
||||
info!("persisting tenantconf to {}", target_config_path.display());
|
||||
|
||||
@@ -2386,6 +2389,7 @@ impl Tenant {
|
||||
|
||||
let temp_path = path_with_suffix_extension(target_config_path, TEMP_FILE_SUFFIX);
|
||||
VirtualFile::crashsafe_overwrite(target_config_path, &temp_path, conf_content)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"write tenant {tenant_id} config to {}",
|
||||
@@ -2703,13 +2707,15 @@ impl Tenant {
|
||||
src_timeline.pg_version,
|
||||
);
|
||||
|
||||
let uninitialized_timeline = self.prepare_new_timeline(
|
||||
dst_id,
|
||||
&metadata,
|
||||
timeline_uninit_mark,
|
||||
start_lsn + 1,
|
||||
Some(Arc::clone(src_timeline)),
|
||||
)?;
|
||||
let uninitialized_timeline = self
|
||||
.prepare_new_timeline(
|
||||
dst_id,
|
||||
&metadata,
|
||||
timeline_uninit_mark,
|
||||
start_lsn + 1,
|
||||
Some(Arc::clone(src_timeline)),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let new_timeline = uninitialized_timeline.finish_creation()?;
|
||||
|
||||
@@ -2787,13 +2793,15 @@ impl Tenant {
|
||||
pgdata_lsn,
|
||||
pg_version,
|
||||
);
|
||||
let raw_timeline = self.prepare_new_timeline(
|
||||
timeline_id,
|
||||
&new_metadata,
|
||||
timeline_uninit_mark,
|
||||
pgdata_lsn,
|
||||
None,
|
||||
)?;
|
||||
let raw_timeline = self
|
||||
.prepare_new_timeline(
|
||||
timeline_id,
|
||||
&new_metadata,
|
||||
timeline_uninit_mark,
|
||||
pgdata_lsn,
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let tenant_id = raw_timeline.owning_tenant.tenant_id;
|
||||
let unfinished_timeline = raw_timeline.raw_timeline()?;
|
||||
@@ -2864,7 +2872,7 @@ impl Tenant {
|
||||
/// at 'disk_consistent_lsn'. After any initial data has been imported, call
|
||||
/// `finish_creation` to insert the Timeline into the timelines map and to remove the
|
||||
/// uninit mark file.
|
||||
fn prepare_new_timeline(
|
||||
async fn prepare_new_timeline(
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
new_metadata: &TimelineMetadata,
|
||||
@@ -2892,8 +2900,9 @@ impl Tenant {
|
||||
|
||||
timeline_struct.init_empty_layer_map(start_lsn);
|
||||
|
||||
if let Err(e) =
|
||||
self.create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata)
|
||||
if let Err(e) = self
|
||||
.create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata)
|
||||
.await
|
||||
{
|
||||
error!("Failed to create initial files for timeline {tenant_id}/{new_timeline_id}, cleaning up: {e:?}");
|
||||
cleanup_timeline_directory(uninit_mark);
|
||||
@@ -2909,7 +2918,7 @@ impl Tenant {
|
||||
))
|
||||
}
|
||||
|
||||
fn create_timeline_files(
|
||||
async fn create_timeline_files(
|
||||
&self,
|
||||
timeline_path: &Path,
|
||||
new_timeline_id: &TimelineId,
|
||||
@@ -2922,6 +2931,7 @@ impl Tenant {
|
||||
});
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, new_timeline_id, new_metadata)
|
||||
.await
|
||||
.context("Failed to create timeline metadata")?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -3069,7 +3079,7 @@ pub(crate) enum CreateTenantFilesMode {
|
||||
Attach,
|
||||
}
|
||||
|
||||
pub(crate) fn create_tenant_files(
|
||||
pub(crate) async fn create_tenant_files(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: &TenantId,
|
||||
@@ -3105,7 +3115,8 @@ pub(crate) fn create_tenant_files(
|
||||
mode,
|
||||
&temporary_tenant_dir,
|
||||
&target_tenant_directory,
|
||||
);
|
||||
)
|
||||
.await;
|
||||
|
||||
if creation_result.is_err() {
|
||||
error!("Failed to create directory structure for tenant {tenant_id}, cleaning tmp data");
|
||||
@@ -3123,7 +3134,7 @@ pub(crate) fn create_tenant_files(
|
||||
Ok(target_tenant_directory)
|
||||
}
|
||||
|
||||
fn try_create_target_tenant_dir(
|
||||
async fn try_create_target_tenant_dir(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: &TenantId,
|
||||
@@ -3162,7 +3173,7 @@ fn try_create_target_tenant_dir(
|
||||
)
|
||||
.with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?;
|
||||
|
||||
Tenant::persist_tenant_config(tenant_id, &temporary_tenant_config_path, tenant_conf)?;
|
||||
Tenant::persist_tenant_config(tenant_id, &temporary_tenant_config_path, tenant_conf).await?;
|
||||
|
||||
crashsafe::create_dir(&temporary_tenant_timelines_dir).with_context(|| {
|
||||
format!(
|
||||
@@ -3561,7 +3572,10 @@ mod tests {
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
|
||||
match tenant.create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) {
|
||||
match tenant
|
||||
.create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
|
||||
.await
|
||||
{
|
||||
Ok(_) => panic!("duplicate timeline creation should fail"),
|
||||
Err(e) => assert_eq!(
|
||||
e.to_string(),
|
||||
@@ -4419,8 +4433,9 @@ mod tests {
|
||||
.await;
|
||||
|
||||
let initdb_lsn = Lsn(0x20);
|
||||
let utline =
|
||||
tenant.create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx)?;
|
||||
let utline = tenant
|
||||
.create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
let tline = utline.raw_timeline().unwrap();
|
||||
|
||||
// Spawn flush loop now so that we can set the `expect_initdb_optimization`
|
||||
@@ -4485,8 +4500,9 @@ mod tests {
|
||||
let harness = TenantHarness::create(name)?;
|
||||
{
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let tline =
|
||||
tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
|
||||
let tline = tenant
|
||||
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
// Keeps uninit mark in place
|
||||
let raw_tline = tline.raw_timeline().unwrap();
|
||||
raw_tline
|
||||
|
||||
@@ -13,7 +13,6 @@ use std::io::{self};
|
||||
use anyhow::{ensure, Context};
|
||||
use serde::{de::Error, Deserialize, Serialize, Serializer};
|
||||
use thiserror::Error;
|
||||
use tracing::info_span;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::{
|
||||
@@ -256,17 +255,18 @@ impl Serialize for TimelineMetadata {
|
||||
}
|
||||
|
||||
/// Save timeline metadata to file
|
||||
pub fn save_metadata(
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
|
||||
pub async fn save_metadata(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
timeline_id: &TimelineId,
|
||||
data: &TimelineMetadata,
|
||||
) -> anyhow::Result<()> {
|
||||
let _enter = info_span!("saving metadata").entered();
|
||||
let path = conf.metadata_path(tenant_id, timeline_id);
|
||||
let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
|
||||
let metadata_bytes = data.to_bytes().context("serialize metadata")?;
|
||||
VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
|
||||
.await
|
||||
.context("write metadata")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -387,11 +387,11 @@ pub async fn create_tenant(
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Tenant>, TenantMapInsertError> {
|
||||
tenant_map_insert(tenant_id, || {
|
||||
tenant_map_insert(tenant_id, || async {
|
||||
// We're holding the tenants lock in write mode while doing local IO.
|
||||
// If this section ever becomes contentious, introduce a new `TenantState::Creating`
|
||||
// and do the work in that state.
|
||||
let tenant_directory = super::create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Create)?;
|
||||
let tenant_directory = super::create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Create).await?;
|
||||
// TODO: tenant directory remains on disk if we bail out from here on.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
@@ -431,6 +431,7 @@ pub async fn set_new_tenant_config(
|
||||
|
||||
let tenant_config_path = conf.tenant_config_path(&tenant_id);
|
||||
Tenant::persist_tenant_config(&tenant_id, &tenant_config_path, new_tenant_conf)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
tenant.set_new_tenant_config(new_tenant_conf);
|
||||
Ok(())
|
||||
@@ -551,7 +552,7 @@ pub async fn load_tenant(
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
tenant_map_insert(tenant_id, || {
|
||||
tenant_map_insert(tenant_id, || async {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id);
|
||||
if tenant_ignore_mark.exists() {
|
||||
@@ -632,8 +633,8 @@ pub async fn attach_tenant(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
tenant_map_insert(tenant_id, || {
|
||||
let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach)?;
|
||||
tenant_map_insert(tenant_id, || async {
|
||||
let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach).await?;
|
||||
// TODO: tenant directory remains on disk if we bail out from here on.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
@@ -681,12 +682,13 @@ pub enum TenantMapInsertError {
|
||||
///
|
||||
/// NB: the closure should return quickly because the current implementation of tenants map
|
||||
/// serializes access through an `RwLock`.
|
||||
async fn tenant_map_insert<F>(
|
||||
async fn tenant_map_insert<F, R>(
|
||||
tenant_id: TenantId,
|
||||
insert_fn: F,
|
||||
) -> Result<Arc<Tenant>, TenantMapInsertError>
|
||||
where
|
||||
F: FnOnce() -> anyhow::Result<Arc<Tenant>>,
|
||||
F: FnOnce() -> R,
|
||||
R: std::future::Future<Output = anyhow::Result<Arc<Tenant>>>,
|
||||
{
|
||||
let mut guard = TENANTS.write().await;
|
||||
let m = match &mut *guard {
|
||||
@@ -699,7 +701,7 @@ where
|
||||
tenant_id,
|
||||
e.get().current_state(),
|
||||
)),
|
||||
hash_map::Entry::Vacant(v) => match insert_fn() {
|
||||
hash_map::Entry::Vacant(v) => match insert_fn().await {
|
||||
Ok(tenant) => {
|
||||
v.insert(tenant.clone());
|
||||
Ok(tenant)
|
||||
|
||||
@@ -2778,6 +2778,7 @@ impl Timeline {
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
self.update_metadata_file(disk_consistent_lsn, layer_paths_to_upload)
|
||||
.await
|
||||
.context("update_metadata_file")?;
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
@@ -2786,7 +2787,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Update metadata file
|
||||
fn update_metadata_file(
|
||||
async fn update_metadata_file(
|
||||
&self,
|
||||
disk_consistent_lsn: Lsn,
|
||||
layer_paths_to_upload: HashMap<LayerFileName, LayerFileMetadata>,
|
||||
@@ -2828,6 +2829,7 @@ impl Timeline {
|
||||
));
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
@@ -4159,7 +4161,8 @@ impl Timeline {
|
||||
if !layers_to_remove.is_empty() {
|
||||
// Persist the new GC cutoff value in the metadata file, before
|
||||
// we actually remove anything.
|
||||
self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?;
|
||||
self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())
|
||||
.await?;
|
||||
|
||||
// Actually delete the layers from disk and remove them from the map.
|
||||
// (couldn't do this in the loop above, because you cannot modify a collection
|
||||
|
||||
@@ -271,7 +271,13 @@ impl VirtualFile {
|
||||
Ok(vfile)
|
||||
}
|
||||
|
||||
pub fn crashsafe_overwrite(
|
||||
/// Writes a file to the specified `final_path` in a crash safe fasion
|
||||
///
|
||||
/// The file is first written to the specified tmp_path, and in a second
|
||||
/// step, the tmp path is renamed to the final path. As renames are
|
||||
/// atomic, a crash during the write operation will never leave behind a
|
||||
/// partially written file.
|
||||
pub async fn crashsafe_overwrite(
|
||||
final_path: &Path,
|
||||
tmp_path: &Path,
|
||||
content: &[u8],
|
||||
|
||||
Reference in New Issue
Block a user