diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index 1fbca1086f..a49eef8bb9 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -877,6 +877,56 @@ paths:
             schema:
               $ref: "#/components/schemas/ServiceUnavailableError"
 
+  /v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+    post:
+      description: |
+        Marks the initdb archive for preservation upon deletion of the timeline or tenant.
+        This is meant to be part of the disaster recovery process.
+      responses:
+        "200":
+          description: Initdb archive successfully marked for preservation
+        "404":
+          description: No tenant or timeline found for the specified ids
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"
+
   /v1/tenant/{tenant_id}/synthetic_size:
     parameters:
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 811232397c..5e09a5aa1a 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -561,6 +561,43 @@ async fn timeline_list_handler(
     json_response(StatusCode::OK, response_data)
 }
 
+async fn timeline_preserve_initdb_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    // Part of the process for disaster recovery from safekeeper-stored WAL:
+    // If we don't recover into a new timeline but want to keep the timeline ID,
+    // then the initdb archive is deleted. This endpoint copies it to a different
+    // location where timeline recreation can find it.
+
+    async {
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
+
+        let timeline = tenant
+            .get_timeline(timeline_id, false)
+            .map_err(|e| ApiError::NotFound(e.into()))?;
+
+        timeline
+            .preserve_initdb_archive()
+            .await
+            .context("preserving initdb archive")
+            .map_err(ApiError::InternalServerError)?;
+
+        Ok::<_, ApiError>(())
+    }
+    .instrument(info_span!("timeline_preserve_initdb_archive",
+        tenant_id = %tenant_shard_id.tenant_id,
+        shard_id = %tenant_shard_id.shard_slug(),
+        %timeline_id))
+    .await?;
+
+    json_response(StatusCode::OK, ())
+}
+
 async fn timeline_detail_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
@@ -1943,6 +1980,10 @@ pub fn make_router(
         .post("/v1/tenant/:tenant_id/ignore", |r| {
             api_handler(r, tenant_ignore_handler)
         })
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
+            |r| api_handler(r, timeline_preserve_initdb_handler),
+        )
         .get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_detail_handler)
         })
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 1b5f861c90..80ff5c9a2d 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -257,6 +257,8 @@ pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;
 
 pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
 
+pub(crate) const INITDB_PRESERVED_PATH: &str = "initdb-preserved.tar.zst";
+
 /// Default buffer size when interfacing with [`tokio::fs::File`].
 pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
 
@@ -1066,6 +1068,28 @@ impl RemoteTimelineClient {
         Ok(())
     }
 
+    pub(crate) async fn preserve_initdb_archive(
+        self: &Arc<Self>,
+        tenant_id: &TenantId,
+        timeline_id: &TimelineId,
+        cancel: &CancellationToken,
+    ) -> anyhow::Result<()> {
+        backoff::retry(
+            || async {
+                upload::preserve_initdb_archive(&self.storage_impl, tenant_id, timeline_id, cancel)
+                    .await
+            },
+            |_e| false,
+            FAILED_DOWNLOAD_WARN_THRESHOLD,
+            FAILED_REMOTE_OP_RETRIES,
+            "preserve_initdb_tar_zst",
+            backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled!")),
+        )
+        .await
+        .context("backing up initdb archive")?;
+        Ok(())
+    }
+
     /// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfully set.
     /// The function deletes layer files one by one, then lists the prefix to see if we leaked something,
     /// deletes leaked files if any and proceeds with deletion of index file at the end.
@@ -1101,6 +1125,14 @@
         let layer_deletion_count = layers.len();
         self.deletion_queue_client.push_immediate(layers).await?;
 
+        // Delete the initdb.tar.zst, which is not always present, but deletion attempts of
+        // nonexistent objects are not considered errors.
+        let initdb_path =
+            remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &self.timeline_id);
+        self.deletion_queue_client
+            .push_immediate(vec![initdb_path])
+            .await?;
+
         // Do not delete index part yet, it is needed for possible retry. If we remove it first
         // and a retry arrives at a different pageserver, there won't be any traces of it on remote storage
         let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);
@@ -1148,10 +1180,8 @@
                 if p == &latest_index {
                     return false;
                 }
-                if let Some(name) = p.object_name() {
-                    if name == INITDB_PATH {
-                        return false;
-                    }
+                if p.object_name() == Some(INITDB_PRESERVED_PATH) {
+                    return false;
                 }
                 true
             })
@@ -1724,6 +1754,16 @@ pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId
     .expect("Failed to construct path")
 }
 
+pub fn remote_initdb_preserved_archive_path(
+    tenant_id: &TenantId,
+    timeline_id: &TimelineId,
+) -> RemotePath {
+    RemotePath::from_string(&format!(
+        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PRESERVED_PATH}"
+    ))
+    .expect("Failed to construct path")
+}
+
 pub fn remote_index_path(
     tenant_shard_id: &TenantShardId,
     timeline_id: &TimelineId,
diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs
index d3956163c8..4309c683e2 100644
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -32,7 +32,8 @@ use utils::id::TimelineId;
 use super::index::{IndexPart, LayerFileMetadata};
 use super::{
     parse_remote_index_path, remote_index_path, remote_initdb_archive_path,
-    FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
+    remote_initdb_preserved_archive_path, FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES,
+    INITDB_PATH,
 };
 
 ///
@@ -430,6 +431,9 @@ pub(crate) async fn download_initdb_tar_zst(
 
     let remote_path = remote_initdb_archive_path(&tenant_shard_id.tenant_id, timeline_id);
 
+    let remote_preserved_path =
+        remote_initdb_preserved_archive_path(&tenant_shard_id.tenant_id, timeline_id);
+
     let timeline_path = conf.timelines_path(tenant_shard_id);
 
     if !timeline_path.exists() {
@@ -456,8 +460,16 @@
         .with_context(|| format!("tempfile creation {temp_path}"))
         .map_err(DownloadError::Other)?;
 
-        let download =
-            download_cancellable(&cancel_inner, storage.download(&remote_path)).await?;
+        let download = match download_cancellable(&cancel_inner, storage.download(&remote_path))
+            .await
+        {
+            Ok(dl) => dl,
+            Err(DownloadError::NotFound) => {
+                download_cancellable(&cancel_inner, storage.download(&remote_preserved_path))
+                    .await?
+            }
+            Err(other) => Err(other)?,
+        };
 
         let mut download = tokio_util::io::StreamReader::new(download.download_stream);
         let mut writer = tokio::io::BufWriter::with_capacity(8 * 1024, file);
diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs
index 11c6956875..58d95f75c2 100644
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -13,8 +13,8 @@ use super::Generation;
 use crate::{
     config::PageServerConf,
     tenant::remote_timeline_client::{
-        index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
-        upload_cancellable,
+        index::IndexPart, remote_index_path, remote_initdb_archive_path,
+        remote_initdb_preserved_archive_path, remote_path, upload_cancellable,
     },
 };
 use remote_storage::GenericRemoteStorage;
@@ -144,3 +144,16 @@
         .await
         .with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
 }
+
+pub(crate) async fn preserve_initdb_archive(
+    storage: &GenericRemoteStorage,
+    tenant_id: &TenantId,
+    timeline_id: &TimelineId,
+    cancel: &CancellationToken,
+) -> anyhow::Result<()> {
+    let source_path = remote_initdb_archive_path(tenant_id, timeline_id);
+    let dest_path = remote_initdb_preserved_archive_path(tenant_id, timeline_id);
+    upload_cancellable(cancel, storage.copy_object(&source_path, &dest_path))
+        .await
+        .with_context(|| format!("backing up initdb archive for '{tenant_id} / {timeline_id}'"))
+}
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index bac9bf6573..603ae3b83a 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2973,6 +2973,21 @@ impl Timeline {
         Ok(())
     }
 
+    pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {
+        if let Some(remote_client) = &self.remote_client {
+            remote_client
+                .preserve_initdb_archive(
+                    &self.tenant_shard_id.tenant_id,
+                    &self.timeline_id,
+                    &self.cancel,
+                )
+                .await?;
+        } else {
+            bail!("No remote storage configured, but was asked to back up the initdb archive for {} / {}", self.tenant_shard_id.tenant_id, self.timeline_id);
+        }
+        Ok(())
+    }
+
     // Write out the given frozen in-memory layer as a new L0 delta file. This L0 file will not be tracked
     // in layer map immediately. The caller is responsible for putting it into the layer map.
     async fn create_delta_layer(
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index cfa2a2674d..ddf83b56a0 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -526,6 +526,17 @@ class PageserverHttpClient(requests.Session):
         res_json = res.json()
         assert res_json is None
 
+    def timeline_preserve_initdb_archive(
+        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
+    ):
+        log.info(
+            f"Requesting initdb archive preservation for tenant {tenant_id} and timeline {timeline_id}"
+        )
+        res = self.post(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive",
+        )
+        self.verbose_error(res)
+
     def timeline_get_lsn_by_timestamp(
         self,
         tenant_id: Union[TenantId, TenantShardId],
diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py
index f9d6d0a934..1a1425f069 100644
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -7,11 +7,13 @@ from typing import List, Optional
 
 import pytest
 import toml
+from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
     NeonEnv,
     NeonEnvBuilder,
     PgBin,
 )
+from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     timeline_delete_wait_completed,
     wait_for_last_record_lsn,
@@ -269,14 +271,20 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
     timeline_id = env.initial_timeline
     pg_version = env.pg_version
 
-    # Delete all files from local_fs_remote_storage except initdb.tar.zst,
+    try:
+        pageserver_http.timeline_preserve_initdb_archive(tenant_id, timeline_id)
+    except PageserverApiException as e:
+        # Allow the error, as we might be running the old pageserver binary
+        log.info(f"Got allowed error: '{e}'")
+
+    # Delete all files from local_fs_remote_storage except initdb-preserved.tar.zst (or initdb.tar.zst),
     # the file is required for `timeline_create` with `existing_initdb_timeline_id`.
     #
     # TODO: switch to Path.walk() in Python 3.12
     # for dirpath, _dirnames, filenames in (repo_dir / "local_fs_remote_storage").walk():
     for dirpath, _dirnames, filenames in os.walk(repo_dir / "local_fs_remote_storage"):
         for filename in filenames:
-            if filename != "initdb.tar.zst":
+            if filename not in ("initdb-preserved.tar.zst", "initdb.tar.zst"):
                 (Path(dirpath) / filename).unlink()
 
     timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
diff --git a/test_runner/regress/test_wal_restore.py b/test_runner/regress/test_wal_restore.py
index 7d03f644d1..97db857c74 100644
--- a/test_runner/regress/test_wal_restore.py
+++ b/test_runner/regress/test_wal_restore.py
@@ -137,6 +137,9 @@ def test_wal_restore_http(neon_env_builder: NeonEnvBuilder):
 
     ps_client = env.pageserver.http_client()
 
+    # Mark the initdb archive for preservation
+    ps_client.timeline_preserve_initdb_archive(tenant_id, timeline_id)
+
     # shut down the endpoint and delete the timeline from the pageserver
     endpoint.stop()
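
Reviewer note, not part of the patch: the pieces above compose into a disaster-recovery flow — preserve the initdb archive, delete the timeline, then recreate it under the same ID from the preserved copy. Below is a minimal sketch of that flow against the neon test fixtures. `timeline_preserve_initdb_archive` is added by this patch and `timeline_delete_wait_completed` already exists; the remote-storage setup and the exact `timeline_create` keyword arguments are assumptions based on the surrounding test suite, not verified against this revision.

```python
# Sketch of the disaster-recovery flow the new endpoint supports.
# Assumed names (not part of this patch): RemoteStorageKind,
# enable_pageserver_remote_storage, and the timeline_create kwargs.
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.pageserver.utils import timeline_delete_wait_completed
from fixtures.remote_storage import RemoteStorageKind


def test_initdb_preserve_flow(neon_env_builder: NeonEnvBuilder):
    # Remote storage must be configured; preserve_initdb_archive bails otherwise.
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
    env = neon_env_builder.init_start()
    ps_http = env.pageserver.http_client()
    tenant_id, timeline_id = env.initial_tenant, env.initial_timeline

    # 1. Copy initdb.tar.zst to initdb-preserved.tar.zst in remote storage.
    ps_http.timeline_preserve_initdb_archive(tenant_id, timeline_id)

    # 2. Delete the timeline. The deletion path now removes initdb.tar.zst
    #    but keeps the preserved copy (see the scrubber filter change above).
    timeline_delete_wait_completed(ps_http, tenant_id, timeline_id)

    # 3. Recreate the timeline under the same ID. download_initdb_tar_zst
    #    falls back to initdb-preserved.tar.zst when initdb.tar.zst is gone.
    ps_http.timeline_create(
        pg_version=env.pg_version,
        tenant_id=tenant_id,
        new_timeline_id=timeline_id,
        existing_initdb_timeline_id=timeline_id,
    )
```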