diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index 1fbca1086f..a49eef8bb9 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -877,6 +877,56 @@ paths:
schema:
$ref: "#/components/schemas/ServiceUnavailableError"
+ /v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive:
+ parameters:
+ - name: tenant_id
+ in: path
+ required: true
+ schema:
+ type: string
+ - name: timeline_id
+ in: path
+ required: true
+ schema:
+ type: string
+ post:
+ description: |
+ Marks the initdb archive for preservation upon deletion of the timeline or tenant.
+ This is meant to be part of the disaster recovery process.
+ responses:
+ "202":
+ description: Tenant scheduled to load successfully
+ "404":
+ description: No tenant or timeline found for the specified ids
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "401":
+ description: Unauthorized Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UnauthorizedError"
+ "403":
+ description: Forbidden Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForbiddenError"
+ "500":
+ description: Generic operation error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ description: Temporarily unavailable, please retry.
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ServiceUnavailableError"
+
/v1/tenant/{tenant_id}/synthetic_size:
parameters:
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 811232397c..5e09a5aa1a 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -561,6 +561,43 @@ async fn timeline_list_handler(
json_response(StatusCode::OK, response_data)
}
+async fn timeline_preserve_initdb_handler(
+ request: Request<Body>,
+ _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+ let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+ let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+ check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+ // Part of the process for disaster recovery from safekeeper-stored WAL:
+ // if we recover in place, keeping the timeline ID, the old timeline is
+ // deleted first, which also deletes the initdb archive. This endpoint
+ // copies the archive to a different location where timeline recreation can find it.
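+ //
+ // Illustrative invocation (hypothetical IDs; assumes the default pageserver
+ // HTTP listen address):
+ //   curl -X POST http://127.0.0.1:9898/v1/tenant/<tenant_id>/timeline/<timeline_id>/preserve_initdb_archive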
+
+ async {
+ let tenant = mgr::get_tenant(tenant_shard_id, true)?;
+
+ let timeline = tenant
+ .get_timeline(timeline_id, false)
+ .map_err(|e| ApiError::NotFound(e.into()))?;
+
+ timeline
+ .preserve_initdb_archive()
+ .await
+ .context("preserving initdb archive")
+ .map_err(ApiError::InternalServerError)?;
+
+ Ok::<_, ApiError>(())
+ }
+ .instrument(info_span!("timeline_preserve_initdb_archive",
+ tenant_id = %tenant_shard_id.tenant_id,
+ shard_id = %tenant_shard_id.shard_slug(),
+ %timeline_id))
+ .await?;
+
+ json_response(StatusCode::OK, ())
+}
+
async fn timeline_detail_handler(
request: Request<Body>,
_cancel: CancellationToken,
@@ -1943,6 +1980,10 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/ignore", |r| {
api_handler(r, tenant_ignore_handler)
})
+ .post(
+ "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
+ |r| api_handler(r, timeline_preserve_initdb_handler),
+ )
.get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_detail_handler)
})
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 1b5f861c90..80ff5c9a2d 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -257,6 +257,8 @@ pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;
pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
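+/// Remote key of the copy of the initdb archive that survives timeline deletion.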
+pub(crate) const INITDB_PRESERVED_PATH: &str = "initdb-preserved.tar.zst";
+
/// Default buffer size when interfacing with [`tokio::fs::File`].
pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
@@ -1066,6 +1068,28 @@ impl RemoteTimelineClient {
Ok(())
}
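+ /// Copy the initdb archive to its "preserved" remote location, retrying
+ /// transient failures. Used before timeline deletion during disaster recovery.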
+ pub(crate) async fn preserve_initdb_archive(
+ self: &Arc<Self>,
+ tenant_id: &TenantId,
+ timeline_id: &TimelineId,
+ cancel: &CancellationToken,
+ ) -> anyhow::Result<()> {
+ backoff::retry(
+ || async {
+ upload::preserve_initdb_archive(&self.storage_impl, tenant_id, timeline_id, cancel)
+ .await
+ },
+ |_e| false,
+ FAILED_DOWNLOAD_WARN_THRESHOLD,
+ FAILED_REMOTE_OP_RETRIES,
+ "preserve_initdb_tar_zst",
+ backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled!")),
+ )
+ .await
+ .context("backing up initdb archive")?;
+ Ok(())
+ }
+
/// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfuly set.
/// The function deletes layer files one by one, then lists the prefix to see if we leaked something
/// deletes leaked files if any and proceeds with deletion of index file at the end.
@@ -1101,6 +1125,14 @@ impl RemoteTimelineClient {
let layer_deletion_count = layers.len();
self.deletion_queue_client.push_immediate(layers).await?;
+ // Delete initdb.tar.zst. It is not always present, but deletion attempts
+ // on nonexistent objects are not considered errors.
+ let initdb_path =
+ remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &self.timeline_id);
+ self.deletion_queue_client
+ .push_immediate(vec![initdb_path])
+ .await?;
+
// Do not delete index part yet, it is needed for possible retry. If we remove it first
// and retry will arrive to different pageserver there wont be any traces of it on remote storage
let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);
@@ -1148,10 +1180,8 @@ impl RemoteTimelineClient {
if p == &latest_index {
return false;
}
- if let Some(name) = p.object_name() {
- if name == INITDB_PATH {
- return false;
- }
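+ // Keep the preserved initdb archive: it must survive timeline deletion so
+ // the timeline can later be recreated from it.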
+ if p.object_name() == Some(INITDB_PRESERVED_PATH) {
+ return false;
}
true
})
@@ -1724,6 +1754,16 @@ pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId
.expect("Failed to construct path")
}
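+/// Remote path of the preserved initdb archive. Illustrative layout (assuming
+/// the usual `timelines` segment name):
+/// `tenants/<tenant_id>/timelines/<timeline_id>/initdb-preserved.tar.zst`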
+pub fn remote_initdb_preserved_archive_path(
+ tenant_id: &TenantId,
+ timeline_id: &TimelineId,
+) -> RemotePath {
+ RemotePath::from_string(&format!(
+ "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PRESERVED_PATH}"
+ ))
+ .expect("Failed to construct path")
+}
+
pub fn remote_index_path(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs
index d3956163c8..4309c683e2 100644
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -32,7 +32,8 @@ use utils::id::TimelineId;
use super::index::{IndexPart, LayerFileMetadata};
use super::{
parse_remote_index_path, remote_index_path, remote_initdb_archive_path,
- FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
+ remote_initdb_preserved_archive_path, FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES,
+ INITDB_PATH,
};
///
@@ -430,6 +431,9 @@ pub(crate) async fn download_initdb_tar_zst(
let remote_path = remote_initdb_archive_path(&tenant_shard_id.tenant_id, timeline_id);
+ let remote_preserved_path =
+ remote_initdb_preserved_archive_path(&tenant_shard_id.tenant_id, timeline_id);
+
let timeline_path = conf.timelines_path(tenant_shard_id);
if !timeline_path.exists() {
@@ -456,8 +460,16 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
- let download =
- download_cancellable(&cancel_inner, storage.download(&remote_path)).await?;
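+        // Try the regular initdb archive first; if it is gone (e.g. the
+        // timeline was deleted), fall back to the preserved copy.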
+ let download = match download_cancellable(&cancel_inner, storage.download(&remote_path))
+ .await
+ {
+ Ok(dl) => dl,
+ Err(DownloadError::NotFound) => {
+ download_cancellable(&cancel_inner, storage.download(&remote_preserved_path))
+ .await?
+ }
+ Err(other) => Err(other)?,
+ };
let mut download = tokio_util::io::StreamReader::new(download.download_stream);
let mut writer = tokio::io::BufWriter::with_capacity(8 * 1024, file);
diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs
index 11c6956875..58d95f75c2 100644
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -13,8 +13,8 @@ use super::Generation;
use crate::{
config::PageServerConf,
tenant::remote_timeline_client::{
- index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
- upload_cancellable,
+ index::IndexPart, remote_index_path, remote_initdb_archive_path,
+ remote_initdb_preserved_archive_path, remote_path, upload_cancellable,
},
};
use remote_storage::GenericRemoteStorage;
@@ -144,3 +144,16 @@ pub(crate) async fn upload_initdb_dir(
.await
.with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
}
+
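+/// Copy `initdb.tar.zst` to the preserved key so that deleting the timeline
+/// (which removes the original) does not destroy the only copy of the archive.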
+pub(crate) async fn preserve_initdb_archive(
+ storage: &GenericRemoteStorage,
+ tenant_id: &TenantId,
+ timeline_id: &TimelineId,
+ cancel: &CancellationToken,
+) -> anyhow::Result<()> {
+ let source_path = remote_initdb_archive_path(tenant_id, timeline_id);
+ let dest_path = remote_initdb_preserved_archive_path(tenant_id, timeline_id);
+ upload_cancellable(cancel, storage.copy_object(&source_path, &dest_path))
+ .await
+ .with_context(|| format!("backing up initdb archive for '{tenant_id} / {timeline_id}'"))
+}
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index bac9bf6573..603ae3b83a 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2973,6 +2973,21 @@ impl Timeline {
Ok(())
}
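+ /// Copy this timeline's initdb archive to a location that survives
+ /// timeline deletion. Fails if no remote storage is configured.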
+ pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {
+ if let Some(remote_client) = &self.remote_client {
+ remote_client
+ .preserve_initdb_archive(
+ &self.tenant_shard_id.tenant_id,
+ &self.timeline_id,
+ &self.cancel,
+ )
+ .await?;
+ } else {
+ bail!("No remote storage configured, but was asked to backup the initdb archive for {} / {}", self.tenant_shard_id.tenant_id, self.timeline_id);
+ }
+ Ok(())
+ }
+
// Write out the given frozen in-memory layer as a new L0 delta file. This L0 file will not be tracked
// in layer map immediately. The caller is responsible to put it into the layer map.
async fn create_delta_layer(
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index cfa2a2674d..ddf83b56a0 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -526,6 +526,17 @@ class PageserverHttpClient(requests.Session):
res_json = res.json()
assert res_json is None
+ def timeline_preserve_initdb_archive(
+ self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
+ ):
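+ """Ask the pageserver to copy the timeline's initdb archive to its preserved location."""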
+ log.info(
+ f"Requesting initdb archive preservation for tenant {tenant_id} and timeline {timeline_id}"
+ )
+ res = self.post(
+ f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive",
+ )
+ self.verbose_error(res)
+
def timeline_get_lsn_by_timestamp(
self,
tenant_id: Union[TenantId, TenantShardId],
diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py
index f9d6d0a934..1a1425f069 100644
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -7,11 +7,13 @@ from typing import List, Optional
import pytest
import toml
+from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
)
+from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
wait_for_last_record_lsn,
@@ -269,14 +271,20 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
timeline_id = env.initial_timeline
pg_version = env.pg_version
- # Delete all files from local_fs_remote_storage except initdb.tar.zst,
+ try:
+ pageserver_http.timeline_preserve_initdb_archive(tenant_id, timeline_id)
+ except PageserverApiException as e:
+ # Allow the error as we might be running the old pageserver binary
+ log.info(f"Got allowed error: '{e}'")
+
+ # Delete all files from local_fs_remote_storage except the initdb archive,
# the file is required for `timeline_create` with `existing_initdb_timeline_id`.
#
# TODO: switch to Path.walk() in Python 3.12
# for dirpath, _dirnames, filenames in (repo_dir / "local_fs_remote_storage").walk():
for dirpath, _dirnames, filenames in os.walk(repo_dir / "local_fs_remote_storage"):
for filename in filenames:
- if filename != "initdb.tar.zst":
+ if filename != "initdb-preserved.tar.zst" and filename != "initdb.tar.zst":
(Path(dirpath) / filename).unlink()
timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
diff --git a/test_runner/regress/test_wal_restore.py b/test_runner/regress/test_wal_restore.py
index 7d03f644d1..97db857c74 100644
--- a/test_runner/regress/test_wal_restore.py
+++ b/test_runner/regress/test_wal_restore.py
@@ -137,6 +137,9 @@ def test_wal_restore_http(neon_env_builder: NeonEnvBuilder):
ps_client = env.pageserver.http_client()
+ # Mark the initdb archive for preservation
+ ps_client.timeline_preserve_initdb_archive(tenant_id, timeline_id)
+
# shut down the endpoint and delete the timeline from the pageserver
endpoint.stop()