pageserver: always detach before deleting (#8082)

In #7957 we enabled deletion without attachment, but retained the
old-style deletion (return 202, delete in background) for attached
tenants. In this PR, we remove the old-style deletion path, such that if
the tenant delete API is invoked while a tenant is detached, it is
simply detached before completing the deletion.

This intentionally doesn't rip out all the old deletion code: in case a
deletion was in progress at time of upgrade, we keep around the code for
finishing it for one release cycle. The rest of the code removal happens
in https://github.com/neondatabase/neon/pull/8091

Now that deletion will always be via the new path, the new path is also
updated to use some retries around remote storage operations, to
tripping up the control plane with 500s if S3 has an intermittent issue.
This commit is contained in:
John Spray
2024-06-21 15:39:19 +01:00
committed by GitHub
parent f45cf28247
commit 15728be0e1
14 changed files with 123 additions and 865 deletions

View File

@@ -78,29 +78,14 @@ paths:
delete:
description: |
Attempts to delete specified tenant. 500, 503 and 409 errors should be retried until 404 is retrieved.
404 means that deletion successfully finished"
Attempts to delete specified tenant. 500, 503 and 409 errors should be retried. Deleting
a non-existent tenant is considered successful (returns 200).
responses:
"200":
description: Tenant was successfully deleted, or was already not found.
"404":
description: Tenant not found. This is a success result, equivalent to 200.
content:
application/json:
schema:
$ref: "#/components/schemas/NotFoundError"
"409":
description: Deletion is already in progress, continue polling
content:
application/json:
schema:
$ref: "#/components/schemas/ConflictError"
"412":
description: Deletion may not proceed, tenant is not in Active state
content:
application/json:
schema:
$ref: "#/components/schemas/PreconditionFailedError"
"503":
description: Service is unavailable, or tenant is already being modified (perhaps concurrently deleted)
/v1/tenant/{tenant_id}/time_travel_remote_storage:
parameters:

View File

@@ -334,13 +334,10 @@ impl From<crate::tenant::delete::DeleteTenantError> for ApiError {
use crate::tenant::delete::DeleteTenantError::*;
match value {
Get(g) => ApiError::from(g),
e @ AlreadyInProgress => ApiError::Conflict(e.to_string()),
Timeline(t) => ApiError::from(t),
NotAttached => ApiError::NotFound(anyhow::anyhow!("Tenant is not attached").into()),
SlotError(e) => e.into(),
SlotUpsertError(e) => e.into(),
Other(o) => ApiError::InternalServerError(o),
e @ InvalidState(_) => ApiError::PreconditionFailed(e.to_string().into_boxed_str()),
Cancelled => ApiError::ShuttingDown,
}
}
@@ -1015,23 +1012,16 @@ async fn tenant_delete_handler(
let state = get_state(&request);
let status = state
state
.tenant_manager
.delete_tenant(tenant_shard_id, ACTIVE_TENANT_TIMEOUT)
.delete_tenant(tenant_shard_id)
.instrument(info_span!("tenant_delete_handler",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()
))
.await?;
// Callers use 404 as success for deletions, for historical reasons.
if status == StatusCode::NOT_FOUND {
return Err(ApiError::NotFound(
anyhow::anyhow!("Deletion complete").into(),
));
}
json_response(status, ())
json_response(StatusCode::OK, ())
}
/// HTTP endpoint to query the current tenant_size of a tenant.

View File

@@ -6,25 +6,23 @@ use pageserver_api::{models::TenantState, shard::TenantShardId};
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use tokio::sync::OwnedMutexGuard;
use tokio_util::sync::CancellationToken;
use tracing::{error, instrument, Instrument};
use tracing::{error, Instrument};
use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId, pausable_failpoint};
use crate::{
config::PageServerConf,
context::RequestContext,
task_mgr::{self, TaskKind},
task_mgr::{self},
tenant::{
mgr::{TenantSlot, TenantsMapRemoveResult},
remote_timeline_client::remote_heatmap_path,
timeline::ShutdownMode,
},
};
use super::{
mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
span,
timeline::delete::DeleteTimelineFlow,
tree_sort_timelines, DeleteTimelineError, Tenant, TenantPreload,
};
@@ -34,15 +32,6 @@ pub(crate) enum DeleteTenantError {
#[error("GetTenant {0}")]
Get(#[from] GetTenantError),
#[error("Tenant not attached")]
NotAttached,
#[error("Invalid state {0}. Expected Active or Broken")]
InvalidState(TenantState),
#[error("Tenant deletion is already in progress")]
AlreadyInProgress,
#[error("Tenant map slot error {0}")]
SlotError(#[from] TenantSlotError),
@@ -74,56 +63,6 @@ fn remote_tenant_delete_mark_path(
Ok(tenant_remote_path.join(Utf8Path::new("timelines/deleted")))
}
async fn create_remote_delete_mark(
conf: &PageServerConf,
remote_storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,
cancel: &CancellationToken,
) -> Result<(), DeleteTenantError> {
let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
let data: &[u8] = &[];
backoff::retry(
|| async {
let data = bytes::Bytes::from_static(data);
let stream = futures::stream::once(futures::future::ready(Ok(data)));
remote_storage
.upload(stream, 0, &remote_mark_path, None, cancel)
.await
},
TimeoutOrCancel::caused_by_cancel,
FAILED_UPLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"mark_upload",
cancel,
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.and_then(|x| x)
.context("mark_upload")?;
Ok(())
}
async fn create_local_delete_mark(
conf: &PageServerConf,
tenant_shard_id: &TenantShardId,
) -> Result<(), DeleteTenantError> {
let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
// Note: we're ok to replace existing file.
let _ = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&marker_path)
.with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
Ok(())
}
async fn schedule_ordered_timeline_deletions(
tenant: &Arc<Tenant>,
) -> Result<Vec<(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>, TimelineId)>, DeleteTenantError> {
@@ -262,21 +201,6 @@ async fn cleanup_remaining_fs_traces(
Ok(())
}
/// Orchestrates tenant shut down of all tasks, removes its in-memory structures,
/// and deletes its data from both disk and s3.
/// The sequence of steps:
/// 1. Upload remote deletion mark.
/// 2. Create local mark file.
/// 3. Shutdown tasks
/// 4. Run ordered timeline deletions
/// 5. Wait for timeline deletion operations that were scheduled before tenant deletion was requested
/// 6. Remove remote mark
/// 7. Cleanup remaining fs traces, tenant dir, config, timelines dir, local delete mark
/// It is resumable from any step in case a crash/restart occurs.
/// There are two entrypoints to the process:
/// 1. [`DeleteTenantFlow::run`] this is the main one called by a management api handler.
/// 2. [`DeleteTenantFlow::resume_from_attach`] is called when deletion is resumed tenant is found to be deleted during attach process.
/// Note the only other place that messes around timeline delete mark is the `Tenant::spawn_load` function.
#[derive(Default)]
pub enum DeleteTenantFlow {
#[default]
@@ -286,91 +210,6 @@ pub enum DeleteTenantFlow {
}
impl DeleteTenantFlow {
// These steps are run in the context of management api request handler.
// Long running steps are continued to run in the background.
// NB: If this fails half-way through, and is retried, the retry will go through
// all the same steps again. Make sure the code here is idempotent, and don't
// error out if some of the shutdown tasks have already been completed!
// NOTE: static needed for background part.
// We assume that calling code sets up the span with tenant_id.
#[instrument(skip_all)]
pub(crate) async fn run(
conf: &'static PageServerConf,
remote_storage: GenericRemoteStorage,
tenants: &'static std::sync::RwLock<TenantsMap>,
tenant: Arc<Tenant>,
cancel: &CancellationToken,
) -> Result<(), DeleteTenantError> {
span::debug_assert_current_span_has_tenant_id();
pausable_failpoint!("tenant-delete-before-run");
let mut guard = Self::prepare(&tenant).await?;
if let Err(e) = Self::run_inner(&mut guard, conf, &remote_storage, &tenant, cancel).await {
tenant.set_broken(format!("{e:#}")).await;
return Err(e);
}
Self::schedule_background(guard, conf, remote_storage, tenants, tenant);
Ok(())
}
// Helper function needed to be able to match once on returned error and transition tenant into broken state.
// This is needed because tenant.shutwodn is not idempotent. If tenant state is set to stopping another call to tenant.shutdown
// will result in an error, but here we need to be able to retry shutdown when tenant deletion is retried.
// So the solution is to set tenant state to broken.
async fn run_inner(
guard: &mut OwnedMutexGuard<Self>,
conf: &'static PageServerConf,
remote_storage: &GenericRemoteStorage,
tenant: &Tenant,
cancel: &CancellationToken,
) -> Result<(), DeleteTenantError> {
guard.mark_in_progress()?;
fail::fail_point!("tenant-delete-before-create-remote-mark", |_| {
Err(anyhow::anyhow!(
"failpoint: tenant-delete-before-create-remote-mark"
))?
});
create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
.await
.context("remote_mark")?;
fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
Err(anyhow::anyhow!(
"failpoint: tenant-delete-before-create-local-mark"
))?
});
create_local_delete_mark(conf, &tenant.tenant_shard_id)
.await
.context("local delete mark")?;
fail::fail_point!("tenant-delete-before-background", |_| {
Err(anyhow::anyhow!(
"failpoint: tenant-delete-before-background"
))?
});
Ok(())
}
fn mark_in_progress(&mut self) -> anyhow::Result<()> {
match self {
Self::Finished => anyhow::bail!("Bug. Is in finished state"),
Self::InProgress { .. } => { /* We're in a retry */ }
Self::NotStarted => { /* Fresh start */ }
}
*self = Self::InProgress;
Ok(())
}
pub(crate) async fn should_resume_deletion(
conf: &'static PageServerConf,
remote_mark_exists: bool,
@@ -428,79 +267,6 @@ impl DeleteTenantFlow {
.await
}
/// Check whether background deletion of this tenant is currently in progress
pub(crate) fn is_in_progress(tenant: &Tenant) -> bool {
tenant.delete_progress.try_lock().is_err()
}
async fn prepare(
tenant: &Arc<Tenant>,
) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
// FIXME: unsure about active only. Our init jobs may not be cancellable properly,
// so at least for now allow deletions only for active tenants. TODO recheck
// Broken and Stopping is needed for retries.
if !matches!(
tenant.current_state(),
TenantState::Active | TenantState::Broken { .. }
) {
return Err(DeleteTenantError::InvalidState(tenant.current_state()));
}
let guard = Arc::clone(&tenant.delete_progress)
.try_lock_owned()
.map_err(|_| DeleteTenantError::AlreadyInProgress)?;
fail::fail_point!("tenant-delete-before-shutdown", |_| {
Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))?
});
// make pageserver shutdown not to wait for our completion
let (_, progress) = completion::channel();
// It would be good to only set stopping here and continue shutdown in the background, but shutdown is not idempotent.
// i e it is an error to do:
// tenant.set_stopping
// tenant.shutdown
// Its also bad that we're holding tenants.read here.
// TODO relax set_stopping to be idempotent?
if tenant.shutdown(progress, ShutdownMode::Hard).await.is_err() {
return Err(DeleteTenantError::Other(anyhow::anyhow!(
"tenant shutdown is already in progress"
)));
}
Ok(guard)
}
fn schedule_background(
guard: OwnedMutexGuard<Self>,
conf: &'static PageServerConf,
remote_storage: GenericRemoteStorage,
tenants: &'static std::sync::RwLock<TenantsMap>,
tenant: Arc<Tenant>,
) {
let tenant_shard_id = tenant.tenant_shard_id;
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::TimelineDeletionWorker,
Some(tenant_shard_id),
None,
"tenant_delete",
false,
async move {
if let Err(err) =
Self::background(guard, conf, remote_storage, tenants, &tenant).await
{
error!("Error: {err:#}");
tenant.set_broken(format!("{err:#}")).await;
};
Ok(())
}
.instrument(tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug())),
);
}
async fn background(
mut guard: OwnedMutexGuard<Self>,
conf: &PageServerConf,
@@ -580,8 +346,6 @@ impl DeleteTenantFlow {
.context("cleanup_remaining_fs_traces")?;
{
pausable_failpoint!("tenant-delete-before-map-remove");
// This block is simply removing the TenantSlot for this tenant. It requires a loop because
// we might conflict with a TenantSlot::InProgress marker and need to wait for it.
//

View File

@@ -3,7 +3,6 @@
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use hyper::StatusCode;
use itertools::Itertools;
use pageserver_api::key::Key;
use pageserver_api::models::LocationConfigMode;
@@ -27,7 +26,7 @@ use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::{completion, crashsafe};
use utils::{backoff, completion, crashsafe};
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
@@ -41,7 +40,6 @@ use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::{
AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
};
use crate::tenant::delete::DeleteTenantFlow;
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
use crate::tenant::storage_layer::inmemory_layer;
use crate::tenant::timeline::ShutdownMode;
@@ -1354,56 +1352,10 @@ impl TenantManager {
}
}
pub(crate) async fn delete_tenant(
async fn delete_tenant_remote(
&self,
tenant_shard_id: TenantShardId,
activation_timeout: Duration,
) -> Result<StatusCode, DeleteTenantError> {
super::span::debug_assert_current_span_has_tenant_id();
// We acquire a SlotGuard during this function to protect against concurrent
// changes while the ::prepare phase of DeleteTenantFlow executes, but then
// have to return the Tenant to the map while the background deletion runs.
//
// TODO: refactor deletion to happen outside the lifetime of a Tenant.
// Currently, deletion requires a reference to the tenants map in order to
// keep the Tenant in the map until deletion is complete, and then remove
// it at the end.
//
// See https://github.com/neondatabase/neon/issues/5080
// Tenant deletion can happen two ways:
// - Legacy: called on an attached location. The attached Tenant object stays alive in Stopping
// state until deletion is complete.
// - New: called on a pageserver without an attached location. We proceed with deletion from
// remote storage.
//
// See https://github.com/neondatabase/neon/issues/5080 for more context on this transition.
let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
match &slot_guard.old_value {
Some(TenantSlot::Attached(tenant)) => {
// Legacy deletion flow: the tenant remains attached, goes to Stopping state, and
// deletion will be resumed across restarts.
let tenant = tenant.clone();
return self
.delete_tenant_attached(slot_guard, tenant, activation_timeout)
.await;
}
Some(TenantSlot::Secondary(secondary_tenant)) => {
secondary_tenant.shutdown().await;
let local_tenant_directory = self.conf.tenant_path(&tenant_shard_id);
let tmp_dir = safe_rename_tenant_dir(&local_tenant_directory)
.await
.with_context(|| {
format!("local tenant directory {local_tenant_directory:?} rename")
})?;
spawn_background_purge(tmp_dir);
}
Some(TenantSlot::InProgress(_)) => unreachable!(),
None => {}
};
// Fall through: local state for this tenant is no longer present, proceed with remote delete
) -> Result<(), DeleteTenantError> {
let remote_path = remote_tenant_path(&tenant_shard_id);
let keys = match self
.resources
@@ -1420,7 +1372,7 @@ impl TenantManager {
Err(remote_storage::DownloadError::Cancelled) => {
return Err(DeleteTenantError::Cancelled)
}
Err(remote_storage::DownloadError::NotFound) => return Ok(StatusCode::NOT_FOUND),
Err(remote_storage::DownloadError::NotFound) => return Ok(()),
Err(other) => return Err(DeleteTenantError::Other(anyhow::anyhow!(other))),
};
@@ -1434,60 +1386,83 @@ impl TenantManager {
.await?;
}
// Callers use 404 as success for deletions, for historical reasons.
Ok(StatusCode::NOT_FOUND)
Ok(())
}
async fn delete_tenant_attached(
/// If a tenant is attached, detach it. Then remove its data from remote storage.
///
/// A tenant is considered deleted once it is gone from remote storage. It is the caller's
/// responsibility to avoid trying to attach the tenant again or use it any way once deletion
/// has started: this operation is not atomic, and must be retried until it succeeds.
pub(crate) async fn delete_tenant(
&self,
slot_guard: SlotGuard,
tenant: Arc<Tenant>,
activation_timeout: Duration,
) -> Result<StatusCode, DeleteTenantError> {
match tenant.current_state() {
TenantState::Broken { .. } | TenantState::Stopping { .. } => {
// If deletion is already in progress, return success (the semantics of this
// function are to rerturn success afterr deletion is spawned in background).
// Otherwise fall through and let [`DeleteTenantFlow`] handle this state.
if DeleteTenantFlow::is_in_progress(&tenant) {
// The `delete_progress` lock is held: deletion is already happening
// in the bacckground
slot_guard.revert();
return Ok(StatusCode::ACCEPTED);
}
}
_ => {
tenant
.wait_to_become_active(activation_timeout)
.await
.map_err(|e| match e {
GetActiveTenantError::WillNotBecomeActive(_)
| GetActiveTenantError::Broken(_) => {
DeleteTenantError::InvalidState(tenant.current_state())
}
GetActiveTenantError::Cancelled => DeleteTenantError::Cancelled,
GetActiveTenantError::NotFound(_) => DeleteTenantError::NotAttached,
GetActiveTenantError::WaitForActiveTimeout {
latest_state: _latest_state,
wait_time: _wait_time,
} => DeleteTenantError::InvalidState(tenant.current_state()),
})?;
}
tenant_shard_id: TenantShardId,
) -> Result<(), DeleteTenantError> {
super::span::debug_assert_current_span_has_tenant_id();
async fn delete_local(
conf: &PageServerConf,
tenant_shard_id: &TenantShardId,
) -> anyhow::Result<()> {
let local_tenant_directory = conf.tenant_path(tenant_shard_id);
let tmp_dir = safe_rename_tenant_dir(&local_tenant_directory)
.await
.with_context(|| {
format!("local tenant directory {local_tenant_directory:?} rename")
})?;
spawn_background_purge(tmp_dir);
Ok(())
}
let result = DeleteTenantFlow::run(
self.conf,
self.resources.remote_storage.clone(),
&TENANTS,
tenant,
let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
match &slot_guard.old_value {
Some(TenantSlot::Attached(tenant)) => {
// Legacy deletion flow: the tenant remains attached, goes to Stopping state, and
// deletion will be resumed across restarts.
let tenant = tenant.clone();
let (_guard, progress) = utils::completion::channel();
match tenant.shutdown(progress, ShutdownMode::Hard).await {
Ok(()) => {}
Err(barrier) => {
info!("Shutdown already in progress, waiting for it to complete");
barrier.wait().await;
}
}
delete_local(self.conf, &tenant_shard_id).await?;
}
Some(TenantSlot::Secondary(secondary_tenant)) => {
secondary_tenant.shutdown().await;
delete_local(self.conf, &tenant_shard_id).await?;
}
Some(TenantSlot::InProgress(_)) => unreachable!(),
None => {}
};
// Fall through: local state for this tenant is no longer present, proceed with remote delete.
// - We use a retry wrapper here so that common transient S3 errors (e.g. 503, 429) do not result
// in 500 responses to delete requests.
// - We keep the `SlotGuard` during this I/O, so that if a concurrent delete request comes in, it will
// 503/retry, rather than kicking off a wasteful concurrent deletion.
match backoff::retry(
|| async move { self.delete_tenant_remote(tenant_shard_id).await },
|e| match e {
DeleteTenantError::Cancelled => true,
DeleteTenantError::SlotError(_) => {
unreachable!("Remote deletion doesn't touch slots")
}
_ => false,
},
1,
3,
&format!("delete_tenant[tenant_shard_id={tenant_shard_id}]"),
&self.cancel,
)
.await;
// The Tenant goes back into the map in Stopping state, it will eventually be removed by DeleteTenantFLow
slot_guard.revert();
let () = result?;
Ok(StatusCode::ACCEPTED)
.await
{
Some(r) => r,
None => Err(DeleteTenantError::Cancelled),
}
}
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]

View File

@@ -430,52 +430,6 @@ def enable_remote_storage_versioning(
return response
def wait_tenant_status_404(
pageserver_http: PageserverHttpClient,
tenant_id: TenantId,
iterations: int,
interval: float = 0.250,
):
def tenant_is_missing():
data = {}
try:
data = pageserver_http.tenant_status(tenant_id)
log.info(f"tenant status {data}")
except PageserverApiException as e:
log.debug(e)
if e.status_code == 404:
return
raise RuntimeError(f"Timeline exists state {data.get('state')}")
wait_until(iterations, interval=interval, func=tenant_is_missing)
def tenant_delete_wait_completed(
pageserver_http: PageserverHttpClient,
tenant_id: TenantId,
iterations: int,
ignore_errors: bool = False,
):
if not ignore_errors:
pageserver_http.tenant_delete(tenant_id=tenant_id)
else:
interval = 0.5
def delete_request_sent():
try:
pageserver_http.tenant_delete(tenant_id=tenant_id)
except PageserverApiException as e:
log.debug(e)
if e.status_code == 404:
return
except Exception as e:
log.debug(e)
wait_until(iterations, interval=interval, func=delete_request_sent)
wait_tenant_status_404(pageserver_http, tenant_id=tenant_id, iterations=iterations)
MANY_SMALL_LAYERS_TENANT_CONFIG = {
"gc_period": "0s",
"compaction_period": "0s",

View File

@@ -4,7 +4,6 @@ import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.common_types import Lsn
from fixtures.compare_fixtures import NeonCompare, PgCompare
from fixtures.pageserver.utils import wait_tenant_status_404
from fixtures.pg_version import PgVersion
@@ -68,7 +67,6 @@ def measure_recovery_time(env: NeonCompare):
(attach_gen, _) = attach_status
client.tenant_delete(env.tenant)
wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5)
env.env.pageserver.tenant_create(tenant_id=env.tenant, generation=attach_gen)
# Measure recovery time

View File

@@ -11,8 +11,6 @@ from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, StorageScrubb
from fixtures.pageserver.common_types import parse_layer_file_name
from fixtures.pageserver.utils import (
assert_prefix_empty,
poll_for_remote_storage_iterations,
tenant_delete_wait_completed,
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
@@ -363,8 +361,7 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
# Check that deletion works properly on a tenant that was live-migrated
# (reproduce https://github.com/neondatabase/neon/issues/6802)
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
tenant_delete_wait_completed(pageserver_b.http_client(), tenant_id, iterations)
pageserver_b.http_client().tenant_delete(tenant_id)
def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):
@@ -552,7 +549,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
)
log.info("Deleting tenant...")
tenant_delete_wait_completed(ps_attached.http_client(), tenant_id, 10)
ps_attached.http_client().tenant_delete(tenant_id)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,

View File

@@ -11,8 +11,6 @@ from fixtures.pageserver.utils import (
MANY_SMALL_LAYERS_TENANT_CONFIG,
assert_prefix_empty,
enable_remote_storage_versioning,
poll_for_remote_storage_iterations,
tenant_delete_wait_completed,
wait_for_upload,
)
from fixtures.remote_storage import RemoteStorageKind, s3_storage
@@ -83,8 +81,7 @@ def test_tenant_s3_restore(
assert (
ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 1
), "tenant removed before we deletion was issued"
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
tenant_delete_wait_completed(ps_http, tenant_id, iterations)
ps_http.tenant_delete(tenant_id)
ps_http.deletion_queue_flush(execute=True)
assert (
ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 0

View File

@@ -24,7 +24,6 @@ from fixtures.pageserver.utils import (
enable_remote_storage_versioning,
list_prefix,
remote_storage_delete_key,
tenant_delete_wait_completed,
timeline_delete_wait_completed,
)
from fixtures.pg_version import PgVersion
@@ -158,7 +157,7 @@ def test_storage_controller_smoke(
# Delete all the tenants
for tid in tenant_ids:
tenant_delete_wait_completed(env.storage_controller.pageserver_api(), tid, 10)
env.storage_controller.pageserver_api().tenant_delete(tid)
env.storage_controller.consistency_check()

View File

@@ -1,17 +1,11 @@
import concurrent.futures
import enum
import os
import shutil
from threading import Thread
import pytest
from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
StorageScrubber,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException
@@ -19,18 +13,33 @@ from fixtures.pageserver.utils import (
MANY_SMALL_LAYERS_TENANT_CONFIG,
assert_prefix_empty,
assert_prefix_not_empty,
poll_for_remote_storage_iterations,
tenant_delete_wait_completed,
wait_for_upload,
wait_tenant_status_404,
wait_until_tenant_active,
wait_until_tenant_state,
)
from fixtures.remote_storage import RemoteStorageKind, available_s3_storages, s3_storage
from fixtures.remote_storage import RemoteStorageKind, s3_storage
from fixtures.utils import run_pg_bench_small, wait_until
from requests.exceptions import ReadTimeout
def error_tolerant_delete(ps_http, tenant_id):
"""
For tests that inject 500 errors, we must retry repeatedly when issuing deletions
"""
while True:
try:
ps_http.tenant_delete(tenant_id=tenant_id)
except PageserverApiException as e:
if e.status_code == 500:
# This test uses failure injection, which can produce 500s as the pageserver expects
# the object store to always be available, and the ListObjects during deletion is generally
# an infallible operation
assert "simulated failure of remote operation" in e.message
else:
raise
else:
# Success, drop out
break
def test_tenant_delete_smoke(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
@@ -59,21 +68,7 @@ def test_tenant_delete_smoke(
# Check that deleting a non-existent tenant gives the expected result: this is a loop because we
# may need to retry on some remote storage errors injected by the test harness
while True:
try:
ps_http.tenant_delete(tenant_id=tenant_id)
except PageserverApiException as e:
if e.status_code == 500:
# This test uses failure injection, which can produce 500s as the pageserver expects
# the object store to always be available, and the ListObjects during deletion is generally
# an infallible operation
assert "simulated failure of remote operation" in e.message
elif e.status_code == 404:
# This is our expected result: trying to erase a non-existent tenant gives us 404
assert "NotFound" in e.message
break
else:
raise
error_tolerant_delete(ps_http, tenant_id)
env.neon_cli.create_tenant(
tenant_id=tenant_id,
@@ -108,10 +103,8 @@ def test_tenant_delete_smoke(
# Upload a heatmap so that we exercise deletion of that too
ps_http.tenant_heatmap_upload(tenant_id)
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 2
tenant_delete_wait_completed(ps_http, tenant_id, iterations)
error_tolerant_delete(ps_http, tenant_id)
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 1
tenant_path = env.pageserver.tenant_dir(tenant_id)
@@ -129,286 +122,7 @@ def test_tenant_delete_smoke(
# Deletion updates the tenant count: the one default tenant remains
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 1
class Check(enum.Enum):
RETRY_WITHOUT_RESTART = enum.auto()
RETRY_WITH_RESTART = enum.auto()
FAILPOINTS = [
"tenant-delete-before-shutdown",
"tenant-delete-before-create-remote-mark",
"tenant-delete-before-create-local-mark",
"tenant-delete-before-background",
"tenant-delete-before-polling-ongoing-deletions",
"tenant-delete-before-cleanup-remaining-fs-traces",
"tenant-delete-before-remove-timelines-dir",
"tenant-delete-before-remove-deleted-mark",
"tenant-delete-before-remove-tenant-dir",
# Some failpoints from timeline deletion
"timeline-delete-before-index-deleted-at",
"timeline-delete-before-rm",
"timeline-delete-before-index-delete",
]
FAILPOINTS_BEFORE_BACKGROUND = [
"timeline-delete-before-schedule",
"tenant-delete-before-shutdown",
"tenant-delete-before-create-remote-mark",
"tenant-delete-before-create-local-mark",
"tenant-delete-before-background",
]
def combinations():
result = []
remotes = available_s3_storages()
for remote_storage_kind in remotes:
for delete_failpoint in FAILPOINTS:
# Simulate failures for only one type of remote storage
# to avoid log pollution and make tests run faster
if remote_storage_kind is RemoteStorageKind.MOCK_S3:
simulate_failures = True
else:
simulate_failures = False
result.append((remote_storage_kind, delete_failpoint, simulate_failures))
return result
@pytest.mark.parametrize("check", list(Check))
@pytest.mark.parametrize("remote_storage_kind, failpoint, simulate_failures", combinations())
def test_delete_tenant_exercise_crash_safety_failpoints(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
failpoint: str,
simulate_failures: bool,
check: Check,
pg_bin: PgBin,
):
if simulate_failures:
neon_env_builder.pageserver_config_override = "test_remote_failures=1"
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
tenant_id = env.initial_tenant
env.pageserver.allowed_errors.extend(
[
# From deletion polling
f".*NotFound: tenant {env.initial_tenant}.*",
# allow errors caused by failpoints
f".*failpoint: {failpoint}",
# It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
".*shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
# We may leave some upload tasks in the queue. They're likely deletes.
# For uploads we explicitly wait with `last_flush_lsn_upload` below.
# So by ignoring these instead of waiting for empty upload queue
# we execute more distinct code paths.
'.*stopping left-over name="remote upload".*',
# an on-demand is cancelled by shutdown
".*initial size calculation failed: downloading failed, possibly for shutdown",
]
)
if simulate_failures:
env.pageserver.allowed_errors.append(
# The deletion queue will complain when it encounters simulated S3 errors
".*deletion executor: DeleteObjects request failed.*",
)
ps_http = env.pageserver.http_client()
timeline_id = env.neon_cli.create_timeline("delete", tenant_id=tenant_id)
with env.endpoints.create_start("delete", tenant_id=tenant_id) as endpoint:
# generate enough layers
run_pg_bench_small(pg_bin, endpoint.connstr())
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
ps_http.configure_failpoints((failpoint, "return"))
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
# These failpoints are earlier than background task is spawned.
# so they result in api request failure.
if failpoint in FAILPOINTS_BEFORE_BACKGROUND:
with pytest.raises(PageserverApiException, match=failpoint):
ps_http.tenant_delete(tenant_id)
else:
ps_http.tenant_delete(tenant_id)
tenant_info = wait_until_tenant_state(
pageserver_http=ps_http,
tenant_id=tenant_id,
expected_state="Broken",
iterations=iterations,
)
reason = tenant_info["state"]["data"]["reason"]
log.info(f"tenant broken: {reason}")
# failpoint may not be the only error in the stack
assert reason.endswith(f"failpoint: {failpoint}"), reason
if check is Check.RETRY_WITH_RESTART:
env.pageserver.restart()
if failpoint in (
"tenant-delete-before-shutdown",
"tenant-delete-before-create-remote-mark",
):
wait_until_tenant_active(
ps_http, tenant_id=tenant_id, iterations=iterations, period=0.25
)
tenant_delete_wait_completed(ps_http, tenant_id, iterations=iterations)
else:
# Pageserver should've resumed deletion after restart.
wait_tenant_status_404(ps_http, tenant_id, iterations=iterations + 10)
elif check is Check.RETRY_WITHOUT_RESTART:
# this should succeed
# this also checks that delete can be retried even when tenant is in Broken state
ps_http.configure_failpoints((failpoint, "off"))
tenant_delete_wait_completed(ps_http, tenant_id, iterations=iterations)
tenant_dir = env.pageserver.tenant_dir(tenant_id)
# Check local is empty
assert not tenant_dir.exists()
# Check remote is empty
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
allowed_postfix="initdb.tar.zst",
)
def test_tenant_delete_is_resumed_on_attach(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
env.pageserver.allowed_errors.append(
# lucky race with stopping from flushing a layer we fail to schedule any uploads
".*layer flush task.+: could not flush frozen layer: update_metadata_file"
)
tenant_id = env.initial_tenant
ps_http = env.pageserver.http_client()
# create two timelines
for timeline in ["first", "second"]:
timeline_id = env.neon_cli.create_timeline(timeline, tenant_id=tenant_id)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
# sanity check, data should be there
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
# failpoint before we remove index_part from s3
failpoint = "timeline-delete-before-index-delete"
ps_http.configure_failpoints((failpoint, "return"))
env.pageserver.allowed_errors.extend(
(
# allow errors caused by failpoints
f".*failpoint: {failpoint}",
# From deletion polling
f".*NotFound: tenant {env.initial_tenant}.*",
# It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
".*shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
# error from http response is also logged
".*InternalServerError\\(Tenant is marked as deleted on remote storage.*",
'.*shutdown_pageserver{exit_code=0}: stopping left-over name="remote upload".*',
)
)
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
ps_http.tenant_delete(tenant_id)
tenant_info = wait_until_tenant_state(
pageserver_http=ps_http,
tenant_id=tenant_id,
expected_state="Broken",
iterations=iterations,
)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
reason = tenant_info["state"]["data"]["reason"]
# failpoint may not be the only error in the stack
assert reason.endswith(f"failpoint: {failpoint}"), reason
# now we stop pageserver and remove local tenant state
env.endpoints.stop_all()
env.pageserver.stop()
dir_to_clear = env.pageserver.tenant_dir()
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)
env.pageserver.start()
# now we call attach
env.pageserver.tenant_attach(tenant_id=tenant_id)
# delete should be resumed
wait_tenant_status_404(ps_http, tenant_id, iterations)
# we shouldn've created tenant dir on disk
tenant_path = env.pageserver.tenant_dir(tenant_id)
assert not tenant_path.exists()
ps_http.deletion_queue_flush(execute=True)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "inprogress"}) == 0
def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonEnvBuilder):
@@ -483,105 +197,6 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
deletion.join()
def test_tenant_delete_concurrent(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
"""
Validate that concurrent delete requests to the same tenant behave correctly:
exactly one should execute: the rest should give 202 responses but not start
another deletion.
This is a reproducer for https://github.com/neondatabase/neon/issues/5936
"""
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
ps_http = env.pageserver.http_client()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# Populate some data
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
env.pageserver.allowed_errors.extend(
[
# lucky race with stopping from flushing a layer we fail to schedule any uploads
".*layer flush task.+: could not flush frozen layer: update_metadata_file",
]
)
BEFORE_REMOVE_FAILPOINT = "tenant-delete-before-map-remove"
BEFORE_RUN_FAILPOINT = "tenant-delete-before-run"
# We will let the initial delete run until right before it would remove
# the tenant's TenantSlot. This pauses it in a state where the tenant
# is visible in Stopping state, and concurrent requests should fail with 4xx.
ps_http.configure_failpoints((BEFORE_REMOVE_FAILPOINT, "pause"))
def delete_tenant():
return ps_http.tenant_delete(tenant_id)
def hit_remove_failpoint():
return env.pageserver.assert_log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}")[1]
def hit_run_failpoint():
env.pageserver.assert_log_contains(f"at failpoint {BEFORE_RUN_FAILPOINT}")
with concurrent.futures.ThreadPoolExecutor() as executor:
background_200_req = executor.submit(delete_tenant)
assert background_200_req.result(timeout=10).status_code == 202
# Wait until the first request completes its work and is blocked on removing
# the TenantSlot from tenant manager.
log_cursor = wait_until(100, 0.1, hit_remove_failpoint)
assert log_cursor is not None
# Start another request: this should succeed without actually entering the deletion code
ps_http.tenant_delete(tenant_id)
assert not env.pageserver.log_contains(
f"at failpoint {BEFORE_RUN_FAILPOINT}", offset=log_cursor
)
# Start another background request, which will pause after acquiring a TenantSlotGuard
# but before completing.
ps_http.configure_failpoints((BEFORE_RUN_FAILPOINT, "pause"))
background_4xx_req = executor.submit(delete_tenant)
wait_until(100, 0.1, hit_run_failpoint)
# The TenantSlot is still present while the original request is hung before
# final removal
assert (
ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 1
)
# Permit the original request to run to success
ps_http.configure_failpoints((BEFORE_REMOVE_FAILPOINT, "off"))
# Permit the duplicate background request to run to completion and fail.
ps_http.configure_failpoints((BEFORE_RUN_FAILPOINT, "off"))
background_4xx_req.result(timeout=10)
assert not env.pageserver.log_contains(
f"at failpoint {BEFORE_RUN_FAILPOINT}", offset=log_cursor
)
# Physical deletion should have happened
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
# Zero tenants remain (we deleted the default tenant)
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 0
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "inprogress"}) == 0
def test_tenant_delete_races_timeline_creation(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
@@ -674,9 +289,7 @@ def test_tenant_delete_races_timeline_creation(
# Disable the failpoint and wait for deletion to finish
ps_http.configure_failpoints((BEFORE_INITDB_UPLOAD_FAILPOINT, "off"))
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
tenant_delete_wait_completed(ps_http, tenant_id, iterations, ignore_errors=True)
ps_http.tenant_delete(tenant_id)
# Physical deletion should have happened
assert_prefix_empty(
@@ -727,8 +340,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder)
env.start()
ps_http = env.pageserver.http_client()
iterations = poll_for_remote_storage_iterations(remote_storage_kind)
tenant_delete_wait_completed(ps_http, tenant_id, iterations)
ps_http.tenant_delete(tenant_id)
env.stop()
scrubber.scan_metadata()

View File

@@ -15,7 +15,6 @@ from fixtures.pageserver.utils import (
assert_tenant_state,
wait_for_last_record_lsn,
wait_for_upload,
wait_tenant_status_404,
)
from fixtures.remote_storage import (
LocalFsStorage,
@@ -348,9 +347,6 @@ def test_tenant_relocation(
# is no longer involved, and if it is, we will see the error
origin_http.tenant_detach(tenant_id)
# Wait a little, so that the detach operation has time to finish.
wait_tenant_status_404(origin_http, tenant_id, iterations=100, interval=1)
post_migration_check(ep_main, 500500, old_local_path_main)
post_migration_check(ep_second, 1001000, old_local_path_second)

View File

@@ -15,7 +15,6 @@ from fixtures.neon_fixtures import (
)
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
from fixtures.pageserver.utils import (
tenant_delete_wait_completed,
timeline_delete_wait_completed,
wait_until_tenant_active,
)
@@ -669,7 +668,7 @@ def test_synthetic_size_while_deleting(neon_env_builder: NeonEnvBuilder):
),
)
tenant_delete_wait_completed(client, env.initial_tenant, 10)
client.tenant_delete(env.initial_tenant)
client.configure_failpoints((failpoint, "off"))

View File

@@ -14,7 +14,7 @@ from fixtures.neon_fixtures import (
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
from fixtures.pageserver.utils import wait_tenant_status_404, wait_timeline_detail_404
from fixtures.pageserver.utils import wait_timeline_detail_404
from fixtures.remote_storage import LocalFsStorage
from fixtures.utils import assert_pageserver_backups_equal
@@ -578,7 +578,6 @@ def test_timeline_ancestor_errors(neon_env_builder: NeonEnvBuilder):
assert info.value.status_code == 400
client.tenant_delete(env.initial_tenant)
wait_tenant_status_404(client, env.initial_tenant, 10, 1)
with pytest.raises(PageserverApiException) as e:
client.detach_ancestor(env.initial_tenant, first_branch)

View File

@@ -26,7 +26,6 @@ from fixtures.pageserver.utils import (
assert_tenant_state,
timeline_delete_wait_completed,
wait_for_upload_queue_empty,
wait_tenant_status_404,
wait_until_tenant_active,
)
from fixtures.pg_version import PgVersion
@@ -864,39 +863,33 @@ def delete_lazy_activating(
):
pageserver_http = pageserver.http_client()
# Deletion itself won't complete due to our failpoint: Tenant::shutdown can't complete while calculating
# logical size is paused in a failpoint. So instead we will use a log observation to check that
# on-demand activation was triggered by the tenant deletion
log_match = f".*attach{{tenant_id={delete_tenant_id} shard_id=0000 gen=[0-9a-f]+}}: Activating tenant \\(on-demand\\).*"
if expect_attaching:
assert pageserver_http.tenant_status(delete_tenant_id)["state"]["slug"] == "Attaching"
with concurrent.futures.ThreadPoolExecutor() as executor:
log.info("Starting background delete")
def activated_on_demand():
assert pageserver.log_contains(log_match) is not None
def shutting_down():
assert pageserver.log_contains(".*Waiting for timelines.*") is not None
def delete_tenant():
pageserver_http.tenant_delete(delete_tenant_id)
background_delete = executor.submit(delete_tenant)
log.info(f"Waiting for activation message '{log_match}'")
# We expect deletion to enter shutdown of the tenant even though it's in the attaching state
try:
wait_until(10, 1, activated_on_demand)
# Deletion will get to the point in shutdown where it's waiting for timeline shutdown, then
# hang because of our failpoint blocking activation.
wait_until(10, 1, shutting_down)
finally:
log.info("Clearing failpoint")
pageserver_http.configure_failpoints(("timeline-calculate-logical-size-pause", "off"))
# Deletion should complete successfully now that failpoint is unblocked
# Deletion should complete successfully now that failpoint is unblocked and shutdown can complete
log.info("Joining background delete")
background_delete.result(timeout=10)
# Poll for deletion to complete
wait_tenant_status_404(pageserver_http, tenant_id=delete_tenant_id, iterations=40)
def test_timeline_logical_size_task_priority(neon_env_builder: NeonEnvBuilder):
"""