Do not attach deleted tenants (#5008)
A temporary solution until the proper fix (https://github.com/neondatabase/neon/issues/5006): resuming deletion requires more plumbing around Tenant::load/Tenant::attach, so for now we simply refuse to attach deleted tenants and will implement resume later. Additionally, fix `assert_prefix_empty`. It had a buggy prefix calculation, but because we only ever asserted the absence of objects, it appeared to work; once I started asserting the presence of objects as well, it failed. Added more "presence" asserts in other places to be confident that it works. Resolves [#5016](https://github.com/neondatabase/neon/issues/5016)
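The `assert_prefix_empty` fix comes down to how the per-call prefix is combined with the bucket-level `prefix_in_bucket` before calling `list_objects_v2`. Below is a minimal sketch of the corrected calculation; the helper name `effective_prefix` and the example prefix values are illustrative, not part of the patch.

from typing import Optional


def effective_prefix(prefix_in_bucket: Optional[str], prefix: Optional[str]) -> str:
    """Combine the bucket-level prefix with an optional per-call prefix."""
    prefix_in_bucket = prefix_in_bucket or ""
    if not prefix:
        # No per-call prefix: list everything under the bucket-level prefix.
        return prefix_in_bucket
    # A per-call prefix is always scoped under the bucket-level prefix.
    return "/".join((prefix_in_bucket, prefix))


# The old code passed `prefix or prefix_in_bucket or ""` straight to list_objects_v2, so a
# non-empty per-call prefix silently dropped prefix_in_bucket, the listing could come back
# empty, and the "is empty" assertion passed vacuously.
assert effective_prefix("test-xyz/pageserver", "tenants/1234") == "test-xyz/pageserver/tenants/1234"
assert effective_prefix("test-xyz/pageserver", None) == "test-xyz/pageserver"

With the listing split out into `list_prefix`, the new `assert_prefix_not_empty` helper can reuse it, which is what the added "presence" asserts in the tests rely on.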
@@ -238,6 +238,30 @@ async fn cleanup_remaining_fs_traces(
     Ok(())
 }
 
+pub(crate) async fn remote_delete_mark_exists(
+    conf: &PageServerConf,
+    tenant_id: &TenantId,
+    remote_storage: &GenericRemoteStorage,
+) -> anyhow::Result<bool> {
+    // If remote storage is there we rely on it
+    let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_id).context("path")?;
+
+    let result = backoff::retry(
+        || async { remote_storage.download(&remote_mark_path).await },
+        |e| matches!(e, DownloadError::NotFound),
+        SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
+        SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
+        "fetch_tenant_deletion_mark",
+    )
+    .await;
+
+    match result {
+        Ok(_) => Ok(true),
+        Err(DownloadError::NotFound) => Ok(false),
+        Err(e) => Err(anyhow::anyhow!(e)).context("remote_delete_mark_exists")?,
+    }
+}
+
 /// Orchestrates tenant shut down of all tasks, removes its in-memory structures,
 /// and deletes its data from both disk and s3.
 /// The sequence of steps:
@@ -372,22 +396,10 @@ impl DeleteTenantFlow {
             None => return Ok(None),
         };
 
-        // If remote storage is there we rely on it
-        let remote_mark_path = remote_tenant_delete_mark_path(conf, &tenant_id)?;
-
-        let result = backoff::retry(
-            || async { remote_storage.download(&remote_mark_path).await },
-            |e| matches!(e, DownloadError::NotFound),
-            SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
-            SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
-            "fetch_tenant_deletion_mark",
-        )
-        .await;
-
-        match result {
-            Ok(_) => Ok(acquire(tenant)),
-            Err(DownloadError::NotFound) => Ok(None),
-            Err(e) => Err(anyhow::anyhow!(e)).context("should_resume_deletion")?,
+        if remote_delete_mark_exists(conf, &tenant_id, remote_storage).await? {
+            Ok(acquire(tenant))
+        } else {
+            Ok(None)
         }
     }
@@ -27,7 +27,7 @@ use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME};
 use utils::fs_ext::PathExt;
 use utils::id::{TenantId, TimelineId};
 
-use super::delete::DeleteTenantError;
+use super::delete::{remote_delete_mark_exists, DeleteTenantError};
 use super::timeline::delete::DeleteTimelineFlow;
 
 /// The tenants known to the pageserver.
@@ -591,6 +591,12 @@ pub async fn attach_tenant(
     remote_storage: GenericRemoteStorage,
     ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
+    // Temporary solution, proper one would be to resume deletion, but that needs more plumbing around Tenant::load/Tenant::attach
+    // Corresponding issue https://github.com/neondatabase/neon/issues/5006
+    if remote_delete_mark_exists(conf, &tenant_id, &remote_storage).await? {
+        return Err(anyhow::anyhow!("Tenant is marked as deleted on remote storage").into());
+    }
+
     tenant_map_insert(tenant_id, || {
         let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach)?;
         // TODO: tenant directory remains on disk if we bail out from here on.
@@ -1,6 +1,8 @@
 import time
 from typing import TYPE_CHECKING, Any, Dict, Optional
 
+from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef
+
 from fixtures.log_helper import log
 from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
 from fixtures.remote_storage import RemoteStorageKind, S3Storage
@@ -230,6 +232,24 @@ if TYPE_CHECKING:
 
 
 def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
+    response = list_prefix(neon_env_builder, prefix)
+    objects = response.get("Contents")
+    assert (
+        response["KeyCount"] == 0
+    ), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
+
+
+def assert_prefix_not_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
+    response = list_prefix(neon_env_builder, prefix)
+    assert response["KeyCount"] != 0, f"remote dir with prefix {prefix} is empty: {response}"
+
+
+def list_prefix(
+    neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None
+) -> ListObjectsV2OutputTypeDef:
+    """
+    Note that this function takes into account prefix_in_bucket.
+    """
     # For local_fs we need to properly handle empty directories, which we currently dont, so for simplicity stick to s3 api.
     assert neon_env_builder.remote_storage_kind in (
         RemoteStorageKind.MOCK_S3,
@@ -239,15 +259,21 @@ def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
     assert isinstance(neon_env_builder.remote_storage, S3Storage)
     assert neon_env_builder.remote_storage_client is not None
 
+    prefix_in_bucket = neon_env_builder.remote_storage.prefix_in_bucket or ""
+    if not prefix:
+        prefix = prefix_in_bucket
+    else:
+        # real s3 tests have a unique per-test prefix
+        # mock_s3 tests use a special pageserver prefix for pageserver stuff
+        prefix = "/".join((prefix_in_bucket, prefix))
+
+    # Note that this doesn't use pagination, so the list is not guaranteed to be exhaustive.
     response = neon_env_builder.remote_storage_client.list_objects_v2(
         Delimiter="/",
         Bucket=neon_env_builder.remote_storage.bucket_name,
-        Prefix=prefix or neon_env_builder.remote_storage.prefix_in_bucket or "",
+        Prefix=prefix,
     )
-    objects = response.get("Contents")
-    assert (
-        response["KeyCount"] == 0
-    ), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
+    return response
 
 
 def wait_tenant_status_404(
@@ -92,8 +92,11 @@ def available_s3_storages() -> List[RemoteStorageKind]:
 class LocalFsStorage:
     root: Path
 
+    def tenant_path(self, tenant_id: TenantId) -> Path:
+        return self.root / "tenants" / str(tenant_id)
+
     def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
-        return self.root / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
+        return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
 
     def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
         return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
@@ -1,5 +1,7 @@
 import enum
+import os
+import shutil
 from pathlib import Path
 
 import pytest
 from fixtures.log_helper import log
@@ -13,13 +15,18 @@ from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     MANY_SMALL_LAYERS_TENANT_CONFIG,
     assert_prefix_empty,
+    assert_prefix_not_empty,
     poll_for_remote_storage_iterations,
     tenant_delete_wait_completed,
     wait_tenant_status_404,
     wait_until_tenant_active,
+    wait_until_tenant_state,
 )
-from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
+from fixtures.remote_storage import (
+    RemoteStorageKind,
+    available_remote_storages,
+    available_s3_storages,
+)
 from fixtures.types import TenantId
 from fixtures.utils import run_pg_bench_small
@@ -64,6 +71,17 @@ def test_tenant_delete_smoke(
             run_pg_bench_small(pg_bin, endpoint.connstr())
             wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
 
+        if remote_storage_kind in available_s3_storages():
+            assert_prefix_not_empty(
+                neon_env_builder,
+                prefix="/".join(
+                    (
+                        "tenants",
+                        str(tenant_id),
+                    )
+                ),
+            )
+
         parent = timeline
 
     iterations = poll_for_remote_storage_iterations(remote_storage_kind)
@@ -73,7 +91,7 @@ def test_tenant_delete_smoke(
     tenant_path = env.tenant_dir(tenant_id=tenant_id)
     assert not tenant_path.exists()
 
-    if remote_storage_kind in [RemoteStorageKind.MOCK_S3, RemoteStorageKind.REAL_S3]:
+    if remote_storage_kind in available_s3_storages():
         assert_prefix_empty(
             neon_env_builder,
             prefix="/".join(
@@ -189,6 +207,17 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
     else:
         last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
 
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
     ps_http.configure_failpoints((failpoint, "return"))
 
     iterations = poll_for_remote_storage_iterations(remote_storage_kind)
@@ -241,8 +270,12 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
 
     tenant_delete_wait_completed(ps_http, tenant_id, iterations=iterations)
 
-    # Check remote is impty
-    if remote_storage_kind is RemoteStorageKind.MOCK_S3:
+    tenant_dir = env.tenant_dir(tenant_id)
+    # Check local is empty
+    assert not tenant_dir.exists()
+
+    # Check remote is empty
+    if remote_storage_kind in available_s3_storages():
         assert_prefix_empty(
             neon_env_builder,
             prefix="/".join(
@@ -253,10 +286,118 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
             ),
         )
 
-    tenant_dir = env.tenant_dir(tenant_id)
-    # Check local is empty
-    assert not tenant_dir.exists()
+
+# TODO resume deletion (https://github.com/neondatabase/neon/issues/5006)
+@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
+def test_deleted_tenant_ignored_on_attach(
+    neon_env_builder: NeonEnvBuilder,
+    remote_storage_kind: RemoteStorageKind,
+    pg_bin: PgBin,
+):
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_deleted_tenant_ignored_on_attach",
+    )
+
+    env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
+
+    tenant_id = env.initial_tenant
+
+    ps_http = env.pageserver.http_client()
+    # create two timelines
+    for timeline in ["first", "second"]:
+        timeline_id = env.neon_cli.create_timeline(timeline, tenant_id=tenant_id)
+        with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
+            run_pg_bench_small(pg_bin, endpoint.connstr())
+            wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
+
+    # sanity check, data should be there
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
+    # failpoint before we remove index_part from s3
+    failpoint = "timeline-delete-before-index-delete"
+    ps_http.configure_failpoints((failpoint, "return"))
+
+    env.pageserver.allowed_errors.extend(
+        (
+            # allow errors caused by failpoints
+            f".*failpoint: {failpoint}",
+            # It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
+            ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            # error from http response is also logged
+            ".*InternalServerError\\(Tenant is marked as deleted on remote storage.*",
+            '.*shutdown_pageserver{exit_code=0}: stopping left-over name="remote upload".*',
+        )
+    )
+
+    iterations = poll_for_remote_storage_iterations(remote_storage_kind)
+
+    ps_http.tenant_delete(tenant_id)
+
+    tenant_info = wait_until_tenant_state(
+        pageserver_http=ps_http,
+        tenant_id=tenant_id,
+        expected_state="Broken",
+        iterations=iterations,
+    )
+
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
+    reason = tenant_info["state"]["data"]["reason"]
+    # failpoint may not be the only error in the stack
+    assert reason.endswith(f"failpoint: {failpoint}"), reason
+
+    # now we stop the pageserver and remove local tenant state
+    env.endpoints.stop_all()
+    env.pageserver.stop()
+
+    dir_to_clear = Path(env.repo_dir) / "tenants"
+    shutil.rmtree(dir_to_clear)
+    os.mkdir(dir_to_clear)
+
+    env.pageserver.start()
+
+    # now we call attach
+    with pytest.raises(
+        PageserverApiException, match="Tenant is marked as deleted on remote storage"
+    ):
+        ps_http.tenant_attach(tenant_id=tenant_id)
+
+    # deletion should be resumed here (not implemented yet)
+    # wait_tenant_status_404(ps_http, tenant_id, iterations)
+
+    # we shouldn't have created the tenant dir on disk
+    tenant_path = env.tenant_dir(tenant_id=tenant_id)
+    assert not tenant_path.exists()
+
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
 
 # TODO test concurrent deletions with "hang" failpoint
 # TODO test tenant delete continues after attach
@@ -18,6 +18,7 @@ from fixtures.neon_fixtures import (
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     assert_prefix_empty,
+    assert_prefix_not_empty,
     poll_for_remote_storage_iterations,
     timeline_delete_wait_completed,
     wait_for_last_record_lsn,
@@ -30,6 +31,7 @@ from fixtures.remote_storage import (
     LocalFsStorage,
     RemoteStorageKind,
     available_remote_storages,
+    available_s3_storages,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
 from fixtures.utils import query_scalar, wait_until
@@ -212,6 +214,19 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
     else:
         last_flush_lsn_upload(env, endpoint, env.initial_tenant, timeline_id)
 
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(env.initial_tenant),
+                    "timelines",
+                    str(timeline_id),
+                )
+            ),
+        )
+
     env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
     # It appears when we stopped flush loop during deletion and then pageserver is stopped
     env.pageserver.allowed_errors.append(
@@ -298,7 +313,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
         ps_http, env.initial_tenant, timeline_id, iterations=iterations
     )
 
-    # Check remote is impty
+    # Check remote is empty
     if remote_storage_kind is RemoteStorageKind.MOCK_S3:
         assert_prefix_empty(
             neon_env_builder,
@@ -739,6 +754,19 @@ def test_timeline_delete_works_for_remote_smoke(
 
         timeline_ids.append(timeline_id)
 
+    for timeline_id in timeline_ids:
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(env.initial_tenant),
+                    "timelines",
+                    str(timeline_id),
+                )
+            ),
+        )
+
     for timeline_id in reversed(timeline_ids):
         # note that we need to finish previous deletion before scheduling next one
         # otherwise we can get an "HasChildren" error if deletion is not fast enough (real_s3)
@@ -758,11 +786,7 @@ def test_timeline_delete_works_for_remote_smoke(
 
     # for some reason the check above doesnt immediately take effect for the below.
     # Assume it is mock server inconsistency and check twice.
-    wait_until(
-        2,
-        0.5,
-        lambda: assert_prefix_empty(neon_env_builder),
-    )
+    wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder))
 
 
 def test_delete_orphaned_objects(