Allow calling detach on ignored tenant (#3834)

## Describe your changes
Added a query param to detach API
Allow removing the local state of a tenant even if it's not in memory
(i.e. after the ignore API was called for it)
## Issue ticket number and link
#3828
## Checklist before requesting a review
- [x] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

---------

Co-authored-by: Kirill Bulatov <kirill@neon.tech>
This commit is contained in:
Shany Pozin
2023-03-22 09:17:00 +02:00
committed by GitHub
parent dd22c87100
commit 0f7de84785
5 changed files with 130 additions and 16 deletions

View File

@@ -351,6 +351,13 @@ paths:
schema:
type: string
format: hex
- name: detach_ignored
in: query
required: false
schema:
type: boolean
description: |
When true, allow detaching a tenant whose state is ignored.
post:
description: |
Remove tenant data (including all corresponding timelines) from pageserver's memory and file system.

View File

@@ -384,10 +384,11 @@ async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let detach_ignored: Option<bool> = parse_query_param(&request, "detach_ignored")?;
let state = get_state(&request);
let conf = state.conf;
mgr::detach_tenant(conf, tenant_id)
mgr::detach_tenant(conf, tenant_id, detach_ignored.unwrap_or(false))
.instrument(info_span!("tenant_detach", tenant = %tenant_id))
.await?;

View File

@@ -315,10 +315,6 @@ pub async fn get_tenant(
.get(&tenant_id)
.ok_or(TenantStateError::NotFound(tenant_id))?;
if active_only && !tenant.is_active() {
tracing::warn!(
"Tenant {tenant_id} is not active. Current state: {:?}",
tenant.current_state()
);
Err(TenantStateError::NotActive(tenant_id))
} else {
Ok(Arc::clone(tenant))
@@ -350,17 +346,35 @@ pub enum TenantStateError {
pub async fn detach_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
detach_ignored: bool,
) -> Result<(), TenantStateError> {
remove_tenant_from_memory(tenant_id, async {
let local_tenant_directory = conf.tenant_path(&tenant_id);
let local_files_cleanup_operation = |tenant_id_to_clean| async move {
let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean);
fs::remove_dir_all(&local_tenant_directory)
.await
.with_context(|| {
format!("Failed to remove local tenant directory {local_tenant_directory:?}")
format!("local tenant directory {local_tenant_directory:?} removal")
})?;
Ok(())
})
.await
};
let removal_result =
remove_tenant_from_memory(tenant_id, local_files_cleanup_operation(tenant_id)).await;
// Ignored tenants are not present in memory and will bail the removal from memory operation.
// Before returning the error, check for ignored tenant removal case — we only need to clean its local files then.
if detach_ignored && matches!(removal_result, Err(TenantStateError::NotFound(_))) {
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
if tenant_ignore_mark.exists() {
info!("Detaching an ignored tenant");
local_files_cleanup_operation(tenant_id)
.await
.with_context(|| format!("Ignored tenant {tenant_id} local files cleanup"))?;
return Ok(());
}
}
removal_result
}
pub async fn load_tenant(

View File

@@ -1119,7 +1119,9 @@ def neon_env_builder(
class PageserverApiException(Exception):
    """
    Error raised for a failed pageserver HTTP API call.

    Carries the HTTP status code of the failed response in `status_code`,
    so that callers can assert on it in addition to the error message.
    """

    def __init__(self, message, status_code: int):
        super().__init__(message)
        self.status_code = status_code
class PageserverHttpClient(requests.Session):
@@ -1140,7 +1142,7 @@ class PageserverHttpClient(requests.Session):
msg = res.json()["msg"]
except: # noqa: E722
msg = ""
raise PageserverApiException(msg) from e
raise PageserverApiException(msg, res.status_code) from e
def check_status(self):
    """GET the pageserver's /v1/status endpoint and raise if the response is an HTTP error."""
    response = self.get(f"http://localhost:{self.port}/v1/status")
    response.raise_for_status()
@@ -1190,8 +1192,12 @@ class PageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach")
self.verbose_error(res)
def tenant_detach(self, tenant_id: TenantId, detach_ignored=False):
    """
    Detach `tenant_id` via POST /v1/tenant/{tenant_id}/detach.

    When `detach_ignored` is truthy, the `detach_ignored=true` query parameter
    is added to the request; otherwise no query parameters are sent.
    The response is then passed to `verbose_error`.
    """
    params = {}
    if detach_ignored:
        params["detach_ignored"] = "true"

    res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params)
    self.verbose_error(res)
def tenant_load(self, tenant_id: TenantId):

View File

@@ -264,9 +264,11 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
with pytest.raises(
expected_exception=PageserverApiException,
match=f"NotFound: tenant {tenant_id}",
):
) as excinfo:
pageserver_http.tenant_detach(tenant_id)
assert excinfo.value.status_code == 404
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*NotFound: tenant *")
@@ -325,7 +327,91 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
#
# Creates and ignores a tenant, then detaches it: first, with no parameters (should fail),
# then with the parameter that forces ignored tenant detach (should not fail).
def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv):
    env = neon_simple_env
    client = env.pageserver.http_client()

    # create a new tenant
    tenant_id, _ = env.neon_cli.create_tenant()
    tenant_dir = env.repo_dir / "tenants" / str(tenant_id)

    # assert tenant exists on disk
    assert tenant_dir.exists()

    pg = env.postgres.create_start("main", tenant_id=tenant_id)
    # we rely upon autocommit after each statement
    pg.safe_psql_many(
        queries=[
            "CREATE TABLE t(key int primary key, value text)",
            "INSERT INTO t SELECT generate_series(1,100000), 'payload'",
        ]
    )

    # ignore tenant
    client.tenant_ignore(tenant_id)
    # the NotFound error below will be printed to the pageserver log too
    env.pageserver.allowed_errors.append(".*NotFound: tenant .*")

    # ensure the tenant cannot be detached without the special flag for ignored tenants
    log.info("detaching ignored tenant WITHOUT required flag")
    with pytest.raises(
        expected_exception=PageserverApiException, match=f"NotFound: tenant {tenant_id}"
    ) as excinfo:
        client.tenant_detach(tenant_id)
    # same NotFound error as for a tenant unknown to the pageserver, so the same HTTP status
    assert excinfo.value.status_code == 404
    log.info("tenant detached failed as expected")

    # ensure the ignored tenant is detached when the flag is passed
    log.info("detaching ignored tenant with required flag")
    client.tenant_detach(tenant_id, detach_ignored=True)
    log.info("ignored tenant detached without error")

    # check that nothing is left on disk for deleted tenant
    assert not tenant_dir.exists()

    # assert the tenant does not exist in the pageserver;
    # ids are compared as strings so the check does not depend on the id type returned by the API
    tenants_after_detach = [str(tenant["id"]) for tenant in client.tenant_list()]
    assert str(tenant_id) not in tenants_after_detach, (
        f"Ignored and then detached tenant {tenant_id} "
        "should not be present in pageserver's memory"
    )
# Creates a tenant, and detaches it with the extra parameter that forces ignored tenant detach.
# Tenant should be detached without issues.
def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv):
    env = neon_simple_env
    client = env.pageserver.http_client()

    # create a new tenant
    tenant_id, _ = env.neon_cli.create_tenant()
    tenant_dir = env.repo_dir / "tenants" / str(tenant_id)

    # assert tenant exists on disk
    assert tenant_dir.exists()

    pg = env.postgres.create_start("main", tenant_id=tenant_id)
    # we rely upon autocommit after each statement
    pg.safe_psql_many(
        queries=[
            "CREATE TABLE t(key int primary key, value text)",
            "INSERT INTO t SELECT generate_series(1,100000), 'payload'",
        ]
    )

    # the flag must be harmless for a tenant that was never ignored
    log.info("detaching regular tenant with detach ignored flag")
    client.tenant_detach(tenant_id, detach_ignored=True)
    log.info("regular tenant detached without error")

    # check that nothing is left on disk for deleted tenant
    assert not tenant_dir.exists()

    # assert the tenant does not exist in the pageserver;
    # ids are compared as strings so the check does not depend on the id type returned by the API
    tenants_after_detach = [str(tenant["id"]) for tenant in client.tenant_list()]
    assert str(tenant_id) not in tenants_after_detach, (
        f"Detached regular tenant {tenant_id} "
        "should not be present in pageserver's memory"
    )
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_detach_while_attaching(
neon_env_builder: NeonEnvBuilder,