diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 3d3a9892bf..2098f848d5 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -351,6 +351,13 @@ paths: schema: type: string format: hex + - name: detach_ignored + in: query + required: false + schema: + type: boolean + description: | + When true, allow detaching a tenant whose state is ignored. post: description: | Remove tenant data (including all corresponding timelines) from pageserver's memory and file system. diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index d91e421a52..04b7928d31 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -384,10 +384,11 @@ async fn timeline_delete_handler(request: Request) -> Result) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; + let detach_ignored: Option = parse_query_param(&request, "detach_ignored")?; let state = get_state(&request); let conf = state.conf; - mgr::detach_tenant(conf, tenant_id) + mgr::detach_tenant(conf, tenant_id, detach_ignored.unwrap_or(false)) .instrument(info_span!("tenant_detach", tenant = %tenant_id)) .await?; diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index a4212ea8a6..26a2bb972c 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -315,10 +315,6 @@ pub async fn get_tenant( .get(&tenant_id) .ok_or(TenantStateError::NotFound(tenant_id))?; if active_only && !tenant.is_active() { - tracing::warn!( - "Tenant {tenant_id} is not active. 
Current state: {:?}", - tenant.current_state() - ); Err(TenantStateError::NotActive(tenant_id)) } else { Ok(Arc::clone(tenant)) @@ -350,17 +346,35 @@ pub enum TenantStateError { pub async fn detach_tenant( conf: &'static PageServerConf, tenant_id: TenantId, + detach_ignored: bool, ) -> Result<(), TenantStateError> { - remove_tenant_from_memory(tenant_id, async { - let local_tenant_directory = conf.tenant_path(&tenant_id); + let local_files_cleanup_operation = |tenant_id_to_clean| async move { + let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean); fs::remove_dir_all(&local_tenant_directory) .await .with_context(|| { - format!("Failed to remove local tenant directory {local_tenant_directory:?}") + format!("local tenant directory {local_tenant_directory:?} removal") })?; Ok(()) - }) - .await + }; + + let removal_result = + remove_tenant_from_memory(tenant_id, local_files_cleanup_operation(tenant_id)).await; + + // Ignored tenants are not present in memory and will bail the removal from memory operation. + // Before returning the error, check for ignored tenant removal case — we only need to clean its local files then. 
+ if detach_ignored && matches!(removal_result, Err(TenantStateError::NotFound(_))) { + let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id); + if tenant_ignore_mark.exists() { + info!("Detaching an ignored tenant"); + local_files_cleanup_operation(tenant_id) + .await + .with_context(|| format!("Ignored tenant {tenant_id} local files cleanup"))?; + return Ok(()); + } + } + + removal_result } pub async fn load_tenant( diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 6429b1e940..9929d3e66b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1119,7 +1119,9 @@ def neon_env_builder( class PageserverApiException(Exception): - pass + def __init__(self, message, status_code: int): + super().__init__(message) + self.status_code = status_code class PageserverHttpClient(requests.Session): @@ -1140,7 +1142,7 @@ class PageserverHttpClient(requests.Session): msg = res.json()["msg"] except: # noqa: E722 msg = "" - raise PageserverApiException(msg) from e + raise PageserverApiException(msg, res.status_code) from e def check_status(self): self.get(f"http://localhost:{self.port}/v1/status").raise_for_status() @@ -1190,8 +1192,12 @@ class PageserverHttpClient(requests.Session): res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach") self.verbose_error(res) - def tenant_detach(self, tenant_id: TenantId): - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach") + def tenant_detach(self, tenant_id: TenantId, detach_ignored=False): + params = {} + if detach_ignored: + params["detach_ignored"] = "true" + + res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params) self.verbose_error(res) def tenant_load(self, tenant_id: TenantId): diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index e061ab92a4..5db79eef4a 100644 --- 
a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -264,9 +264,11 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): with pytest.raises( expected_exception=PageserverApiException, match=f"NotFound: tenant {tenant_id}", - ): + ) as excinfo: pageserver_http.tenant_detach(tenant_id) + assert excinfo.value.status_code == 404 + # the error will be printed to the log too env.pageserver.allowed_errors.append(".*NotFound: tenant *") @@ -325,7 +327,91 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): pageserver_http.timeline_gc(tenant_id, timeline_id, 0) -# +# Creates and ignores a tenant, then detaches it: first, with no parameters (should fail), +# then with parameters to force ignored tenant detach (should not fail). +def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv): + env = neon_simple_env + client = env.pageserver.http_client() + + # create a new tenant + tenant_id, _ = env.neon_cli.create_tenant() + + # assert tenant exists on disk + assert (env.repo_dir / "tenants" / str(tenant_id)).exists() + + pg = env.postgres.create_start("main", tenant_id=tenant_id) + # we rely upon autocommit after each statement + pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + ] + ) + + # ignore tenant + client.tenant_ignore(tenant_id) + env.pageserver.allowed_errors.append(".*NotFound: tenant .*") + # ensure tenant couldn't be detached without the special flag for ignored tenant + log.info("detaching ignored tenant WITHOUT required flag") + with pytest.raises( + expected_exception=PageserverApiException, match=f"NotFound: tenant {tenant_id}" + ): + client.tenant_detach(tenant_id) + + log.info("tenant detached failed as expected") + + # ensure tenant is detached with ignore state + log.info("detaching ignored tenant with required flag") + client.tenant_detach(tenant_id, True) + log.info("ignored tenant 
detached without error") + + # check that nothing is left on disk for deleted tenant + assert not (env.repo_dir / "tenants" / str(tenant_id)).exists() + + # assert the tenant does not exist in the Pageserver + tenants_after_detach = [tenant["id"] for tenant in client.tenant_list()] + assert ( + tenant_id not in tenants_after_detach + ), f"Ignored and then detached tenant {tenant_id} \ + should not be present in pageserver's memory" + + +# Creates a tenant, and detaches it with an extra parameter that forces ignored tenant detach. +# Tenant should be detached without issues. +def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): + env = neon_simple_env + client = env.pageserver.http_client() + + # create a new tenant + tenant_id, _ = env.neon_cli.create_tenant() + + # assert tenant exists on disk + assert (env.repo_dir / "tenants" / str(tenant_id)).exists() + + pg = env.postgres.create_start("main", tenant_id=tenant_id) + # we rely upon autocommit after each statement + pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + ] + ) + + log.info("detaching regular tenant with detach ignored flag") + client.tenant_detach(tenant_id, True) + log.info("regular tenant detached without error") + + # check that nothing is left on disk for deleted tenant + assert not (env.repo_dir / "tenants" / str(tenant_id)).exists() + + # assert the tenant does not exist in the Pageserver + tenants_after_detach = [tenant["id"] for tenant in client.tenant_list()] + assert ( + tenant_id not in tenants_after_detach + ), f"Ignored and then detached tenant {tenant_id} \ + should not be present in pageserver's memory" + + @pytest.mark.parametrize("remote_storage_kind", available_remote_storages()) def test_detach_while_attaching( neon_env_builder: NeonEnvBuilder,