pageserver: remove attach/detach apis (#8134)

## Problem

These APIs have been deprecated for some time, but were still used from
test code.

Closes: https://github.com/neondatabase/neon/issues/4282

## Summary of changes

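- Remove the deprecated `POST /v1/tenant/:tenant_id/attach` and
`POST /v1/tenant/:tenant_id/detach` pageserver endpoints, together with
the `TenantAttachRequest` / `TenantAttachConfig` request models that
only the attach handler consumed.
- Fold `json_request_or_empty_body` into `json_request`: an empty request
body is now always rejected as a bad request ("missing request body").
- Take the exclusive per-tenant operation lock in the storage
controller's `attach_hook`.
- It is still convenient to do a "tenant_attach" from a test without
having to write out a location_conf body, so those test methods have
been retained with implementations that call through to their
location_conf equivalent.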
This commit is contained in:
John Spray
2024-06-25 17:38:06 +01:00
committed by GitHub
parent 64a4461191
commit 07f21dd6b6
12 changed files with 54 additions and 260 deletions

View File

@@ -607,31 +607,6 @@ impl TenantConfigRequest {
} }
} }
#[derive(Debug, Deserialize)]
pub struct TenantAttachRequest {
#[serde(default)]
pub config: TenantAttachConfig,
#[serde(default)]
pub generation: Option<u32>,
}
/// Newtype to enforce deny_unknown_fields on TenantConfig for
/// its usage inside `TenantAttachRequest`.
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct TenantAttachConfig {
#[serde(flatten)]
allowing_unknown_fields: TenantConfig,
}
impl std::ops::Deref for TenantAttachConfig {
type Target = TenantConfig;
fn deref(&self) -> &Self::Target {
&self.allowing_unknown_fields
}
}
/// See [`TenantState::attachment_status`] and the OpenAPI docs for context. /// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
#[derive(Serialize, Deserialize, Clone)] #[derive(Serialize, Deserialize, Clone)]
#[serde(tag = "slug", content = "data", rename_all = "snake_case")] #[serde(tag = "slug", content = "data", rename_all = "snake_case")]
@@ -1554,18 +1529,6 @@ mod tests {
"expect unknown field `unknown_field` error, got: {}", "expect unknown field `unknown_field` error, got: {}",
err err
); );
let attach_request = json!({
"config": {
"unknown_field": "unknown_value".to_string(),
},
});
let err = serde_json::from_value::<TenantAttachRequest>(attach_request).unwrap_err();
assert!(
err.to_string().contains("unknown field `unknown_field`"),
"expect unknown field `unknown_field` error, got: {}",
err
);
} }
#[test] #[test]

View File

@@ -8,22 +8,15 @@ use super::error::ApiError;
pub async fn json_request<T: for<'de> Deserialize<'de>>( pub async fn json_request<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>, request: &mut Request<Body>,
) -> Result<T, ApiError> { ) -> Result<T, ApiError> {
json_request_or_empty_body(request)
.await?
.context("missing request body")
.map_err(ApiError::BadRequest)
}
/// Will be removed as part of <https://github.com/neondatabase/neon/issues/4282>
pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>,
) -> Result<Option<T>, ApiError> {
let body = hyper::body::aggregate(request.body_mut()) let body = hyper::body::aggregate(request.body_mut())
.await .await
.context("Failed to read request body") .context("Failed to read request body")
.map_err(ApiError::BadRequest)?; .map_err(ApiError::BadRequest)?;
if body.remaining() == 0 { if body.remaining() == 0 {
return Ok(None); return Err(ApiError::BadRequest(anyhow::anyhow!(
"missing request body"
)));
} }
let mut deser = serde_json::de::Deserializer::from_reader(body.reader()); let mut deser = serde_json::de::Deserializer::from_reader(body.reader());
@@ -31,7 +24,6 @@ pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
serde_path_to_error::deserialize(&mut deser) serde_path_to_error::deserialize(&mut deser)
// intentionally stringify because the debug version is not helpful in python logs // intentionally stringify because the debug version is not helpful in python logs
.map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}")) .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}"))
.map(Some)
.map_err(ApiError::BadRequest) .map_err(ApiError::BadRequest)
} }

View File

@@ -367,16 +367,7 @@ paths:
$ref: "#/components/schemas/TenantLocationConfigResponse" $ref: "#/components/schemas/TenantLocationConfigResponse"
"409": "409":
description: | description: |
The tenant is already known to Pageserver in some way, The tenant is already being modified, perhaps by a concurrent call to this API
and hence this `/attach` call has been rejected.
Some examples of how this can happen:
- tenant was created on this pageserver
- tenant attachment was started by an earlier call to `/attach`.
Callers should poll the tenant status's `attachment_status` field,
like for status 202. See the longer description for `POST /attach`
for details.
content: content:
application/json: application/json:
schema: schema:
@@ -762,8 +753,6 @@ components:
For example this can be caused by s3 being unreachable. The retry may be implemented For example this can be caused by s3 being unreachable. The retry may be implemented
with call to detach, though it would be better to not automate it and inspec failed state with call to detach, though it would be better to not automate it and inspec failed state
manually before proceeding with a retry. manually before proceeding with a retry.
See the tenant `/attach` endpoint for more information.
type: object type: object
required: required:
- slug - slug

View File

@@ -31,13 +31,11 @@ use pageserver_api::models::TenantShardLocation;
use pageserver_api::models::TenantShardSplitRequest; use pageserver_api::models::TenantShardSplitRequest;
use pageserver_api::models::TenantShardSplitResponse; use pageserver_api::models::TenantShardSplitResponse;
use pageserver_api::models::TenantSorting; use pageserver_api::models::TenantSorting;
use pageserver_api::models::TenantState;
use pageserver_api::models::TopTenantShardItem; use pageserver_api::models::TopTenantShardItem;
use pageserver_api::models::TopTenantShardsRequest; use pageserver_api::models::TopTenantShardsRequest;
use pageserver_api::models::TopTenantShardsResponse; use pageserver_api::models::TopTenantShardsResponse;
use pageserver_api::models::{ use pageserver_api::models::{
DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest, DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantLocationConfigRequest,
TenantLocationConfigRequest,
}; };
use pageserver_api::shard::ShardCount; use pageserver_api::shard::ShardCount;
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
@@ -51,7 +49,6 @@ use utils::auth::JwtAuth;
use utils::failpoint_support::failpoints_handler; use utils::failpoint_support::failpoints_handler;
use utils::http::endpoint::prometheus_metrics_handler; use utils::http::endpoint::prometheus_metrics_handler;
use utils::http::endpoint::request_span; use utils::http::endpoint::request_span;
use utils::http::json::json_request_or_empty_body;
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
use crate::context::{DownloadBehavior, RequestContext}; use crate::context::{DownloadBehavior, RequestContext};
@@ -821,58 +818,6 @@ async fn get_timestamp_of_lsn_handler(
} }
} }
async fn tenant_attach_handler(
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let maybe_body: Option<TenantAttachRequest> = json_request_or_empty_body(&mut request).await?;
let tenant_conf = match &maybe_body {
Some(request) => TenantConfOpt::try_from(&*request.config).map_err(ApiError::BadRequest)?,
None => TenantConfOpt::default(),
};
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
info!("Handling tenant attach {tenant_id}");
let state = get_state(&request);
let generation = get_request_generation(state, maybe_body.as_ref().and_then(|r| r.generation))?;
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let shard_params = ShardParameters::default();
let location_conf = LocationConf::attached_single(tenant_conf, generation, &shard_params);
let tenant = state
.tenant_manager
.upsert_location(tenant_shard_id, location_conf, None, SpawnMode::Eager, &ctx)
.await?;
let Some(tenant) = tenant else {
// This should never happen: indicates a bug in upsert_location
return Err(ApiError::InternalServerError(anyhow::anyhow!(
"Upsert succeeded but didn't return tenant!"
)));
};
// We might have successfully constructed a Tenant, but it could still
// end up in a broken state:
if let TenantState::Broken {
reason,
backtrace: _,
} = tenant.current_state()
{
return Err(ApiError::InternalServerError(anyhow::anyhow!(
"Tenant state is Broken: {reason}"
)));
}
json_response(StatusCode::ACCEPTED, ())
}
async fn timeline_delete_handler( async fn timeline_delete_handler(
request: Request<Body>, request: Request<Body>,
_cancel: CancellationToken, _cancel: CancellationToken,
@@ -903,26 +848,6 @@ async fn timeline_delete_handler(
json_response(StatusCode::ACCEPTED, ()) json_response(StatusCode::ACCEPTED, ())
} }
async fn tenant_detach_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
// This is a legacy API (`/location_conf` is the replacement). It only supports unsharded tenants
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let state = get_state(&request);
let conf = state.conf;
state
.tenant_manager
.detach_tenant(conf, tenant_shard_id, &state.deletion_queue_client)
.instrument(info_span!("tenant_detach", %tenant_id, shard_id=%tenant_shard_id.shard_slug()))
.await?;
json_response(StatusCode::OK, ())
}
async fn tenant_reset_handler( async fn tenant_reset_handler(
request: Request<Body>, request: Request<Body>,
_cancel: CancellationToken, _cancel: CancellationToken,
@@ -2711,12 +2636,6 @@ pub fn make_router(
.post("/v1/tenant/:tenant_shard_id/timeline", |r| { .post("/v1/tenant/:tenant_shard_id/timeline", |r| {
api_handler(r, timeline_create_handler) api_handler(r, timeline_create_handler)
}) })
.post("/v1/tenant/:tenant_id/attach", |r| {
api_handler(r, tenant_attach_handler)
})
.post("/v1/tenant/:tenant_id/detach", |r| {
api_handler(r, tenant_detach_handler)
})
.post("/v1/tenant/:tenant_shard_id/reset", |r| { .post("/v1/tenant/:tenant_shard_id/reset", |r| {
api_handler(r, tenant_reset_handler) api_handler(r, tenant_reset_handler)
}) })

View File

@@ -1231,6 +1231,13 @@ impl Service {
&self, &self,
attach_req: AttachHookRequest, attach_req: AttachHookRequest,
) -> anyhow::Result<AttachHookResponse> { ) -> anyhow::Result<AttachHookResponse> {
let _tenant_lock = trace_exclusive_lock(
&self.tenant_op_locks,
attach_req.tenant_shard_id.tenant_id,
TenantOperations::ShardSplit,
)
.await;
// This is a test hook. To enable using it on tenants that were created directly with // This is a test hook. To enable using it on tenants that were created directly with
// the pageserver API (not via this service), we will auto-create any missing tenant // the pageserver API (not via this service), we will auto-create any missing tenant
// shards with default state. // shards with default state.

View File

@@ -2684,7 +2684,6 @@ class NeonPageserver(PgProtocol, LogUtils):
self, self,
tenant_id: TenantId, tenant_id: TenantId,
config: None | Dict[str, Any] = None, config: None | Dict[str, Any] = None,
config_null: bool = False,
generation: Optional[int] = None, generation: Optional[int] = None,
override_storage_controller_generation: bool = False, override_storage_controller_generation: bool = False,
): ):
@@ -2702,7 +2701,6 @@ class NeonPageserver(PgProtocol, LogUtils):
return client.tenant_attach( return client.tenant_attach(
tenant_id, tenant_id,
config, config,
config_null,
generation=generation, generation=generation,
) )

View File

@@ -1,6 +1,5 @@
from __future__ import annotations from __future__ import annotations
import json
import time import time
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass
@@ -253,39 +252,30 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
self, self,
tenant_id: Union[TenantId, TenantShardId], tenant_id: Union[TenantId, TenantShardId],
config: None | Dict[str, Any] = None, config: None | Dict[str, Any] = None,
config_null: bool = False,
generation: Optional[int] = None, generation: Optional[int] = None,
): ):
if config_null: config = config or {}
assert config is None
body: Any = None
else:
# null-config is prohibited by the API
config = config or {}
body = {"config": config}
if generation is not None:
body.update({"generation": generation})
res = self.post( return self.tenant_location_conf(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach", tenant_id,
data=json.dumps(body), location_conf={
headers={"Content-Type": "application/json"}, "mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": config,
"generation": generation,
},
) )
self.verbose_error(res)
def tenant_detach(self, tenant_id: TenantId, detach_ignored=False, timeout_secs=None): def tenant_detach(self, tenant_id: TenantId):
params = {} return self.tenant_location_conf(
if detach_ignored: tenant_id,
params["detach_ignored"] = "true" location_conf={
"mode": "Detached",
kwargs = {} "secondary_conf": None,
if timeout_secs is not None: "tenant_conf": {},
kwargs["timeout"] = timeout_secs "generation": None,
},
res = self.post(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params, **kwargs
) )
self.verbose_error(res)
def tenant_reset(self, tenant_id: Union[TenantId, TenantShardId], drop_cache: bool): def tenant_reset(self, tenant_id: Union[TenantId, TenantShardId], drop_cache: bool):
params = {} params = {}

View File

@@ -7,7 +7,7 @@ from fixtures.neon_fixtures import (
NeonEnv, NeonEnv,
NeonEnvBuilder, NeonEnvBuilder,
) )
from fixtures.pageserver.http import PageserverApiException, TenantConfig from fixtures.pageserver.http import TenantConfig
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.utils import wait_until from fixtures.utils import wait_until
@@ -82,8 +82,8 @@ def test_null_body(negative_env: NegativeTests):
tenant_id = negative_env.tenant_id tenant_id = negative_env.tenant_id
ps_http = env.pageserver.http_client() ps_http = env.pageserver.http_client()
res = ps_http.post( res = ps_http.put(
f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", f"{ps_http.base_url}/v1/tenant/{tenant_id}/location_config",
data=b"null", data=b"null",
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
) )
@@ -99,35 +99,16 @@ def test_null_config(negative_env: NegativeTests):
tenant_id = negative_env.tenant_id tenant_id = negative_env.tenant_id
ps_http = env.pageserver.http_client() ps_http = env.pageserver.http_client()
res = ps_http.post( res = ps_http.put(
f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", f"{ps_http.base_url}/v1/tenant/{tenant_id}/location_config",
data=b'{"config": null}', json={"mode": "AttachedSingle", "generation": 1, "tenant_conf": None},
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
) )
assert res.status_code == 400 assert res.status_code == 400
def test_config_with_unknown_keys_is_bad_request(negative_env: NegativeTests):
"""
If we send a config with unknown keys, the request should be rejected with status 400.
"""
env = negative_env.neon_env
tenant_id = negative_env.tenant_id
config_with_unknown_keys = {
"compaction_period": "1h",
"this_key_does_not_exist": "some value",
}
with pytest.raises(PageserverApiException) as e:
env.pageserver.tenant_attach(tenant_id, config=config_with_unknown_keys)
assert e.type == PageserverApiException
assert e.value.status_code == 400
@pytest.mark.parametrize("content_type", [None, "application/json"]) @pytest.mark.parametrize("content_type", [None, "application/json"])
def test_no_config(positive_env: NeonEnv, content_type: Optional[str]): def test_empty_config(positive_env: NeonEnv, content_type: Optional[str]):
""" """
When the 'config' body attribute is omitted, the request should be accepted When the 'config' body attribute is omitted, the request should be accepted
and the tenant should use the default configuration and the tenant should use the default configuration
@@ -141,11 +122,13 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
ps_http.tenant_detach(tenant_id) ps_http.tenant_detach(tenant_id)
assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()] assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
body = {"generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)} ps_http.put(
f"{ps_http.base_url}/v1/tenant/{tenant_id}/location_config",
ps_http.post( json={
f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", "mode": "AttachedSingle",
json=body, "generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id),
"tenant_conf": {},
},
headers=None if content_type else {"Content-Type": "application/json"}, headers=None if content_type else {"Content-Type": "application/json"},
).raise_for_status() ).raise_for_status()

View File

@@ -164,13 +164,14 @@ def test_remote_storage_backup_and_restore(
"data": {"reason": "storage-sync-list-remote-timelines"}, "data": {"reason": "storage-sync-list-remote-timelines"},
} }
# Even though the tenant is broken, subsequent calls to location_conf API will succeed, but
# the tenant will always end up in a broken state as a result of the failpoint.
# Ensure that even though the tenant is broken, retrying the attachment fails # Ensure that even though the tenant is broken, retrying the attachment fails
with pytest.raises(Exception, match="Tenant state is Broken"): tenant_info = wait_until_tenant_state(pageserver_http, tenant_id, "Broken", 15)
# Use same generation as in previous attempt gen_state = env.storage_controller.inspect(tenant_id)
gen_state = env.storage_controller.inspect(tenant_id) assert gen_state is not None
assert gen_state is not None generation = gen_state[0]
generation = gen_state[0] env.pageserver.tenant_attach(tenant_id, generation=generation)
env.pageserver.tenant_attach(tenant_id, generation=generation)
# Restart again, this implicitly clears the failpoint. # Restart again, this implicitly clears the failpoint.
# test_remote_failures=1 remains active, though, as it's in the pageserver config. # test_remote_failures=1 remains active, though, as it's in the pageserver config.

View File

@@ -275,16 +275,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS) env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
# first check for non existing tenant
tenant_id = TenantId.generate()
with pytest.raises(
expected_exception=PageserverApiException,
match=f"NotFound: tenant {tenant_id}",
) as excinfo:
pageserver_http.tenant_detach(tenant_id)
assert excinfo.value.status_code == 404
# create new nenant # create new nenant
tenant_id, timeline_id = env.neon_cli.create_tenant() tenant_id, timeline_id = env.neon_cli.create_tenant()
@@ -344,44 +334,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
pageserver_http.timeline_gc(tenant_id, timeline_id, 0) pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
# Creates a tenant, and detaches it with extra paremeter that forces ignored tenant detach.
# Tenant should be detached without issues.
def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
# create a new tenant
tenant_id, _ = env.neon_cli.create_tenant()
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
# assert tenant exists on disk
assert env.pageserver.tenant_dir(tenant_id).exists()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
endpoint.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
]
)
log.info("detaching regular tenant with detach ignored flag")
client.tenant_detach(tenant_id, True)
log.info("regular tenant detached without error")
# check that nothing is left on disk for deleted tenant
assert not env.pageserver.tenant_dir(tenant_id).exists()
# assert the tenant does not exists in the Pageserver
tenants_after_detach = [tenant["id"] for tenant in client.tenant_list()]
assert (
tenant_id not in tenants_after_detach
), f"Ignored and then detached tenant {tenant_id} should not be present in pageserver's memory"
def test_detach_while_attaching( def test_detach_while_attaching(
neon_env_builder: NeonEnvBuilder, neon_env_builder: NeonEnvBuilder,
): ):

View File

@@ -840,7 +840,7 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
# Detaching a stuck tenant should proceed promptly # Detaching a stuck tenant should proceed promptly
# (reproducer for https://github.com/neondatabase/neon/pull/6430) # (reproducer for https://github.com/neondatabase/neon/pull/6430)
env.pageserver.http_client().tenant_detach(detach_tenant_id, timeout_secs=10) env.pageserver.http_client().tenant_detach(detach_tenant_id)
tenant_ids.remove(detach_tenant_id) tenant_ids.remove(detach_tenant_id)
# FIXME: currently the mechanism for cancelling attach is to set state to broken, which is reported spuriously at error level # FIXME: currently the mechanism for cancelling attach is to set state to broken, which is reported spuriously at error level
env.pageserver.allowed_errors.append( env.pageserver.allowed_errors.append(

View File

@@ -37,7 +37,7 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
expected_exception=PageserverApiException, expected_exception=PageserverApiException,
match=f"NotFound: tenant {tenant_id}", match=f"NotFound: tenant {tenant_id}",
): ):
pageserver_http.tenant_detach(tenant_id) pageserver_http.tenant_status(tenant_id)
# create new nenant # create new nenant
tenant_id, _ = env.neon_cli.create_tenant() tenant_id, _ = env.neon_cli.create_tenant()