neon_local: add "tenant import" (#7399)

## Problem

Sometimes we have test data in the form of S3 contents that we would
like to run live in a neon_local environment.

## Summary of changes

- Add a storage controller API that imports an existing tenant (a
hedged invocation sketch follows this list). Currently this is roughly
equivalent to a create with a high generation number; in future it
could be smarter, probing S3 to discover the shards in a tenant and
their generation numbers.
- Add a `neon_local` command that invokes the import API and then
inspects the timelines in the newly attached tenant to create matching
branches.
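
For reference, here is a minimal sketch of invoking the new import
endpoint over HTTP. Only the route and the `Scope::PageServerApi`
permission check are taken from this commit's diff; the controller
address, tenant id, and JWT are placeholders.

```rust
// Sketch only: POST to the storage controller's new debug import endpoint.
// The base URL and token are placeholders; the route and permission scope
// come from the handler added in this commit.
use reqwest::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let controller = "http://127.0.0.1:1234"; // placeholder controller address
    let tenant_id = "3fa85f6457174562b3fc2c963f66afa6"; // example tenant id (hex)

    let body = Client::new()
        .post(format!("{controller}/debug/v1/tenant/{tenant_id}/import"))
        .bearer_auth("JWT_WITH_PAGESERVERAPI_SCOPE") // placeholder token
        .send()
        .await?
        .error_for_status()? // turn non-2xx responses into errors
        .text()
        .await?;

    // On success the controller returns the TenantCreateResponse as JSON.
    println!("{body}");
    Ok(())
}
```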
Author: John Spray
Date: 2024-04-29 08:52:18 +01:00
Committed by: GitHub
Parent: 3695a1efa1
Commit: b655c7030f
15 changed files with 481 additions and 53 deletions


@@ -522,6 +522,18 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
}
async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state.service.tenant_import(tenant_id).await?,
)
}
async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
@@ -759,6 +771,13 @@ pub fn make_router(
.post("/debug/v1/node/:node_id/drop", |r| {
named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
})
.post("/debug/v1/tenant/:tenant_id/import", |r| {
named_request_span(
r,
handle_tenant_import,
RequestName("debug_v1_tenant_import"),
)
})
.get("/debug/v1/tenant", |r| {
named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
})


@@ -1,13 +1,14 @@
use pageserver_api::{
models::{
LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
- TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
+ TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
+ TimelineCreateRequest, TimelineInfo,
},
shard::TenantShardId,
};
use pageserver_client::mgmt_api::{Client, Result};
use reqwest::StatusCode;
- use utils::id::{NodeId, TimelineId};
+ use utils::id::{NodeId, TenantId, TimelineId};
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
/// controller to collect metrics in a non-intrusive manner.
@@ -88,6 +89,18 @@ impl PageserverClient {
)
}
pub(crate) async fn tenant_scan_remote_storage(
&self,
tenant_id: TenantId,
) -> Result<TenantScanRemoteStorageResponse> {
measured_request!(
"tenant_scan_remote_storage",
crate::metrics::Method::Get,
&self.node_id_label,
self.inner.tenant_scan_remote_storage(tenant_id).await
)
}
pub(crate) async fn tenant_secondary_download(
&self,
tenant_id: TenantShardId,


@@ -110,6 +110,42 @@ struct ServiceState {
delayed_reconcile_rx: tokio::sync::mpsc::Receiver<TenantShardId>,
}
/// Transform an error from a pageserver into an error to return to callers of a storage
/// controller API.
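/// Connectivity failures and 503s map to ResourceUnavailable (callers may retry); 404 stays
/// NotFound; 401/403 become InternalServerError, since auth failures against a pageserver
/// indicate a misconfiguration rather than a caller error; other statuses surface as Conflict.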
fn passthrough_api_error(node: &Node, e: mgmt_api::Error) -> ApiError {
match e {
mgmt_api::Error::ReceiveErrorBody(str) => {
// Presume errors receiving body are connectivity/availability issues
ApiError::ResourceUnavailable(
format!("{node} error receiving error body: {str}").into(),
)
}
mgmt_api::Error::ReceiveBody(str) => {
// Presume errors receiving body are connectivity/availability issues
ApiError::ResourceUnavailable(format!("{node} error receiving body: {str}").into())
}
mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, msg) => {
ApiError::NotFound(anyhow::anyhow!(format!("{node}: {msg}")).into())
}
mgmt_api::Error::ApiError(StatusCode::SERVICE_UNAVAILABLE, msg) => {
ApiError::ResourceUnavailable(format!("{node}: {msg}").into())
}
mgmt_api::Error::ApiError(status @ StatusCode::UNAUTHORIZED, msg)
| mgmt_api::Error::ApiError(status @ StatusCode::FORBIDDEN, msg) => {
// Auth errors talking to a pageserver are not auth errors for the caller: they are
// internal server errors, showing that something is wrong with the pageserver or
// storage controller's auth configuration.
ApiError::InternalServerError(anyhow::anyhow!("{node} {status}: {msg}"))
}
mgmt_api::Error::ApiError(status, msg) => {
// Presume general case of pageserver API errors is that we tried to do something
// that can't be done right now.
ApiError::Conflict(format!("{node} {status}: {status} {msg}"))
}
mgmt_api::Error::Cancelled => ApiError::ShuttingDown,
}
}
impl ServiceState {
fn new(
nodes: HashMap<NodeId, Node>,
@@ -2519,17 +2555,7 @@ impl Service {
client
.timeline_create(tenant_shard_id, &create_req)
.await
- .map_err(|e| match e {
- mgmt_api::Error::ApiError(status, msg)
- if status == StatusCode::INTERNAL_SERVER_ERROR
- || status == StatusCode::NOT_ACCEPTABLE =>
- {
- // TODO: handle more error codes, e.g. 503 should be passed through. Make a general wrapper
- // for pass-through API calls.
- ApiError::InternalServerError(anyhow::anyhow!(msg))
- }
- _ => ApiError::Conflict(format!("Failed to create timeline: {e}")),
- })
+ .map_err(|e| passthrough_api_error(&node, e))
}
// Because the caller might not provide an explicit LSN, we must do the creation first on a single shard, and then
@@ -3654,6 +3680,88 @@ impl Service {
Ok(())
}
/// This is for debug/support only: assuming tenant data is already present in S3, we "create" a
/// tenant with a very high generation number so that it will see the existing data.
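/// Concretely: scan remote storage via an arbitrary available pageserver to discover the
/// tenant's shard count and highest generation, then create the tenant with that generation
/// so that it attaches to the existing data.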
pub(crate) async fn tenant_import(
&self,
tenant_id: TenantId,
) -> Result<TenantCreateResponse, ApiError> {
// Pick an arbitrary available pageserver to use for scanning the tenant in remote storage
let maybe_node = {
self.inner
.read()
.unwrap()
.nodes
.values()
.find(|n| n.is_available())
.cloned()
};
let Some(node) = maybe_node else {
return Err(ApiError::BadRequest(anyhow::anyhow!("No nodes available")));
};
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.jwt_token.as_deref(),
);
let scan_result = client
.tenant_scan_remote_storage(tenant_id)
.await
.map_err(|e| passthrough_api_error(&node, e))?;
// A post-split tenant may contain a mixture of shard counts in remote storage: pick the highest count.
let Some(shard_count) = scan_result
.shards
.iter()
.map(|s| s.tenant_shard_id.shard_count)
.max()
else {
return Err(ApiError::NotFound(
anyhow::anyhow!("No shards found").into(),
));
};
// Ideally we would set each newly imported shard's generation independently, but for correctness it is sufficient
// to use the highest generation observed across all the scanned shards.
let generation = scan_result
.shards
.iter()
.map(|s| s.generation)
.max()
.expect("We already validated >0 shards");
// FIXME: we have no way to recover the shard stripe size from contents of remote storage: this will
// only work if they were using the default stripe size.
let stripe_size = ShardParameters::DEFAULT_STRIPE_SIZE;
let (response, waiters) = self
.do_tenant_create(TenantCreateRequest {
new_tenant_id: TenantShardId::unsharded(tenant_id),
generation,
shard_parameters: ShardParameters {
count: shard_count,
stripe_size,
},
placement_policy: Some(PlacementPolicy::Attached(0)), // No secondaries, for convenient debug/hacking
// There is no way to know what the tenant's config was: revert to defaults
config: TenantConfig::default(),
})
.await?;
if let Err(e) = self.await_waiters(waiters, SHORT_RECONCILE_TIMEOUT).await {
// Since this is a debug/support operation, all kinds of weird issues are possible (e.g. this
// tenant doesn't exist in the control plane), so don't fail the request if it can't fully
// reconcile, as reconciliation includes notifying compute.
tracing::warn!(%tenant_id, "Reconcile not done yet while importing tenant ({e})");
}
Ok(response)
}
/// For debug/support: a full JSON dump of TenantShards. Returns a response so that
/// we don't have to make TenantShard clonable in the return path.
pub(crate) fn tenants_dump(&self) -> Result<hyper::Response<hyper::Body>, ApiError> {