storage controller: make direct tenant creation more robust (#7247)

## Problem

- Creations were not idempotent: retrying the same creation failed with a unique key violation
- Creations waited for reconciliation, which the control plane blocks while
an operation is in flight

## Summary of changes

- Handle a unique key constraint violation as an OK situation: if we're
creating the same tenant ID and shard count, it's reasonable to assume
this is a duplicate creation (a condensed sketch of this pattern follows this list).
- Make the wait for reconcile during creation tolerate failures: this is
similar to location_conf, where the cloud control plane blocks our
notification calls until it is done calling into our API (in the future
this constraint is expected to relax as the cloud control plane learns
to run multiple operations concurrently for a tenant).
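
A minimal, self-contained sketch of the idempotency pattern from the first point, assuming diesel's error types; the helper name and the plain row-count result type stand in for the controller's real persistence call:

```rust
use diesel::result::{DatabaseErrorKind, Error as DieselError};

// Sketch only: treat "these tenant shard rows already exist" as success. Because the
// shard count is part of the unique key, a violation implies an identical creation
// request was already persisted, so the retry can proceed as a no-op.
fn tolerate_duplicate_creation(insert: Result<usize, DieselError>) -> Result<(), DieselError> {
    match insert {
        // Fresh creation: the rows were inserted.
        Ok(_rows) => Ok(()),
        // Duplicate creation: same tenant ID and shard count, already in the database.
        Err(DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _)) => Ok(()),
        // Any other database error is still surfaced to the caller.
        Err(other) => Err(other),
    }
}
```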
Author: John Spray
Date: 2024-03-26 16:57:35 +00:00 (committed via GitHub)
Commit: b3bb1d1cad, parent 47d2b3a483
3 changed files with 33 additions and 6 deletions

@@ -1523,6 +1523,8 @@ impl Service {
         &self,
         create_req: TenantCreateRequest,
     ) -> Result<TenantCreateResponse, ApiError> {
+        let tenant_id = create_req.new_tenant_id.tenant_id;
         // Exclude any concurrent attempts to create/access the same tenant ID
         let _tenant_lock = self
             .tenant_op_locks
@@ -1531,7 +1533,12 @@
         let (response, waiters) = self.do_tenant_create(create_req).await?;
-        self.await_waiters(waiters, SHORT_RECONCILE_TIMEOUT).await?;
+        if let Err(e) = self.await_waiters(waiters, SHORT_RECONCILE_TIMEOUT).await {
+            // Avoid deadlock: reconcile may fail while notifying compute, if the cloud control plane refuses to
+            // accept compute notifications while it is in the process of creating. Reconciliation will
+            // be retried in the background.
+            tracing::warn!(%tenant_id, "Reconcile not done yet while creating tenant ({e})");
+        }
         Ok(response)
     }
@@ -1610,13 +1617,25 @@ impl Service {
                 splitting: SplitState::default(),
             })
             .collect();
-        self.persistence
+        match self
+            .persistence
             .insert_tenant_shards(persist_tenant_shards)
             .await
-            .map_err(|e| {
-                // TODO: distinguish primary key constraint (idempotent, OK), from other errors
-                ApiError::InternalServerError(anyhow::anyhow!(e))
-            })?;
+        {
+            Ok(_) => {}
+            Err(DatabaseError::Query(diesel::result::Error::DatabaseError(
+                DatabaseErrorKind::UniqueViolation,
+                _,
+            ))) => {
+                // Unique key violation: this is probably a retry. Because the shard count is part of the unique key,
+                // if we see a unique key violation it means that the creation request's shard count matches the previous
+                // creation's shard count.
+                tracing::info!("Tenant shards already present in database, proceeding with idempotent creation...");
+            }
+            // Any other database error is unexpected and a bug.
+            Err(e) => return Err(ApiError::InternalServerError(anyhow::anyhow!(e))),
+        };
         let (waiters, response_shards) = {
             let mut locked = self.inner.write().unwrap();
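
The removed `map_err` carried a TODO about distinguishing the primary key constraint from other errors. If a stricter check is ever wanted, diesel's error information exposes the violated constraint's name; a rough sketch, where the constraint name is a placeholder rather than one taken from the real schema:

```rust
use diesel::result::{DatabaseErrorInformation, DatabaseErrorKind, Error as DieselError};

// Sketch only: accept the violation as idempotent solely when it comes from the
// tenant-shard primary key. "tenant_shards_pkey" is a guessed name, not the real constraint.
fn is_duplicate_shard_insert(err: &DieselError) -> bool {
    match err {
        DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, info) => {
            info.constraint_name() == Some("tenant_shards_pkey")
        }
        _ => false,
    }
}
```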

@@ -2126,6 +2126,8 @@ class NeonStorageController(MetricsGetter):
         shard_params = {"count": shard_count}
         if shard_stripe_size is not None:
             shard_params["stripe_size"] = shard_stripe_size
+        else:
+            shard_params["stripe_size"] = 32768
         body["shard_parameters"] = shard_params
@@ -2139,6 +2141,7 @@ class NeonStorageController(MetricsGetter):
             json=body,
             headers=self.headers(TokenScope.PAGE_SERVER_API),
         )
         response.raise_for_status()
+        log.info(f"tenant_create success: {response.json()}")
     def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:

@@ -89,6 +89,11 @@ def test_sharding_service_smoke(
     for tid in tenant_ids:
         env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
+    # Repeating a creation should be idempotent (we are just testing it doesn't return an error)
+    env.storage_controller.tenant_create(
+        tenant_id=next(iter(tenant_ids)), shard_count=shards_per_tenant
+    )
     for node_id, count in get_node_shard_counts(env, tenant_ids).items():
         # we used a multiple of pagservers for the total shard count,
         # so expect equal number on all pageservers