storcon: handle fills including high-throughput tenants more gracefully (#8865)

## Problem
A tenant may ingest a lot of data between being drained for a node restart
and being moved back in the fill phase. Moving such a tenant back is
expensive and causes the fill to stall.

## Summary of changes
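We make a tactical change to reduce secondary warm-up time for
migrations in fills: reconciles issued during a fill are now configured
with a 20-second secondary warm-up timeout and a 5-second secondary
download request timeout.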
This commit is contained in:
Vlad Lazar
2024-09-05 09:56:26 +01:00
committed by GitHub
parent 99fa1c3600
commit 708322ce3c

View File

@@ -6297,9 +6297,13 @@ impl Service {
node_id: NodeId,
cancel: CancellationToken,
) -> Result<(), OperationError> {
// TODO(vlad): Currently this operates on the assumption that all
// secondaries are warm. This is not always true (e.g. we just migrated the
// tenant). Take that into consideration by checking the secondary status.
const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(20);
const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
let reconciler_config = ReconcilerConfigBuilder::new()
.secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
.secondary_download_request_timeout(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT)
.build();
let mut tids_to_promote = self.fill_node_plan(node_id);
let mut waiters = Vec::new();
@@ -6367,9 +6371,11 @@ impl Service {
node_id
);
if let Some(waiter) =
self.maybe_reconcile_shard(tenant_shard, nodes)
{
if let Some(waiter) = self.maybe_configured_reconcile_shard(
tenant_shard,
nodes,
reconciler_config,
) {
waiters.push(waiter);
}
}