storcon: allow for more concurrency in drain/fill operations (#10093)

## Problem

We saw the drain/fill operations not drain fast enough in ap-southeast.

## Summary of changes

These are some quick changes to speed it up:
* double reconcile concurrency - this is now half of the available
reconcile bandwidth
* reduce the waiter polling timeout - this way we can spawn new
reconciliations faster
This commit is contained in:
Vlad Lazar
2024-12-11 19:43:40 +00:00
committed by GitHub
parent a3e80448e8
commit e8395807a5
3 changed files with 6 additions and 4 deletions

View File

@@ -3,7 +3,7 @@ use std::{borrow::Cow, fmt::Debug, fmt::Display};
use tokio_util::sync::CancellationToken;
use utils::id::NodeId;
pub(crate) const MAX_RECONCILES_PER_OPERATION: usize = 32;
pub(crate) const MAX_RECONCILES_PER_OPERATION: usize = 64;
#[derive(Copy, Clone)]
pub(crate) struct Drain {

View File

@@ -100,6 +100,8 @@ use crate::{
use context_iterator::TenantShardContextIterator;
const WAITER_FILL_DRAIN_POLL_TIMEOUT: Duration = Duration::from_millis(500);
// For operations that should be quick, like attaching a new tenant
const SHORT_RECONCILE_TIMEOUT: Duration = Duration::from_secs(5);
@@ -6798,7 +6800,7 @@ impl Service {
}
waiters = self
.await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
.await_waiters_remainder(waiters, WAITER_FILL_DRAIN_POLL_TIMEOUT)
.await;
failpoint_support::sleep_millis_async!("sleepy-drain-loop", &cancel);
@@ -7051,7 +7053,7 @@ impl Service {
}
waiters = self
.await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
.await_waiters_remainder(waiters, WAITER_FILL_DRAIN_POLL_TIMEOUT)
.await;
}

View File

@@ -2136,7 +2136,7 @@ def test_background_operation_cancellation(neon_env_builder: NeonEnvBuilder):
env.start()
tenant_count = 10
shard_count_per_tenant = 8
shard_count_per_tenant = 16
tenant_ids = []
for _ in range(0, tenant_count):