mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-21 23:20:40 +00:00
storcon: track preferred AZ for each tenant shard (#8937)
## Problem We want to do AZ-aware scheduling, but don't have enough metadata. ## Summary of changes Introduce a `preferred_az_id` concept for each managed tenant shard. In a future PR, the scheduler will use this as a soft preference. The idea is to try to keep the shard attachments within the same AZ. Under the assumption that the compute was placed in the correct AZ, this reduces the chances of cross-AZ traffic between compute and PS. In terms of code changes we: 1. Add a new nullable `preferred_az_id` column to the `tenant_shards` table. Also include an in-memory counterpart. 2. Populate the preferred AZ on tenant creation and shard splits. 3. Add an endpoint which allows bulk-setting preferred AZs. (3) gives us the migration path. I'll write a script which queries the cplane db in the region and sets the preferred AZ of all shards with an active compute to the AZ of said compute. For shards without an active compute, I'll use the AZ of the currently attached pageserver since this is what cplane uses now to schedule computes.
This commit is contained in:
@@ -105,6 +105,7 @@ pub(crate) enum DatabaseOperation {
|
||||
ListMetadataHealthOutdated,
|
||||
GetLeader,
|
||||
UpdateLeader,
|
||||
SetPreferredAzs,
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -664,6 +665,33 @@ impl Persistence {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn set_tenant_shard_preferred_azs(
|
||||
&self,
|
||||
preferred_azs: Vec<(TenantShardId, String)>,
|
||||
) -> DatabaseResult<Vec<(TenantShardId, String)>> {
|
||||
use crate::schema::tenant_shards::dsl::*;
|
||||
|
||||
self.with_measured_conn(DatabaseOperation::SetPreferredAzs, move |conn| {
|
||||
let mut shards_updated = Vec::default();
|
||||
|
||||
for (tenant_shard_id, preferred_az) in preferred_azs.iter() {
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
|
||||
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
|
||||
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
|
||||
.set(preferred_az_id.eq(preferred_az))
|
||||
.execute(conn)?;
|
||||
|
||||
if updated == 1 {
|
||||
shards_updated.push((*tenant_shard_id, preferred_az.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(shards_updated)
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
|
||||
use crate::schema::tenant_shards::dsl::*;
|
||||
self.with_measured_conn(DatabaseOperation::Detach, move |conn| {
|
||||
@@ -1050,6 +1078,11 @@ pub(crate) struct TenantShardPersistence {
|
||||
pub(crate) config: String,
|
||||
#[serde(default)]
|
||||
pub(crate) scheduling_policy: String,
|
||||
|
||||
// Hint that we should attempt to schedule this tenant shard in the given
|
||||
// availability zone in order to minimise the chances of cross-AZ communication
|
||||
// with compute.
|
||||
pub(crate) preferred_az_id: Option<String>,
|
||||
}
|
||||
|
||||
impl TenantShardPersistence {
|
||||
|
||||
Reference in New Issue
Block a user