Compare commits

...

4 Commits

Author SHA1 Message Date
John Spray  6686367452  wip  2024-02-28 14:22:01 +00:00
John Spray  9085b0b7a2  control_plane: update shard config from location_config  2024-02-28 12:06:03 +00:00
John Spray  24b01c1652  control_plane: let reconciler avoid bumping generation when changing config  2024-02-27 11:50:04 +00:00
John Spray  ee558cb77b  WIP test  2024-02-27 00:31:33 +00:00
5 changed files with 80 additions and 7 deletions

View File

@@ -440,15 +440,46 @@ impl Reconciler {
                 // Nothing to do
                 tracing::info!(%node_id, "Observed configuration already correct.")
             }
-            _ => {
+            observed => {
                 // In all cases other than a matching observed configuration, we will
                 // reconcile this location. This includes locations with different configurations, as well
                 // as locations with unknown (None) observed state.
-                self.generation = self
-                    .persistence
-                    .increment_generation(self.tenant_shard_id, node_id)
-                    .await?;
-                wanted_conf.generation = self.generation.into();
+                // The general case is to increment the generation. However, there are cases
+                // where this is not necessary:
+                // - if we are only updating the TenantConf part of the location
+                // - if we are only changing the attachment mode (e.g. going to attachedmulti or attachedstale)
+                //   and the location was already in the correct generation
+                let increment_generation = match observed {
+                    None => true,
+                    Some(ObservedStateLocation { conf: None }) => true,
+                    Some(ObservedStateLocation {
+                        conf: Some(observed),
+                    }) => {
+                        // If mode and generation are the same, it follows that only the configuration has changed
+                        let config_update = observed.generation == wanted_conf.generation
+                            && observed.mode == wanted_conf.mode;
+
+                        // Usually the short-lived attachment modes (multi and stale) are only used
+                        // in the case of [`Self::live_migrate`], but it is simple to handle them correctly
+                        // here too. Locations are allowed to go Single->Stale and Multi->Single within the same generation.
+                        let mode_transition = observed.generation == wanted_conf.generation
+                            && ((observed.mode == LocationConfigMode::AttachedSingle
+                                && wanted_conf.mode == LocationConfigMode::AttachedStale)
+                                || (observed.mode == LocationConfigMode::AttachedMulti
+                                    && wanted_conf.mode == LocationConfigMode::AttachedSingle));
+
+                        !(config_update || mode_transition)
+                    }
+                };
+
+                if increment_generation {
+                    self.generation = self
+                        .persistence
+                        .increment_generation(self.tenant_shard_id, node_id)
+                        .await?;
+                    wanted_conf.generation = self.generation.into();
+                }
+
                 tracing::info!(%node_id, "Observed configuration requires update.");
                 self.location_config(node_id, wanted_conf, None).await?;
                 self.compute_notify().await?;
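
The new branch above boils down to a pure decision over the observed and wanted location configs. Below is a minimal standalone sketch of that rule, under simplifying assumptions: the LocationConf here holds only mode and generation, the ObservedStateLocation wrapper is flattened into a plain Option, and the function name is made up for illustration. It is not the project's actual API, only a restatement of when the generation bump can be skipped.

// Simplified stand-in types; the real LocationConfig carries many more fields.
#[derive(Clone, Copy, PartialEq, Eq)]
enum LocationConfigMode {
    AttachedSingle,
    AttachedMulti,
    AttachedStale,
}

struct LocationConf {
    mode: LocationConfigMode,
    generation: Option<u32>,
}

/// Returns true if moving from `observed` to `wanted` requires a fresh generation.
/// Pure config updates and the Single->Stale / Multi->Single transitions do not.
fn needs_generation_increment(observed: Option<&LocationConf>, wanted: &LocationConf) -> bool {
    let Some(observed) = observed else {
        // Unknown observed state: always re-attach with a fresh generation.
        return true;
    };

    let same_generation = observed.generation == wanted.generation;

    // Only the TenantConf part of the location changed.
    let config_update = same_generation && observed.mode == wanted.mode;

    // Attachment-mode transitions that are allowed within one generation.
    let mode_transition = same_generation
        && matches!(
            (observed.mode, wanted.mode),
            (LocationConfigMode::AttachedSingle, LocationConfigMode::AttachedStale)
                | (LocationConfigMode::AttachedMulti, LocationConfigMode::AttachedSingle)
        );

    !(config_update || mode_transition)
}

fn main() {
    let same_gen = Some(5);

    // Pure TenantConf change: same mode, same generation, no bump needed.
    let observed = LocationConf { mode: LocationConfigMode::AttachedSingle, generation: same_gen };
    let wanted = LocationConf { mode: LocationConfigMode::AttachedSingle, generation: same_gen };
    assert!(!needs_generation_increment(Some(&observed), &wanted));

    // Allowed mode transitions within one generation: Single->Stale and Multi->Single.
    let stale = LocationConf { mode: LocationConfigMode::AttachedStale, generation: same_gen };
    assert!(!needs_generation_increment(Some(&observed), &stale));
    let multi = LocationConf { mode: LocationConfigMode::AttachedMulti, generation: same_gen };
    assert!(!needs_generation_increment(Some(&multi), &wanted));

    // Unknown observed state: always increment.
    assert!(needs_generation_increment(None, &wanted));
}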

View File

@@ -1273,6 +1273,11 @@ impl Service {
                 }
             }
+            // TODO: we need to recognize that this is an update, issue a DB
+            // transaction to update the shards, and _then_ apply in memory. Need
+            // the tenant equivalent of node_configure()
+            shard.config = req.config.tenant_conf.clone();
+            shard.schedule(scheduler)?;
             let maybe_waiter = shard.maybe_reconcile(

View File

@@ -615,6 +615,10 @@ impl TenantState {
             return None;
         };
 
+        // Advance the sequence before spawning a reconciler, so that sequence waiters
+        // can distinguish between before+after the reconcile completes.
+        self.sequence = self.sequence.next();
+
         let reconciler_cancel = cancel.child_token();
         let mut reconciler = Reconciler {
             tenant_shard_id: self.tenant_shard_id,
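
The ordering constraint in the added comment (advance the sequence before spawning) is what lets a caller wait specifically for "the reconcile triggered by my change". The sketch below is a toy, synchronous model of that hand-off; the names (ShardState, spawn_reconcile, last_completed) are illustrative and not the project's actual Sequence/waiter machinery, which is asynchronous.

// Toy model of sequence-based waiting.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Sequence(u64);

impl Sequence {
    fn next(self) -> Sequence {
        Sequence(self.0 + 1)
    }
}

struct ShardState {
    // Bumped every time a reconciliation is spawned.
    sequence: Sequence,
    // Updated when a reconciliation finishes.
    last_completed: Sequence,
}

impl ShardState {
    // The sequence is advanced *before* the reconciler is spawned, so a waiter that
    // snapshots the returned value is only satisfied once that reconcile has finished.
    fn spawn_reconcile(&mut self) -> Sequence {
        self.sequence = self.sequence.next();
        // ... spawn the reconciler task here ...
        self.sequence
    }

    fn is_complete(&self, waited_for: Sequence) -> bool {
        self.last_completed >= waited_for
    }
}

fn main() {
    let mut shard = ShardState {
        sequence: Sequence(3),
        last_completed: Sequence(3),
    };
    let waiter = shard.spawn_reconcile();
    // A waiter holding sequence 4 is not satisfied by the previously completed sequence 3.
    assert!(!shard.is_complete(waiter));
    shard.last_completed = waiter; // the reconciler reports completion
    assert!(shard.is_complete(waiter));
}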

View File

@@ -289,6 +289,14 @@ pub struct TenantConfig {
     pub timeline_get_throttle: Option<ThrottleConfig>,
 }
 
+impl TenantConfig {
+    pub fn merge(&mut self, other: &TenantConfig) {
+        if let Some(max_lsn_wal_lag) = &other.max_lsn_wal_lag {
+            self.max_lsn_wal_lag = Some(*max_lsn_wal_lag);
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(tag = "kind")]
 pub enum EvictionPolicy {
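
The merge added here currently overlays only max_lsn_wal_lag, consistent with the branch still being marked wip; presumably each remaining Option field would get the same treatment. The sketch below shows that overlay pattern on a made-up two-field config, purely as an illustration of the intended semantics (fields that are Some in `other` win, None fields leave `self` untouched); it is not the real TenantConfig.

// Illustrative only: a toy config with the same Option-per-field shape.
#[derive(Debug)]
struct ToyConfig {
    max_lsn_wal_lag: Option<u64>,
    compaction_period_secs: Option<u64>, // hypothetical second field
}

impl ToyConfig {
    fn merge(&mut self, other: &ToyConfig) {
        // Overlay: only fields set in `other` replace the corresponding field in `self`.
        if let Some(v) = other.max_lsn_wal_lag {
            self.max_lsn_wal_lag = Some(v);
        }
        if let Some(v) = other.compaction_period_secs {
            self.compaction_period_secs = Some(v);
        }
    }
}

fn main() {
    let mut base = ToyConfig {
        max_lsn_wal_lag: Some(1024),
        compaction_period_secs: Some(20),
    };
    // The override sets only one field; the other survives the merge.
    let overrides = ToyConfig {
        max_lsn_wal_lag: Some(100 * 1024),
        compaction_period_secs: None,
    };
    base.merge(&overrides);
    assert_eq!(base.max_lsn_wal_lag, Some(100 * 1024));
    assert_eq!(base.compaction_period_secs, Some(20));
}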

View File

@@ -9,7 +9,7 @@ from fixtures.neon_fixtures import (
     NeonEnvBuilder,
     PgBin,
 )
-from fixtures.pageserver.http import PageserverHttpClient
+from fixtures.pageserver.http import PageserverHttpClient, TenantConfig
 from fixtures.pageserver.utils import (
     MANY_SMALL_LAYERS_TENANT_CONFIG,
     enable_remote_storage_versioning,
@@ -362,6 +362,31 @@ def test_sharding_service_onboarding(
     dest_ps.stop()
     dest_ps.start()
 
+    # Having onboarded via /location_config, we should also be able to update the
+    # TenantConf part of LocationConf, without inadvertently resetting the generation
+    modified_tenant_conf = {"max_lsn_wal_lag": 1024 * 1024 * 1024 * 100}
+    dest_tenant_before_conf_change = dest_ps.http_client().tenant_status(tenant_id)
+    # The generation has moved on since we onboarded
+    assert generation != dest_tenant_before_conf_change["generation"]
+
+    virtual_ps_http.tenant_location_conf(
+        tenant_id,
+        {
+            "mode": "AttachedSingle",
+            "secondary_conf": None,
+            "tenant_conf": modified_tenant_conf,
+            # This is intentionally a stale generation
+            "generation": generation,
+        },
+    )
+    dest_tenant_after_conf_change = dest_ps.http_client().tenant_status(tenant_id)
+    assert (
+        dest_tenant_after_conf_change["generation"] == dest_tenant_before_conf_change["generation"]
+    )
+
+    dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
+    assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf
+
     env.attachment_service.consistency_check()