mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 15:02:56 +00:00
storage controller: fix spurious reconciles after pageserver restarts (#6814)
## Problem When investigating test failures (https://github.com/neondatabase/neon/issues/6813) I noticed we were doing a bunch of Reconciler runs right after splitting a tenant. It's because the splitting test does a pageserver restart, and there was a bug in /re-attach handling, where we would update the generation correctly in the database and intent state, but not observed state, thereby triggering a reconciliation on the next call to maybe_reconcile. This didn't break anything profound (underlying rules about generations were respected), but caused the storage controller to do an un-needed extra round of bumping the generation and reconciling. ## Summary of changes - Start adding metrics to the storage controller - Assert on the number of reconciles done in test_sharding_split_smoke - Fix /re-attach to update `observed` such that we don't spuriously re-reconcile tenants.
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -286,6 +286,7 @@ dependencies = [
|
||||
"git-version",
|
||||
"hyper",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"postgres_connection",
|
||||
|
||||
@@ -18,6 +18,7 @@ clap.workspace = true
|
||||
futures.workspace = true
|
||||
git-version.workspace = true
|
||||
hyper.workspace = true
|
||||
once_cell.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true
|
||||
postgres_connection.workspace = true
|
||||
|
||||
@@ -3,6 +3,7 @@ use utils::seqwait::MonotonicCounter;
|
||||
|
||||
mod compute_hook;
|
||||
pub mod http;
|
||||
pub mod metrics;
|
||||
mod node;
|
||||
pub mod persistence;
|
||||
mod reconciler;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
///
|
||||
use anyhow::{anyhow, Context};
|
||||
use attachment_service::http::make_router;
|
||||
use attachment_service::metrics::preinitialize_metrics;
|
||||
use attachment_service::persistence::Persistence;
|
||||
use attachment_service::service::{Config, Service};
|
||||
use aws_config::{self, BehaviorVersion, Region};
|
||||
@@ -205,6 +206,8 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
preinitialize_metrics();
|
||||
|
||||
let args = Cli::parse();
|
||||
tracing::info!(
|
||||
"version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
|
||||
|
||||
32
control_plane/attachment_service/src/metrics.rs
Normal file
32
control_plane/attachment_service/src/metrics.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
pub(crate) struct ReconcilerMetrics {
|
||||
pub(crate) spawned: IntCounter,
|
||||
pub(crate) complete: IntCounterVec,
|
||||
}
|
||||
|
||||
impl ReconcilerMetrics {
|
||||
// Labels used on [`Self::complete`]
|
||||
pub(crate) const SUCCESS: &'static str = "ok";
|
||||
pub(crate) const ERROR: &'static str = "success";
|
||||
pub(crate) const CANCEL: &'static str = "cancel";
|
||||
}
|
||||
|
||||
pub(crate) static RECONCILER: Lazy<ReconcilerMetrics> = Lazy::new(|| ReconcilerMetrics {
|
||||
spawned: register_int_counter!(
|
||||
"storage_controller_reconcile_spawn",
|
||||
"Count of how many times we spawn a reconcile task",
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
complete: register_int_counter_vec!(
|
||||
"storage_controller_reconcile_complete",
|
||||
"Reconciler tasks completed, broken down by success/failure/cancelled",
|
||||
&["status"],
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
});
|
||||
|
||||
pub fn preinitialize_metrics() {
|
||||
Lazy::force(&RECONCILER);
|
||||
}
|
||||
@@ -92,6 +92,8 @@ impl TargetState {
|
||||
pub(crate) enum ReconcileError {
|
||||
#[error(transparent)]
|
||||
Notify(#[from] NotifyError),
|
||||
#[error("Cancelled")]
|
||||
Cancel,
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
@@ -497,6 +499,9 @@ impl Reconciler {
|
||||
}
|
||||
|
||||
for (node_id, conf) in changes {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(ReconcileError::Cancel);
|
||||
}
|
||||
self.location_config(node_id, conf, None).await?;
|
||||
}
|
||||
|
||||
|
||||
@@ -807,6 +807,15 @@ impl Service {
|
||||
};
|
||||
|
||||
shard_state.generation = std::cmp::max(shard_state.generation, new_gen);
|
||||
if let Some(observed) = shard_state
|
||||
.observed
|
||||
.locations
|
||||
.get_mut(&reattach_req.node_id)
|
||||
{
|
||||
if let Some(conf) = observed.conf.as_mut() {
|
||||
conf.generation = new_gen.into();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: cancel/restart any running reconciliation for this tenant, it might be trying
|
||||
// to call location_conf API with an old generation. Wait for cancellation to complete
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use crate::metrics;
|
||||
use control_plane::attachment_service::NodeAvailability;
|
||||
use pageserver_api::{
|
||||
models::{LocationConfig, LocationConfigMode, TenantConfig},
|
||||
@@ -570,6 +571,7 @@ impl TenantState {
|
||||
let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
|
||||
tenant_id=%reconciler.tenant_shard_id.tenant_id,
|
||||
shard_id=%reconciler.tenant_shard_id.shard_slug());
|
||||
metrics::RECONCILER.spawned.inc();
|
||||
let join_handle = tokio::task::spawn(
|
||||
async move {
|
||||
// Wait for any previous reconcile task to complete before we start
|
||||
@@ -586,6 +588,10 @@ impl TenantState {
|
||||
// TODO: wrap all remote API operations in cancellation check
|
||||
// as well.
|
||||
if reconciler.cancel.is_cancelled() {
|
||||
metrics::RECONCILER
|
||||
.complete
|
||||
.with_label_values(&[metrics::ReconcilerMetrics::CANCEL])
|
||||
.inc();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -599,6 +605,20 @@ impl TenantState {
|
||||
reconciler.compute_notify().await.ok();
|
||||
}
|
||||
|
||||
// Update result counter
|
||||
match &result {
|
||||
Ok(_) => metrics::RECONCILER
|
||||
.complete
|
||||
.with_label_values(&[metrics::ReconcilerMetrics::SUCCESS]),
|
||||
Err(ReconcileError::Cancel) => metrics::RECONCILER
|
||||
.complete
|
||||
.with_label_values(&[metrics::ReconcilerMetrics::CANCEL]),
|
||||
Err(_) => metrics::RECONCILER
|
||||
.complete
|
||||
.with_label_values(&[metrics::ReconcilerMetrics::ERROR]),
|
||||
}
|
||||
.inc();
|
||||
|
||||
result_tx
|
||||
.send(ReconcileResult {
|
||||
sequence: reconcile_seq,
|
||||
|
||||
@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Tuple
|
||||
from prometheus_client.parser import text_string_to_metric_families
|
||||
from prometheus_client.samples import Sample
|
||||
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
class Metrics:
|
||||
metrics: Dict[str, List[Sample]]
|
||||
@@ -31,6 +33,55 @@ class Metrics:
|
||||
return res[0]
|
||||
|
||||
|
||||
class MetricsGetter:
|
||||
"""
|
||||
Mixin for types that implement a `get_metrics` function and would like associated
|
||||
helpers for querying the metrics
|
||||
"""
|
||||
|
||||
def get_metrics(self) -> Metrics:
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_metric_value(
|
||||
self, name: str, filter: Optional[Dict[str, str]] = None
|
||||
) -> Optional[float]:
|
||||
metrics = self.get_metrics()
|
||||
results = metrics.query_all(name, filter=filter)
|
||||
if not results:
|
||||
log.info(f'could not find metric "{name}"')
|
||||
return None
|
||||
assert len(results) == 1, f"metric {name} with given filters is not unique, got: {results}"
|
||||
return results[0].value
|
||||
|
||||
def get_metrics_values(
|
||||
self, names: list[str], filter: Optional[Dict[str, str]] = None
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
When fetching multiple named metrics, it is more efficient to use this
|
||||
than to call `get_metric_value` repeatedly.
|
||||
|
||||
Throws RuntimeError if no metrics matching `names` are found, or if
|
||||
not all of `names` are found: this method is intended for loading sets
|
||||
of metrics whose existence is coupled.
|
||||
"""
|
||||
metrics = self.get_metrics()
|
||||
samples = []
|
||||
for name in names:
|
||||
samples.extend(metrics.query_all(name, filter=filter))
|
||||
|
||||
result = {}
|
||||
for sample in samples:
|
||||
if sample.name in result:
|
||||
raise RuntimeError(f"Multiple values found for {sample.name}")
|
||||
result[sample.name] = sample.value
|
||||
|
||||
if len(result) != len(names):
|
||||
log.info(f"Metrics found: {metrics.metrics}")
|
||||
raise RuntimeError(f"could not find all metrics {' '.join(names)}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_metrics(text: str, name: str = "") -> Metrics:
|
||||
metrics = Metrics(name)
|
||||
gen = text_string_to_metric_families(text)
|
||||
|
||||
@@ -46,6 +46,7 @@ from urllib3.util.retry import Retry
|
||||
from fixtures import overlayfs
|
||||
from fixtures.broker import NeonBroker
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
|
||||
from fixtures.pageserver.allowed_errors import (
|
||||
DEFAULT_PAGESERVER_ALLOWED_ERRORS,
|
||||
scan_pageserver_log_for_errors,
|
||||
@@ -1913,7 +1914,7 @@ class Pagectl(AbstractNeonCli):
|
||||
return IndexPartDump.from_json(parsed)
|
||||
|
||||
|
||||
class NeonAttachmentService:
|
||||
class NeonAttachmentService(MetricsGetter):
|
||||
def __init__(self, env: NeonEnv, auth_enabled: bool):
|
||||
self.env = env
|
||||
self.running = False
|
||||
@@ -1951,6 +1952,11 @@ class NeonAttachmentService:
|
||||
|
||||
return headers
|
||||
|
||||
def get_metrics(self) -> Metrics:
|
||||
res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
|
||||
res.raise_for_status()
|
||||
return parse_metrics(res.text)
|
||||
|
||||
def ready(self) -> bool:
|
||||
resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
|
||||
if resp.status_code == 503:
|
||||
|
||||
@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import Metrics, parse_metrics
|
||||
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.types import Lsn, TenantId, TenantShardId, TimelineId
|
||||
from fixtures.utils import Fn
|
||||
@@ -125,7 +125,7 @@ class TenantConfig:
|
||||
)
|
||||
|
||||
|
||||
class PageserverHttpClient(requests.Session):
|
||||
class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
def __init__(
|
||||
self,
|
||||
port: int,
|
||||
@@ -721,45 +721,6 @@ class PageserverHttpClient(requests.Session):
|
||||
assert len(matches) < 2, "above filter should uniquely identify metric"
|
||||
return value
|
||||
|
||||
def get_metric_value(
|
||||
self, name: str, filter: Optional[Dict[str, str]] = None
|
||||
) -> Optional[float]:
|
||||
metrics = self.get_metrics()
|
||||
results = metrics.query_all(name, filter=filter)
|
||||
if not results:
|
||||
log.info(f'could not find metric "{name}"')
|
||||
return None
|
||||
assert len(results) == 1, f"metric {name} with given filters is not unique, got: {results}"
|
||||
return results[0].value
|
||||
|
||||
def get_metrics_values(
|
||||
self, names: list[str], filter: Optional[Dict[str, str]] = None
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
When fetching multiple named metrics, it is more efficient to use this
|
||||
than to call `get_metric_value` repeatedly.
|
||||
|
||||
Throws RuntimeError if no metrics matching `names` are found, or if
|
||||
not all of `names` are found: this method is intended for loading sets
|
||||
of metrics whose existence is coupled.
|
||||
"""
|
||||
metrics = self.get_metrics()
|
||||
samples = []
|
||||
for name in names:
|
||||
samples.extend(metrics.query_all(name, filter=filter))
|
||||
|
||||
result = {}
|
||||
for sample in samples:
|
||||
if sample.name in result:
|
||||
raise RuntimeError(f"Multiple values found for {sample.name}")
|
||||
result[sample.name] = sample.value
|
||||
|
||||
if len(result) != len(names):
|
||||
log.info(f"Metrics found: {metrics.metrics}")
|
||||
raise RuntimeError(f"could not find all metrics {' '.join(names)}")
|
||||
|
||||
return result
|
||||
|
||||
def layer_map_info(
|
||||
self,
|
||||
tenant_id: Union[TenantId, TenantShardId],
|
||||
|
||||
@@ -255,3 +255,26 @@ def test_sharding_split_smoke(
|
||||
env.neon_cli.tenant_migrate(migrate_shard, destination, timeout_secs=10)
|
||||
|
||||
workload.validate()
|
||||
|
||||
# Check that we didn't do any spurious reconciliations.
|
||||
# Total number of reconciles should have been one per original shard, plus
|
||||
# one for each shard that was migrated.
|
||||
reconcile_ok = env.attachment_service.get_metric_value(
|
||||
"storage_controller_reconcile_complete_total", filter={"status": "ok"}
|
||||
)
|
||||
assert reconcile_ok == shard_count + split_shard_count // 2
|
||||
|
||||
# Check that no cancelled or errored reconciliations occurred: this test does no
|
||||
# failure injection and should run clean.
|
||||
assert (
|
||||
env.attachment_service.get_metric_value(
|
||||
"storage_controller_reconcile_complete_total", filter={"status": "cancel"}
|
||||
)
|
||||
is None
|
||||
)
|
||||
assert (
|
||||
env.attachment_service.get_metric_value(
|
||||
"storage_controller_reconcile_complete_total", filter={"status": "error"}
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user