mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-15 12:10:37 +00:00
Replace broker duration logs with metrics (#4370)
I've added logs for broker push duration after every iteration in https://github.com/neondatabase/neon/pull/4142. This log has not found any real issues, so we can replace it with metrics, to slightly reduce log volume. LogQL query found that pushes longer that 500ms happened only 90 times for the last month. https://neonprod.grafana.net/goto/KTNj9UwVg?orgId=1 `{unit="safekeeper.service"} |= "timeline updates to broker in" | regexp "to broker in (?P<duration>.*)" | duration > 500ms`
This commit is contained in:
committed by
GitHub
parent
daa79b150f
commit
210be6b6ab
@@ -19,8 +19,10 @@ use tokio::task::JoinHandle;
|
||||
use tokio::{runtime, time::sleep};
|
||||
use tracing::*;
|
||||
|
||||
use crate::metrics::BROKER_ITERATION_TIMELINES;
|
||||
use crate::metrics::BROKER_PULLED_UPDATES;
|
||||
use crate::metrics::BROKER_PUSHED_UPDATES;
|
||||
use crate::metrics::BROKER_PUSH_ALL_UPDATES_SECONDS;
|
||||
use crate::GlobalTimelines;
|
||||
use crate::SafeKeeperConf;
|
||||
|
||||
@@ -61,8 +63,14 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
||||
BROKER_PUSHED_UPDATES.inc();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
// Log duration every second. Should be about 10MB of logs per day.
|
||||
info!("pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed);
|
||||
|
||||
BROKER_PUSH_ALL_UPDATES_SECONDS.observe(elapsed.as_secs_f64());
|
||||
BROKER_ITERATION_TIMELINES.observe(active_tlis.len() as f64);
|
||||
|
||||
if elapsed > push_interval / 2 {
|
||||
info!("broker push is too long, pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed);
|
||||
}
|
||||
|
||||
sleep(push_interval).await;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -125,6 +125,25 @@ pub static BACKUP_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
|
||||
)
|
||||
.expect("Failed to register safekeeper_backup_errors_total counter")
|
||||
});
|
||||
pub static BROKER_PUSH_ALL_UPDATES_SECONDS: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"safekeeper_broker_push_update_seconds",
|
||||
"Seconds to push all timeline updates to the broker",
|
||||
DISK_WRITE_SECONDS_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_broker_push_update_seconds histogram vec")
|
||||
});
|
||||
pub const TIMELINES_COUNT_BUCKETS: &[f64] = &[
|
||||
1.0, 10.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0,
|
||||
];
|
||||
pub static BROKER_ITERATION_TIMELINES: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"safekeeper_broker_iteration_timelines",
|
||||
"Count of timelines pushed to the broker in a single iteration",
|
||||
TIMELINES_COUNT_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_broker_iteration_timelines histogram vec")
|
||||
});
|
||||
|
||||
pub const LABEL_UNKNOWN: &str = "unknown";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user