mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-13 08:22:55 +00:00
add remote_physical_size metric
We do the accounting exclusively after updating remote IndexPart successfully. This is cleaner & more robust than doing it upon completion of individual layer file uploads / deletions since we can uset .set() insteaf of add()/sub(). NB: Originally, this work was intended to be part of #3013 but it turns out that it's completely orthogonal. So, spin it out into this PR for easier review. Since this change is additive, it won't break anything.
This commit is contained in:
committed by
Christian Schwarz
parent
c04c201520
commit
bf3ac2be2d
@@ -96,6 +96,16 @@ static CURRENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_remote_physical_size",
|
||||
"The size of the layer files present in the remote storage that are listed in the the remote index_part.json.",
|
||||
// Corollary: If any files are missing from the index part, they won't be included here.
|
||||
&["tenant_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_current_logical_size",
|
||||
@@ -500,6 +510,7 @@ use std::time::Instant;
|
||||
pub struct RemoteTimelineClientMetrics {
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
remote_physical_size_gauge: Mutex<Option<UIntGauge>>,
|
||||
remote_operation_time: Mutex<HashMap<(&'static str, &'static str, &'static str), Histogram>>,
|
||||
unfinished_tasks: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
|
||||
}
|
||||
@@ -511,8 +522,22 @@ impl RemoteTimelineClientMetrics {
|
||||
timeline_id: timeline_id.to_string(),
|
||||
remote_operation_time: Mutex::new(HashMap::default()),
|
||||
unfinished_tasks: Mutex::new(HashMap::default()),
|
||||
remote_physical_size_gauge: Mutex::new(None),
|
||||
}
|
||||
}
|
||||
pub fn remote_physical_size_gauge(&self) -> UIntGauge {
|
||||
let mut guard = self.remote_physical_size_gauge.lock().unwrap();
|
||||
guard
|
||||
.get_or_insert_with(|| {
|
||||
REMOTE_PHYSICAL_SIZE
|
||||
.get_metric_with_label_values(&[
|
||||
&self.tenant_id.to_string(),
|
||||
&self.timeline_id.to_string(),
|
||||
])
|
||||
.unwrap()
|
||||
})
|
||||
.clone()
|
||||
}
|
||||
pub fn remote_operation_time(
|
||||
&self,
|
||||
file_kind: &RemoteOpFileKind,
|
||||
@@ -562,6 +587,7 @@ impl Drop for RemoteTimelineClientMetrics {
|
||||
let RemoteTimelineClientMetrics {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
remote_physical_size_gauge,
|
||||
remote_operation_time,
|
||||
unfinished_tasks,
|
||||
} = self;
|
||||
@@ -576,6 +602,10 @@ impl Drop for RemoteTimelineClientMetrics {
|
||||
b,
|
||||
]);
|
||||
}
|
||||
{
|
||||
let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
|
||||
let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -460,6 +460,7 @@ impl RemoteTimelineClient {
|
||||
pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_with_current_remote_index_part(index_part)?;
|
||||
self.update_remote_physical_size_gauge(Some(index_part));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -471,6 +472,7 @@ impl RemoteTimelineClient {
|
||||
) -> anyhow::Result<()> {
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_empty_remote(local_metadata)?;
|
||||
self.update_remote_physical_size_gauge(None);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -482,6 +484,20 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn update_remote_physical_size_gauge(&self, current_remote_index_part: Option<&IndexPart>) {
|
||||
let size: u64 = if let Some(current_remote_index_part) = current_remote_index_part {
|
||||
current_remote_index_part
|
||||
.layer_metadata
|
||||
.iter()
|
||||
// If we don't have the file size for the layer, don't account for it in the metric.
|
||||
.map(|(_, ilmd)| ilmd.file_size.unwrap_or(0))
|
||||
.sum()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
self.metrics.remote_physical_size_gauge().set(size);
|
||||
}
|
||||
|
||||
//
|
||||
// Download operations.
|
||||
//
|
||||
@@ -543,6 +559,14 @@ impl RemoteTimelineClient {
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
if let Some(upgraded) = upload_queue.latest_files.get_mut(layer_file_name) {
|
||||
upgraded.merge(&new_metadata);
|
||||
// If we don't do an index file upload inbetween here and restart,
|
||||
// the value will go back down after pageserver restart, since we will
|
||||
// have lost this data point.
|
||||
// But, we upload index part fairly frequently, and restart pageserver rarely.
|
||||
// So, by accounting eagerly, we present a most-of-the-time-more-accurate value sooner.
|
||||
self.metrics
|
||||
.remote_physical_size_gauge()
|
||||
.add(downloaded_size);
|
||||
} else {
|
||||
// The file should exist, since we just downloaded it.
|
||||
warn!(
|
||||
@@ -855,7 +879,7 @@ impl RemoteTimelineClient {
|
||||
.await
|
||||
}
|
||||
UploadOp::UploadMetadata(ref index_part, _lsn) => {
|
||||
upload::upload_index_part(
|
||||
let res = upload::upload_index_part(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
self.tenant_id,
|
||||
@@ -869,7 +893,11 @@ impl RemoteTimelineClient {
|
||||
RemoteOpKind::Upload,
|
||||
Arc::clone(&self.metrics),
|
||||
)
|
||||
.await
|
||||
.await;
|
||||
if res.is_ok() {
|
||||
self.update_remote_physical_size_gauge(Some(index_part));
|
||||
}
|
||||
res
|
||||
}
|
||||
UploadOp::Delete(metric_file_kind, ref layer_file_name) => {
|
||||
let path = &self
|
||||
|
||||
@@ -232,7 +232,7 @@ impl IndexPart {
|
||||
/// Serialized form of [`LayerFileMetadata`].
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct IndexLayerMetadata {
|
||||
file_size: Option<u64>,
|
||||
pub(super) file_size: Option<u64>,
|
||||
}
|
||||
|
||||
impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
|
||||
|
||||
@@ -44,6 +44,7 @@ PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_remote_operation_seconds_bucket",
|
||||
"pageserver_remote_operation_seconds_count",
|
||||
"pageserver_remote_operation_seconds_sum",
|
||||
"pageserver_remote_physical_size",
|
||||
)
|
||||
|
||||
PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
|
||||
|
||||
Reference in New Issue
Block a user