diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs
index 4cb20a1bb1..dc7afeb4ae 100644
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -796,14 +796,16 @@ async fn collect_eviction_candidates(
     // A default override can be put in the default tenant conf in the pageserver.toml.
     let min_resident_size = if let Some(s) = tenant.get_min_resident_size_override() {
         debug!(
-            tenant_id=%tenant.tenant_id(),
+            tenant_id=%tenant.tenant_shard_id().tenant_id,
+            shard_id=%tenant.tenant_shard_id().shard_slug(),
             overridden_size=s,
             "using overridden min resident size for tenant"
         );
         s
     } else {
         debug!(
-            tenant_id=%tenant.tenant_id(),
+            tenant_id=%tenant.tenant_shard_id().tenant_id,
+            shard_id=%tenant.tenant_shard_id().shard_slug(),
             max_layer_size,
             "using max layer size as min_resident_size for tenant",
         );
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 0a18ac4af1..811232397c 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1236,7 +1236,7 @@ async fn tenant_create_handler(
 
     json_response(
         StatusCode::CREATED,
-        TenantCreateResponse(new_tenant.tenant_id()),
+        TenantCreateResponse(new_tenant.tenant_shard_id().tenant_id),
     )
 }
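Throughout this patch, the removed Tenant::tenant_id() helper gives way to tenant_shard_id(): the tenant_id label comes from its tenant_id field, and the new shard_id label from shard_slug(). A minimal sketch of building that label pair, using only APIs that appear in this diff (the concrete slug rendering, e.g. "0000" for an unsharded tenant, is an illustrative assumption):

use pageserver_api::shard::TenantShardId;
use utils::id::TenantId;

// Build the (tenant_id, shard_id) label pair the way the hunks in this patch do.
fn metric_labels(tenant_shard_id: &TenantShardId) -> (String, String) {
    (
        tenant_shard_id.tenant_id.to_string(),
        format!("{}", tenant_shard_id.shard_slug()),
    )
}

fn demo() {
    let (tenant_id, shard_id) = metric_labels(&TenantShardId::unsharded(TenantId::generate()));
    // For an unsharded tenant the slug is expected to render as "0000".
    println!("tenant_id={tenant_id} shard_id={shard_id}");
}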
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 07f9049ca5..993685db6e 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
 use strum::{EnumCount, IntoEnumIterator, VariantNames};
 use strum_macros::{EnumVariantNames, IntoStaticStr};
-use utils::id::{TenantId, TimelineId};
+use utils::id::TimelineId;
 
 /// Prometheus histogram buckets (in seconds) for operations in the critical
 /// path. In other words, operations that directly affect the latency of user
@@ -59,7 +59,7 @@ pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(||
     register_counter_vec!(
         "pageserver_storage_operations_seconds_sum",
         "Total time spent on storage operations with operation, tenant and timeline dimensions",
-        &["operation", "tenant_id", "timeline_id"],
+        &["operation", "tenant_id", "shard_id", "timeline_id"],
     )
     .expect("failed to define a metric")
 });
@@ -68,7 +68,7 @@ pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::n
     register_int_counter_vec!(
         "pageserver_storage_operations_seconds_count",
         "Count of storage operations with operation, tenant and timeline dimensions",
-        &["operation", "tenant_id", "timeline_id"],
+        &["operation", "tenant_id", "shard_id", "timeline_id"],
     )
     .expect("failed to define a metric")
 });
@@ -373,7 +373,7 @@ static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
     register_int_gauge_vec!(
         "pageserver_last_record_lsn",
         "Last record LSN grouped by timeline",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
@@ -382,7 +382,7 @@ static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     register_uint_gauge_vec!(
         "pageserver_resident_physical_size",
         "The size of the layer files present in the pageserver's filesystem.",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
@@ -400,7 +400,7 @@ static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
         "pageserver_remote_physical_size",
         "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
         // Corollary: If any files are missing from the index part, they won't be included here.
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
@@ -433,7 +433,7 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     register_uint_gauge_vec!(
         "pageserver_current_logical_size",
         "Current logical size grouped by timeline",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define current logical size metric")
 });
@@ -582,7 +582,7 @@ pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
     register_uint_gauge_vec!(
         "pageserver_broken_tenants_count",
         "Set of broken tenants",
-        &["tenant_id"]
+        &["tenant_id", "shard_id"]
     )
     .expect("Failed to register pageserver_tenant_states_count metric")
 });
@@ -602,7 +602,7 @@ static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "pageserver_created_persistent_files_total",
         "Number of files created that are meant to be uploaded to cloud storage",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
@@ -611,7 +611,7 @@ static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "pageserver_written_persistent_bytes_total",
         "Total bytes written that are meant to be uploaded to cloud storage",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
@@ -630,7 +630,7 @@ static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "pageserver_evictions",
         "Number of layers evicted from the pageserver",
-        &["tenant_id", "timeline_id"]
+        &["tenant_id", "shard_id", "timeline_id"]
     )
     .expect("failed to define a metric")
 });
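Every metric above gains a "shard_id" dimension, which is why the registration and every lookup or removal site must change in the same commit: a prometheus-style vec rejects label-value arrays whose length differs from the registered label names. A self-contained sketch of that failure mode, using the prometheus crate directly rather than the pageserver's own metrics wrapper (names here are illustrative):

use once_cell::sync::Lazy;
use prometheus::{register_int_counter_vec, IntCounterVec};

static DEMO: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "demo_total",
        "Demo counter",
        &["tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

fn demo() {
    // Correct arity: three values for three label names.
    DEMO.with_label_values(&["t1", "0000", "tl1"]).inc();
    // Wrong arity: two values. The fallible getter returns Err (and
    // with_label_values would panic), so a missed call site fails loudly
    // rather than recording under the wrong labels.
    assert!(DEMO.get_metric_with_label_values(&["t1", "tl1"]).is_err());
}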
"pageserver_io_operations_bytes_total", "Total amount of bytes read/written in IO operations", - &["operation", "tenant_id", "timeline_id"] + &["operation", "tenant_id", "shard_id", "timeline_id"] ) .expect("failed to define a metric") }); @@ -1002,7 +1002,7 @@ static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { register_histogram_vec!( "pageserver_smgr_query_seconds", "Time spent on smgr query handling, aggegated by query type and tenant/timeline.", - &["smgr_query_type", "tenant_id", "timeline_id"], + &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"], CRITICAL_OP_BUCKETS.into(), ) .expect("failed to define a metric") @@ -1069,8 +1069,9 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy = Lazy::new(|| { }); impl SmgrQueryTimePerTimeline { - pub(crate) fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self { - let tenant_id = tenant_id.to_string(); + pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self { + let tenant_id = tenant_shard_id.tenant_id.to_string(); + let shard_slug = format!("{}", tenant_shard_id.shard_slug()); let timeline_id = timeline_id.to_string(); let metrics = std::array::from_fn(|i| { let op = SmgrQueryType::from_repr(i).unwrap(); @@ -1078,7 +1079,7 @@ impl SmgrQueryTimePerTimeline { .get_metric_with_label_values(&[op.into()]) .unwrap(); let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE - .get_metric_with_label_values(&[op.into(), &tenant_id, &timeline_id]) + .get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id]) .unwrap(); GlobalAndPerTimelineHistogram { global, @@ -1098,6 +1099,7 @@ impl SmgrQueryTimePerTimeline { #[cfg(test)] mod smgr_query_time_tests { + use pageserver_api::shard::TenantShardId; use strum::IntoEnumIterator; use utils::id::{TenantId, TimelineId}; @@ -1124,7 +1126,10 @@ mod smgr_query_time_tests { for op in &ops { let tenant_id = TenantId::generate(); let timeline_id = TimelineId::generate(); - let metrics = super::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id); + let metrics = super::SmgrQueryTimePerTimeline::new( + &TenantShardId::unsharded(tenant_id), + &timeline_id, + ); let get_counts = || { let global: u64 = ops @@ -1205,7 +1210,13 @@ static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy = Lazy:: "Number of ongoing calls to remote timeline client. \ Used to populate pageserver_remote_timeline_client_calls_started. \ This metric is not useful for sampling from Prometheus, but useful in tests.", - &["tenant_id", "timeline_id", "file_kind", "op_kind"], + &[ + "tenant_id", + "shard_id", + "timeline_id", + "file_kind", + "op_kind" + ], ) .expect("failed to define a metric") }); @@ -1226,22 +1237,23 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy = Lazy::new .expect("failed to define a metric") }); -static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy = Lazy::new(|| { - register_int_counter_vec!( +static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy = + Lazy::new(|| { + register_int_counter_vec!( "pageserver_remote_timeline_client_bytes_started", "Incremented by the number of bytes associated with a remote timeline client operation. 
@@ -1205,7 +1210,13 @@ static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::
         "Number of ongoing calls to remote timeline client. \
          Used to populate pageserver_remote_timeline_client_calls_started. \
          This metric is not useful for sampling from Prometheus, but useful in tests.",
-        &["tenant_id", "timeline_id", "file_kind", "op_kind"],
+        &[
+            "tenant_id",
+            "shard_id",
+            "timeline_id",
+            "file_kind",
+            "op_kind"
+        ],
     )
     .expect("failed to define a metric")
 });
@@ -1226,22 +1237,23 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new
     .expect("failed to define a metric")
 });
 
-static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
+static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
+    Lazy::new(|| {
+        register_int_counter_vec!(
         "pageserver_remote_timeline_client_bytes_started",
         "Incremented by the number of bytes associated with a remote timeline client operation. \
          The increment happens when the operation is scheduled.",
-        &["tenant_id", "timeline_id", "file_kind", "op_kind"],
+        &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
     )
-    .expect("failed to define a metric")
-});
+    .expect("failed to define a metric")
+    });
 
 static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
         "pageserver_remote_timeline_client_bytes_finished",
         "Incremented by the number of bytes associated with a remote timeline client operation. \
          The increment happens when the operation finishes (regardless of success/failure/shutdown).",
-        &["tenant_id", "timeline_id", "file_kind", "op_kind"],
+        &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
     )
     .expect("failed to define a metric")
 });
@@ -1687,14 +1699,19 @@ pub(crate) struct StorageTimeMetrics {
 }
 
 impl StorageTimeMetrics {
-    pub fn new(operation: StorageTimeOperation, tenant_id: &str, timeline_id: &str) -> Self {
+    pub fn new(
+        operation: StorageTimeOperation,
+        tenant_id: &str,
+        shard_id: &str,
+        timeline_id: &str,
+    ) -> Self {
         let operation: &'static str = operation.into();
         let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
-            .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
+            .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
             .unwrap();
         let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
-            .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
+            .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
             .unwrap();
         let global_histogram = STORAGE_TIME_GLOBAL
             .get_metric_with_label_values(&[operation])
@@ -1746,40 +1763,66 @@ impl TimelineMetrics {
         let tenant_id = tenant_shard_id.tenant_id.to_string();
         let shard_id = format!("{}", tenant_shard_id.shard_slug());
         let timeline_id = timeline_id.to_string();
-        let flush_time_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
-        let compact_time_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::Compact, &tenant_id, &timeline_id);
-        let create_images_time_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::CreateImages, &tenant_id, &timeline_id);
-        let logical_size_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::LogicalSize, &tenant_id, &timeline_id);
+        let flush_time_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::LayerFlush,
+            &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
+        let compact_time_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::Compact,
+            &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
+        let create_images_time_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::CreateImages,
+            &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
+        let logical_size_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::LogicalSize,
+            &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
         let imitate_logical_size_histo = StorageTimeMetrics::new(
             StorageTimeOperation::ImitateLogicalSize,
             &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
+        let load_layer_map_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::LoadLayerMap,
+            &tenant_id,
+            &shard_id,
+            &timeline_id,
+        );
+        let garbage_collect_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::Gc,
+            &tenant_id,
+            &shard_id,
             &timeline_id,
         );
-        let load_layer_map_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::LoadLayerMap, &tenant_id, &timeline_id);
-        let garbage_collect_histo =
-            StorageTimeMetrics::new(StorageTimeOperation::Gc, &tenant_id, &timeline_id);
         let last_record_gauge = LAST_RECORD_LSN
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
         let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
+        // TODO: we shouldn't expose this metric
         let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
         let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
         let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
         let evictions = EVICTIONS
-            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
             .unwrap();
         let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
             .build(&tenant_id, &shard_id, &timeline_id);
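The Drop impls below are the other half of the label change: per-timeline series must be removed when their timeline goes away, otherwise the exporter keeps serving stale series, and removal now has to name the shard as well. The pattern reduced to a sketch around a hypothetical gauge:

use once_cell::sync::Lazy;
use prometheus::{register_int_gauge_vec, IntGaugeVec};

static DEMO_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!("demo_gauge", "Demo", &["tenant_id", "shard_id", "timeline_id"])
        .expect("failed to define a metric")
});

struct PerTimelineHandle {
    tenant_id: String,
    shard_id: String,
    timeline_id: String,
}

impl Drop for PerTimelineHandle {
    fn drop(&mut self) {
        // Ignore the result: the series may never have been touched.
        let _ =
            DEMO_GAUGE.remove_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id]);
    }
}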
@@ -1833,15 +1876,17 @@ impl Drop for TimelineMetrics {
         let tenant_id = &self.tenant_id;
         let timeline_id = &self.timeline_id;
         let shard_id = &self.shard_id;
-        let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
         {
             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
-            let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
+            let _ =
+                RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
         }
-        let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
-        let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
-        let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
-        let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
+        let _ =
+            NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
+        let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
+        let _ = EVICTIONS.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
 
         self.evictions_with_low_residence_duration
             .write()
@@ -1854,29 +1899,42 @@ impl Drop for TimelineMetrics {
         // outlive an individual smgr connection, but not the timeline.
 
         for op in StorageTimeOperation::VARIANTS {
-            let _ =
-                STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
-            let _ =
-                STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
+            let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
+                op,
+                tenant_id,
+                shard_id,
+                timeline_id,
+            ]);
+            let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
+                op,
+                tenant_id,
+                shard_id,
+                timeline_id,
+            ]);
         }
 
         for op in STORAGE_IO_SIZE_OPERATIONS {
-            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, timeline_id]);
+            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
         }
 
         for op in SmgrQueryType::iter() {
            let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
                op.into(),
                tenant_id,
+                shard_id,
                timeline_id,
            ]);
        }
    }
 }
 
-pub fn remove_tenant_metrics(tenant_id: &TenantId) {
-    let tid = tenant_id.to_string();
-    let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
+pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
+    // Only shard zero deals in synthetic sizes
+    if tenant_shard_id.is_zero() {
+        let tid = tenant_shard_id.tenant_id.to_string();
+        let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
+    }
+    // we leave the BROKEN_TENANTS_SET entry if any
 }
@@ -1926,6 +1984,7 @@ impl Drop for PerTimelineRemotePhysicalSizeGauge {
 
 pub(crate) struct RemoteTimelineClientMetrics {
     tenant_id: String,
+    shard_id: String,
     timeline_id: String,
     remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
     calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
@@ -1937,6 +1996,7 @@ impl RemoteTimelineClientMetrics {
     pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
         RemoteTimelineClientMetrics {
             tenant_id: tenant_shard_id.tenant_id.to_string(),
+            shard_id: format!("{}", tenant_shard_id.shard_slug()),
             timeline_id: timeline_id.to_string(),
             calls_unfinished_gauge: Mutex::new(HashMap::default()),
             bytes_started_counter: Mutex::new(HashMap::default()),
@@ -1951,8 +2011,9 @@ impl RemoteTimelineClientMetrics {
             PerTimelineRemotePhysicalSizeGauge::new(
                 REMOTE_PHYSICAL_SIZE
                     .get_metric_with_label_values(&[
-                        &self.tenant_id.to_string(),
-                        &self.timeline_id.to_string(),
+                        &self.tenant_id,
+                        &self.shard_id,
+                        &self.timeline_id,
                     ])
                     .unwrap(),
             )
@@ -1987,8 +2048,9 @@ impl RemoteTimelineClientMetrics {
         let metric = guard.entry(key).or_insert_with(move || {
             REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
                 .get_metric_with_label_values(&[
-                    &self.tenant_id.to_string(),
-                    &self.timeline_id.to_string(),
+                    &self.tenant_id,
+                    &self.shard_id,
+                    &self.timeline_id,
                     key.0,
                     key.1,
                 ])
@@ -2018,8 +2080,9 @@ impl RemoteTimelineClientMetrics {
         let metric = guard.entry(key).or_insert_with(move || {
             REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
                 .get_metric_with_label_values(&[
-                    &self.tenant_id.to_string(),
-                    &self.timeline_id.to_string(),
+                    &self.tenant_id,
+                    &self.shard_id,
+                    &self.timeline_id,
                     key.0,
                     key.1,
                 ])
@@ -2038,8 +2101,9 @@ impl RemoteTimelineClientMetrics {
         let metric = guard.entry(key).or_insert_with(move || {
             REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
                 .get_metric_with_label_values(&[
-                    &self.tenant_id.to_string(),
-                    &self.timeline_id.to_string(),
+                    &self.tenant_id,
+                    &self.shard_id,
+                    &self.timeline_id,
                     key.0,
                     key.1,
                 ])
@@ -2183,6 +2247,7 @@ impl Drop for RemoteTimelineClientMetrics {
     fn drop(&mut self) {
         let RemoteTimelineClientMetrics {
             tenant_id,
+            shard_id,
             timeline_id,
             remote_physical_size_gauge,
             calls_unfinished_gauge,
@@ -2192,6 +2257,7 @@ impl Drop for RemoteTimelineClientMetrics {
         for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
             let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
                 tenant_id,
+                shard_id,
                 timeline_id,
                 a,
                 b,
@@ -2200,6 +2266,7 @@ impl Drop for RemoteTimelineClientMetrics {
         for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
             let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
                 tenant_id,
+                shard_id,
                 timeline_id,
                 a,
                 b,
@@ -2208,6 +2275,7 @@ impl Drop for RemoteTimelineClientMetrics {
         for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
             let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
                 tenant_id,
+                shard_id,
                 timeline_id,
                 a,
                 b,
@@ -2215,7 +2283,7 @@ impl Drop for RemoteTimelineClientMetrics {
         }
         {
             let _ = remote_physical_size_gauge; // used to avoid 'unused' warning in destructuring above
-            let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
+            let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         }
     }
 }
@@ -2225,8 +2293,6 @@ impl Drop for RemoteTimelineClientMetrics {
 pub(crate) trait MeasureRemoteOp: Sized {
     fn measure_remote_op(
         self,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
         file_kind: RemoteOpFileKind,
         op: RemoteOpKind,
         metrics: Arc<RemoteTimelineClientMetrics>,
@@ -2234,8 +2300,6 @@ pub(crate) trait MeasureRemoteOp: Sized {
         let start = Instant::now();
         MeasuredRemoteOp {
             inner: self,
-            tenant_id,
-            timeline_id,
             file_kind,
             op,
             start,
@@ -2251,8 +2315,6 @@ pin_project! {
     {
         #[pin]
         inner: F,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
         file_kind: RemoteOpFileKind,
         op: RemoteOpKind,
         start: Instant,
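With the IDs gone from measure_remote_op, the wrapped future carries only the operation kinds plus the metrics handle, which already knows its own tenant/shard/timeline labels. An illustrative analogue of MeasuredRemoteOp without the pin_project machinery (Metrics is a stand-in, not the real RemoteTimelineClientMetrics):

use std::future::Future;
use std::sync::Arc;
use std::time::Instant;

struct Metrics; // stand-in: the real handle owns its label strings
impl Metrics {
    fn record(&self, seconds: f64) {
        eprintln!("remote op took {seconds:.3}s");
    }
}

async fn measure<F, T>(fut: F, metrics: Arc<Metrics>) -> T
where
    F: Future<Output = T>,
{
    let start = Instant::now();
    let out = fut.await;
    // Recorded regardless of the operation's success or failure.
    metrics.record(start.elapsed().as_secs_f64());
    out
}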
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index a296dde07d..bbd2d0e76e 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -545,8 +545,6 @@ impl PageServerHandler {
         pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
         self.flush_cancellable(pgb, &tenant.cancel).await?;
 
-        let metrics = metrics::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
-
         loop {
             let msg = tokio::select! {
                 biased;
@@ -585,7 +583,6 @@ impl PageServerHandler {
             let (response, span) = match neon_fe_msg {
                 PagestreamFeMessage::Exists(req) => {
-                    let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelExists);
                     let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.lsn);
                     (
                         self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
@@ -595,7 +592,6 @@ impl PageServerHandler {
                     )
                 }
                 PagestreamFeMessage::Nblocks(req) => {
-                    let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelSize);
                     let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.lsn);
                     (
                         self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
@@ -605,7 +601,6 @@ impl PageServerHandler {
                     )
                 }
                 PagestreamFeMessage::GetPage(req) => {
-                    let _timer = metrics.start_timer(metrics::SmgrQueryType::GetPageAtLsn);
                     let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn);
                     (
                         self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
@@ -615,7 +610,6 @@ impl PageServerHandler {
                     )
                 }
                 PagestreamFeMessage::DbSize(req) => {
-                    let _timer = metrics.start_timer(metrics::SmgrQueryType::GetDbSize);
                     let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.lsn);
                     (
                         self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
@@ -865,6 +859,9 @@ impl PageServerHandler {
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
         let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
+        let _timer = timeline
+            .query_metrics
+            .start_timer(metrics::SmgrQueryType::GetRelExists);
 
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn =
@@ -888,6 +885,11 @@ impl PageServerHandler {
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
         let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
+
+        let _timer = timeline
+            .query_metrics
+            .start_timer(metrics::SmgrQueryType::GetRelSize);
+
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn =
             Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
@@ -910,6 +912,11 @@ impl PageServerHandler {
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
         let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
+
+        let _timer = timeline
+            .query_metrics
+            .start_timer(metrics::SmgrQueryType::GetDbSize);
+
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn =
             Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
@@ -1080,6 +1087,10 @@ impl PageServerHandler {
             }
         };
 
+        let _timer = timeline
+            .query_metrics
+            .start_timer(metrics::SmgrQueryType::GetPageAtLsn);
+
         let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
         let lsn =
             Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
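The handler-side timers above rely on start_timer returning a guard: each handler creates it only after get_timeline_shard_zero has resolved the concrete shard's Timeline (and therefore its query_metrics), and the observation is recorded when the guard drops at the end of the handler. The guard idea as a stand-in sketch:

use std::time::Instant;

struct QueryTimer {
    start: Instant,
}

fn start_timer() -> QueryTimer {
    QueryTimer {
        start: Instant::now(),
    }
}

impl Drop for QueryTimer {
    fn drop(&mut self) {
        // The real guard observes into SmgrQueryTimePerTimeline here.
        eprintln!("smgr query took {:?}", self.start.elapsed());
    }
}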
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 5240cc217a..ce99569beb 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -112,7 +112,7 @@ use toml_edit;
 use utils::{
     crashsafe,
     generation::Generation,
-    id::{TenantId, TimelineId},
+    id::TimelineId,
     lsn::{Lsn, RecordLsn},
 };
 
@@ -371,13 +371,13 @@ impl WalRedoManager {
 pub enum GetTimelineError {
     #[error("Timeline {tenant_id}/{timeline_id} is not active, state: {state:?}")]
     NotActive {
-        tenant_id: TenantId,
+        tenant_id: TenantShardId,
         timeline_id: TimelineId,
         state: TimelineState,
     },
     #[error("Timeline {tenant_id}/{timeline_id} was not found")]
     NotFound {
-        tenant_id: TenantId,
+        tenant_id: TenantShardId,
         timeline_id: TimelineId,
     },
 }
@@ -1517,10 +1517,6 @@ impl Tenant {
             .map_err(LoadLocalTimelineError::Load)
     }
 
-    pub(crate) fn tenant_id(&self) -> TenantId {
-        self.tenant_shard_id.tenant_id
-    }
-
     pub(crate) fn tenant_shard_id(&self) -> TenantShardId {
         self.tenant_shard_id
     }
@@ -1536,13 +1532,13 @@ impl Tenant {
         let timeline = timelines_accessor
             .get(&timeline_id)
             .ok_or(GetTimelineError::NotFound {
-                tenant_id: self.tenant_shard_id.tenant_id,
+                tenant_id: self.tenant_shard_id,
                 timeline_id,
             })?;
 
         if active_only && !timeline.is_active() {
             Err(GetTimelineError::NotActive {
-                tenant_id: self.tenant_shard_id.tenant_id,
+                tenant_id: self.tenant_shard_id,
                 timeline_id,
                 state: timeline.current_state(),
             })
@@ -2597,7 +2593,9 @@ impl Tenant {
         let (state, mut rx) = watch::channel(state);
 
         tokio::spawn(async move {
+            // Strings for metric labels
             let tid = tenant_shard_id.to_string();
+            let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
 
             fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {
                 ([state.into()], matches!(state, TenantState::Broken { .. }))
@@ -2610,13 +2608,15 @@ impl Tenant {
                 // the tenant might be ignored and reloaded, so first remove any previous set
                 // element. it most likely has already been scraped, as these are manual operations
                 // right now. most likely we will add it back very soon.
-                drop(crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid]));
+                drop(
+                    crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid, &shard_id_str]),
+                );
                 false
             } else {
                 // add the id to the set right away, there should not be any updates on the channel
                 // after
                 crate::metrics::BROKEN_TENANTS_SET
-                    .with_label_values(&[&tid])
+                    .with_label_values(&[&tid, &shard_id_str])
                     .set(1);
                 true
             };
@@ -2642,7 +2642,7 @@ impl Tenant {
                         counted_broken = true;
                         // insert the tenant_id (back) into the set
                         crate::metrics::BROKEN_TENANTS_SET
-                            .with_label_values(&[&tid])
+                            .with_label_values(&[&tid, &shard_id_str])
                             .inc();
                     }
                 }
@@ -3629,6 +3629,9 @@ impl Tenant {
         self.cached_synthetic_tenant_size
             .store(size, Ordering::Relaxed);
 
+        // Only shard zero should be calculating synthetic sizes
+        debug_assert!(self.shard_identity.is_zero());
+
         TENANT_SYNTHETIC_SIZE_METRIC
             .get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()])
             .unwrap()
@@ -3780,7 +3783,7 @@ async fn run_initdb(
 
 impl Drop for Tenant {
     fn drop(&mut self) {
-        remove_tenant_metrics(&self.tenant_shard_id.tenant_id);
+        remove_tenant_metrics(&self.tenant_shard_id);
     }
 }
 
 /// Dump contents of a layer file to stdout.
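The task spawned in the @@ -2597 hunk above follows a tokio watch channel and mirrors Broken transitions into BROKEN_TENANTS_SET, now under both labels. The same watch-and-mirror shape with demo types (not the pageserver's):

use tokio::sync::watch;

enum DemoState {
    Active,
    Broken,
}

async fn mirror_broken(mut rx: watch::Receiver<DemoState>) {
    while rx.changed().await.is_ok() {
        if matches!(*rx.borrow(), DemoState::Broken) {
            // Real code: BROKEN_TENANTS_SET.with_label_values(&[&tid, &shard_id_str]).set(1)
            eprintln!("tenant became broken");
        }
    }
}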
@@ -5208,7 +5211,7 @@ mod tests {
             assert_eq!(
                 e,
                 GetTimelineError::NotFound {
-                    tenant_id: tenant.tenant_shard_id.tenant_id,
+                    tenant_id: tenant.tenant_shard_id,
                     timeline_id: TIMELINE_ID,
                 }
             )
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 50b895aca1..84c7a20247 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -847,15 +847,13 @@ impl TenantManager {
                 TenantState::Active => Ok(Arc::clone(tenant)),
                 _ => {
                     if active_only {
-                        Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
+                        Err(GetTenantError::NotActive(tenant_shard_id))
                     } else {
                         Ok(Arc::clone(tenant))
                     }
                 }
             },
-            Some(TenantSlot::InProgress(_)) => {
-                Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
-            }
+            Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
             None | Some(TenantSlot::Secondary(_)) => {
                 Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
             }
@@ -1306,10 +1304,13 @@ impl TenantManager {
 
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum GetTenantError {
+    /// NotFound is a TenantId rather than TenantShardId, because this error type is used from
+    /// getters that use a TenantId and a ShardSelector, not just getters that target a specific shard.
     #[error("Tenant {0} not found")]
     NotFound(TenantId),
+
     #[error("Tenant {0} is not active")]
-    NotActive(TenantId),
+    NotActive(TenantShardId),
+
     /// Broken is logically a subset of NotActive, but a distinct error is useful as
     /// NotActive is usually a retryable state for API purposes, whereas Broken
     /// is a stuck error state
@@ -1342,15 +1343,13 @@ pub(crate) fn get_tenant(
             TenantState::Active => Ok(Arc::clone(tenant)),
             _ => {
                 if active_only {
-                    Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
+                    Err(GetTenantError::NotActive(tenant_shard_id))
                 } else {
                     Ok(Arc::clone(tenant))
                 }
             }
         },
-        Some(TenantSlot::InProgress(_)) => {
-            Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
-        }
+        Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
         None | Some(TenantSlot::Secondary(_)) => {
             Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
         }
@@ -1426,7 +1425,7 @@ pub(crate) async fn get_active_tenant_with_timeout(
         }
         Some(TenantSlot::Secondary(_)) => {
             return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
-                tenant_id,
+                tenant_shard_id,
             )))
         }
         Some(TenantSlot::InProgress(barrier)) => {
@@ -1465,7 +1464,7 @@ pub(crate) async fn get_active_tenant_with_timeout(
                 Some(TenantSlot::Attached(tenant)) => tenant.clone(),
                 _ => {
                     return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
-                        tenant_id,
+                        tenant_shard_id,
                     )))
                 }
             }
@@ -1493,7 +1492,7 @@ pub(crate) enum DeleteTimelineError {
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum TenantStateError {
     #[error("Tenant {0} is stopping")]
-    IsStopping(TenantId),
+    IsStopping(TenantShardId),
     #[error(transparent)]
     SlotError(#[from] TenantSlotError),
     #[error(transparent)]
@@ -2123,7 +2122,7 @@ where
             // if pageserver shutdown or other detach/ignore is already ongoing, we don't want to
             // wait for it but return an error right away because these are distinct requests.
             slot_guard.revert();
-            return Err(TenantStateError::IsStopping(tenant_shard_id.tenant_id));
+            return Err(TenantStateError::IsStopping(tenant_shard_id));
         }
     }
     Some(tenant)
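The slot decision table above appears twice (TenantManager::get_tenant and the free get_tenant), and the new doc comment on GetTenantError captures the asymmetry it produces: only a concrete slot can name a shard, so NotActive carries a TenantShardId while NotFound keeps the bare TenantId the lookup started from. Restated with demo types:

// Demo types only; the real TenantSlot and GetTenantError live in tenant/mgr.rs.
enum DemoSlot {
    Attached { active: bool },
    Secondary,
    InProgress,
}

enum DemoOutcome {
    Ok,
    NotActive, // a concrete shard slot exists, so a TenantShardId is available
    NotFound,  // nothing matched: only the TenantId from the lookup is known
}

fn classify(slot: Option<&DemoSlot>, active_only: bool) -> DemoOutcome {
    match slot {
        Some(DemoSlot::Attached { active: true }) => DemoOutcome::Ok,
        Some(DemoSlot::Attached { active: false }) => {
            if active_only {
                DemoOutcome::NotActive
            } else {
                DemoOutcome::Ok
            }
        }
        Some(DemoSlot::InProgress) => DemoOutcome::NotActive,
        None | Some(DemoSlot::Secondary) => DemoOutcome::NotFound,
    }
}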
@@ -2252,7 +2251,6 @@ pub(crate) async fn immediate_gc(
 
 #[cfg(test)]
 mod tests {
-    use pageserver_api::shard::TenantShardId;
     use std::collections::BTreeMap;
     use std::sync::Arc;
     use tracing::{info_span, Instrument};
@@ -2273,7 +2271,7 @@ mod tests {
 
         // harness loads it to active, which is forced and nothing is running on the tenant
 
-        let id = TenantShardId::unsharded(t.tenant_id());
+        let id = t.tenant_shard_id();
 
         // tenant harness configures the logging and we cannot escape it
         let _e = info_span!("testing", tenant_id = %id).entered();
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 7935209252..1b5f861c90 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -522,8 +522,6 @@ impl RemoteTimelineClient {
             cancel,
         )
         .measure_remote_op(
-            self.tenant_shard_id.tenant_id,
-            self.timeline_id,
             RemoteOpFileKind::Index,
             RemoteOpKind::Download,
             Arc::clone(&self.metrics),
@@ -566,8 +564,6 @@ impl RemoteTimelineClient {
             cancel,
         )
         .measure_remote_op(
-            self.tenant_shard_id.tenant_id,
-            self.timeline_id,
             RemoteOpFileKind::Layer,
             RemoteOpKind::Download,
             Arc::clone(&self.metrics),
@@ -1351,8 +1347,6 @@ impl RemoteTimelineClient {
             &self.cancel,
         )
         .measure_remote_op(
-            self.tenant_shard_id.tenant_id,
-            self.timeline_id,
             RemoteOpFileKind::Layer,
             RemoteOpKind::Upload,
             Arc::clone(&self.metrics),
@@ -1378,8 +1372,6 @@ impl RemoteTimelineClient {
             &self.cancel,
         )
         .measure_remote_op(
-            self.tenant_shard_id.tenant_id,
-            self.timeline_id,
             RemoteOpFileKind::Index,
             RemoteOpKind::Upload,
             Arc::clone(&self.metrics),
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 6b1487259f..0cb7cf26f2 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -252,6 +252,10 @@ pub struct Timeline {
 
     pub(super) metrics: TimelineMetrics,
 
+    // `Timeline` doesn't write these metrics itself, but it manages the lifetime.  Code
+    // in `crate::page_service` writes these metrics.
+    pub(crate) query_metrics: crate::metrics::SmgrQueryTimePerTimeline,
+
     /// Ensures layers aren't frozen by checkpointer between
     /// [`Timeline::get_layer_for_write`] and layer reads.
     /// Locked automatically by [`TimelineWriter`] and checkpointer.
@@ -1315,6 +1319,11 @@ impl Timeline {
                 ),
             ),
 
+            query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(
+                &tenant_shard_id,
+                &timeline_id,
+            ),
+
             flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
 
             layer_flush_start_tx,
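The virtual_file.rs change below derives all three labels from the file's on-disk path, tolerating anything that does not look like .../tenants/<tenant_shard_id>/timelines/<timeline_id>/<file>. A standalone sketch of the same derivation; the split on '-' stands in for the real tenant_shard_part.parse::<TenantShardId>() (assuming sharded directory names look like "<tenant_id>-<shard_slug>"), so an unsharded name maps to shard "*" here rather than the parser's "0000":

fn io_labels(path: &str) -> (String, String, String) {
    let parts: Vec<&str> = path.split('/').collect();
    if parts.len() > 5 && parts[parts.len() - 5] == "tenants" {
        let tenant_shard_part = parts[parts.len() - 4];
        // Approximation of TenantShardId parsing: "<tenant_id>-<shard_slug>".
        let (tenant_id, shard_id) = match tenant_shard_part.split_once('-') {
            Some((t, slug)) => (t.to_string(), slug.to_string()),
            None => (tenant_shard_part.to_string(), "*".to_string()),
        };
        (tenant_id, shard_id, parts[parts.len() - 2].to_string())
    } else {
        ("*".to_string(), "*".to_string(), "*".to_string())
    }
}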
diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs
index e549d208b2..9feefd8a32 100644
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -14,6 +14,7 @@ use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC
 use crate::tenant::TENANTS_SEGMENT_NAME;
 use camino::{Utf8Path, Utf8PathBuf};
 use once_cell::sync::OnceCell;
+use pageserver_api::shard::TenantShardId;
 use std::fs::{self, File, OpenOptions};
 use std::io::{Error, ErrorKind, Seek, SeekFrom};
 use std::os::unix::fs::FileExt;
@@ -60,6 +61,7 @@ pub struct VirtualFile {
     // It makes no sense for us to constantly turn the `TimelineId` and `TenantId` into
     // strings.
     tenant_id: String,
+    shard_id: String,
     timeline_id: String,
 }
 
@@ -301,15 +303,24 @@ impl VirtualFile {
     ) -> Result<VirtualFile, std::io::Error> {
         let path_str = path.to_string();
         let parts = path_str.split('/').collect::<Vec<&str>>();
-        let tenant_id;
-        let timeline_id;
-        if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
-            tenant_id = parts[parts.len() - 4].to_string();
-            timeline_id = parts[parts.len() - 2].to_string();
-        } else {
-            tenant_id = "*".to_string();
-            timeline_id = "*".to_string();
-        }
+        let (tenant_id, shard_id, timeline_id) =
+            if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
+                let tenant_shard_part = parts[parts.len() - 4];
+                let (tenant_id, shard_id) = match tenant_shard_part.parse::<TenantShardId>() {
+                    Ok(tenant_shard_id) => (
+                        tenant_shard_id.tenant_id.to_string(),
+                        format!("{}", tenant_shard_id.shard_slug()),
+                    ),
+                    Err(_) => {
+                        // Malformed path: this ID is just for observability, so tolerate it
+                        // and pass through
+                        (tenant_shard_part.to_string(), "*".to_string())
+                    }
+                };
+                (tenant_id, shard_id, parts[parts.len() - 2].to_string())
+            } else {
+                ("*".to_string(), "*".to_string(), "*".to_string())
+            };
 
         let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;
 
         // NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
@@ -333,6 +344,7 @@ impl VirtualFile {
             path: path.to_path_buf(),
             open_options: reopen_options,
             tenant_id,
+            shard_id,
             timeline_id,
         };
 
@@ -574,7 +586,7 @@ impl VirtualFile {
             .read_at(buf, offset));
         if let Ok(size) = result {
             STORAGE_IO_SIZE
-                .with_label_values(&["read", &self.tenant_id, &self.timeline_id])
+                .with_label_values(&["read", &self.tenant_id, &self.shard_id, &self.timeline_id])
                 .add(size as i64);
         }
         result
@@ -586,7 +598,7 @@ impl VirtualFile {
             .write_at(buf, offset));
         if let Ok(size) = result {
             STORAGE_IO_SIZE
-                .with_label_values(&["write", &self.tenant_id, &self.timeline_id])
+                .with_label_values(&["write", &self.tenant_id, &self.shard_id, &self.timeline_id])
                 .add(size as i64);
         }
         result
diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index 852b290f77..26de229e4d 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -2201,7 +2201,8 @@ mod tests {
         let harness = TenantHarness::create("test_ingest_real_wal").unwrap();
         let (tenant, ctx) = harness.load().await;
 
-        let remote_initdb_path = remote_initdb_archive_path(&tenant.tenant_id(), &TIMELINE_ID);
+        let remote_initdb_path =
+            remote_initdb_archive_path(&tenant.tenant_shard_id().tenant_id, &TIMELINE_ID);
         let initdb_path = harness.remote_fs_dir.join(remote_initdb_path.get_path());
 
         std::fs::create_dir_all(initdb_path.parent().unwrap())