diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index b39acbca4d..591eb3728b 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -545,6 +545,11 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'gc_compaction_ratio_percent' as integer")?, + sampling_ratio: settings + .remove("sampling_ratio") + .map(serde_json::from_str) + .transpose() + .context("Falied to parse 'sampling_ratio'")?, }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 66a02b87b0..d0225c8918 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -192,7 +192,7 @@ pub enum GetVectoredConcurrentIo { SidecarTask, } -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct Ratio { pub numerator: usize, pub denominator: usize, @@ -416,6 +416,9 @@ pub struct TenantConfigToml { /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN) /// is above this ratio, gc-compaction will be triggered. pub gc_compaction_ratio_percent: u64, + /// Tenant level performance sampling ratio override. Controls the ratio of get page requests + /// that will get perf sampling for the tenant. + pub sampling_ratio: Option, } pub mod defaults { @@ -702,6 +705,7 @@ impl Default for TenantConfigToml { gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED, gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB, gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT, + sampling_ratio: None, } } } diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index f2dd3a0ebf..16d9433973 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -23,6 +23,7 @@ use utils::lsn::Lsn; use utils::postgres_client::PostgresClientProtocol; use utils::{completion, serde_system_time}; +use crate::config::Ratio; use crate::key::{CompactKey, Key}; use crate::reltag::RelTag; use crate::shard::{ShardCount, ShardStripeSize, TenantShardId}; @@ -568,6 +569,8 @@ pub struct TenantConfigPatch { pub gc_compaction_initial_threshold_kb: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub gc_compaction_ratio_percent: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub sampling_ratio: FieldPatch>, } /// Like [`crate::config::TenantConfigToml`], but preserves the information @@ -688,6 +691,9 @@ pub struct TenantConfig { #[serde(skip_serializing_if = "Option::is_none")] pub gc_compaction_ratio_percent: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub sampling_ratio: Option>, } impl TenantConfig { @@ -730,6 +736,7 @@ impl TenantConfig { mut gc_compaction_enabled, mut gc_compaction_initial_threshold_kb, mut gc_compaction_ratio_percent, + mut sampling_ratio, } = self; patch.checkpoint_distance.apply(&mut checkpoint_distance); @@ -824,6 +831,7 @@ impl TenantConfig { patch .gc_compaction_ratio_percent .apply(&mut gc_compaction_ratio_percent); + patch.sampling_ratio.apply(&mut sampling_ratio); Ok(Self { checkpoint_distance, @@ -860,6 +868,7 @@ impl TenantConfig { gc_compaction_enabled, gc_compaction_initial_threshold_kb, gc_compaction_ratio_percent, + sampling_ratio, }) } @@ -961,6 +970,7 @@ impl TenantConfig { gc_compaction_ratio_percent: self .gc_compaction_ratio_percent .unwrap_or(global_conf.gc_compaction_ratio_percent), + sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio), } } } diff --git a/libs/tracing-utils/src/perf_span.rs b/libs/tracing-utils/src/perf_span.rs index f2ca76a816..16f713c67e 100644 --- a/libs/tracing-utils/src/perf_span.rs +++ b/libs/tracing-utils/src/perf_span.rs @@ -28,7 +28,7 @@ use core::{ task::{Context, Poll}, }; use pin_project_lite::pin_project; -use tracing::{Dispatch, field, span::Span}; +use tracing::{Dispatch, span::Span}; #[derive(Debug, Clone)] pub struct PerfSpan { @@ -49,15 +49,6 @@ impl PerfSpan { } } - pub fn record( - &self, - field: &Q, - value: V, - ) -> &Self { - self.inner.record(field, value); - self - } - pub fn enter(&self) -> PerfSpanEntered { if let Some(ref id) = self.inner.id() { self.dispatch.enter(id); diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 279d2daf75..481fdb4ea2 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -572,19 +572,6 @@ impl RequestContext { } } - pub(crate) fn perf_span_record< - Q: tracing::field::AsField + ?Sized, - V: tracing::field::Value, - >( - &self, - field: &Q, - value: V, - ) { - if let Some(span) = &self.perf_span { - span.record(field, value); - } - } - pub(crate) fn has_perf_span(&self) -> bool { self.perf_span.is_some() } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 3ebd6d8506..f9bf45bb71 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -18,7 +18,7 @@ use itertools::Itertools; use once_cell::sync::OnceCell; use pageserver_api::config::{ PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, - PageServiceProtocolPipelinedExecutionStrategy, Tracing, + PageServiceProtocolPipelinedExecutionStrategy, }; use pageserver_api::key::rel_block_to_key; use pageserver_api::models::{ @@ -37,7 +37,6 @@ use postgres_ffi::BLCKSZ; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use pq_proto::framed::ConnectionError; use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor}; -use rand::Rng; use strum_macros::IntoStaticStr; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter}; use tokio::task::JoinHandle; @@ -755,7 +754,6 @@ impl PageServerHandler { tenant_id: TenantId, timeline_id: TimelineId, timeline_handles: &mut TimelineHandles, - tracing_config: Option<&Tracing>, cancel: &CancellationToken, ctx: &RequestContext, protocol_version: PagestreamProtocolVersion, @@ -916,47 +914,8 @@ impl PageServerHandler { let key = rel_block_to_key(req.rel, req.blkno); - let sampled = match tracing_config { - Some(conf) => { - let ratio = &conf.sampling_ratio; - - if ratio.numerator == 0 { - false - } else { - rand::thread_rng().gen_range(0..ratio.denominator) < ratio.numerator - } - } - None => false, - }; - - let ctx = if sampled { - RequestContextBuilder::from(ctx) - .root_perf_span(|| { - info_span!( - target: PERF_TRACE_TARGET, - "GET_PAGE", - tenant_id = %tenant_id, - shard_id = field::Empty, - timeline_id = %timeline_id, - lsn = %req.hdr.request_lsn, - request_id = %req.hdr.reqid, - key = %key, - ) - }) - .attached_child() - } else { - ctx.attached_child() - }; - let res = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Page(key)) - .maybe_perf_instrument(&ctx, |current_perf_span| { - info_span!( - target: PERF_TRACE_TARGET, - parent: current_perf_span, - "SHARD_SELECTION", - ) - }) .await; let shard = match res { @@ -987,6 +946,25 @@ impl PageServerHandler { } }; + let ctx = if shard.is_get_page_request_sampled() { + RequestContextBuilder::from(ctx) + .root_perf_span(|| { + info_span!( + target: PERF_TRACE_TARGET, + "GET_PAGE", + tenant_id = %tenant_id, + shard_id = %shard.get_shard_identity().shard_slug(), + timeline_id = %timeline_id, + lsn = %req.hdr.request_lsn, + request_id = %req.hdr.reqid, + key = %key, + ) + }) + .attached_child() + } else { + ctx.attached_child() + }; + // This ctx travels as part of the BatchedFeMessage through // batching into the request handler. // The request handler needs to do some per-request work @@ -1001,12 +979,6 @@ impl PageServerHandler { // request handler log messages contain the request-specific fields. let span = mkspan!(shard.tenant_shard_id.shard_slug()); - // Enrich the perf span with shard_id now that shard routing is done. - ctx.perf_span_record( - "shard_id", - tracing::field::display(shard.get_shard_identity().shard_slug()), - ); - let timer = record_op_start_and_throttle( &shard, metrics::SmgrQueryType::GetPageAtLsn, @@ -1602,7 +1574,6 @@ impl PageServerHandler { IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static, { let cancel = self.cancel.clone(); - let tracing_config = self.conf.tracing.clone(); let err = loop { let msg = Self::pagestream_read_message( @@ -1610,7 +1581,6 @@ impl PageServerHandler { tenant_id, timeline_id, &mut timeline_handles, - tracing_config.as_ref(), &cancel, ctx, protocol_version, @@ -1744,8 +1714,6 @@ impl PageServerHandler { // Batcher // - let tracing_config = self.conf.tracing.clone(); - let cancel_batcher = self.cancel.child_token(); let (mut batch_tx, mut batch_rx) = spsc_fold::channel(); let batcher = pipeline_stage!("batcher", cancel_batcher.clone(), move |cancel_batcher| { @@ -1759,7 +1727,6 @@ impl PageServerHandler { tenant_id, timeline_id, &mut timeline_handles, - tracing_config.as_ref(), &cancel_batcher, &ctx, protocol_version, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 74e97653d2..6ca3704bc1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2476,6 +2476,31 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.lazy_slru_download) } + /// Checks if a get page request should get perf tracing + /// + /// The configuration priority is: tenant config override, default tenant config, + /// pageserver config. + pub(crate) fn is_get_page_request_sampled(&self) -> bool { + let tenant_conf = self.tenant_conf.load(); + let ratio = tenant_conf + .tenant_conf + .sampling_ratio + .flatten() + .or(self.conf.default_tenant_conf.sampling_ratio) + .or(self.conf.tracing.as_ref().map(|t| t.sampling_ratio)); + + match ratio { + Some(r) => { + if r.numerator == 0 { + false + } else { + rand::thread_rng().gen_range(0..r.denominator) < r.numerator + } + } + None => false, + } + } + fn get_checkpoint_distance(&self) -> u64 { let tenant_conf = self.tenant_conf.load(); tenant_conf diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 22dfcbda92..5021cc4b17 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -190,6 +190,10 @@ def test_fully_custom_config(positive_env: NeonEnv): "gc_compaction_initial_threshold_kb": 1024000, "gc_compaction_ratio_percent": 200, "image_creation_preempt_threshold": 5, + "sampling_ratio": { + "numerator": 0, + "denominator": 10, + }, } vps_http = env.storage_controller.pageserver_api()