From ff9bb05f17fcf94c674ab3931445e9479eabc75e Mon Sep 17 00:00:00 2001
From: Vlad Lazar
Date: Mon, 10 Feb 2025 23:14:23 +0100
Subject: [PATCH] pageserver: add tracing configuration knobs

---
 Cargo.lock                            |  1 +
 libs/pageserver_api/Cargo.toml        |  1 +
 libs/pageserver_api/src/config.rs     | 61 +++++++++++++++++++++++++++
 pageserver/src/config.rs              | 16 +++++++
 test_runner/fixtures/neon_fixtures.py | 36 ++++++++++++++++
 5 files changed, 115 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index 4ac93bba67..eb4b31af9a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4339,6 +4339,7 @@ dependencies = [
  "strum",
  "strum_macros",
  "thiserror 1.0.69",
+ "tracing-utils",
  "utils",
 ]
 
diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml
index 87dfdfb5ec..688e9de6e7 100644
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -34,6 +34,7 @@ postgres_backend.workspace = true
 nix = {workspace = true, optional = true}
 reqwest.workspace = true
 rand.workspace = true
+tracing-utils.workspace = true
 
 [dev-dependencies]
 bincode.workspace = true
diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs
index f387ff0579..7adb8e6422 100644
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -127,6 +127,7 @@ pub struct ConfigToml {
     pub load_previous_heatmap: Option<bool>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub generate_unarchival_heatmap: Option<bool>,
+    pub tracing: Option<Tracing>,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -184,6 +185,65 @@ pub enum GetVectoredConcurrentIo {
     SidecarTask,
 }
 
+/// A fraction `numerator / denominator`, used for [`Tracing::sampling_ratio`].
+///
+/// Deserialization accepts any pair; the pageserver rejects a zero denominator or a
+/// numerator greater than the denominator when it builds its `PageServerConf`.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct Ratio {
+    pub numerator: usize,
+    pub denominator: usize,
+}
+
+/// Exporter settings; convertible into [`tracing_utils::ExportConfig`].
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct OtelExporterConfig {
+    pub endpoint: String,
+    pub protocol: OtelExporterProtocol,
+    #[serde(with = "humantime_serde")]
+    pub timeout: Duration,
+}
+
+/// Exporter wire protocol; maps one-to-one onto [`tracing_utils::Protocol`].
+#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub enum OtelExporterProtocol {
+    Grpc,
+    HttpBinary,
+    HttpJson,
+}
+
+/// The optional `tracing` section of the pageserver config. Unknown keys are rejected.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(deny_unknown_fields)]
+#[serde(rename_all = "kebab-case")]
+pub struct Tracing {
+    pub sampling_ratio: Ratio,
+    pub export_config: OtelExporterConfig,
+}
+
+impl From<&OtelExporterConfig> for tracing_utils::ExportConfig {
+    fn from(val: &OtelExporterConfig) -> Self {
+        tracing_utils::ExportConfig {
+            endpoint: Some(val.endpoint.clone()),
+            protocol: val.protocol.into(),
+            timeout: val.timeout,
+        }
+    }
+}
+
+impl From<OtelExporterProtocol> for tracing_utils::Protocol {
+    fn from(val: OtelExporterProtocol) -> Self {
+        match val {
+            OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,
+            OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,
+            OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,
+        }
+    }
+}
+
 pub mod statvfs {
     pub mod mock {
         #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -529,6 +589,7 @@ impl Default for ConfigToml {
             validate_wal_contiguity: None,
             load_previous_heatmap: None,
             generate_unarchival_heatmap: None,
+            tracing: None,
         }
     }
 }
diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs
index 06be873160..cd6df20473 100644
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -201,6 +201,9 @@ pub struct PageServerConf {
 
     /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
     pub generate_unarchival_heatmap: bool,
+
+    /// Optional tracing settings, taken from the `tracing` field of the TOML config.
+    pub tracing: Option<pageserver_api::config::Tracing>,
 }
 
 /// Token for authentication to safekeepers
@@ -367,6 +370,7 @@ impl PageServerConf {
             validate_wal_contiguity,
             load_previous_heatmap,
             generate_unarchival_heatmap,
+            tracing,
         } = config_toml;
 
         let mut conf = PageServerConf {
@@ -412,6 +416,7 @@ impl PageServerConf {
             wal_receiver_protocol,
             page_service_pipelining,
             get_vectored_concurrent_io,
+            tracing,
 
             // ------------------------------------------------------------
             // fields that require additional validation or custom handling
@@ -476,6 +481,17 @@ impl PageServerConf {
             );
         }
 
+        if let Some(tracing_config) = conf.tracing.as_ref() {
+            let ratio = &tracing_config.sampling_ratio;
+            ensure!(
+                ratio.denominator != 0 && ratio.denominator >= ratio.numerator,
+                format!(
+                    "Invalid sampling ratio: {}/{}",
+                    ratio.numerator, ratio.denominator
+                )
+            );
+        }
+
         IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)
             .map_err(anyhow::Error::msg)
             .with_context(|| {
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 4d2b3587e8..d8fbc22b40 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -376,6 +376,31 @@ class PageserverWalReceiverProtocol(StrEnum):
             raise ValueError(f"Unknown protocol type: {proto}")
 
 
+@dataclass
+class PageserverTracingConfig:
+    """Pageserver `tracing` config section; `sampling_ratio` is (numerator, denominator)."""
+
+    sampling_ratio: tuple[int, int]
+    endpoint: str
+    protocol: str
+    timeout: str
+
+    def to_config_key_value(self) -> tuple[str, dict[str, Any]]:
+        """Return the `("tracing", section)` pair to merge into a pageserver config dict."""
+        value = {
+            "sampling-ratio": {
+                "numerator": self.sampling_ratio[0],
+                "denominator": self.sampling_ratio[1],
+            },
+            "export-config": {
+                "endpoint": self.endpoint,
+                "protocol": self.protocol,
+                "timeout": self.timeout,
+            },
+        }
+        return ("tracing", value)
+
+
 class NeonEnvBuilder:
     """
     Builder object to create a Neon runtime environment
@@ -425,6 +450,7 @@ class NeonEnvBuilder:
         pageserver_virtual_file_io_mode: str | None = None,
         pageserver_wal_receiver_protocol: PageserverWalReceiverProtocol | None = None,
         pageserver_get_vectored_concurrent_io: str | None = None,
+        pageserver_tracing_config: PageserverTracingConfig | None = None,
     ):
         self.repo_dir = repo_dir
         self.rust_log_override = rust_log_override
@@ -468,6 +494,8 @@ class NeonEnvBuilder:
             pageserver_get_vectored_concurrent_io
         )
 
+        self.pageserver_tracing_config = pageserver_tracing_config
+
         self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
             pageserver_default_tenant_config_compaction_algorithm
         )
@@ -1113,6 +1141,7 @@ class NeonEnv:
         self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode
         self.pageserver_wal_receiver_protocol = config.pageserver_wal_receiver_protocol
         self.pageserver_get_vectored_concurrent_io = config.pageserver_get_vectored_concurrent_io
+        self.pageserver_tracing_config = config.pageserver_tracing_config
 
         # Create the neon_local's `NeonLocalInitConf`
         cfg: dict[str, Any] = {
@@ -1216,6 +1245,13 @@ class NeonEnv:
                 if key not in ps_cfg:
                     ps_cfg[key] = value
 
+            if self.pageserver_tracing_config is not None:
+                key, value = self.pageserver_tracing_config.to_config_key_value()
+
+                # Keep an entry that is already present in ps_cfg rather than clobbering it.
+                if key not in ps_cfg:
+                    ps_cfg[key] = value
+
             # Create a corresponding NeonPageserver object
             self.pageservers.append(
                 NeonPageserver(self, ps_id, port=pageserver_port, az_id=ps_cfg["availability_zone"])