Compare commits

...

6 Commits

Author SHA1 Message Date
Folke Behrens
357795e0c5 VPC flow logs IaC 2025-04-06 13:14:40 +02:00
Folke Behrens
f105ddb778 aws lambda function for periodic collection of pod information 2025-04-06 13:14:40 +02:00
Alexander Bayandin
8e1b5a9727 Fix Postgres build on macOS (#11442)
## Problem
Postgres build fails with the following error on macOS:

```
/Users/bayandin/work/neon//vendor/postgres-v14/src/port/snprintf.c:424:27: error: 'strchrnul' is only available on macOS 15.4 or newer [-Werror,-Wunguarded-availability-new]
  424 |                         const char *next_pct = strchrnul(format + 1, '%');
      |                                                ^~~~~~~~~
/Users/bayandin/work/neon//vendor/postgres-v14/src/port/snprintf.c:376:14: note: 'strchrnul' has been marked as being introduced in macOS 15.4 here, but the deployment target is macOS 15.0.0
  376 | extern char *strchrnul(const char *s, int c);
      |              ^
/Users/bayandin/work/neon//vendor/postgres-v14/src/port/snprintf.c:424:27: note: enclose 'strchrnul' in a __builtin_available check to silence this warning
  424 |                         const char *next_pct = strchrnul(format + 1, '%');
      |                                                ^~~~~~~~~
  425 |
  426 |                         /* Dump literal data we just scanned over */
  427 |                         dostr(format, next_pct - format, target);
  428 |                         if (target->failed)
  429 |                                 break;
  430 |
  431 |                         if (*next_pct == '\0')
  432 |                                 break;
  433 |                         format = next_pct;
      |
1 error generated.
```

## Summary of changes
- Update Postgres fork to include changes from
6da2ba1d8a

Corresponding Postgres PRs:
- https://github.com/neondatabase/postgres/pull/608
- https://github.com/neondatabase/postgres/pull/609
- https://github.com/neondatabase/postgres/pull/610
- https://github.com/neondatabase/postgres/pull/611
2025-04-04 14:09:15 +00:00
Vlad Lazar
1ef4258f29 pageserver: add tenant level performance tracing sampling ratio (#11433)
## Problem

https://github.com/neondatabase/neon/pull/11140 introduces performance
tracing with OTEL and a pageserver config option that sets the sampling
ratio of get page requests.

Enabling a non-zero sampling ratio on a per-region basis is too aggressive
and comes with a perf impact that isn't well understood yet.

## Summary of changes

Add a `sampling_ratio` tenant-level config which overrides the
pageserver-level config. Note that we do not cache the config but load it
on every get page request, so that changes propagate promptly.

Note that I've had to remove the `SHARD_SELECTION` span to get this to
work. The tracing library doesn't expose a neat way to drop a span if one
realises at runtime that it's not needed.

Closes https://github.com/neondatabase/neon/issues/11392
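
For context, a minimal sketch of the idea (not the actual patch; the types and names here are simplified stand-ins) showing how a numerator/denominator ratio with a tenant-level override can drive the per-request sampling decision:

```rust
use rand::Rng;

#[derive(Clone, Copy)]
struct Ratio {
    numerator: usize,
    denominator: usize,
}

// Hypothetical, simplified configs: the tenant-level ratio, when present,
// takes precedence over the pageserver-level one.
fn effective_ratio(tenant: Option<Ratio>, pageserver: Option<Ratio>) -> Option<Ratio> {
    tenant.or(pageserver)
}

// Per-request decision: a numerator of 0 disables sampling entirely.
fn should_sample(ratio: Option<Ratio>) -> bool {
    match ratio {
        Some(r) if r.numerator > 0 => {
            rand::thread_rng().gen_range(0..r.denominator) < r.numerator
        }
        _ => false,
    }
}

fn main() {
    // Sample roughly 1 in 1000 get page requests for this tenant,
    // even though the pageserver default disables sampling.
    let tenant = Some(Ratio { numerator: 1, denominator: 1000 });
    let pageserver = Some(Ratio { numerator: 0, denominator: 1 });
    println!("sampled: {}", should_sample(effective_ratio(tenant, pageserver)));
}
```

With a numerator of 0 the request is never sampled, so sampling stays disabled unless a ratio is explicitly configured.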
2025-04-04 13:41:28 +00:00
Vlad Lazar
65e2aae6e4 pageserver/secondary: deregister IO metrics (#11283)
## Problem

IO metrics for secondary locations do not get deregistered when the
timeline is removed.

## Summary of changes

Stash the request context to be used for downloads in
`SecondaryTimelineDetail`. These objects match the lifetime of the
secondary timeline location pretty well.

When the timeline is removed, deregister the metrics too.

Closes https://github.com/neondatabase/neon/issues/11156
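
As an illustration only (not the pageserver's actual metrics code; the metric name and label values below are made up), deregistering a labelled series from a Prometheus-style gauge vector when its owner goes away looks roughly like this:

```rust
use prometheus::{IntGaugeVec, Opts, Registry};

fn main() -> Result<(), prometheus::Error> {
    let registry = Registry::new();

    // Hypothetical per-timeline gauge, keyed by tenant / shard / timeline
    // labels, similar in spirit to the pageserver's IO metrics.
    let resident_size = IntGaugeVec::new(
        Opts::new(
            "secondary_resident_size_bytes",
            "Resident size per secondary timeline (illustrative only)",
        ),
        &["tenant_id", "shard_id", "timeline_id"],
    )?;
    registry.register(Box::new(resident_size.clone()))?;

    // While the secondary timeline exists, its labelled series gets updated.
    resident_size
        .with_label_values(&["tenant-a", "0001", "tl-1"])
        .set(42);

    // When the timeline is removed, the series must be dropped as well,
    // otherwise it keeps being exported for a timeline that no longer exists.
    resident_size.remove_label_values(&["tenant-a", "0001", "tl-1"])?;

    Ok(())
}
```

Without the final `remove_label_values` call the series would keep being exported for a timeline that no longer exists, which is the leak this change addresses.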
2025-04-04 10:52:59 +00:00
a-masterov
edc874e1b3 Use the same test image version as the compute one (#11448)
## Problem
Changes in compute can cause errors in tests if a different version of the
`neon-test-extensions` image is used.
## Summary of changes
Use the same version of the `neon-test-extensions` image as the `compute`
image for docker-compose-based extension tests.
2025-04-04 10:13:00 +00:00
26 changed files with 2452 additions and 284 deletions

View File

@@ -980,7 +980,7 @@ jobs:
TEST_EXTENSIONS_TAG: >-
${{
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
&& 'latest'
&& needs.meta.outputs.previous-compute-release
|| needs.meta.outputs.build-tag
}}
TEST_VERSION_ONLY: ${{ matrix.pg_version }}

Cargo.lock (generated)

File diff suppressed because it is too large.

View File

@@ -40,6 +40,8 @@ members = [
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
"lambda/aztraffic",
"lambda/pod_info_dumper",
]
[workspace.package]
@@ -342,3 +344,12 @@ inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
[profile.release-lambda-function]
inherits = "release"
lto = true
opt-level = "z"
codegen-units = 1
panic = "abort"
debug = false
strip = true

View File

@@ -545,6 +545,11 @@ impl PageServerNode {
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_compaction_ratio_percent' as integer")?,
sampling_ratio: settings
.remove("sampling_ratio")
.map(serde_json::from_str)
.transpose()
.context("Failed to parse 'sampling_ratio'")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")

View File

@@ -0,0 +1,22 @@
[package]
name = "aztraffic"
version = "0.0.0"
edition.workspace = true
license.workspace = true
publish = false
[dependencies]
anyhow = "1.0.97"
aws-config = "1.6.1"
aws-sdk-athena = "1.68.0"
aws-sdk-ec2 = "1.121.0"
aws-sdk-eks = "1.82.0"
aws-sdk-glue = "1.88.0"
aws-sdk-lambda = "1.75.0"
aws-sdk-scheduler = "1.64.0"
aws-sdk-sfn = "1.68.0"
aws-sdk-sts = "1.65.0"
clap = { version = "4.5.35", features = ["derive", "env"] }
tokio = { version = "1.44.1", features = ["full"] }
serde = "1.0.219"
serde_json = { version = "1.0.140", features = ["preserve_order"] }

View File

@@ -0,0 +1,794 @@
use std::fs;
use aws_config::default_provider::credentials::DefaultCredentialsChain;
use aws_sdk_ec2::types::{
DestinationFileFormat, DestinationOptionsRequest, FlowLogsResourceType, LogDestinationType,
TrafficType,
};
use aws_sdk_glue::primitives::Blob;
use aws_sdk_glue::types::{Column, DatabaseInput, SerDeInfo, StorageDescriptor, TableInput};
use aws_sdk_lambda::types::{Environment, FunctionCode, Runtime};
use aws_sdk_scheduler::types::{
DeadLetterConfig, FlexibleTimeWindow, FlexibleTimeWindowMode, RetryPolicy, Target,
};
use aws_sdk_sfn::types::{CloudWatchLogsLogGroup, LogDestination, LogLevel, LoggingConfiguration};
use clap::Parser;
use serde_json::json;
#[derive(Parser, Clone, Debug)]
struct Args {
#[arg(long, value_name = "id")]
account_id: String,
#[arg(long, value_name = "region")]
region: String,
#[arg(long, value_name = "cluster")]
cluster: String,
#[arg(long, value_name = "id")]
vpc_id: Vec<String>,
#[arg(long, value_name = "arn")]
log_group_arn: String,
#[arg(long, value_name = "name")]
pod_info_s3_bucket_name: String,
#[arg(
long,
value_name = "path",
default_value = "CrossAZTraffic/pod_info_dumper/pod_info.csv"
)]
pod_info_s3_bucket_key: String,
#[arg(long, value_name = "uri")]
pod_info_s3_bucket_uri: String,
#[arg(long, value_name = "uri")]
vpc_flow_logs_s3_bucket_uri: String,
#[arg(long, value_name = "uri")]
results_s3_bucket_uri: String,
#[arg(
long,
value_name = "name",
default_value = "./target/lambda/pod_info_dumper/bootstrap.zip"
)]
lambda_zipfile_path: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-podinfo-function"
)]
lambda_function_name: String,
#[arg(long, value_name = "arn")]
lambda_role_arn: String,
#[arg(long, value_name = "name")]
glue_database_name: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-podinfo-table"
)]
glue_pod_info_table_name: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-vpcflowlogs-table"
)]
glue_vpc_flow_logs_table_name: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-results-table"
)]
glue_results_table_name: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-trigger-schedule"
)]
schedule_name: String,
#[arg(long, value_name = "minutes", default_value_t = 60)]
schedule_interval_minutes: usize,
#[arg(long, value_name = "arn")]
schedule_target_state_machine_arn: String,
#[arg(long, value_name = "arn")]
schedule_target_role_arn: String,
#[arg(long, value_name = "arn")]
schedule_dead_letter_queue_arn: Option<String>,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-combine-query"
)]
athena_query_name: String,
#[arg(long, value_name = "uri")]
vpcflowlogs_destination_s3_bucket_uri: String,
#[arg(
long,
value_name = "name",
default_value = "CrossAZTraffic-statemachine"
)]
statemachine_name: String,
#[arg(long, value_name = "arn")]
statemachine_role_arn: String,
#[arg(long, value_name = "uri")]
athena_results_s3_bucket_uri: String,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = Args::parse();
eprintln!("{args:#?}");
// TODO: athena results bucket + lifecycle config
// TODO: iam role split
// TODO: iam policy
// TODO: clusterrole + binding
// TODO: eks mapping
// TODO: log group
// TODO: dlq
let sdk_config = create_sdk_config(&args).await?;
LambdaFunction {
local_zipfile_path: args.lambda_zipfile_path,
function_name: args.lambda_function_name.clone(),
role_arn: args.lambda_role_arn,
account_id: args.account_id,
region: args.region,
cluster: args.cluster,
s3_bucket_name: args.pod_info_s3_bucket_name,
s3_bucket_key: args.pod_info_s3_bucket_key,
}
.create(&sdk_config)
.await?;
GlueDatabase {
database_name: args.glue_database_name.clone(),
pod_info_table_name: args.glue_pod_info_table_name.clone(),
pod_info_s3_bucket_uri: args.pod_info_s3_bucket_uri,
vpc_flow_logs_table_name: args.glue_vpc_flow_logs_table_name.clone(),
vpc_flow_logs_s3_bucket_uri: args.vpc_flow_logs_s3_bucket_uri,
results_table_name: args.glue_results_table_name.clone(),
results_s3_bucket_uri: args.results_s3_bucket_uri,
}
.create(&sdk_config)
.await?;
let named_query_id = AthenaQuery {
query_name: args.athena_query_name,
glue_database: args.glue_database_name.clone(),
invocation_frequency: args.schedule_interval_minutes,
athena_results_table_name: args.glue_results_table_name,
vpc_flow_logs_table_name: args.glue_vpc_flow_logs_table_name,
pod_info_table_name: args.glue_pod_info_table_name,
}
.create(&sdk_config)
.await?;
StateMachine {
name: args.statemachine_name,
role_arn: args.statemachine_role_arn,
named_query_id,
glue_database: args.glue_database_name,
lambda_function_name: args.lambda_function_name,
athena_results_s3_bucket_uri: args.athena_results_s3_bucket_uri,
log_group_arn: args.log_group_arn,
}
.create(&sdk_config)
.await?;
Schedule {
name: args.schedule_name,
interval_minutes: args.schedule_interval_minutes,
dead_letter_queue_arn: args.schedule_dead_letter_queue_arn,
target_role_arn: args.schedule_target_role_arn,
target_state_machine_arn: args.schedule_target_state_machine_arn,
}
.create(&sdk_config)
.await?;
let flow_log_ids = VpcFlowLogs {
vpc_ids: args.vpc_id,
destination_s3_bucket_uri: args.vpcflowlogs_destination_s3_bucket_uri,
}
.create(&sdk_config)
.await?;
println!("VPC flow log IDs: {:?}", flow_log_ids.as_slice());
Ok(())
}
async fn create_sdk_config(args: &Args) -> anyhow::Result<aws_config::SdkConfig> {
let region = aws_config::Region::new(args.region.to_owned());
let credentials_provider = DefaultCredentialsChain::builder()
.region(region.clone())
.build()
.await;
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region)
.credentials_provider(credentials_provider)
.load()
.await)
}
struct LambdaFunction {
local_zipfile_path: String,
function_name: String,
role_arn: String,
account_id: String,
region: String,
cluster: String,
s3_bucket_name: String,
s3_bucket_key: String,
}
impl LambdaFunction {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
let code = fs::read(&self.local_zipfile_path)?;
let client = aws_sdk_lambda::Client::new(sdk_config);
client
.delete_function()
.function_name(&self.function_name)
.send()
.await
.ok();
client
.create_function()
.function_name(&self.function_name)
.runtime(Runtime::Providedal2023)
.handler("bootstrap")
.role(&self.role_arn)
.code(FunctionCode::builder().zip_file(Blob::new(code)).build())
.timeout(60)
.environment(
Environment::builder()
.set_variables(Some(
[
("NEON_ACCOUNT_ID", self.account_id.as_str()),
("NEON_REGION", self.region.as_str()),
("NEON_CLUSTER", self.cluster.as_str()),
("NEON_S3_BUCKET_NAME", self.s3_bucket_name.as_str()),
("NEON_S3_BUCKET_KEY", self.s3_bucket_key.as_str()),
("AWS_LAMBDA_LOG_FORMAT", "JSON"),
("AWS_LAMBDA_LOG_LEVEL", "INFO"),
]
.into_iter()
.map(|(k, v)| (k.into(), v.into()))
.collect(),
))
.build(),
)
.send()
.await?;
Ok(())
}
}
struct VpcFlowLogs {
vpc_ids: Vec<String>,
destination_s3_bucket_uri: String,
}
impl VpcFlowLogs {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<Vec<String>> {
let ec2_client = aws_sdk_ec2::Client::new(sdk_config);
let flow_logs = ec2_client
.create_flow_logs()
.resource_type(FlowLogsResourceType::Vpc)
.set_resource_ids(Some(self.vpc_ids.clone()))
.traffic_type(TrafficType::All)
.log_destination_type(LogDestinationType::S3)
.log_destination(&self.destination_s3_bucket_uri)
.destination_options(
DestinationOptionsRequest::builder()
.file_format(DestinationFileFormat::Parquet)
.hive_compatible_partitions(false)
.per_hour_partition(true)
.build(),
)
.log_format("${region} ${az-id} ${vpc-id} ${flow-direction} ${pkt-srcaddr} ${pkt-dstaddr} ${srcport} ${dstport} ${start} ${bytes}")
.send()
.await?;
if let Some(unsuccessful) = flow_logs
.unsuccessful
.as_ref()
.and_then(|v| if v.is_empty() { None } else { Some(v) })
{
anyhow::bail!("VPC flow log creation unsuccessful: {unsuccessful:?}");
}
Ok(flow_logs.flow_log_ids().iter().cloned().collect())
}
}
struct GlueDatabase {
database_name: String,
pod_info_table_name: String,
pod_info_s3_bucket_uri: String,
vpc_flow_logs_table_name: String,
vpc_flow_logs_s3_bucket_uri: String,
results_table_name: String,
results_s3_bucket_uri: String,
}
impl GlueDatabase {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
let glue_client = aws_sdk_glue::Client::new(sdk_config);
let db = DatabaseInput::builder().name(&self.database_name).build()?;
glue_client
.create_database()
.database_input(db.clone())
.send()
.await?;
let pod_info_columns = &[
Column::builder()
.name("namespace")
.r#type("string")
.build()?,
Column::builder().name("name").r#type("string").build()?,
Column::builder().name("ip").r#type("string").build()?,
Column::builder()
.name("creation_time")
.r#type("timestamp")
.build()?,
Column::builder().name("node").r#type("string").build()?,
Column::builder().name("az").r#type("string").build()?,
];
glue_client
.create_table()
.database_name(db.name())
.table_input(
TableInput::builder()
.name(&self.pod_info_table_name)
.storage_descriptor(
StorageDescriptor::builder()
.location(&self.pod_info_s3_bucket_uri)
.compressed(false)
.set_columns(Some(pod_info_columns.into_iter().cloned().collect()))
.input_format("org.apache.hadoop.mapred.TextInputFormat")
.output_format(
"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
)
.serde_info(
SerDeInfo::builder()
.serialization_library(
"org.apache.hadoop.hive.serde2.OpenCSVSerde",
)
.parameters("separatorChar", ",")
.parameters("quoteChar", "`")
.parameters("escapeChar", r"\")
.build(),
)
.build(),
)
.table_type("EXTERNAL_TABLE")
.parameters("classification", "csv")
.parameters("skip.header.line.count", "1")
.retention(0)
.build()?,
)
.send()
.await?;
let vpc_flow_logs_columns = &[
Column::builder().name("region").r#type("string").build()?,
Column::builder().name("az_id").r#type("string").build()?,
Column::builder().name("vpc_id").r#type("string").build()?,
Column::builder()
.name("flow_direction")
.r#type("string")
.build()?,
Column::builder()
.name("pkt_srcaddr")
.r#type("string")
.build()?,
Column::builder()
.name("pkt_dstaddr")
.r#type("string")
.build()?,
Column::builder().name("srcport").r#type("int").build()?,
Column::builder().name("dstport").r#type("int").build()?,
Column::builder().name("start").r#type("bigint").build()?,
Column::builder().name("bytes").r#type("bigint").build()?,
];
glue_client
.create_table()
.database_name(db.name())
.table_input(
TableInput::builder()
.name(&self.vpc_flow_logs_table_name)
.storage_descriptor(
StorageDescriptor::builder()
.location(&self.vpc_flow_logs_s3_bucket_uri)
.compressed(false)
.set_columns(Some(vpc_flow_logs_columns.into_iter().cloned().collect()))
.input_format(
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
)
.output_format(
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
)
.serde_info(
SerDeInfo::builder()
.serialization_library(
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
)
.parameters("serialization.format", "1")
.build(),
)
.build(),
)
.table_type("EXTERNAL_TABLE")
.parameters("classification", "parquet")
.retention(0)
.build()?,
)
.send()
.await?;
let athena_results_columns = &[
Column::builder().name("time").r#type("timestamp").build()?,
Column::builder().name("traffic").r#type("string").build()?,
Column::builder()
.name("total_bytes")
.r#type("bigint")
.build()?,
];
glue_client
.create_table()
.database_name(db.name())
.table_input(
TableInput::builder()
.name(&self.results_table_name)
.storage_descriptor(
StorageDescriptor::builder()
.location(&self.results_s3_bucket_uri)
.compressed(false)
.set_columns(Some(athena_results_columns.into_iter().cloned().collect()))
.input_format(
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
)
.output_format(
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
)
.serde_info(
SerDeInfo::builder()
.serialization_library(
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
)
.parameters("serialization.format", "1")
.build(),
)
.build(),
)
.table_type("EXTERNAL_TABLE")
.parameters("classification", "parquet")
.retention(0)
.build()?,
)
.send()
.await?;
Ok(())
}
}
struct AthenaQuery {
query_name: String,
glue_database: String,
invocation_frequency: usize,
athena_results_table_name: String,
vpc_flow_logs_table_name: String,
pod_info_table_name: String,
}
impl AthenaQuery {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<String> {
let Self {
athena_results_table_name,
vpc_flow_logs_table_name,
pod_info_table_name,
invocation_frequency,
..
} = self;
let query_string = format!(
r#"
INSERT INTO "{athena_results_table_name}"
WITH
ip_addresses_and_az_mapping AS (
SELECT
DISTINCT pkt_srcaddr AS ipaddress,
az_id
FROM "{vpc_flow_logs_table_name}"
WHERE flow_direction = 'egress'
AND from_unixtime("{vpc_flow_logs_table_name}".start) > (CURRENT_TIMESTAMP - ({invocation_frequency} * interval '1' minute))
),
egress_flows_of_pods_with_status AS (
SELECT
"{pod_info_table_name}".name AS srcpodname,
pkt_srcaddr AS srcaddr,
pkt_dstaddr AS dstaddr,
"{vpc_flow_logs_table_name}".az_id AS srcazid,
bytes,
start
FROM "{vpc_flow_logs_table_name}"
INNER JOIN "{pod_info_table_name}" ON "{vpc_flow_logs_table_name}".pkt_srcaddr = "{pod_info_table_name}".ip
WHERE flow_direction = 'egress'
AND from_unixtime("{vpc_flow_logs_table_name}".start) > (CURRENT_TIMESTAMP - ({invocation_frequency} * interval '1' minute))
),
cross_az_traffic_by_pod AS (
SELECT
srcaddr,
srcpodname,
dstaddr,
"{pod_info_table_name}".name AS dstpodname,
srcazid,
ip_addresses_and_az_mapping.az_id AS dstazid,
bytes,
start
FROM egress_flows_of_pods_with_status
INNER JOIN "{pod_info_table_name}" ON dstaddr = "{pod_info_table_name}".ip
LEFT JOIN ip_addresses_and_az_mapping ON dstaddr = ipaddress
WHERE ip_addresses_and_az_mapping.az_id != srcazid
)
SELECT
date_trunc('MINUTE', from_unixtime(start)) AS time,
CONCAT(srcpodname, ' -> ', dstpodname) AS traffic,
SUM(bytes) AS total_bytes
FROM cross_az_traffic_by_pod
GROUP BY date_trunc('MINUTE', from_unixtime(start)), CONCAT(srcpodname, ' -> ', dstpodname)
ORDER BY time, total_bytes DESC
"#
);
let athena_client = aws_sdk_athena::Client::new(sdk_config);
let res = athena_client
.create_named_query()
.name(&self.query_name)
.database(&self.glue_database)
.query_string(query_string)
.send()
.await?;
Ok(res.named_query_id.unwrap())
}
}
struct StateMachine {
name: String,
role_arn: String,
named_query_id: String,
glue_database: String,
lambda_function_name: String,
athena_results_s3_bucket_uri: String,
log_group_arn: String,
}
impl StateMachine {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
let sfn_client = aws_sdk_sfn::Client::new(sdk_config);
sfn_client
.create_state_machine()
.name(&self.name)
.role_arn(&self.role_arn)
.logging_configuration(
LoggingConfiguration::builder()
.level(LogLevel::All)
.destinations(
LogDestination::builder()
.cloud_watch_logs_log_group(
CloudWatchLogsLogGroup::builder()
.log_group_arn(&self.log_group_arn)
.build(),
)
.build(),
)
.build(),
)
.definition(
json!(
{
"StartAt": "Invoke",
"States": {
"Invoke": {
"Type": "Task",
"Resource": "arn:aws:states:::lambda:invoke",
"Output": "{% $states.result.Payload %}",
"Arguments": {
"FunctionName": self.lambda_function_name,
"Payload": json!({
"detail-type": "Scheduled Event",
"source": "aws.events",
"detail": {}
}).to_string()
},
"Retry": [
{
"ErrorEquals": [
"Lambda.ServiceException",
"Lambda.AWSLambdaException",
"Lambda.SdkClientException",
"Lambda.TooManyRequestsException"
],
"IntervalSeconds": 1,
"MaxAttempts": 3,
"BackoffRate": 2,
"JitterStrategy": "FULL"
}
],
"Next": "Check"
},
"Check": {
"Type": "Choice",
"Choices": [
{
"Next": "GetNamedQuery",
"Condition": "{% $states.input.statusCode = 200 %}"
}
],
"Default": "Fail"
},
"GetNamedQuery": {
"Type": "Task",
"Arguments": {
"NamedQueryId": self.named_query_id
},
"Resource": "arn:aws:states:::aws-sdk:athena:getNamedQuery",
"Output": {
"QueryString": "{% $states.result.NamedQuery.QueryString %}"
},
"Next": "StartQueryExecution"
},
"StartQueryExecution": {
"Type": "Task",
"Resource": "arn:aws:states:::athena:startQueryExecution.sync",
"Arguments": {
"QueryString": "{% $states.input.QueryString %}",
"QueryExecutionContext": {
"Database": self.glue_database
},
"ResultConfiguration": {
"OutputLocation": self.athena_results_s3_bucket_uri
},
"WorkGroup": "primary"
},
"End": true
},
"Fail": {
"Type": "Fail"
}
},
"QueryLanguage": "JSONata"
}
)
.to_string(),
)
.send()
.await?;
Ok(())
}
}
struct Schedule {
name: String,
interval_minutes: usize,
target_state_machine_arn: String,
target_role_arn: String,
dead_letter_queue_arn: Option<String>,
}
impl Schedule {
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
let sched_client = aws_sdk_scheduler::Client::new(sdk_config);
sched_client
.create_schedule()
.name(&self.name)
.schedule_expression(format!("rate({} minute)", self.interval_minutes))
.flexible_time_window(
FlexibleTimeWindow::builder()
.mode(FlexibleTimeWindowMode::Off)
.build()?,
)
.target(
Target::builder()
.arn(&self.target_state_machine_arn)
.role_arn(&self.target_role_arn)
.input(
json!({
"detail-type": "Scheduled Event",
"source": "aws.events",
"detail": {}
})
.to_string(),
)
.retry_policy(
RetryPolicy::builder()
.maximum_retry_attempts(0)
.maximum_event_age_in_seconds(60)
.build(),
)
.set_dead_letter_config(
self.dead_letter_queue_arn
.as_ref()
.map(|arn| DeadLetterConfig::builder().arn(arn).build()),
)
.build()?,
)
.send()
.await?;
Ok(())
}
}
struct KubernetesRoles {
region: String,
cluster: String,
k8s_role_prefix: String,
lambda_role_arn: String,
}
impl KubernetesRoles {
fn print(&self) -> anyhow::Result<()> {
let Self {
region,
cluster,
k8s_role_prefix,
lambda_role_arn,
} = self;
let yaml = format!(
r#"
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {k8s_role_prefix}-clusterrole
rules:
- apiGroups:
- ""
resources: ["nodes", "namespaces", "pods"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {k8s_role_prefix}-binding
subjects:
- kind: Group
name: {k8s_role_prefix}-group
apiGroup: rbac.authorization.k8s.io
roleRef:
kind: ClusterRole
name: {k8s_role_prefix}-clusterrole
apiGroup: rbac.authorization.k8s.io
"#
);
let eksctl = format!(
r#"eksctl create iamidentitymapping \
--region "{region}" \
--cluster "{cluster}" \
--arn "{lambda_role_arn}" \
--username "{k8s_role_prefix}-binding" \
--group "{k8s_role_prefix}-group"
"#
);
Ok(())
}
}

View File

@@ -0,0 +1,27 @@
[package]
name = "pod_info_dumper"
version = "0.0.0"
edition = "2024"
publish = false
[dependencies]
aws_lambda_events = { version = "0.16.0", default-features = false, features = ["eventbridge"] }
aws-config = { workspace = true }
aws-sdk-eks = "1.75.0"
aws-sdk-s3 = { workspace = true }
aws-sdk-sts = "1.65.0"
aws-sigv4 = "1.3.0"
base64 = { version = "0.22.1" }
csv = { version = "1.3.1", default-features = false }
http = { workspace = true }
k8s-openapi = { version = "0.24.0", default-features = false, features = ["v1_31"] }
kube = { version = "0.99.0", default-features = false, features = ["client", "rustls-tls"] }
lambda_runtime = { version = "0.13.0", default-features = false, features = ["tracing"] }
rustls = { version = "0.23.25" }
rustls-pemfile = { workspace = true }
secrecy = "0.10.3"
serde = { workspace = true }
serde_json = { workspace = true }
sha2 = { workspace = true, features = ["asm"] }
tokio = { workspace = true, features = ["macros"] }
tracing = { workspace = true, features = ["max_level_debug", "release_max_level_info"] }

View File

@@ -0,0 +1,8 @@
# pod_info_dumper
An event-triggered AWS lambda function that writes the list of all pods with
node information to a CSV file in S3.
```shell
cargo lambda build -p pod_info_dumper --output-format Zip --x86-64 --profile release-lambda-function
```

View File

@@ -0,0 +1,420 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::time::{Duration, SystemTime};
use std::{env, io};
use aws_config::default_provider::credentials::DefaultCredentialsChain;
use aws_config::retry::RetryConfig;
use aws_lambda_events::event::eventbridge::EventBridgeEvent;
use aws_sdk_s3::primitives::{ByteStream, SdkBody};
use aws_sdk_s3::types::ChecksumAlgorithm;
use aws_sdk_sts::config::ProvideCredentials;
use aws_sigv4::http_request::{
SignableBody, SignableRequest, SignatureLocation, SigningSettings, sign,
};
use aws_sigv4::sign::v4;
use base64::Engine as _;
use base64::engine::general_purpose::STANDARD;
use base64::prelude::*;
use k8s_openapi::api::core::v1::{Node, Pod};
use k8s_openapi::chrono::SecondsFormat;
use kube::api::{Api, ListParams, ResourceExt};
use lambda_runtime::{Error, LambdaEvent, run, service_fn, tracing};
use secrecy::SecretString;
use serde::ser::SerializeMap;
use sha2::{Digest as _, Sha256};
const AZ_LABEL: &str = "topology.kubernetes.io/zone";
#[derive(Debug)]
struct Config {
aws_account_id: String,
s3_bucket: S3BucketConfig,
eks_cluster: EksClusterConfig,
}
#[derive(Debug)]
struct S3BucketConfig {
region: String,
name: String,
key: String,
}
impl S3BucketConfig {
#[tracing::instrument(skip_all, err)]
async fn create_sdk_config(&self) -> Result<aws_config::SdkConfig, Error> {
let region = aws_config::Region::new(self.region.clone());
let credentials_provider = DefaultCredentialsChain::builder()
.region(region.clone())
.build()
.await;
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region)
.credentials_provider(credentials_provider)
.load()
.await)
}
}
#[derive(Debug)]
struct EksClusterConfig {
region: String,
name: String,
}
impl EksClusterConfig {
#[tracing::instrument(skip_all, err)]
async fn create_sdk_config(&self) -> Result<aws_config::SdkConfig, Error> {
let region = aws_config::Region::new(self.region.clone());
let credentials_provider = DefaultCredentialsChain::builder()
.region(region.clone())
.build()
.await;
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
.region(region)
.credentials_provider(credentials_provider)
.load()
.await)
}
}
#[tokio::main]
pub async fn start() -> Result<(), Error> {
tracing::init_default_subscriber();
rustls::crypto::aws_lc_rs::default_provider()
.install_default()
.unwrap();
tracing::info!("function handler started");
let config = Config {
aws_account_id: env::var("NEON_ACCOUNT_ID")?,
s3_bucket: S3BucketConfig {
region: env::var("NEON_REGION")?,
name: env::var("NEON_S3_BUCKET_NAME")?,
key: env::var("NEON_S3_BUCKET_KEY")?,
},
eks_cluster: EksClusterConfig {
region: env::var("NEON_REGION")?,
name: env::var("NEON_CLUSTER")?,
},
};
run(service_fn(async |event: LambdaEvent<EventBridgeEvent<serde_json::Value>>| -> Result<StatusResponse, Error> {
function_handler(event, &config).await
}))
.await
}
#[derive(Debug, PartialEq)]
struct StatusResponse {
status_code: http::StatusCode,
body: Cow<'static, str>,
}
impl StatusResponse {
fn ok() -> Self {
StatusResponse {
status_code: http::StatusCode::OK,
body: "OK".into(),
}
}
}
impl serde::Serialize for StatusResponse {
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut serializer = serializer.serialize_map(None)?;
serializer.serialize_entry("statusCode", &self.status_code.as_u16())?;
serializer.serialize_entry("body", &self.body)?;
serializer.end()
}
}
#[tracing::instrument(skip_all, fields(?event), err)]
async fn function_handler(
event: LambdaEvent<EventBridgeEvent<serde_json::Value>>,
config: &Config,
) -> Result<StatusResponse, Error> {
tracing::info!("function handler called");
let kube_client = connect_to_cluster(config).await?;
let s3_client = connect_to_s3(config).await?;
let nodes_azs = get_nodes_azs(kube_client.clone()).await?;
let mut pods_info = get_current_pods(kube_client.clone(), &nodes_azs).await?;
pods_info.sort_unstable();
let mut csv = Vec::with_capacity(64 * 1024);
write_csv(&pods_info, &mut csv)?;
tracing::info!(
"csv is {} bytes, containing {} pods",
csv.len(),
pods_info.len()
);
upload_csv(config, &s3_client, &csv).await?;
tracing::info!("pod info successfully stored");
Ok(StatusResponse::ok())
}
#[derive(Debug, serde::Serialize, PartialEq, Eq, PartialOrd, Ord)]
struct PodInfo<'a> {
namespace: String,
name: String,
ip: String,
creation_time: String,
node: String,
az: Option<&'a str>,
}
#[tracing::instrument(skip_all, err)]
async fn connect_to_cluster(config: &Config) -> Result<kube::Client, Error> {
let sdk_config = config.eks_cluster.create_sdk_config().await?;
let eks_client = aws_sdk_eks::Client::new(&sdk_config);
let resp = eks_client
.describe_cluster()
.name(&config.eks_cluster.name)
.send()
.await?;
let cluster = resp
.cluster()
.ok_or_else(|| format!("cluster not found: {}", config.eks_cluster.name))?;
let endpoint = cluster.endpoint().ok_or("cluster endpoint not found")?;
let ca_data = cluster
.certificate_authority()
.and_then(|ca| ca.data())
.ok_or("cluster certificate data not found")?;
let mut k8s_config = kube::Config::new(endpoint.parse()?);
let cert_bytes = STANDARD.decode(ca_data)?;
let certs = rustls_pemfile::certs(&mut cert_bytes.as_slice())
.map(|c| c.map(|c| c.to_vec()))
.collect::<Result<_, _>>()?;
k8s_config.root_cert = Some(certs);
k8s_config.auth_info.token = Some(
create_kube_auth_token(
&sdk_config,
&config.eks_cluster.name,
Duration::from_secs(10 * 60),
)
.await?,
);
tracing::info!("cluster description completed");
Ok(kube::Client::try_from(k8s_config)?)
}
#[tracing::instrument(skip_all, err)]
async fn create_kube_auth_token(
sdk_config: &aws_config::SdkConfig,
cluster_name: &str,
expires_in: Duration,
) -> Result<SecretString, Error> {
let identity = sdk_config
.credentials_provider()
.unwrap()
.provide_credentials()
.await?
.into();
let region = sdk_config.region().expect("region").as_ref();
let host = format!("sts.{region}.amazonaws.com");
let get_caller_id_url = format!("https://{host}/?Action=GetCallerIdentity&Version=2011-06-15");
let mut signing_settings = SigningSettings::default();
signing_settings.signature_location = SignatureLocation::QueryParams;
signing_settings.expires_in = Some(expires_in);
let signing_params = v4::SigningParams::builder()
.identity(&identity)
.region(region)
.name("sts")
.time(SystemTime::now())
.settings(signing_settings)
.build()?
.into();
let signable_request = SignableRequest::new(
"GET",
&get_caller_id_url,
[("host", host.as_str()), ("x-k8s-aws-id", cluster_name)].into_iter(),
SignableBody::Bytes(&[]),
)?;
let (signing_instructions, _signature) = sign(signable_request, &signing_params)?.into_parts();
let mut token_request = http::Request::get(get_caller_id_url).body(()).unwrap();
signing_instructions.apply_to_request_http1x(&mut token_request);
let token = format!(
"k8s-aws-v1.{}",
BASE64_STANDARD_NO_PAD.encode(token_request.uri().to_string())
)
.into();
Ok(token)
}
#[tracing::instrument(skip_all, err)]
async fn connect_to_s3(config: &Config) -> Result<aws_sdk_s3::Client, Error> {
let sdk_config = config.s3_bucket.create_sdk_config().await?;
let s3_client = aws_sdk_s3::Client::from_conf(
aws_sdk_s3::config::Builder::from(&sdk_config)
.retry_config(RetryConfig::standard())
.build(),
);
Ok(s3_client)
}
#[tracing::instrument(skip_all, err)]
async fn get_nodes_azs(client: kube::Client) -> Result<HashMap<String, String>, Error> {
let nodes = Api::<Node>::all(client);
let list_params = ListParams::default().timeout(10);
let mut nodes_azs = HashMap::default();
for node in nodes.list(&list_params).await? {
let Some(name) = node.metadata.name else {
tracing::warn!("node without name");
continue;
};
let Some(mut labels) = node.metadata.labels else {
tracing::warn!(name, "node without labels");
continue;
};
let Some(az) = labels.remove(AZ_LABEL) else {
tracing::warn!(name, "node without AZ label");
continue;
};
tracing::debug!(name, az, "adding node");
nodes_azs.insert(name, az);
}
Ok(nodes_azs)
}
#[tracing::instrument(skip_all, err)]
async fn get_current_pods(
client: kube::Client,
node_az: &HashMap<String, String>,
) -> Result<Vec<PodInfo<'_>>, Error> {
let pods = Api::<Pod>::all(client);
let mut pods_info = vec![];
let mut continuation_token = Some(String::new());
while let Some(token) = continuation_token {
let list_params = ListParams::default()
.timeout(10)
.limit(500)
.continue_token(&token);
let list = pods.list(&list_params).await?;
continuation_token = list.metadata.continue_;
tracing::info!("received list of {} pods", list.items.len());
for pod in list.items {
let name = pod.name_any();
let Some(namespace) = pod.namespace() else {
tracing::warn!(name, "pod without namespace");
continue;
};
let Some(status) = pod.status else {
tracing::warn!(namespace, name, "pod without status");
continue;
};
let Some(conditions) = status.conditions else {
tracing::warn!(namespace, name, "pod without conditions");
continue;
};
let Some(ready_condition) = conditions.iter().find(|cond| cond.type_ == "Ready") else {
tracing::debug!(namespace, name, "pod not ready");
continue;
};
let Some(ref ready_time) = ready_condition.last_transition_time else {
tracing::warn!(
namespace,
name,
"pod ready condition without transition time"
);
continue;
};
let Some(spec) = pod.spec else {
tracing::warn!(namespace, name, "pod without spec");
continue;
};
let Some(node) = spec.node_name else {
tracing::warn!(namespace, name, "pod without node");
continue;
};
let Some(ip) = status.pod_ip else {
tracing::warn!(namespace, name, "pod without IP");
continue;
};
let az = node_az.get(&node).map(String::as_str);
let creation_time = ready_time.0.to_rfc3339_opts(SecondsFormat::Secs, true);
let pod_info = PodInfo {
namespace,
name,
ip,
creation_time,
node,
az,
};
tracing::debug!(?pod_info, "adding pod");
pods_info.push(pod_info);
}
}
Ok(pods_info)
}
#[tracing::instrument(skip_all, err)]
fn write_csv<W: io::Write>(pods_info: &Vec<PodInfo>, writer: W) -> Result<(), Error> {
let mut w = csv::Writer::from_writer(writer);
for pod in pods_info {
w.serialize(pod)?;
}
w.flush()?;
Ok(())
}
#[tracing::instrument(skip_all, err)]
async fn upload_csv(
config: &Config,
s3_client: &aws_sdk_s3::Client,
csv: &[u8],
) -> Result<aws_sdk_s3::operation::put_object::PutObjectOutput, Error> {
let mut hasher = Sha256::new();
hasher.update(csv);
let csum = hasher.finalize();
let resp = s3_client
.put_object()
.bucket(&config.s3_bucket.name)
.key(&config.s3_bucket.key)
.content_type("text/csv")
.checksum_algorithm(ChecksumAlgorithm::Sha256)
.checksum_sha256(STANDARD.encode(csum))
.body(ByteStream::from(SdkBody::from(csv)))
.expected_bucket_owner(&config.aws_account_id)
.send()
.await?;
Ok(resp)
}

View File

@@ -0,0 +1,3 @@
fn main() -> Result<(), lambda_runtime::Error> {
pod_info_dumper::start()
}

View File

@@ -192,7 +192,7 @@ pub enum GetVectoredConcurrentIo {
SidecarTask,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct Ratio {
pub numerator: usize,
pub denominator: usize,
@@ -416,6 +416,9 @@ pub struct TenantConfigToml {
/// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
/// is above this ratio, gc-compaction will be triggered.
pub gc_compaction_ratio_percent: u64,
/// Tenant level performance sampling ratio override. Controls the ratio of get page requests
/// that will get perf sampling for the tenant.
pub sampling_ratio: Option<Ratio>,
}
pub mod defaults {
@@ -702,6 +705,7 @@ impl Default for TenantConfigToml {
gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
sampling_ratio: None,
}
}
}

View File

@@ -23,6 +23,7 @@ use utils::lsn::Lsn;
use utils::postgres_client::PostgresClientProtocol;
use utils::{completion, serde_system_time};
use crate::config::Ratio;
use crate::key::{CompactKey, Key};
use crate::reltag::RelTag;
use crate::shard::{ShardCount, ShardStripeSize, TenantShardId};
@@ -568,6 +569,8 @@ pub struct TenantConfigPatch {
pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub gc_compaction_ratio_percent: FieldPatch<u64>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub sampling_ratio: FieldPatch<Option<Ratio>>,
}
/// Like [`crate::config::TenantConfigToml`], but preserves the information
@@ -688,6 +691,9 @@ pub struct TenantConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub gc_compaction_ratio_percent: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_ratio: Option<Option<Ratio>>,
}
impl TenantConfig {
@@ -730,6 +736,7 @@ impl TenantConfig {
mut gc_compaction_enabled,
mut gc_compaction_initial_threshold_kb,
mut gc_compaction_ratio_percent,
mut sampling_ratio,
} = self;
patch.checkpoint_distance.apply(&mut checkpoint_distance);
@@ -824,6 +831,7 @@ impl TenantConfig {
patch
.gc_compaction_ratio_percent
.apply(&mut gc_compaction_ratio_percent);
patch.sampling_ratio.apply(&mut sampling_ratio);
Ok(Self {
checkpoint_distance,
@@ -860,6 +868,7 @@ impl TenantConfig {
gc_compaction_enabled,
gc_compaction_initial_threshold_kb,
gc_compaction_ratio_percent,
sampling_ratio,
})
}
@@ -961,6 +970,7 @@ impl TenantConfig {
gc_compaction_ratio_percent: self
.gc_compaction_ratio_percent
.unwrap_or(global_conf.gc_compaction_ratio_percent),
sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
}
}
}

View File

@@ -28,7 +28,7 @@ use core::{
task::{Context, Poll},
};
use pin_project_lite::pin_project;
use tracing::{Dispatch, field, span::Span};
use tracing::{Dispatch, span::Span};
#[derive(Debug, Clone)]
pub struct PerfSpan {
@@ -49,15 +49,6 @@ impl PerfSpan {
}
}
pub fn record<Q: field::AsField + ?Sized, V: field::Value>(
&self,
field: &Q,
value: V,
) -> &Self {
self.inner.record(field, value);
self
}
pub fn enter(&self) -> PerfSpanEntered {
if let Some(ref id) = self.inner.id() {
self.dispatch.enter(id);

View File

@@ -572,19 +572,6 @@ impl RequestContext {
}
}
pub(crate) fn perf_span_record<
Q: tracing::field::AsField + ?Sized,
V: tracing::field::Value,
>(
&self,
field: &Q,
value: V,
) {
if let Some(span) = &self.perf_span {
span.record(field, value);
}
}
pub(crate) fn has_perf_span(&self) -> bool {
self.perf_span.is_some()
}

View File

@@ -1248,13 +1248,13 @@ pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(Storag
#[derive(Clone, Copy)]
#[repr(usize)]
enum StorageIoSizeOperation {
pub(crate) enum StorageIoSizeOperation {
Read,
Write,
}
impl StorageIoSizeOperation {
const VARIANTS: &'static [&'static str] = &["read", "write"];
pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
fn as_str(&self) -> &'static str {
Self::VARIANTS[*self as usize]
@@ -1262,7 +1262,7 @@ impl StorageIoSizeOperation {
}
// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_io_operations_bytes_total",
"Total amount of bytes read/written in IO operations",

View File

@@ -18,7 +18,7 @@ use itertools::Itertools;
use once_cell::sync::OnceCell;
use pageserver_api::config::{
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
PageServiceProtocolPipelinedExecutionStrategy, Tracing,
PageServiceProtocolPipelinedExecutionStrategy,
};
use pageserver_api::key::rel_block_to_key;
use pageserver_api::models::{
@@ -37,7 +37,6 @@ use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use pq_proto::framed::ConnectionError;
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
use rand::Rng;
use strum_macros::IntoStaticStr;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
use tokio::task::JoinHandle;
@@ -755,7 +754,6 @@ impl PageServerHandler {
tenant_id: TenantId,
timeline_id: TimelineId,
timeline_handles: &mut TimelineHandles,
tracing_config: Option<&Tracing>,
cancel: &CancellationToken,
ctx: &RequestContext,
protocol_version: PagestreamProtocolVersion,
@@ -916,47 +914,8 @@ impl PageServerHandler {
let key = rel_block_to_key(req.rel, req.blkno);
let sampled = match tracing_config {
Some(conf) => {
let ratio = &conf.sampling_ratio;
if ratio.numerator == 0 {
false
} else {
rand::thread_rng().gen_range(0..ratio.denominator) < ratio.numerator
}
}
None => false,
};
let ctx = if sampled {
RequestContextBuilder::from(ctx)
.root_perf_span(|| {
info_span!(
target: PERF_TRACE_TARGET,
"GET_PAGE",
tenant_id = %tenant_id,
shard_id = field::Empty,
timeline_id = %timeline_id,
lsn = %req.hdr.request_lsn,
request_id = %req.hdr.reqid,
key = %key,
)
})
.attached_child()
} else {
ctx.attached_child()
};
let res = timeline_handles
.get(tenant_id, timeline_id, ShardSelector::Page(key))
.maybe_perf_instrument(&ctx, |current_perf_span| {
info_span!(
target: PERF_TRACE_TARGET,
parent: current_perf_span,
"SHARD_SELECTION",
)
})
.await;
let shard = match res {
@@ -987,6 +946,25 @@ impl PageServerHandler {
}
};
let ctx = if shard.is_get_page_request_sampled() {
RequestContextBuilder::from(ctx)
.root_perf_span(|| {
info_span!(
target: PERF_TRACE_TARGET,
"GET_PAGE",
tenant_id = %tenant_id,
shard_id = %shard.get_shard_identity().shard_slug(),
timeline_id = %timeline_id,
lsn = %req.hdr.request_lsn,
request_id = %req.hdr.reqid,
key = %key,
)
})
.attached_child()
} else {
ctx.attached_child()
};
// This ctx travels as part of the BatchedFeMessage through
// batching into the request handler.
// The request handler needs to do some per-request work
@@ -1001,12 +979,6 @@ impl PageServerHandler {
// request handler log messages contain the request-specific fields.
let span = mkspan!(shard.tenant_shard_id.shard_slug());
// Enrich the perf span with shard_id now that shard routing is done.
ctx.perf_span_record(
"shard_id",
tracing::field::display(shard.get_shard_identity().shard_slug()),
);
let timer = record_op_start_and_throttle(
&shard,
metrics::SmgrQueryType::GetPageAtLsn,
@@ -1602,7 +1574,6 @@ impl PageServerHandler {
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,
{
let cancel = self.cancel.clone();
let tracing_config = self.conf.tracing.clone();
let err = loop {
let msg = Self::pagestream_read_message(
@@ -1610,7 +1581,6 @@ impl PageServerHandler {
tenant_id,
timeline_id,
&mut timeline_handles,
tracing_config.as_ref(),
&cancel,
ctx,
protocol_version,
@@ -1744,8 +1714,6 @@ impl PageServerHandler {
// Batcher
//
let tracing_config = self.conf.tracing.clone();
let cancel_batcher = self.cancel.child_token();
let (mut batch_tx, mut batch_rx) = spsc_fold::channel();
let batcher = pipeline_stage!("batcher", cancel_batcher.clone(), move |cancel_batcher| {
@@ -1759,7 +1727,6 @@ impl PageServerHandler {
tenant_id,
timeline_id,
&mut timeline_handles,
tracing_config.as_ref(),
&cancel_batcher,
&ctx,
protocol_version,

View File

@@ -167,10 +167,17 @@ impl SecondaryTenant {
self.validate_metrics();
// Metrics are subtracted from and/or removed eagerly.
// Deletions are done in the background via [`BackgroundPurges::spawn`].
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
self.detail
.lock()
.unwrap()
.drain_timelines(&self.tenant_shard_id, &self.resident_size_metric);
}
pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {

View File

@@ -4,6 +4,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime};
use crate::metrics::{STORAGE_IO_SIZE, StorageIoSizeOperation};
use camino::Utf8PathBuf;
use chrono::format::{DelayedFormat, StrftimeItems};
use futures::Future;
@@ -124,15 +125,53 @@ impl OnDiskState {
}
}
#[derive(Debug, Clone, Default)]
pub(super) struct SecondaryDetailTimeline {
on_disk_layers: HashMap<LayerName, OnDiskState>,
/// We remember when layers were evicted, to prevent re-downloading them.
pub(super) evicted_at: HashMap<LayerName, SystemTime>,
ctx: RequestContext,
}
impl Clone for SecondaryDetailTimeline {
fn clone(&self) -> Self {
Self {
on_disk_layers: self.on_disk_layers.clone(),
evicted_at: self.evicted_at.clone(),
// This is a bit awkward. The downloader code operates on a snapshot
// of the secondary list to avoid locking it for extended periods of time.
// No particularly strong reason to choose [`RequestContext::detached_child`],
// but makes more sense than [`RequestContext::attached_child`].
ctx: self
.ctx
.detached_child(self.ctx.task_kind(), self.ctx.download_behavior()),
}
}
}
impl std::fmt::Debug for SecondaryDetailTimeline {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SecondaryDetailTimeline")
.field("on_disk_layers", &self.on_disk_layers)
.field("evicted_at", &self.evicted_at)
.finish()
}
}
impl SecondaryDetailTimeline {
pub(super) fn empty(ctx: RequestContext) -> Self {
SecondaryDetailTimeline {
on_disk_layers: Default::default(),
evicted_at: Default::default(),
ctx,
}
}
pub(super) fn context(&self) -> &RequestContext {
&self.ctx
}
pub(super) fn remove_layer(
&mut self,
name: &LayerName,
@@ -258,18 +297,50 @@ impl SecondaryDetail {
pub(super) fn remove_timeline(
&mut self,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
resident_metric: &UIntGauge,
) {
let removed = self.timelines.remove(timeline_id);
if let Some(removed) = removed {
resident_metric.sub(
removed
.on_disk_layers
.values()
.map(|l| l.metadata.file_size)
.sum(),
);
Self::clear_timeline_metrics(tenant_shard_id, timeline_id, removed, resident_metric);
}
}
pub(super) fn drain_timelines(
&mut self,
tenant_shard_id: &TenantShardId,
resident_metric: &UIntGauge,
) {
for (timeline_id, removed) in self.timelines.drain() {
Self::clear_timeline_metrics(tenant_shard_id, &timeline_id, removed, resident_metric);
}
}
fn clear_timeline_metrics(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
detail: SecondaryDetailTimeline,
resident_metric: &UIntGauge,
) {
resident_metric.sub(
detail
.on_disk_layers
.values()
.map(|l| l.metadata.file_size)
.sum(),
);
let shard_id = format!("{}", tenant_shard_id.shard_slug());
let tenant_id = tenant_shard_id.tenant_id.to_string();
let timeline_id = timeline_id.to_string();
for op in StorageIoSizeOperation::VARIANTS {
let _ = STORAGE_IO_SIZE.remove_label_values(&[
op,
tenant_id.as_str(),
shard_id.as_str(),
timeline_id.as_str(),
]);
}
}
@@ -727,6 +798,7 @@ impl<'a> TenantDownloader<'a> {
last_heatmap,
timeline,
&self.secondary_state.resident_size_metric,
ctx,
)
.await;
@@ -774,7 +846,6 @@ impl<'a> TenantDownloader<'a> {
// Download the layers in the heatmap
for timeline in heatmap.timelines {
let ctx = &ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline.timeline_id);
let timeline_state = timeline_states
.remove(&timeline.timeline_id)
.expect("Just populated above");
@@ -917,7 +988,11 @@ impl<'a> TenantDownloader<'a> {
for delete_timeline in &delete_timelines {
// We haven't removed from disk yet, but optimistically remove from in-memory state: if removal
// from disk fails that will be a fatal error.
detail.remove_timeline(delete_timeline, &self.secondary_state.resident_size_metric);
detail.remove_timeline(
self.secondary_state.get_tenant_shard_id(),
delete_timeline,
&self.secondary_state.resident_size_metric,
);
}
}
@@ -1013,7 +1088,6 @@ impl<'a> TenantDownloader<'a> {
timeline: HeatMapTimeline,
timeline_state: SecondaryDetailTimeline,
deadline: Instant,
ctx: &RequestContext,
) -> (Result<(), UpdateError>, Vec<HeatMapLayer>) {
// Accumulate updates to the state
let mut touched = Vec::new();
@@ -1044,7 +1118,12 @@ impl<'a> TenantDownloader<'a> {
}
match self
.download_layer(tenant_shard_id, &timeline_id, layer, ctx)
.download_layer(
tenant_shard_id,
&timeline_id,
layer,
timeline_state.context(),
)
.await
{
Ok(Some(layer)) => touched.push(layer),
@@ -1155,13 +1234,16 @@ impl<'a> TenantDownloader<'a> {
tracing::debug!(timeline_id=%timeline_id, "Downloading layers, {} in heatmap", timeline.hot_layers().count());
let (result, touched) = self
.download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline, ctx)
.download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline)
.await;
// Write updates to state to record layers we just downloaded or touched, irrespective of whether the overall result was successful
{
let mut detail = self.secondary_state.detail.lock().unwrap();
let timeline_detail = detail.timelines.entry(timeline_id).or_default();
let timeline_detail = detail.timelines.entry(timeline_id).or_insert_with(|| {
let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline_id);
SecondaryDetailTimeline::empty(ctx)
});
tracing::info!("Wrote timeline_detail for {} touched layers", touched.len());
touched.into_iter().for_each(|t| {
@@ -1295,10 +1377,12 @@ async fn init_timeline_state(
last_heatmap: Option<&HeatMapTimeline>,
heatmap: &HeatMapTimeline,
resident_metric: &UIntGauge,
ctx: &RequestContext,
) -> SecondaryDetailTimeline {
let timeline_path = conf.timeline_path(tenant_shard_id, &heatmap.timeline_id);
let mut detail = SecondaryDetailTimeline::default();
let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &heatmap.timeline_id);
let mut detail = SecondaryDetailTimeline::empty(ctx);
let timeline_path = conf.timeline_path(tenant_shard_id, &heatmap.timeline_id);
let mut dir = match tokio::fs::read_dir(&timeline_path).await {
Ok(d) => d,
Err(e) => {

View File

@@ -2476,6 +2476,31 @@ impl Timeline {
.unwrap_or(self.conf.default_tenant_conf.lazy_slru_download)
}
/// Checks if a get page request should get perf tracing
///
/// The configuration priority is: tenant config override, default tenant config,
/// pageserver config.
pub(crate) fn is_get_page_request_sampled(&self) -> bool {
let tenant_conf = self.tenant_conf.load();
let ratio = tenant_conf
.tenant_conf
.sampling_ratio
.flatten()
.or(self.conf.default_tenant_conf.sampling_ratio)
.or(self.conf.tracing.as_ref().map(|t| t.sampling_ratio));
match ratio {
Some(r) => {
if r.numerator == 0 {
false
} else {
rand::thread_rng().gen_range(0..r.denominator) < r.numerator
}
}
None => false,
}
}
fn get_checkpoint_distance(&self) -> u64 {
let tenant_conf = self.tenant_conf.load();
tenant_conf

View File

@@ -190,6 +190,10 @@ def test_fully_custom_config(positive_env: NeonEnv):
"gc_compaction_initial_threshold_kb": 1024000,
"gc_compaction_ratio_percent": 200,
"image_creation_preempt_threshold": 5,
"sampling_ratio": {
"numerator": 0,
"denominator": 10,
},
}
vps_http = env.storage_controller.pageserver_api()

View File

@@ -1099,3 +1099,70 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
# Warm up the current secondary.
ps_attached.http_client().tenant_secondary_download(tenant_id, wait_ms=100)
wait_until(lambda: all_layers_downloaded(ps_secondary, expected_locally))
@run_only_on_default_postgres("PG version is not interesting here")
@pytest.mark.parametrize("action", ["delete_timeline", "detach"])
def test_io_metrics_match_secondary_timeline_lifecycle(
neon_env_builder: NeonEnvBuilder, action: str
):
"""
Check that IO metrics for secondary timelines are de-registered when the timeline
is removed
"""
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_configs()
env.start()
tenant_id = TenantId.generate()
parent_timeline_id = TimelineId.generate()
# We do heatmap uploads and pulls manually
tenant_conf = {"heatmap_period": "0s"}
env.create_tenant(
tenant_id, parent_timeline_id, conf=tenant_conf, placement_policy='{"Attached":1}'
)
child_timeline_id = env.create_branch("foo", tenant_id)
attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
ps_attached = env.get_pageserver(attached_to_id)
ps_secondary = next(p for p in env.pageservers if p != ps_attached)
ps_attached.http_client().tenant_heatmap_upload(tenant_id)
status, _ = ps_secondary.http_client().tenant_secondary_download(tenant_id, wait_ms=5000)
assert status == 200
labels = {
"operation": "write",
"tenant_id": str(tenant_id),
"timeline_id": str(child_timeline_id),
}
bytes_written = (
ps_secondary.http_client()
.get_metrics()
.query_one("pageserver_io_operations_bytes_total", labels)
.value
)
assert bytes_written == 0
if action == "delete_timeline":
env.storage_controller.pageserver_api().timeline_delete(tenant_id, child_timeline_id)
ps_attached.http_client().tenant_heatmap_upload(tenant_id)
status, _ = ps_secondary.http_client().tenant_secondary_download(tenant_id, wait_ms=5000)
assert status == 200
elif action == "detach":
env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
env.storage_controller.reconcile_until_idle()
else:
raise Exception("Unexpected action")
assert (
len(
ps_secondary.http_client()
.get_metrics()
.query_all("pageserver_io_operations_bytes_total", labels)
)
== 0
)

View File

@@ -1,18 +1,18 @@
{
"v17": [
"17.4",
"7ec41bf6cd92a4af751272145fdd590270c491da"
"c9e4ff5a38907acd71107634055bf2609aba43a5"
],
"v16": [
"16.8",
"26c7d3f6de6f361c8923bb80d7563853b4a04958"
"746bd9ffe5c29bce030eaea1031054057f3c5d45"
],
"v15": [
"15.12",
"4ac24a747cd897119ce9b20547b3b04eba2cacbd"
"23708b3aca9adf163aa0973eb63d9afc0e4a04c3"
],
"v14": [
"14.17",
"bce3e48d8a72e70e72dfee1b7421fecd0f1b00ac"
"8cca70c22e2894dd4645f9a940086ac437b0a11b"
]
}