mirror of https://github.com/neondatabase/neon.git
synced 2026-02-05 03:30:36 +00:00

Compare commits: release-co... ... cloneable/...

6 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 357795e0c5 | |
| | f105ddb778 | |
| | 8e1b5a9727 | |
| | 1ef4258f29 | |
| | 65e2aae6e4 | |
| | edc874e1b3 | |

2 .github/workflows/build_and_test.yml vendored
@@ -980,7 +980,7 @@ jobs:
TEST_EXTENSIONS_TAG: >-
${{
contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
- && 'latest'
+ && needs.meta.outputs.previous-compute-release
|| needs.meta.outputs.build-tag
}}
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
1090 Cargo.lock generated
File diff suppressed because it is too large.

11 Cargo.toml
@@ -40,6 +40,8 @@ members = [
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
+ "lambda/aztraffic",
+ "lambda/pod_info_dumper",
]

[workspace.package]
@@ -342,3 +344,12 @@ inherits = "release"
debug = false # true = 2 = all symbols, 1 = line only
opt-level = "z"
lto = true
+
+ [profile.release-lambda-function]
+ inherits = "release"
+ lto = true
+ opt-level = "z"
+ codegen-units = 1
+ panic = "abort"
+ debug = false
+ strip = true
@@ -545,6 +545,11 @@ impl PageServerNode {
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_compaction_ratio_percent' as integer")?,
+ sampling_ratio: settings
+ .remove("sampling_ratio")
+ .map(serde_json::from_str)
+ .transpose()
+ .context("Failed to parse 'sampling_ratio'")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
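The new `sampling_ratio` setting is parsed with `serde_json::from_str`, so the value in the tenant settings map is expected to be a small JSON object matching the fields of `Ratio`. A minimal sketch of that shape, with the type restated locally and an illustrative value (not taken from the diff):

```rust
// Local stand-in for pageserver_api::config::Ratio; the real type derives the
// serde traits with these same field names.
#[derive(Debug, serde::Deserialize)]
struct Ratio {
    numerator: usize,
    denominator: usize,
}

fn main() -> Result<(), serde_json::Error> {
    // Hypothetical setting value: sample 1 request out of every 100.
    let ratio: Ratio = serde_json::from_str(r#"{"numerator":1,"denominator":100}"#)?;
    println!("{ratio:?}");
    Ok(())
}
```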
22 lambda/aztraffic/Cargo.toml Normal file
@@ -0,0 +1,22 @@
[package]
name = "aztraffic"
version = "0.0.0"
edition.workspace = true
license.workspace = true
publish = false

[dependencies]
anyhow = "1.0.97"
aws-config = "1.6.1"
aws-sdk-athena = "1.68.0"
aws-sdk-ec2 = "1.121.0"
aws-sdk-eks = "1.82.0"
aws-sdk-glue = "1.88.0"
aws-sdk-lambda = "1.75.0"
aws-sdk-scheduler = "1.64.0"
aws-sdk-sfn = "1.68.0"
aws-sdk-sts = "1.65.0"
clap = { version = "4.5.35", features = ["derive", "env"] }
tokio = { version = "1.44.1", features = ["full"] }
serde = "1.0.219"
serde_json = { version = "1.0.140", features = ["preserve_order"] }
794 lambda/aztraffic/src/main.rs Normal file
@@ -0,0 +1,794 @@
use std::fs;
|
||||
|
||||
use aws_config::default_provider::credentials::DefaultCredentialsChain;
|
||||
use aws_sdk_ec2::types::{
|
||||
DestinationFileFormat, DestinationOptionsRequest, FlowLogsResourceType, LogDestinationType,
|
||||
TrafficType,
|
||||
};
|
||||
use aws_sdk_glue::primitives::Blob;
|
||||
use aws_sdk_glue::types::{Column, DatabaseInput, SerDeInfo, StorageDescriptor, TableInput};
|
||||
use aws_sdk_lambda::types::{Environment, FunctionCode, Runtime};
|
||||
use aws_sdk_scheduler::types::{
|
||||
DeadLetterConfig, FlexibleTimeWindow, FlexibleTimeWindowMode, RetryPolicy, Target,
|
||||
};
|
||||
use aws_sdk_sfn::types::{CloudWatchLogsLogGroup, LogDestination, LogLevel, LoggingConfiguration};
|
||||
use clap::Parser;
|
||||
use serde_json::json;
|
||||
|
||||
#[derive(Parser, Clone, Debug)]
|
||||
struct Args {
|
||||
#[arg(long, value_name = "id")]
|
||||
account_id: String,
|
||||
#[arg(long, value_name = "region")]
|
||||
region: String,
|
||||
#[arg(long, value_name = "cluster")]
|
||||
cluster: String,
|
||||
#[arg(long, value_name = "id")]
|
||||
vpc_id: Vec<String>,
|
||||
|
||||
#[arg(long, value_name = "arn")]
|
||||
log_group_arn: String,
|
||||
#[arg(long, value_name = "name")]
|
||||
pod_info_s3_bucket_name: String,
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "path",
|
||||
default_value = "CrossAZTraffic/pod_info_dumper/pod_info.csv"
|
||||
)]
|
||||
pod_info_s3_bucket_key: String,
|
||||
#[arg(long, value_name = "uri")]
|
||||
pod_info_s3_bucket_uri: String,
|
||||
#[arg(long, value_name = "uri")]
|
||||
vpc_flow_logs_s3_bucket_uri: String,
|
||||
#[arg(long, value_name = "uri")]
|
||||
results_s3_bucket_uri: String,
|
||||
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "./target/lambda/pod_info_dumper/bootstrap.zip"
|
||||
)]
|
||||
lambda_zipfile_path: String,
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-podinfo-function"
|
||||
)]
|
||||
lambda_function_name: String,
|
||||
#[arg(long, value_name = "arn")]
|
||||
lambda_role_arn: String,
|
||||
|
||||
#[arg(long, value_name = "name")]
|
||||
glue_database_name: String,
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-podinfo-table"
|
||||
)]
|
||||
glue_pod_info_table_name: String,
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-vpcflowlogs-table"
|
||||
)]
|
||||
glue_vpc_flow_logs_table_name: String,
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-results-table"
|
||||
)]
|
||||
glue_results_table_name: String,
|
||||
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-trigger-schedule"
|
||||
)]
|
||||
schedule_name: String,
|
||||
#[arg(long, value_name = "minutes", default_value_t = 60)]
|
||||
schedule_interval_minutes: usize,
|
||||
#[arg(long, value_name = "arn")]
|
||||
schedule_target_state_machine_arn: String,
|
||||
#[arg(long, value_name = "arn")]
|
||||
schedule_target_role_arn: String,
|
||||
#[arg(long, value_name = "arn")]
|
||||
schedule_dead_letter_queue_arn: Option<String>,
|
||||
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-combine-query"
|
||||
)]
|
||||
athena_query_name: String,
|
||||
|
||||
#[arg(long, value_name = "uri")]
|
||||
vpcflowlogs_destination_s3_bucket_uri: String,
|
||||
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "name",
|
||||
default_value = "CrossAZTraffic-statemachine"
|
||||
)]
|
||||
statemachine_name: String,
|
||||
#[arg(long, value_name = "arn")]
|
||||
statemachine_role_arn: String,
|
||||
|
||||
#[arg(long, value_name = "uri")]
|
||||
athena_results_s3_bucket_uri: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let args = Args::parse();
|
||||
eprintln!("{args:#?}");
|
||||
|
||||
// TODO: athena results bucket + lifecycle config
|
||||
// TODO: iam role split
|
||||
// TODO: iam policy
|
||||
// TODO: clusterrole + binding
|
||||
// TODO: eks mapping
|
||||
// TODO: log group
|
||||
// TODO: dlq
|
||||
|
||||
let sdk_config = create_sdk_config(&args).await?;
|
||||
|
||||
LambdaFunction {
|
||||
local_zipfile_path: args.lambda_zipfile_path,
|
||||
function_name: args.lambda_function_name.clone(),
|
||||
role_arn: args.lambda_role_arn,
|
||||
account_id: args.account_id,
|
||||
region: args.region,
|
||||
cluster: args.cluster,
|
||||
s3_bucket_name: args.pod_info_s3_bucket_name,
|
||||
s3_bucket_key: args.pod_info_s3_bucket_key,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
GlueDatabase {
|
||||
database_name: args.glue_database_name.clone(),
|
||||
pod_info_table_name: args.glue_pod_info_table_name.clone(),
|
||||
pod_info_s3_bucket_uri: args.pod_info_s3_bucket_uri,
|
||||
vpc_flow_logs_table_name: args.glue_vpc_flow_logs_table_name.clone(),
|
||||
vpc_flow_logs_s3_bucket_uri: args.vpc_flow_logs_s3_bucket_uri,
|
||||
results_table_name: args.glue_results_table_name.clone(),
|
||||
results_s3_bucket_uri: args.results_s3_bucket_uri,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
let named_query_id = AthenaQuery {
|
||||
query_name: args.athena_query_name,
|
||||
glue_database: args.glue_database_name.clone(),
|
||||
invocation_frequency: args.schedule_interval_minutes,
|
||||
athena_results_table_name: args.glue_results_table_name,
|
||||
vpc_flow_logs_table_name: args.glue_vpc_flow_logs_table_name,
|
||||
pod_info_table_name: args.glue_pod_info_table_name,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
StateMachine {
|
||||
name: args.statemachine_name,
|
||||
role_arn: args.statemachine_role_arn,
|
||||
named_query_id,
|
||||
glue_database: args.glue_database_name,
|
||||
lambda_function_name: args.lambda_function_name,
|
||||
athena_results_s3_bucket_uri: args.athena_results_s3_bucket_uri,
|
||||
log_group_arn: args.log_group_arn,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
Schedule {
|
||||
name: args.schedule_name,
|
||||
interval_minutes: args.schedule_interval_minutes,
|
||||
dead_letter_queue_arn: args.schedule_dead_letter_queue_arn,
|
||||
target_role_arn: args.schedule_target_role_arn,
|
||||
target_state_machine_arn: args.schedule_target_state_machine_arn,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
let flow_log_ids = VpcFlowLogs {
|
||||
vpc_ids: args.vpc_id,
|
||||
destination_s3_bucket_uri: args.vpcflowlogs_destination_s3_bucket_uri,
|
||||
}
|
||||
.create(&sdk_config)
|
||||
.await?;
|
||||
|
||||
println!("VPC flow log IDs: {:?}", flow_log_ids.as_slice());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_sdk_config(args: &Args) -> anyhow::Result<aws_config::SdkConfig> {
|
||||
let region = aws_config::Region::new(args.region.to_owned());
|
||||
let credentials_provider = DefaultCredentialsChain::builder()
|
||||
.region(region.clone())
|
||||
.build()
|
||||
.await;
|
||||
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
|
||||
.region(region)
|
||||
.credentials_provider(credentials_provider)
|
||||
.load()
|
||||
.await)
|
||||
}
|
||||
|
||||
struct LambdaFunction {
|
||||
local_zipfile_path: String,
|
||||
function_name: String,
|
||||
role_arn: String,
|
||||
account_id: String,
|
||||
region: String,
|
||||
cluster: String,
|
||||
s3_bucket_name: String,
|
||||
s3_bucket_key: String,
|
||||
}
|
||||
|
||||
impl LambdaFunction {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
|
||||
let code = fs::read(&self.local_zipfile_path)?;
|
||||
|
||||
let client = aws_sdk_lambda::Client::new(sdk_config);
|
||||
client
|
||||
.delete_function()
|
||||
.function_name(&self.function_name)
|
||||
.send()
|
||||
.await
|
||||
.ok();
|
||||
|
||||
client
|
||||
.create_function()
|
||||
.function_name(&self.function_name)
|
||||
.runtime(Runtime::Providedal2023)
|
||||
.handler("bootstrap")
|
||||
.role(&self.role_arn)
|
||||
.code(FunctionCode::builder().zip_file(Blob::new(code)).build())
|
||||
.timeout(60)
|
||||
.environment(
|
||||
Environment::builder()
|
||||
.set_variables(Some(
|
||||
[
|
||||
("NEON_ACCOUNT_ID", self.account_id.as_str()),
|
||||
("NEON_REGION", self.region.as_str()),
|
||||
("NEON_CLUSTER", self.cluster.as_str()),
|
||||
("NEON_S3_BUCKET_NAME", self.s3_bucket_name.as_str()),
|
||||
("NEON_S3_BUCKET_KEY", self.s3_bucket_key.as_str()),
|
||||
("AWS_LAMBDA_LOG_FORMAT", "JSON"),
|
||||
("AWS_LAMBDA_LOG_LEVEL", "INFO"),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.into(), v.into()))
|
||||
.collect(),
|
||||
))
|
||||
.build(),
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct VpcFlowLogs {
|
||||
vpc_ids: Vec<String>,
|
||||
destination_s3_bucket_uri: String,
|
||||
}
|
||||
|
||||
impl VpcFlowLogs {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<Vec<String>> {
|
||||
let ec2_client = aws_sdk_ec2::Client::new(sdk_config);
|
||||
|
||||
let flow_logs = ec2_client
|
||||
.create_flow_logs()
|
||||
.resource_type(FlowLogsResourceType::Vpc)
|
||||
.set_resource_ids(Some(self.vpc_ids.clone()))
|
||||
.traffic_type(TrafficType::All)
|
||||
.log_destination_type(LogDestinationType::S3)
|
||||
.log_destination(&self.destination_s3_bucket_uri)
|
||||
.destination_options(
|
||||
DestinationOptionsRequest::builder()
|
||||
.file_format(DestinationFileFormat::Parquet)
|
||||
.hive_compatible_partitions(false)
|
||||
.per_hour_partition(true)
|
||||
.build(),
|
||||
)
|
||||
.log_format("${region} ${az-id} ${vpc-id} ${flow-direction} ${pkt-srcaddr} ${pkt-dstaddr} ${srcport} ${dstport} ${start} ${bytes}")
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if let Some(unsuccessful) = flow_logs
|
||||
.unsuccessful
|
||||
.as_ref()
|
||||
.and_then(|v| if v.is_empty() { None } else { Some(v) })
|
||||
{
|
||||
anyhow::bail!("VPC flow log creation unsuccessful: {unsuccessful:?}");
|
||||
}
|
||||
|
||||
Ok(flow_logs.flow_log_ids().iter().cloned().collect())
|
||||
}
|
||||
}
|
||||
|
||||
struct GlueDatabase {
|
||||
database_name: String,
|
||||
pod_info_table_name: String,
|
||||
pod_info_s3_bucket_uri: String,
|
||||
vpc_flow_logs_table_name: String,
|
||||
vpc_flow_logs_s3_bucket_uri: String,
|
||||
results_table_name: String,
|
||||
results_s3_bucket_uri: String,
|
||||
}
|
||||
|
||||
impl GlueDatabase {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
|
||||
let glue_client = aws_sdk_glue::Client::new(sdk_config);
|
||||
|
||||
let db = DatabaseInput::builder().name(&self.database_name).build()?;
|
||||
|
||||
glue_client
|
||||
.create_database()
|
||||
.database_input(db.clone())
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let pod_info_columns = &[
|
||||
Column::builder()
|
||||
.name("namespace")
|
||||
.r#type("string")
|
||||
.build()?,
|
||||
Column::builder().name("name").r#type("string").build()?,
|
||||
Column::builder().name("ip").r#type("string").build()?,
|
||||
Column::builder()
|
||||
.name("creation_time")
|
||||
.r#type("timestamp")
|
||||
.build()?,
|
||||
Column::builder().name("node").r#type("string").build()?,
|
||||
Column::builder().name("az").r#type("string").build()?,
|
||||
];
|
||||
glue_client
|
||||
.create_table()
|
||||
.database_name(db.name())
|
||||
.table_input(
|
||||
TableInput::builder()
|
||||
.name(&self.pod_info_table_name)
|
||||
.storage_descriptor(
|
||||
StorageDescriptor::builder()
|
||||
.location(&self.pod_info_s3_bucket_uri)
|
||||
.compressed(false)
|
||||
.set_columns(Some(pod_info_columns.into_iter().cloned().collect()))
|
||||
.input_format("org.apache.hadoop.mapred.TextInputFormat")
|
||||
.output_format(
|
||||
"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
|
||||
)
|
||||
.serde_info(
|
||||
SerDeInfo::builder()
|
||||
.serialization_library(
|
||||
"org.apache.hadoop.hive.serde2.OpenCSVSerde",
|
||||
)
|
||||
.parameters("separatorChar", ",")
|
||||
.parameters("quoteChar", "`")
|
||||
.parameters("escapeChar", r"\")
|
||||
.build(),
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
.table_type("EXTERNAL_TABLE")
|
||||
.parameters("classification", "csv")
|
||||
.parameters("skip.header.line.count", "1")
|
||||
.retention(0)
|
||||
.build()?,
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let vpc_flow_logs_columns = &[
|
||||
Column::builder().name("region").r#type("string").build()?,
|
||||
Column::builder().name("az_id").r#type("string").build()?,
|
||||
Column::builder().name("vpc_id").r#type("string").build()?,
|
||||
Column::builder()
|
||||
.name("flow_direction")
|
||||
.r#type("string")
|
||||
.build()?,
|
||||
Column::builder()
|
||||
.name("pkt_srcaddr")
|
||||
.r#type("string")
|
||||
.build()?,
|
||||
Column::builder()
|
||||
.name("pkt_dstaddr")
|
||||
.r#type("string")
|
||||
.build()?,
|
||||
Column::builder().name("srcport").r#type("int").build()?,
|
||||
Column::builder().name("dstport").r#type("int").build()?,
|
||||
Column::builder().name("start").r#type("bigint").build()?,
|
||||
Column::builder().name("bytes").r#type("bigint").build()?,
|
||||
];
|
||||
glue_client
|
||||
.create_table()
|
||||
.database_name(db.name())
|
||||
.table_input(
|
||||
TableInput::builder()
|
||||
.name(&self.vpc_flow_logs_table_name)
|
||||
.storage_descriptor(
|
||||
StorageDescriptor::builder()
|
||||
.location(&self.vpc_flow_logs_s3_bucket_uri)
|
||||
.compressed(false)
|
||||
.set_columns(Some(vpc_flow_logs_columns.into_iter().cloned().collect()))
|
||||
.input_format(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
|
||||
)
|
||||
.output_format(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
|
||||
)
|
||||
.serde_info(
|
||||
SerDeInfo::builder()
|
||||
.serialization_library(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
|
||||
)
|
||||
.parameters("serialization.format", "1")
|
||||
.build(),
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
.table_type("EXTERNAL_TABLE")
|
||||
.parameters("classification", "parquet")
|
||||
.retention(0)
|
||||
.build()?,
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let athena_results_columns = &[
|
||||
Column::builder().name("time").r#type("timestamp").build()?,
|
||||
Column::builder().name("traffic").r#type("string").build()?,
|
||||
Column::builder()
|
||||
.name("total_bytes")
|
||||
.r#type("bigint")
|
||||
.build()?,
|
||||
];
|
||||
glue_client
|
||||
.create_table()
|
||||
.database_name(db.name())
|
||||
.table_input(
|
||||
TableInput::builder()
|
||||
.name(&self.results_table_name)
|
||||
.storage_descriptor(
|
||||
StorageDescriptor::builder()
|
||||
.location(&self.results_s3_bucket_uri)
|
||||
.compressed(false)
|
||||
.set_columns(Some(athena_results_columns.into_iter().cloned().collect()))
|
||||
.input_format(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
|
||||
)
|
||||
.output_format(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
|
||||
)
|
||||
.serde_info(
|
||||
SerDeInfo::builder()
|
||||
.serialization_library(
|
||||
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
|
||||
)
|
||||
.parameters("serialization.format", "1")
|
||||
.build(),
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
.table_type("EXTERNAL_TABLE")
|
||||
.parameters("classification", "parquet")
|
||||
.retention(0)
|
||||
.build()?,
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct AthenaQuery {
|
||||
query_name: String,
|
||||
glue_database: String,
|
||||
invocation_frequency: usize,
|
||||
athena_results_table_name: String,
|
||||
vpc_flow_logs_table_name: String,
|
||||
pod_info_table_name: String,
|
||||
}
|
||||
|
||||
impl AthenaQuery {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<String> {
|
||||
let Self {
|
||||
athena_results_table_name,
|
||||
vpc_flow_logs_table_name,
|
||||
pod_info_table_name,
|
||||
invocation_frequency,
|
||||
..
|
||||
} = self;
|
||||
|
||||
let query_string = format!(
|
||||
r#"
|
||||
INSERT INTO "{athena_results_table_name}"
|
||||
WITH
|
||||
ip_addresses_and_az_mapping AS (
|
||||
SELECT
|
||||
DISTINCT pkt_srcaddr AS ipaddress,
|
||||
az_id
|
||||
FROM "{vpc_flow_logs_table_name}"
|
||||
WHERE flow_direction = 'egress'
|
||||
AND from_unixtime("{vpc_flow_logs_table_name}".start) > (CURRENT_TIMESTAMP - ({invocation_frequency} * interval '1' minute))
|
||||
),
|
||||
egress_flows_of_pods_with_status AS (
|
||||
SELECT
|
||||
"{pod_info_table_name}".name AS srcpodname,
|
||||
pkt_srcaddr AS srcaddr,
|
||||
pkt_dstaddr AS dstaddr,
|
||||
"{vpc_flow_logs_table_name}".az_id AS srcazid,
|
||||
bytes,
|
||||
start
|
||||
FROM "{vpc_flow_logs_table_name}"
|
||||
INNER JOIN "{pod_info_table_name}" ON "{vpc_flow_logs_table_name}".pkt_srcaddr = "{pod_info_table_name}".ip
|
||||
WHERE flow_direction = 'egress'
|
||||
AND from_unixtime("{vpc_flow_logs_table_name}".start) > (CURRENT_TIMESTAMP - ({invocation_frequency} * interval '1' minute))
|
||||
),
|
||||
cross_az_traffic_by_pod AS (
|
||||
SELECT
|
||||
srcaddr,
|
||||
srcpodname,
|
||||
dstaddr,
|
||||
"{pod_info_table_name}".name AS dstpodname,
|
||||
srcazid,
|
||||
ip_addresses_and_az_mapping.az_id AS dstazid,
|
||||
bytes,
|
||||
start
|
||||
FROM egress_flows_of_pods_with_status
|
||||
INNER JOIN "{pod_info_table_name}" ON dstaddr = "{pod_info_table_name}".ip
|
||||
LEFT JOIN ip_addresses_and_az_mapping ON dstaddr = ipaddress
|
||||
WHERE ip_addresses_and_az_mapping.az_id != srcazid
|
||||
)
|
||||
SELECT
|
||||
date_trunc('MINUTE', from_unixtime(start)) AS time,
|
||||
CONCAT(srcpodname, ' -> ', dstpodname) AS traffic,
|
||||
SUM(bytes) AS total_bytes
|
||||
FROM cross_az_traffic_by_pod
|
||||
GROUP BY date_trunc('MINUTE', from_unixtime(start)), CONCAT(srcpodname, ' -> ', dstpodname)
|
||||
ORDER BY time, total_bytes DESC
|
||||
"#
|
||||
);
|
||||
|
||||
let athena_client = aws_sdk_athena::Client::new(sdk_config);
|
||||
let res = athena_client
|
||||
.create_named_query()
|
||||
.name(&self.query_name)
|
||||
.database(&self.glue_database)
|
||||
.query_string(query_string)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(res.named_query_id.unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
struct StateMachine {
|
||||
name: String,
|
||||
role_arn: String,
|
||||
named_query_id: String,
|
||||
glue_database: String,
|
||||
lambda_function_name: String,
|
||||
athena_results_s3_bucket_uri: String,
|
||||
log_group_arn: String,
|
||||
}
|
||||
|
||||
impl StateMachine {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
|
||||
let sfn_client = aws_sdk_sfn::Client::new(sdk_config);
|
||||
sfn_client
|
||||
.create_state_machine()
|
||||
.name(&self.name)
|
||||
.role_arn(&self.role_arn)
|
||||
.logging_configuration(
|
||||
LoggingConfiguration::builder()
|
||||
.level(LogLevel::All)
|
||||
.destinations(
|
||||
LogDestination::builder()
|
||||
.cloud_watch_logs_log_group(
|
||||
CloudWatchLogsLogGroup::builder()
|
||||
.log_group_arn(&self.log_group_arn)
|
||||
.build(),
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
.build(),
|
||||
)
|
||||
.definition(
|
||||
json!(
|
||||
{
|
||||
"StartAt": "Invoke",
|
||||
"States": {
|
||||
"Invoke": {
|
||||
"Type": "Task",
|
||||
"Resource": "arn:aws:states:::lambda:invoke",
|
||||
"Output": "{% $states.result.Payload %}",
|
||||
"Arguments": {
|
||||
"FunctionName": self.lambda_function_name,
|
||||
"Payload": json!({
|
||||
"detail-type": "Scheduled Event",
|
||||
"source": "aws.events",
|
||||
"detail": {}
|
||||
}).to_string()
|
||||
},
|
||||
"Retry": [
|
||||
{
|
||||
"ErrorEquals": [
|
||||
"Lambda.ServiceException",
|
||||
"Lambda.AWSLambdaException",
|
||||
"Lambda.SdkClientException",
|
||||
"Lambda.TooManyRequestsException"
|
||||
],
|
||||
"IntervalSeconds": 1,
|
||||
"MaxAttempts": 3,
|
||||
"BackoffRate": 2,
|
||||
"JitterStrategy": "FULL"
|
||||
}
|
||||
],
|
||||
"Next": "Check"
|
||||
},
|
||||
"Check": {
|
||||
"Type": "Choice",
|
||||
"Choices": [
|
||||
{
|
||||
"Next": "GetNamedQuery",
|
||||
"Condition": "{% $states.input.statusCode = 200 %}"
|
||||
}
|
||||
],
|
||||
"Default": "Fail"
|
||||
},
|
||||
"GetNamedQuery": {
|
||||
"Type": "Task",
|
||||
"Arguments": {
|
||||
"NamedQueryId": self.named_query_id
|
||||
},
|
||||
"Resource": "arn:aws:states:::aws-sdk:athena:getNamedQuery",
|
||||
"Output": {
|
||||
"QueryString": "{% $states.result.NamedQuery.QueryString %}"
|
||||
},
|
||||
"Next": "StartQueryExecution"
|
||||
},
|
||||
"StartQueryExecution": {
|
||||
"Type": "Task",
|
||||
"Resource": "arn:aws:states:::athena:startQueryExecution.sync",
|
||||
"Arguments": {
|
||||
"QueryString": "{% $states.input.QueryString %}",
|
||||
"QueryExecutionContext": {
|
||||
"Database": self.glue_database
|
||||
},
|
||||
"ResultConfiguration": {
|
||||
"OutputLocation": self.athena_results_s3_bucket_uri
|
||||
},
|
||||
"WorkGroup": "primary"
|
||||
},
|
||||
"End": true
|
||||
},
|
||||
"Fail": {
|
||||
"Type": "Fail"
|
||||
}
|
||||
},
|
||||
"QueryLanguage": "JSONata"
|
||||
}
|
||||
)
|
||||
.to_string(),
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct Schedule {
|
||||
name: String,
|
||||
interval_minutes: usize,
|
||||
target_state_machine_arn: String,
|
||||
target_role_arn: String,
|
||||
dead_letter_queue_arn: Option<String>,
|
||||
}
|
||||
|
||||
impl Schedule {
|
||||
async fn create(&self, sdk_config: &aws_config::SdkConfig) -> anyhow::Result<()> {
|
||||
let sched_client = aws_sdk_scheduler::Client::new(sdk_config);
|
||||
|
||||
sched_client
|
||||
.create_schedule()
|
||||
.name(&self.name)
|
||||
.schedule_expression(format!("rate({} minute)", self.interval_minutes))
|
||||
.flexible_time_window(
|
||||
FlexibleTimeWindow::builder()
|
||||
.mode(FlexibleTimeWindowMode::Off)
|
||||
.build()?,
|
||||
)
|
||||
.target(
|
||||
Target::builder()
|
||||
.arn(&self.target_state_machine_arn)
|
||||
.role_arn(&self.target_role_arn)
|
||||
.input(
|
||||
json!({
|
||||
"detail-type": "Scheduled Event",
|
||||
"source": "aws.events",
|
||||
"detail": {}
|
||||
})
|
||||
.to_string(),
|
||||
)
|
||||
.retry_policy(
|
||||
RetryPolicy::builder()
|
||||
.maximum_retry_attempts(0)
|
||||
.maximum_event_age_in_seconds(60)
|
||||
.build(),
|
||||
)
|
||||
.set_dead_letter_config(
|
||||
self.dead_letter_queue_arn
|
||||
.as_ref()
|
||||
.map(|arn| DeadLetterConfig::builder().arn(arn).build()),
|
||||
)
|
||||
.build()?,
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct KubernetesRoles {
|
||||
region: String,
|
||||
cluster: String,
|
||||
k8s_role_prefix: String,
|
||||
lambda_role_arn: String,
|
||||
}
|
||||
|
||||
impl KubernetesRoles {
|
||||
fn print(&self) -> anyhow::Result<()> {
|
||||
let Self {
|
||||
region,
|
||||
cluster,
|
||||
k8s_role_prefix,
|
||||
lambda_role_arn,
|
||||
} = self;
|
||||
|
||||
let yaml = format!(
|
||||
r#"
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {k8s_role_prefix}-clusterrole
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources: ["nodes", "namespaces", "pods"]
|
||||
verbs: ["get", "list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {k8s_role_prefix}-binding
|
||||
subjects:
|
||||
- kind: Group
|
||||
name: {k8s_role_prefix}-group
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: {k8s_role_prefix}-clusterrole
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
"#
|
||||
);
|
||||
|
||||
let eksctl = format!(
r#"eksctl create iamidentitymapping \
--region "{region}" \
--cluster "{cluster}" \
--arn "{lambda_role_arn}" \
--username "{k8s_role_prefix}-binding" \
--group "{k8s_role_prefix}-group"
"#
);

// Print the generated manifest and command so an operator can apply them;
// wiring this up automatically is still a TODO (see the notes in `main`).
println!("{yaml}");
println!("{eksctl}");

Ok(())
|
||||
}
|
||||
}
|
||||
27 lambda/pod_info_dumper/Cargo.toml Normal file
@@ -0,0 +1,27 @@
[package]
name = "pod_info_dumper"
version = "0.0.0"
edition = "2024"
publish = false

[dependencies]
aws_lambda_events = { version = "0.16.0", default-features = false, features = ["eventbridge"] }
aws-config = { workspace = true }
aws-sdk-eks = "1.75.0"
aws-sdk-s3 = { workspace = true }
aws-sdk-sts = "1.65.0"
aws-sigv4 = "1.3.0"
base64 = { version = "0.22.1" }
csv = { version = "1.3.1", default-features = false }
http = { workspace = true }
k8s-openapi = { version = "0.24.0", default-features = false, features = ["v1_31"] }
kube = { version = "0.99.0", default-features = false, features = ["client", "rustls-tls"] }
lambda_runtime = { version = "0.13.0", default-features = false, features = ["tracing"] }
rustls = { version = "0.23.25" }
rustls-pemfile = { workspace = true }
secrecy = "0.10.3"
serde = { workspace = true }
serde_json = { workspace = true }
sha2 = { workspace = true, features = ["asm"] }
tokio = { workspace = true, features = ["macros"] }
tracing = { workspace = true, features = ["max_level_debug", "release_max_level_info"] }
8 lambda/pod_info_dumper/README.md Normal file
@@ -0,0 +1,8 @@
# pod_info_dumper

An event-triggered AWS Lambda function that writes the list of all pods with
node information to a CSV file in S3.

```shell
cargo lambda build -p pod_info_dumper --output-format Zip --x86-64 --profile release-lambda-function
```
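Each row the handler writes carries the pod's namespace, name, IP, readiness timestamp, node, and availability zone (see `PodInfo` in `src/lib.rs`). The sketch below illustrates that record shape with the same `csv`/`serde` approach the function uses; the field values are made up.

```rust
use serde::Serialize;

// Mirrors the columns written by pod_info_dumper; the values below are illustrative only.
#[derive(Serialize)]
struct PodInfoRow<'a> {
    namespace: &'a str,
    name: &'a str,
    ip: &'a str,
    creation_time: &'a str,
    node: &'a str,
    az: Option<&'a str>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Writes a header row followed by one record, just like the Lambda's CSV output.
    let mut w = csv::Writer::from_writer(std::io::stdout());
    w.serialize(PodInfoRow {
        namespace: "default",
        name: "example-pod",
        ip: "10.0.1.23",
        creation_time: "2025-01-01T00:00:00Z",
        node: "ip-10-0-1-5.ec2.internal",
        az: Some("use1-az1"),
    })?;
    w.flush()?;
    Ok(())
}
```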
420 lambda/pod_info_dumper/src/lib.rs Normal file
@@ -0,0 +1,420 @@
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, SystemTime};
|
||||
use std::{env, io};
|
||||
|
||||
use aws_config::default_provider::credentials::DefaultCredentialsChain;
|
||||
use aws_config::retry::RetryConfig;
|
||||
use aws_lambda_events::event::eventbridge::EventBridgeEvent;
|
||||
use aws_sdk_s3::primitives::{ByteStream, SdkBody};
|
||||
use aws_sdk_s3::types::ChecksumAlgorithm;
|
||||
use aws_sdk_sts::config::ProvideCredentials;
|
||||
use aws_sigv4::http_request::{
|
||||
SignableBody, SignableRequest, SignatureLocation, SigningSettings, sign,
|
||||
};
|
||||
use aws_sigv4::sign::v4;
|
||||
use base64::Engine as _;
|
||||
use base64::engine::general_purpose::STANDARD;
|
||||
use base64::prelude::*;
|
||||
use k8s_openapi::api::core::v1::{Node, Pod};
|
||||
use k8s_openapi::chrono::SecondsFormat;
|
||||
use kube::api::{Api, ListParams, ResourceExt};
|
||||
use lambda_runtime::{Error, LambdaEvent, run, service_fn, tracing};
|
||||
use secrecy::SecretString;
|
||||
use serde::ser::SerializeMap;
|
||||
use sha2::{Digest as _, Sha256};
|
||||
|
||||
const AZ_LABEL: &str = "topology.kubernetes.io/zone";
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Config {
|
||||
aws_account_id: String,
|
||||
s3_bucket: S3BucketConfig,
|
||||
eks_cluster: EksClusterConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct S3BucketConfig {
|
||||
region: String,
|
||||
name: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
impl S3BucketConfig {
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn create_sdk_config(&self) -> Result<aws_config::SdkConfig, Error> {
|
||||
let region = aws_config::Region::new(self.region.clone());
|
||||
|
||||
let credentials_provider = DefaultCredentialsChain::builder()
|
||||
.region(region.clone())
|
||||
.build()
|
||||
.await;
|
||||
|
||||
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
|
||||
.region(region)
|
||||
.credentials_provider(credentials_provider)
|
||||
.load()
|
||||
.await)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct EksClusterConfig {
|
||||
region: String,
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl EksClusterConfig {
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn create_sdk_config(&self) -> Result<aws_config::SdkConfig, Error> {
|
||||
let region = aws_config::Region::new(self.region.clone());
|
||||
|
||||
let credentials_provider = DefaultCredentialsChain::builder()
|
||||
.region(region.clone())
|
||||
.build()
|
||||
.await;
|
||||
|
||||
Ok(aws_config::defaults(aws_config::BehaviorVersion::latest())
|
||||
.region(region)
|
||||
.credentials_provider(credentials_provider)
|
||||
.load()
|
||||
.await)
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn start() -> Result<(), Error> {
|
||||
tracing::init_default_subscriber();
|
||||
rustls::crypto::aws_lc_rs::default_provider()
|
||||
.install_default()
|
||||
.unwrap();
|
||||
|
||||
tracing::info!("function handler started");
|
||||
|
||||
let config = Config {
|
||||
aws_account_id: env::var("NEON_ACCOUNT_ID")?,
|
||||
s3_bucket: S3BucketConfig {
|
||||
region: env::var("NEON_REGION")?,
|
||||
name: env::var("NEON_S3_BUCKET_NAME")?,
|
||||
key: env::var("NEON_S3_BUCKET_KEY")?,
|
||||
},
|
||||
eks_cluster: EksClusterConfig {
|
||||
region: env::var("NEON_REGION")?,
|
||||
name: env::var("NEON_CLUSTER")?,
|
||||
},
|
||||
};
|
||||
|
||||
run(service_fn(async |event: LambdaEvent<EventBridgeEvent<serde_json::Value>>| -> Result<StatusResponse, Error> {
|
||||
function_handler(event, &config).await
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct StatusResponse {
|
||||
status_code: http::StatusCode,
|
||||
body: Cow<'static, str>,
|
||||
}
|
||||
|
||||
impl StatusResponse {
|
||||
fn ok() -> Self {
|
||||
StatusResponse {
|
||||
status_code: http::StatusCode::OK,
|
||||
body: "OK".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl serde::Serialize for StatusResponse {
|
||||
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||
let mut serializer = serializer.serialize_map(None)?;
|
||||
serializer.serialize_entry("statusCode", &self.status_code.as_u16())?;
|
||||
serializer.serialize_entry("body", &self.body)?;
|
||||
serializer.end()
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(?event), err)]
|
||||
async fn function_handler(
|
||||
event: LambdaEvent<EventBridgeEvent<serde_json::Value>>,
|
||||
config: &Config,
|
||||
) -> Result<StatusResponse, Error> {
|
||||
tracing::info!("function handler called");
|
||||
|
||||
let kube_client = connect_to_cluster(config).await?;
|
||||
let s3_client = connect_to_s3(config).await?;
|
||||
|
||||
let nodes_azs = get_nodes_azs(kube_client.clone()).await?;
|
||||
|
||||
let mut pods_info = get_current_pods(kube_client.clone(), &nodes_azs).await?;
|
||||
pods_info.sort_unstable();
|
||||
|
||||
let mut csv = Vec::with_capacity(64 * 1024);
|
||||
write_csv(&pods_info, &mut csv)?;
|
||||
|
||||
tracing::info!(
|
||||
"csv is {} bytes, containing {} pods",
|
||||
csv.len(),
|
||||
pods_info.len()
|
||||
);
|
||||
|
||||
upload_csv(config, &s3_client, &csv).await?;
|
||||
|
||||
tracing::info!("pod info successfully stored");
|
||||
Ok(StatusResponse::ok())
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||
struct PodInfo<'a> {
|
||||
namespace: String,
|
||||
name: String,
|
||||
ip: String,
|
||||
creation_time: String,
|
||||
node: String,
|
||||
az: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn connect_to_cluster(config: &Config) -> Result<kube::Client, Error> {
|
||||
let sdk_config = config.eks_cluster.create_sdk_config().await?;
|
||||
let eks_client = aws_sdk_eks::Client::new(&sdk_config);
|
||||
|
||||
let resp = eks_client
|
||||
.describe_cluster()
|
||||
.name(&config.eks_cluster.name)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let cluster = resp
|
||||
.cluster()
|
||||
.ok_or_else(|| format!("cluster not found: {}", config.eks_cluster.name))?;
|
||||
let endpoint = cluster.endpoint().ok_or("cluster endpoint not found")?;
|
||||
let ca_data = cluster
|
||||
.certificate_authority()
|
||||
.and_then(|ca| ca.data())
|
||||
.ok_or("cluster certificate data not found")?;
|
||||
|
||||
let mut k8s_config = kube::Config::new(endpoint.parse()?);
|
||||
let cert_bytes = STANDARD.decode(ca_data)?;
|
||||
let certs = rustls_pemfile::certs(&mut cert_bytes.as_slice())
|
||||
.map(|c| c.map(|c| c.to_vec()))
|
||||
.collect::<Result<_, _>>()?;
|
||||
k8s_config.root_cert = Some(certs);
|
||||
k8s_config.auth_info.token = Some(
|
||||
create_kube_auth_token(
|
||||
&sdk_config,
|
||||
&config.eks_cluster.name,
|
||||
Duration::from_secs(10 * 60),
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
tracing::info!("cluster description completed");
|
||||
|
||||
Ok(kube::Client::try_from(k8s_config)?)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn create_kube_auth_token(
|
||||
sdk_config: &aws_config::SdkConfig,
|
||||
cluster_name: &str,
|
||||
expires_in: Duration,
|
||||
) -> Result<SecretString, Error> {
|
||||
let identity = sdk_config
|
||||
.credentials_provider()
|
||||
.unwrap()
|
||||
.provide_credentials()
|
||||
.await?
|
||||
.into();
|
||||
|
||||
let region = sdk_config.region().expect("region").as_ref();
|
||||
let host = format!("sts.{region}.amazonaws.com");
|
||||
let get_caller_id_url = format!("https://{host}/?Action=GetCallerIdentity&Version=2011-06-15");
|
||||
|
||||
let mut signing_settings = SigningSettings::default();
|
||||
signing_settings.signature_location = SignatureLocation::QueryParams;
|
||||
signing_settings.expires_in = Some(expires_in);
|
||||
let signing_params = v4::SigningParams::builder()
|
||||
.identity(&identity)
|
||||
.region(region)
|
||||
.name("sts")
|
||||
.time(SystemTime::now())
|
||||
.settings(signing_settings)
|
||||
.build()?
|
||||
.into();
|
||||
let signable_request = SignableRequest::new(
|
||||
"GET",
|
||||
&get_caller_id_url,
|
||||
[("host", host.as_str()), ("x-k8s-aws-id", cluster_name)].into_iter(),
|
||||
SignableBody::Bytes(&[]),
|
||||
)?;
|
||||
let (signing_instructions, _signature) = sign(signable_request, &signing_params)?.into_parts();
|
||||
|
||||
let mut token_request = http::Request::get(get_caller_id_url).body(()).unwrap();
|
||||
signing_instructions.apply_to_request_http1x(&mut token_request);
|
||||
|
||||
let token = format!(
|
||||
"k8s-aws-v1.{}",
|
||||
BASE64_STANDARD_NO_PAD.encode(token_request.uri().to_string())
|
||||
)
|
||||
.into();
|
||||
|
||||
Ok(token)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn connect_to_s3(config: &Config) -> Result<aws_sdk_s3::Client, Error> {
|
||||
let sdk_config = config.s3_bucket.create_sdk_config().await?;
|
||||
|
||||
let s3_client = aws_sdk_s3::Client::from_conf(
|
||||
aws_sdk_s3::config::Builder::from(&sdk_config)
|
||||
.retry_config(RetryConfig::standard())
|
||||
.build(),
|
||||
);
|
||||
|
||||
Ok(s3_client)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn get_nodes_azs(client: kube::Client) -> Result<HashMap<String, String>, Error> {
|
||||
let nodes = Api::<Node>::all(client);
|
||||
|
||||
let list_params = ListParams::default().timeout(10);
|
||||
|
||||
let mut nodes_azs = HashMap::default();
|
||||
for node in nodes.list(&list_params).await? {
let Some(name) = node.metadata.name else {
tracing::warn!("node without name");
continue;
};
let Some(mut labels) = node.metadata.labels else {
tracing::warn!(name, "node without labels");
continue;
};
let Some(az) = labels.remove(AZ_LABEL) else {
tracing::warn!(name, "node without AZ label");
continue;
};
|
||||
|
||||
tracing::debug!(name, az, "adding node");
|
||||
nodes_azs.insert(name, az);
|
||||
}
|
||||
|
||||
Ok(nodes_azs)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn get_current_pods(
|
||||
client: kube::Client,
|
||||
node_az: &HashMap<String, String>,
|
||||
) -> Result<Vec<PodInfo<'_>>, Error> {
|
||||
let pods = Api::<Pod>::all(client);
|
||||
|
||||
let mut pods_info = vec![];
|
||||
let mut continuation_token = Some(String::new());
|
||||
|
||||
while let Some(token) = continuation_token {
|
||||
let list_params = ListParams::default()
|
||||
.timeout(10)
|
||||
.limit(500)
|
||||
.continue_token(&token);
|
||||
|
||||
let list = pods.list(&list_params).await?;
|
||||
continuation_token = list.metadata.continue_;
|
||||
|
||||
tracing::info!("received list of {} pods", list.items.len());
|
||||
|
||||
for pod in list.items {
|
||||
let name = pod.name_any();
|
||||
let Some(namespace) = pod.namespace() else {
|
||||
tracing::warn!(name, "pod without namespace");
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(status) = pod.status else {
|
||||
tracing::warn!(namespace, name, "pod without status");
|
||||
continue;
|
||||
};
|
||||
let Some(conditions) = status.conditions else {
|
||||
tracing::warn!(namespace, name, "pod without conditions");
|
||||
continue;
|
||||
};
|
||||
let Some(ready_condition) = conditions.iter().find(|cond| cond.type_ == "Ready") else {
|
||||
tracing::debug!(namespace, name, "pod not ready");
|
||||
continue;
|
||||
};
|
||||
let Some(ref ready_time) = ready_condition.last_transition_time else {
|
||||
tracing::warn!(
|
||||
namespace,
|
||||
name,
|
||||
"pod ready condition without transition time"
|
||||
);
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(spec) = pod.spec else {
|
||||
tracing::warn!(namespace, name, "pod without spec");
|
||||
continue;
|
||||
};
|
||||
let Some(node) = spec.node_name else {
|
||||
tracing::warn!(namespace, name, "pod without node");
|
||||
continue;
|
||||
};
|
||||
let Some(ip) = status.pod_ip else {
|
||||
tracing::warn!(namespace, name, "pod without IP");
|
||||
continue;
|
||||
};
|
||||
let az = node_az.get(&node).map(String::as_str);
|
||||
let creation_time = ready_time.0.to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
|
||||
let pod_info = PodInfo {
|
||||
namespace,
|
||||
name,
|
||||
ip,
|
||||
creation_time,
|
||||
node,
|
||||
az,
|
||||
};
|
||||
tracing::debug!(?pod_info, "adding pod");
|
||||
|
||||
pods_info.push(pod_info);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(pods_info)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
fn write_csv<W: io::Write>(pods_info: &Vec<PodInfo>, writer: W) -> Result<(), Error> {
|
||||
let mut w = csv::Writer::from_writer(writer);
|
||||
for pod in pods_info {
|
||||
w.serialize(pod)?;
|
||||
}
|
||||
w.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, err)]
|
||||
async fn upload_csv(
|
||||
config: &Config,
|
||||
s3_client: &aws_sdk_s3::Client,
|
||||
csv: &[u8],
|
||||
) -> Result<aws_sdk_s3::operation::put_object::PutObjectOutput, Error> {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(csv);
|
||||
let csum = hasher.finalize();
|
||||
|
||||
let resp = s3_client
|
||||
.put_object()
|
||||
.bucket(&config.s3_bucket.name)
|
||||
.key(&config.s3_bucket.key)
|
||||
.content_type("text/csv")
|
||||
.checksum_algorithm(ChecksumAlgorithm::Sha256)
|
||||
.checksum_sha256(STANDARD.encode(csum))
|
||||
.body(ByteStream::from(SdkBody::from(csv)))
|
||||
.expected_bucket_owner(&config.aws_account_id)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
Ok(resp)
|
||||
}
|
||||
3 lambda/pod_info_dumper/src/main.rs Normal file
@@ -0,0 +1,3 @@
fn main() -> Result<(), lambda_runtime::Error> {
    pod_info_dumper::start()
}
@@ -192,7 +192,7 @@ pub enum GetVectoredConcurrentIo {
SidecarTask,
}

- #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+ #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct Ratio {
pub numerator: usize,
pub denominator: usize,
@@ -416,6 +416,9 @@ pub struct TenantConfigToml {
/// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
/// is above this ratio, gc-compaction will be triggered.
pub gc_compaction_ratio_percent: u64,
+ /// Tenant level performance sampling ratio override. Controls the ratio of get page requests
+ /// that will get perf sampling for the tenant.
+ pub sampling_ratio: Option<Ratio>,
}

pub mod defaults {
@@ -702,6 +705,7 @@ impl Default for TenantConfigToml {
gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
+ sampling_ratio: None,
}
}
}
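Since `sampling_ratio` is the new knob here, a minimal sketch of how a `Ratio` turns into a per-request decision may help; it mirrors the numerator/denominator check the page service applies later in this diff. The struct is restated locally so the snippet is self-contained, and the example ratio is made up.

```rust
use rand::Rng;

// Local stand-in for pageserver_api::config::Ratio.
struct Ratio {
    numerator: usize,
    denominator: usize,
}

/// Samples roughly `numerator / denominator` of calls.
fn is_sampled(ratio: &Ratio) -> bool {
    if ratio.numerator == 0 || ratio.denominator == 0 {
        return false; // a zero ratio disables sampling (and avoids an empty range below)
    }
    rand::thread_rng().gen_range(0..ratio.denominator) < ratio.numerator
}

fn main() {
    // Hypothetical override: sample ~1% of get-page requests.
    let ratio = Ratio { numerator: 1, denominator: 100 };
    let sampled = (0..10_000).filter(|_| is_sampled(&ratio)).count();
    println!("sampled {sampled} of 10000 simulated requests");
}
```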
@@ -23,6 +23,7 @@ use utils::lsn::Lsn;
|
||||
use utils::postgres_client::PostgresClientProtocol;
|
||||
use utils::{completion, serde_system_time};
|
||||
|
||||
use crate::config::Ratio;
|
||||
use crate::key::{CompactKey, Key};
|
||||
use crate::reltag::RelTag;
|
||||
use crate::shard::{ShardCount, ShardStripeSize, TenantShardId};
|
||||
@@ -568,6 +569,8 @@ pub struct TenantConfigPatch {
|
||||
pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_compaction_ratio_percent: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub sampling_ratio: FieldPatch<Option<Ratio>>,
|
||||
}
|
||||
|
||||
/// Like [`crate::config::TenantConfigToml`], but preserves the information
|
||||
@@ -688,6 +691,9 @@ pub struct TenantConfig {
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_compaction_ratio_percent: Option<u64>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub sampling_ratio: Option<Option<Ratio>>,
|
||||
}
|
||||
|
||||
impl TenantConfig {
|
||||
@@ -730,6 +736,7 @@ impl TenantConfig {
|
||||
mut gc_compaction_enabled,
|
||||
mut gc_compaction_initial_threshold_kb,
|
||||
mut gc_compaction_ratio_percent,
|
||||
mut sampling_ratio,
|
||||
} = self;
|
||||
|
||||
patch.checkpoint_distance.apply(&mut checkpoint_distance);
|
||||
@@ -824,6 +831,7 @@ impl TenantConfig {
|
||||
patch
|
||||
.gc_compaction_ratio_percent
|
||||
.apply(&mut gc_compaction_ratio_percent);
|
||||
patch.sampling_ratio.apply(&mut sampling_ratio);
|
||||
|
||||
Ok(Self {
|
||||
checkpoint_distance,
|
||||
@@ -860,6 +868,7 @@ impl TenantConfig {
|
||||
gc_compaction_enabled,
|
||||
gc_compaction_initial_threshold_kb,
|
||||
gc_compaction_ratio_percent,
|
||||
sampling_ratio,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -961,6 +970,7 @@ impl TenantConfig {
|
||||
gc_compaction_ratio_percent: self
|
||||
.gc_compaction_ratio_percent
|
||||
.unwrap_or(global_conf.gc_compaction_ratio_percent),
|
||||
sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ use core::{
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use pin_project_lite::pin_project;
|
||||
use tracing::{Dispatch, field, span::Span};
|
||||
use tracing::{Dispatch, span::Span};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerfSpan {
|
||||
@@ -49,15 +49,6 @@ impl PerfSpan {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record<Q: field::AsField + ?Sized, V: field::Value>(
|
||||
&self,
|
||||
field: &Q,
|
||||
value: V,
|
||||
) -> &Self {
|
||||
self.inner.record(field, value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn enter(&self) -> PerfSpanEntered {
|
||||
if let Some(ref id) = self.inner.id() {
|
||||
self.dispatch.enter(id);
|
||||
|
||||
@@ -572,19 +572,6 @@ impl RequestContext {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn perf_span_record<
|
||||
Q: tracing::field::AsField + ?Sized,
|
||||
V: tracing::field::Value,
|
||||
>(
|
||||
&self,
|
||||
field: &Q,
|
||||
value: V,
|
||||
) {
|
||||
if let Some(span) = &self.perf_span {
|
||||
span.record(field, value);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn has_perf_span(&self) -> bool {
|
||||
self.perf_span.is_some()
|
||||
}
|
||||
|
||||
@@ -1248,13 +1248,13 @@ pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(Storag
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[repr(usize)]
|
||||
enum StorageIoSizeOperation {
|
||||
pub(crate) enum StorageIoSizeOperation {
|
||||
Read,
|
||||
Write,
|
||||
}
|
||||
|
||||
impl StorageIoSizeOperation {
|
||||
const VARIANTS: &'static [&'static str] = &["read", "write"];
|
||||
pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
|
||||
|
||||
fn as_str(&self) -> &'static str {
|
||||
Self::VARIANTS[*self as usize]
|
||||
@@ -1262,7 +1262,7 @@ impl StorageIoSizeOperation {
|
||||
}
|
||||
|
||||
// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
|
||||
static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_io_operations_bytes_total",
|
||||
"Total amount of bytes read/written in IO operations",
|
||||
|
||||
@@ -18,7 +18,7 @@ use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::config::{
|
||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
PageServiceProtocolPipelinedExecutionStrategy, Tracing,
|
||||
PageServiceProtocolPipelinedExecutionStrategy,
|
||||
};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::models::{
|
||||
@@ -37,7 +37,6 @@ use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use pq_proto::framed::ConnectionError;
|
||||
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
|
||||
use rand::Rng;
|
||||
use strum_macros::IntoStaticStr;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
|
||||
use tokio::task::JoinHandle;
|
||||
@@ -755,7 +754,6 @@ impl PageServerHandler {
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
timeline_handles: &mut TimelineHandles,
|
||||
tracing_config: Option<&Tracing>,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
@@ -916,47 +914,8 @@ impl PageServerHandler {
|
||||
|
||||
let key = rel_block_to_key(req.rel, req.blkno);
|
||||
|
||||
let sampled = match tracing_config {
|
||||
Some(conf) => {
|
||||
let ratio = &conf.sampling_ratio;
|
||||
|
||||
if ratio.numerator == 0 {
|
||||
false
|
||||
} else {
|
||||
rand::thread_rng().gen_range(0..ratio.denominator) < ratio.numerator
|
||||
}
|
||||
}
|
||||
None => false,
|
||||
};
|
||||
|
||||
let ctx = if sampled {
|
||||
RequestContextBuilder::from(ctx)
|
||||
.root_perf_span(|| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
"GET_PAGE",
|
||||
tenant_id = %tenant_id,
|
||||
shard_id = field::Empty,
|
||||
timeline_id = %timeline_id,
|
||||
lsn = %req.hdr.request_lsn,
|
||||
request_id = %req.hdr.reqid,
|
||||
key = %key,
|
||||
)
|
||||
})
|
||||
.attached_child()
|
||||
} else {
|
||||
ctx.attached_child()
|
||||
};
|
||||
|
||||
let res = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Page(key))
|
||||
.maybe_perf_instrument(&ctx, |current_perf_span| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
parent: current_perf_span,
|
||||
"SHARD_SELECTION",
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
let shard = match res {
|
||||
@@ -987,6 +946,25 @@ impl PageServerHandler {
|
||||
}
|
||||
};
|
||||
|
||||
let ctx = if shard.is_get_page_request_sampled() {
|
||||
RequestContextBuilder::from(ctx)
|
||||
.root_perf_span(|| {
|
||||
info_span!(
|
||||
target: PERF_TRACE_TARGET,
|
||||
"GET_PAGE",
|
||||
tenant_id = %tenant_id,
|
||||
shard_id = %shard.get_shard_identity().shard_slug(),
|
||||
timeline_id = %timeline_id,
|
||||
lsn = %req.hdr.request_lsn,
|
||||
request_id = %req.hdr.reqid,
|
||||
key = %key,
|
||||
)
|
||||
})
|
||||
.attached_child()
|
||||
} else {
|
||||
ctx.attached_child()
|
||||
};
|
||||
|
||||
// This ctx travels as part of the BatchedFeMessage through
|
||||
// batching into the request handler.
|
||||
// The request handler needs to do some per-request work
|
||||
@@ -1001,12 +979,6 @@ impl PageServerHandler {
|
||||
// request handler log messages contain the request-specific fields.
|
||||
let span = mkspan!(shard.tenant_shard_id.shard_slug());
|
||||
|
||||
// Enrich the perf span with shard_id now that shard routing is done.
|
||||
ctx.perf_span_record(
|
||||
"shard_id",
|
||||
tracing::field::display(shard.get_shard_identity().shard_slug()),
|
||||
);
|
||||
|
||||
let timer = record_op_start_and_throttle(
|
||||
&shard,
|
||||
metrics::SmgrQueryType::GetPageAtLsn,
|
||||
@@ -1602,7 +1574,6 @@ impl PageServerHandler {
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin + 'static,
|
||||
{
|
||||
let cancel = self.cancel.clone();
|
||||
let tracing_config = self.conf.tracing.clone();
|
||||
|
||||
let err = loop {
|
||||
let msg = Self::pagestream_read_message(
|
||||
@@ -1610,7 +1581,6 @@ impl PageServerHandler {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&mut timeline_handles,
|
||||
tracing_config.as_ref(),
|
||||
&cancel,
|
||||
ctx,
|
||||
protocol_version,
|
||||
@@ -1744,8 +1714,6 @@ impl PageServerHandler {
|
||||
// Batcher
|
||||
//
|
||||
|
||||
let tracing_config = self.conf.tracing.clone();
|
||||
|
||||
let cancel_batcher = self.cancel.child_token();
|
||||
let (mut batch_tx, mut batch_rx) = spsc_fold::channel();
|
||||
let batcher = pipeline_stage!("batcher", cancel_batcher.clone(), move |cancel_batcher| {
|
||||
@@ -1759,7 +1727,6 @@ impl PageServerHandler {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&mut timeline_handles,
|
||||
tracing_config.as_ref(),
|
||||
&cancel_batcher,
|
||||
&ctx,
|
||||
protocol_version,
|
||||
|
||||
@@ -167,10 +167,17 @@ impl SecondaryTenant {
|
||||
|
||||
self.validate_metrics();
|
||||
|
||||
// Metrics are subtracted from and/or removed eagerly.
|
||||
// Deletions are done in the background via [`BackgroundPurges::spawn`].
|
||||
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
|
||||
self.detail
|
||||
.lock()
|
||||
.unwrap()
|
||||
.drain_timelines(&self.tenant_shard_id, &self.resident_size_metric);
|
||||
}
|
||||
|
||||
pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::metrics::{STORAGE_IO_SIZE, StorageIoSizeOperation};
|
||||
use camino::Utf8PathBuf;
|
||||
use chrono::format::{DelayedFormat, StrftimeItems};
|
||||
use futures::Future;
|
||||
@@ -124,15 +125,53 @@ impl OnDiskState {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(super) struct SecondaryDetailTimeline {
|
||||
on_disk_layers: HashMap<LayerName, OnDiskState>,
|
||||
|
||||
/// We remember when layers were evicted, to prevent re-downloading them.
|
||||
pub(super) evicted_at: HashMap<LayerName, SystemTime>,
|
||||
|
||||
ctx: RequestContext,
|
||||
}
|
||||
|
||||
impl Clone for SecondaryDetailTimeline {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
on_disk_layers: self.on_disk_layers.clone(),
|
||||
evicted_at: self.evicted_at.clone(),
|
||||
// This is a bit awkward. The downloader code operates on a snapshot
|
||||
// of the secondary list to avoid locking it for extended periods of time.
|
||||
// No particularly strong reason to chose [`RequestContext::detached_child`],
|
||||
// but makes more sense than [`RequestContext::attached_child`].
|
||||
ctx: self
|
||||
.ctx
|
||||
.detached_child(self.ctx.task_kind(), self.ctx.download_behavior()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SecondaryDetailTimeline {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SecondaryDetailTimeline")
|
||||
.field("on_disk_layers", &self.on_disk_layers)
|
||||
.field("evicted_at", &self.evicted_at)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl SecondaryDetailTimeline {
|
||||
pub(super) fn empty(ctx: RequestContext) -> Self {
|
||||
SecondaryDetailTimeline {
|
||||
on_disk_layers: Default::default(),
|
||||
evicted_at: Default::default(),
|
||||
ctx,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn context(&self) -> &RequestContext {
|
||||
&self.ctx
|
||||
}
|
||||
|
||||
pub(super) fn remove_layer(
|
||||
&mut self,
|
||||
name: &LayerName,
|
||||
@@ -258,18 +297,50 @@ impl SecondaryDetail {

    pub(super) fn remove_timeline(
        &mut self,
        tenant_shard_id: &TenantShardId,
        timeline_id: &TimelineId,
        resident_metric: &UIntGauge,
    ) {
        let removed = self.timelines.remove(timeline_id);
        if let Some(removed) = removed {
            resident_metric.sub(
                removed
                    .on_disk_layers
                    .values()
                    .map(|l| l.metadata.file_size)
                    .sum(),
            );
            Self::clear_timeline_metrics(tenant_shard_id, timeline_id, removed, resident_metric);
        }
    }

    pub(super) fn drain_timelines(
        &mut self,
        tenant_shard_id: &TenantShardId,
        resident_metric: &UIntGauge,
    ) {
        for (timeline_id, removed) in self.timelines.drain() {
            Self::clear_timeline_metrics(tenant_shard_id, &timeline_id, removed, resident_metric);
        }
    }

    fn clear_timeline_metrics(
        tenant_shard_id: &TenantShardId,
        timeline_id: &TimelineId,
        detail: SecondaryDetailTimeline,
        resident_metric: &UIntGauge,
    ) {
        resident_metric.sub(
            detail
                .on_disk_layers
                .values()
                .map(|l| l.metadata.file_size)
                .sum(),
        );

        let shard_id = format!("{}", tenant_shard_id.shard_slug());
        let tenant_id = tenant_shard_id.tenant_id.to_string();
        let timeline_id = timeline_id.to_string();
        for op in StorageIoSizeOperation::VARIANTS {
            let _ = STORAGE_IO_SIZE.remove_label_values(&[
                op,
                tenant_id.as_str(),
                shard_id.as_str(),
                timeline_id.as_str(),
            ]);
        }
    }
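
The metric cleanup above relies on removing the exact label set of each per-timeline series so that the series disappears from the scrape output, which is what the new regression test further down checks. A small self-contained sketch of that behaviour with the prometheus crate; the metric name, label values, and registry wiring here are illustrative, not the pageserver's real setup:

use prometheus::{Encoder, IntGaugeVec, Opts, Registry, TextEncoder};

fn main() {
    let registry = Registry::new();
    // Hypothetical gauge with the same label scheme as STORAGE_IO_SIZE.
    let io_size = IntGaugeVec::new(
        Opts::new("io_size_demo", "bytes per (operation, tenant, shard, timeline)"),
        &["operation", "tenant_id", "shard_id", "timeline_id"],
    )
    .unwrap();
    registry.register(Box::new(io_size.clone())).unwrap();

    // Touching a labelled child creates the series...
    io_size
        .with_label_values(&["write", "tenant-1", "0001", "timeline-1"])
        .set(4096);

    // ...and removing the exact same label set drops it again, so no sample
    // for this timeline is left in the scraped text output.
    io_size
        .remove_label_values(&["write", "tenant-1", "0001", "timeline-1"])
        .unwrap();

    let mut buf = Vec::new();
    TextEncoder::new().encode(&registry.gather(), &mut buf).unwrap();
    let out = String::from_utf8(buf).unwrap();
    assert!(!out.contains("timeline-1"));
    println!("{out}");
}
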
@@ -727,6 +798,7 @@ impl<'a> TenantDownloader<'a> {
                last_heatmap,
                timeline,
                &self.secondary_state.resident_size_metric,
                ctx,
            )
            .await;

@@ -774,7 +846,6 @@ impl<'a> TenantDownloader<'a> {

        // Download the layers in the heatmap
        for timeline in heatmap.timelines {
            let ctx = &ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline.timeline_id);
            let timeline_state = timeline_states
                .remove(&timeline.timeline_id)
                .expect("Just populated above");
@@ -917,7 +988,11 @@ impl<'a> TenantDownloader<'a> {
            for delete_timeline in &delete_timelines {
                // We haven't removed from disk yet, but optimistically remove from in-memory state: if removal
                // from disk fails that will be a fatal error.
                detail.remove_timeline(delete_timeline, &self.secondary_state.resident_size_metric);
                detail.remove_timeline(
                    self.secondary_state.get_tenant_shard_id(),
                    delete_timeline,
                    &self.secondary_state.resident_size_metric,
                );
            }
        }

@@ -1013,7 +1088,6 @@ impl<'a> TenantDownloader<'a> {
        timeline: HeatMapTimeline,
        timeline_state: SecondaryDetailTimeline,
        deadline: Instant,
        ctx: &RequestContext,
    ) -> (Result<(), UpdateError>, Vec<HeatMapLayer>) {
        // Accumulate updates to the state
        let mut touched = Vec::new();
@@ -1044,7 +1118,12 @@ impl<'a> TenantDownloader<'a> {
            }

            match self
                .download_layer(tenant_shard_id, &timeline_id, layer, ctx)
                .download_layer(
                    tenant_shard_id,
                    &timeline_id,
                    layer,
                    timeline_state.context(),
                )
                .await
            {
                Ok(Some(layer)) => touched.push(layer),
@@ -1155,13 +1234,16 @@ impl<'a> TenantDownloader<'a> {
        tracing::debug!(timeline_id=%timeline_id, "Downloading layers, {} in heatmap", timeline.hot_layers().count());

        let (result, touched) = self
            .download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline, ctx)
            .download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline)
            .await;

        // Write updates to state to record layers we just downloaded or touched, irrespective of whether the overall result was successful
        {
            let mut detail = self.secondary_state.detail.lock().unwrap();
            let timeline_detail = detail.timelines.entry(timeline_id).or_default();
            let timeline_detail = detail.timelines.entry(timeline_id).or_insert_with(|| {
                let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline_id);
                SecondaryDetailTimeline::empty(ctx)
            });

            tracing::info!("Wrote timeline_detail for {} touched layers", touched.len());
            touched.into_iter().for_each(|t| {
@@ -1295,10 +1377,12 @@ async fn init_timeline_state(
    last_heatmap: Option<&HeatMapTimeline>,
    heatmap: &HeatMapTimeline,
    resident_metric: &UIntGauge,
    ctx: &RequestContext,
) -> SecondaryDetailTimeline {
    let timeline_path = conf.timeline_path(tenant_shard_id, &heatmap.timeline_id);
    let mut detail = SecondaryDetailTimeline::default();
    let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &heatmap.timeline_id);
    let mut detail = SecondaryDetailTimeline::empty(ctx);

    let timeline_path = conf.timeline_path(tenant_shard_id, &heatmap.timeline_id);
    let mut dir = match tokio::fs::read_dir(&timeline_path).await {
        Ok(d) => d,
        Err(e) => {
@@ -2476,6 +2476,31 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.lazy_slru_download)
    }

    /// Checks if a get page request should get perf tracing
    ///
    /// The configuration priority is: tenant config override, default tenant config,
    /// pageserver config.
    pub(crate) fn is_get_page_request_sampled(&self) -> bool {
        let tenant_conf = self.tenant_conf.load();
        let ratio = tenant_conf
            .tenant_conf
            .sampling_ratio
            .flatten()
            .or(self.conf.default_tenant_conf.sampling_ratio)
            .or(self.conf.tracing.as_ref().map(|t| t.sampling_ratio));

        match ratio {
            Some(r) => {
                if r.numerator == 0 {
                    false
                } else {
                    rand::thread_rng().gen_range(0..r.denominator) < r.numerator
                }
            }
            None => false,
        }
    }

    fn get_checkpoint_distance(&self) -> u64 {
        let tenant_conf = self.tenant_conf.load();
        tenant_conf
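
The check above samples a get-page request with probability numerator/denominator, and a numerator of 0 disables sampling outright, which matches the {"numerator": 0, "denominator": 10} tenant config exercised in the test below. A standalone sketch of the same decision; the Ratio struct and should_sample helper are illustrative names, not the pageserver's actual types:

use rand::Rng;

/// Stand-in for the pageserver's sampling-ratio config value.
struct Ratio {
    numerator: u64,
    denominator: u64,
}

/// Returns true with probability numerator/denominator, mirroring the
/// `gen_range(0..denominator) < numerator` check in `is_get_page_request_sampled`.
fn should_sample(ratio: Option<&Ratio>) -> bool {
    match ratio {
        Some(r) if r.numerator == 0 => false,
        Some(r) => rand::thread_rng().gen_range(0..r.denominator) < r.numerator,
        None => false,
    }
}

fn main() {
    // A 0/10 ratio (as in the test config below) never samples.
    assert!(!should_sample(Some(&Ratio { numerator: 0, denominator: 10 })));

    // A 1/10 ratio samples roughly 10% of requests.
    let hits = (0..100_000)
        .filter(|_| should_sample(Some(&Ratio { numerator: 1, denominator: 10 })))
        .count();
    println!("sampled {hits} of 100000 (~10% expected)");
}
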
@@ -190,6 +190,10 @@ def test_fully_custom_config(positive_env: NeonEnv):
        "gc_compaction_initial_threshold_kb": 1024000,
        "gc_compaction_ratio_percent": 200,
        "image_creation_preempt_threshold": 5,
        "sampling_ratio": {
            "numerator": 0,
            "denominator": 10,
        },
    }

    vps_http = env.storage_controller.pageserver_api()
@@ -1099,3 +1099,70 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
    # Warm up the current secondary.
    ps_attached.http_client().tenant_secondary_download(tenant_id, wait_ms=100)
    wait_until(lambda: all_layers_downloaded(ps_secondary, expected_locally))


@run_only_on_default_postgres("PG version is not interesting here")
@pytest.mark.parametrize("action", ["delete_timeline", "detach"])
def test_io_metrics_match_secondary_timeline_lifecycle(
    neon_env_builder: NeonEnvBuilder, action: str
):
    """
    Check that IO metrics for secondary timelines are de-registered when the timeline
    is removed
    """
    neon_env_builder.num_pageservers = 2
    env = neon_env_builder.init_configs()
    env.start()

    tenant_id = TenantId.generate()
    parent_timeline_id = TimelineId.generate()

    # We do heatmap uploads and pulls manually
    tenant_conf = {"heatmap_period": "0s"}
    env.create_tenant(
        tenant_id, parent_timeline_id, conf=tenant_conf, placement_policy='{"Attached":1}'
    )

    child_timeline_id = env.create_branch("foo", tenant_id)

    attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
    ps_attached = env.get_pageserver(attached_to_id)
    ps_secondary = next(p for p in env.pageservers if p != ps_attached)

    ps_attached.http_client().tenant_heatmap_upload(tenant_id)
    status, _ = ps_secondary.http_client().tenant_secondary_download(tenant_id, wait_ms=5000)
    assert status == 200

    labels = {
        "operation": "write",
        "tenant_id": str(tenant_id),
        "timeline_id": str(child_timeline_id),
    }
    bytes_written = (
        ps_secondary.http_client()
        .get_metrics()
        .query_one("pageserver_io_operations_bytes_total", labels)
        .value
    )

    assert bytes_written == 0

    if action == "delete_timeline":
        env.storage_controller.pageserver_api().timeline_delete(tenant_id, child_timeline_id)
        ps_attached.http_client().tenant_heatmap_upload(tenant_id)
        status, _ = ps_secondary.http_client().tenant_secondary_download(tenant_id, wait_ms=5000)
        assert status == 200
    elif action == "detach":
        env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
        env.storage_controller.reconcile_until_idle()
    else:
        raise Exception("Unexpected action")

    assert (
        len(
            ps_secondary.http_client()
            .get_metrics()
            .query_all("pageserver_io_operations_bytes_total", labels)
        )
        == 0
    )
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: bce3e48d8a...8cca70c22e
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 4ac24a747c...23708b3aca
2
vendor/postgres-v16
vendored
Submodule vendor/postgres-v16 updated: 26c7d3f6de...746bd9ffe5
2
vendor/postgres-v17
vendored
Submodule vendor/postgres-v17 updated: 7ec41bf6cd...c9e4ff5a38
8
vendor/revisions.json
vendored
@@ -1,18 +1,18 @@
{
  "v17": [
    "17.4",
    "7ec41bf6cd92a4af751272145fdd590270c491da"
    "c9e4ff5a38907acd71107634055bf2609aba43a5"
  ],
  "v16": [
    "16.8",
    "26c7d3f6de6f361c8923bb80d7563853b4a04958"
    "746bd9ffe5c29bce030eaea1031054057f3c5d45"
  ],
  "v15": [
    "15.12",
    "4ac24a747cd897119ce9b20547b3b04eba2cacbd"
    "23708b3aca9adf163aa0973eb63d9afc0e4a04c3"
  ],
  "v14": [
    "14.17",
    "bce3e48d8a72e70e72dfee1b7421fecd0f1b00ac"
    "8cca70c22e2894dd4645f9a940086ac437b0a11b"
  ]
}