From de99ee2c0d4c2640309eb3e160e1a7f50141abf9 Mon Sep 17 00:00:00 2001 From: Stas Kelvich Date: Tue, 11 Apr 2023 12:50:10 +0300 Subject: [PATCH 01/77] Add more proxy cnames --- .../prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml | 2 +- .../helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml | 2 +- .github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml | 2 +- .github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml index 36dac8309d..5a98217bae 100644 --- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml @@ -24,7 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" domain: "*.ap-southeast-1.aws.neon.tech" - extraDomains: ["*.ap-southeast-1.retooldb.com"] + extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"] sentryEnvironment: "production" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" diff --git a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml index f5b2f31cb9..a9ee49d82f 100644 --- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml @@ -24,7 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" domain: "*.eu-central-1.aws.neon.tech" - extraDomains: ["*.eu-central-1.retooldb.com"] + extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"] sentryEnvironment: "production" wssPort: 8443 metricCollectionEndpoint: 
"http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" diff --git a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml index 0be78d868a..239a9911c7 100644 --- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml @@ -24,7 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" domain: "*.us-east-2.aws.neon.tech" - extraDomains: ["*.us-east-2.retooldb.com"] + extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"] sentryEnvironment: "production" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" diff --git a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml index 79115be0e2..c987ae236a 100644 --- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml @@ -24,7 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" domain: "*.us-west-2.aws.neon.tech" - extraDomains: ["*.us-west-2.retooldb.com"] + extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"] sentryEnvironment: "production" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" From 40a68e907753b6813d00d8fd1266601c7e929132 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Tue, 11 Apr 2023 15:05:35 +0200 Subject: [PATCH 02/77] [compute_ctl] Add timeout for `tracing_utils::shutdown_tracing()` (#3982) Shutting down OTEL tracing provider may hang for quite some time, see, for example: - https://github.com/open-telemetry/opentelemetry-rust/issues/868 - and our problems with staging 
https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636 Yet, we want computes to shut down fast enough, as we may need a new one for the same timeline ASAP. So wait no longer than 2s for the shutdown to complete, then just error out and exit the main thread. Related to neondatabase/cloud#3707 --- compute_tools/src/bin/compute_ctl.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index d61eae5f7a..bce860b56b 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -34,7 +34,7 @@ use std::fs::File; use std::panic; use std::path::Path; use std::process::exit; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{mpsc, Arc, Condvar, Mutex}; use std::{thread, time::Duration}; use anyhow::{Context, Result}; @@ -239,10 +239,25 @@ fn main() -> Result<()> { thread::sleep(Duration::from_secs(30)); } - info!("shutting down tracing"); // Shutdown trace pipeline gracefully, so that it has a chance to send any - // pending traces before we exit. - tracing_utils::shutdown_tracing(); + // pending traces before we exit. Shutting down OTEL tracing provider may + // hang for quite some time, see, for example: + // - https://github.com/open-telemetry/opentelemetry-rust/issues/868 + // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636 + // + // Yet, we want computes to shut down fast enough, as we may need a new one + // for the same timeline ASAP. So wait no longer than 2s for the shutdown to + // complete, then just error out and exit the main thread. 
+ info!("shutting down tracing"); + let (sender, receiver) = mpsc::channel(); + let _ = thread::spawn(move || { + tracing_utils::shutdown_tracing(); + sender.send(()).ok() + }); + let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000)); + if shutdown_res.is_err() { + error!("timed out while shutting down tracing, exiting anyway"); + } info!("shutting down"); exit(exit_code.unwrap_or(1)) From 3c9f42a2e21a4bbfd4c669c5b7b52fad454ffae4 Mon Sep 17 00:00:00 2001 From: Stas Kelvich Date: Tue, 11 Apr 2023 22:28:18 +0300 Subject: [PATCH 03/77] Support aarch64 in walredo seccomp code (#3996) Aarch64 doesn't implement some old syscalls like open and select. Use openat instead of open to check if seccomp is supported. Leave both select and pselect6 in the allowlist since we don't call select syscall directly and may hope that libc will call pselect6 on aarch64. To check whether some syscall is supported it is possible to use `scmp_sys_resolver` from seccomp package: ``` > apt install seccomp > scmp_sys_resolver -a x86_64 select 23 > scmp_sys_resolver -a aarch64 select -10101 > scmp_sys_resolver -a aarch64 pselect6 72 ``` Negative value means that syscall is not supported. Another cross-check is to look up the actual syscall table in `unistd.h`. To resolve all the macros one can use `gcc -E` as it is done in `dump_sys_aarch64()` function in libseccomp/src/arch-syscall-validate. --------- Co-authored-by: Heikki Linnakangas --- pgxn/neon_walredo/seccomp.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pgxn/neon_walredo/seccomp.c b/pgxn/neon_walredo/seccomp.c index 5d5ba549ef..1e8f6682a2 100644 --- a/pgxn/neon_walredo/seccomp.c +++ b/pgxn/neon_walredo/seccomp.c @@ -9,6 +9,14 @@ * To prevent this, it has been decided to limit possible interactions * with the outside world using the Secure Computing BPF mode. * + * This code is intended to support both x86_64 and aarch64. 
The latter + * doesn't implement some syscalls like open and select. We allow both + * select (absent on aarch64) and pselect6 (present on both architectures) + * We call select(2) through libc, and the libc wrapper calls select or pselect6 + * depending on the architecture. You can check which syscalls are present on + * different architectures with the `scmp_sys_resolver` tool from the + * seccomp package. + * * We use this mode to disable all syscalls not in the allowlist. This * approach has its pros & cons: * @@ -73,8 +81,6 @@ * I suspect that certain libc functions might involve slightly * different syscalls, e.g. select/pselect6/pselect6_time64/whatever. * - * - Test on any arch other than amd64 to see if it works there. - * *------------------------------------------------------------------------- */ @@ -122,9 +128,10 @@ seccomp_load_rules(PgSeccompRule *rules, int count) /* * First, check that open of a well-known file works. - * XXX: We use raw syscall() to call the very open(). + * XXX: We use raw syscall() to call the very openat() which is + * present both on x86_64 and on aarch64. 
*/ - fd = syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0); + fd = syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0); if (seccomp_test_sighandler_done) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), @@ -135,15 +142,15 @@ seccomp_load_rules(PgSeccompRule *rules, int count) errmsg("seccomp: could not open /dev/null for seccomp testing: %m"))); close((int) fd); - /* Set a trap on open() to test seccomp bpf */ - rule = PG_SCMP(open, SCMP_ACT_TRAP); + /* Set a trap on openat() to test seccomp bpf */ + rule = PG_SCMP(openat, SCMP_ACT_TRAP); if (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("seccomp: could not load test trap"))); - /* Finally, check that open() now raises SIGSYS */ - (void) syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0); + /* Finally, check that openat() now raises SIGSYS */ + (void) syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0); if (!seccomp_test_sighandler_done) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), @@ -224,7 +231,7 @@ seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unus die(1, DIE_PREFIX "bad signal number\n"); /* TODO: maybe somehow extract the hardcoded syscall number */ - if (info->si_syscall != SCMP_SYS(open)) + if (info->si_syscall != SCMP_SYS(openat)) die(1, DIE_PREFIX "bad syscall number\n"); #undef DIE_PREFIX From 6064a26963ec7811989da0f944904622997d964f Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 12 Apr 2023 01:13:17 +0300 Subject: [PATCH 04/77] Refactor 'spec' in ComputeState. Sometimes, it contained real values, sometimes just defaults if the spec was not received yet. Make the state more clear by making it an Option instead. One consequence is that if some of the required settings like neon.tenant_id are missing from the spec file sent to the /configure endpoint, it is spotted earlier and you get an immediate HTTP error response. Not that it matters very much, but it's nicer nevertheless. 
--- compute_tools/src/bin/compute_ctl.rs | 53 ++++---------- compute_tools/src/compute.rs | 101 +++++++++++++++++---------- compute_tools/src/http/api.rs | 14 ++-- libs/compute_api/src/responses.rs | 4 +- 4 files changed, 91 insertions(+), 81 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index bce860b56b..633e603f6b 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -45,12 +45,11 @@ use url::Url; use compute_api::responses::ComputeStatus; -use compute_tools::compute::{ComputeNode, ComputeState}; +use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec}; use compute_tools::http::api::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; -use compute_tools::pg_helpers::*; use compute_tools::spec::*; fn main() -> Result<()> { @@ -73,28 +72,24 @@ fn main() -> Result<()> { // Try to use just 'postgres' if no path is provided let pgbin = matches.get_one::("pgbin").unwrap(); - let mut spec = Default::default(); - let mut spec_set = false; + let mut spec = None; let mut live_config_allowed = false; match spec_json { // First, try to get cluster spec from the cli argument Some(json) => { - spec = serde_json::from_str(json)?; - spec_set = true; + spec = Some(serde_json::from_str(json)?); } None => { // Second, try to read it from the file if path is provided if let Some(sp) = spec_path { let path = Path::new(sp); let file = File::open(path)?; - spec = serde_json::from_reader(file)?; - spec_set = true; + spec = Some(serde_json::from_reader(file)?); } else if let Some(id) = compute_id { if let Some(cp_base) = control_plane_uri { live_config_allowed = true; if let Ok(s) = get_spec_from_control_plane(cp_base, id) { - spec = s; - spec_set = true; + spec = Some(s); } } else { panic!("must specify both --control-plane-uri and --compute-id or none"); @@ -109,8 +104,13 @@ fn main() -> Result<()> { }; let 
mut new_state = ComputeState::new(); - if spec_set { - new_state.spec = spec; + let spec_set; + if let Some(spec) = spec { + let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?; + new_state.pspec = Some(pspec); + spec_set = true; + } else { + spec_set = false; } let compute_node = ComputeNode { start_time: Utc::now(), @@ -142,33 +142,10 @@ fn main() -> Result<()> { } } - // We got all we need, fill in the state. + // We got all we need, update the state. let mut state = compute.state.lock().unwrap(); - let pageserver_connstr = state - .spec - .cluster - .settings - .find("neon.pageserver_connstring") - .expect("pageserver connstr should be provided"); - let storage_auth_token = state.spec.storage_auth_token.clone(); - let tenant = state - .spec - .cluster - .settings - .find("neon.tenant_id") - .expect("tenant id should be provided"); - let timeline = state - .spec - .cluster - .settings - .find("neon.timeline_id") - .expect("tenant id should be provided"); - let startup_tracing_context = state.spec.startup_tracing_context.clone(); - - state.pageserver_connstr = pageserver_connstr; - state.storage_auth_token = storage_auth_token; - state.tenant = tenant; - state.timeline = timeline; + let pspec = state.pspec.as_ref().expect("spec must be set"); + let startup_tracing_context = pspec.spec.startup_tracing_context.clone(); state.status = ComputeStatus::Init; compute.state_changed.notify_all(); drop(state); diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 689aa6ef43..94ec2a785c 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -69,12 +69,7 @@ pub struct ComputeState { /// Timestamp of the last Postgres activity pub last_active: DateTime, pub error: Option, - pub spec: ComputeSpec, - pub tenant: String, - pub timeline: String, - pub pageserver_connstr: String, - pub storage_auth_token: Option, - + pub pspec: Option, pub metrics: ComputeMetrics, } @@ -84,11 +79,7 @@ impl ComputeState { 
status: ComputeStatus::Empty, last_active: Utc::now(), error: None, - spec: ComputeSpec::default(), - tenant: String::new(), - timeline: String::new(), - pageserver_connstr: String::new(), - storage_auth_token: None, + pspec: None, metrics: ComputeMetrics::default(), } } @@ -100,6 +91,45 @@ impl Default for ComputeState { } } +#[derive(Clone, Debug)] +pub struct ParsedSpec { + pub spec: ComputeSpec, + pub tenant: String, + pub timeline: String, + pub pageserver_connstr: String, + pub storage_auth_token: Option, +} + +impl TryFrom for ParsedSpec { + type Error = String; + fn try_from(spec: ComputeSpec) -> Result { + let pageserver_connstr = spec + .cluster + .settings + .find("neon.pageserver_connstring") + .ok_or("pageserver connstr should be provided")?; + let storage_auth_token = spec.storage_auth_token.clone(); + let tenant = spec + .cluster + .settings + .find("neon.tenant_id") + .ok_or("tenant id should be provided")?; + let timeline = spec + .cluster + .settings + .find("neon.timeline_id") + .ok_or("tenant id should be provided")?; + + Ok(ParsedSpec { + spec, + pageserver_connstr, + storage_auth_token, + tenant, + timeline, + }) + } +} + impl ComputeNode { pub fn set_status(&self, status: ComputeStatus) { let mut state = self.state.lock().unwrap(); @@ -126,13 +156,14 @@ impl ComputeNode { // unarchive it to `pgdata` directory overriding all its previous content. #[instrument(skip(self, compute_state))] fn get_basebackup(&self, compute_state: &ComputeState, lsn: &str) -> Result<()> { + let spec = compute_state.pspec.as_ref().expect("spec must be set"); let start_time = Utc::now(); - let mut config = postgres::Config::from_str(&compute_state.pageserver_connstr)?; + let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?; // Use the storage auth token from the config file, if given. // Note: this overrides any password set in the connection string. 
- if let Some(storage_auth_token) = &compute_state.storage_auth_token { + if let Some(storage_auth_token) = &spec.storage_auth_token { info!("Got storage auth token from spec file"); config.password(storage_auth_token); } else { @@ -141,14 +172,8 @@ impl ComputeNode { let mut client = config.connect(NoTls)?; let basebackup_cmd = match lsn { - "0/0" => format!( - "basebackup {} {}", - &compute_state.tenant, &compute_state.timeline - ), // First start of the compute - _ => format!( - "basebackup {} {} {}", - &compute_state.tenant, &compute_state.timeline, lsn - ), + "0/0" => format!("basebackup {} {}", &spec.tenant, &spec.timeline), // First start of the compute + _ => format!("basebackup {} {} {}", &spec.tenant, &spec.timeline, lsn), }; let copyreader = client.copy_out(basebackup_cmd.as_str())?; @@ -218,27 +243,27 @@ impl ComputeNode { /// safekeepers sync, basebackup, etc. #[instrument(skip(self, compute_state))] pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { - let spec = &compute_state.spec; + let pspec = compute_state.pspec.as_ref().expect("spec must be set"); let pgdata_path = Path::new(&self.pgdata); // Remove/create an empty pgdata directory and put configuration there. 
self.create_pgdata()?; - config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?; + config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?; info!("starting safekeepers syncing"); let lsn = self - .sync_safekeepers(compute_state.storage_auth_token.clone()) + .sync_safekeepers(pspec.storage_auth_token.clone()) .with_context(|| "failed to sync safekeepers")?; info!("safekeepers synced at LSN {}", lsn); info!( "getting basebackup@{} from pageserver {}", - lsn, &compute_state.pageserver_connstr + lsn, &pspec.pageserver_connstr ); self.get_basebackup(compute_state, &lsn).with_context(|| { format!( "failed to get basebackup@{} from pageserver {}", - lsn, &compute_state.pageserver_connstr + lsn, &pspec.pageserver_connstr ) })?; @@ -306,19 +331,20 @@ impl ComputeNode { }; // Proceed with post-startup configuration. Note, that order of operations is important. - handle_roles(&compute_state.spec, &mut client)?; - handle_databases(&compute_state.spec, &mut client)?; - handle_role_deletions(&compute_state.spec, self.connstr.as_str(), &mut client)?; - handle_grants(&compute_state.spec, self.connstr.as_str(), &mut client)?; + let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; + handle_roles(spec, &mut client)?; + handle_databases(spec, &mut client)?; + handle_role_deletions(spec, self.connstr.as_str(), &mut client)?; + handle_grants(spec, self.connstr.as_str(), &mut client)?; create_writability_check_data(&mut client)?; - handle_extensions(&compute_state.spec, &mut client)?; + handle_extensions(spec, &mut client)?; // 'Close' connection drop(client); info!( "finished configuration of compute for project {}", - compute_state.spec.cluster.cluster_id + spec.cluster.cluster_id ); Ok(()) @@ -327,19 +353,20 @@ impl ComputeNode { #[instrument(skip(self))] pub fn start_compute(&self) -> Result { let compute_state = self.state.lock().unwrap().clone(); + let spec = compute_state.pspec.as_ref().expect("spec must be set"); 
info!( "starting compute for project {}, operation {}, tenant {}, timeline {}", - compute_state.spec.cluster.cluster_id, - compute_state.spec.operation_uuid.as_ref().unwrap(), - compute_state.tenant, - compute_state.timeline, + spec.spec.cluster.cluster_id, + spec.spec.operation_uuid.as_ref().unwrap(), + spec.tenant, + spec.timeline, ); self.prepare_pgdata(&compute_state)?; let start_time = Utc::now(); - let pg = self.start_postgres(compute_state.storage_auth_token.clone())?; + let pg = self.start_postgres(spec.storage_auth_token.clone())?; self.apply_config(&compute_state)?; diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index cea45dc596..2ef2d898e1 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -3,7 +3,7 @@ use std::net::SocketAddr; use std::sync::Arc; use std::thread; -use crate::compute::{ComputeNode, ComputeState}; +use crate::compute::{ComputeNode, ComputeState, ParsedSpec}; use compute_api::requests::ConfigurationRequest; use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError}; @@ -18,8 +18,8 @@ use tracing_utils::http::OtelName; fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { ComputeStatusResponse { - tenant: state.tenant.clone(), - timeline: state.timeline.clone(), + tenant: state.pspec.as_ref().map(|pspec| pspec.tenant.clone()), + timeline: state.pspec.as_ref().map(|pspec| pspec.timeline.clone()), status: state.status, last_active: state.last_active, error: state.error.clone(), @@ -135,6 +135,12 @@ async fn handle_configure_request( let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap(); if let Ok(request) = serde_json::from_str::(&spec_raw) { let spec = request.spec; + + let parsed_spec = match ParsedSpec::try_from(spec) { + Ok(ps) => ps, + Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)), + }; + // XXX: wrap state update under lock in code blocks. 
Otherwise, // we will try to `Send` `mut state` into the spawned thread // bellow, which will cause error: @@ -150,7 +156,7 @@ async fn handle_configure_request( ); return Err((msg, StatusCode::PRECONDITION_FAILED)); } - state.spec = spec; + state.pspec = Some(parsed_spec); state.status = ComputeStatus::ConfigurationPending; compute.state_changed.notify_all(); drop(state); diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 43289a5e3e..a28c6e8996 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -12,8 +12,8 @@ pub struct GenericAPIError { #[derive(Serialize, Debug)] #[serde(rename_all = "snake_case")] pub struct ComputeStatusResponse { - pub tenant: String, - pub timeline: String, + pub tenant: Option, + pub timeline: Option, pub status: ComputeStatus, #[serde(serialize_with = "rfc3339_serialize")] pub last_active: DateTime, From ef68321b315fdf4433b6d4850d807326f40c458d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 12 Apr 2023 01:57:27 +0300 Subject: [PATCH 05/77] Use Lsn, TenantId, TimelineId types in compute_ctl. Stronger types are generally nicer. 
--- Cargo.lock | 1 + compute_tools/Cargo.toml | 1 + compute_tools/src/compute.rs | 38 ++++++++++++++++++++--------------- compute_tools/src/http/api.rs | 10 +++++++-- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b99e93e76..668487a9bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -879,6 +879,7 @@ dependencies = [ "tracing-subscriber", "tracing-utils", "url", + "utils", "workspace_hack", ] diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index f315d2b7d9..21226249cf 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -28,4 +28,5 @@ tracing-utils.workspace = true url.workspace = true compute_api.workspace = true +utils.workspace = true workspace_hack.workspace = true diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 94ec2a785c..426e2845ee 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -26,6 +26,8 @@ use chrono::{DateTime, Utc}; use postgres::{Client, NoTls}; use tokio_postgres; use tracing::{info, instrument, warn}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use compute_api::responses::{ComputeMetrics, ComputeStatus}; use compute_api::spec::ComputeSpec; @@ -94,8 +96,8 @@ impl Default for ComputeState { #[derive(Clone, Debug)] pub struct ParsedSpec { pub spec: ComputeSpec, - pub tenant: String, - pub timeline: String, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, pub pageserver_connstr: String, pub storage_auth_token: Option, } @@ -109,23 +111,27 @@ impl TryFrom for ParsedSpec { .find("neon.pageserver_connstring") .ok_or("pageserver connstr should be provided")?; let storage_auth_token = spec.storage_auth_token.clone(); - let tenant = spec + let tenant_id: TenantId = spec .cluster .settings .find("neon.tenant_id") - .ok_or("tenant id should be provided")?; - let timeline = spec + .ok_or("tenant id should be provided") + .map(|s| TenantId::from_str(&s))? 
+ .or(Err("invalid tenant id"))?; + let timeline_id: TimelineId = spec .cluster .settings .find("neon.timeline_id") - .ok_or("tenant id should be provided")?; + .ok_or("timeline id should be provided") + .map(|s| TimelineId::from_str(&s))? + .or(Err("invalid timeline id"))?; Ok(ParsedSpec { spec, pageserver_connstr, storage_auth_token, - tenant, - timeline, + tenant_id, + timeline_id, }) } } @@ -155,7 +161,7 @@ impl ComputeNode { // Get basebackup from the libpq connection to pageserver using `connstr` and // unarchive it to `pgdata` directory overriding all its previous content. #[instrument(skip(self, compute_state))] - fn get_basebackup(&self, compute_state: &ComputeState, lsn: &str) -> Result<()> { + fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> { let spec = compute_state.pspec.as_ref().expect("spec must be set"); let start_time = Utc::now(); @@ -172,8 +178,8 @@ impl ComputeNode { let mut client = config.connect(NoTls)?; let basebackup_cmd = match lsn { - "0/0" => format!("basebackup {} {}", &spec.tenant, &spec.timeline), // First start of the compute - _ => format!("basebackup {} {} {}", &spec.tenant, &spec.timeline, lsn), + Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute + _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn), }; let copyreader = client.copy_out(basebackup_cmd.as_str())?; @@ -197,7 +203,7 @@ impl ComputeNode { // Run `postgres` in a special mode with `--sync-safekeepers` argument // and return the reported LSN back to the caller. 
#[instrument(skip(self, storage_auth_token))] - fn sync_safekeepers(&self, storage_auth_token: Option) -> Result { + fn sync_safekeepers(&self, storage_auth_token: Option) -> Result { let start_time = Utc::now(); let sync_handle = Command::new(&self.pgbin) @@ -234,7 +240,7 @@ impl ComputeNode { .unwrap() .as_millis() as u64; - let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim()); + let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?; Ok(lsn) } @@ -260,7 +266,7 @@ impl ComputeNode { "getting basebackup@{} from pageserver {}", lsn, &pspec.pageserver_connstr ); - self.get_basebackup(compute_state, &lsn).with_context(|| { + self.get_basebackup(compute_state, lsn).with_context(|| { format!( "failed to get basebackup@{} from pageserver {}", lsn, &pspec.pageserver_connstr @@ -358,8 +364,8 @@ impl ComputeNode { "starting compute for project {}, operation {}, tenant {}, timeline {}", spec.spec.cluster.cluster_id, spec.spec.operation_uuid.as_ref().unwrap(), - spec.tenant, - spec.timeline, + spec.tenant_id, + spec.timeline_id, ); self.prepare_pgdata(&compute_state)?; diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 2ef2d898e1..81d4953345 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -18,8 +18,14 @@ use tracing_utils::http::OtelName; fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { ComputeStatusResponse { - tenant: state.pspec.as_ref().map(|pspec| pspec.tenant.clone()), - timeline: state.pspec.as_ref().map(|pspec| pspec.timeline.clone()), + tenant: state + .pspec + .as_ref() + .map(|pspec| pspec.tenant_id.to_string()), + timeline: state + .pspec + .as_ref() + .map(|pspec| pspec.timeline_id.to_string()), status: state.status, last_active: state.last_active, error: state.error.clone(), From 8ace7a7515bab697aec906d469d2a663315a1740 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 12 Apr 2023 01:57:43 +0300 Subject: [PATCH 06/77] Remove 
unused 'timestamp' field from ComputeSpec struct. --- libs/compute_api/src/spec.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 37fe133b68..f771910329 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -15,7 +15,10 @@ pub type PgIdent = String; #[derive(Clone, Debug, Default, Deserialize)] pub struct ComputeSpec { pub format_version: f32, - pub timestamp: String, + + // The control plane also includes a 'timestamp' field in the JSON document, + // but we don't use it for anything. Serde will ignore missing fields when + // deserializing it. pub operation_uuid: Option, /// Expected cluster state at the end of transition process. pub cluster: Cluster, From 06ce83c9125110361a78e15b01b66d36040a87b0 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 12 Apr 2023 01:57:48 +0300 Subject: [PATCH 07/77] Tolerate missing 'operation_uuid' field in spec file. 'compute_ctl' doesn't use the operation_uuid for anything, it just prints it to the log. --- compute_tools/src/compute.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 426e2845ee..07ede44c9b 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -363,7 +363,7 @@ impl ComputeNode { info!( "starting compute for project {}, operation {}, tenant {}, timeline {}", spec.spec.cluster.cluster_id, - spec.spec.operation_uuid.as_ref().unwrap(), + spec.spec.operation_uuid.as_deref().unwrap_or("None"), spec.tenant_id, spec.timeline_id, ); From d8939d4162816d0576e2a7fb4ecc0b3479c4f28b Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Apr 2023 12:39:02 +0300 Subject: [PATCH 08/77] Move walreceiver start and stop behind a struct (#3973) The PR changes module function-based walreceiver interface with a `WalReceiver` struct that exposes a few public methods, `new`, `start` and `stop` now. 
Later, the same struct is planned to be used for getting walreceiver stats (and, maybe, other extra data) to display during missing wal errors for https://github.com/neondatabase/neon/issues/2106 Now though, the change required extra logic changes: * due to the `WalReceiver` struct added, it became easier to pass `ctx` and later do a `detached_child` instead of https://github.com/neondatabase/neon/blob/bfee4127014022a43bd85bccb562ed4bc62dc075/pageserver/src/tenant/timeline.rs#L1379-L1381 * `WalReceiver::start` which is now the public API to start the walreceiver, could return an `Err` which now may turn a tenant into `Broken`, same as the timeline that it tries to load during startup. * `WalReceiverConf` was added to group walreceiver parameters from pageserver's tenant config --- pageserver/src/tenant.rs | 49 +++-- pageserver/src/tenant/timeline.rs | 84 ++++---- pageserver/src/tenant/timeline/walreceiver.rs | 133 +++++++++++- .../walreceiver/connection_manager.rs | 201 ++++++------------ .../walreceiver/walreceiver_connection.rs | 4 +- 5 files changed, 268 insertions(+), 203 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7fac7d2ac0..03a4ff8c8e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -177,9 +177,9 @@ impl UninitializedTimeline<'_> { /// /// The new timeline is initialized in Active state, and its background jobs are /// started - pub fn initialize(self, _ctx: &RequestContext) -> anyhow::Result> { + pub fn initialize(self, ctx: &RequestContext) -> anyhow::Result> { let mut timelines = self.owning_tenant.timelines.lock().unwrap(); - self.initialize_with_lock(&mut timelines, true, true) + self.initialize_with_lock(ctx, &mut timelines, true, true) } /// Like `initialize`, but the caller is already holding lock on Tenant::timelines. @@ -189,6 +189,7 @@ impl UninitializedTimeline<'_> { /// been initialized. 
fn initialize_with_lock( mut self, + ctx: &RequestContext, timelines: &mut HashMap>, load_layer_map: bool, activate: bool, @@ -229,7 +230,9 @@ impl UninitializedTimeline<'_> { new_timeline.maybe_spawn_flush_loop(); if activate { - new_timeline.activate(); + new_timeline + .activate(ctx) + .context("initializing timeline activation")?; } } } @@ -469,7 +472,7 @@ impl Tenant { local_metadata: Option, ancestor: Option>, first_save: bool, - _ctx: &RequestContext, + ctx: &RequestContext, ) -> anyhow::Result<()> { let tenant_id = self.tenant_id; @@ -504,7 +507,7 @@ impl Tenant { // Do not start walreceiver here. We do need loaded layer map for reconcile_with_remote // But we shouldnt start walreceiver before we have all the data locally, because working walreceiver // will ingest data which may require looking at the layers which are not yet available locally - match timeline.initialize_with_lock(&mut timelines_accessor, true, false) { + match timeline.initialize_with_lock(ctx, &mut timelines_accessor, true, false) { Ok(new_timeline) => new_timeline, Err(e) => { error!("Failed to initialize timeline {tenant_id}/{timeline_id}: {e:?}"); @@ -629,7 +632,7 @@ impl Tenant { /// /// Background task that downloads all data for a tenant and brings it to Active state. /// - #[instrument(skip(self, ctx), fields(tenant_id=%self.tenant_id))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_id))] async fn attach(self: &Arc, ctx: RequestContext) -> anyhow::Result<()> { // Create directory with marker file to indicate attaching state. // The load_local_tenants() function in tenant::mgr relies on the marker file @@ -750,7 +753,7 @@ impl Tenant { // Start background operations and open the tenant for business. // The loops will shut themselves down when they notice that the tenant is inactive. - self.activate()?; + self.activate(&ctx)?; info!("Done"); @@ -1022,7 +1025,7 @@ impl Tenant { // Start background operations and open the tenant for business. 
// The loops will shut themselves down when they notice that the tenant is inactive. - self.activate()?; + self.activate(ctx)?; info!("Done"); @@ -1358,12 +1361,7 @@ impl Tenant { // Stop the walreceiver first. debug!("waiting for wal receiver to shutdown"); - task_mgr::shutdown_tasks( - Some(TaskKind::WalReceiverManager), - Some(self.tenant_id), - Some(timeline_id), - ) - .await; + timeline.walreceiver.stop().await; debug!("wal receiver shutdown confirmed"); info!("waiting for timeline tasks to shutdown"); @@ -1450,7 +1448,7 @@ impl Tenant { } /// Changes tenant status to active, unless shutdown was already requested. - fn activate(&self) -> anyhow::Result<()> { + fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> { let mut result = Ok(()); self.state.send_modify(|current_state| { match *current_state { @@ -1484,7 +1482,20 @@ impl Tenant { tasks::start_background_loops(self.tenant_id); for timeline in not_broken_timelines { - timeline.activate(); + match timeline + .activate(ctx) + .context("timeline activation for activating tenant") + { + Ok(()) => {} + Err(e) => { + error!( + "Failed to activate timeline {}: {:#}", + timeline.timeline_id, e + ); + timeline.set_state(TimelineState::Broken); + *current_state = TenantState::Broken; + } + } } } } @@ -2093,7 +2104,7 @@ impl Tenant { src_timeline: &Arc, dst_id: TimelineId, start_lsn: Option, - _ctx: &RequestContext, + ctx: &RequestContext, ) -> anyhow::Result> { let src_id = src_timeline.timeline_id; @@ -2186,7 +2197,7 @@ impl Tenant { false, Some(Arc::clone(src_timeline)), )? - .initialize_with_lock(&mut timelines, true, true)?; + .initialize_with_lock(ctx, &mut timelines, true, true)?; drop(timelines); // Root timeline gets its layers during creation and uploads them along with the metadata. @@ -2299,7 +2310,7 @@ impl Tenant { let timeline = { let mut timelines = self.timelines.lock().unwrap(); - raw_timeline.initialize_with_lock(&mut timelines, false, true)? 
+ raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)? }; info!( diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e80e32644b..4b0d7a6994 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -14,6 +14,7 @@ use pageserver_api::models::{ DownloadRemoteLayersTaskState, LayerMapInfo, LayerResidenceStatus, TimelineState, }; use remote_storage::GenericRemoteStorage; +use storage_broker::BrokerClientChannel; use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError}; use tokio_util::sync::CancellationToken; use tracing::*; @@ -30,7 +31,7 @@ use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak}; use std::time::{Duration, Instant, SystemTime}; -use crate::broker_client::is_broker_client_initialized; +use crate::broker_client::{get_broker_client, is_broker_client_initialized}; use crate::context::{DownloadBehavior, RequestContext}; use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata}; use crate::tenant::storage_layer::{ @@ -71,10 +72,10 @@ use crate::walredo::WalRedoManager; use crate::METADATA_FILE_NAME; use crate::ZERO_PAGE; use crate::{is_temporary, task_mgr}; -use walreceiver::spawn_connection_manager_task; pub(super) use self::eviction_task::EvictionTaskTenantState; use self::eviction_task::EvictionTaskTimelineState; +use self::walreceiver::{WalReceiver, WalReceiverConf}; use super::layer_map::BatchedUpdates; use super::remote_timeline_client::index::IndexPart; @@ -214,6 +215,7 @@ pub struct Timeline { /// or None if WAL receiver has not received anything for this timeline /// yet. 
pub last_received_wal: Mutex>, + pub walreceiver: WalReceiver, /// Relation size cache pub rel_size_cache: RwLock>, @@ -866,10 +868,18 @@ impl Timeline { Ok(()) } - pub fn activate(self: &Arc) { + pub fn activate(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { + if is_broker_client_initialized() { + self.launch_wal_receiver(ctx, get_broker_client().clone())?; + } else if cfg!(test) { + info!("not launching WAL receiver because broker client hasn't been initialized"); + } else { + anyhow::bail!("broker client not initialized"); + } + self.set_state(TimelineState::Active); - self.launch_wal_receiver(); self.launch_eviction_task(); + Ok(()) } pub fn set_state(&self, new_state: TimelineState) { @@ -1220,7 +1230,31 @@ impl Timeline { let (layer_flush_start_tx, _) = tokio::sync::watch::channel(0); let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(()))); + let tenant_conf_guard = tenant_conf.read().unwrap(); + let wal_connect_timeout = tenant_conf_guard + .walreceiver_connect_timeout + .unwrap_or(conf.default_tenant_conf.walreceiver_connect_timeout); + let lagging_wal_timeout = tenant_conf_guard + .lagging_wal_timeout + .unwrap_or(conf.default_tenant_conf.lagging_wal_timeout); + let max_lsn_wal_lag = tenant_conf_guard + .max_lsn_wal_lag + .unwrap_or(conf.default_tenant_conf.max_lsn_wal_lag); + drop(tenant_conf_guard); + Arc::new_cyclic(|myself| { + let walreceiver = WalReceiver::new( + TenantTimelineId::new(tenant_id, timeline_id), + Weak::clone(myself), + WalReceiverConf { + wal_connect_timeout, + lagging_wal_timeout, + max_lsn_wal_lag, + auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(), + availability_zone: conf.availability_zone.clone(), + }, + ); + let mut result = Timeline { conf, tenant_conf, @@ -1231,6 +1265,7 @@ impl Timeline { layers: RwLock::new(LayerMap::default()), walredo_mgr, + walreceiver, remote_client: remote_client.map(Arc::new), @@ -1350,44 +1385,17 @@ impl Timeline { *flush_loop_state = FlushLoopState::Running; 
} - pub(super) fn launch_wal_receiver(self: &Arc) { - if !is_broker_client_initialized() { - if cfg!(test) { - info!("not launching WAL receiver because broker client hasn't been initialized"); - return; - } else { - panic!("broker client not initialized"); - } - } - + pub(super) fn launch_wal_receiver( + &self, + ctx: &RequestContext, + broker_client: BrokerClientChannel, + ) -> anyhow::Result<()> { info!( "launching WAL receiver for timeline {} of tenant {}", self.timeline_id, self.tenant_id ); - let tenant_conf_guard = self.tenant_conf.read().unwrap(); - let lagging_wal_timeout = tenant_conf_guard - .lagging_wal_timeout - .unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout); - let walreceiver_connect_timeout = tenant_conf_guard - .walreceiver_connect_timeout - .unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout); - let max_lsn_wal_lag = tenant_conf_guard - .max_lsn_wal_lag - .unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag); - drop(tenant_conf_guard); - let self_clone = Arc::clone(self); - let background_ctx = - // XXX: this is a detached_child. Plumb through the ctx from call sites. 
- RequestContext::todo_child(TaskKind::WalReceiverManager, DownloadBehavior::Error); - spawn_connection_manager_task( - self_clone, - walreceiver_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, - crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(), - self.conf.availability_zone.clone(), - background_ctx, - ); + self.walreceiver.start(ctx, broker_client)?; + Ok(()) } /// diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index f33a12c5cc..00f446af38 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -23,14 +23,133 @@ mod connection_manager; mod walreceiver_connection; -use crate::task_mgr::WALRECEIVER_RUNTIME; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{self, TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::timeline::walreceiver::connection_manager::{ + connection_manager_loop_step, ConnectionManagerState, +}; +use anyhow::Context; use std::future::Future; +use std::num::NonZeroU64; +use std::ops::ControlFlow; +use std::sync::atomic::{self, AtomicBool}; +use std::sync::{Arc, Weak}; +use std::time::Duration; +use storage_broker::BrokerClientChannel; +use tokio::select; use tokio::sync::watch; use tokio_util::sync::CancellationToken; use tracing::*; -pub use connection_manager::spawn_connection_manager_task; +use utils::id::TenantTimelineId; + +use super::Timeline; + +#[derive(Clone)] +pub struct WalReceiverConf { + /// The timeout on the connection to safekeeper for WAL streaming. + pub wal_connect_timeout: Duration, + /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one. + pub lagging_wal_timeout: Duration, + /// The Lsn lag to use to determine when the current connection is lagging to much behind and reconnect to the other one. 
+ pub max_lsn_wal_lag: NonZeroU64, + pub auth_token: Option>, + pub availability_zone: Option, +} + +pub struct WalReceiver { + timeline: TenantTimelineId, + timeline_ref: Weak, + conf: WalReceiverConf, + started: AtomicBool, +} + +impl WalReceiver { + pub fn new( + timeline: TenantTimelineId, + timeline_ref: Weak, + conf: WalReceiverConf, + ) -> Self { + Self { + timeline, + timeline_ref, + conf, + started: AtomicBool::new(false), + } + } + + pub fn start( + &self, + ctx: &RequestContext, + mut broker_client: BrokerClientChannel, + ) -> anyhow::Result<()> { + if self.started.load(atomic::Ordering::Acquire) { + anyhow::bail!("Wal receiver is already started"); + } + + let timeline = self.timeline_ref.upgrade().with_context(|| { + format!("walreceiver start on a dropped timeline {}", self.timeline) + })?; + + let tenant_id = timeline.tenant_id; + let timeline_id = timeline.timeline_id; + let walreceiver_ctx = + ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error); + + let wal_receiver_conf = self.conf.clone(); + task_mgr::spawn( + WALRECEIVER_RUNTIME.handle(), + TaskKind::WalReceiverManager, + Some(tenant_id), + Some(timeline_id), + &format!("walreceiver for timeline {tenant_id}/{timeline_id}"), + false, + async move { + info!("WAL receiver manager started, connecting to broker"); + let mut connection_manager_state = ConnectionManagerState::new( + timeline, + wal_receiver_conf, + ); + loop { + select! 
{ + _ = task_mgr::shutdown_watcher() => { + info!("WAL receiver shutdown requested, shutting down"); + connection_manager_state.shutdown().await; + return Ok(()); + }, + loop_step_result = connection_manager_loop_step( + &mut broker_client, + &mut connection_manager_state, + &walreceiver_ctx, + ) => match loop_step_result { + ControlFlow::Continue(()) => continue, + ControlFlow::Break(()) => { + info!("Connection manager loop ended, shutting down"); + connection_manager_state.shutdown().await; + return Ok(()); + } + }, + } + } + }.instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id)) + ); + + self.started.store(true, atomic::Ordering::Release); + + Ok(()) + } + + pub async fn stop(&self) { + task_mgr::shutdown_tasks( + Some(TaskKind::WalReceiverManager), + Some(self.timeline.tenant_id), + Some(self.timeline.timeline_id), + ) + .await; + self.started.store(false, atomic::Ordering::Release); + } +} /// A handle of an asynchronous task. /// The task has a channel that it can use to communicate its lifecycle events in a certain form, see [`TaskEvent`] @@ -39,26 +158,26 @@ pub use connection_manager::spawn_connection_manager_task; /// Note that the communication happens via the `watch` channel, that does not accumulate the events, replacing the old one with the never one on submission. /// That may lead to certain events not being observed by the listener. #[derive(Debug)] -pub struct TaskHandle { +struct TaskHandle { join_handle: Option>>, events_receiver: watch::Receiver>, cancellation: CancellationToken, } -pub enum TaskEvent { +enum TaskEvent { Update(TaskStateUpdate), End(anyhow::Result<()>), } #[derive(Debug, Clone)] -pub enum TaskStateUpdate { +enum TaskStateUpdate { Started, Progress(E), } impl TaskHandle { /// Initializes the task, starting it immediately after the creation. 
- pub fn spawn( + fn spawn( task: impl FnOnce(watch::Sender>, CancellationToken) -> Fut + Send + 'static, ) -> Self where @@ -131,7 +250,7 @@ impl TaskHandle { } /// Aborts current task, waiting for it to finish. - pub async fn shutdown(self) { + async fn shutdown(self) { if let Some(jh) = self.join_handle { self.cancellation.cancel(); match jh.await { diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index de07676ffe..efcbfbce3d 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -11,11 +11,9 @@ use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration}; -use super::TaskStateUpdate; -use crate::broker_client::get_broker_client; +use super::{TaskStateUpdate, WalReceiverConf}; use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::WALRECEIVER_RUNTIME; -use crate::task_mgr::{self, TaskKind}; +use crate::task_mgr::TaskKind; use crate::tenant::Timeline; use anyhow::Context; use chrono::{NaiveDateTime, Utc}; @@ -38,75 +36,17 @@ use utils::{ use super::{walreceiver_connection::WalConnectionStatus, TaskEvent, TaskHandle}; -/// Spawns the loop to take care of the timeline's WAL streaming connection. 
-pub fn spawn_connection_manager_task( - timeline: Arc, - wal_connect_timeout: Duration, - lagging_wal_timeout: Duration, - max_lsn_wal_lag: NonZeroU64, - auth_token: Option>, - availability_zone: Option, - ctx: RequestContext, -) { - let mut broker_client = get_broker_client().clone(); - - let tenant_id = timeline.tenant_id; - let timeline_id = timeline.timeline_id; - - task_mgr::spawn( - WALRECEIVER_RUNTIME.handle(), - TaskKind::WalReceiverManager, - Some(tenant_id), - Some(timeline_id), - &format!("walreceiver for timeline {tenant_id}/{timeline_id}"), - false, - async move { - info!("WAL receiver manager started, connecting to broker"); - let mut walreceiver_state = WalreceiverState::new( - timeline, - wal_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, - auth_token, - availability_zone, - ); - loop { - select! { - _ = task_mgr::shutdown_watcher() => { - info!("WAL receiver shutdown requested, shutting down"); - walreceiver_state.shutdown().await; - return Ok(()); - }, - loop_step_result = connection_manager_loop_step( - &mut broker_client, - &mut walreceiver_state, - &ctx, - ) => match loop_step_result { - ControlFlow::Continue(()) => continue, - ControlFlow::Break(()) => { - info!("Connection manager loop ended, shutting down"); - walreceiver_state.shutdown().await; - return Ok(()); - } - }, - } - } - } - .instrument( - info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id), - ), - ); -} - /// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker. /// Based on the updates, desides whether to start, keep or stop a WAL receiver task. /// If storage broker subscription is cancelled, exits. 
-async fn connection_manager_loop_step( +pub(super) async fn connection_manager_loop_step( broker_client: &mut BrokerClientChannel, - walreceiver_state: &mut WalreceiverState, + connection_manager_state: &mut ConnectionManagerState, ctx: &RequestContext, ) -> ControlFlow<(), ()> { - let mut timeline_state_updates = walreceiver_state.timeline.subscribe_for_state_updates(); + let mut timeline_state_updates = connection_manager_state + .timeline + .subscribe_for_state_updates(); match wait_for_active_timeline(&mut timeline_state_updates).await { ControlFlow::Continue(()) => {} @@ -117,8 +57,8 @@ async fn connection_manager_loop_step( } let id = TenantTimelineId { - tenant_id: walreceiver_state.timeline.tenant_id, - timeline_id: walreceiver_state.timeline.timeline_id, + tenant_id: connection_manager_state.timeline.tenant_id, + timeline_id: connection_manager_state.timeline.timeline_id, }; // Subscribe to the broker updates. Stream shares underlying TCP connection @@ -128,7 +68,7 @@ async fn connection_manager_loop_step( info!("Subscribed for broker timeline updates"); loop { - let time_until_next_retry = walreceiver_state.time_until_next_retry(); + let time_until_next_retry = connection_manager_state.time_until_next_retry(); // These things are happening concurrently: // @@ -141,12 +81,12 @@ async fn connection_manager_loop_step( // - timeline state changes to something that does not allow walreceiver to run concurrently select! { Some(wal_connection_update) = async { - match walreceiver_state.wal_connection.as_mut() { + match connection_manager_state.wal_connection.as_mut() { Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await), None => None, } } => { - let wal_connection = walreceiver_state.wal_connection.as_mut() + let wal_connection = connection_manager_state.wal_connection.as_mut() .expect("Should have a connection, as checked by the corresponding select! 
guard"); match wal_connection_update { TaskEvent::Update(TaskStateUpdate::Started) => {}, @@ -156,7 +96,7 @@ async fn connection_manager_loop_step( // from this safekeeper. This is good enough to clean unsuccessful // retries history and allow reconnecting to this safekeeper without // sleeping for a long time. - walreceiver_state.wal_connection_retries.remove(&wal_connection.sk_id); + connection_manager_state.wal_connection_retries.remove(&wal_connection.sk_id); } wal_connection.status = new_status; } @@ -165,7 +105,7 @@ async fn connection_manager_loop_step( Ok(()) => debug!("WAL receiving task finished"), Err(e) => error!("wal receiver task finished with an error: {e:?}"), } - walreceiver_state.drop_old_connection(false).await; + connection_manager_state.drop_old_connection(false).await; }, } }, @@ -173,7 +113,7 @@ async fn connection_manager_loop_step( // Got a new update from the broker broker_update = broker_subscription.message() => { match broker_update { - Ok(Some(broker_update)) => walreceiver_state.register_timeline_update(broker_update), + Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update), Err(e) => { error!("broker subscription failed: {e}"); return ControlFlow::Continue(()); @@ -187,12 +127,12 @@ async fn connection_manager_loop_step( new_event = async { loop { - if walreceiver_state.timeline.current_state() == TimelineState::Loading { + if connection_manager_state.timeline.current_state() == TimelineState::Loading { warn!("wal connection manager should only be launched after timeline has become active"); } match timeline_state_updates.changed().await { Ok(()) => { - let new_state = walreceiver_state.timeline.current_state(); + let new_state = connection_manager_state.timeline.current_state(); match new_state { // we're already active as walreceiver, no need to reactivate TimelineState::Active => continue, @@ -234,9 +174,9 @@ async fn connection_manager_loop_step( } => debug!("Waking up for the next retry 
after waiting for {time_until_next_retry:?}"), } - if let Some(new_candidate) = walreceiver_state.next_connection_candidate() { + if let Some(new_candidate) = connection_manager_state.next_connection_candidate() { info!("Switching to new connection candidate: {new_candidate:?}"); - walreceiver_state + connection_manager_state .change_connection(new_candidate, ctx) .await } @@ -314,25 +254,17 @@ const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0; const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5; /// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible. -struct WalreceiverState { +pub(super) struct ConnectionManagerState { id: TenantTimelineId, - /// Use pageserver data about the timeline to filter out some of the safekeepers. timeline: Arc, - /// The timeout on the connection to safekeeper for WAL streaming. - wal_connect_timeout: Duration, - /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one. - lagging_wal_timeout: Duration, - /// The Lsn lag to use to determine when the current connection is lagging to much behind and reconnect to the other one. - max_lsn_wal_lag: NonZeroU64, + conf: WalReceiverConf, /// Current connection to safekeeper for WAL streaming. wal_connection: Option, /// Info about retries and unsuccessful attempts to connect to safekeepers. wal_connection_retries: HashMap, /// Data about all timelines, available for connection, fetched from storage broker, grouped by their corresponding safekeeper node id. wal_stream_candidates: HashMap, - auth_token: Option>, - availability_zone: Option, } /// Current connection data. 
@@ -375,15 +307,8 @@ struct BrokerSkTimeline { latest_update: NaiveDateTime, } -impl WalreceiverState { - fn new( - timeline: Arc, - wal_connect_timeout: Duration, - lagging_wal_timeout: Duration, - max_lsn_wal_lag: NonZeroU64, - auth_token: Option>, - availability_zone: Option, - ) -> Self { +impl ConnectionManagerState { + pub(super) fn new(timeline: Arc, conf: WalReceiverConf) -> Self { let id = TenantTimelineId { tenant_id: timeline.tenant_id, timeline_id: timeline.timeline_id, @@ -391,14 +316,10 @@ impl WalreceiverState { Self { id, timeline, - wal_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, + conf, wal_connection: None, wal_stream_candidates: HashMap::new(), wal_connection_retries: HashMap::new(), - auth_token, - availability_zone, } } @@ -407,7 +328,7 @@ impl WalreceiverState { self.drop_old_connection(true).await; let id = self.id; - let connect_timeout = self.wal_connect_timeout; + let connect_timeout = self.conf.wal_connect_timeout; let timeline = Arc::clone(&self.timeline); let ctx = ctx.detached_child( TaskKind::WalReceiverConnectionHandler, @@ -563,7 +484,7 @@ impl WalreceiverState { (now - existing_wal_connection.status.latest_connection_update).to_std() { // Drop connection if we haven't received keepalive message for a while. - if latest_interaciton > self.wal_connect_timeout { + if latest_interaciton > self.conf.wal_connect_timeout { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, wal_source_connconf: new_wal_source_connconf, @@ -573,7 +494,7 @@ impl WalreceiverState { existing_wal_connection.status.latest_connection_update, ), check_time: now, - threshold: self.wal_connect_timeout, + threshold: self.conf.wal_connect_timeout, }, }); } @@ -589,7 +510,7 @@ impl WalreceiverState { // Check if the new candidate has much more WAL than the current one. 
match new_commit_lsn.0.checked_sub(current_commit_lsn.0) { Some(new_sk_lsn_advantage) => { - if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() { + if new_sk_lsn_advantage >= self.conf.max_lsn_wal_lag.get() { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, wal_source_connconf: new_wal_source_connconf, @@ -597,16 +518,16 @@ impl WalreceiverState { reason: ReconnectReason::LaggingWal { current_commit_lsn, new_commit_lsn, - threshold: self.max_lsn_wal_lag, + threshold: self.conf.max_lsn_wal_lag, }, }); } // If we have a candidate with the same commit_lsn as the current one, which is in the same AZ as pageserver, // and the current one is not, switch to the new one. - if self.availability_zone.is_some() + if self.conf.availability_zone.is_some() && existing_wal_connection.availability_zone - != self.availability_zone - && self.availability_zone == new_availability_zone + != self.conf.availability_zone + && self.conf.availability_zone == new_availability_zone { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, @@ -677,7 +598,7 @@ impl WalreceiverState { if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since { if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() { if candidate_commit_lsn > current_commit_lsn - && waiting_for_new_wal > self.lagging_wal_timeout + && waiting_for_new_wal > self.conf.lagging_wal_timeout { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, @@ -691,7 +612,7 @@ impl WalreceiverState { existing_wal_connection.status.latest_wal_update, ), check_time: now, - threshold: self.lagging_wal_timeout, + threshold: self.conf.lagging_wal_timeout, }, }); } @@ -757,11 +678,11 @@ impl WalreceiverState { match wal_stream_connection_config( self.id, info.safekeeper_connstr.as_ref(), - match &self.auth_token { + match &self.conf.auth_token { None => None, Some(x) => Some(x), }, - self.availability_zone.as_deref(), + self.conf.availability_zone.as_deref(), ) { Ok(connstr) => 
Some((*sk_id, info, connstr)), Err(e) => { @@ -775,7 +696,7 @@ impl WalreceiverState { /// Remove candidates which haven't sent broker updates for a while. fn cleanup_old_candidates(&mut self) { let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len()); - let lagging_wal_timeout = self.lagging_wal_timeout; + let lagging_wal_timeout = self.conf.lagging_wal_timeout; self.wal_stream_candidates.retain(|node_id, broker_info| { if let Ok(time_since_latest_broker_update) = @@ -799,7 +720,7 @@ impl WalreceiverState { } } - async fn shutdown(mut self) { + pub(super) async fn shutdown(mut self) { if let Some(wal_connection) = self.wal_connection.take() { wal_connection.connection_task.shutdown().await; } @@ -903,7 +824,7 @@ mod tests { let mut state = dummy_state(&harness).await; let now = Utc::now().naive_utc(); - let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?; + let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?; let delay_over_threshold = now - lagging_wal_timeout - lagging_wal_timeout; state.wal_connection = None; @@ -914,7 +835,7 @@ mod tests { ( NodeId(3), dummy_broker_sk_timeline( - 1 + state.max_lsn_wal_lag.get(), + 1 + state.conf.max_lsn_wal_lag.get(), "delay_over_threshold", delay_over_threshold, ), @@ -948,7 +869,7 @@ mod tests { streaming_lsn: Some(Lsn(current_lsn)), }; - state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap(); + state.conf.max_lsn_wal_lag = NonZeroU64::new(100).unwrap(); state.wal_connection = Some(WalConnection { started_at: now, sk_id: connected_sk_id, @@ -966,7 +887,7 @@ mod tests { ( connected_sk_id, dummy_broker_sk_timeline( - current_lsn + state.max_lsn_wal_lag.get() * 2, + current_lsn + state.conf.max_lsn_wal_lag.get() * 2, DUMMY_SAFEKEEPER_HOST, now, ), @@ -978,7 +899,7 @@ mod tests { ( NodeId(2), dummy_broker_sk_timeline( - current_lsn + state.max_lsn_wal_lag.get() / 2, + current_lsn + state.conf.max_lsn_wal_lag.get() / 2, 
"not_enough_advanced_lsn", now, ), @@ -1003,7 +924,11 @@ mod tests { state.wal_connection = None; state.wal_stream_candidates = HashMap::from([( NodeId(0), - dummy_broker_sk_timeline(1 + state.max_lsn_wal_lag.get(), DUMMY_SAFEKEEPER_HOST, now), + dummy_broker_sk_timeline( + 1 + state.conf.max_lsn_wal_lag.get(), + DUMMY_SAFEKEEPER_HOST, + now, + ), )]); let only_candidate = state @@ -1101,7 +1026,7 @@ mod tests { let now = Utc::now().naive_utc(); let connected_sk_id = NodeId(0); - let new_lsn = Lsn(current_lsn.0 + state.max_lsn_wal_lag.get() + 1); + let new_lsn = Lsn(current_lsn.0 + state.conf.max_lsn_wal_lag.get() + 1); let connection_status = WalConnectionStatus { is_connected: true, @@ -1146,7 +1071,7 @@ mod tests { ReconnectReason::LaggingWal { current_commit_lsn: current_lsn, new_commit_lsn: new_lsn, - threshold: state.max_lsn_wal_lag + threshold: state.conf.max_lsn_wal_lag }, "Should select bigger WAL safekeeper if it starts to lag enough" ); @@ -1165,7 +1090,7 @@ mod tests { let current_lsn = Lsn(100_000).align(); let now = Utc::now().naive_utc(); - let wal_connect_timeout = chrono::Duration::from_std(state.wal_connect_timeout)?; + let wal_connect_timeout = chrono::Duration::from_std(state.conf.wal_connect_timeout)?; let time_over_threshold = Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout; @@ -1208,7 +1133,7 @@ mod tests { .. 
} => { assert_eq!(last_keep_alive, Some(time_over_threshold)); - assert_eq!(threshold, state.lagging_wal_timeout); + assert_eq!(threshold, state.conf.lagging_wal_timeout); } unexpected => panic!("Unexpected reason: {unexpected:?}"), } @@ -1228,7 +1153,7 @@ mod tests { let new_lsn = Lsn(100_100).align(); let now = Utc::now().naive_utc(); - let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?; + let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?; let time_over_threshold = Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout; @@ -1275,7 +1200,7 @@ mod tests { assert_eq!(current_commit_lsn, current_lsn); assert_eq!(candidate_commit_lsn, new_lsn); assert_eq!(last_wal_interaction, Some(time_over_threshold)); - assert_eq!(threshold, state.lagging_wal_timeout); + assert_eq!(threshold, state.conf.lagging_wal_timeout); } unexpected => panic!("Unexpected reason: {unexpected:?}"), } @@ -1289,27 +1214,29 @@ mod tests { const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr"; - async fn dummy_state(harness: &TenantHarness<'_>) -> WalreceiverState { + async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState { let (tenant, ctx) = harness.load().await; let timeline = tenant .create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx) .expect("Failed to create an empty timeline for dummy wal connection manager"); let timeline = timeline.initialize(&ctx).unwrap(); - WalreceiverState { + ConnectionManagerState { id: TenantTimelineId { tenant_id: harness.tenant_id, timeline_id: TIMELINE_ID, }, timeline, - wal_connect_timeout: Duration::from_secs(1), - lagging_wal_timeout: Duration::from_secs(1), - max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(), + conf: WalReceiverConf { + wal_connect_timeout: Duration::from_secs(1), + lagging_wal_timeout: Duration::from_secs(1), + max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(), + auth_token: None, + availability_zone: None, + 
}, wal_connection: None, wal_stream_candidates: HashMap::new(), wal_connection_retries: HashMap::new(), - auth_token: None, - availability_zone: None, } } @@ -1321,7 +1248,7 @@ mod tests { let harness = TenantHarness::create("switch_to_same_availability_zone")?; let mut state = dummy_state(&harness).await; - state.availability_zone = test_az.clone(); + state.conf.availability_zone = test_az.clone(); let current_lsn = Lsn(100_000).align(); let now = Utc::now().naive_utc(); diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index ea2f2392ea..d5099dc2a5 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -42,7 +42,7 @@ use utils::lsn::Lsn; /// Status of the connection. #[derive(Debug, Clone, Copy)] -pub struct WalConnectionStatus { +pub(super) struct WalConnectionStatus { /// If we were able to initiate a postgres connection, this means that safekeeper process is at least running. pub is_connected: bool, /// Defines a healthy connection as one on which pageserver received WAL from safekeeper @@ -60,7 +60,7 @@ pub struct WalConnectionStatus { /// Open a connection to the given safekeeper and receive WAL, sending back progress /// messages as we go. -pub async fn handle_walreceiver_connection( +pub(super) async fn handle_walreceiver_connection( timeline: Arc, wal_source_connconf: PgConnectionConfig, events_sender: watch::Sender>, From a64044a7a9a9f2b32a73b97da1fd230f9b510064 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Apr 2023 15:32:38 +0300 Subject: [PATCH 09/77] Update most of the dependencies to their latest versions (#3991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All non-trivial updates were extracted into separate commits; the `cargo hakari` data and its manifest format were also updated.
3 sets of crates remain unupdated: * `base64` — touches proxy in a lot of places and changed its api (by 0.21 version) quite strongly since our version (0.13). * `opentelemetry` and `opentelemetry-*` crates ``` error[E0308]: mismatched types --> libs/tracing-utils/src/http.rs:65:21 | 65 | span.set_parent(parent_ctx); | ---------- ^^^^^^^^^^ expected struct `opentelemetry_api::context::Context`, found struct `opentelemetry::Context` | | | arguments to this method are incorrect | = note: struct `opentelemetry::Context` and struct `opentelemetry_api::context::Context` have similar names, but are actually distinct types note: struct `opentelemetry::Context` is defined in crate `opentelemetry_api` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.19.0/src/context.rs:77:1 | 77 | pub struct Context { | ^^^^^^^^^^^^^^^^^^ note: struct `opentelemetry_api::context::Context` is defined in crate `opentelemetry_api` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.18.0/src/context.rs:77:1 | 77 | pub struct Context { | ^^^^^^^^^^^^^^^^^^ = note: perhaps two different versions of crate `opentelemetry_api` are being used? note: associated function defined here --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-opentelemetry-0.18.0/src/span_ext.rs:43:8 | 43 | fn set_parent(&self, cx: Context); | ^^^^^^^^^^ For more information about this error, try `rustc --explain E0308`. error: could not compile `tracing-utils` due to previous error warning: build failed, waiting for other jobs to finish... error: could not compile `tracing-utils` due to previous error ``` `tracing-opentelemetry` of version `0.19` is not yet released, that is supposed to have the update we need. 
* similarly, `rustls`, `tokio-rustls`, `rustls-*` and `tls-listener` crates have a similar issue: ``` error[E0308]: mismatched types --> libs/postgres_backend/tests/simple_select.rs:112:78 | 112 | let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg); | --------------------------------------------- ^^^^^^^^^^ expected struct `rustls::client::client_conn::ClientConfig`, found struct `ClientConfig` | | | arguments to this function are incorrect | = note: struct `ClientConfig` and struct `rustls::client::client_conn::ClientConfig` have similar names, but are actually distinct types note: struct `ClientConfig` is defined in crate `rustls` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.21.0/src/client/client_conn.rs:125:1 | 125 | pub struct ClientConfig { | ^^^^^^^^^^^^^^^^^^^^^^^ note: struct `rustls::client::client_conn::ClientConfig` is defined in crate `rustls` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.20.8/src/client/client_conn.rs:91:1 | 91 | pub struct ClientConfig { | ^^^^^^^^^^^^^^^^^^^^^^^ = note: perhaps two different versions of crate `rustls` are being used? note: associated function defined here --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-postgres-rustls-0.9.0/src/lib.rs:23:12 | 23 | pub fn new(config: ClientConfig) -> Self { | ^^^ For more information about this error, try `rustc --explain E0308`. error: could not compile `postgres_backend` due to previous error warning: build failed, waiting for other jobs to finish... ``` * aws crates: I could not make the new API work with bucket endpoint overload, and console e2e tests failed.
Our other tests passed; further investigation should be done in https://github.com/neondatabase/neon/issues/4008 --- .config/hakari.toml | 2 +- Cargo.lock | 1410 +++++++++++------ Cargo.toml | 26 +- libs/consumption_metrics/Cargo.toml | 17 +- libs/postgres_ffi/build.rs | 6 +- libs/remote_storage/tests/pagination_tests.rs | 7 +- libs/tracing-utils/Cargo.toml | 3 +- libs/utils/Cargo.toml | 2 +- pageserver/src/config.rs | 22 +- pageserver/src/page_service.rs | 2 +- pageserver/src/tenant.rs | 2 +- pageserver/src/tenant/config.rs | 4 +- .../tenant/remote_timeline_client/upload.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 3 +- trace/Cargo.toml | 2 - workspace_hack/Cargo.toml | 9 +- 16 files changed, 958 insertions(+), 561 deletions(-) diff --git a/.config/hakari.toml b/.config/hakari.toml index 12d2d1bf9c..15b939e86f 100644 --- a/.config/hakari.toml +++ b/.config/hakari.toml @@ -4,7 +4,7 @@ hakari-package = "workspace_hack" # Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above. -dep-format-version = "3" +dep-format-version = "4" # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. # Hakari works much better with the new feature resolver.
diff --git a/Cargo.lock b/Cargo.lock index 668487a9bd..8dde4ebb57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,28 +64,68 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] -name = "anyhow" -version = "1.0.68" +name = "anstream" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" +checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-wincon", + "concolor-override", + "concolor-query", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" + +[[package]] +name = "anstyle-parse" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-wincon" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" +dependencies = [ + "anstyle", + "windows-sys 0.45.0", +] + +[[package]] +name = "anyhow" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" dependencies = [ "backtrace", ] [[package]] name = "archery" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" +checksum = "b6cd774058b1b415c4855d8b86436c04bf050c003156fe24bc326fb3fe75c343" dependencies = [ "static_assertions", ] [[package]] 
name = "asn1-rs" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf6690c370453db30743b373a60ba498fc0d6d83b11f4abfd87a84a075db5dd4" +checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0" dependencies = [ "asn1-rs-derive", "asn1-rs-impl", @@ -105,7 +145,7 @@ checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "synstructure", ] @@ -117,46 +157,47 @@ checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "async-stream" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ "async-stream-impl", "futures-core", + "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "atomic-polyfill" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d299f547288d6db8d5c3a2916f7b2f66134b15b8c1ac1c4357dd3b8752af7bb2" +checksum = "c314e70d181aa6053b26e3f7fbf86d1dfff84f816a6175b967666b3506ef7289" dependencies = [ "critical-section", ] @@ -187,13 +228,13 @@ dependencies = [ "aws-http", "aws-sdk-sso", "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "hex", "http", @@ -206,15 +247,29 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-credential-types" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e37e62f59cf3284067337da7467d842df8cfe3f5e5c06487ac7521819cf16d" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-types 0.55.1", + "fastrand", + "tokio", + "tracing", + "zeroize", +] + [[package]] name = "aws-endpoint" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "http", "regex", "tracing", @@ -226,9 +281,9 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "http-body", @@ -248,15 +303,15 @@ dependencies = [ "aws-http", "aws-sig-auth", "aws-sigv4", - "aws-smithy-async", + "aws-smithy-async 0.51.0", "aws-smithy-checksums", - "aws-smithy-client", + "aws-smithy-client 0.51.0", "aws-smithy-eventstream", - 
"aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - "aws-types", + "aws-types 0.51.0", "bytes", "bytes-utils", "http", @@ -275,13 +330,13 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "tokio-stream", @@ -297,14 +352,14 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-query", - "aws-smithy-types", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - "aws-types", + "aws-types 0.51.0", "bytes", "http", "tower", @@ -318,20 +373,20 @@ checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" dependencies = [ "aws-sigv4", "aws-smithy-eventstream", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.51.0", "http", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.51.0" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" +checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-http", + "aws-smithy-http 0.51.0", "bytes", "form_urlencoded", "hex", @@ -356,14 +411,26 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "aws-smithy-async" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", + "tokio-stream", +] + [[package]] name = "aws-smithy-checksums" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "crc32c", "crc32fast", @@ -383,10 +450,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "fastrand", "http", @@ -400,13 +467,33 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-client" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-http 0.55.1", + "aws-smithy-http-tower 0.55.1", + "aws-smithy-types 0.55.1", + "bytes", + "fastrand", + "http", + "http-body", + "pin-project-lite", + "tokio", + "tower", + "tracing", +] + [[package]] name = "aws-smithy-eventstream" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "bytes", "crc32fast", ] @@ -418,7 +505,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-types", 
+ "aws-smithy-types 0.51.0", "bytes", "bytes-utils", "futures-core", @@ -434,13 +521,49 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-http" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8" +dependencies = [ + "aws-smithy-types 0.55.1", + "bytes", + "bytes-utils", + "futures-core", + "http", + "http-body", + "hyper", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + [[package]] name = "aws-smithy-http-tower" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" dependencies = [ - "aws-smithy-http", + "aws-smithy-http 0.51.0", + "bytes", + "http", + "http-body", + "pin-project-lite", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-http-tower" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a" +dependencies = [ + "aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", "bytes", "http", "http-body", @@ -455,7 +578,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", ] [[package]] @@ -464,7 +587,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "urlencoding", ] @@ -480,6 +603,19 @@ dependencies = [ "time", ] +[[package]] +name = "aws-smithy-types" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1" +dependencies = [ + "base64-simd", + "itoa", + "num-integer", + "ryu", + "time", +] + [[package]] name = "aws-smithy-xml" version = "0.51.0" @@ -495,10 +631,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" dependencies = [ - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "http", "rustc_version", "tracing", @@ -506,10 +642,26 @@ dependencies = [ ] [[package]] -name = "axum" -version = "0.6.4" +name = "aws-types" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" +checksum = "81fb02591b5075d318e0083dcb76df0e151db4ce48f987ecd00e5b53c7a6ba59" +dependencies = [ + "aws-credential-types", + "aws-smithy-async 0.55.1", + "aws-smithy-client 0.55.1", + "aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", + "http", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b32c5ea3aabaf4deb5f5ced2d688ec0844c881c9e6c696a8b769a05fc691e62" dependencies = [ "async-trait", "axum-core", @@ -529,16 +681,15 @@ dependencies = [ "serde", "sync_wrapper", "tower", - "tower-http", "tower-layer", "tower-service", ] [[package]] name = "axum-core" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" dependencies = [ "async-trait", "bytes", @@ -584,6 +735,16 @@ version = "0.21.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "bincode" version = "1.3.3" @@ -595,9 +756,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.61.0" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a022e58a142a46fea340d68012b9201c094e93ec3d033a944a24f8fd4a4f09a" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", "cexpr", @@ -606,12 +767,13 @@ dependencies = [ "lazycell", "log", "peeking_take_while", + "prettyplease 0.2.4", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.14", "which", ] @@ -623,18 +785,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] [[package]] name = "bstr" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" dependencies = [ "memchr", "once_cell", @@ -702,9 +864,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "num-integer", @@ -742,9 +904,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.4.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" dependencies = [ "glob", "libc", @@ -765,30 +927,38 @@ dependencies = [ [[package]] name = "clap" -version = "4.1.4" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +checksum = "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" dependencies = [ - "bitflags", + "clap_builder", "clap_derive", - "clap_lex 0.3.1", - "is-terminal", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex 0.4.1", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.1.0" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] @@ -802,12 +972,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" -dependencies = [ - "os_str_bytes", 
-] +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "close_fds" @@ -859,7 +1026,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.1.4", + "clap 4.2.1", "compute_api", "futures", "hyper", @@ -883,6 +1050,21 @@ dependencies = [ "workspace_hack", ] +[[package]] +name = "concolor-override" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" + +[[package]] +name = "concolor-query" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" +dependencies = [ + "windows-sys 0.45.0", +] + [[package]] name = "const_format" version = "0.2.30" @@ -921,7 +1103,7 @@ name = "control_plane" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.1", "comfy-table", "git-version", "nix", @@ -957,15 +1139,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -1032,9 +1214,9 @@ checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = 
"a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1042,9 +1224,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1053,22 +1235,22 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset 0.7.1", + "memoffset 0.8.0", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", ] @@ -1110,9 +1292,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -1122,9 +1304,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ 
"cc", "codespan-reporting", @@ -1132,31 +1314,31 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 2.0.14", ] [[package]] name = "cxxbridge-flags" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "darling" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ "darling_core", "darling_macro", @@ -1164,27 +1346,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn", + "syn 1.0.109", ] [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1218,9 +1400,9 @@ dependencies = [ 
[[package]] name = "der-parser" -version = "8.1.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d4bc9b0db0a0df9ae64634ac5bdefb7afcb534e182275ca0beadbe486701c1" +checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e" dependencies = [ "asn1-rs", "displaydoc", @@ -1249,7 +1431,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1269,9 +1451,9 @@ dependencies = [ [[package]] name = "enum-map" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c25992259941eb7e57b936157961b217a4fc8597829ddef0596d6c3cd86e1a" +checksum = "988f0d17a0fa38291e5f41f71ea8d46a5d5497b9054d5a759fae2cbb819f2356" dependencies = [ "enum-map-derive", ] @@ -1284,7 +1466,7 @@ checksum = "2a4da76b3b6116d758c7ba93f7ec6a35d2e2cf24feda76c6e38a375f4d5c59f2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1305,7 +1487,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1323,13 +1505,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys 0.48.0", ] [[package]] @@ -1361,23 +1543,23 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" 
dependencies = [ "instant", ] [[package]] name = "filetime" -version = "0.2.19" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", - "redox_syscall", - "windows-sys 0.42.0", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", ] [[package]] @@ -1422,9 +1604,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1437,9 +1619,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1447,15 +1629,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1464,32 +1646,32 @@ dependencies = [ [[package]] name = "futures-io" 
-version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-timer" @@ -1499,9 +1681,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1517,9 +1699,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = 
"85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1527,20 +1709,22 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] name = "gimli" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" [[package]] name = "git-version" @@ -1561,7 +1745,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1572,9 +1756,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" dependencies = [ "bytes", "fnv", @@ -1639,7 +1823,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.4", + "spin 0.9.8", "stable_deref_trait", ] @@ -1667,6 +1851,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -1678,9 +1868,9 @@ dependencies = [ [[package]] name = "hex-literal" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "hmac" @@ -1704,9 +1894,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ "bytes", "fnv", @@ -1724,12 +1914,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" - [[package]] name = "httparse" version = "1.8.0" @@ -1760,9 +1944,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.23" +version = "0.14.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" dependencies = [ "bytes", "futures-channel", @@ -1775,7 +1959,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -1791,10 +1975,10 @@ dependencies = [ "http", "hyper", "log", - "rustls", + "rustls 0.20.8", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -1824,16 +2008,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", 
"iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "winapi", + "windows", ] [[package]] @@ -1864,9 +2048,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1904,30 +2088,31 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.4" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ + "hermit-abi 0.3.1", "libc", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "is-terminal" -version = "0.4.2" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.1", "io-lifetimes", - "rustix", - "windows-sys 0.42.0", + "rustix 0.37.11", + "windows-sys 0.48.0", ] [[package]] @@ -1941,9 +2126,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = 
"js-sys" @@ -1956,11 +2141,11 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.2.0" +version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828" +checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ - "base64 0.13.1", + "base64 0.21.0", "pem", "ring", "serde", @@ -2002,9 +2187,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libloading" @@ -2031,6 +2216,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "linux-raw-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" + [[package]] name = "lock_api" version = "0.4.9" @@ -2123,9 +2314,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" @@ -2145,23 +2336,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.4" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2e212582ede878b109755efd0773a4f0f4ec851584cf0aefbeb4d9ecc114822" +checksum = 
"b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2194,15 +2385,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" -dependencies = [ - "memchr", -] - [[package]] name = "notify" version = "5.1.0" @@ -2291,9 +2473,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "oorandom" @@ -2358,8 +2540,8 @@ dependencies = [ "futures-util", "opentelemetry", "prost", - "tonic", - "tonic-build", + "tonic 0.8.3", + "tonic-build 0.8.4", ] [[package]] @@ -2411,9 +2593,9 @@ dependencies = [ [[package]] name = "os_info" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c424bc68d15e0778838ac013b5b3449544d8133633d8016319e7e05a820b8c0" +checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e" dependencies = [ "log", "serde", @@ -2422,9 +2604,15 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.4.1" +version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = 
"ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" + +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" [[package]] name = "overload" @@ -2442,7 +2630,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.1", "close_fds", "const_format", "consumption_metrics", @@ -2539,7 +2727,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", "windows-sys 0.45.0", ] @@ -2567,9 +2755,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -2610,7 +2798,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2704,14 +2892,14 @@ dependencies = [ "futures", "once_cell", "pq_proto", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "thiserror", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", "tracing", "workspace_hack", ] @@ -2777,36 +2965,22 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.23" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" dependencies = [ "proc-macro2", - "syn", + "syn 1.0.109", ] [[package]] -name = "proc-macro-error" -version 
= "1.0.4" +name = "prettyplease" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "quote", - "version_check", + "syn 2.0.14", ] [[package]] @@ -2817,9 +2991,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -2834,7 +3008,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix", + "rustix 0.36.12", ] [[package]] @@ -2855,9 +3029,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" dependencies = [ "bytes", "prost-derive", @@ -2865,9 +3039,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "2c828f93f5ca4826f97fedcbd3f9a536c16b12cff3dbbb4a007f932bbad95b12" dependencies = [ "bytes", "heck", @@ -2876,35 +3050,34 @@ 
dependencies = [ "log", "multimap", "petgraph", - "prettyplease", + "prettyplease 0.1.25", "prost", "prost-types", "regex", - "syn", + "syn 1.0.109", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "4ea9b0f8cbe5e15a8a042d030bd96668db28ecb567ec37d691971ff5731d2b1b" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" dependencies = [ - "bytes", "prost", ] @@ -2919,7 +3092,7 @@ dependencies = [ "bstr", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.1", "consumption_metrics", "futures", "git-version", @@ -2949,20 +3122,20 @@ dependencies = [ "reqwest-tracing", "routerify", "rstest", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "scopeguard", "serde", "serde_json", "sha2", - "socket2", + "socket2 0.5.2", "sync_wrapper", "thiserror", "tls-listener", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -2970,16 +3143,16 @@ dependencies = [ "url", "utils", "uuid", - "webpki-roots", + "webpki-roots 0.23.0", "workspace_hack", "x509-parser", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -3016,9 +3189,9 @@ dependencies = [ [[package]] name = "rayon" -version = 
"1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ "either", "rayon-core", @@ -3026,9 +3199,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.10.2" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -3058,10 +3231,19 @@ dependencies = [ ] [[package]] -name = "regex" -version = "1.7.1" +name = "redox_syscall" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -3079,9 +3261,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "remote_storage" @@ -3091,8 +3273,8 @@ dependencies = [ "async-trait", "aws-config", "aws-sdk-s3", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.55.0", "hyper", "metrics", "once_cell", @@ -3111,9 +3293,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.14" +version = "0.11.16" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64 0.21.0", "bytes", @@ -3133,27 +3315,27 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.22.6", "winreg", ] [[package]] name = "reqwest-middleware" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1c03e9011a8c59716ad13115550469e081e2e9892656b0ba6a47c907921894" +checksum = "99c50db2c7ccd815f976473dd7d0bde296f8c3b77c383acf4fc021cdcf10852b" dependencies = [ "anyhow", "async-trait", @@ -3166,11 +3348,12 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b739d87a6b2cf4743968ad2b4cef648fbe0204c19999509824425babb2097bce" +checksum = "8a71d77945a1c5ae9604f0504901e77a1e2e71f2932b1cb8103078179ca62ff8" dependencies = [ "async-trait", + "getrandom", "opentelemetry", "reqwest", "reqwest-middleware", @@ -3209,18 +3392,18 @@ dependencies = [ [[package]] name = "rpds" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000" +checksum = "9bd6ce569b15c331b1e5fd8cf6adb0bf240678b5f0cdc4d0f41e11683f6feba9" dependencies = [ "archery", ] [[package]] name = "rstest" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f2d176c472198ec1e6551dc7da28f1c089652f66a7b722676c2238ebc0edf" +checksum = 
"de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" dependencies = [ "futures", "futures-timer", @@ -3230,23 +3413,23 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" +checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8" dependencies = [ "cfg-if", "proc-macro2", "quote", "rustc_version", - "syn", + "syn 1.0.109", "unicode-ident", ] [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" [[package]] name = "rustc-hash" @@ -3274,16 +3457,30 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.7" +version = "0.36.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" +checksum = "e0af200a3324fa5bcd922e84e9b55a298ea9f431a489f01961acdebc6e908f25" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys 0.42.0", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.1", + "windows-sys 0.48.0", ] [[package]] @@ -3298,6 +3495,18 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" +dependencies = [ + "log", + 
"ring", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -3320,16 +3529,26 @@ dependencies = [ ] [[package]] -name = "rustversion" -version = "1.0.11" +name = "rustls-webpki" +version = "0.100.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "ryu" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "safekeeper" @@ -3341,7 +3560,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.1", "const_format", "crc32c", "fs2", @@ -3414,9 +3633,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" [[package]] name = "sct" @@ -3453,33 +3672,33 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "sentry" -version = "0.29.2" +version = "0.30.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6097dc270a9c4555c5d6222ed243eaa97ff38e29299ed7c5cb36099033c604e" +checksum = "b5ce6d3512e2617c209ec1e86b0ca2fea06454cd34653c91092bf0f3ec41f8e3" dependencies = [ "httpdate", "reqwest", - "rustls", + "rustls 0.20.8", "sentry-backtrace", "sentry-contexts", "sentry-core", "sentry-panic", "tokio", "ureq", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] name = "sentry-backtrace" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d92d1e4d591534ae4f872d6142f3b500f4ffc179a6aed8a3e86c7cc96d10a6a" +checksum = "0e7fe408d4d1f8de188a9309916e02e129cbe51ca19e55badea5a64899399b1a" dependencies = [ "backtrace", "once_cell", @@ -3489,9 +3708,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3afa877b1898ff67dd9878cf4bec4e53cef7d3be9f14b1fc9e4fcdf36f8e4259" +checksum = "5695096a059a89973ec541062d331ff4c9aeef9c2951416c894f0fff76340e7d" dependencies = [ "hostname", "libc", @@ -3503,9 +3722,9 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc43eb7e4e3a444151a0fe8a0e9ce60eabd905dae33d66e257fa26f1b509c1bd" +checksum = "5b22828bfd118a7b660cf7a155002a494755c0424cebb7061e4743ecde9c7dbc" dependencies = [ "once_cell", "rand", @@ -3516,9 +3735,9 @@ dependencies = [ [[package]] name = "sentry-panic" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccab4fab11e3e63c45f4524bee2e75cde39cdf164cb0b0cbe6ccd1948ceddf66" +checksum = "1f4ced2a7a8c14899d58eec402d946f69d5ed26a3fc363a7e8b1e5cb88473a01" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3526,9 +3745,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.29.2" +version 
= "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63708ec450b6bdcb657af760c447416d69c38ce421f34e5e2e9ce8118410bc7" +checksum = "360ee3270f7a4a1eee6c667f7d38360b995431598a73b740dfe420da548d9cc9" dependencies = [ "debugid", "getrandom", @@ -3543,35 +3762,44 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.152" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "serde_json" -version = "1.0.91" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" dependencies = [ "itoa", "ryu", "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3586,9 +3814,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d904179146de381af4c93d3af6ca4984b3152db687dacb9c3c35e86f39809c" +checksum = "331bb8c3bf9b92457ab7abecf07078c13f7d270ba490103e84e8b014490cd0b0" dependencies = [ "base64 
0.13.1", "chrono", @@ -3602,14 +3830,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1966009f3c05f095697c537312f5415d1e3ed31ce0a56942bac4c771c5c335e" +checksum = "859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3651,9 +3879,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" dependencies = [ "libc", "signal-hook-registry", @@ -3672,9 +3900,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] @@ -3699,9 +3927,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" dependencies = [ "autocfg", ] @@ -3714,14 +3942,24 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" 
+checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", ] +[[package]] +name = "socket2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d283f86695ae989d1e18440a943880967156325ba025f05049946bff47bcc2b" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -3730,9 +3968,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.4" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ "lock_api", ] @@ -3756,7 +3994,7 @@ dependencies = [ "anyhow", "async-stream", "bytes", - "clap 4.1.4", + "clap 4.2.1", "const_format", "futures", "futures-core", @@ -3770,8 +4008,8 @@ dependencies = [ "prost", "tokio", "tokio-stream", - "tonic", - "tonic-build", + "tonic 0.9.1", + "tonic-build 0.9.1", "tracing", "utils", "workspace_hack", @@ -3809,7 +4047,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", ] [[package]] @@ -3826,9 +4064,20 @@ checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" [[package]] name = "syn" -version = "1.0.107" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" dependencies = [ "proc-macro2", "quote", @@ -3849,7 +4098,7 @@ 
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "unicode-xid", ] @@ -3866,24 +4115,24 @@ dependencies = [ [[package]] name = "task-local-extensions" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4167afbec18ae012de40f8cf1b9bf48420abb390678c34821caa07d924941cc4" +checksum = "ba323866e5d033818e3240feeb9f7db2c4296674e4d9e16b97b7bf8f490434e8" dependencies = [ - "tokio", + "pin-utils", ] [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", - "rustix", - "windows-sys 0.42.0", + "redox_syscall 0.3.5", + "rustix 0.37.11", + "windows-sys 0.45.0", ] [[package]] @@ -3923,7 +4172,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3934,38 +4183,39 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ 
"proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] [[package]] name = "time" -version = "0.3.17" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa", "serde", @@ -3981,9 +4231,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" dependencies = [ "time-core", ] @@ -4009,9 +4259,9 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tls-listener" @@ -4024,26 +4274,25 @@ dependencies = [ "pin-project-lite", "thiserror", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] name = "tokio" -version = "1.25.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", 
"num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.4.9", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -4058,13 +4307,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.14", ] [[package]] @@ -4085,7 +4334,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "socket2", + "socket2 0.4.9", "tokio", "tokio-util", ] @@ -4098,10 +4347,10 @@ checksum = "606f2b73660439474394432239c82249c0d45eb5f23d91f401be1e33590444a7" dependencies = [ "futures", "ring", - "rustls", + "rustls 0.20.8", "tokio", "tokio-postgres", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -4110,16 +4359,26 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] [[package]] -name = "tokio-stream" -version = "0.1.11" +name = "tokio-rustls" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +dependencies = [ + "rustls 0.21.0", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" dependencies = [ "futures-core", "pin-project-lite", @@ -4134,7 +4393,7 @@ dependencies = [ "filetime", "futures-core", "libc", - "redox_syscall", + "redox_syscall 0.2.16", 
"tokio", "tokio-stream", "xattr", @@ -4154,9 +4413,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.4" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" dependencies = [ "bytes", "futures-core", @@ -4168,33 +4427,36 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" dependencies = [ "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" +checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.17.1" +version = "0.19.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34cc558345efd7e88b9eda9626df2138b80bb46a7606f695e751c892bc7dac6" +checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" dependencies = [ "indexmap", - "itertools", - "nom8", "serde", + "serde_spanned", "toml_datetime", + "winnow", ] [[package]] @@ -4219,10 +4481,7 @@ dependencies = [ "pin-project", "prost", "prost-derive", - "rustls-native-certs", - "rustls-pemfile", "tokio", - "tokio-rustls", "tokio-stream", "tokio-util", "tower", @@ -4232,17 +4491,62 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "tonic" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"38bd8e87955eb13c1986671838177d6792cdc52af9bffced0d2c8a9a7f741ab3" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.21.0", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs", + "rustls-pemfile", + "tokio", + "tokio-rustls 0.24.0", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic-build" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" dependencies = [ - "prettyplease", + "prettyplease 0.1.25", "proc-macro2", "prost-build", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "tonic-build" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f60a933bbea70c95d633c04c951197ddf084958abaa2ed502a3743bdd8d8dd7" +dependencies = [ + "prettyplease 0.1.25", + "proc-macro2", + "prost-build", + "quote", + "syn 1.0.109", ] [[package]] @@ -4265,25 +4569,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tower-http" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" -dependencies = [ - "bitflags", - "bytes", - "futures-core", - "futures-util", - "http", - "http-body", - "http-range-header", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-layer" version = "0.3.2" @@ -4301,7 +4586,7 @@ name = "trace" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.1", "pageserver_api", "utils", "workspace_hack", @@ -4328,7 +4613,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -4474,15 +4759,15 @@ dependencies = [ 
[[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-normalization" @@ -4520,10 +4805,10 @@ dependencies = [ "base64 0.13.1", "log", "once_cell", - "rustls", + "rustls 0.20.8", "url", "webpki", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] @@ -4550,6 +4835,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "utils" version = "0.1.0" @@ -4593,9 +4884,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" dependencies = [ "getrandom", "serde", @@ -4613,12 +4904,18 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + 
[[package]] name = "wal_craft" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.1", "env_logger", "log", "once_cell", @@ -4630,12 +4927,11 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -4676,7 +4972,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -4710,7 +5006,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4750,6 +5046,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "webpki-roots" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125" +dependencies = [ + "rustls-webpki", +] + [[package]] name = "which" version = "4.4.0" @@ -4792,19 +5097,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - 
"windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] @@ -4813,65 +5127,140 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", ] [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] name = 
"windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +dependencies = [ + "memchr", +] [[package]] name = "winreg" @@ -4890,7 +5279,8 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.1", + "clap_builder", 
"crossbeam-utils", "digest", "either", @@ -4902,7 +5292,6 @@ dependencies = [ "futures-sink", "futures-util", "hashbrown 0.12.3", - "indexmap", "itertools", "libc", "log", @@ -4917,16 +5306,18 @@ dependencies = [ "regex-syntax", "reqwest", "ring", - "rustls", + "rustls 0.20.8", "scopeguard", "serde", "serde_json", - "socket2", - "syn", + "socket2 0.4.9", + "syn 1.0.109", + "syn 2.0.14", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", - "tonic", + "toml_datetime", + "toml_edit", "tower", "tracing", "tracing-core", @@ -4936,12 +5327,11 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" +checksum = "bab0c2f54ae1d92f4fcb99c0b7ccf0b1e3451cbd395e5f115ccbdbcb18d4f634" dependencies = [ "asn1-rs", - "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", @@ -4969,15 +5359,15 @@ checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" [[package]] name = "yasna" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aed2e7a52e3744ab4d0c05c20aa065258e84c49fd4226f5191b2ed29712710b4" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ "time", ] [[package]] name = "zeroize" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" +checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" diff --git a/Cargo.toml b/Cargo.toml index 679605dc1d..0b545e6190 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,10 @@ atty = "0.2.14" aws-config = { version = "0.51.0", default-features = false, features=["rustls"] } aws-sdk-s3 = "0.21.0" aws-smithy-http = "0.51.0" -aws-types = "0.51.0" +aws-types = "0.55" base64 = 
"0.13.0" bincode = "1.3" -bindgen = "0.61" +bindgen = "0.65" bstr = "1.0" byteorder = "1.4" bytes = "1.0" @@ -50,7 +50,7 @@ git-version = "0.3" hashbrown = "0.13" hashlink = "0.8.1" hex = "0.4" -hex-literal = "0.3" +hex-literal = "0.4" hmac = "0.12.1" hostname = "0.3.1" humantime = "2.1" @@ -80,18 +80,18 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls" reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] } reqwest-middleware = "0.2.0" routerify = "3" -rpds = "0.12.0" +rpds = "0.13" rustls = "0.20" rustls-pemfile = "1" rustls-split = "0.3" scopeguard = "1.1" -sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } +sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" serde_with = "2.0" sha2 = "0.10.2" signal-hook = "0.3" -socket2 = "0.4.4" +socket2 = "0.5" strum = "0.24" strum_macros = "0.24" svg_fmt = "0.4.1" @@ -106,17 +106,17 @@ tokio-postgres-rustls = "0.9.0" tokio-rustls = "0.23" tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["io"] } -toml = "0.5" -toml_edit = { version = "0.17", features = ["easy"] } -tonic = {version = "0.8", features = ["tls", "tls-roots"]} +toml = "0.7" +toml_edit = "0.19" +tonic = {version = "0.9", features = ["tls", "tls-roots"]} tracing = "0.1" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } url = "2.2" uuid = { version = "1.2", features = ["v4", "serde"] } walkdir = "2.3.2" -webpki-roots = "0.22.5" -x509-parser = "0.14" +webpki-roots = "0.23" +x509-parser = "0.15" ## TODO replace this with tracing env_logger = "0.10" @@ -154,9 +154,9 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" } ## Build dependencies criterion = "0.4" rcgen = "0.10" -rstest = "0.16" +rstest = "0.17" tempfile = "3.4" 
-tonic-build = "0.8" +tonic-build = "0.9" # This is only needed for proxy's tests. # TODO: we should probably fork `tokio-postgres-rustls` instead. diff --git a/libs/consumption_metrics/Cargo.toml b/libs/consumption_metrics/Cargo.toml index f26aa2fbc5..3f290821c2 100644 --- a/libs/consumption_metrics/Cargo.toml +++ b/libs/consumption_metrics/Cargo.toml @@ -4,13 +4,12 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -anyhow = "1.0.68" -chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } -rand = "0.8.3" -serde = "1.0.152" -serde_with = "2.1.0" -utils = { version = "0.1.0", path = "../utils" } -workspace_hack = { version = "0.1.0", path = "../../workspace_hack" } +anyhow.workspace = true +chrono.workspace = true +rand.workspace = true +serde.workspace = true +serde_with.workspace = true +utils.workspace = true + +workspace_hack.workspace = true diff --git a/libs/postgres_ffi/build.rs b/libs/postgres_ffi/build.rs index 66221af522..f7e39751ef 100644 --- a/libs/postgres_ffi/build.rs +++ b/libs/postgres_ffi/build.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use std::process::Command; use anyhow::{anyhow, Context}; -use bindgen::callbacks::ParseCallbacks; +use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; #[derive(Debug)] struct PostgresFfiCallbacks; @@ -20,7 +20,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { // Add any custom #[derive] attributes to the data structures that bindgen // creates. - fn add_derives(&self, name: &str) -> Vec { + fn add_derives(&self, derive_info: &DeriveInfo) -> Vec { // This is the list of data structures that we want to serialize/deserialize. 
let serde_list = [ "XLogRecord", @@ -31,7 +31,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { "ControlFileData", ]; - if serde_list.contains(&name) { + if serde_list.contains(&derive_info.name) { vec![ "Default".into(), // Default allows us to easily fill the padding fields with 0. "Serialize".into(), diff --git a/libs/remote_storage/tests/pagination_tests.rs b/libs/remote_storage/tests/pagination_tests.rs index eb52409c44..048e99d841 100644 --- a/libs/remote_storage/tests/pagination_tests.rs +++ b/libs/remote_storage/tests/pagination_tests.rs @@ -204,12 +204,7 @@ async fn upload_s3_data( let data = format!("remote blob data {i}").into_bytes(); let data_len = data.len(); task_client - .upload( - Box::new(std::io::Cursor::new(data)), - data_len, - &blob_path, - None, - ) + .upload(std::io::Cursor::new(data), data_len, &blob_path, None) .await?; Ok::<_, anyhow::Error>((blob_prefix, blob_path)) diff --git a/libs/tracing-utils/Cargo.toml b/libs/tracing-utils/Cargo.toml index 8c3d3f9063..b285c9b5b0 100644 --- a/libs/tracing-utils/Cargo.toml +++ b/libs/tracing-utils/Cargo.toml @@ -14,4 +14,5 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tracing.workspace = true tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true -workspace_hack = { version = "0.1", path = "../../workspace_hack" } + +workspace_hack.workspace = true diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 391bc52a80..dc6326e73e 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -33,7 +33,7 @@ serde_with.workspace = true strum.workspace = true strum_macros.workspace = true url.workspace = true -uuid = { version = "1.2", features = ["v4", "serde"] } +uuid.workspace = true metrics.workspace = true workspace_hack.workspace = true diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 19f0f22815..0c87e208c8 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -725,8 +725,9 @@ impl PageServerConf { 
"disk_usage_based_eviction" => { tracing::info!("disk_usage_based_eviction: {:#?}", &item); builder.disk_usage_based_eviction( - toml_edit::de::from_item(item.clone()) - .context("parse disk_usage_based_eviction")?) + deserialize_from_item_string("disk_usage_based_eviction", item) + .context("parse disk_usage_based_eviction")? + ) }, "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?), _ => bail!("unrecognized pageserver option '{key}'"), @@ -827,14 +828,14 @@ impl PageServerConf { if let Some(eviction_policy) = item.get("eviction_policy") { t_conf.eviction_policy = Some( - toml_edit::de::from_item(eviction_policy.clone()) + deserialize_from_item_string("eviction_policy", eviction_policy) .context("parse eviction_policy")?, ); } if let Some(item) = item.get("min_resident_size_override") { t_conf.min_resident_size_override = Some( - toml_edit::de::from_item(item.clone()) + deserialize_from_item_string("min_resident_size_override", item) .context("parse min_resident_size_override")?, ); } @@ -938,6 +939,19 @@ where }) } +fn deserialize_from_item_string(name: &str, item: &Item) -> anyhow::Result +where + T: serde::de::DeserializeOwned, +{ + // ValueDeserializer::new is not public, so use the ValueDeserializer's documented way + let item_string = item.to_string(); + let deserializer = item_string + .trim() + .parse::() + .with_context(|| format!("parsing item for node {name} as ValueDeserializer"))?; + T::deserialize(deserializer).with_context(|| format!("deserializing item for node {name}")) +} + /// Configurable semaphore permits setting. 
/// /// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index c0e4a2a9cf..bd38a7a2f3 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -65,7 +65,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream { // We were requested to shut down. - let msg = format!("pageserver is shutting down"); + let msg = "pageserver is shutting down".to_string(); let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)); Err(QueryError::Other(anyhow::anyhow!(msg))) } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 03a4ff8c8e..67bc1b36b0 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1876,7 +1876,7 @@ impl Tenant { .to_string(); // Convert the config to a toml file. - conf_content += &toml_edit::easy::to_string(&tenant_conf)?; + conf_content += &toml_edit::ser::to_string(&tenant_conf)?; let mut target_config_file = VirtualFile::open_with_options( target_config_path, diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index cdabb23a7b..9b719db180 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -275,9 +275,9 @@ mod tests { ..TenantConfOpt::default() }; - let toml_form = toml_edit::easy::to_string(&small_conf).unwrap(); + let toml_form = toml_edit::ser::to_string(&small_conf).unwrap(); assert_eq!(toml_form, "gc_horizon = 42\n"); - assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap()); + assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap()); let json_form = serde_json::to_string(&small_conf).unwrap(); assert_eq!(json_form, "{\"gc_horizon\":42}"); diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index ce9f4d9bf8..699121ccd9 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs 
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -74,7 +74,7 @@ pub(super) async fn upload_timeline_layer<'a>( })?; storage - .upload(Box::new(source_file), fs_size, &storage_path, None) + .upload(source_file, fs_size, &storage_path, None) .await .with_context(|| { format!( diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index d7ace28426..de7b634ba0 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -23,7 +23,6 @@ use std::convert::Infallible; use std::net::SocketAddr; use std::pin::Pin; use std::sync::Arc; -use std::task::Poll; use std::time::Duration; use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; @@ -374,7 +373,7 @@ impl BrokerService for Broker { Ok(info) => yield info, Err(RecvError::Lagged(skipped_msg)) => { missed_msgs += skipped_msg; - if let Poll::Ready(_) = futures::poll!(Box::pin(warn_interval.tick())) { + if (futures::poll!(Box::pin(warn_interval.tick()))).is_ready() { warn!("subscription id={}, key={:?} addr={:?} dropped {} messages, channel is full", subscriber.id, subscriber.key, subscriber.remote_addr, missed_msgs); missed_msgs = 0; diff --git a/trace/Cargo.toml b/trace/Cargo.toml index 6ced992d4c..d6eed3f49c 100644 --- a/trace/Cargo.toml +++ b/trace/Cargo.toml @@ -4,8 +4,6 @@ version = "0.1.0" edition.workspace = true license.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] clap.workspace = true anyhow.workspace = true diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index f885f4a94d..f735ffed4c 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -18,6 +18,7 @@ byteorder = { version = "1" } bytes = { version = "1", features = ["serde"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } clap = { version = "4", features = ["derive", "string"] } 
+clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] } crossbeam-utils = { version = "0.8" } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } @@ -29,7 +30,6 @@ futures-executor = { version = "0.3" } futures-sink = { version = "0.3" } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } hashbrown = { version = "0.12", features = ["raw"] } -indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -52,7 +52,8 @@ socket2 = { version = "0.4", default-features = false, features = ["all"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "sync", "time"] } tokio-rustls = { version = "0.23" } tokio-util = { version = "0.7", features = ["codec", "io"] } -tonic = { version = "0.8", features = ["tls-roots"] } +toml_datetime = { version = "0.6", default-features = false, features = ["serde"] } +toml_edit = { version = "0.19", features = ["serde"] } tower = { version = "0.4", features = ["balance", "buffer", "limit", "retry", "timeout", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } @@ -64,7 +65,6 @@ anyhow = { version = "1", features = ["backtrace"] } bytes = { version = "1", features = ["serde"] } either = { version = "1" } hashbrown = { version = "0.12", features = ["raw"] } -indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -74,6 +74,7 @@ prost = { version = "0.11" } regex = { version = "1" } regex-syntax = { version = "0.6" } serde = { version = "1", features = 
["alloc", "derive"] } -syn = { version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } +syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } ### END HAKARI SECTION From 8d295780cb833848f1d4b97bee166cbf80b7d9bd Mon Sep 17 00:00:00 2001 From: Sam Gaw Date: Mon, 10 Apr 2023 17:07:43 +0100 Subject: [PATCH 10/77] Add support for ip4r extension --- Dockerfile.compute-node | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 3473487444..7c64951fa5 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -256,6 +256,21 @@ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgta make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control +######################################################################################### +# +# Layer "ip4r-pg-build" +# compile ip4r extension +# +######################################################################################### +FROM build-deps AS ip4r-pg-build +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \ + mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . 
&& \ + make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ + make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control + ######################################################################################### # # Layer "prefix-pg-build" @@ -423,6 +438,7 @@ COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/ From 218062cebade6dbb68c44a06e022769bf301289c Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 7 Apr 2023 12:04:06 +0100 Subject: [PATCH 11/77] GitHub Workflows: use ref_name instead of ref --- .github/workflows/benchmarking.yml | 12 ++++++------ .github/workflows/build_and_test.yml | 4 ++-- .github/workflows/neon_extra_builds.yml | 2 +- .github/workflows/pg_clients.yml | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 028fe8d8ad..4f3ff15364 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -30,7 +30,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. 
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true jobs: @@ -42,7 +42,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: "neon-staging" runs-on: [ self-hosted, us-east-2, x64 ] @@ -174,7 +174,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -317,7 +317,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -413,7 +413,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -503,7 +503,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report 
|| ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c096aef4a9..691320324e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -13,7 +13,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true env: @@ -368,7 +368,7 @@ jobs: build_type: ${{ matrix.build_type }} test_selection: performance run_in_parallel: false - save_perf_report: ${{ github.ref == 'refs/heads/main' }} + save_perf_report: ${{ github.ref_name == 'main' }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index ef4c293e31..1196881541 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -12,7 +12,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true env: diff --git a/.github/workflows/pg_clients.yml b/.github/workflows/pg_clients.yml index 9f57519589..224b7b4a6d 100644 --- a/.github/workflows/pg_clients.yml +++ b/.github/workflows/pg_clients.yml @@ -14,7 +14,7 @@ on: concurrency: # Allow only one workflow per any non-`main` branch. 
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true jobs: From c94b8998bedb61a7fda5c910412f067afb0d4e57 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 7 Apr 2023 12:23:08 +0100 Subject: [PATCH 12/77] GitHub Workflows: print error messages to stderr --- .github/actions/allure-report/action.yml | 4 ++-- .github/actions/download/action.yml | 2 +- .github/actions/neon-branch-create/action.yml | 4 ++-- .github/actions/neon-branch-delete/action.yml | 2 +- .github/actions/upload/action.yml | 6 +++--- .github/workflows/benchmarking.yml | 8 ++++---- .github/workflows/build_and_test.yml | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml index e35cbb20fd..9a1037064a 100644 --- a/.github/actions/allure-report/action.yml +++ b/.github/actions/allure-report/action.yml @@ -45,12 +45,12 @@ runs: shell: bash -euxo pipefail {0} run: | if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then - echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" + echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" exit 1 fi if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then - echo 2>&1 "inputs.test_selection must be set for 'store' action" + echo >&2 "inputs.test_selection must be set for 'store' action" exit 2 fi diff --git a/.github/actions/download/action.yml b/.github/actions/download/action.yml index eb34d4206a..d3f9bc0414 100644 --- a/.github/actions/download/action.yml +++ b/.github/actions/download/action.yml @@ -37,7 +37,7 @@ runs: echo 'SKIPPED=true' >> $GITHUB_OUTPUT exit 0 else - echo 2>&1 "Neither 
s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" + echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" exit 1 fi fi diff --git a/.github/actions/neon-branch-create/action.yml b/.github/actions/neon-branch-create/action.yml index 7ee43a3587..f1eea34ab9 100644 --- a/.github/actions/neon-branch-create/action.yml +++ b/.github/actions/neon-branch-create/action.yml @@ -58,7 +58,7 @@ runs: done if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then - echo 2>&1 "Failed to create branch after 10 attempts, the latest response was: ${branch}" + echo >&2 "Failed to create branch after 10 attempts, the latest response was: ${branch}" exit 1 fi @@ -122,7 +122,7 @@ runs: done if [ -z "${password}" ] || [ "${password}" == "null" ]; then - echo 2>&1 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}" + echo >&2 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}" exit 1 fi diff --git a/.github/actions/neon-branch-delete/action.yml b/.github/actions/neon-branch-delete/action.yml index 5689093e2e..f8cd351dd9 100644 --- a/.github/actions/neon-branch-delete/action.yml +++ b/.github/actions/neon-branch-delete/action.yml @@ -48,7 +48,7 @@ runs: done if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then - echo 2>&1 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}" + echo >&2 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}" exit 1 fi env: diff --git a/.github/actions/upload/action.yml b/.github/actions/upload/action.yml index 291a2cf3b0..63973dfbe7 100644 --- a/.github/actions/upload/action.yml +++ b/.github/actions/upload/action.yml @@ -23,7 +23,7 @@ runs: mkdir -p $(dirname $ARCHIVE) if [ -f ${ARCHIVE} ]; then - echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before" + echo >&2 "File ${ARCHIVE} already exist. 
Something went wrong before" exit 1 fi @@ -33,10 +33,10 @@ runs: elif [ -f ${SOURCE} ]; then time tar -cf ${ARCHIVE} --zstd ${SOURCE} elif ! ls ${SOURCE} > /dev/null 2>&1; then - echo 2>&1 "${SOURCE} does not exist" + echo >&2 "${SOURCE} does not exist" exit 2 else - echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it" + echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it" exit 3 fi diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 4f3ff15364..8471d802bb 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -226,7 +226,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -356,7 +356,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -452,7 +452,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -542,7 +542,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. 
Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 691320324e..3212b76731 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1007,7 +1007,7 @@ jobs: S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true) if [ -z "${S3_KEY}" ]; then - echo 2>&1 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist" + echo >&2 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist" exit 1 fi From 13e53e5dc8012bf5c2f84d9b7737f1722c5e8f5e Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 12 Apr 2023 13:26:03 +0100 Subject: [PATCH 13/77] GitHub Workflows: use '!cancelled' instead of 'success or failure' --- .github/actions/run-python-test-set/action.yml | 2 +- .github/workflows/benchmarking.yml | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 11f5c78f19..115f555913 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -202,7 +202,7 @@ runs: prefix: latest - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: store diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 8471d802bb..a5a27e59a8 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -92,7 +92,7 @@ jobs: api_key: ${{ secrets.NEON_STAGING_API_KEY }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -282,7 +282,7 @@ 
jobs: api_key: ${{ secrets.NEON_STAGING_API_KEY }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -305,7 +305,7 @@ jobs: # # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, pgbench-compare ] strategy: @@ -379,7 +379,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -401,7 +401,7 @@ jobs: # We might change it after https://github.com/neondatabase/neon/issues/2900. # # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB) - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, clickbench-compare ] strategy: @@ -475,7 +475,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -491,7 +491,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} user-examples-compare: - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, tpch-compare ] strategy: @@ -565,7 +565,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate From f7995b3c7054cdbd32ced709ab1f8bbf4a20fce7 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Apr 2023 17:51:59 +0300 Subject: [PATCH 14/77] Revert "Update most of the dependencies to their latest versions (#3991)" (#4013) This reverts commit a64044a7a9a9f2b32a73b97da1fd230f9b510064. 
See https://neondb.slack.com/archives/C03H1K0PGKH/p1681306682795559 --- .config/hakari.toml | 2 +- Cargo.lock | 1402 ++++++----------- Cargo.toml | 26 +- libs/consumption_metrics/Cargo.toml | 17 +- libs/postgres_ffi/build.rs | 6 +- libs/remote_storage/tests/pagination_tests.rs | 7 +- libs/tracing-utils/Cargo.toml | 3 +- libs/utils/Cargo.toml | 2 +- pageserver/src/config.rs | 22 +- pageserver/src/page_service.rs | 2 +- pageserver/src/tenant.rs | 2 +- pageserver/src/tenant/config.rs | 4 +- .../tenant/remote_timeline_client/upload.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 3 +- trace/Cargo.toml | 2 + workspace_hack/Cargo.toml | 9 +- 16 files changed, 557 insertions(+), 954 deletions(-) diff --git a/.config/hakari.toml b/.config/hakari.toml index 15b939e86f..12d2d1bf9c 100644 --- a/.config/hakari.toml +++ b/.config/hakari.toml @@ -4,7 +4,7 @@ hakari-package = "workspace_hack" # Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above. -dep-format-version = "4" +dep-format-version = "3" # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. # Hakari works much better with the new feature resolver. 
diff --git a/Cargo.lock b/Cargo.lock index 8dde4ebb57..668487a9bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,69 +63,29 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" -[[package]] -name = "anstream" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-wincon", - "concolor-override", - "concolor-query", - "is-terminal", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" - -[[package]] -name = "anstyle-parse" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-wincon" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" -dependencies = [ - "anstyle", - "windows-sys 0.45.0", -] - [[package]] name = "anyhow" -version = "1.0.70" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" dependencies = [ "backtrace", ] [[package]] name = "archery" -version = "0.5.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6cd774058b1b415c4855d8b86436c04bf050c003156fe24bc326fb3fe75c343" +checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" dependencies = [ "static_assertions", ] [[package]] 
name = "asn1-rs" -version = "0.5.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0" +checksum = "cf6690c370453db30743b373a60ba498fc0d6d83b11f4abfd87a84a075db5dd4" dependencies = [ "asn1-rs-derive", "asn1-rs-impl", @@ -145,7 +105,7 @@ checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", "synstructure", ] @@ -157,47 +117,46 @@ checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] name = "async-stream" -version = "0.3.5" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" +checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" dependencies = [ "async-stream-impl", "futures-core", - "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.5" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" +checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "async-trait" -version = "0.1.68" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "atomic-polyfill" -version = "1.0.2" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c314e70d181aa6053b26e3f7fbf86d1dfff84f816a6175b967666b3506ef7289" +checksum = "d299f547288d6db8d5c3a2916f7b2f66134b15b8c1ac1c4357dd3b8752af7bb2" dependencies = [ "critical-section", ] @@ -228,13 +187,13 @@ dependencies = [ "aws-http", "aws-sdk-sso", "aws-sdk-sts", - "aws-smithy-async 0.51.0", - "aws-smithy-client 0.51.0", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", - "aws-types 0.51.0", + "aws-smithy-types", + "aws-types", "bytes", "hex", "http", @@ -247,29 +206,15 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-credential-types" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e37e62f59cf3284067337da7467d842df8cfe3f5e5c06487ac7521819cf16d" -dependencies = [ - "aws-smithy-async 0.55.1", - "aws-smithy-types 0.55.1", - "fastrand", - "tokio", - "tracing", - "zeroize", -] - [[package]] name = "aws-endpoint" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", - "aws-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", + "aws-types", "http", "regex", "tracing", @@ -281,9 +226,9 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", - "aws-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", + "aws-types", "bytes", "http", "http-body", @@ -303,15 +248,15 @@ dependencies = [ "aws-http", "aws-sig-auth", "aws-sigv4", - "aws-smithy-async 0.51.0", + "aws-smithy-async", "aws-smithy-checksums", - "aws-smithy-client 0.51.0", + "aws-smithy-client", "aws-smithy-eventstream", - 
"aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-types", "aws-smithy-xml", - "aws-types 0.51.0", + "aws-types", "bytes", "bytes-utils", "http", @@ -330,13 +275,13 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async 0.51.0", - "aws-smithy-client 0.51.0", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-types 0.51.0", - "aws-types 0.51.0", + "aws-smithy-types", + "aws-types", "bytes", "http", "tokio-stream", @@ -352,14 +297,14 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async 0.51.0", - "aws-smithy-client 0.51.0", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-http-tower", "aws-smithy-query", - "aws-smithy-types 0.51.0", + "aws-smithy-types", "aws-smithy-xml", - "aws-types 0.51.0", + "aws-types", "bytes", "http", "tower", @@ -373,20 +318,20 @@ checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" dependencies = [ "aws-sigv4", "aws-smithy-eventstream", - "aws-smithy-http 0.51.0", - "aws-types 0.51.0", + "aws-smithy-http", + "aws-types", "http", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.51.1" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1" +checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-http 0.51.0", + "aws-smithy-http", "bytes", "form_urlencoded", "hex", @@ -411,26 +356,14 @@ dependencies = [ "tokio-stream", ] -[[package]] -name = "aws-smithy-async" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - [[package]] name = "aws-smithy-checksums" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" dependencies = [ - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-http", + "aws-smithy-types", "bytes", "crc32c", "crc32fast", @@ -450,10 +383,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" dependencies = [ - "aws-smithy-async 0.51.0", - "aws-smithy-http 0.51.0", - "aws-smithy-http-tower 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-tower", + "aws-smithy-types", "bytes", "fastrand", "http", @@ -467,33 +400,13 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-client" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067" -dependencies = [ - "aws-smithy-async 0.55.1", - "aws-smithy-http 0.55.1", - "aws-smithy-http-tower 0.55.1", - "aws-smithy-types 0.55.1", - "bytes", - "fastrand", - "http", - "http-body", - "pin-project-lite", - "tokio", - "tower", - "tracing", -] - [[package]] name = "aws-smithy-eventstream" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" dependencies = [ - "aws-smithy-types 0.51.0", + "aws-smithy-types", "bytes", "crc32fast", ] @@ -505,7 +418,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-types 
0.51.0", + "aws-smithy-types", "bytes", "bytes-utils", "futures-core", @@ -521,49 +434,13 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-http" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8" -dependencies = [ - "aws-smithy-types 0.55.1", - "bytes", - "bytes-utils", - "futures-core", - "http", - "http-body", - "hyper", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - [[package]] name = "aws-smithy-http-tower" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" dependencies = [ - "aws-smithy-http 0.51.0", - "bytes", - "http", - "http-body", - "pin-project-lite", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-http-tower" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a" -dependencies = [ - "aws-smithy-http 0.55.1", - "aws-smithy-types 0.55.1", + "aws-smithy-http", "bytes", "http", "http-body", @@ -578,7 +455,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" dependencies = [ - "aws-smithy-types 0.51.0", + "aws-smithy-types", ] [[package]] @@ -587,7 +464,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" dependencies = [ - "aws-smithy-types 0.51.0", + "aws-smithy-types", "urlencoding", ] @@ -603,19 +480,6 @@ dependencies = [ "time", ] -[[package]] -name = "aws-smithy-types" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1" -dependencies = [ - "base64-simd", - "itoa", - "num-integer", - "ryu", - "time", -] - [[package]] name = "aws-smithy-xml" version = "0.51.0" @@ -631,37 +495,21 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" dependencies = [ - "aws-smithy-async 0.51.0", - "aws-smithy-client 0.51.0", - "aws-smithy-http 0.51.0", - "aws-smithy-types 0.51.0", + "aws-smithy-async", + "aws-smithy-client", + "aws-smithy-http", + "aws-smithy-types", "http", "rustc_version", "tracing", "zeroize", ] -[[package]] -name = "aws-types" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fb02591b5075d318e0083dcb76df0e151db4ce48f987ecd00e5b53c7a6ba59" -dependencies = [ - "aws-credential-types", - "aws-smithy-async 0.55.1", - "aws-smithy-client 0.55.1", - "aws-smithy-http 0.55.1", - "aws-smithy-types 0.55.1", - "http", - "rustc_version", - "tracing", -] - [[package]] name = "axum" -version = "0.6.15" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b32c5ea3aabaf4deb5f5ced2d688ec0844c881c9e6c696a8b769a05fc691e62" +checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" dependencies = [ "async-trait", "axum-core", @@ -681,15 +529,16 @@ dependencies = [ "serde", "sync_wrapper", "tower", + "tower-http", "tower-layer", "tower-service", ] [[package]] name = "axum-core" -version = "0.3.4" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" dependencies = [ "async-trait", "bytes", @@ -735,16 +584,6 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" -[[package]] -name = "base64-simd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - [[package]] name = "bincode" version = "1.3.3" @@ -756,9 +595,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.65.1" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +checksum = "8a022e58a142a46fea340d68012b9201c094e93ec3d033a944a24f8fd4a4f09a" dependencies = [ "bitflags", "cexpr", @@ -767,13 +606,12 @@ dependencies = [ "lazycell", "log", "peeking_take_while", - "prettyplease 0.2.4", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 2.0.14", + "syn", "which", ] @@ -785,18 +623,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.10.4" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" dependencies = [ "generic-array", ] [[package]] name = "bstr" -version = "1.4.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832" dependencies = [ "memchr", "once_cell", @@ -864,9 +702,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.24" +version = "0.4.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" 
+checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" dependencies = [ "iana-time-zone", "num-integer", @@ -904,9 +742,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.6.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" dependencies = [ "glob", "libc", @@ -927,38 +765,30 @@ dependencies = [ [[package]] name = "clap" -version = "4.2.1" +version = "4.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" +checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" dependencies = [ - "clap_builder", - "clap_derive", - "once_cell", -] - -[[package]] -name = "clap_builder" -version = "4.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" -dependencies = [ - "anstream", - "anstyle", "bitflags", - "clap_lex 0.4.1", + "clap_derive", + "clap_lex 0.3.1", + "is-terminal", + "once_cell", "strsim", + "termcolor", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" dependencies = [ "heck", + "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] @@ -972,9 +802,12 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" 
+dependencies = [ + "os_str_bytes", +] [[package]] name = "close_fds" @@ -1026,7 +859,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.2.1", + "clap 4.1.4", "compute_api", "futures", "hyper", @@ -1050,21 +883,6 @@ dependencies = [ "workspace_hack", ] -[[package]] -name = "concolor-override" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" - -[[package]] -name = "concolor-query" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" -dependencies = [ - "windows-sys 0.45.0", -] - [[package]] name = "const_format" version = "0.2.30" @@ -1103,7 +921,7 @@ name = "control_plane" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.2.1", + "clap 4.1.4", "comfy-table", "git-version", "nix", @@ -1139,15 +957,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" dependencies = [ "libc", ] @@ -1214,9 +1032,9 @@ checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = 
"c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1224,9 +1042,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1235,22 +1053,22 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.14" +version = "0.9.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset 0.8.0", + "memoffset 0.7.1", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" dependencies = [ "cfg-if", ] @@ -1292,9 +1110,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.94" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" +checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9" dependencies = [ "cc", "cxxbridge-flags", @@ -1304,9 +1122,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.94" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" +checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d" dependencies = [ 
"cc", "codespan-reporting", @@ -1314,31 +1132,31 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.14", + "syn", ] [[package]] name = "cxxbridge-flags" -version = "1.0.94" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" +checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a" [[package]] name = "cxxbridge-macro" -version = "1.0.94" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" +checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "darling" -version = "0.14.4" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" dependencies = [ "darling_core", "darling_macro", @@ -1346,27 +1164,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.4" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 1.0.109", + "syn", ] [[package]] name = "darling_macro" -version = "0.14.4" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1400,9 +1218,9 @@ dependencies = [ 
[[package]] name = "der-parser" -version = "8.2.0" +version = "8.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e" +checksum = "42d4bc9b0db0a0df9ae64634ac5bdefb7afcb534e182275ca0beadbe486701c1" dependencies = [ "asn1-rs", "displaydoc", @@ -1431,7 +1249,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1451,9 +1269,9 @@ dependencies = [ [[package]] name = "enum-map" -version = "2.5.0" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "988f0d17a0fa38291e5f41f71ea8d46a5d5497b9054d5a759fae2cbb819f2356" +checksum = "50c25992259941eb7e57b936157961b217a4fc8597829ddef0596d6c3cd86e1a" dependencies = [ "enum-map-derive", ] @@ -1466,7 +1284,7 @@ checksum = "2a4da76b3b6116d758c7ba93f7ec6a35d2e2cf24feda76c6e38a375f4d5c59f2" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1487,7 +1305,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1505,13 +1323,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.1" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.48.0", + "winapi", ] [[package]] @@ -1543,23 +1361,23 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.9.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" 
dependencies = [ "instant", ] [[package]] name = "filetime" -version = "0.2.21" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" +checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", - "windows-sys 0.48.0", + "redox_syscall", + "windows-sys 0.42.0", ] [[package]] @@ -1604,9 +1422,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" dependencies = [ "futures-channel", "futures-core", @@ -1619,9 +1437,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" dependencies = [ "futures-core", "futures-sink", @@ -1629,15 +1447,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" dependencies = [ "futures-core", "futures-task", @@ -1646,32 +1464,32 @@ dependencies = [ [[package]] name = "futures-io" 
-version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" [[package]] name = "futures-timer" @@ -1681,9 +1499,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" dependencies = [ "futures-channel", "futures-core", @@ -1699,9 +1517,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = 
"bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" dependencies = [ "typenum", "version_check", @@ -1709,22 +1527,20 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", - "wasm-bindgen", ] [[package]] name = "gimli" -version = "0.27.2" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" +checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" [[package]] name = "git-version" @@ -1745,7 +1561,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -1756,9 +1572,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.16" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" dependencies = [ "bytes", "fnv", @@ -1823,7 +1639,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.8", + "spin 0.9.4", "stable_deref_trait", ] @@ -1851,12 +1667,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" - [[package]] name = "hex" version = "0.4.3" @@ -1868,9 +1678,9 @@ dependencies = [ [[package]] name = "hex-literal" -version = "0.4.1" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" [[package]] name = "hmac" @@ -1894,9 +1704,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.9" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", @@ -1914,6 +1724,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-range-header" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" + [[package]] name = "httparse" version = "1.8.0" @@ -1944,9 +1760,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.25" +version = "0.14.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" +checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" dependencies = [ "bytes", "futures-channel", @@ -1959,7 +1775,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2", "tokio", "tower-service", "tracing", @@ -1975,10 +1791,10 @@ dependencies = [ "http", "hyper", "log", - "rustls 0.20.8", + "rustls", "rustls-native-certs", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", ] [[package]] @@ -2008,16 +1824,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" dependencies = [ "android_system_properties", "core-foundation-sys", 
"iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "winapi", ] [[package]] @@ -2048,9 +1864,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -2088,31 +1904,30 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ - "hermit-abi 0.3.1", "libc", - "windows-sys 0.48.0", + "windows-sys 0.42.0", ] [[package]] name = "ipnet" -version = "2.7.2" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" +checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" [[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi 0.2.6", "io-lifetimes", - "rustix 0.37.11", - "windows-sys 0.48.0", + "rustix", + "windows-sys 0.42.0", ] [[package]] @@ -2126,9 +1941,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = 
"js-sys" @@ -2141,11 +1956,11 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.3.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" +checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828" dependencies = [ - "base64 0.21.0", + "base64 0.13.1", "pem", "ring", "serde", @@ -2187,9 +2002,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.141" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libloading" @@ -2216,12 +2031,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" -[[package]] -name = "linux-raw-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" - [[package]] name = "lock_api" version = "0.4.9" @@ -2314,9 +2123,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.17" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" [[package]] name = "mime_guess" @@ -2336,23 +2145,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = 
"f2e212582ede878b109755efd0773a4f0f4ec851584cf0aefbeb4d9ecc114822" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.6" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -2385,6 +2194,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom8" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" +dependencies = [ + "memchr", +] + [[package]] name = "notify" version = "5.1.0" @@ -2473,9 +2291,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" [[package]] name = "oorandom" @@ -2540,8 +2358,8 @@ dependencies = [ "futures-util", "opentelemetry", "prost", - "tonic 0.8.3", - "tonic-build 0.8.4", + "tonic", + "tonic-build", ] [[package]] @@ -2593,9 +2411,9 @@ dependencies = [ [[package]] name = "os_info" -version = "3.7.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e" +checksum = "5c424bc68d15e0778838ac013b5b3449544d8133633d8016319e7e05a820b8c0" dependencies = [ "log", "serde", @@ -2604,15 +2422,9 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.5.0" +version = "6.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" - -[[package]] -name = "outref" 
-version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" [[package]] name = "overload" @@ -2630,7 +2442,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.2.1", + "clap 4.1.4", "close_fds", "const_format", "consumption_metrics", @@ -2727,7 +2539,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "smallvec", "windows-sys 0.45.0", ] @@ -2755,9 +2567,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" dependencies = [ "fixedbitset", "indexmap", @@ -2798,7 +2610,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -2892,14 +2704,14 @@ dependencies = [ "futures", "once_cell", "pq_proto", - "rustls 0.20.8", + "rustls", "rustls-pemfile", "serde", "thiserror", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls 0.23.4", + "tokio-rustls", "tracing", "workspace_hack", ] @@ -2965,22 +2777,36 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.25" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" dependencies = [ "proc-macro2", - "syn 1.0.109", + "syn", ] [[package]] -name = "prettyplease" -version = "0.2.4" +name = 
"proc-macro-error" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", - "syn 2.0.14", + "quote", + "version_check", ] [[package]] @@ -2991,9 +2817,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" dependencies = [ "unicode-ident", ] @@ -3008,7 +2834,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix 0.36.12", + "rustix", ] [[package]] @@ -3029,9 +2855,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.8" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" +checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" dependencies = [ "bytes", "prost-derive", @@ -3039,9 +2865,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.8" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c828f93f5ca4826f97fedcbd3f9a536c16b12cff3dbbb4a007f932bbad95b12" +checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" dependencies = [ "bytes", "heck", @@ -3050,34 +2876,35 @@ dependencies = 
[ "log", "multimap", "petgraph", - "prettyplease 0.1.25", + "prettyplease", "prost", "prost-types", "regex", - "syn 1.0.109", + "syn", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.11.8" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea9b0f8cbe5e15a8a042d030bd96668db28ecb567ec37d691971ff5731d2b1b" +checksum = "8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] name = "prost-types" -version = "0.11.8" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" +checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" dependencies = [ + "bytes", "prost", ] @@ -3092,7 +2919,7 @@ dependencies = [ "bstr", "bytes", "chrono", - "clap 4.2.1", + "clap 4.1.4", "consumption_metrics", "futures", "git-version", @@ -3122,20 +2949,20 @@ dependencies = [ "reqwest-tracing", "routerify", "rstest", - "rustls 0.20.8", + "rustls", "rustls-pemfile", "scopeguard", "serde", "serde_json", "sha2", - "socket2 0.5.2", + "socket2", "sync_wrapper", "thiserror", "tls-listener", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls 0.23.4", + "tokio-rustls", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -3143,16 +2970,16 @@ dependencies = [ "url", "utils", "uuid", - "webpki-roots 0.23.0", + "webpki-roots", "workspace_hack", "x509-parser", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" dependencies = [ "proc-macro2", ] @@ -3189,9 +3016,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.7.0" +version = 
"1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ "either", "rayon-core", @@ -3199,9 +3026,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -3230,20 +3057,11 @@ dependencies = [ "bitflags", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" -version = "1.7.3" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ "aho-corasick", "memchr", @@ -3261,9 +3079,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "remote_storage" @@ -3273,8 +3091,8 @@ dependencies = [ "async-trait", "aws-config", "aws-sdk-s3", - "aws-smithy-http 0.51.0", - "aws-types 0.55.0", + "aws-smithy-http", + "aws-types", "hyper", "metrics", "once_cell", @@ -3293,9 +3111,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.16" +version = "0.11.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" dependencies = [ "base64 0.21.0", "bytes", @@ -3315,27 +3133,27 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.20.8", + "rustls", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 0.22.6", + "webpki-roots", "winreg", ] [[package]] name = "reqwest-middleware" -version = "0.2.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99c50db2c7ccd815f976473dd7d0bde296f8c3b77c383acf4fc021cdcf10852b" +checksum = "4a1c03e9011a8c59716ad13115550469e081e2e9892656b0ba6a47c907921894" dependencies = [ "anyhow", "async-trait", @@ -3348,12 +3166,11 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a71d77945a1c5ae9604f0504901e77a1e2e71f2932b1cb8103078179ca62ff8" +checksum = "b739d87a6b2cf4743968ad2b4cef648fbe0204c19999509824425babb2097bce" dependencies = [ "async-trait", - "getrandom", "opentelemetry", "reqwest", "reqwest-middleware", @@ -3392,18 +3209,18 @@ dependencies = [ [[package]] name = "rpds" -version = "0.13.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bd6ce569b15c331b1e5fd8cf6adb0bf240678b5f0cdc4d0f41e11683f6feba9" +checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000" dependencies = [ "archery", ] [[package]] name = "rstest" -version = "0.17.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" +checksum = 
"b07f2d176c472198ec1e6551dc7da28f1c089652f66a7b722676c2238ebc0edf" dependencies = [ "futures", "futures-timer", @@ -3413,23 +3230,23 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.17.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8" +checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" dependencies = [ "cfg-if", "proc-macro2", "quote", "rustc_version", - "syn 1.0.109", + "syn", "unicode-ident", ] [[package]] name = "rustc-demangle" -version = "0.1.22" +version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" [[package]] name = "rustc-hash" @@ -3457,30 +3274,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.12" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0af200a3324fa5bcd922e84e9b55a298ea9f431a489f01961acdebc6e908f25" +checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.1.4", - "windows-sys 0.45.0", -] - -[[package]] -name = "rustix" -version = "0.37.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" -dependencies = [ - "bitflags", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.1", - "windows-sys 0.48.0", + "linux-raw-sys", + "windows-sys 0.42.0", ] [[package]] @@ -3495,18 +3298,6 @@ dependencies = [ "webpki", ] -[[package]] -name = "rustls" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" -dependencies = [ - "log", - 
"ring", - "rustls-webpki", - "sct", -] - [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -3528,27 +3319,17 @@ dependencies = [ "base64 0.21.0", ] -[[package]] -name = "rustls-webpki" -version = "0.100.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "safekeeper" @@ -3560,7 +3341,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.2.1", + "clap 4.1.4", "const_format", "crc32c", "fs2", @@ -3633,9 +3414,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.5" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" [[package]] name = "sct" @@ -3672,33 +3453,33 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.17" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" [[package]] name = "sentry" -version = "0.30.0" +version = "0.29.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5ce6d3512e2617c209ec1e86b0ca2fea06454cd34653c91092bf0f3ec41f8e3" +checksum = "a6097dc270a9c4555c5d6222ed243eaa97ff38e29299ed7c5cb36099033c604e" dependencies = [ "httpdate", "reqwest", - "rustls 0.20.8", + "rustls", "sentry-backtrace", "sentry-contexts", "sentry-core", "sentry-panic", "tokio", "ureq", - "webpki-roots 0.22.6", + "webpki-roots", ] [[package]] name = "sentry-backtrace" -version = "0.30.0" +version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7fe408d4d1f8de188a9309916e02e129cbe51ca19e55badea5a64899399b1a" +checksum = "9d92d1e4d591534ae4f872d6142f3b500f4ffc179a6aed8a3e86c7cc96d10a6a" dependencies = [ "backtrace", "once_cell", @@ -3708,9 +3489,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.30.0" +version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5695096a059a89973ec541062d331ff4c9aeef9c2951416c894f0fff76340e7d" +checksum = "3afa877b1898ff67dd9878cf4bec4e53cef7d3be9f14b1fc9e4fcdf36f8e4259" dependencies = [ "hostname", "libc", @@ -3722,9 +3503,9 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.30.0" +version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b22828bfd118a7b660cf7a155002a494755c0424cebb7061e4743ecde9c7dbc" +checksum = "fc43eb7e4e3a444151a0fe8a0e9ce60eabd905dae33d66e257fa26f1b509c1bd" dependencies = [ "once_cell", "rand", @@ -3735,9 +3516,9 @@ dependencies = [ [[package]] name = "sentry-panic" -version = "0.30.0" +version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4ced2a7a8c14899d58eec402d946f69d5ed26a3fc363a7e8b1e5cb88473a01" +checksum = "ccab4fab11e3e63c45f4524bee2e75cde39cdf164cb0b0cbe6ccd1948ceddf66" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3745,9 +3526,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.30.0" +version 
= "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "360ee3270f7a4a1eee6c667f7d38360b995431598a73b740dfe420da548d9cc9" +checksum = "f63708ec450b6bdcb657af760c447416d69c38ce421f34e5e2e9ce8118410bc7" dependencies = [ "debugid", "getrandom", @@ -3762,44 +3543,35 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.160" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "itoa", "ryu", "serde", ] -[[package]] -name = "serde_spanned" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" -dependencies = [ - "serde", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3814,9 +3586,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.3.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331bb8c3bf9b92457ab7abecf07078c13f7d270ba490103e84e8b014490cd0b0" +checksum = "30d904179146de381af4c93d3af6ca4984b3152db687dacb9c3c35e86f39809c" dependencies = [ "base64 
0.13.1", "chrono", @@ -3830,14 +3602,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.3.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" +checksum = "a1966009f3c05f095697c537312f5415d1e3ed31ce0a56942bac4c771c5c335e" dependencies = [ "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -3879,9 +3651,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" -version = "0.3.15" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ "libc", "signal-hook-registry", @@ -3900,9 +3672,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc", ] @@ -3927,9 +3699,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.8" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" dependencies = [ "autocfg", ] @@ -3942,24 +3714,14 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" 
+checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" dependencies = [ "libc", "winapi", ] -[[package]] -name = "socket2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d283f86695ae989d1e18440a943880967156325ba025f05049946bff47bcc2b" -dependencies = [ - "libc", - "windows-sys 0.48.0", -] - [[package]] name = "spin" version = "0.5.2" @@ -3968,9 +3730,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.8" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" dependencies = [ "lock_api", ] @@ -3994,7 +3756,7 @@ dependencies = [ "anyhow", "async-stream", "bytes", - "clap 4.2.1", + "clap 4.1.4", "const_format", "futures", "futures-core", @@ -4008,8 +3770,8 @@ dependencies = [ "prost", "tokio", "tokio-stream", - "tonic 0.9.1", - "tonic-build 0.9.1", + "tonic", + "tonic-build", "tracing", "utils", "workspace_hack", @@ -4047,7 +3809,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn", ] [[package]] @@ -4064,20 +3826,9 @@ checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" [[package]] name = "syn" -version = "1.0.109" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -4098,7 +3849,7 @@ 
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", "unicode-xid", ] @@ -4115,24 +3866,24 @@ dependencies = [ [[package]] name = "task-local-extensions" -version = "0.1.4" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba323866e5d033818e3240feeb9f7db2c4296674e4d9e16b97b7bf8f490434e8" +checksum = "4167afbec18ae012de40f8cf1b9bf48420abb390678c34821caa07d924941cc4" dependencies = [ - "pin-utils", + "tokio", ] [[package]] name = "tempfile" -version = "3.5.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.3.5", - "rustix 0.37.11", - "windows-sys 0.45.0", + "redox_syscall", + "rustix", + "windows-sys 0.42.0", ] [[package]] @@ -4172,7 +3923,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d" dependencies = [ "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -4183,39 +3934,38 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" dependencies = [ - "cfg-if", "once_cell", ] [[package]] name = "time" -version = "0.3.20" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" dependencies = [ "itoa", "serde", @@ -4231,9 +3981,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" -version = "0.2.8" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" dependencies = [ "time-core", ] @@ -4259,9 +4009,9 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.1" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tls-listener" @@ -4274,25 +4024,26 @@ dependencies = [ "pin-project-lite", "thiserror", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", ] [[package]] name = "tokio" -version = "1.27.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" dependencies = [ "autocfg", "bytes", "libc", + "memchr", "mio", 
"num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2 0.4.9", + "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys 0.42.0", ] [[package]] @@ -4307,13 +4058,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.0.0" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn", ] [[package]] @@ -4334,7 +4085,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "socket2 0.4.9", + "socket2", "tokio", "tokio-util", ] @@ -4347,10 +4098,10 @@ checksum = "606f2b73660439474394432239c82249c0d45eb5f23d91f401be1e33590444a7" dependencies = [ "futures", "ring", - "rustls 0.20.8", + "rustls", "tokio", "tokio-postgres", - "tokio-rustls 0.23.4", + "tokio-rustls", ] [[package]] @@ -4359,26 +4110,16 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls 0.20.8", + "rustls", "tokio", "webpki", ] -[[package]] -name = "tokio-rustls" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" -dependencies = [ - "rustls 0.21.0", - "tokio", -] - [[package]] name = "tokio-stream" -version = "0.1.12" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", @@ -4393,7 +4134,7 @@ dependencies = [ "filetime", "futures-core", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "tokio", "tokio-stream", 
"xattr", @@ -4413,9 +4154,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" dependencies = [ "bytes", "futures-core", @@ -4427,36 +4168,33 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" dependencies = [ "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.6.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.8" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +checksum = "a34cc558345efd7e88b9eda9626df2138b80bb46a7606f695e751c892bc7dac6" dependencies = [ "indexmap", + "itertools", + "nom8", "serde", - "serde_spanned", "toml_datetime", - "winnow", ] [[package]] @@ -4481,7 +4219,10 @@ dependencies = [ "pin-project", "prost", "prost-derive", + "rustls-native-certs", + "rustls-pemfile", "tokio", + "tokio-rustls", "tokio-stream", "tokio-util", "tower", @@ -4491,62 +4232,17 @@ dependencies = [ "tracing-futures", ] -[[package]] -name = "tonic" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38bd8e87955eb13c1986671838177d6792cdc52af9bffced0d2c8a9a7f741ab3" -dependencies = [ 
- "async-stream", - "async-trait", - "axum", - "base64 0.21.0", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost", - "rustls-native-certs", - "rustls-pemfile", - "tokio", - "tokio-rustls 0.24.0", - "tokio-stream", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tonic-build" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" dependencies = [ - "prettyplease 0.1.25", + "prettyplease", "proc-macro2", "prost-build", "quote", - "syn 1.0.109", -] - -[[package]] -name = "tonic-build" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f60a933bbea70c95d633c04c951197ddf084958abaa2ed502a3743bdd8d8dd7" -dependencies = [ - "prettyplease 0.1.25", - "proc-macro2", - "prost-build", - "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -4569,6 +4265,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" +dependencies = [ + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-range-header", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.2" @@ -4586,7 +4301,7 @@ name = "trace" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.2.1", + "clap 4.1.4", "pageserver_api", "utils", "workspace_hack", @@ -4613,7 +4328,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -4759,15 +4474,15 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-normalization" @@ -4805,10 +4520,10 @@ dependencies = [ "base64 0.13.1", "log", "once_cell", - "rustls 0.20.8", + "rustls", "url", "webpki", - "webpki-roots 0.22.6", + "webpki-roots", ] [[package]] @@ -4835,12 +4550,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - [[package]] name = "utils" version = "0.1.0" @@ -4884,9 +4593,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" +checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" dependencies = [ "getrandom", "serde", @@ -4904,18 +4613,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "vsimd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" - [[package]] name = "wal_craft" version = "0.1.0" dependencies = [ "anyhow", - "clap 
4.2.1", + "clap 4.1.4", "env_logger", "log", "once_cell", @@ -4927,11 +4630,12 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.3" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", + "winapi", "winapi-util", ] @@ -4972,7 +4676,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-shared", ] @@ -5006,7 +4710,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5046,15 +4750,6 @@ dependencies = [ "webpki", ] -[[package]] -name = "webpki-roots" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125" -dependencies = [ - "rustls-webpki", -] - [[package]] name = "which" version = "4.4.0" @@ -5097,28 +4792,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.0", -] - [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 
0.42.2", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] @@ -5127,140 +4813,65 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.0", + "windows-targets", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" -dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.2" +version = "0.42.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" [[package]] name = "windows_aarch64_msvc" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" [[package]] name = "windows_i686_gnu" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" [[package]] name = "windows_i686_msvc" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" [[package]] name = 
"windows_x86_64_gnu" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" [[package]] name = "windows_x86_64_msvc" -version = "0.42.2" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" - -[[package]] -name = "winnow" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" -dependencies = [ - "memchr", -] +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" [[package]] name = "winreg" @@ -5279,8 +4890,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.2.1", - "clap_builder", + "clap 4.1.4", "crossbeam-utils", "digest", "either", @@ -5292,6 +4902,7 @@ dependencies = [ "futures-sink", 
"futures-util", "hashbrown 0.12.3", + "indexmap", "itertools", "libc", "log", @@ -5306,18 +4917,16 @@ dependencies = [ "regex-syntax", "reqwest", "ring", - "rustls 0.20.8", + "rustls", "scopeguard", "serde", "serde_json", - "socket2 0.4.9", - "syn 1.0.109", - "syn 2.0.14", + "socket2", + "syn", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", "tokio-util", - "toml_datetime", - "toml_edit", + "tonic", "tower", "tracing", "tracing-core", @@ -5327,11 +4936,12 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.15.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab0c2f54ae1d92f4fcb99c0b7ccf0b1e3451cbd395e5f115ccbdbcb18d4f634" +checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" dependencies = [ "asn1-rs", + "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", @@ -5359,15 +4969,15 @@ checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" [[package]] name = "yasna" -version = "0.5.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" +checksum = "aed2e7a52e3744ab4d0c05c20aa065258e84c49fd4226f5191b2ed29712710b4" dependencies = [ "time", ] [[package]] name = "zeroize" -version = "1.6.0" +version = "1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" diff --git a/Cargo.toml b/Cargo.toml index 0b545e6190..679605dc1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,10 @@ atty = "0.2.14" aws-config = { version = "0.51.0", default-features = false, features=["rustls"] } aws-sdk-s3 = "0.21.0" aws-smithy-http = "0.51.0" -aws-types = "0.55" +aws-types = "0.51.0" base64 = "0.13.0" bincode = "1.3" -bindgen = "0.65" +bindgen = "0.61" bstr = "1.0" byteorder = "1.4" 
bytes = "1.0" @@ -50,7 +50,7 @@ git-version = "0.3" hashbrown = "0.13" hashlink = "0.8.1" hex = "0.4" -hex-literal = "0.4" +hex-literal = "0.3" hmac = "0.12.1" hostname = "0.3.1" humantime = "2.1" @@ -80,18 +80,18 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls" reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] } reqwest-middleware = "0.2.0" routerify = "3" -rpds = "0.13" +rpds = "0.12.0" rustls = "0.20" rustls-pemfile = "1" rustls-split = "0.3" scopeguard = "1.1" -sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } +sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" serde_with = "2.0" sha2 = "0.10.2" signal-hook = "0.3" -socket2 = "0.5" +socket2 = "0.4.4" strum = "0.24" strum_macros = "0.24" svg_fmt = "0.4.1" @@ -106,17 +106,17 @@ tokio-postgres-rustls = "0.9.0" tokio-rustls = "0.23" tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["io"] } -toml = "0.7" -toml_edit = "0.19" -tonic = {version = "0.9", features = ["tls", "tls-roots"]} +toml = "0.5" +toml_edit = { version = "0.17", features = ["easy"] } +tonic = {version = "0.8", features = ["tls", "tls-roots"]} tracing = "0.1" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } url = "2.2" uuid = { version = "1.2", features = ["v4", "serde"] } walkdir = "2.3.2" -webpki-roots = "0.23" -x509-parser = "0.15" +webpki-roots = "0.22.5" +x509-parser = "0.14" ## TODO replace this with tracing env_logger = "0.10" @@ -154,9 +154,9 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" } ## Build dependencies criterion = "0.4" rcgen = "0.10" -rstest = "0.17" +rstest = "0.16" tempfile = "3.4" -tonic-build = "0.9" +tonic-build = "0.8" # This is only needed for proxy's tests. 
# TODO: we should probably fork `tokio-postgres-rustls` instead. diff --git a/libs/consumption_metrics/Cargo.toml b/libs/consumption_metrics/Cargo.toml index 3f290821c2..f26aa2fbc5 100644 --- a/libs/consumption_metrics/Cargo.toml +++ b/libs/consumption_metrics/Cargo.toml @@ -4,12 +4,13 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" -[dependencies] -anyhow.workspace = true -chrono.workspace = true -rand.workspace = true -serde.workspace = true -serde_with.workspace = true -utils.workspace = true +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -workspace_hack.workspace = true +[dependencies] +anyhow = "1.0.68" +chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } +rand = "0.8.3" +serde = "1.0.152" +serde_with = "2.1.0" +utils = { version = "0.1.0", path = "../utils" } +workspace_hack = { version = "0.1.0", path = "../../workspace_hack" } diff --git a/libs/postgres_ffi/build.rs b/libs/postgres_ffi/build.rs index f7e39751ef..66221af522 100644 --- a/libs/postgres_ffi/build.rs +++ b/libs/postgres_ffi/build.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use std::process::Command; use anyhow::{anyhow, Context}; -use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; +use bindgen::callbacks::ParseCallbacks; #[derive(Debug)] struct PostgresFfiCallbacks; @@ -20,7 +20,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { // Add any custom #[derive] attributes to the data structures that bindgen // creates. - fn add_derives(&self, derive_info: &DeriveInfo) -> Vec { + fn add_derives(&self, name: &str) -> Vec { // This is the list of data structures that we want to serialize/deserialize. let serde_list = [ "XLogRecord", @@ -31,7 +31,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { "ControlFileData", ]; - if serde_list.contains(&derive_info.name) { + if serde_list.contains(&name) { vec![ "Default".into(), // Default allows us to easily fill the padding fields with 0. 
"Serialize".into(), diff --git a/libs/remote_storage/tests/pagination_tests.rs b/libs/remote_storage/tests/pagination_tests.rs index 048e99d841..eb52409c44 100644 --- a/libs/remote_storage/tests/pagination_tests.rs +++ b/libs/remote_storage/tests/pagination_tests.rs @@ -204,7 +204,12 @@ async fn upload_s3_data( let data = format!("remote blob data {i}").into_bytes(); let data_len = data.len(); task_client - .upload(std::io::Cursor::new(data), data_len, &blob_path, None) + .upload( + Box::new(std::io::Cursor::new(data)), + data_len, + &blob_path, + None, + ) .await?; Ok::<_, anyhow::Error>((blob_prefix, blob_path)) diff --git a/libs/tracing-utils/Cargo.toml b/libs/tracing-utils/Cargo.toml index b285c9b5b0..8c3d3f9063 100644 --- a/libs/tracing-utils/Cargo.toml +++ b/libs/tracing-utils/Cargo.toml @@ -14,5 +14,4 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tracing.workspace = true tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true - -workspace_hack.workspace = true +workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index dc6326e73e..391bc52a80 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -33,7 +33,7 @@ serde_with.workspace = true strum.workspace = true strum_macros.workspace = true url.workspace = true -uuid.workspace = true +uuid = { version = "1.2", features = ["v4", "serde"] } metrics.workspace = true workspace_hack.workspace = true diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 0c87e208c8..19f0f22815 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -725,9 +725,8 @@ impl PageServerConf { "disk_usage_based_eviction" => { tracing::info!("disk_usage_based_eviction: {:#?}", &item); builder.disk_usage_based_eviction( - deserialize_from_item_string("disk_usage_based_eviction", item) - .context("parse disk_usage_based_eviction")? 
- ) + toml_edit::de::from_item(item.clone()) + .context("parse disk_usage_based_eviction")?) }, "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?), _ => bail!("unrecognized pageserver option '{key}'"), @@ -828,14 +827,14 @@ impl PageServerConf { if let Some(eviction_policy) = item.get("eviction_policy") { t_conf.eviction_policy = Some( - deserialize_from_item_string("eviction_policy", eviction_policy) + toml_edit::de::from_item(eviction_policy.clone()) .context("parse eviction_policy")?, ); } if let Some(item) = item.get("min_resident_size_override") { t_conf.min_resident_size_override = Some( - deserialize_from_item_string("min_resident_size_override", item) + toml_edit::de::from_item(item.clone()) .context("parse min_resident_size_override")?, ); } @@ -939,19 +938,6 @@ where }) } -fn deserialize_from_item_string(name: &str, item: &Item) -> anyhow::Result -where - T: serde::de::DeserializeOwned, -{ - // ValueDeserializer::new is not public, so use the ValueDeserializer's documented way - let item_string = item.to_string(); - let deserializer = item_string - .trim() - .parse::() - .with_context(|| format!("parsing item for node {name} as ValueDeserializer"))?; - T::deserialize(deserializer).with_context(|| format!("deserializing item for node {name}")) -} - /// Configurable semaphore permits setting. /// /// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index bd38a7a2f3..c0e4a2a9cf 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -65,7 +65,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream { // We were requested to shut down. 
- let msg = "pageserver is shutting down".to_string(); + let msg = format!("pageserver is shutting down"); let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)); Err(QueryError::Other(anyhow::anyhow!(msg))) } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 67bc1b36b0..03a4ff8c8e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1876,7 +1876,7 @@ impl Tenant { .to_string(); // Convert the config to a toml file. - conf_content += &toml_edit::ser::to_string(&tenant_conf)?; + conf_content += &toml_edit::easy::to_string(&tenant_conf)?; let mut target_config_file = VirtualFile::open_with_options( target_config_path, diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 9b719db180..cdabb23a7b 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -275,9 +275,9 @@ mod tests { ..TenantConfOpt::default() }; - let toml_form = toml_edit::ser::to_string(&small_conf).unwrap(); + let toml_form = toml_edit::easy::to_string(&small_conf).unwrap(); assert_eq!(toml_form, "gc_horizon = 42\n"); - assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap()); + assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap()); let json_form = serde_json::to_string(&small_conf).unwrap(); assert_eq!(json_form, "{\"gc_horizon\":42}"); diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 699121ccd9..ce9f4d9bf8 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -74,7 +74,7 @@ pub(super) async fn upload_timeline_layer<'a>( })?; storage - .upload(source_file, fs_size, &storage_path, None) + .upload(Box::new(source_file), fs_size, &storage_path, None) .await .with_context(|| { format!( diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 
de7b634ba0..d7ace28426 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -23,6 +23,7 @@ use std::convert::Infallible; use std::net::SocketAddr; use std::pin::Pin; use std::sync::Arc; +use std::task::Poll; use std::time::Duration; use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; @@ -373,7 +374,7 @@ impl BrokerService for Broker { Ok(info) => yield info, Err(RecvError::Lagged(skipped_msg)) => { missed_msgs += skipped_msg; - if (futures::poll!(Box::pin(warn_interval.tick()))).is_ready() { + if let Poll::Ready(_) = futures::poll!(Box::pin(warn_interval.tick())) { warn!("subscription id={}, key={:?} addr={:?} dropped {} messages, channel is full", subscriber.id, subscriber.key, subscriber.remote_addr, missed_msgs); missed_msgs = 0; diff --git a/trace/Cargo.toml b/trace/Cargo.toml index d6eed3f49c..6ced992d4c 100644 --- a/trace/Cargo.toml +++ b/trace/Cargo.toml @@ -4,6 +4,8 @@ version = "0.1.0" edition.workspace = true license.workspace = true +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + [dependencies] clap.workspace = true anyhow.workspace = true diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index f735ffed4c..f885f4a94d 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -18,7 +18,6 @@ byteorder = { version = "1" } bytes = { version = "1", features = ["serde"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } clap = { version = "4", features = ["derive", "string"] } -clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] } crossbeam-utils = { version = "0.8" } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } @@ -30,6 +29,7 @@ futures-executor = { version = "0.3" } futures-sink = { version = "0.3" } futures-util = { version = "0.3", features = 
["channel", "io", "sink"] } hashbrown = { version = "0.12", features = ["raw"] } +indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -52,8 +52,7 @@ socket2 = { version = "0.4", default-features = false, features = ["all"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "sync", "time"] } tokio-rustls = { version = "0.23" } tokio-util = { version = "0.7", features = ["codec", "io"] } -toml_datetime = { version = "0.6", default-features = false, features = ["serde"] } -toml_edit = { version = "0.19", features = ["serde"] } +tonic = { version = "0.8", features = ["tls-roots"] } tower = { version = "0.4", features = ["balance", "buffer", "limit", "retry", "timeout", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } @@ -65,6 +64,7 @@ anyhow = { version = "1", features = ["backtrace"] } bytes = { version = "1", features = ["serde"] } either = { version = "1" } hashbrown = { version = "0.12", features = ["raw"] } +indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -74,7 +74,6 @@ prost = { version = "0.11" } regex = { version = "1" } regex-syntax = { version = "0.6" } serde = { version = "1", features = ["alloc", "derive"] } -syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } +syn = { version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } ### END HAKARI SECTION From 
5d0ecadf7cb56039ad541f515135e94d634f1752 Mon Sep 17 00:00:00 2001 From: Stas Kelvich Date: Wed, 12 Apr 2023 16:16:39 +0300 Subject: [PATCH 15/77] Add support for non-SNI case in multi-cert proxy When no SNI is provided use the default certificate, otherwise we can't get to the options parameter which can be used to set endpoint name too. That means that non-SNI flow will not work for CNAME domains in verify-full mode. --- proxy/src/config.rs | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/proxy/src/config.rs b/proxy/src/config.rs index ad51502b49..0ceb556ca1 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -40,7 +40,7 @@ pub fn configure_tls( let mut cert_resolver = CertResolver::new(); // add default certificate - cert_resolver.add_cert(key_path, cert_path)?; + cert_resolver.add_cert(key_path, cert_path, true)?; // add extra certificates if let Some(certs_dir) = certs_dir { @@ -52,8 +52,11 @@ pub fn configure_tls( let key_path = path.join("tls.key"); let cert_path = path.join("tls.crt"); if key_path.exists() && cert_path.exists() { - cert_resolver - .add_cert(&key_path.to_string_lossy(), &cert_path.to_string_lossy())?; + cert_resolver.add_cert( + &key_path.to_string_lossy(), + &cert_path.to_string_lossy(), + false, + )?; } } } @@ -78,16 +81,23 @@ pub fn configure_tls( struct CertResolver { certs: HashMap>, + default: Option>, } impl CertResolver { fn new() -> Self { Self { certs: HashMap::new(), + default: None, } } - fn add_cert(&mut self, key_path: &str, cert_path: &str) -> anyhow::Result<()> { + fn add_cert( + &mut self, + key_path: &str, + cert_path: &str, + is_default: bool, + ) -> anyhow::Result<()> { let priv_key = { let key_bytes = std::fs::read(key_path).context("TLS key file")?; let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]) @@ -136,10 +146,13 @@ impl CertResolver { "Failed to parse common name from certificate at '{cert_path}'." 
))?; - self.certs.insert( - common_name, - Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)), - ); + let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)); + + if is_default { + self.default = Some(cert.clone()); + } + + self.certs.insert(common_name, cert); Ok(()) } @@ -172,7 +185,17 @@ impl rustls::server::ResolvesServerCert for CertResolver { } } } else { - None + // No SNI, use the default certificate, otherwise we can't get to + // options parameter which can be used to set endpoint name too. + // That means that non-SNI flow will not work for CNAME domains in + // verify-full mode. + // + // If that will be a problem we can: + // + // a) Instead of multi-cert approach use single cert with extra + // domains listed in Subject Alternative Name (SAN). + // b) Deploy separate proxy instances for extra domains. + self.default.as_ref().cloned() } } } From 732acc54c1fa744fc0c5c48158c7716371e70b89 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 13 Apr 2023 10:19:34 +0300 Subject: [PATCH 16/77] Add check for duplicates of generated image layers (#3869) ## Describe your changes ## Issue ticket number and link #3673 ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
--------- Co-authored-by: Heikki Linnakangas --- pageserver/benches/bench_layer_map.rs | 4 +-- pageserver/src/tenant.rs | 7 +++++- pageserver/src/tenant/layer_map.rs | 21 ++++++++++------ .../layer_map/historic_layer_coverage.rs | 8 ++++++ pageserver/src/tenant/timeline.rs | 25 ++++++++++++------- 5 files changed, 45 insertions(+), 20 deletions(-) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 5edfa84d8a..4882fc518f 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -33,7 +33,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap { min_lsn = min(min_lsn, lsn_range.start); max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1)); - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(Arc::new(layer)).unwrap(); } println!("min: {min_lsn}, max: {max_lsn}"); @@ -215,7 +215,7 @@ fn bench_sequential(c: &mut Criterion) { is_incremental: false, short_id: format!("Layer {}", i), }; - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(Arc::new(layer)).unwrap(); } updates.flush(); println!("Finished layer map init in {:?}", now.elapsed()); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 03a4ff8c8e..7e88a12963 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -267,7 +267,10 @@ impl UninitializedTimeline<'_> { .await .context("Failed to flush after basebackup import")?; - self.initialize(ctx) + // Initialize without loading the layer map. We started with an empty layer map, and already + // updated it for the layers that we created during the import. + let mut timelines = self.owning_tenant.timelines.lock().unwrap(); + self.initialize_with_lock(ctx, &mut timelines, false, true) } fn raw_timeline(&self) -> anyhow::Result<&Arc> { @@ -2308,6 +2311,8 @@ impl Tenant { ) })?; + // Initialize the timeline without loading the layer map, because we already updated the layer + // map above, when we imported the datadir. 
let timeline = { let mut timelines = self.timelines.lock().unwrap(); raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)? diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 4c659be9aa..02159ee291 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -52,7 +52,7 @@ use crate::metrics::NUM_ONDISK_LAYERS; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use crate::tenant::storage_layer::Layer; -use anyhow::Result; +use anyhow::{bail, Result}; use std::collections::VecDeque; use std::ops::Range; use std::sync::Arc; @@ -126,7 +126,7 @@ where /// /// Insert an on-disk layer. /// - pub fn insert_historic(&mut self, layer: Arc) { + pub fn insert_historic(&mut self, layer: Arc) -> anyhow::Result<()> { self.layer_map.insert_historic_noflush(layer) } @@ -274,17 +274,22 @@ where /// /// Helper function for BatchedUpdates::insert_historic /// - pub(self) fn insert_historic_noflush(&mut self, layer: Arc) { - self.historic.insert( - historic_layer_coverage::LayerKey::from(&*layer), - Arc::clone(&layer), - ); + pub(self) fn insert_historic_noflush(&mut self, layer: Arc) -> anyhow::Result<()> { + let key = historic_layer_coverage::LayerKey::from(&*layer); + if self.historic.contains(&key) { + bail!( + "Attempt to insert duplicate layer {} in layer map", + layer.short_id() + ); + } + self.historic.insert(key, Arc::clone(&layer)); if Self::is_l0(&layer) { self.l0_delta_layers.push(layer); } NUM_ONDISK_LAYERS.inc(); + Ok(()) } /// @@ -838,7 +843,7 @@ mod tests { let expected_in_counts = (1, usize::from(expected_l0)); - map.batch_update().insert_historic(remote.clone()); + map.batch_update().insert_historic(remote.clone()).unwrap(); assert_eq!(count_layer_in(&map, &remote), expected_in_counts); let replaced = map diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index 
b63c361314..1fdcd5e5a4 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -417,6 +417,14 @@ impl BufferedHistoricLayerCoverage { } } + pub fn contains(&self, layer_key: &LayerKey) -> bool { + match self.buffer.get(layer_key) { + Some(None) => false, // layer remove was buffered + Some(_) => true, // layer insert was buffered + None => self.layers.contains_key(layer_key), // no buffered ops for this layer + } + } + pub fn insert(&mut self, layer_key: LayerKey, value: Value) { self.buffer.insert(layer_key, Some(value)); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 4b0d7a6994..29d8b544cc 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1446,7 +1446,7 @@ impl Timeline { trace!("found layer {}", layer.path().display()); total_physical_size += file_size; - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(Arc::new(layer))?; num_layers += 1; } else if let Some(deltafilename) = DeltaFileName::parse_str(&fname) { // Create a DeltaLayer struct for each delta file. @@ -1478,7 +1478,7 @@ impl Timeline { trace!("found layer {}", layer.path().display()); total_physical_size += file_size; - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(Arc::new(layer))?; num_layers += 1; } else if fname == METADATA_FILE_NAME || fname.ends_with(".old") { // ignore these @@ -1552,7 +1552,7 @@ impl Timeline { // remote index file? // If so, rename_to_backup those files & replace their local layer with // a RemoteLayer in the layer map so that we re-download them on-demand. 
- if let Some(local_layer) = local_layer { + if let Some(local_layer) = &local_layer { let local_layer_path = local_layer .local_path() .expect("caller must ensure that local_layers only contains local layers"); @@ -1577,7 +1577,6 @@ impl Timeline { anyhow::bail!("could not rename file {local_layer_path:?}: {err:?}"); } else { self.metrics.resident_physical_size_gauge.sub(local_size); - updates.remove_historic(local_layer); // fall-through to adding the remote layer } } else { @@ -1613,7 +1612,11 @@ impl Timeline { ); let remote_layer = Arc::new(remote_layer); - updates.insert_historic(remote_layer); + if let Some(local_layer) = &local_layer { + updates.replace_historic(local_layer, remote_layer)?; + } else { + updates.insert_historic(remote_layer)?; + } } LayerFileName::Delta(deltafilename) => { // Create a RemoteLayer for the delta file. @@ -1637,7 +1640,11 @@ impl Timeline { LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted), ); let remote_layer = Arc::new(remote_layer); - updates.insert_historic(remote_layer); + if let Some(local_layer) = &local_layer { + updates.replace_historic(local_layer, remote_layer)?; + } else { + updates.insert_historic(remote_layer)?; + } } } } @@ -2684,7 +2691,7 @@ impl Timeline { .write() .unwrap() .batch_update() - .insert_historic(Arc::new(new_delta)); + .insert_historic(Arc::new(new_delta))?; // update the timeline's physical size let sz = new_delta_path.metadata()?.len(); @@ -2889,7 +2896,7 @@ impl Timeline { self.metrics .resident_physical_size_gauge .add(metadata.len()); - updates.insert_historic(Arc::new(l)); + updates.insert_historic(Arc::new(l))?; } updates.flush(); drop(layers); @@ -3322,7 +3329,7 @@ impl Timeline { new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len())); let x: Arc = Arc::new(l); - updates.insert_historic(x); + updates.insert_historic(x)?; } // Now that we have reshuffled the data to set of new delta layers, we can From 15d1f85552231a5f6d4d500ac75010669dbec540 Mon Sep 
17 00:00:00 2001 From: Dmitry Rodionov Date: Thu, 13 Apr 2023 12:11:43 +0300 Subject: [PATCH 17/77] Add reason to TenantState::Broken (#3954) Reason and backtrace are added to the Broken state. The backtrace is automatically collected when the tenant enters the broken state. The format for API, CLI and metrics is changed and unified to return the tenant state name in camel case. Previously snake case was used for metrics and camel case was used for everything else. Now the tenant state field in the TenantInfo swagger spec is changed to contain the state name in the "slug" field and other fields (currently only reason and backtrace for the Broken variant) in the "data" field. To allow for this breaking change, state was removed from the TenantInfo swagger spec because it was not used anywhere. Please note that the tenant's broken reason is not persisted on disk, so the reason is lost when the pageserver is restarted. Requires changes to the Grafana dashboard that monitors tenant states. Closes #3001 --------- Co-authored-by: theirix --- Cargo.lock | 2 + libs/pageserver_api/Cargo.toml | 4 +- libs/pageserver_api/src/models.rs | 106 ++++++++++++++---- pageserver/src/http/openapi_spec.yml | 3 - pageserver/src/http/routes.rs | 6 +- pageserver/src/metrics.rs | 14 +-- pageserver/src/tenant.rs | 56 +++++---- pageserver/src/tenant/mgr.rs | 4 +- pageserver/src/tenant/tasks.rs | 2 +- test_runner/fixtures/pageserver/utils.py | 26 +++-- test_runner/regress/test_ondemand_download.py | 6 +- .../regress/test_pageserver_restart.py | 2 +- test_runner/regress/test_tenant_conf.py | 4 +- test_runner/regress/test_tenant_relocation.py | 6 +- test_runner/regress/test_tenant_tasks.py | 15 +-- test_runner/regress/test_tenants.py | 12 +- .../test_tenants_with_remote_storage.py | 6 +- test_runner/regress/test_timeline_size.py | 4 +- test_runner/regress/test_wal_acceptor.py | 2 +- 19 files changed, 181 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 668487a9bd..fc587c57bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@
-2517,6 +2517,8 @@ dependencies = [ "serde", "serde_json", "serde_with", + "strum", + "strum_macros", "utils", "workspace_hack", ] diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index 7709da1072..f97ec54e91 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] serde.workspace = true serde_with.workspace = true +serde_json.workspace = true const_format.workspace = true anyhow.workspace = true bytes.workspace = true @@ -14,6 +15,7 @@ byteorder.workspace = true utils.workspace = true postgres_ffi.workspace = true enum-map.workspace = true -serde_json.workspace = true +strum.workspace = true +strum_macros.workspace = true workspace_hack.workspace = true diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 98a4b56858..a351761f4a 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -7,6 +7,7 @@ use std::{ use byteorder::{BigEndian, ReadBytesExt}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; +use strum_macros; use utils::{ history_buffer::HistoryBufferWithDropCounter, id::{NodeId, TenantId, TimelineId}, @@ -18,11 +19,23 @@ use anyhow::bail; use bytes::{BufMut, Bytes, BytesMut}; /// A state of a tenant in pageserver's memory. -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + PartialEq, + Eq, + serde::Serialize, + serde::Deserialize, + strum_macros::Display, + strum_macros::EnumString, + strum_macros::EnumVariantNames, + strum_macros::AsRefStr, + strum_macros::IntoStaticStr, +)] +#[serde(tag = "slug", content = "data")] pub enum TenantState { - // This tenant is being loaded from local disk + /// This tenant is being loaded from local disk Loading, - // This tenant is being downloaded from cloud storage. + /// This tenant is being downloaded from cloud storage. 
Attaching, /// Tenant is fully operational Active, @@ -31,15 +44,7 @@ pub enum TenantState { Stopping, /// A tenant is recognized by the pageserver, but can no longer be used for /// any operations, because it failed to be activated. - Broken, -} - -pub mod state { - pub const LOADING: &str = "loading"; - pub const ATTACHING: &str = "attaching"; - pub const ACTIVE: &str = "active"; - pub const STOPPING: &str = "stopping"; - pub const BROKEN: &str = "broken"; + Broken { reason: String, backtrace: String }, } impl TenantState { @@ -49,17 +54,26 @@ impl TenantState { Self::Attaching => true, Self::Active => false, Self::Stopping => false, - Self::Broken => false, + Self::Broken { .. } => false, } } - pub fn as_str(&self) -> &'static str { + pub fn broken_from_reason(reason: String) -> Self { + let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture()); + Self::Broken { + reason, + backtrace: backtrace_str, + } + } +} + +impl std::fmt::Debug for TenantState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - TenantState::Loading => state::LOADING, - TenantState::Attaching => state::ATTACHING, - TenantState::Active => state::ACTIVE, - TenantState::Stopping => state::STOPPING, - TenantState::Broken => state::BROKEN, + Self::Broken { reason, backtrace } if !reason.is_empty() => { + write!(f, "Broken due to: {reason}. 
Backtrace:\n{backtrace}") + } + _ => write!(f, "{self}"), } } } @@ -615,6 +629,7 @@ impl PagestreamBeMessage { #[cfg(test)] mod tests { use bytes::Buf; + use serde_json::json; use super::*; @@ -665,4 +680,57 @@ mod tests { assert!(msg == reconstructed); } } + + #[test] + fn test_tenantinfo_serde() { + // Test serialization/deserialization of TenantInfo + let original_active = TenantInfo { + id: TenantId::generate(), + state: TenantState::Active, + current_physical_size: Some(42), + has_in_progress_downloads: Some(false), + }; + let expected_active = json!({ + "id": original_active.id.to_string(), + "state": { + "slug": "Active", + }, + "current_physical_size": 42, + "has_in_progress_downloads": false, + }); + + let original_broken = TenantInfo { + id: TenantId::generate(), + state: TenantState::Broken { + reason: "reason".into(), + backtrace: "backtrace info".into(), + }, + current_physical_size: Some(42), + has_in_progress_downloads: Some(false), + }; + let expected_broken = json!({ + "id": original_broken.id.to_string(), + "state": { + "slug": "Broken", + "data": { + "backtrace": "backtrace info", + "reason": "reason", + } + }, + "current_physical_size": 42, + "has_in_progress_downloads": false, + }); + + assert_eq!( + serde_json::to_value(&original_active).unwrap(), + expected_active + ); + + assert_eq!( + serde_json::to_value(&original_broken).unwrap(), + expected_broken + ); + assert!(format!("{:?}", &original_broken.state).contains("reason")); + assert!(format!("{:?}", &original_broken.state).contains("backtrace info")); + } } diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 478e9d228a..b0e4e1ca85 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -829,12 +829,9 @@ components: type: object required: - id - - state properties: id: type: string - state: - type: string current_physical_size: type: integer has_in_progress_downloads: diff --git 
a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 2db60f557d..e7a86e4822 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -465,7 +465,7 @@ async fn tenant_list_handler(request: Request) -> Result, A .iter() .map(|(id, state)| TenantInfo { id: *id, - state: *state, + state: state.clone(), current_physical_size: None, has_in_progress_downloads: Some(state.has_in_progress_downloads()), }) @@ -490,7 +490,7 @@ async fn tenant_status(request: Request) -> Result, ApiErro let state = tenant.current_state(); Ok(TenantInfo { id: tenant_id, - state, + state: state.clone(), current_physical_size: Some(current_physical_size), has_in_progress_downloads: Some(state.has_in_progress_downloads()), }) @@ -931,7 +931,7 @@ async fn handle_tenant_break(r: Request) -> Result, ApiErro .await .map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?; - tenant.set_broken("broken from test"); + tenant.set_broken("broken from test".to_owned()); json_response(StatusCode::OK, ()) } diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1f31e5a8fb..dfb38387ea 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -6,7 +6,8 @@ use metrics::{ UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; -use pageserver_api::models::state; +use pageserver_api::models::TenantState; +use strum::VariantNames; use utils::id::{TenantId, TimelineId}; /// Prometheus histogram buckets (in seconds) for operations in the critical @@ -147,15 +148,6 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define current logical size metric") }); -// Metrics collected on tenant states. 
-const TENANT_STATE_OPTIONS: &[&str] = &[ - state::LOADING, - state::ATTACHING, - state::ACTIVE, - state::STOPPING, - state::BROKEN, -]; - pub static TENANT_STATE_METRIC: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_tenant_states_count", @@ -707,7 +699,7 @@ impl Drop for TimelineMetrics { pub fn remove_tenant_metrics(tenant_id: &TenantId) { let tid = tenant_id.to_string(); let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]); - for state in TENANT_STATE_OPTIONS { + for state in TenantState::VARIANTS { let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]); } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7e88a12963..d98aa5c566 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -622,7 +622,7 @@ impl Tenant { match tenant_clone.attach(ctx).await { Ok(_) => {} Err(e) => { - tenant_clone.set_broken(&e.to_string()); + tenant_clone.set_broken(e.to_string()); error!("error attaching tenant: {:?}", e); } } @@ -830,7 +830,10 @@ impl Tenant { pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc { let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); Arc::new(Tenant::new( - TenantState::Broken, + TenantState::Broken { + reason: "create_broken_tenant".into(), + backtrace: String::new(), + }, conf, TenantConfOpt::default(), wal_redo_manager, @@ -891,7 +894,7 @@ impl Tenant { match tenant_clone.load(&ctx).await { Ok(()) => {} Err(err) => { - tenant_clone.set_broken(&err.to_string()); + tenant_clone.set_broken(err.to_string()); error!("could not load tenant {tenant_id}: {err:?}"); } } @@ -1443,7 +1446,7 @@ impl Tenant { } pub fn current_state(&self) -> TenantState { - *self.state.borrow() + self.state.borrow().clone() } pub fn is_active(&self) -> bool { @@ -1454,15 +1457,15 @@ impl Tenant { fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> { let mut result = Ok(()); self.state.send_modify(|current_state| { - match *current_state 
{ + match &*current_state { TenantState::Active => { // activate() was called on an already Active tenant. Shouldn't happen. result = Err(anyhow::anyhow!("Tenant is already active")); } - TenantState::Broken => { + TenantState::Broken { reason, .. } => { // This shouldn't happen either result = Err(anyhow::anyhow!( - "Could not activate tenant because it is in broken state" + "Could not activate tenant because it is in broken state due to: {reason}", )); } TenantState::Stopping => { @@ -1496,7 +1499,10 @@ impl Tenant { timeline.timeline_id, e ); timeline.set_state(TimelineState::Broken); - *current_state = TenantState::Broken; + *current_state = TenantState::broken_from_reason(format!( + "failed to activate timeline {}: {}", + timeline.timeline_id, e + )); } } } @@ -1509,7 +1515,7 @@ impl Tenant { /// Change tenant status to Stopping, to mark that it is being shut down pub fn set_stopping(&self) { self.state.send_modify(|current_state| { - match *current_state { + match current_state { TenantState::Active | TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Stopping; @@ -1525,8 +1531,8 @@ impl Tenant { timeline.set_state(TimelineState::Stopping); } } - TenantState::Broken => { - info!("Cannot set tenant to Stopping state, it is already in Broken state"); + TenantState::Broken { reason, .. } => { + info!("Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"); } TenantState::Stopping => { // The tenant was detached, or system shutdown was requested, while we were @@ -1537,7 +1543,7 @@ impl Tenant { }); } - pub fn set_broken(&self, reason: &str) { + pub fn set_broken(&self, reason: String) { self.state.send_modify(|current_state| { match *current_state { TenantState::Active => { @@ -1545,24 +1551,24 @@ impl Tenant { // while loading or attaching a tenant. A tenant that has already been // activated should never be marked as broken. We cope with it the best // we can, but it shouldn't happen. 
- *current_state = TenantState::Broken; warn!("Changing Active tenant to Broken state, reason: {}", reason); + *current_state = TenantState::broken_from_reason(reason); } - TenantState::Broken => { + TenantState::Broken { .. } => { // This shouldn't happen either warn!("Tenant is already in Broken state"); } TenantState::Stopping => { // This shouldn't happen either - *current_state = TenantState::Broken; warn!( "Marking Stopping tenant as Broken state, reason: {}", reason ); + *current_state = TenantState::broken_from_reason(reason); } TenantState::Loading | TenantState::Attaching => { info!("Setting tenant as Broken state, reason: {}", reason); - *current_state = TenantState::Broken; + *current_state = TenantState::broken_from_reason(reason); } } }); @@ -1575,7 +1581,7 @@ impl Tenant { pub async fn wait_to_become_active(&self) -> anyhow::Result<()> { let mut receiver = self.state.subscribe(); loop { - let current_state = *receiver.borrow_and_update(); + let current_state = receiver.borrow_and_update().clone(); match current_state { TenantState::Loading | TenantState::Attaching => { // in these states, there's a chance that we can reach ::Active @@ -1584,12 +1590,12 @@ impl Tenant { TenantState::Active { .. } => { return Ok(()); } - TenantState::Broken | TenantState::Stopping => { + TenantState::Broken { .. } | TenantState::Stopping => { // There's no chance the tenant can transition back into ::Active anyhow::bail!( "Tenant {} will not become active. 
Current state: {:?}", self.tenant_id, - current_state, + &current_state, ); } } @@ -1770,21 +1776,23 @@ impl Tenant { let (state, mut rx) = watch::channel(state); tokio::spawn(async move { - let current_state = *rx.borrow_and_update(); + let mut current_state: &'static str = From::from(&*rx.borrow_and_update()); let tid = tenant_id.to_string(); TENANT_STATE_METRIC - .with_label_values(&[&tid, current_state.as_str()]) + .with_label_values(&[&tid, current_state]) .inc(); loop { match rx.changed().await { Ok(()) => { - let new_state = *rx.borrow(); + let new_state: &'static str = From::from(&*rx.borrow_and_update()); TENANT_STATE_METRIC - .with_label_values(&[&tid, current_state.as_str()]) + .with_label_values(&[&tid, current_state]) .dec(); TENANT_STATE_METRIC - .with_label_values(&[&tid, new_state.as_str()]) + .with_label_values(&[&tid, new_state]) .inc(); + + current_state = new_state; } Err(_sender_dropped_error) => { info!("Tenant dropped the state updates sender, quitting waiting for tenant state change"); diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 4971186206..754316b3cd 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -537,7 +537,7 @@ where Some(tenant) => match tenant.current_state() { TenantState::Attaching | TenantState::Loading - | TenantState::Broken + | TenantState::Broken { ..
} | TenantState::Active => tenant.set_stopping(), TenantState::Stopping => return Err(TenantStateError::IsStopping(tenant_id)), }, @@ -565,7 +565,7 @@ where let tenants_accessor = TENANTS.read().await; match tenants_accessor.get(&tenant_id) { Some(tenant) => { - tenant.set_broken(&e.to_string()); + tenant.set_broken(e.to_string()); } None => { warn!("Tenant {tenant_id} got removed from memory"); diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 8aeacc12f5..7e7dbd3c5c 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -209,7 +209,7 @@ async fn wait_for_active_tenant( loop { match tenant_state_updates.changed().await { Ok(()) => { - let new_state = *tenant_state_updates.borrow(); + let new_state = &*tenant_state_updates.borrow(); match new_state { TenantState::Active => { debug!("Tenant state changed to active, continuing the task loop"); diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py index 65eda5b636..c060fc8dea 100644 --- a/test_runner/fixtures/pageserver/utils.py +++ b/test_runner/fixtures/pageserver/utils.py @@ -1,16 +1,20 @@ import time +from typing import Optional from fixtures.log_helper import log from fixtures.pageserver.http import PageserverHttpClient from fixtures.types import Lsn, TenantId, TimelineId -def assert_tenant_status( - pageserver_http: PageserverHttpClient, tenant: TenantId, expected_status: str +def assert_tenant_state( + pageserver_http: PageserverHttpClient, + tenant: TenantId, + expected_state: str, + message: Optional[str] = None, ): tenant_status = pageserver_http.tenant_status(tenant) log.info(f"tenant_status: {tenant_status}") - assert tenant_status["state"] == expected_status, tenant_status + assert tenant_status["state"]["slug"] == expected_state, message or tenant_status def tenant_exists(pageserver_http: PageserverHttpClient, tenant_id: TenantId): @@ -68,6 +72,7 @@ def wait_until_tenant_state( tenant_id: TenantId, 
expected_state: str, iterations: int, + period: float = 1.0, ) -> bool: """ Does not use `wait_until` for debugging purposes @@ -76,21 +81,28 @@ def wait_until_tenant_state( try: tenant = pageserver_http.tenant_status(tenant_id=tenant_id) log.debug(f"Tenant {tenant_id} data: {tenant}") - if tenant["state"] == expected_state: + if tenant["state"]["slug"] == expected_state: return True except Exception as e: log.debug(f"Tenant {tenant_id} state retrieval failure: {e}") - time.sleep(1) + time.sleep(period) raise Exception(f"Tenant {tenant_id} did not become {expected_state} in {iterations} seconds") def wait_until_tenant_active( - pageserver_http: PageserverHttpClient, tenant_id: TenantId, iterations: int = 30 + pageserver_http: PageserverHttpClient, + tenant_id: TenantId, + iterations: int = 30, + period: float = 1.0, ): wait_until_tenant_state( - pageserver_http, tenant_id, expected_state="Active", iterations=iterations + pageserver_http, + tenant_id, + expected_state="Active", + iterations=iterations, + period=period, ) diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 90ab8e68d8..07410b64df 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -17,7 +17,7 @@ from fixtures.neon_fixtures import ( ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pageserver.utils import ( - assert_tenant_status, + assert_tenant_state, wait_for_last_record_lsn, wait_for_upload, wait_until_tenant_state, @@ -239,7 +239,7 @@ def test_ondemand_download_timetravel( ##### Second start, restore the data and ensure it's the same env.pageserver.start() - wait_until(10, 0.2, lambda: assert_tenant_status(client, tenant_id, "Active")) + wait_until(10, 0.2, lambda: assert_tenant_state(client, tenant_id, "Active")) # The current_physical_size reports the sum of layers loaded in the layer # map, regardless of where the layer files 
are located. So even though we @@ -392,7 +392,7 @@ def test_download_remote_layers_api( ] ) - wait_until(10, 0.2, lambda: assert_tenant_status(client, tenant_id, "Active")) + wait_until(10, 0.2, lambda: assert_tenant_state(client, tenant_id, "Active")) ###### Phase 1: exercise download error code path assert ( diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 453ddec0d4..77db729880 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -59,7 +59,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): client = env.pageserver.http_client() tenant_status = client.tenant_status(env.initial_tenant) log.info("Tenant status : %s", tenant_status) - assert tenant_status["state"] == "Loading" + assert tenant_status["state"]["slug"] == "Loading" # Try to read. This waits until the loading finishes, and then return normally. cur.execute("SELECT count(*) FROM foo") diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 67aba227e5..80d4b99504 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -7,7 +7,7 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, RemoteStorageKind, ) -from fixtures.pageserver.utils import assert_tenant_status, wait_for_upload +from fixtures.pageserver.utils import assert_tenant_state, wait_for_upload from fixtures.types import Lsn from fixtures.utils import wait_until @@ -278,7 +278,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder): wait_until( number_of_iterations=5, interval=1, - func=lambda: assert_tenant_status(http_client, tenant_id, "Active"), + func=lambda: assert_tenant_state(http_client, tenant_id, "Active"), ) env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"}) diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py 
index 8ad4bd1c11..3569ab0c53 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -15,7 +15,7 @@ from fixtures.neon_fixtures import ( ) from fixtures.pageserver.http import PageserverHttpClient from fixtures.pageserver.utils import ( - assert_tenant_status, + assert_tenant_state, tenant_exists, wait_for_last_record_lsn, wait_for_upload, @@ -416,11 +416,11 @@ def test_tenant_relocation( # wait for tenant to finish attaching tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id) - assert tenant_status["state"] in ["Attaching", "Active"] + assert tenant_status["state"]["slug"] in ["Attaching", "Active"] wait_until( number_of_iterations=10, interval=1, - func=lambda: assert_tenant_status(new_pageserver_http, tenant_id, "Active"), + func=lambda: assert_tenant_state(new_pageserver_http, tenant_id, "Active"), ) check_timeline_attached( diff --git a/test_runner/regress/test_tenant_tasks.py b/test_runner/regress/test_tenant_tasks.py index 24b211e368..8c89100745 100644 --- a/test_runner/regress/test_tenant_tasks.py +++ b/test_runner/regress/test_tenant_tasks.py @@ -1,5 +1,6 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.pageserver.utils import assert_tenant_state, wait_until_tenant_active from fixtures.types import TenantId, TimelineId from fixtures.utils import wait_until @@ -25,16 +26,16 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): for t in timelines: client.timeline_delete(tenant, t) - def assert_active(tenant): - assert get_state(tenant) == "Active" - # Create tenant, start compute tenant, _ = env.neon_cli.create_tenant() env.neon_cli.create_timeline(name, tenant_id=tenant) pg = env.postgres.create_start(name, tenant_id=tenant) - assert ( - get_state(tenant) == "Active" - ), "Pageserver should activate a tenant and start background jobs if timelines are loaded" + assert_tenant_state( + client, + tenant, + 
expected_state="Active", + message="Pageserver should activate a tenant and start background jobs if timelines are loaded", + ) # Stop compute pg.stop() @@ -47,7 +48,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): for tenant_info in client.tenant_list(): tenant_id = TenantId(tenant_info["id"]) delete_all_timelines(tenant_id) - wait_until(10, 0.2, lambda: assert_active(tenant_id)) + wait_until_tenant_active(client, tenant_id, iterations=10, period=0.2) # Assert that all tasks finish quickly after tenant is detached task_starts = client.get_metric_value("pageserver_tenant_task_events_total", {"event": "start"}) diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 8021bf9914..2162520217 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -332,24 +332,24 @@ def test_pageserver_with_empty_tenants( [broken_tenant] = [t for t in tenants if t["id"] == str(tenant_without_timelines_dir)] assert ( - broken_tenant["state"] == "Broken" + broken_tenant["state"]["slug"] == "Broken" ), f"Tenant {tenant_without_timelines_dir} without timelines dir should be broken" broken_tenant_status = client.tenant_status(tenant_without_timelines_dir) assert ( - broken_tenant_status["state"] == "Broken" + broken_tenant_status["state"]["slug"] == "Broken" ), f"Tenant {tenant_without_timelines_dir} without timelines dir should be broken" assert env.pageserver.log_contains(".*Setting tenant as Broken state, reason:.*") [loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines_dir)] assert ( - loaded_tenant["state"] == "Active" + loaded_tenant["state"]["slug"] == "Active" ), "Tenant {tenant_with_empty_timelines_dir} with empty timelines dir should be active and ready for timeline creation" loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines_dir) assert ( - loaded_tenant_status["state"] == "Active" + loaded_tenant_status["state"]["slug"] == "Active" ), f"Tenant 
{tenant_with_empty_timelines_dir} without timelines dir should be active" time.sleep(1) # to allow metrics propagation @@ -357,11 +357,11 @@ def test_pageserver_with_empty_tenants( ps_metrics = client.get_metrics() broken_tenants_metric_filter = { "tenant_id": str(tenant_without_timelines_dir), - "state": "broken", + "state": "Broken", } active_tenants_metric_filter = { "tenant_id": str(tenant_with_empty_timelines_dir), - "state": "active", + "state": "Active", } tenant_active_count = int( diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index ec1c12a0d8..514e2b6fa0 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -24,7 +24,7 @@ from fixtures.neon_fixtures import ( wait_for_sk_commit_lsn_to_reach_remote_storage, ) from fixtures.pageserver.utils import ( - assert_tenant_status, + assert_tenant_state, wait_for_last_record_lsn, wait_for_upload, ) @@ -202,7 +202,7 @@ def test_tenants_attached_after_download( wait_until( number_of_iterations=5, interval=1, - func=lambda: assert_tenant_status(client, tenant_id, "Active"), + func=lambda: assert_tenant_state(client, tenant_id, "Active"), ) restored_timelines = client.timeline_list(tenant_id) @@ -286,7 +286,7 @@ def test_tenant_redownloads_truncated_file_on_startup( wait_until( number_of_iterations=5, interval=1, - func=lambda: assert_tenant_status(client, tenant_id, "Active"), + func=lambda: assert_tenant_state(client, tenant_id, "Active"), ) restored_timelines = client.timeline_list(tenant_id) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 7c77e1fe59..28da3c5a48 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -23,7 +23,7 @@ from fixtures.neon_fixtures import ( ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from 
fixtures.pageserver.utils import ( - assert_tenant_status, + assert_tenant_state, wait_for_upload_queue_empty, wait_until_tenant_active, ) @@ -333,7 +333,7 @@ def test_timeline_physical_size_init( wait_until( number_of_iterations=5, interval=1, - func=lambda: assert_tenant_status(client, env.initial_tenant, "Active"), + func=lambda: assert_tenant_state(client, env.initial_tenant, "Active"), ) assert_physical_size_invariants( diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 306c492e8f..c24c77bb95 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -587,7 +587,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re raise RuntimeError("Timed out waiting for WAL redo") tenant_status = ps_cli.tenant_status(tenant_id) - if tenant_status["state"] == "Loading": + if tenant_status["state"]["slug"] == "Loading": log.debug(f"Tenant {tenant_id} is still loading, retrying") else: pageserver_lsn = Lsn( From c237a2f5fb12c3940543f49dd4b9ba740744bf3b Mon Sep 17 00:00:00 2001 From: Vadim Kharitonov Date: Wed, 12 Apr 2023 16:24:34 +0300 Subject: [PATCH 18/77] Compile `pg_hint_plan extension` --- Dockerfile.compute-node | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 7c64951fa5..5a223ae432 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -337,6 +337,35 @@ RUN apt-get update && \ make install -j $(getconf _NPROCESSORS_ONLN) && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control +######################################################################################### +# +# Layer "pg-hint-plan-pg-build" +# compile pg_hint_plan extension +# +######################################################################################### +FROM build-deps AS pg-hint-plan-pg-build +COPY --from=pg-build /usr/local/pgsql/ 
/usr/local/pgsql/ + +ARG PG_VERSION +ENV PATH "/usr/local/pgsql/bin:$PATH" + +RUN case "${PG_VERSION}" in \ + "v14") \ + export PG_HINT_PLAN_VERSION=14_1_4_1 \ + ;; \ + "v15") \ + export PG_HINT_PLAN_VERSION=15_1_5_0 \ + ;; \ + *) \ + echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \ + ;; \ + esac && \ + wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \ + mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make install -j $(getconf _NPROCESSORS_ONLN) && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control + ######################################################################################### # # Layer "rust extensions" @@ -443,6 +472,7 @@ COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ From 356439aa333595021ca00bb42840e91233e3d54d Mon Sep 17 00:00:00 2001 From: Vadim Kharitonov Date: Thu, 13 Apr 2023 13:13:24 +0300 Subject: [PATCH 19/77] Add note about `manual_release_instructions` label (#4015) ## Describe your changes Do not forget to process required manual stuff after release ## Issue ticket number and link ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Dmitry Rodionov --- .github/PULL_REQUEST_TEMPLATE/release-pr.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE/release-pr.md b/.github/PULL_REQUEST_TEMPLATE/release-pr.md index a848077e6a..1e18fd5d44 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release-pr.md +++ b/.github/PULL_REQUEST_TEMPLATE/release-pr.md @@ -10,6 +10,7 @@ ### Checklist after release +- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them). - [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files)) - [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel - [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true) From 53f438a8a879ed9b72642bd0ee37a4c45ce94927 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 13 Apr 2023 13:45:43 +0300 Subject: [PATCH 20/77] Rename "Postgres nodes" in control_plane to endpoints. We use the term "endpoint" for compute Postgres nodes in the web UI and user-facing documentation now. Adjust the nomenclature in the code. This changes the name of the "neon_local pg" command to "neon_local endpoint". Also adjust names of classes, variables etc. in the python tests accordingly. This also changes the directory structure so that endpoints are now stored in: .neon/endpoints/<endpoint id> instead of: .neon/pgdatadirs/tenants/<tenant_id>/<endpoint (node) name> The tenant ID is no longer part of the path. That means that you cannot have two endpoints with the same name/ID in two different tenants anymore. That's consistent with how we treat endpoints in the real control plane and proxy: the endpoint ID must be globally unique.
--- README.md | 22 +- control_plane/src/bin/neon_local.rs | 132 +++++++----- control_plane/src/{compute.rs => endpoint.rs} | 108 +++++----- control_plane/src/lib.rs | 2 +- control_plane/src/local_env.rs | 12 +- test_runner/fixtures/compare_fixtures.py | 2 +- test_runner/fixtures/neon_fixtures.py | 201 ++++++++++-------- .../performance/test_branch_creation.py | 18 +- test_runner/performance/test_branching.py | 32 +-- .../performance/test_bulk_tenant_create.py | 4 +- test_runner/performance/test_bulk_update.py | 12 +- test_runner/performance/test_compaction.py | 6 +- test_runner/performance/test_latency.py | 4 +- test_runner/performance/test_layer_map.py | 4 +- test_runner/performance/test_startup.py | 24 +-- test_runner/regress/test_ancestor_branch.py | 12 +- test_runner/regress/test_auth.py | 6 +- test_runner/regress/test_backpressure.py | 14 +- test_runner/regress/test_basebackup_error.py | 2 +- test_runner/regress/test_branch_and_gc.py | 12 +- test_runner/regress/test_branch_behind.py | 16 +- test_runner/regress/test_branching.py | 34 +-- test_runner/regress/test_broken_timeline.py | 12 +- test_runner/regress/test_clog_truncate.py | 14 +- test_runner/regress/test_close_fds.py | 4 +- test_runner/regress/test_compatibility.py | 59 +++-- test_runner/regress/test_compute_ctl.py | 13 +- test_runner/regress/test_config.py | 4 +- test_runner/regress/test_crafted_wal_end.py | 14 +- test_runner/regress/test_createdropdb.py | 30 +-- test_runner/regress/test_createuser.py | 8 +- .../regress/test_disk_usage_eviction.py | 10 +- test_runner/regress/test_fsm_truncate.py | 4 +- test_runner/regress/test_fullbackup.py | 4 +- test_runner/regress/test_gc_aggressive.py | 22 +- test_runner/regress/test_gc_cutoff.py | 4 +- test_runner/regress/test_import.py | 32 +-- test_runner/regress/test_large_schema.py | 16 +- test_runner/regress/test_layer_eviction.py | 22 +- .../regress/test_layer_writers_fail.py | 6 +- test_runner/regress/test_lsn_mapping.py | 14 +- 
test_runner/regress/test_metric_collection.py | 6 +- test_runner/regress/test_multixact.py | 12 +- test_runner/regress/test_neon_local_cli.py | 6 +- test_runner/regress/test_next_xid.py | 12 +- test_runner/regress/test_normal_work.py | 12 +- test_runner/regress/test_old_request_lsn.py | 4 +- test_runner/regress/test_ondemand_download.py | 70 +++--- test_runner/regress/test_pageserver_api.py | 4 +- .../regress/test_pageserver_catchup.py | 8 +- .../regress/test_pageserver_restart.py | 12 +- ...test_pageserver_restarts_under_workload.py | 9 +- test_runner/regress/test_parallel_copy.py | 16 +- test_runner/regress/test_pg_regress.py | 42 ++-- test_runner/regress/test_pitr_gc.py | 8 +- test_runner/regress/test_read_trace.py | 12 +- test_runner/regress/test_read_validation.py | 8 +- test_runner/regress/test_readonly_node.py | 44 ++-- test_runner/regress/test_recovery.py | 6 +- test_runner/regress/test_remote_storage.py | 56 ++--- test_runner/regress/test_subxacts.py | 6 +- test_runner/regress/test_tenant_conf.py | 6 +- test_runner/regress/test_tenant_detach.py | 88 ++++---- test_runner/regress/test_tenant_relocation.py | 50 ++--- test_runner/regress/test_tenant_size.py | 122 +++++------ test_runner/regress/test_tenant_tasks.py | 4 +- test_runner/regress/test_tenants.py | 30 +-- .../test_tenants_with_remote_storage.py | 48 ++--- test_runner/regress/test_timeline_size.py | 80 +++---- test_runner/regress/test_truncate.py | 4 +- test_runner/regress/test_twophase.py | 18 +- test_runner/regress/test_unlogged.py | 12 +- test_runner/regress/test_vm_bits.py | 10 +- test_runner/regress/test_wal_acceptor.py | 164 +++++++------- .../regress/test_wal_acceptor_async.py | 80 +++---- test_runner/regress/test_wal_restore.py | 6 +- .../test_walredo_not_left_behind_on_detach.py | 4 +- test_runner/test_broken.py | 2 +- 78 files changed, 1061 insertions(+), 991 deletions(-) rename control_plane/src/{compute.rs => endpoint.rs} (88%) diff --git a/README.md b/README.md index 
55df67f6c7..810937aff7 100644 --- a/README.md +++ b/README.md @@ -147,15 +147,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one # start postgres compute node -> ./target/debug/neon_local pg start main -Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ... +> ./target/debug/neon_local endpoint start main +Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ... Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 -Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' # check list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS - main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS + main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running ``` 2. Now, it is possible to connect to postgres and run some queries: @@ -184,14 +184,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601] # start postgres on that branch -> ./target/debug/neon_local pg start migration_check --branch-name migration_check -Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ... +> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check +Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ... 
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433 -Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' # check the new list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index a9b66f479a..665cad8783 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -7,7 +7,7 @@ //! use anyhow::{anyhow, bail, Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; -use control_plane::compute::ComputeControlPlane; +use control_plane::endpoint::ComputeControlPlane; use control_plane::local_env::LocalEnv; use control_plane::pageserver::PageServerNode; use control_plane::safekeeper::SafekeeperNode; @@ -106,8 +106,9 @@ fn main() -> Result<()> { "start" => handle_start_all(sub_args, &env), "stop" => handle_stop_all(sub_args, &env), "pageserver" => handle_pageserver(sub_args, &env), - "pg" => handle_pg(sub_args, &env), "safekeeper" => handle_safekeeper(sub_args, &env), + "endpoint" => handle_endpoint(sub_args, &env), + "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"), _ => bail!("unexpected subcommand {sub_name}"), }; @@ -470,10 +471,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - let mut cplane = ComputeControlPlane::load(env.clone())?; println!("Importing timeline into pageserver ..."); pageserver.timeline_import(tenant_id, timeline_id, base, 
pg_wal, pg_version)?; - println!("Creating node for imported timeline ..."); env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?; - cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?; + println!("Creating endpoint for imported timeline ..."); + cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?; println!("Done"); } Some(("branch", branch_match)) => { @@ -521,10 +522,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - Ok(()) } -fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { - let (sub_name, sub_args) = match pg_match.subcommand() { - Some(pg_subcommand_data) => pg_subcommand_data, - None => bail!("no pg subcommand provided"), +fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { + let (sub_name, sub_args) = match ep_match.subcommand() { + Some(ep_subcommand_data) => ep_subcommand_data, + None => bail!("no endpoint subcommand provided"), }; let mut cplane = ComputeControlPlane::load(env.clone())?; @@ -546,7 +547,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { table.load_preset(comfy_table::presets::NOTHING); table.set_header([ - "NODE", + "ENDPOINT", "ADDRESS", "TIMELINE", "BRANCH NAME", @@ -554,39 +555,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { "STATUS", ]); - for ((_, node_name), node) in cplane - .nodes + for (endpoint_id, endpoint) in cplane + .endpoints .iter() - .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id) + .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id) { - let lsn_str = match node.lsn { + let lsn_str = match endpoint.lsn { None => { - // -> primary node + // -> primary endpoint // Use the LSN at the end of the timeline. 
timeline_infos - .get(&node.timeline_id) + .get(&endpoint.timeline_id) .map(|bi| bi.last_record_lsn.to_string()) .unwrap_or_else(|| "?".to_string()) } Some(lsn) => { - // -> read-only node - // Use the node's LSN. + // -> read-only endpoint + // Use the endpoint's LSN. lsn.to_string() } }; let branch_name = timeline_name_mappings - .get(&TenantTimelineId::new(tenant_id, node.timeline_id)) + .get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id)) .map(|name| name.as_str()) .unwrap_or("?"); table.add_row([ - node_name.as_str(), - &node.address.to_string(), - &node.timeline_id.to_string(), + endpoint_id.as_str(), + &endpoint.address.to_string(), + &endpoint.timeline_id.to_string(), branch_name, lsn_str.as_str(), - node.status(), + endpoint.status(), ]); } @@ -597,10 +598,10 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .get_one::("branch-name") .map(|s| s.as_str()) .unwrap_or(DEFAULT_BRANCH_NAME); - let node_name = sub_args - .get_one::("node") - .map(|node_name| node_name.to_string()) - .unwrap_or_else(|| format!("{branch_name}_node")); + let endpoint_id = sub_args + .get_one::("endpoint_id") + .map(String::to_string) + .unwrap_or_else(|| format!("ep-{branch_name}")); let lsn = sub_args .get_one::("lsn") @@ -618,15 +619,15 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .copied() .context("Failed to parse postgres version from the argument string")?; - cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?; + cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?; } "start" => { let port: Option = sub_args.get_one::("port").copied(); - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was provided to start"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; - let node = cplane.nodes.get(&(tenant_id, node_name.to_string())); + let 
endpoint = cplane.endpoints.get(endpoint_id.as_str()); let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) { let claims = Claims::new(Some(tenant_id), Scope::Tenant); @@ -636,9 +637,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { None }; - if let Some(node) = node { - println!("Starting existing postgres {node_name}..."); - node.start(&auth_token)?; + if let Some(endpoint) = endpoint { + println!("Starting existing endpoint {endpoint_id}..."); + endpoint.start(&auth_token)?; } else { let branch_name = sub_args .get_one::("branch-name") @@ -663,27 +664,33 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { // start --port X // stop // start <-- will also use port X even without explicit port argument - println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ..."); + println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); - let node = - cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?; - node.start(&auth_token)?; + let ep = cplane.new_endpoint( + tenant_id, + endpoint_id, + timeline_id, + lsn, + port, + pg_version, + )?; + ep.start(&auth_token)?; } } "stop" => { - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was provided to stop"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?; let destroy = sub_args.get_flag("destroy"); - let node = cplane - .nodes - .get(&(tenant_id, node_name.to_string())) - .with_context(|| format!("postgres {node_name} is not found"))?; - node.stop(destroy)?; + let endpoint = cplane + .endpoints + .get(endpoint_id.as_str()) + .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?; + endpoint.stop(destroy)?; } - _ => bail!("Unexpected pg subcommand '{sub_name}'"), + _ => bail!("Unexpected endpoint subcommand 
'{sub_name}'"), } Ok(()) @@ -802,7 +809,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul } fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> { - // Postgres nodes are not started automatically + // Endpoints are not started automatically broker::start_broker_process(env)?; @@ -836,10 +843,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result< fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) { let pageserver = PageServerNode::from_env(env); - // Stop all compute nodes + // Stop all endpoints match ComputeControlPlane::load(env.clone()) { Ok(cplane) => { - for (_k, node) in cplane.nodes { + for (_k, node) in cplane.endpoints { if let Err(e) = node.stop(false) { eprintln!("postgres stop failed: {e:#}"); } @@ -872,7 +879,9 @@ fn cli() -> Command { .help("Name of the branch to be created or used as an alias for other services") .required(false); - let pg_node_arg = Arg::new("node").help("Postgres node name").required(false); + let endpoint_id_arg = Arg::new("endpoint_id") + .help("Postgres endpoint id") + .required(false); let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false); @@ -1026,27 +1035,27 @@ fn cli() -> Command { ) ) .subcommand( - Command::new("pg") + Command::new("endpoint") .arg_required_else_help(true) .about("Manage postgres instances") .subcommand(Command::new("list").arg(tenant_id_arg.clone())) .subcommand(Command::new("create") - .about("Create a postgres compute node") - .arg(pg_node_arg.clone()) + .about("Create a compute endpoint") + .arg(endpoint_id_arg.clone()) .arg(branch_name_arg.clone()) .arg(tenant_id_arg.clone()) .arg(lsn_arg.clone()) .arg(port_arg.clone()) .arg( Arg::new("config-only") - .help("Don't do basebackup, create compute node with only config files") + .help("Don't do basebackup, create endpoint directory with only config files") .long("config-only") .required(false)) 
.arg(pg_version_arg.clone()) ) .subcommand(Command::new("start") - .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files") - .arg(pg_node_arg.clone()) + .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") + .arg(endpoint_id_arg.clone()) .arg(tenant_id_arg.clone()) .arg(branch_name_arg) .arg(timeline_id_arg) @@ -1056,7 +1065,7 @@ fn cli() -> Command { ) .subcommand( Command::new("stop") - .arg(pg_node_arg) + .arg(endpoint_id_arg) .arg(tenant_id_arg) .arg( Arg::new("destroy") @@ -1068,6 +1077,13 @@ fn cli() -> Command { ) ) + // Obsolete old name for 'endpoint'. We now just print an error if it's used. + .subcommand( + Command::new("pg") + .hide(true) + .arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false)) + .trailing_var_arg(true) + ) .subcommand( Command::new("start") .about("Start page server and safekeepers") diff --git a/control_plane/src/compute.rs b/control_plane/src/endpoint.rs similarity index 88% rename from control_plane/src/compute.rs rename to control_plane/src/endpoint.rs index bc81107706..9e85138e68 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/endpoint.rs @@ -25,54 +25,45 @@ use crate::postgresql_conf::PostgresConf; // pub struct ComputeControlPlane { base_port: u16, - pageserver: Arc, - pub nodes: BTreeMap<(TenantId, String), Arc>, + + // endpoint ID is the key + pub endpoints: BTreeMap>, + env: LocalEnv, + pageserver: Arc, } impl ComputeControlPlane { - // Load current nodes with ports from data directories on disk - // Directory structure has the following layout: - // pgdatadirs - // |- tenants - // | |- - // | | |- + // Load current endpoints from the endpoints/ subdirectories pub fn load(env: LocalEnv) -> Result { let pageserver = Arc::new(PageServerNode::from_env(&env)); - let mut nodes = BTreeMap::default(); - let pgdatadirspath = &env.pg_data_dirs_path(); - - for tenant_dir in 
fs::read_dir(pgdatadirspath) - .with_context(|| format!("failed to list {}", pgdatadirspath.display()))? + let mut endpoints = BTreeMap::default(); + for endpoint_dir in fs::read_dir(env.endpoints_path()) + .with_context(|| format!("failed to list {}", env.endpoints_path().display()))? { - let tenant_dir = tenant_dir?; - for timeline_dir in fs::read_dir(tenant_dir.path()) - .with_context(|| format!("failed to list {}", tenant_dir.path().display()))? - { - let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?; - nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node)); - } + let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?; + endpoints.insert(ep.name.clone(), Arc::new(ep)); } Ok(ComputeControlPlane { base_port: 55431, - pageserver, - nodes, + endpoints, env, + pageserver, }) } fn get_port(&mut self) -> u16 { 1 + self - .nodes + .endpoints .values() - .map(|node| node.address.port()) + .map(|ep| ep.address.port()) .max() .unwrap_or(self.base_port) } - pub fn new_node( + pub fn new_endpoint( &mut self, tenant_id: TenantId, name: &str, @@ -80,9 +71,9 @@ impl ComputeControlPlane { lsn: Option, port: Option, pg_version: u32, - ) -> Result> { + ) -> Result> { let port = port.unwrap_or_else(|| self.get_port()); - let node = Arc::new(PostgresNode { + let ep = Arc::new(Endpoint { name: name.to_owned(), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), env: self.env.clone(), @@ -93,39 +84,45 @@ impl ComputeControlPlane { pg_version, }); - node.create_pgdata()?; - node.setup_pg_conf()?; + ep.create_pgdata()?; + ep.setup_pg_conf()?; - self.nodes - .insert((tenant_id, node.name.clone()), Arc::clone(&node)); + self.endpoints.insert(ep.name.clone(), Arc::clone(&ep)); - Ok(node) + Ok(ep) } } /////////////////////////////////////////////////////////////////////////////// #[derive(Debug)] -pub struct PostgresNode { - pub address: SocketAddr, +pub struct Endpoint { + /// used as the directory name name: String, + pub 
tenant_id: TenantId, + pub timeline_id: TimelineId, + // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary. + pub lsn: Option, + + // port and address of the Postgres server + pub address: SocketAddr, + pg_version: u32, + + // These are not part of the endpoint as such, but the environment + // the endpoint runs in. pub env: LocalEnv, pageserver: Arc, - pub timeline_id: TimelineId, - pub lsn: Option, // if it's a read-only node. None for primary - pub tenant_id: TenantId, - pg_version: u32, } -impl PostgresNode { +impl Endpoint { fn from_dir_entry( entry: std::fs::DirEntry, env: &LocalEnv, pageserver: &Arc, - ) -> Result { + ) -> Result { if !entry.file_type()?.is_dir() { anyhow::bail!( - "PostgresNode::from_dir_entry failed: '{}' is not a directory", + "Endpoint::from_dir_entry failed: '{}' is not a directory", entry.path().display() ); } @@ -135,7 +132,7 @@ impl PostgresNode { let name = fname.to_str().unwrap().to_string(); // Read config file into memory - let cfg_path = entry.path().join("postgresql.conf"); + let cfg_path = entry.path().join("pgdata").join("postgresql.conf"); let cfg_path_str = cfg_path.to_string_lossy(); let mut conf_file = File::open(&cfg_path) .with_context(|| format!("failed to open config file in {}", cfg_path_str))?; @@ -161,7 +158,7 @@ impl PostgresNode { conf.parse_field_optional("recovery_target_lsn", &context)?; // ok now - Ok(PostgresNode { + Ok(Endpoint { address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), name, env: env.clone(), @@ -269,7 +266,7 @@ impl PostgresNode { } // Write postgresql.conf with default configuration - // and PG_VERSION file to the data directory of a new node. + // and PG_VERSION file to the data directory of a new endpoint. fn setup_pg_conf(&self) -> Result<()> { let mut conf = PostgresConf::new(); conf.append("max_wal_senders", "10"); @@ -289,7 +286,7 @@ impl PostgresNode { // walproposer panics when basebackup is invalid, it is pointless to restart in this case. 
conf.append("restart_after_crash", "off"); - // Configure the node to fetch pages from pageserver + // Configure the Neon Postgres extension to fetch pages from pageserver let pageserver_connstr = { let config = &self.pageserver.pg_connection_config; let (host, port) = (config.host(), config.port()); @@ -325,7 +322,7 @@ impl PostgresNode { conf.append("max_replication_flush_lag", "10GB"); if !self.env.safekeepers.is_empty() { - // Configure the node to connect to the safekeepers + // Configure Postgres to connect to the safekeepers conf.append("synchronous_standby_names", "walproposer"); let safekeepers = self @@ -380,8 +377,12 @@ impl PostgresNode { Ok(()) } + pub fn endpoint_path(&self) -> PathBuf { + self.env.endpoints_path().join(&self.name) + } + pub fn pgdata(&self) -> PathBuf { - self.env.pg_data_dir(&self.tenant_id, &self.name) + self.endpoint_path().join("pgdata") } pub fn status(&self) -> &str { @@ -443,12 +444,11 @@ impl PostgresNode { } pub fn start(&self, auth_token: &Option) -> Result<()> { - // Bail if the node already running. if self.status() == "running" { - anyhow::bail!("The node is already running"); + anyhow::bail!("The endpoint is already running"); } - // 1. We always start compute node from scratch, so + // 1. We always start Postgres from scratch, so // if old dir exists, preserve 'postgresql.conf' and drop the directory let postgresql_conf_path = self.pgdata().join("postgresql.conf"); let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| { @@ -470,8 +470,8 @@ impl PostgresNode { File::create(self.pgdata().join("standby.signal"))?; } - // 4. Finally start the compute node postgres - println!("Starting postgres node at '{}'", self.connstr()); + // 4. Finally start postgres + println!("Starting postgres at '{}'", self.connstr()); self.pg_ctl(&["start"], auth_token) } @@ -480,7 +480,7 @@ impl PostgresNode { // use immediate shutdown mode, otherwise, // shutdown gracefully to leave the data directory sane. 
// - // Compute node always starts from scratch, so stop + // Postgres is always started from scratch, so stop // without destroy only used for testing and debugging. // if destroy { @@ -489,7 +489,7 @@ impl PostgresNode { "Destroying postgres data directory '{}'", self.pgdata().to_str().unwrap() ); - fs::remove_dir_all(self.pgdata())?; + fs::remove_dir_all(self.endpoint_path())?; } else { self.pg_ctl(&["stop"], &None)?; } diff --git a/control_plane/src/lib.rs b/control_plane/src/lib.rs index 6829479ad5..a773b8dcc3 100644 --- a/control_plane/src/lib.rs +++ b/control_plane/src/lib.rs @@ -9,7 +9,7 @@ mod background_process; pub mod broker; -pub mod compute; +pub mod endpoint; pub mod local_env; pub mod pageserver; pub mod postgresql_conf; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 8cc6329ce6..2b1eec7c4b 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -200,14 +200,8 @@ impl LocalEnv { self.neon_distrib_dir.join("storage_broker") } - pub fn pg_data_dirs_path(&self) -> PathBuf { - self.base_data_dir.join("pgdatadirs").join("tenants") - } - - pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf { - self.pg_data_dirs_path() - .join(tenant_id.to_string()) - .join(branch_name) + pub fn endpoints_path(&self) -> PathBuf { + self.base_data_dir.join("endpoints") } // TODO: move pageserver files into ./pageserver @@ -427,7 +421,7 @@ impl LocalEnv { } } - fs::create_dir_all(self.pg_data_dirs_path())?; + fs::create_dir_all(self.endpoints_path())?; for safekeeper in &self.safekeepers { fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?; diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index b328cea5c6..f0d9ce4af2 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -114,7 +114,7 @@ class NeonCompare(PgCompare): self.timeline = 
self.env.neon_cli.create_timeline(branch_name, tenant_id=self.tenant) # Start pg - self._pg = self.env.postgres.create_start(branch_name, "main", self.tenant) + self._pg = self.env.endpoints.create_start(branch_name, "main", self.tenant) @property def pg(self) -> PgProtocol: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 5b6f2e5c96..e9f0363843 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -830,7 +830,7 @@ class NeonEnvBuilder: # Stop all the nodes. if self.env: log.info("Cleaning up all storage and compute nodes") - self.env.postgres.stop_all() + self.env.endpoints.stop_all() for sk in self.env.safekeepers: sk.stop(immediate=True) self.env.pageserver.stop(immediate=True) @@ -894,7 +894,7 @@ class NeonEnv: self.port_distributor = config.port_distributor self.s3_mock_server = config.mock_s3_server self.neon_cli = NeonCli(env=self) - self.postgres = PostgresFactory(self) + self.endpoints = EndpointFactory(self) self.safekeepers: List[Safekeeper] = [] self.broker = config.broker self.remote_storage = config.remote_storage @@ -902,6 +902,7 @@ class NeonEnv: self.pg_version = config.pg_version self.neon_binpath = config.neon_binpath self.pg_distrib_dir = config.pg_distrib_dir + self.endpoint_counter = 0 # generate initial tenant ID here instead of letting 'neon init' generate it, # so that we don't need to dig it out of the config file afterwards. 
@@ -1015,6 +1016,13 @@ class NeonEnv: priv = (Path(self.repo_dir) / "auth_private_key.pem").read_text() return AuthKeys(pub=pub, priv=priv) + def generate_endpoint_id(self) -> str: + """ + Generate a unique endpoint ID + """ + self.endpoint_counter += 1 + return "ep-" + str(self.endpoint_counter) + @pytest.fixture(scope=shareable_scope) def _shared_simple_env( @@ -1073,7 +1081,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: """ yield _shared_simple_env - _shared_simple_env.postgres.stop_all() + _shared_simple_env.endpoints.stop_all() @pytest.fixture(scope="function") @@ -1097,7 +1105,7 @@ def neon_env_builder( neon_env_builder.init_start(). After the initialization, you can launch compute nodes by calling - the functions in the 'env.postgres' factory object, stop/start the + the functions in the 'env.endpoints' factory object, stop/start the nodes, etc. """ @@ -1438,16 +1446,16 @@ class NeonCli(AbstractNeonCli): args.extend(["-m", "immediate"]) return self.raw_cli(args) - def pg_create( + def endpoint_create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "create", "--tenant-id", str(tenant_id or self.env.initial_tenant), @@ -1460,22 +1468,22 @@ class NeonCli(AbstractNeonCli): args.extend(["--lsn", str(lsn)]) if port is not None: args.extend(["--port", str(port)]) - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) res = self.raw_cli(args) res.check_returncode() return res - def pg_start( + def endpoint_start( self, - node_name: str, + endpoint_id: str, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "start", "--tenant-id", str(tenant_id or 
self.env.initial_tenant), @@ -1486,30 +1494,30 @@ class NeonCli(AbstractNeonCli): args.append(f"--lsn={lsn}") if port is not None: args.append(f"--port={port}") - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) res = self.raw_cli(args) res.check_returncode() return res - def pg_stop( + def endpoint_stop( self, - node_name: str, + endpoint_id: str, tenant_id: Optional[TenantId] = None, destroy=False, check_return_code=True, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "stop", "--tenant-id", str(tenant_id or self.env.initial_tenant), ] if destroy: args.append("--destroy") - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) return self.raw_cli(args, check_return_code=check_return_code) @@ -2167,8 +2175,8 @@ def static_proxy( yield proxy -class Postgres(PgProtocol): - """An object representing a running postgres daemon.""" +class Endpoint(PgProtocol): + """An object representing a Postgres compute endpoint managed by the control plane.""" def __init__( self, env: NeonEnv, tenant_id: TenantId, port: int, check_stop_result: bool = True @@ -2176,33 +2184,40 @@ class Postgres(PgProtocol): super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") self.env = env self.running = False - self.node_name: Optional[str] = None # dubious, see asserts below + self.endpoint_id: Optional[str] = None # dubious, see asserts below self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA self.tenant_id = tenant_id self.port = port self.check_stop_result = check_stop_result - # path to conf is /pgdatadirs/tenants///postgresql.conf + # path to conf is /endpoints//pgdata/postgresql.conf def create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> "Postgres": + ) -> "Endpoint": """ - Create 
the pg data directory. + Create a new Postgres endpoint. Returns self. """ if not config_lines: config_lines = [] - self.node_name = node_name or f"{branch_name}_pg_node" - self.env.neon_cli.pg_create( - branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port + if endpoint_id is None: + endpoint_id = self.env.generate_endpoint_id() + self.endpoint_id = endpoint_id + + self.env.neon_cli.endpoint_create( + branch_name, + endpoint_id=self.endpoint_id, + tenant_id=self.tenant_id, + lsn=lsn, + port=self.port, ) - path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name + path = Path("endpoints") / self.endpoint_id / "pgdata" self.pgdata_dir = os.path.join(self.env.repo_dir, path) if config_lines is None: @@ -2215,26 +2230,30 @@ class Postgres(PgProtocol): return self - def start(self) -> "Postgres": + def start(self) -> "Endpoint": """ Start the Postgres instance. Returns self. """ - assert self.node_name is not None + assert self.endpoint_id is not None - log.info(f"Starting postgres node {self.node_name}") + log.info(f"Starting postgres endpoint {self.endpoint_id}") - self.env.neon_cli.pg_start(self.node_name, tenant_id=self.tenant_id, port=self.port) + self.env.neon_cli.endpoint_start(self.endpoint_id, tenant_id=self.tenant_id, port=self.port) self.running = True return self + def endpoint_path(self) -> Path: + """Path to endpoint directory""" + assert self.endpoint_id + path = Path("endpoints") / self.endpoint_id + return self.env.repo_dir / path + def pg_data_dir_path(self) -> str: - """Path to data directory""" - assert self.node_name - path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name - return os.path.join(self.env.repo_dir, path) + """Path to Postgres data directory""" + return os.path.join(self.endpoint_path(), "pgdata") def pg_xact_dir_path(self) -> str: """Path to pg_xact dir""" @@ -2248,7 +2267,7 @@ class Postgres(PgProtocol): """Path to postgresql.conf""" return 
os.path.join(self.pg_data_dir_path(), "postgresql.conf") - def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres": + def adjust_for_safekeepers(self, safekeepers: str) -> "Endpoint": """ Adjust instance config for working with wal acceptors instead of pageserver (pre-configured by CLI) directly. @@ -2272,7 +2291,7 @@ class Postgres(PgProtocol): f.write("neon.safekeepers = '{}'\n".format(safekeepers)) return self - def config(self, lines: List[str]) -> "Postgres": + def config(self, lines: List[str]) -> "Endpoint": """ Add lines to postgresql.conf. Lines should be an array of valid postgresql.conf rows. @@ -2286,32 +2305,32 @@ class Postgres(PgProtocol): return self - def stop(self) -> "Postgres": + def stop(self) -> "Endpoint": """ Stop the Postgres instance if it's running. Returns self. """ if self.running: - assert self.node_name is not None - self.env.neon_cli.pg_stop( - self.node_name, self.tenant_id, check_return_code=self.check_stop_result + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, self.tenant_id, check_return_code=self.check_stop_result ) self.running = False return self - def stop_and_destroy(self) -> "Postgres": + def stop_and_destroy(self) -> "Endpoint": """ - Stop the Postgres instance, then destroy it. + Stop the Postgres instance, then destroy the endpoint. Returns self. 
""" - assert self.node_name is not None - self.env.neon_cli.pg_stop( - self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, self.tenant_id, True, check_return_code=self.check_stop_result ) - self.node_name = None + self.endpoint_id = None self.running = False return self @@ -2319,13 +2338,12 @@ class Postgres(PgProtocol): def create_start( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> "Postgres": + ) -> "Endpoint": """ - Create a Postgres instance, apply config - and then start it. + Create an endpoint, apply config, and start Postgres. Returns self. """ @@ -2333,7 +2351,7 @@ class Postgres(PgProtocol): self.create( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, config_lines=config_lines, lsn=lsn, ).start() @@ -2342,7 +2360,7 @@ class Postgres(PgProtocol): return self - def __enter__(self) -> "Postgres": + def __enter__(self) -> "Endpoint": return self def __exit__( @@ -2354,33 +2372,33 @@ class Postgres(PgProtocol): self.stop() -class PostgresFactory: - """An object representing multiple running postgres daemons.""" +class EndpointFactory: + """An object representing multiple compute endpoints.""" def __init__(self, env: NeonEnv): self.env = env self.num_instances: int = 0 - self.instances: List[Postgres] = [] + self.endpoints: List[Endpoint] = [] def create_start( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> Postgres: - pg = Postgres( + ) -> Endpoint: + ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, port=self.env.port_distributor.get_port(), ) self.num_instances += 1 - self.instances.append(pg) + 
self.endpoints.append(ep) - return pg.create_start( + return ep.create_start( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, config_lines=config_lines, lsn=lsn, ) @@ -2388,30 +2406,33 @@ class PostgresFactory: def create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> Postgres: - pg = Postgres( + ) -> Endpoint: + ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, port=self.env.port_distributor.get_port(), ) - self.num_instances += 1 - self.instances.append(pg) + if endpoint_id is None: + endpoint_id = self.env.generate_endpoint_id() - return pg.create( + self.num_instances += 1 + self.endpoints.append(ep) + + return ep.create( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, lsn=lsn, config_lines=config_lines, ) - def stop_all(self) -> "PostgresFactory": - for pg in self.instances: - pg.stop() + def stop_all(self) -> "EndpointFactory": + for ep in self.endpoints: + ep.stop() return self @@ -2786,16 +2807,16 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]: def check_restored_datadir_content( test_output_dir: Path, env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, ): # Get the timeline ID. 
We need it for the 'basebackup' command - timeline = TimelineId(pg.safe_psql("SHOW neon.timeline_id")[0][0]) + timeline = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0]) # stop postgres to ensure that files won't change - pg.stop() + endpoint.stop() # Take a basebackup from pageserver - restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir" + restored_dir_path = env.repo_dir / f"{endpoint.endpoint_id}_restored_datadir" restored_dir_path.mkdir(exist_ok=True) pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version) @@ -2805,7 +2826,7 @@ def check_restored_datadir_content( {psql_path} \ --no-psqlrc \ postgres://localhost:{env.pageserver.service_port.pg} \ - -c 'basebackup {pg.tenant_id} {timeline}' \ + -c 'basebackup {endpoint.tenant_id} {timeline}' \ | tar -x -C {restored_dir_path} """ @@ -2822,8 +2843,8 @@ def check_restored_datadir_content( assert result.returncode == 0 # list files we're going to compare - assert pg.pgdata_dir - pgdata_files = list_files_to_compare(Path(pg.pgdata_dir)) + assert endpoint.pgdata_dir + pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir)) restored_files = list_files_to_compare(restored_dir_path) # check that file sets are equal @@ -2834,12 +2855,12 @@ def check_restored_datadir_content( # We've already filtered all mismatching files in list_files_to_compare(), # so here expect that the content is identical (match, mismatch, error) = filecmp.cmpfiles( - pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False + endpoint.pgdata_dir, restored_dir_path, pgdata_files, shallow=False ) log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}") for f in mismatch: - f1 = os.path.join(pg.pgdata_dir, f) + f1 = os.path.join(endpoint.pgdata_dir, f) f2 = os.path.join(restored_dir_path, f) stdout_filename = "{}.filediff".format(f2) @@ -2854,24 +2875,24 @@ def check_restored_datadir_content( def wait_for_last_flush_lsn( - env: NeonEnv, pg: Postgres, tenant: 
TenantId, timeline: TimelineId + env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId ) -> Lsn: """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn.""" - last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn) def wait_for_wal_insert_lsn( - env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId + env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId ) -> Lsn: """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn.""" - last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0]) + last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0]) return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn) def fork_at_current_lsn( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, new_branch_name: str, ancestor_branch_name: str, tenant_id: Optional[TenantId] = None, @@ -2881,7 +2902,7 @@ def fork_at_current_lsn( The "last LSN" is taken from the given Postgres instance. The pageserver will wait for all the the WAL up to that LSN to arrive in the pageserver before creating the branch. 
""" - current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0] + current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0] return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn) diff --git a/test_runner/performance/test_branch_creation.py b/test_runner/performance/test_branch_creation.py index 16c5438b8f..6edcb8f1f2 100644 --- a/test_runner/performance/test_branch_creation.py +++ b/test_runner/performance/test_branch_creation.py @@ -52,13 +52,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) def run_pgbench(branch: str): log.info(f"Start a pgbench workload on branch {branch}") - pg = env.postgres.create_start(branch, tenant_id=tenant) - connstr = pg.connstr() + endpoint = env.endpoints.create_start(branch, tenant_id=tenant) + connstr = endpoint.connstr() pg_bin.run_capture(["pgbench", "-i", connstr]) pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr]) - pg.stop() + endpoint.stop() env.neon_cli.create_branch("b0", tenant_id=tenant) @@ -96,8 +96,8 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): env.neon_cli.create_branch("b0") - pg = env.postgres.create_start("b0") - neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()]) + endpoint = env.endpoints.create_start("b0") + neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()]) branch_creation_durations = [] @@ -124,15 +124,15 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare): timeline_id = env.neon_cli.create_branch("root") - pg = env.postgres.create_start("root") - with closing(pg.connect()) as conn: + endpoint = env.endpoints.create_start("root") + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(10000): cur.execute(f"CREATE TABLE t{i} as SELECT g FROM generate_series(1, 1000) g") # Wait for the pageserver to finish processing all the pending WALs, # as we don't want the LSN wait time to be 
included during the branch creation - flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) wait_for_last_record_lsn( env.pageserver.http_client(), env.initial_tenant, timeline_id, flush_lsn ) @@ -142,7 +142,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare): # run a concurrent insertion to make the ancestor "busy" during the branch creation thread = threading.Thread( - target=pg.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",) + target=endpoint.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",) ) thread.start() diff --git a/test_runner/performance/test_branching.py b/test_runner/performance/test_branching.py index 4eaec40096..667d1a4c4a 100644 --- a/test_runner/performance/test_branching.py +++ b/test_runner/performance/test_branching.py @@ -42,41 +42,41 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare): neon_compare.zenbenchmark.record_pg_bench_result(branch, res) env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") - pg_bin.run_capture(["pgbench", "-i", pg_root.connstr(), "-s10"]) + endpoint_root = env.endpoints.create_start("root") + pg_bin.run_capture(["pgbench", "-i", endpoint_root.connstr(), "-s10"]) - fork_at_current_lsn(env, pg_root, "child", "root") + fork_at_current_lsn(env, endpoint_root, "child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") - run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", pg_root.connstr()]) - run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", pg_child.connstr()]) + run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", endpoint_root.connstr()]) + run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", endpoint_child.connstr()]) def test_compare_child_and_root_write_perf(neon_compare: NeonCompare): env = neon_compare.env 
env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") + endpoint_root = env.endpoints.create_start("root") - pg_root.safe_psql( + endpoint_root.safe_psql( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')", ) env.neon_cli.create_branch("child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") with neon_compare.record_duration("root_run_duration"): - pg_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") + endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") with neon_compare.record_duration("child_run_duration"): - pg_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") + endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") def test_compare_child_and_root_read_perf(neon_compare: NeonCompare): env = neon_compare.env env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") + endpoint_root = env.endpoints.create_start("root") - pg_root.safe_psql_many( + endpoint_root.safe_psql_many( [ "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')", "INSERT INTO foo SELECT FROM generate_series(1,1000000)", @@ -84,12 +84,12 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare): ) env.neon_cli.create_branch("child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") with neon_compare.record_duration("root_run_duration"): - pg_root.safe_psql("SELECT count(*) from foo") + endpoint_root.safe_psql("SELECT count(*) from foo") with neon_compare.record_duration("child_run_duration"): - pg_child.safe_psql("SELECT count(*) from foo") + endpoint_child.safe_psql("SELECT count(*) from foo") # ----------------------------------------------------------------------- diff --git 
a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index cef7ce0c6b..9b05903cfa 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -35,14 +35,14 @@ def test_bulk_tenant_create( # if use_safekeepers == 'with_sa': # wa_factory.start_n_new(3) - pg_tenant = env.postgres.create_start( + endpoint_tenant = env.endpoints.create_start( f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant ) end = timeit.default_timer() time_slices.append(end - start) - pg_tenant.stop() + endpoint_tenant.stop() zenbenchmark.record( "tenant_creation_time", diff --git a/test_runner/performance/test_bulk_update.py b/test_runner/performance/test_bulk_update.py index 7aa6f09a40..2ace31a2d7 100644 --- a/test_runner/performance/test_bulk_update.py +++ b/test_runner/performance/test_bulk_update.py @@ -18,8 +18,8 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) timeline_id = env.neon_cli.create_branch("test_bulk_update") tenant_id = env.initial_tenant - pg = env.postgres.create_start("test_bulk_update") - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_bulk_update") + cur = endpoint.connect().cursor() cur.execute("set statement_timeout=0") cur.execute(f"create table t(x integer) WITH (fillfactor={fillfactor})") @@ -28,13 +28,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) cur.execute(f"insert into t values (generate_series(1,{n_records}))") cur.execute("vacuum t") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("update-no-prefetch"): cur.execute("update t set x=x+1") cur.execute("vacuum t") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with 
zenbenchmark.record_duration("delete-no-prefetch"): cur.execute("delete from t") @@ -50,13 +50,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) cur.execute(f"insert into t2 values (generate_series(1,{n_records}))") cur.execute("vacuum t2") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("update-with-prefetch"): cur.execute("update t2 set x=x+1") cur.execute("vacuum t2") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("delete-with-prefetch"): cur.execute("delete from t2") diff --git a/test_runner/performance/test_compaction.py b/test_runner/performance/test_compaction.py index 89818ee8bd..326c4f5c6f 100644 --- a/test_runner/performance/test_compaction.py +++ b/test_runner/performance/test_compaction.py @@ -33,11 +33,11 @@ def test_compaction(neon_compare: NeonCompare): # Create some tables, and run a bunch of INSERTs and UPDATes on them, # to generate WAL and layers - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"] ) - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(100): cur.execute(f"create table tbl{i} (i int, j int);") @@ -45,7 +45,7 @@ def test_compaction(neon_compare: NeonCompare): for j in range(100): cur.execute(f"update tbl{i} set j = {j};") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) # First compaction generates L1 layers with neon_compare.zenbenchmark.record_duration("compaction"): diff --git a/test_runner/performance/test_latency.py b/test_runner/performance/test_latency.py index 257e0421af..6c94ecc482 100644 --- a/test_runner/performance/test_latency.py +++ 
b/test_runner/performance/test_latency.py @@ -2,13 +2,13 @@ import threading import pytest from fixtures.compare_fixtures import PgCompare -from fixtures.neon_fixtures import Postgres +from fixtures.neon_fixtures import PgProtocol from performance.test_perf_pgbench import get_scales_matrix from performance.test_wal_backpressure import record_read_latency -def start_write_workload(pg: Postgres, scale: int = 10): +def start_write_workload(pg: PgProtocol, scale: int = 10): with pg.connect().cursor() as cur: cur.execute(f"create table big as select generate_series(1,{scale*100_000})") diff --git a/test_runner/performance/test_layer_map.py b/test_runner/performance/test_layer_map.py index fb29c05273..18308e1077 100644 --- a/test_runner/performance/test_layer_map.py +++ b/test_runner/performance/test_layer_map.py @@ -25,8 +25,8 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark): ) env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant) - pg = env.postgres.create_start("test_layer_map", tenant_id=tenant) - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_layer_map", tenant_id=tenant) + cur = endpoint.connect().cursor() cur.execute("create table t(x integer)") for i in range(n_iters): cur.execute(f"insert into t values (generate_series(1,{n_records}))") diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index e91b180154..fa2e058491 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -14,19 +14,19 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Start env.neon_cli.create_branch("test_startup") with zenbenchmark.record_duration("startup_time"): - pg = env.postgres.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint = env.endpoints.create_start("test_startup") + endpoint.safe_psql("select 1;") # Restart - pg.stop_and_destroy() + endpoint.stop_and_destroy() with 
zenbenchmark.record_duration("restart_time"): - pg.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint.create_start("test_startup") + endpoint.safe_psql("select 1;") # Fill up num_rows = 1000000 # 30 MB num_tables = 100 - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(num_tables): cur.execute(f"create table t_{i} (i integer);") @@ -34,18 +34,18 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Read with zenbenchmark.record_duration("read_time"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") # Read again with zenbenchmark.record_duration("second_read_time"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") # Restart - pg.stop_and_destroy() + endpoint.stop_and_destroy() with zenbenchmark.record_duration("restart_with_data"): - pg.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint.create_start("test_startup") + endpoint.safe_psql("select 1;") # Read with zenbenchmark.record_duration("read_after_restart"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") diff --git a/test_runner/regress/test_ancestor_branch.py b/test_runner/regress/test_ancestor_branch.py index 2406102756..e8c1a2f34c 100644 --- a/test_runner/regress/test_ancestor_branch.py +++ b/test_runner/regress/test_ancestor_branch.py @@ -22,8 +22,8 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): pageserver_http.configure_failpoints(("flush-frozen-before-sync", "sleep(10000)")) - pg_branch0 = env.postgres.create_start("main", tenant_id=tenant) - branch0_cur = pg_branch0.connect().cursor() + endpoint_branch0 = env.endpoints.create_start("main", tenant_id=tenant) + branch0_cur = endpoint_branch0.connect().cursor() branch0_timeline = TimelineId(query_scalar(branch0_cur, "SHOW neon.timeline_id")) log.info(f"b0 timeline {branch0_timeline}") @@ -44,10 +44,10 @@ def 
test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Create branch1. env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100) - pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant) + endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant) log.info("postgres is running on 'branch1' branch") - branch1_cur = pg_branch1.connect().cursor() + branch1_cur = endpoint_branch1.connect().cursor() branch1_timeline = TimelineId(query_scalar(branch1_cur, "SHOW neon.timeline_id")) log.info(f"b1 timeline {branch1_timeline}") @@ -67,9 +67,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Create branch2. env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200) - pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant) + endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant) log.info("postgres is running on 'branch2' branch") - branch2_cur = pg_branch2.connect().cursor() + branch2_cur = endpoint_branch2.connect().cursor() branch2_timeline = TimelineId(query_scalar(branch2_cur, "SHOW neon.timeline_id")) log.info(f"b2 timeline {branch2_timeline}") diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py index f7c4736e04..3305869dce 100644 --- a/test_runner/regress/test_auth.py +++ b/test_runner/regress/test_auth.py @@ -64,9 +64,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder): branch = "test_compute_auth_to_pageserver" env.neon_cli.create_branch(branch) - pg = env.postgres.create_start(branch) + endpoint = env.endpoints.create_start(branch) - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -83,7 +83,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): branch = 
f"test_auth_failures_auth_enabled_{auth_enabled}" timeline_id = env.neon_cli.create_branch(branch) - env.postgres.create_start(branch) + env.endpoints.create_start(branch) tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant) invalid_tenant_token = env.auth_keys.generate_tenant_token(TenantId.generate()) diff --git a/test_runner/regress/test_backpressure.py b/test_runner/regress/test_backpressure.py index a81fa380a9..352e149171 100644 --- a/test_runner/regress/test_backpressure.py +++ b/test_runner/regress/test_backpressure.py @@ -5,7 +5,7 @@ from contextlib import closing, contextmanager import psycopg2.extras import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnvBuilder pytest_plugins = "fixtures.neon_fixtures" @@ -20,10 +20,10 @@ def pg_cur(pg): # Periodically check that all backpressure lags are below the configured threshold, # assert if they are not. # If the check query fails, stop the thread. Main thread should notice that and stop the test. -def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5): +def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_interval=5): log.info("checks started") - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures? 
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))") @@ -41,7 +41,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv max_replication_apply_lag_bytes = res[0] log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes") - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: while not stop_event.is_set(): try: cur.execute( @@ -102,14 +102,14 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): # Create a branch for us env.neon_cli.create_branch("test_backpressure") - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_backpressure", config_lines=["max_replication_write_lag=30MB"] ) log.info("postgres is running on 'test_backpressure' branch") # setup check thread check_stop_event = threading.Event() - check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event)) + check_thread = threading.Thread(target=check_backpressure, args=(endpoint, check_stop_event)) check_thread.start() # Configure failpoint to slow down walreceiver ingest @@ -125,7 +125,7 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): # because of the lag and waiting for lsn to replay to arrive. 
time.sleep(2) - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: # Create and initialize test table cur.execute("CREATE TABLE foo(x bigint)") diff --git a/test_runner/regress/test_basebackup_error.py b/test_runner/regress/test_basebackup_error.py index 94d3999d17..170b494884 100644 --- a/test_runner/regress/test_basebackup_error.py +++ b/test_runner/regress/test_basebackup_error.py @@ -15,4 +15,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv): pageserver_http.configure_failpoints(("basebackup-before-control-file", "return")) with pytest.raises(Exception, match="basebackup-before-control-file"): - env.postgres.create_start("test_basebackup_error") + env.endpoints.create_start("test_basebackup_error") diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index cc807b7ff3..4a03421fcf 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -67,9 +67,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): ) timeline_main = env.neon_cli.create_timeline("test_main", tenant_id=tenant) - pg_main = env.postgres.create_start("test_main", tenant_id=tenant) + endpoint_main = env.endpoints.create_start("test_main", tenant_id=tenant) - main_cur = pg_main.connect().cursor() + main_cur = endpoint_main.connect().cursor() main_cur.execute( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')" @@ -90,9 +90,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): env.neon_cli.create_branch( "test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1 ) - pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant) + endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant) - branch_cur = pg_branch.connect().cursor() + branch_cur = endpoint_branch.connect().cursor() branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") assert query_scalar(branch_cur, "SELECT 
count(*) FROM foo") == 200000 @@ -142,8 +142,8 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): ) b0 = env.neon_cli.create_branch("b0", tenant_id=tenant) - pg0 = env.postgres.create_start("b0", tenant_id=tenant) - res = pg0.safe_psql_many( + endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant) + res = endpoint0.safe_psql_many( queries=[ "CREATE TABLE t(key serial primary key)", "INSERT INTO t SELECT FROM generate_series(1, 100000)", diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index d19f6a7d39..3f7d49ab03 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -18,10 +18,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): # Branch at the point where only 100 rows were inserted env.neon_cli.create_branch("test_branch_behind") - pgmain = env.postgres.create_start("test_branch_behind") + endpoint_main = env.endpoints.create_start("test_branch_behind") log.info("postgres is running on 'test_branch_behind' branch") - main_cur = pgmain.connect().cursor() + main_cur = endpoint_main.connect().cursor() timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id")) @@ -74,15 +74,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): "test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b ) - pg_hundred = env.postgres.create_start("test_branch_behind_hundred") - pg_more = env.postgres.create_start("test_branch_behind_more") + endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred") + endpoint_more = env.endpoints.create_start("test_branch_behind_more") # On the 'hundred' branch, we should see only 100 rows - hundred_cur = pg_hundred.connect().cursor() + hundred_cur = endpoint_hundred.connect().cursor() assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 # On the 'more' branch, we should see 100200 rows - more_cur = pg_more.connect().cursor() + more_cur = 
endpoint_more.connect().cursor() assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 # All the rows are visible on the main branch @@ -94,8 +94,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch( "test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000") ) - pg = env.postgres.create_start("test_branch_segment_boundary") - assert pg.safe_psql("SELECT 1")[0][0] == 1 + endpoint = env.endpoints.create_start("test_branch_segment_boundary") + assert endpoint.safe_psql("SELECT 1")[0][0] == 1 # branch at pre-initdb lsn with pytest.raises(Exception, match="invalid branch start lsn: .*"): diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 3b78700e9f..31f9df6ebe 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -5,7 +5,7 @@ from typing import List import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv, PgBin from fixtures.types import Lsn from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix @@ -40,20 +40,20 @@ def test_branching_with_pgbench( } ) - def run_pgbench(pg: Postgres): - connstr = pg.connstr() - + def run_pgbench(connstr: str): log.info(f"Start a pgbench workload on pg {connstr}") pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) pg_bin.run_capture(["pgbench", "-T15", connstr]) env.neon_cli.create_branch("b0", tenant_id=tenant) - pgs: List[Postgres] = [] - pgs.append(env.postgres.create_start("b0", tenant_id=tenant)) + endpoints: List[Endpoint] = [] + endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant)) threads: List[threading.Thread] = [] - threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True)) + threads.append( + threading.Thread(target=run_pgbench, args=(endpoints[0].connstr(),), 
daemon=True) + ) threads[-1].start() thread_limit = 4 @@ -79,16 +79,18 @@ def test_branching_with_pgbench( else: env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant) - pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant)) + endpoints.append(env.endpoints.create_start("b{}".format(i + 1), tenant_id=tenant)) - threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True)) + threads.append( + threading.Thread(target=run_pgbench, args=(endpoints[-1].connstr(),), daemon=True) + ) threads[-1].start() for thread in threads: thread.join() - for pg in pgs: - res = pg.safe_psql("SELECT count(*) from pgbench_accounts") + for ep in endpoints: + res = ep.safe_psql("SELECT count(*) from pgbench_accounts") assert res[0] == (100000 * scale,) @@ -110,11 +112,11 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi env = neon_simple_env env.neon_cli.create_branch("b0") - pg0 = env.postgres.create_start("b0") + endpoint0 = env.endpoints.create_start("b0") - pg_bin.run_capture(["pgbench", "-i", pg0.connstr()]) + pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()]) - with pg0.cursor() as cur: + with endpoint0.cursor() as cur: curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ` @@ -123,6 +125,6 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...") env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn) - pg1 = env.postgres.create_start("b1") + endpoint1 = env.endpoints.create_start("b1") - pg_bin.run_capture(["pgbench", "-i", pg1.connstr()]) + pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()]) diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index d12a0223a1..fb592bfbc3 100644 --- a/test_runner/regress/test_broken_timeline.py +++ 
b/test_runner/regress/test_broken_timeline.py @@ -4,7 +4,7 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder from fixtures.types import TenantId, TimelineId @@ -24,17 +24,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): ] ) - tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = [] + tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = [] for n in range(4): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'") - pg.stop() - tenant_timelines.append((tenant_id, timeline_id, pg)) + endpoint.stop() + tenant_timelines.append((tenant_id, timeline_id, endpoint)) # Stop the pageserver env.pageserver.stop() diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py index f47e4a99bf..f22eca02cc 100644 --- a/test_runner/regress/test_clog_truncate.py +++ b/test_runner/regress/test_clog_truncate.py @@ -24,14 +24,14 @@ def test_clog_truncate(neon_simple_env: NeonEnv): "autovacuum_freeze_max_age=100000", ] - pg = env.postgres.create_start("test_clog_truncate", config_lines=config) + endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config) log.info("postgres is running on test_clog_truncate branch") # Install extension containing function needed for test - pg.safe_psql("CREATE EXTENSION neon_test_utils") + endpoint.safe_psql("CREATE EXTENSION neon_test_utils") # Consume many xids to advance clog - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("select 
test_consume_xids(1000*1000*10);") log.info("xids consumed") @@ -44,7 +44,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): # wait for autovacuum to truncate the pg_xact # XXX Is it worth to add a timeout here? - pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000") + pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") while os.path.isfile(pg_xact_0000_path): @@ -52,7 +52,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): time.sleep(5) # checkpoint to advance latest lsn - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CHECKPOINT;") lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()") @@ -61,10 +61,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv): env.neon_cli.create_branch( "test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation ) - pg2 = env.postgres.create_start("test_clog_truncate_new") + endpoint2 = env.endpoints.create_start("test_clog_truncate_new") log.info("postgres is running on test_clog_truncate_new branch") # check that new node doesn't contain truncated segment - pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000") + pg_xact_0000_path_new = os.path.join(endpoint2.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}") assert os.path.isfile(pg_xact_0000_path_new) is False diff --git a/test_runner/regress/test_close_fds.py b/test_runner/regress/test_close_fds.py index 22f245f79b..7059f3360e 100644 --- a/test_runner/regress/test_close_fds.py +++ b/test_runner/regress/test_close_fds.py @@ -24,8 +24,8 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv): def start_workload(): env.neon_cli.create_branch("test_lsof_pageserver_pid") - pg = env.postgres.create_start("test_lsof_pageserver_pid") - with closing(pg.connect()) as conn: + endpoint = env.endpoints.create_start("test_lsof_pageserver_pid") + with closing(endpoint.connect()) 
as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x") cur.execute("update foo set x=x+1") diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 0cc111bd8c..e262202a73 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -1,3 +1,4 @@ +import copy import os import shutil import subprocess @@ -55,29 +56,31 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o neon_env_builder.preserve_database_files = True env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") # FIXME: Is this expected? env.pageserver.allowed_errors.append( ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()]) - pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()]) - pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]) + pg_bin.run(["pgbench", "--initialize", "--scale=10", endpoint.connstr()]) + pg_bin.run(["pgbench", "--time=60", "--progress=2", endpoint.connstr()]) + pg_bin.run( + ["pg_dumpall", f"--dbname={endpoint.connstr()}", f"--file={test_output_dir / 'dump.sql'}"] + ) snapshot_config = toml.load(test_output_dir / "repo" / "config") tenant_id = snapshot_config["default_tenant_id"] timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id] pageserver_http = env.pageserver.http_client() - lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn) - env.postgres.stop_all() + 
env.endpoints.stop_all() for sk in env.safekeepers: sk.stop() env.pageserver.stop() @@ -98,6 +101,9 @@ def test_backward_compatibility( pg_version: str, request: FixtureRequest, ): + """ + Test that the new binaries can read old data + """ compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR") assert ( compatibility_snapshot_dir_env is not None @@ -120,6 +126,7 @@ def test_backward_compatibility( check_neon_works( test_output_dir / "compatibility_snapshot" / "repo", neon_binpath, + neon_binpath, pg_distrib_dir, pg_version, port_distributor, @@ -148,7 +155,11 @@ def test_forward_compatibility( port_distributor: PortDistributor, pg_version: str, request: FixtureRequest, + neon_binpath: Path, ): + """ + Test that the old binaries can read new data + """ compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN") assert compatibility_neon_bin_env is not None, ( "COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries " @@ -183,6 +194,7 @@ def test_forward_compatibility( check_neon_works( test_output_dir / "compatibility_snapshot" / "repo", compatibility_neon_bin, + neon_binpath, compatibility_postgres_distrib_dir, pg_version, port_distributor, @@ -223,9 +235,13 @@ def prepare_snapshot( for logfile in repo_dir.glob("**/*.log"): logfile.unlink() - # Remove tenants data for compute - for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"): - shutil.rmtree(tenant) + # Remove old computes in 'endpoints'. Old versions of the control plane used a directory + # called "pgdatadirs". Delete it, too. + if (repo_dir / "endpoints").exists(): + shutil.rmtree(repo_dir / "endpoints") + if (repo_dir / "pgdatadirs").exists(): + shutil.rmtree(repo_dir / "pgdatadirs") + os.mkdir(repo_dir / "endpoints") # Remove wal-redo temp directory if it exists. Newer pageserver versions don't create # them anymore, but old versions did. 
@@ -326,7 +342,8 @@ def get_neon_version(neon_binpath: Path): def check_neon_works( repo_dir: Path, - neon_binpath: Path, + neon_target_binpath: Path, + neon_current_binpath: Path, pg_distrib_dir: Path, pg_version: str, port_distributor: PortDistributor, @@ -336,7 +353,7 @@ def check_neon_works( ): snapshot_config_toml = repo_dir / "config" snapshot_config = toml.load(snapshot_config_toml) - snapshot_config["neon_distrib_dir"] = str(neon_binpath) + snapshot_config["neon_distrib_dir"] = str(neon_target_binpath) snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir) with (snapshot_config_toml).open("w") as f: toml.dump(snapshot_config, f) @@ -347,17 +364,25 @@ def check_neon_works( config.repo_dir = repo_dir config.pg_version = pg_version config.initial_tenant = snapshot_config["default_tenant_id"] - config.neon_binpath = neon_binpath config.pg_distrib_dir = pg_distrib_dir config.preserve_database_files = True - cli = NeonCli(config) - cli.raw_cli(["start"]) - request.addfinalizer(lambda: cli.raw_cli(["stop"])) + # Use the "target" binaries to launch the storage nodes + config_target = config + config_target.neon_binpath = neon_target_binpath + cli_target = NeonCli(config_target) + + # And the current binaries to launch computes + config_current = copy.copy(config) + config_current.neon_binpath = neon_current_binpath + cli_current = NeonCli(config_current) + + cli_target.raw_cli(["start"]) + request.addfinalizer(lambda: cli_target.raw_cli(["stop"])) pg_port = port_distributor.get_port() - cli.pg_start("main", port=pg_port) - request.addfinalizer(lambda: cli.pg_stop("main")) + cli_current.endpoint_start("main", port=pg_port) + request.addfinalizer(lambda: cli_current.endpoint_stop("main")) connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres" pg_bin.run(["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]) diff --git a/test_runner/regress/test_compute_ctl.py b/test_runner/regress/test_compute_ctl.py index 
05ac3841dc..aa99a01c83 100644 --- a/test_runner/regress/test_compute_ctl.py +++ b/test_runner/regress/test_compute_ctl.py @@ -13,10 +13,10 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): ctl = ComputeCtl(env) env.neon_cli.create_branch("test_compute_ctl", "main") - pg = env.postgres.create_start("test_compute_ctl") - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint = env.endpoints.create_start("test_compute_ctl") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") - with open(pg.config_file_path(), "r") as f: + with open(endpoint.config_file_path(), "r") as f: cfg_lines = f.readlines() cfg_map = {} for line in cfg_lines: @@ -24,10 +24,13 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): k, v = line.split("=") cfg_map[k] = v.strip("\n '\"") log.info(f"postgres config: {cfg_map}") - pgdata = pg.pg_data_dir_path() + pgdata = endpoint.pg_data_dir_path() pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres") - pg.stop_and_destroy() + endpoint.stop_and_destroy() + + # stop_and_destroy removes the whole endpoint directory. Recreate it. 
+ Path(pgdata).mkdir(parents=True) spec = ( """ diff --git a/test_runner/regress/test_config.py b/test_runner/regress/test_config.py index 3477d96b89..0ea5784b67 100755 --- a/test_runner/regress/test_config.py +++ b/test_runner/regress/test_config.py @@ -12,10 +12,10 @@ def test_config(neon_simple_env: NeonEnv): env.neon_cli.create_branch("test_config", "empty") # change config - pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"]) + endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"]) log.info("postgres is running on test_config branch") - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute( """ diff --git a/test_runner/regress/test_crafted_wal_end.py b/test_runner/regress/test_crafted_wal_end.py index 9899d424d1..7ec901af34 100644 --- a/test_runner/regress/test_crafted_wal_end.py +++ b/test_runner/regress/test_crafted_wal_end.py @@ -21,11 +21,11 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_crafted_wal_end") - pg = env.postgres.create("test_crafted_wal_end") + endpoint = env.endpoints.create("test_crafted_wal_end") wal_craft = WalCraft(env) - pg.config(wal_craft.postgres_config()) - pg.start() - res = pg.safe_psql_many( + endpoint.config(wal_craft.postgres_config()) + endpoint.start() + res = endpoint.safe_psql_many( queries=[ "CREATE TABLE keys(key int primary key)", "INSERT INTO keys SELECT generate_series(1, 100)", @@ -34,7 +34,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): ) assert res[-1][0] == (5050,) - wal_craft.in_existing(wal_type, pg.connstr()) + wal_craft.in_existing(wal_type, endpoint.connstr()) log.info("Restarting all safekeepers and pageservers") env.pageserver.stop() @@ -43,7 +43,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): 
env.pageserver.start() log.info("Trying more queries") - res = pg.safe_psql_many( + res = endpoint.safe_psql_many( queries=[ "SELECT SUM(key) FROM keys", "INSERT INTO keys SELECT generate_series(101, 200)", @@ -60,7 +60,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries (again)") - res = pg.safe_psql_many( + res = endpoint.safe_psql_many( queries=[ "SELECT SUM(key) FROM keys", "INSERT INTO keys SELECT generate_series(201, 300)", diff --git a/test_runner/regress/test_createdropdb.py b/test_runner/regress/test_createdropdb.py index 036e50e6e8..68035b1b14 100644 --- a/test_runner/regress/test_createdropdb.py +++ b/test_runner/regress/test_createdropdb.py @@ -13,10 +13,10 @@ def test_createdb(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_createdb", "empty") - pg = env.postgres.create_start("test_createdb") + endpoint = env.endpoints.create_start("test_createdb") log.info("postgres is running on 'test_createdb' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: # Cause a 'relmapper' change in the original branch cur.execute("VACUUM FULL pg_class") @@ -26,10 +26,10 @@ def test_createdb(neon_simple_env: NeonEnv): # Create a branch env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start("test_createdb2") + endpoint2 = env.endpoints.create_start("test_createdb2") # Test that you can connect to the new database on both branches - for db in (pg, pg2): + for db in (endpoint, endpoint2): with db.cursor(dbname="foodb") as cur: # Check database size in both branches cur.execute( @@ -55,17 +55,17 @@ def test_createdb(neon_simple_env: NeonEnv): def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_dropdb", "empty") - pg = env.postgres.create_start("test_dropdb") + endpoint = env.endpoints.create_start("test_dropdb") 
log.info("postgres is running on 'test_dropdb' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE DATABASE foodb") lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("DROP DATABASE foodb") cur.execute("CHECKPOINT") @@ -76,29 +76,29 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env.neon_cli.create_branch( "test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop ) - pg_before = env.postgres.create_start("test_before_dropdb") + endpoint_before = env.endpoints.create_start("test_before_dropdb") env.neon_cli.create_branch( "test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop ) - pg_after = env.postgres.create_start("test_after_dropdb") + endpoint_after = env.endpoints.create_start("test_after_dropdb") # Test that database exists on the branch before drop - pg_before.connect(dbname="foodb").close() + endpoint_before.connect(dbname="foodb").close() # Test that database subdir exists on the branch before drop - assert pg_before.pgdata_dir - dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid) + assert endpoint_before.pgdata_dir + dbpath = pathlib.Path(endpoint_before.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) is True # Test that database subdir doesn't exist on the branch after drop - assert pg_after.pgdata_dir - dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid) + assert endpoint_after.pgdata_dir + dbpath = pathlib.Path(endpoint_after.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) is False # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git 
a/test_runner/regress/test_createuser.py b/test_runner/regress/test_createuser.py index c5f8246f5b..f1bc405287 100644 --- a/test_runner/regress/test_createuser.py +++ b/test_runner/regress/test_createuser.py @@ -9,10 +9,10 @@ from fixtures.utils import query_scalar def test_createuser(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_createuser", "empty") - pg = env.postgres.create_start("test_createuser") + endpoint = env.endpoints.create_start("test_createuser") log.info("postgres is running on 'test_createuser' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: # Cause a 'relmapper' change in the original branch cur.execute("CREATE USER testuser with password %s", ("testpwd",)) @@ -22,7 +22,7 @@ def test_createuser(neon_simple_env: NeonEnv): # Create a branch env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start("test_createuser2") + endpoint2 = env.endpoints.create_start("test_createuser2") # Test that you can connect to new branch as a new user - assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)] + assert endpoint2.safe_psql("select current_user", user="testuser") == [("testuser",)] diff --git a/test_runner/regress/test_disk_usage_eviction.py b/test_runner/regress/test_disk_usage_eviction.py index 413d6c9d5a..31c7ef2b17 100644 --- a/test_runner/regress/test_disk_usage_eviction.py +++ b/test_runner/regress/test_disk_usage_eviction.py @@ -91,8 +91,8 @@ class EvictionEnv: This assumes that the tenant is still at the state after pbench -i. 
""" lsn = self.pgbench_init_lsns[tenant_id] - with self.neon_env.postgres.create_start("main", tenant_id=tenant_id, lsn=lsn) as pg: - self.pg_bin.run(["pgbench", "-S", pg.connstr()]) + with self.neon_env.endpoints.create_start("main", tenant_id=tenant_id, lsn=lsn) as endpoint: + self.pg_bin.run(["pgbench", "-S", endpoint.connstr()]) def pageserver_start_with_disk_usage_eviction( self, period, max_usage_pct, min_avail_bytes, mock_behavior @@ -168,9 +168,9 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev } ) - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - pg_bin.run(["pgbench", "-i", f"-s{scale}", pg.connstr()]) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + pg_bin.run(["pgbench", "-i", f"-s{scale}", endpoint.connstr()]) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) timelines.append((tenant_id, timeline_id)) diff --git a/test_runner/regress/test_fsm_truncate.py b/test_runner/regress/test_fsm_truncate.py index 4551ff97e0..80e4da8380 100644 --- a/test_runner/regress/test_fsm_truncate.py +++ b/test_runner/regress/test_fsm_truncate.py @@ -4,7 +4,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_fsm_truncate(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_fsm_truncate") - pg = env.postgres.create_start("test_fsm_truncate") - pg.safe_psql( + endpoint = env.endpoints.create_start("test_fsm_truncate") + endpoint.safe_psql( "CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;" ) diff --git a/test_runner/regress/test_fullbackup.py b/test_runner/regress/test_fullbackup.py index b3d58edf6b..ece9dccf93 100644 --- a/test_runner/regress/test_fullbackup.py +++ b/test_runner/regress/test_fullbackup.py @@ -24,10 +24,10 @@ def test_fullbackup( env = neon_env_builder.init_start() 
env.neon_cli.create_branch("test_fullbackup") - pgmain = env.postgres.create_start("test_fullbackup") + endpoint_main = env.endpoints.create_start("test_fullbackup") log.info("postgres is running on 'test_fullbackup' branch") - with pgmain.cursor() as cur: + with endpoint_main.cursor() as cur: timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id")) # data loading may take a while, so increase statement timeout diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py index 702d94c691..d38be057d3 100644 --- a/test_runner/regress/test_gc_aggressive.py +++ b/test_runner/regress/test_gc_aggressive.py @@ -5,9 +5,9 @@ import random import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, - Postgres, RemoteStorageKind, wait_for_last_flush_lsn, ) @@ -26,9 +26,9 @@ updates_performed = 0 # Run random UPDATEs on test table -async def update_table(pg: Postgres): +async def update_table(endpoint: Endpoint): global updates_performed - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() while updates_performed < updates_to_perform: updates_performed += 1 @@ -52,10 +52,10 @@ async def gc(env: NeonEnv, timeline: TimelineId): # At the same time, run UPDATEs and GC -async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: TimelineId): +async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId): workers = [] for worker_id in range(num_connections): - workers.append(asyncio.create_task(update_table(pg))) + workers.append(asyncio.create_task(update_table(endpoint))) workers.append(asyncio.create_task(gc(env, timeline))) # await all workers @@ -72,10 +72,10 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_aggressive", "main") - pg = 
env.postgres.create_start("test_gc_aggressive") + endpoint = env.endpoints.create_start("test_gc_aggressive") log.info("postgres is running on test_gc_aggressive branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id")) # Create table, and insert the first 100 rows @@ -89,7 +89,7 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): ) cur.execute("CREATE INDEX ON foo(id)") - asyncio.run(update_and_gc(env, pg, timeline)) + asyncio.run(update_and_gc(env, endpoint, timeline)) cur.execute("SELECT COUNT(*), SUM(counter) FROM foo") r = cur.fetchone() @@ -110,11 +110,11 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind: env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_index_upload", "main") - pg = env.postgres.create_start("test_gc_index_upload") + endpoint = env.endpoints.create_start("test_gc_index_upload") pageserver_http = env.pageserver.http_client() - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id")) @@ -146,7 +146,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind: return int(total) # Sanity check that the metric works - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) pageserver_http.timeline_gc(tenant_id, timeline_id, 10000) before = get_num_remote_ops("index", "upload") diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 1b98a414da..79453c1bdc 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -31,8 +31,8 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): "image_creation_threshold": "2", } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) - connstr = 
pg.connstr(options="-csynchronous_commit=off") + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + connstr = endpoint.connstr(options="-csynchronous_commit=off") pg_bin.run_capture(["pgbench", "-i", "-s10", connstr]) pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 774ed98563..137ce457bc 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -9,10 +9,10 @@ from pathlib import Path import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, PgBin, - Postgres, ) from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload from fixtures.types import Lsn, TenantId, TimelineId @@ -72,7 +72,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"] end_lsn = manifest["WAL-Ranges"][0]["End-LSN"] - node_name = "import_from_vanilla" + endpoint_id = "ep-import_from_vanilla" tenant = TenantId.generate() timeline = TimelineId.generate() @@ -113,7 +113,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build "--timeline-id", str(timeline), "--node-name", - node_name, + endpoint_id, "--base-lsn", start_lsn, "--base-tarfile", @@ -153,8 +153,8 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build wait_for_upload(client, tenant, timeline, Lsn(end_lsn)) # Check it worked - pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql("select count(*) from t") == [(300000,)] + endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant) + assert endpoint.safe_psql("select count(*) from t") == [(300000,)] @pytest.mark.timeout(600) @@ -168,10 +168,10 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu ) timeline = 
env.neon_cli.create_branch("test_import_from_pageserver_small") - pg = env.postgres.create_start("test_import_from_pageserver_small") + endpoint = env.endpoints.create_start("test_import_from_pageserver_small") num_rows = 3000 - lsn = _generate_data(num_rows, pg) + lsn = _generate_data(num_rows, endpoint) _import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir) @@ -185,14 +185,14 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne env = neon_env_builder.init_start() timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment") - pg = env.postgres.create_start("test_import_from_pageserver_multisegment") + endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment") # For `test_import_from_pageserver_multisegment`, we want to make sure that the data # is large enough to create multi-segment files. Typically, a segment file's size is # at most 1GB. A large number of inserted rows (`30000000`) is used to increase the # DB size to above 1GB. Related: https://github.com/neondatabase/neon/issues/2097. num_rows = 30000000 - lsn = _generate_data(num_rows, pg) + lsn = _generate_data(num_rows, endpoint) logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[ "current_logical_size" @@ -213,12 +213,12 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne assert cnt_seg_files > 0 -def _generate_data(num_rows: int, pg: Postgres) -> Lsn: +def _generate_data(num_rows: int, endpoint: Endpoint) -> Lsn: """Generate a table with `num_rows` rows. 
Returns: the latest insert WAL's LSN""" - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") @@ -263,7 +263,7 @@ def _import( tar_output_file = result_basepath + ".stdout" # Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() dir_to_clear = Path(env.repo_dir) / "tenants" @@ -278,7 +278,7 @@ def _import( tenant = TenantId.generate() # Import to pageserver - node_name = "import_from_pageserver" + endpoint_id = "ep-import_from_pageserver" client = env.pageserver.http_client() client.tenant_create(tenant) env.neon_cli.raw_cli( @@ -290,7 +290,7 @@ def _import( "--timeline-id", str(timeline), "--node-name", - node_name, + endpoint_id, "--base-lsn", str(lsn), "--base-tarfile", @@ -305,8 +305,8 @@ def _import( wait_for_upload(client, tenant, timeline, lsn) # Check it worked - pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] + endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant) + assert endpoint.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] # Take another fullbackup query = f"fullbackup { tenant} {timeline} {lsn}" diff --git a/test_runner/regress/test_large_schema.py b/test_runner/regress/test_large_schema.py index f14265f6fd..ac83131ba2 100644 --- a/test_runner/regress/test_large_schema.py +++ b/test_runner/regress/test_large_schema.py @@ -15,9 +15,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_large_schema(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() tables = 2 # 10 is too much for debug build @@ -27,18 +27,18 @@ def 
test_large_schema(neon_env_builder: NeonEnvBuilder): # Restart compute. Restart is actually not strictly needed. # It is done mostly because this test originally tries to model the problem reported by Ketteq. - pg.stop() + endpoint.stop() # Kill and restart the pageserver. # env.pageserver.stop(immediate=True) # env.pageserver.start() - pg.start() + endpoint.start() retry_sleep = 0.5 max_retries = 200 retries = 0 while True: try: - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)") for j in range(1, partitions + 1): @@ -63,7 +63,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): raise break - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() for i in range(1, tables + 1): @@ -74,8 +74,8 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid") # Check layer file sizes - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id) for filename in os.listdir(timeline_path): if filename.startswith("00000"): diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index 2d07d02ce7..1ae32fb398 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -27,13 +27,13 @@ def test_basic_eviction( env = neon_env_builder.init_start() client = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = 
TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # Create a number of layers in the tenant - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( """ @@ -172,15 +172,15 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): env.initial_tenant = tenant_id # update_and_gc relies on this ps_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") log.info("fill with data, creating delta & image layers, some of which are GC'able after") # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a (id bigserial primary key, some_value bigint not null)") cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) ps_http.timeline_checkpoint(tenant_id, timeline_id) # Create delta layers, then turn them into image layers. 
@@ -191,19 +191,19 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): for i in range(0, 2): for j in range(0, 3): # create a minimal amount of "delta difficulty" for this table - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set some_value = -some_value + %s", (j,)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: # vacuuming should aid to reuse keys, though it's not really important # with image_creation_threshold=1 which we will use on the last compaction cur.execute("vacuum") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) if i == 1 and j == 2 and k == 1: # last iteration; stop before checkpoint to avoid leaving an inmemory layer - pg.stop_and_destroy() + endpoint.stop_and_destroy() ps_http.timeline_checkpoint(tenant_id, timeline_id) diff --git a/test_runner/regress/test_layer_writers_fail.py b/test_runner/regress/test_layer_writers_fail.py index e8ba0e7d91..d2d85a43e0 100644 --- a/test_runner/regress/test_layer_writers_fail.py +++ b/test_runner/regress/test_layer_writers_fail.py @@ -20,7 +20,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) + pg = env.endpoints.create_start("main", tenant_id=tenant_id) pg.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", @@ -64,8 +64,8 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) - pg.safe_psql_many( + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo diff --git a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py index c5a49a6704..8ccfc21cf7 100644 --- a/test_runner/regress/test_lsn_mapping.py +++ 
b/test_runner/regress/test_lsn_mapping.py @@ -12,10 +12,10 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") - pgmain = env.postgres.create_start("test_lsn_mapping") + endpoint_main = env.endpoints.create_start("test_lsn_mapping") log.info("postgres is running on 'test_lsn_mapping' branch") - cur = pgmain.connect().cursor() + cur = endpoint_main.connect().cursor() # Create table, and insert rows, each in a separate transaction # Disable synchronous_commit to make this initialization go faster. # @@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): cur.execute("INSERT INTO foo VALUES (-1)") # Wait until WAL is received by pageserver - wait_for_last_flush_lsn(env, pgmain, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id) with env.pageserver.http_client() as client: # Check edge cases: timestamp in the future @@ -61,9 +61,9 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): # Call get_lsn_by_timestamp to get the LSN # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible. 
- pg_here = env.postgres.create_start( - branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn + endpoint_here = env.endpoints.create_start( + branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn ) - assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i + assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i - pg_here.stop_and_destroy() + endpoint_here.stop_and_destroy() diff --git a/test_runner/regress/test_metric_collection.py b/test_runner/regress/test_metric_collection.py index a33af9a3b2..ecbce1f8f7 100644 --- a/test_runner/regress/test_metric_collection.py +++ b/test_runner/regress/test_metric_collection.py @@ -123,9 +123,9 @@ def test_metric_collection( # before pageserver, pageserver log might contain such errors in the end. env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*") env.neon_cli.create_branch("test_metric_collection") - pg = env.postgres.create_start("test_metric_collection") + endpoint = env.endpoints.create_start("test_metric_collection") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id")) @@ -158,7 +158,7 @@ def test_metric_collection( # upload some data to remote storage if remote_storage_kind == RemoteStorageKind.LOCAL_FS: - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http = env.pageserver.http_client() pageserver_http.timeline_checkpoint(tenant_id, timeline_id) pageserver_http.timeline_gc(tenant_id, timeline_id, 10000) diff --git a/test_runner/regress/test_multixact.py b/test_runner/regress/test_multixact.py index 635beb16b7..fe50969a0a 100644 --- a/test_runner/regress/test_multixact.py +++ b/test_runner/regress/test_multixact.py @@ -12,10 +12,10 @@ from fixtures.utils import query_scalar def test_multixact(neon_simple_env: NeonEnv, test_output_dir): env = 
neon_simple_env env.neon_cli.create_branch("test_multixact", "empty") - pg = env.postgres.create_start("test_multixact") + endpoint = env.endpoints.create_start("test_multixact") log.info("postgres is running on 'test_multixact' branch") - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute( """ CREATE TABLE t1(i int primary key); @@ -32,7 +32,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): connections = [] for i in range(nclients): # Do not turn on autocommit. We want to hold the key-share locks. - conn = pg.connect(autocommit=False) + conn = endpoint.connect(autocommit=False) connections.append(conn) # On each iteration, we commit the previous transaction on a connection, @@ -65,10 +65,10 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): # Branch at this point env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn) - pg_new = env.postgres.create_start("test_multixact_new") + endpoint_new = env.endpoints.create_start("test_multixact_new") log.info("postgres is running on 'test_multixact_new' branch") - next_multixact_id_new = pg_new.safe_psql( + next_multixact_id_new = endpoint_new.safe_psql( "SELECT next_multixact_id FROM pg_control_checkpoint()" )[0][0] @@ -76,4 +76,4 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): assert next_multixact_id_new == next_multixact_id # Check that we can restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py index bd0f550ba5..f6629c54f9 100644 --- a/test_runner/regress/test_neon_local_cli.py +++ b/test_runner/regress/test_neon_local_cli.py @@ -9,9 +9,11 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por try: env.neon_cli.start() 
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True) - env.neon_cli.pg_start(node_name="main", port=port_distributor.get_port()) + env.neon_cli.endpoint_start(endpoint_id="ep-main", port=port_distributor.get_port()) env.neon_cli.create_branch(new_branch_name="migration_check") - env.neon_cli.pg_start(node_name="migration_check", port=port_distributor.get_port()) + env.neon_cli.endpoint_start( + endpoint_id="ep-migration_check", port=port_distributor.get_port() + ) finally: env.neon_cli.stop() diff --git a/test_runner/regress/test_next_xid.py b/test_runner/regress/test_next_xid.py index 698ea0e1d3..6e94e15227 100644 --- a/test_runner/regress/test_next_xid.py +++ b/test_runner/regress/test_next_xid.py @@ -8,9 +8,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_next_xid(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE t(x integer)") @@ -19,17 +19,17 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): print(f"iteration {i} / {iterations}") # Kill and restart the pageserver. 
- pg.stop() + endpoint.stop() env.pageserver.stop(immediate=True) env.pageserver.start() - pg.start() + endpoint.start() retry_sleep = 0.5 max_retries = 200 retries = 0 while True: try: - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute(f"INSERT INTO t values({i})") conn.close() @@ -48,7 +48,7 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): raise break - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("SELECT count(*) FROM t") assert cur.fetchone() == (iterations,) diff --git a/test_runner/regress/test_normal_work.py b/test_runner/regress/test_normal_work.py index aa37a2411c..50de99adb5 100644 --- a/test_runner/regress/test_normal_work.py +++ b/test_runner/regress/test_normal_work.py @@ -6,9 +6,9 @@ from fixtures.pageserver.http import PageserverHttpClient def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - res_1 = pg.safe_psql_many( + res_1 = endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -19,14 +19,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): assert res_1[-1][0] == (5000050000,) # TODO check detach on live instance log.info("stopping compute") - pg.stop() + endpoint.stop() log.info("compute stopped") - pg.start() - res_2 = pg.safe_psql("SELECT sum(key) FROM t") + endpoint.start() + res_2 = endpoint.safe_psql("SELECT sum(key) FROM t") assert res_2[0] == (5000050000,) - pg.stop() + endpoint.stop() pageserver_http.tenant_detach(tenant_id) diff --git a/test_runner/regress/test_old_request_lsn.py b/test_runner/regress/test_old_request_lsn.py index 9885a811e1..814b9f3de0 100644 --- 
a/test_runner/regress/test_old_request_lsn.py +++ b/test_runner/regress/test_old_request_lsn.py @@ -19,10 +19,10 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_old_request_lsn", "main") - pg = env.postgres.create_start("test_old_request_lsn") + endpoint = env.endpoints.create_start("test_old_request_lsn") log.info("postgres is running on test_old_request_lsn branch") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Get the timeline ID of our branch. We need it for the 'do_gc' command diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 07410b64df..cb08b014fd 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -73,17 +73,17 @@ def test_ondemand_download_large_rel( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] # We want to make sure that the data is large enough that the keyspace is partitioned. 
num_rows = 1000000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") cur.execute( @@ -106,7 +106,7 @@ def test_ondemand_download_large_rel( log.info("uploads have finished") ##### Stop the first pageserver instance, erase all its data - pg.stop() + endpoint.stop() env.pageserver.stop() # remove all the layer files @@ -117,7 +117,7 @@ def test_ondemand_download_large_rel( ##### Second start, restore the data and ensure it's the same env.pageserver.start() - pg.start() + endpoint.start() before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id) # Probe in the middle of the table. There's a high chance that the beginning @@ -125,7 +125,7 @@ def test_ondemand_download_large_rel( # from other tables, and with the entry that stores the size of the # relation, so they are likely already downloaded. But the middle of the # table should not have been needed by anything yet. 
- with pg.cursor() as cur: + with endpoint.cursor() as cur: assert query_scalar(cur, "select count(*) from tbl where id = 500000") == 1 after_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id) @@ -167,17 +167,17 @@ def test_ondemand_download_timetravel( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] lsns = [] table_len = 10000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text); @@ -192,7 +192,7 @@ def test_ondemand_download_timetravel( lsns.append((0, current_lsn)) for checkpoint_number in range(1, 20): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"UPDATE testtab SET checkpoint_number = {checkpoint_number}") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) lsns.append((checkpoint_number, current_lsn)) @@ -204,7 +204,7 @@ def test_ondemand_download_timetravel( client.timeline_checkpoint(tenant_id, timeline_id) ##### Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() # wait until pageserver has successfully uploaded all the data to remote storage wait_for_sk_commit_lsn_to_reach_remote_storage( @@ -251,10 +251,10 @@ def test_ondemand_download_timetravel( num_layers_downloaded = [0] resident_size = [get_resident_physical_size()] for checkpoint_number, lsn in lsns: - pg_old = env.postgres.create_start( - branch_name="main", node_name=f"test_old_lsn_{checkpoint_number}", lsn=lsn + endpoint_old = env.endpoints.create_start( + branch_name="main", endpoint_id=f"ep-old_lsn_{checkpoint_number}", 
lsn=lsn ) - with pg_old.cursor() as cur: + with endpoint_old.cursor() as cur: # assert query_scalar(cur, f"select count(*) from testtab where checkpoint_number={checkpoint_number}") == 100000 assert ( query_scalar( @@ -331,15 +331,15 @@ def test_download_remote_layers_api( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] table_len = 10000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text); @@ -347,7 +347,7 @@ def test_download_remote_layers_api( """ ) - env.postgres.stop_all() + env.endpoints.stop_all() wait_for_sk_commit_lsn_to_reach_remote_storage( tenant_id, timeline_id, env.safekeepers, env.pageserver @@ -463,8 +463,8 @@ def test_download_remote_layers_api( sk.start() # ensure that all the data is back - pg_old = env.postgres.create_start(branch_name="main") - with pg_old.cursor() as cur: + endpoint_old = env.endpoints.create_start(branch_name="main") + with endpoint_old.cursor() as cur: assert query_scalar(cur, "select count(*) from testtab") == table_len @@ -513,17 +513,17 @@ def test_compaction_downloads_on_demand_without_image_creation( env.initial_tenant = tenant_id pageserver_http = env.pageserver.http_client() - with env.postgres.create_start("main") as pg: + with env.endpoints.create_start("main") as endpoint: # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. 
- with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a as select id::bigint from generate_series(1, 204800) s(id)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set id = -id") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) layers = pageserver_http.layer_map_info(tenant_id, timeline_id) @@ -589,32 +589,32 @@ def test_compaction_downloads_on_demand_with_image_creation( env.initial_tenant = tenant_id pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. 
- with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a (id bigserial primary key, some_value bigint not null)") cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) for i in range(0, 2): for j in range(0, 3): # create a minimal amount of "delta difficulty" for this table - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set some_value = -some_value + %s", (j,)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: # vacuuming should aid to reuse keys, though it's not really important # with image_creation_threshold=1 which we will use on the last compaction cur.execute("vacuum") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) if i == 1 and j == 2: # last iteration; stop before checkpoint to avoid leaving an inmemory layer - pg.stop_and_destroy() + endpoint.stop_and_destroy() pageserver_http.timeline_checkpoint(tenant_id, timeline_id) diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py index 5b05989ae4..e86cd18f58 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -150,7 +150,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): env = neon_simple_env with env.pageserver.http_client() as client: tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id) + endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id) # Wait to make sure that we get a latest WAL receiver data. 
# We need to wait here because it's possible that we don't have access to @@ -163,7 +163,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): ) # Make a DB modification then expect getting a new WAL receiver's data. - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") wait_until( number_of_iterations=5, interval=1, diff --git a/test_runner/regress/test_pageserver_catchup.py b/test_runner/regress/test_pageserver_catchup.py index cba3203591..c16cbcb4ba 100644 --- a/test_runner/regress/test_pageserver_catchup.py +++ b/test_runner/regress/test_pageserver_catchup.py @@ -11,11 +11,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down") # Make shared_buffers large to ensure we won't query pageserver while it is down. - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"] ) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. 
@@ -59,10 +59,10 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.safekeepers[2].start() # restart compute node - pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") + endpoint.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") # Ensure that basebackup went correct and pageserver returned all data - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 77db729880..6da5503fb1 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -11,9 +11,9 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_pageserver_restart") - pg = env.postgres.create_start("test_pageserver_restart") + endpoint = env.endpoints.create_start("test_pageserver_restart") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. Make it big enough that it doesn't fit in @@ -84,13 +84,13 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder): } ) env.neon_cli.create_timeline("test_pageserver_chaos", tenant_id=tenant) - pg = env.postgres.create_start("test_pageserver_chaos", tenant_id=tenant) + endpoint = env.endpoints.create_start("test_pageserver_chaos", tenant_id=tenant) # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. 
- with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo (id int, t text, updates int)") cur.execute("CREATE INDEX ON foo (id)") @@ -116,12 +116,12 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder): # Update the whole table, then immediately kill and restart the pageserver for i in range(1, 15): - pg.safe_psql("UPDATE foo set updates = updates + 1") + endpoint.safe_psql("UPDATE foo set updates = updates + 1") # This kills the pageserver immediately, to simulate a crash env.pageserver.stop(immediate=True) env.pageserver.start() # Check that all the updates are visible - num_updates = pg.safe_psql("SELECT sum(updates) FROM foo")[0][0] + num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0] assert num_updates == i * 100000 diff --git a/test_runner/regress/test_pageserver_restarts_under_workload.py b/test_runner/regress/test_pageserver_restarts_under_workload.py index eab8b112f0..bc3f3f2be4 100644 --- a/test_runner/regress/test_pageserver_restarts_under_workload.py +++ b/test_runner/regress/test_pageserver_restarts_under_workload.py @@ -5,7 +5,7 @@ import threading import time from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres +from fixtures.neon_fixtures import NeonEnv, PgBin # Test restarting page server, while safekeeper and compute node keep @@ -13,7 +13,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin): env = neon_simple_env env.neon_cli.create_branch("test_pageserver_restarts") - pg = env.postgres.create_start("test_pageserver_restarts") + endpoint = env.endpoints.create_start("test_pageserver_restarts") n_restarts = 10 scale = 10 @@ -23,13 +23,12 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB r".*Gc failed, retrying in \S+: Cannot run GC iteration on inactive tenant" ) - 
def run_pgbench(pg: Postgres): - connstr = pg.connstr() + def run_pgbench(connstr: str): log.info(f"Start a pgbench workload on pg {connstr}") pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr]) - thread = threading.Thread(target=run_pgbench, args=(pg,), daemon=True) + thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True) thread.start() for i in range(n_restarts): diff --git a/test_runner/regress/test_parallel_copy.py b/test_runner/regress/test_parallel_copy.py index 59f19026cc..577bbc21bf 100644 --- a/test_runner/regress/test_parallel_copy.py +++ b/test_runner/regress/test_parallel_copy.py @@ -2,7 +2,7 @@ import asyncio from io import BytesIO from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv async def repeat_bytes(buf, repetitions: int): @@ -10,7 +10,7 @@ async def repeat_bytes(buf, repetitions: int): yield buf -async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str): +async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str): buf = BytesIO() for i in range(1000): buf.write( @@ -20,7 +20,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) copy_input = repeat_bytes(buf.read(), 5000) - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() # PgProtocol.connect_async sets statement_timeout to 2 minutes. # That's not enough for this test, on a slow system in debug mode. 
@@ -29,10 +29,10 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) await pg_conn.copy_to_table(table_name, source=copy_input) -async def parallel_load_same_table(pg: Postgres, n_parallel: int): +async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, "copytest") + worker = copy_test_data_to_table(endpoint, worker_id, "copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -43,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int): def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5): env = neon_simple_env env.neon_cli.create_branch("test_parallel_copy", "empty") - pg = env.postgres.create_start("test_parallel_copy") + endpoint = env.endpoints.create_start("test_parallel_copy") log.info("postgres is running on 'test_parallel_copy' branch") # Create test table - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE copytest (i int, t text)") # Run COPY TO to load the table with parallel connections. - asyncio.run(parallel_load_same_table(pg, n_parallel)) + asyncio.run(parallel_load_same_table(endpoint, n_parallel)) diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 5eb1ebb3de..64625ea4ee 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -24,8 +24,8 @@ def test_pg_regress( env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_pg_regress") - pg.safe_psql("CREATE DATABASE regression") + endpoint = env.endpoints.create_start("test_pg_regress") + endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. 
runpath = test_output_dir / "regress" @@ -49,9 +49,9 @@ def test_pg_regress( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. @@ -61,10 +61,10 @@ def test_pg_regress( pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") + endpoint.safe_psql("CHECKPOINT") # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) # Run the PostgreSQL "isolation" tests, in src/test/isolation. @@ -85,8 +85,10 @@ def test_isolation( env.neon_cli.create_branch("test_isolation", "empty") # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them - pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) - pg.safe_psql("CREATE DATABASE isolation_regression") + endpoint = env.endpoints.create_start( + "test_isolation", config_lines=["max_prepared_transactions=100"] + ) + endpoint.safe_psql("CREATE DATABASE isolation_regression") # Create some local directories for pg_isolation_regress to run in. runpath = test_output_dir / "regress" @@ -109,9 +111,9 @@ def test_isolation( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. 
@@ -135,8 +137,8 @@ def test_sql_regress( env.neon_cli.create_branch("test_sql_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_sql_regress") - pg.safe_psql("CREATE DATABASE regression") + endpoint = env.endpoints.create_start("test_sql_regress") + endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. runpath = test_output_dir / "regress" @@ -160,9 +162,9 @@ def test_sql_regress( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. @@ -172,8 +174,8 @@ def test_sql_regress( pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") - pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] + endpoint.safe_psql("CHECKPOINT") + endpoint.safe_psql("select pg_current_wal_insert_lsn()")[0][0] # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_pitr_gc.py b/test_runner/regress/test_pitr_gc.py index fe4fbc0927..c2ea5b332a 100644 --- a/test_runner/regress/test_pitr_gc.py +++ b/test_runner/regress/test_pitr_gc.py @@ -15,10 +15,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): ) env = neon_env_builder.init_start() - pgmain = env.postgres.create_start("main") + endpoint_main = env.endpoints.create_start("main") log.info("postgres is running on 'main' branch") - main_pg_conn = pgmain.connect() + main_pg_conn = endpoint_main.connect() main_cur = main_pg_conn.cursor() timeline = TimelineId(query_scalar(main_cur, "SHOW 
neon.timeline_id")) @@ -62,10 +62,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): # It must have been preserved by PITR setting env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a) - pg_hundred = env.postgres.create_start("test_pitr_gc_hundred") + endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred") # On the 'hundred' branch, we should see only 100 rows - hundred_pg_conn = pg_hundred.connect() + hundred_pg_conn = endpoint_hundred.connect() hundred_cur = hundred_pg_conn.cursor() hundred_cur.execute("SELECT count(*) FROM foo") assert hundred_cur.fetchone() == (100,) diff --git a/test_runner/regress/test_read_trace.py b/test_runner/regress/test_read_trace.py index be0eb76ccd..9ebe53fc17 100644 --- a/test_runner/regress/test_read_trace.py +++ b/test_runner/regress/test_read_trace.py @@ -21,22 +21,22 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder): ) timeline = env.neon_cli.create_timeline("test_trace_replay", tenant_id=tenant) - pg = env.postgres.create_start("test_trace_replay", "main", tenant) + endpoint = env.endpoints.create_start("test_trace_replay", "main", tenant) - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("create table t (i integer);") cur.execute(f"insert into t values (generate_series(1,{10000}));") cur.execute("select count(*) from t;") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # wait until pageserver receives that data pageserver_http = env.pageserver.http_client() wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn) - # Stop pg so we drop the connection and flush 
the traces - pg.stop() + # Stop postgres so we drop the connection and flush the traces + endpoint.stop() trace_path = env.repo_dir / "traces" / str(tenant) / str(timeline) assert trace_path.exists() diff --git a/test_runner/regress/test_read_validation.py b/test_runner/regress/test_read_validation.py index 47135dc56c..47a06359bb 100644 --- a/test_runner/regress/test_read_validation.py +++ b/test_runner/regress/test_read_validation.py @@ -17,10 +17,10 @@ def test_read_validation(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_read_validation", "empty") - pg = env.postgres.create_start("test_read_validation") + endpoint = env.endpoints.create_start("test_read_validation") log.info("postgres is running on 'test_read_validation' branch") - with closing(pg.connect()) as con: + with closing(endpoint.connect()) as con: with con.cursor() as c: for e in extensions: c.execute("create extension if not exists {};".format(e)) @@ -144,10 +144,10 @@ def test_read_validation_neg(neon_simple_env: NeonEnv): env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*") - pg = env.postgres.create_start("test_read_validation_neg") + endpoint = env.endpoints.create_start("test_read_validation_neg") log.info("postgres is running on 'test_read_validation_neg' branch") - with closing(pg.connect()) as con: + with closing(endpoint.connect()) as con: with con.cursor() as c: for e in extensions: c.execute("create extension if not exists {};".format(e)) diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index 69d6e427ce..2d641e36a7 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -15,12 +15,12 @@ from fixtures.utils import query_scalar def test_readonly_node(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_readonly_node", "empty") - pgmain = env.postgres.create_start("test_readonly_node") + endpoint_main = 
env.endpoints.create_start("test_readonly_node") log.info("postgres is running on 'test_readonly_node' branch") env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*") - main_pg_conn = pgmain.connect() + main_pg_conn = endpoint_main.connect() main_cur = main_pg_conn.cursor() # Create table, and insert the first 100 rows @@ -61,23 +61,23 @@ def test_readonly_node(neon_simple_env: NeonEnv): log.info("LSN after 400100 rows: " + lsn_c) # Create first read-only node at the point where only 100 rows were inserted - pg_hundred = env.postgres.create_start( - branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a + endpoint_hundred = env.endpoints.create_start( + branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a ) # And another at the point where 200100 rows were inserted - pg_more = env.postgres.create_start( - branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b + endpoint_more = env.endpoints.create_start( + branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b ) # On the 'hundred' node, we should see only 100 rows - hundred_pg_conn = pg_hundred.connect() + hundred_pg_conn = endpoint_hundred.connect() hundred_cur = hundred_pg_conn.cursor() hundred_cur.execute("SELECT count(*) FROM foo") assert hundred_cur.fetchone() == (100,) # On the 'more' node, we should see 100200 rows - more_pg_conn = pg_more.connect() + more_pg_conn = endpoint_more.connect() more_cur = more_pg_conn.cursor() more_cur.execute("SELECT count(*) FROM foo") assert more_cur.fetchone() == (200100,) @@ -87,21 +87,21 @@ def test_readonly_node(neon_simple_env: NeonEnv): assert main_cur.fetchone() == (400100,) # Check creating a node at segment boundary - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( branch_name="test_readonly_node", - node_name="test_branch_segment_boundary", + endpoint_id="ep-branch_segment_boundary", 
lsn=Lsn("0/3000000"), ) - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute("SELECT 1") assert cur.fetchone() == (1,) # Create node at pre-initdb lsn with pytest.raises(Exception, match="invalid basebackup lsn"): # compute node startup with invalid LSN should fail - env.postgres.create_start( + env.endpoints.create_start( branch_name="test_readonly_node", - node_name="test_readonly_node_preinitdb", + endpoint_id="ep-readonly_node_preinitdb", lsn=Lsn("0/42"), ) @@ -111,16 +111,16 @@ def test_timetravel(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http_client = env.pageserver.http_client() env.neon_cli.create_branch("test_timetravel", "empty") - pg = env.postgres.create_start("test_timetravel") + endpoint = env.endpoints.create_start("test_timetravel") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] lsns = [] - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( """ CREATE TABLE testtab(id serial primary key, iteration int, data text); @@ -131,7 +131,7 @@ def test_timetravel(neon_simple_env: NeonEnv): lsns.append((0, current_lsn)) for i in range(1, 5): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"UPDATE testtab SET iteration = {i}") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) lsns.append((i, current_lsn)) @@ -143,14 +143,14 @@ def test_timetravel(neon_simple_env: NeonEnv): pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id) ##### Restart pageserver - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() env.pageserver.start() for i, lsn in lsns: - pg_old = env.postgres.create_start( - branch_name="test_timetravel", node_name=f"test_old_lsn_{i}", lsn=lsn + endpoint_old = 
env.endpoints.create_start( + branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn ) - with pg_old.cursor() as cur: + with endpoint_old.cursor() as cur: assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000 assert query_scalar(cur, f"select count(*) from testtab where iteration<>{i}") == 0 diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index 09644eaaa1..76e97a35a4 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -22,10 +22,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main") - pg = env.postgres.create_start("test_pageserver_recovery") + endpoint = env.endpoints.create_start("test_pageserver_recovery") log.info("postgres is running on 'test_pageserver_recovery' branch") - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: with env.pageserver.http_client() as pageserver_http: # Create and initialize test table @@ -54,7 +54,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): env.pageserver.stop() env.pageserver.start() - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("select count(*) from foo") assert cur.fetchone() == (100000,) diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 222305f006..6de5f7db04 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -87,17 +87,17 @@ def test_remote_storage_backup_and_restore( env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*") pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id 
= TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) checkpoint_numbers = range(1, 3) for checkpoint_number in checkpoint_numbers: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE t{checkpoint_number}(id int primary key, data text); @@ -126,7 +126,7 @@ def test_remote_storage_backup_and_restore( ) ##### Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() dir_to_clear = Path(env.repo_dir) / "tenants" @@ -187,8 +187,8 @@ def test_remote_storage_backup_and_restore( ), "current db Lsn should should not be less than the one stored on remote storage" log.info("select some data, this will cause layers to be downloaded") - pg = env.postgres.create_start("main") - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main") + with endpoint.cursor() as cur: for checkpoint_number in checkpoint_numbers: assert ( query_scalar(cur, f"SELECT data FROM t{checkpoint_number} WHERE id = {data_id};") @@ -238,9 +238,9 @@ def test_remote_storage_upload_queue_retries( client = env.pageserver.http_client() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") + endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") def configure_storage_sync_failpoints(action): client.configure_failpoints( @@ -253,7 +253,7 @@ def test_remote_storage_upload_queue_retries( def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data): # create initial set of layers & upload them with failpoints configured - pg.safe_psql_many( + endpoint.safe_psql_many( [ f""" INSERT INTO foo (id, val) @@ 
-266,7 +266,7 @@ def test_remote_storage_upload_queue_retries( "VACUUM foo", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) def get_queued_count(file_kind, op_kind): val = client.get_remote_timeline_client_metric( @@ -343,7 +343,7 @@ def test_remote_storage_upload_queue_retries( # but how do we validate the result after restore? env.pageserver.stop(immediate=True) - env.postgres.stop_all() + env.endpoints.stop_all() dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) @@ -357,8 +357,8 @@ def test_remote_storage_upload_queue_retries( wait_until_tenant_active(client, tenant_id) log.info("restarting postgres to validate") - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000 @@ -394,13 +394,13 @@ def test_remote_timeline_client_calls_started_metric( client = env.pageserver.http_client() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") + endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data): # create initial set of layers & upload them with failpoints configured - pg.safe_psql_many( + endpoint.safe_psql_many( [ f""" INSERT INTO foo (id, val) @@ -413,7 +413,7 @@ def test_remote_timeline_client_calls_started_metric( "VACUUM foo", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) calls_started: Dict[Tuple[str, str], List[int]] = { ("layer", "upload"): [0], @@ -478,7 +478,7 @@ def test_remote_timeline_client_calls_started_metric( ) 
env.pageserver.stop(immediate=True) - env.postgres.stop_all() + env.endpoints.stop_all() dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) @@ -492,8 +492,8 @@ def test_remote_timeline_client_calls_started_metric( wait_until_tenant_active(client, tenant_id) log.info("restarting postgres to validate") - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000 # ensure that we updated the calls_started download metric @@ -543,17 +543,17 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue( ) return int(val) if val is not None else val - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) client.configure_failpoints(("before-upload-layer", "return")) - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (x INTEGER)", "INSERT INTO foo SELECT g FROM generate_series(1, 10000) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) # Kick off a checkpoint operation. 
# It will get stuck in remote_client.wait_completion(), since the select query will have @@ -627,8 +627,8 @@ def test_empty_branch_remote_storage_upload( new_branch_name = "new_branch" new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant) - with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg: - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id) + with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint: + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id) wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id) timelines_before_detach = set( @@ -676,8 +676,8 @@ def test_empty_branch_remote_storage_upload_on_restart( new_branch_name = "new_branch" new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant) - with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg: - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id) + with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint: + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id) wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id) env.pageserver.stop() diff --git a/test_runner/regress/test_subxacts.py b/test_runner/regress/test_subxacts.py index 42234bf535..494820ef8e 100644 --- a/test_runner/regress/test_subxacts.py +++ b/test_runner/regress/test_subxacts.py @@ -11,10 +11,10 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_subxacts", "empty") - pg = env.postgres.create_start("test_subxacts") + endpoint = env.endpoints.create_start("test_subxacts") log.info("postgres is running on 'test_subxacts' branch") - pg_conn 
= pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute( @@ -37,4 +37,4 @@ def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): cur.execute("checkpoint") # Check that we can restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 80d4b99504..28f1a960df 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -43,11 +43,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" tenant, _ = env.neon_cli.create_tenant(conf=new_conf) env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant) - env.postgres.create_start( - "test_tenant_conf", - "main", - tenant, - ) + env.endpoints.create_start("test_tenant_conf", "main", tenant) # check the configuration of the default tenant # it should match global configuration diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 58a010951e..847ae4b2b8 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -7,9 +7,9 @@ import asyncpg import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, - Postgres, RemoteStorageKind, available_remote_storages, ) @@ -59,8 +59,8 @@ def test_tenant_reattach( # create new nenant tenant_id, timeline_id = env.neon_cli.create_tenant() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") current_lsn = Lsn(query_scalar(cur, "SELECT 
pg_current_wal_flush_lsn()")) @@ -99,8 +99,8 @@ def test_tenant_reattach( assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT count(*) FROM t") == 100000 # Check that we had to retry the downloads @@ -157,11 +157,11 @@ async def sleep_and_reattach(pageserver_http: PageserverHttpClient, tenant_id: T # async guts of test_tenant_reattach_while_bysy test async def reattach_while_busy( - env: NeonEnv, pg: Postgres, pageserver_http: PageserverHttpClient, tenant_id: TenantId + env: NeonEnv, endpoint: Endpoint, pageserver_http: PageserverHttpClient, tenant_id: TenantId ): workers = [] for worker_id in range(num_connections): - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() workers.append(asyncio.create_task(update_table(pg_conn))) workers.append(asyncio.create_task(sleep_and_reattach(pageserver_http, tenant_id))) @@ -238,15 +238,15 @@ def test_tenant_reattach_while_busy( conf={"checkpoint_distance": "100000"} ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute("CREATE TABLE t(id int primary key, counter int)") cur.execute(f"INSERT INTO t SELECT generate_series(1,{num_rows}), 0") # Run the test - asyncio.run(reattach_while_busy(env, pg, pageserver_http, tenant_id)) + asyncio.run(reattach_while_busy(env, endpoint, pageserver_http, tenant_id)) # Verify table contents assert query_scalar(cur, "SELECT count(*) FROM t") == num_rows @@ -278,9 +278,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = 
env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -339,9 +339,9 @@ def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -388,9 +388,9 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -425,18 +425,18 @@ def test_detach_while_attaching( ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show 
neon.timeline_id")[0][0]) # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( """ @@ -477,7 +477,7 @@ def test_detach_while_attaching( # cycle are still running, things could get really confusing.. pageserver_http.tenant_attach(tenant_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("SELECT COUNT(*) FROM foo") @@ -572,14 +572,14 @@ def test_ignored_tenant_download_missing_layers( ) env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) data_id = 1 data_secret = "very secret secret" - insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg) + insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] tenants_before_ignore.sort() @@ -611,9 +611,9 @@ def test_ignored_tenant_download_missing_layers( ] assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back" - pg.stop() - pg.start() - ensure_test_data(data_id, data_secret, pg) + endpoint.stop() + endpoint.start() + ensure_test_data(data_id, data_secret, endpoint) # Tests that it's possible to `load` broken tenants: @@ -631,10 +631,10 @@ def test_ignored_tenant_stays_broken_without_metadata( ) env = 
neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # ignore the tenant and remove its metadata pageserver_http.tenant_ignore(tenant_id) @@ -666,9 +666,9 @@ def test_load_attach_negatives( ) env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") with pytest.raises( @@ -707,16 +707,16 @@ def test_ignore_while_attaching( env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") pageserver_http = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) data_id = 1 data_secret = "very secret secret" - insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg) + insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] @@ -754,9 +754,9 @@ def test_ignore_while_attaching( wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5) - 
pg.stop() - pg.start() - ensure_test_data(data_id, data_secret, pg) + endpoint.stop() + endpoint.start() + ensure_test_data(data_id, data_secret, endpoint) def insert_test_data( @@ -765,9 +765,9 @@ def insert_test_data( timeline_id: TimelineId, data_id: int, data: str, - pg: Postgres, + endpoint: Endpoint, ): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE test(id int primary key, secret text); @@ -787,8 +787,8 @@ def insert_test_data( wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) -def ensure_test_data(data_id: int, data: str, pg: Postgres): - with pg.cursor() as cur: +def ensure_test_data(data_id: int, data: str, endpoint: Endpoint): + with endpoint.cursor() as cur: assert ( query_scalar(cur, f"SELECT secret FROM test WHERE id = {data_id};") == data ), "Should have timeline data back" diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index 3569ab0c53..180afd88cd 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -7,11 +7,11 @@ from typing import Any, Dict, Optional, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonBroker, NeonEnv, NeonEnvBuilder, PortDistributor, - Postgres, ) from fixtures.pageserver.http import PageserverHttpClient from fixtures.pageserver.utils import ( @@ -87,20 +87,20 @@ def new_pageserver_service( @contextmanager -def pg_cur(pg): - with closing(pg.connect()) as conn: +def pg_cur(endpoint): + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: yield cur -def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event): +def load(endpoint: Endpoint, stop_event: threading.Event, load_ok_event: threading.Event): log.info("load started") inserted_ctr = 0 failed = False while not stop_event.is_set(): try: - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: 
cur.execute("INSERT INTO load VALUES ('some payload')") inserted_ctr += 1 except: # noqa: E722 @@ -110,7 +110,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve load_ok_event.clear() else: if failed: - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: # if we recovered after failure verify that we have correct number of rows log.info("recovering at %s", inserted_ctr) cur.execute("SELECT count(*) FROM load") @@ -124,14 +124,14 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve def populate_branch( - pg: Postgres, + endpoint: Endpoint, tenant_id: TenantId, ps_http: PageserverHttpClient, create_table: bool, expected_sum: Optional[int], ) -> Tuple[TimelineId, Lsn]: # insert some data - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: cur.execute("SHOW neon.timeline_id") timeline_id = TimelineId(cur.fetchone()[0]) log.info("timeline to relocate %s", timeline_id) @@ -196,19 +196,19 @@ def check_timeline_attached( def switch_pg_to_new_pageserver( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, new_pageserver_port: int, tenant_id: TenantId, timeline_id: TimelineId, ) -> Path: - pg.stop() + endpoint.stop() - pg_config_file_path = Path(pg.config_file_path()) + pg_config_file_path = Path(endpoint.config_file_path()) pg_config_file_path.open("a").write( f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'" ) - pg.start() + endpoint.start() timeline_to_detach_local_path = ( env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) @@ -226,8 +226,8 @@ def switch_pg_to_new_pageserver( return timeline_to_detach_local_path -def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: Path): - with pg_cur(pg) as cur: +def post_migration_check(endpoint: Endpoint, sum_before_migration: int, old_local_path: Path): + with pg_cur(endpoint) as cur: # check that data is still there cur.execute("SELECT sum(key) FROM t") assert 
cur.fetchone() == (sum_before_migration,) @@ -288,12 +288,12 @@ def test_tenant_relocation( log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id) env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id) - pg_main = env.postgres.create_start( + ep_main = env.endpoints.create_start( branch_name="test_tenant_relocation_main", tenant_id=tenant_id ) timeline_id_main, current_lsn_main = populate_branch( - pg_main, + ep_main, tenant_id=tenant_id, ps_http=pageserver_http, create_table=True, @@ -306,12 +306,12 @@ def test_tenant_relocation( ancestor_start_lsn=current_lsn_main, tenant_id=tenant_id, ) - pg_second = env.postgres.create_start( + ep_second = env.endpoints.create_start( branch_name="test_tenant_relocation_second", tenant_id=tenant_id ) timeline_id_second, current_lsn_second = populate_branch( - pg_second, + ep_second, tenant_id=tenant_id, ps_http=pageserver_http, create_table=False, @@ -327,14 +327,14 @@ def test_tenant_relocation( if with_load == "with_load": # create load table - with pg_cur(pg_main) as cur: + with pg_cur(ep_main) as cur: cur.execute("CREATE TABLE load(value text)") load_stop_event = threading.Event() load_ok_event = threading.Event() load_thread = threading.Thread( target=load, - args=(pg_main, load_stop_event, load_ok_event), + args=(ep_main, load_stop_event, load_ok_event), daemon=True, # To make sure the child dies when the parent errors ) load_thread.start() @@ -450,7 +450,7 @@ def test_tenant_relocation( old_local_path_main = switch_pg_to_new_pageserver( env, - pg_main, + ep_main, new_pageserver_pg_port, tenant_id, timeline_id_main, @@ -458,7 +458,7 @@ def test_tenant_relocation( old_local_path_second = switch_pg_to_new_pageserver( env, - pg_second, + ep_second, new_pageserver_pg_port, tenant_id, timeline_id_second, @@ -475,11 +475,11 @@ def test_tenant_relocation( interval=1, func=lambda: tenant_exists(pageserver_http, tenant_id), ) - post_migration_check(pg_main, 500500, 
old_local_path_main) - post_migration_check(pg_second, 1001000, old_local_path_second) + post_migration_check(ep_main, 500500, old_local_path_main) + post_migration_check(ep_second, 1001000, old_local_path_second) # ensure that we can successfully read all relations on the new pageserver - with pg_cur(pg_second) as cur: + with pg_cur(ep_second) as cur: cur.execute( """ DO $$ diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 9037fe0045..e8d534142e 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -4,9 +4,9 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, - Postgres, wait_for_last_flush_lsn, wait_for_wal_insert_lsn, ) @@ -28,12 +28,12 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path): branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0] assert branch_name == main_branch_name - with env.postgres.create_start( + with env.endpoints.create_start( main_branch_name, tenant_id=tenant_id, config_lines=["autovacuum=off", "checkpoint_timeout=10min"], - ) as pg: - with pg.cursor() as cur: + ) as endpoint: + with endpoint.cursor() as cur: cur.execute("SELECT 1") row = cur.fetchone() assert row is not None @@ -105,12 +105,12 @@ def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir: first_branch_timeline_id = env.neon_cli.create_branch("first-branch", tenant_id=tenant_id) - with env.postgres.create_start("first-branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("first-branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, first_branch_timeline_id) + wait_for_last_flush_lsn(env, endpoint, 
tenant_id, first_branch_timeline_id) size_after_branching = http_client.tenant_size(tenant_id) log.info(f"size_after_branching: {size_after_branching}") @@ -164,12 +164,12 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou assert last_branch is not None - with env.postgres.create_start(last_branch_name, tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start(last_branch_name, tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, last_branch) + wait_for_last_flush_lsn(env, endpoint, tenant_id, last_branch) size_after_writes = http_client.tenant_size(tenant_id) assert size_after_writes > initial_size @@ -194,11 +194,11 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: (tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)}) http_client = env.pageserver.http_client() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) size_before_branching = http_client.tenant_size(tenant_id) @@ -208,10 +208,10 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: "branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn ) - with env.postgres.create_start("branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with 
env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, branch_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id) size_after = http_client.tenant_size(tenant_id) @@ -237,17 +237,17 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): (tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)}) http_client = env.pageserver.http_client() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t00 AS SELECT i::bigint n FROM generate_series(0, 2000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) size_before_branching = http_client.tenant_size(tenant_id) @@ -257,10 +257,10 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): "branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn ) - with env.postgres.create_start("branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, 
tenant_id, branch_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id) size_after = http_client.tenant_size(tenant_id) @@ -297,12 +297,12 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa # gc is not expected to change the results for branch_name, amount in [("main", 2000), ("first", 15000), ("second", 3000)]: - with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start(branch_name, tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {amount}) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, ids[branch_name]) + wait_for_last_flush_lsn(env, endpoint, tenant_id, ids[branch_name]) size_now = http_client.tenant_size(tenant_id) if latest_size is not None: assert size_now > latest_size @@ -359,7 +359,7 @@ def test_single_branch_get_tenant_size_grows( def get_current_consistent_size( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, size_debug_file, # apparently there is no public signature for open()... http_client: PageserverHttpClient, tenant_id: TenantId, @@ -368,7 +368,7 @@ def test_single_branch_get_tenant_size_grows( consistent = False size_debug = None - current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id) + current_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id) # We want to make sure we have a self-consistent set of values. 
# Size changes with WAL, so only if both before and after getting # the size of the tenant reports the same WAL insert LSN, we're OK @@ -382,35 +382,35 @@ def test_single_branch_get_tenant_size_grows( size, sizes = http_client.tenant_size_and_modelinputs(tenant_id) size_debug = http_client.tenant_size_debug(tenant_id) - after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id) + after_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id) consistent = current_lsn == after_lsn current_lsn = after_lsn size_debug_file.write(size_debug) return (current_lsn, size) - with env.postgres.create_start( + with env.endpoints.create_start( branch_name, tenant_id=tenant_id, ### autovacuum is disabled to limit WAL logging. config_lines=["autovacuum=off"], - ) as pg: + ) as endpoint: (initdb_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) collected_responses.append(("INITDB", initdb_lsn, size)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)") (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) collected_responses.append(("CREATE", current_lsn, size)) batch_size = 100 for i in range(3): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)", (i, i), @@ -419,7 +419,7 @@ def test_single_branch_get_tenant_size_grows( i += 1 (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -438,7 +438,7 @@ def 
test_single_branch_get_tenant_size_grows( collected_responses.append(("INSERT", current_lsn, size)) while True: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f"UPDATE t0 SET i = -i WHERE i IN (SELECT i FROM t0 WHERE i > 0 LIMIT {batch_size})" ) @@ -448,7 +448,7 @@ def test_single_branch_get_tenant_size_grows( break (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -458,7 +458,7 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("UPDATE", current_lsn, size)) while True: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"DELETE FROM t0 WHERE i IN (SELECT i FROM t0 LIMIT {batch_size})") deleted = cur.rowcount @@ -466,7 +466,7 @@ def test_single_branch_get_tenant_size_grows( break (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -475,14 +475,14 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("DELETE", current_lsn, size)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("DROP TABLE t0") # The size of the tenant should still be as large as before we dropped # the table, because the drop operation can still be undone in the PITR # defined by gc_horizon. 
(current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -532,16 +532,16 @@ def test_get_tenant_size_with_multiple_branches( http_client = env.pageserver.http_client() - main_pg = env.postgres.create_start(main_branch_name, tenant_id=tenant_id) + main_endpoint = env.endpoints.create_start(main_branch_name, tenant_id=tenant_id) batch_size = 10000 - with main_pg.cursor() as cur: + with main_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id) + wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id) size_at_branch = http_client.tenant_size(tenant_id) assert size_at_branch > 0 @@ -552,23 +552,23 @@ def test_get_tenant_size_with_multiple_branches( size_after_first_branch = http_client.tenant_size(tenant_id) assert size_after_first_branch == size_at_branch - first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id) + first_branch_endpoint = env.endpoints.create_start("first-branch", tenant_id=tenant_id) - with first_branch_pg.cursor() as cur: + with first_branch_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, first_branch_pg, tenant_id, first_branch_timeline_id) + wait_for_last_flush_lsn(env, first_branch_endpoint, tenant_id, first_branch_timeline_id) size_after_growing_first_branch = http_client.tenant_size(tenant_id) assert size_after_growing_first_branch > size_after_first_branch - with main_pg.cursor() as cur: + with main_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 2*{batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id) 
+ wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id) size_after_continuing_on_main = http_client.tenant_size(tenant_id) assert size_after_continuing_on_main > size_after_growing_first_branch @@ -578,31 +578,31 @@ def test_get_tenant_size_with_multiple_branches( size_after_second_branch = http_client.tenant_size(tenant_id) assert size_after_second_branch == size_after_continuing_on_main - second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id) + second_branch_endpoint = env.endpoints.create_start("second-branch", tenant_id=tenant_id) - with second_branch_pg.cursor() as cur: + with second_branch_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 3*{batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id) + wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id) size_after_growing_second_branch = http_client.tenant_size(tenant_id) assert size_after_growing_second_branch > size_after_second_branch - with second_branch_pg.cursor() as cur: + with second_branch_endpoint.cursor() as cur: cur.execute("DROP TABLE t0") cur.execute("DROP TABLE t1") cur.execute("VACUUM FULL") - wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id) + wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id) size_after_thinning_branch = http_client.tenant_size(tenant_id) assert ( size_after_thinning_branch > size_after_growing_second_branch ), "tenant_size should grow with dropped tables and full vacuum" - first_branch_pg.stop_and_destroy() - second_branch_pg.stop_and_destroy() - main_pg.stop() + first_branch_endpoint.stop_and_destroy() + second_branch_endpoint.stop_and_destroy() + main_endpoint.stop() env.pageserver.stop() env.pageserver.start() diff --git a/test_runner/regress/test_tenant_tasks.py b/test_runner/regress/test_tenant_tasks.py index 
8c89100745..21e4af4127 100644 --- a/test_runner/regress/test_tenant_tasks.py +++ b/test_runner/regress/test_tenant_tasks.py @@ -29,7 +29,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): # Create tenant, start compute tenant, _ = env.neon_cli.create_tenant() env.neon_cli.create_timeline(name, tenant_id=tenant) - pg = env.postgres.create_start(name, tenant_id=tenant) + endpoint = env.endpoints.create_start(name, tenant_id=tenant) assert_tenant_state( client, tenant, @@ -38,7 +38,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): ) # Stop compute - pg.stop() + endpoint.stop() # Delete all timelines on all tenants. # diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 2162520217..8026d7f5c6 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -66,17 +66,17 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_1) env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start( + endpoint_tenant1 = env.endpoints.create_start( "test_tenants_normal_work", tenant_id=tenant_1, ) - pg_tenant2 = env.postgres.create_start( + endpoint_tenant2 = env.endpoints.create_start( "test_tenants_normal_work", tenant_id=tenant_2, ) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -97,11 +97,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1) timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", 
tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2) + endpoint_tenant1 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_1) + endpoint_tenant2 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_2) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") @@ -242,11 +242,15 @@ def test_pageserver_metrics_removed_after_detach( env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1) env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_2) + endpoint_tenant1 = env.endpoints.create_start( + "test_metrics_removed_after_detach", tenant_id=tenant_1 + ) + endpoint_tenant2 = env.endpoints.create_start( + "test_metrics_removed_after_detach", tenant_id=tenant_2 + ) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") @@ -317,7 +321,7 @@ def test_pageserver_with_empty_tenants( ), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory" # Trigger timeline re-initialization after pageserver restart - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() tenant_without_timelines_dir = env.initial_tenant diff --git 
a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index 514e2b6fa0..d7c0814570 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -15,10 +15,10 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, LocalFsStorage, NeonEnv, NeonEnvBuilder, - Postgres, RemoteStorageKind, available_remote_storages, wait_for_sk_commit_lsn_to_reach_remote_storage, @@ -32,10 +32,10 @@ from fixtures.types import Lsn, TenantId, TimelineId from fixtures.utils import query_scalar, wait_until -async def tenant_workload(env: NeonEnv, pg: Postgres): +async def tenant_workload(env: NeonEnv, endpoint: Endpoint): await env.pageserver.connect_async() - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() await pg_conn.execute("CREATE TABLE t(key int primary key, value text)") for i in range(1, 100): @@ -49,10 +49,10 @@ async def tenant_workload(env: NeonEnv, pg: Postgres): assert res == i * 1000 -async def all_tenants_workload(env: NeonEnv, tenants_pgs): +async def all_tenants_workload(env: NeonEnv, tenants_endpoints): workers = [] - for _, pg in tenants_pgs: - worker = tenant_workload(env, pg) + for _, endpoint in tenants_endpoints: + worker = tenant_workload(env, endpoint) workers.append(asyncio.create_task(worker)) # await all workers @@ -73,7 +73,7 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - tenants_pgs: List[Tuple[TenantId, Postgres]] = [] + tenants_endpoints: List[Tuple[TenantId, Endpoint]] = [] for _ in range(1, 5): # Use a tiny checkpoint distance, to create a lot of layers quickly @@ -84,18 +84,18 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem ) 
env.neon_cli.create_timeline("test_tenants_many", tenant_id=tenant) - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_tenants_many", tenant_id=tenant, ) - tenants_pgs.append((tenant, pg)) + tenants_endpoints.append((tenant, endpoint)) - asyncio.run(all_tenants_workload(env, tenants_pgs)) + asyncio.run(all_tenants_workload(env, tenants_endpoints)) # Wait for the remote storage uploads to finish pageserver_http = env.pageserver.http_client() - for tenant, pg in tenants_pgs: - res = pg.safe_psql_many( + for tenant, endpoint in tenants_endpoints: + res = endpoint.safe_psql_many( ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"] ) tenant_id = TenantId(res[0][0][0]) @@ -137,15 +137,15 @@ def test_tenants_attached_after_download( ) pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) for checkpoint_number in range(1, 3): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE t{checkpoint_number}(id int primary key, secret text); @@ -174,7 +174,7 @@ def test_tenants_attached_after_download( ) ##### Stop the pageserver, erase its layer file to force it being downloaded from S3 - env.postgres.stop_all() + env.endpoints.stop_all() wait_for_sk_commit_lsn_to_reach_remote_storage( tenant_id, timeline_id, env.safekeepers, env.pageserver @@ -244,12 +244,12 @@ def test_tenant_redownloads_truncated_file_on_startup( env.pageserver.allowed_errors.append(".*No timelines to attach received.*") pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + 
endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS VALUES (123, 'foobar');") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) @@ -257,7 +257,7 @@ def test_tenant_redownloads_truncated_file_on_startup( pageserver_http.timeline_checkpoint(tenant_id, timeline_id) wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() timeline_dir = Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) @@ -313,9 +313,9 @@ def test_tenant_redownloads_truncated_file_on_startup( os.stat(remote_layer_path).st_size == expected_size ), "truncated file should not had been uploaded around re-download" - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("INSERT INTO t1 VALUES (234, 'test data');") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 28da3c5a48..db278d5646 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -12,11 +12,11 @@ import psycopg2.extras import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, PgBin, PortDistributor, - Postgres, RemoteStorageKind, VanillaPostgres, wait_for_last_flush_lsn, @@ -38,10 +38,10 @@ def test_timeline_size(neon_simple_env: NeonEnv): client = env.pageserver.http_client() 
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) - pgmain = env.postgres.create_start("test_timeline_size") + endpoint_main = env.endpoints.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( @@ -74,10 +74,10 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True ) - pgmain = env.postgres.create_start("test_timeline_size_createdropdb") + endpoint_main = env.endpoints.create_start("test_timeline_size_createdropdb") log.info("postgres is running on 'test_timeline_size_createdropdb' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: res = client.timeline_detail( env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True @@ -89,7 +89,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): ), "no writes should not change the incremental logical size" cur.execute("CREATE DATABASE foodb") - with closing(pgmain.connect(dbname="foodb")) as conn: + with closing(endpoint_main.connect(dbname="foodb")) as conn: with conn.cursor() as cur2: cur2.execute("CREATE TABLE foo (t text)") cur2.execute( @@ -118,7 +118,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): # wait until received_lsn_lag is 0 -def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60): +def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, timeout=60): started_at = time.time() received_lsn_lag = 1 @@ -129,7 +129,7 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 "timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" ) - res = pgmain.safe_psql( + res = 
endpoint_main.safe_psql( """ SELECT pg_size_pretty(pg_cluster_size()), @@ -150,20 +150,20 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) - pgmain = env.postgres.create_start( + endpoint_main = env.endpoints.create_start( "test_timeline_size_quota", # Set small limit for the test config_lines=["neon.max_cluster_size=30MB"], ) log.info("postgres is running on 'test_timeline_size_quota' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures? cur.execute("CREATE TABLE foo (t text)") - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) # Insert many rows. This query must fail because of space limit try: @@ -175,7 +175,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): """ ) - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) cur.execute( """ @@ -195,7 +195,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): # drop table to free space cur.execute("DROP TABLE foo") - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) # create it again and insert some rows. 
This query must succeed cur.execute("CREATE TABLE foo (t text)") @@ -207,7 +207,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): """ ) - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())") pg_cluster_size = cur.fetchone() @@ -231,15 +231,15 @@ def test_timeline_initial_logical_size_calculation_cancellation( tenant_id, timeline_id = env.neon_cli.create_tenant() # load in some data - pg = env.postgres.create_start("main", tenant_id=tenant_id) - pg.safe_psql_many( + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + endpoint.safe_psql_many( [ "CREATE TABLE foo (x INTEGER)", "INSERT INTO foo SELECT g FROM generate_series(1, 10000) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) - pg.stop() + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + endpoint.stop() # restart with failpoint inside initial size calculation task env.pageserver.stop() @@ -311,9 +311,9 @@ def test_timeline_physical_size_init( env = neon_env_builder.init_start() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init") - pg = env.postgres.create_start("test_timeline_physical_size_init") + endpoint = env.endpoints.create_start("test_timeline_physical_size_init") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -322,7 +322,7 @@ def test_timeline_physical_size_init( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) # restart the pageserer to force calculating timeline's initial physical size env.pageserver.stop() @@ -355,9 +355,9 @@ def test_timeline_physical_size_post_checkpoint( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint") - pg = 
env.postgres.create_start("test_timeline_physical_size_post_checkpoint") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -366,7 +366,7 @@ def test_timeline_physical_size_post_checkpoint( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) assert_physical_size_invariants( @@ -394,7 +394,7 @@ def test_timeline_physical_size_post_compaction( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") - pg = env.postgres.create_start("test_timeline_physical_size_post_compaction") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction") # We don't want autovacuum to run on the table, while we are calculating the # physical size, because that could cause a new layer to be created and a @@ -402,7 +402,7 @@ def test_timeline_physical_size_post_compaction( # happens, because of some other background activity or autovacuum on other # tables, we could simply retry the size calculations. It's unlikely that # that would happen more than once.) 
- pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo @@ -411,7 +411,7 @@ def test_timeline_physical_size_post_compaction( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) # shutdown safekeepers to prevent new data from coming in for sk in env.safekeepers: @@ -446,10 +446,10 @@ def test_timeline_physical_size_post_gc( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc") - pg = env.postgres.create_start("test_timeline_physical_size_post_gc") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc") # Like in test_timeline_physical_size_post_compaction, disable autovacuum - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo @@ -458,10 +458,10 @@ def test_timeline_physical_size_post_gc( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) - pg.safe_psql( + endpoint.safe_psql( """ INSERT INTO foo SELECT 'long string to consume some space' || g @@ -469,7 +469,7 @@ def test_timeline_physical_size_post_gc( """ ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None) @@ -495,9 +495,9 @@ def test_timeline_size_metrics( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics") - pg = env.postgres.create_start("test_timeline_size_metrics") + 
endpoint = env.endpoints.create_start("test_timeline_size_metrics") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -506,7 +506,7 @@ def test_timeline_size_metrics( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) # get the metrics and parse the metric for the current timeline's physical size @@ -558,7 +558,7 @@ def test_timeline_size_metrics( # The sum of the sizes of all databases, as seen by pg_database_size(), should also # be close. Again allow some slack, the logical size metric includes some things like # the SLRUs that are not included in pg_database_size(). - dbsize_sum = pg.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0] + dbsize_sum = endpoint.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0] assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024) @@ -592,16 +592,16 @@ def test_tenant_physical_size( n_rows = random.randint(100, 1000) timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) + endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant, timeline) + wait_for_last_flush_lsn(env, endpoint, tenant, timeline) pageserver_http.timeline_checkpoint(tenant, timeline) if remote_storage_kind is not None: @@ -609,7 +609,7 @@ def test_tenant_physical_size( timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline) - pg.stop() + endpoint.stop() # ensure that 
tenant_status current_physical size reports sum of timeline current_physical_size tenant_current_physical_size = int( diff --git a/test_runner/regress/test_truncate.py b/test_runner/regress/test_truncate.py index cfe8a7f067..b1ddd93a40 100644 --- a/test_runner/regress/test_truncate.py +++ b/test_runner/regress/test_truncate.py @@ -27,8 +27,8 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark): ) env.neon_cli.create_timeline("test_truncate", tenant_id=tenant) - pg = env.postgres.create_start("test_truncate", tenant_id=tenant) - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_truncate", tenant_id=tenant) + cur = endpoint.connect().cursor() cur.execute("create table t1(x integer)") cur.execute(f"insert into t1 values (generate_series(1,{n_records}))") cur.execute("vacuum t1") diff --git a/test_runner/regress/test_twophase.py b/test_runner/regress/test_twophase.py index f3b0f9ca06..305271c715 100644 --- a/test_runner/regress/test_twophase.py +++ b/test_runner/regress/test_twophase.py @@ -10,10 +10,12 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn def test_twophase(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_twophase", "empty") - pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"]) + endpoint = env.endpoints.create_start( + "test_twophase", config_lines=["max_prepared_transactions=5"] + ) log.info("postgres is running on 'test_twophase' branch") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE foo (t text)") @@ -42,7 +44,7 @@ def test_twophase(neon_simple_env: NeonEnv): # pg_twophase directory and fsynced cur.execute("CHECKPOINT") - twophase_files = os.listdir(pg.pg_twophase_dir_path()) + twophase_files = os.listdir(endpoint.pg_twophase_dir_path()) log.info(twophase_files) assert len(twophase_files) == 4 @@ -50,25 +52,25 @@ def test_twophase(neon_simple_env: NeonEnv): 
cur.execute("ROLLBACK PREPARED 'insert_four'") cur.execute("CHECKPOINT") - twophase_files = os.listdir(pg.pg_twophase_dir_path()) + twophase_files = os.listdir(endpoint.pg_twophase_dir_path()) log.info(twophase_files) assert len(twophase_files) == 2 # Create a branch with the transaction in prepared state - fork_at_current_lsn(env, pg, "test_twophase_prepared", "test_twophase") + fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase") # Start compute on the new branch - pg2 = env.postgres.create_start( + endpoint2 = env.endpoints.create_start( "test_twophase_prepared", config_lines=["max_prepared_transactions=5"], ) # Check that we restored only needed twophase files - twophase_files2 = os.listdir(pg2.pg_twophase_dir_path()) + twophase_files2 = os.listdir(endpoint2.pg_twophase_dir_path()) log.info(twophase_files2) assert twophase_files2.sort() == twophase_files.sort() - conn2 = pg2.connect() + conn2 = endpoint2.connect() cur2 = conn2.cursor() # On the new branch, commit one of the prepared transactions, diff --git a/test_runner/regress/test_unlogged.py b/test_runner/regress/test_unlogged.py index b6b20f1230..708bf0dfeb 100644 --- a/test_runner/regress/test_unlogged.py +++ b/test_runner/regress/test_unlogged.py @@ -9,9 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn def test_unlogged(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_unlogged", "empty") - pg = env.postgres.create_start("test_unlogged") + endpoint = env.endpoints.create_start("test_unlogged") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE UNLOGGED TABLE iut (id int);") @@ -20,12 +20,10 @@ def test_unlogged(neon_simple_env: NeonEnv): cur.execute("INSERT INTO iut values (42);") # create another compute to fetch inital empty contents from pageserver - fork_at_current_lsn(env, pg, "test_unlogged_basebackup", "test_unlogged") - pg2 = env.postgres.create_start( - 
"test_unlogged_basebackup", - ) + fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged") + endpoint2 = env.endpoints.create_start("test_unlogged_basebackup") - conn2 = pg2.connect() + conn2 = endpoint2.connect() cur2 = conn2.cursor() # after restart table should be empty but valid cur2.execute("PREPARE iut_plan (int) AS INSERT INTO iut VALUES ($1)") diff --git a/test_runner/regress/test_vm_bits.py b/test_runner/regress/test_vm_bits.py index 16a870471b..d8034b31b0 100644 --- a/test_runner/regress/test_vm_bits.py +++ b/test_runner/regress/test_vm_bits.py @@ -10,10 +10,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_vm_bit_clear", "empty") - pg = env.postgres.create_start("test_vm_bit_clear") + endpoint = env.endpoints.create_start("test_vm_bit_clear") log.info("postgres is running on 'test_vm_bit_clear' branch") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Install extension containing function needed for test @@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1") # Branch at this point, to test that later - fork_at_current_lsn(env, pg, "test_vm_bit_clear_new", "test_vm_bit_clear") + fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear") # Clear the buffer cache, to force the VM page to be re-fetched from # the page server @@ -63,10 +63,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): # a dirty VM page is evicted. If the VM bit was not correctly cleared by the # earlier WAL record, the full-page image hides the problem. Starting a new # server at the right point-in-time avoids that full-page image. 
- pg_new = env.postgres.create_start("test_vm_bit_clear_new") + endpoint_new = env.endpoints.create_start("test_vm_bit_clear_new") log.info("postgres is running on 'test_vm_bit_clear_new' branch") - pg_new_conn = pg_new.connect() + pg_new_conn = endpoint_new.connect() cur_new = pg_new_conn.cursor() cur_new.execute( diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index c24c77bb95..77a2987a96 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -16,6 +16,7 @@ from typing import Any, List, Optional import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonBroker, NeonEnv, NeonEnvBuilder, @@ -23,7 +24,6 @@ from fixtures.neon_fixtures import ( PgBin, PgProtocol, PortDistributor, - Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, @@ -39,11 +39,11 @@ from fixtures.utils import get_dir_size, query_scalar, start_in_background def wait_lsn_force_checkpoint( tenant_id: TenantId, timeline_id: TimelineId, - pg: Postgres, + endpoint: Endpoint, ps: NeonPageserver, pageserver_conn_options={}, ): - lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver") auth_token = None @@ -97,10 +97,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): branch_names_to_timeline_ids = {} # start postgres on each timeline - pgs = [] + endpoints = [] for branch_name in branch_names: new_timeline_id = env.neon_cli.create_branch(branch_name) - pgs.append(env.postgres.create_start(branch_name)) + endpoints.append(env.endpoints.create_start(branch_name)) branch_names_to_timeline_ids[branch_name] = new_timeline_id tenant_id = env.initial_tenant @@ -160,8 +160,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): # Do everything in different loops to have actions on 
different timelines # interleaved. # create schema - for pg in pgs: - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + for endpoint in endpoints: + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") init_m = collect_metrics("after CREATE TABLE") # Populate data for 2/3 timelines @@ -197,16 +197,16 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): metrics_checker = MetricsChecker() metrics_checker.start() - for pg in pgs[:-1]: - pg.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'") + for endpoint in endpoints[:-1]: + endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'") metrics_checker.stop() collect_metrics("after INSERT INTO") # Check data for 2/3 timelines - for pg in pgs[:-1]: - res = pg.safe_psql("SELECT sum(key) FROM t") + for endpoint in endpoints[:-1]: + res = endpoint.safe_psql("SELECT sum(key) FROM t") assert res[0] == (5000050000,) final_m = collect_metrics("after SELECT") @@ -233,11 +233,11 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_restarts") - pg = env.postgres.create_start("test_safekeepers_restarts") + endpoint = env.endpoints.create_start("test_safekeepers_restarts") # we rely upon autocommit after each statement # as waiting for acceptors happens there - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() failed_node = None @@ -268,22 +268,22 @@ def test_broker(neon_env_builder: NeonEnvBuilder): ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - pg = env.postgres.create_start("test_broker") - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint = env.endpoints.create_start("test_broker") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = 
TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # wait until remote_consistent_lsn gets advanced on all safekeepers clients = [sk.http_client() for sk in env.safekeepers] stat_before = [cli.timeline_status(tenant_id, timeline_id) for cli in clients] log.info(f"statuses is {stat_before}") - pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'") + endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'") # force checkpoint in pageserver to advance remote_consistent_lsn - wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver) + wait_lsn_force_checkpoint(tenant_id, timeline_id, endpoint, env.pageserver) # and wait till remote_consistent_lsn propagates to all safekeepers started_at = time.time() @@ -317,26 +317,28 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): ) env.neon_cli.create_branch("test_safekeepers_wal_removal") - pg = env.postgres.create_start("test_safekeepers_wal_removal") + endpoint = env.endpoints.create_start("test_safekeepers_wal_removal") # Note: it is important to insert at least two segments, as currently # control file is synced roughly once in segment range and WAL is not # removed until all horizons are persisted. 
- pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,200000), 'payload'", ] ) - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # force checkpoint to advance remote_consistent_lsn pageserver_conn_options = {} if auth_enabled: pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id) - wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options) + wait_lsn_force_checkpoint( + tenant_id, timeline_id, endpoint, env.pageserver, pageserver_conn_options + ) # We will wait for first segment removal. Make sure they exist for starter. first_segments = [ @@ -436,13 +438,13 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_wal_backup") - pg = env.postgres.create_start("test_safekeepers_wal_backup") + endpoint = env.endpoints.create_start("test_safekeepers_wal_backup") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("create table t(key int, value text)") @@ -465,9 +467,9 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot # put one of safekeepers down again env.safekeepers[0].stop() # restart postgres - pg.stop_and_destroy().create_start("test_safekeepers_wal_backup") + 
endpoint.stop_and_destroy().create_start("test_safekeepers_wal_backup") # and ensure offloading still works - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("insert into t select generate_series(1,250000), 'payload'") seg_end = Lsn("0/5000000") @@ -491,15 +493,15 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re env = neon_env_builder.init_start() env.neon_cli.create_branch("test_s3_wal_replay") - pg = env.postgres.create_start("test_s3_wal_replay") + endpoint = env.endpoints.create_start("test_s3_wal_replay") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) expected_sum = 0 - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("create table t(key int, value text)") cur.execute("insert into t values (1, 'payload')") @@ -547,7 +549,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb" ) - pg.stop_and_destroy() + endpoint.stop_and_destroy() ps_cli.timeline_delete(tenant_id, timeline_id) # Also delete and manually create timeline on safekeepers -- this tests @@ -609,9 +611,9 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re log.info(f"WAL redo took {elapsed} s") # verify data - pg.create_start("test_s3_wal_replay") + endpoint.create_start("test_s3_wal_replay") - assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum + assert endpoint.safe_psql("select sum(key) from t")[0][0] == expected_sum class ProposerPostgres(PgProtocol): @@ -762,13 +764,13 @@ 
def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_timeline_status") - pg = env.postgres.create_start("test_timeline_status") + endpoint = env.endpoints.create_start("test_timeline_status") wa = env.safekeepers[0] # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) if not auth_enabled: wa_http_cli = wa.http_client() @@ -806,11 +808,11 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert debug_dump_0["timelines_count"] == 1 assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id) - pg.safe_psql("create table t(i int)") + endpoint.safe_psql("create table t(i int)") # ensure epoch goes up after reboot - pg.stop().start() - pg.safe_psql("insert into t values(10)") + endpoint.stop().start() + endpoint.safe_psql("insert into t values(10)") tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id) epoch_after_reboot = tli_status.acceptor_epoch @@ -992,8 +994,8 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str: return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names]) - def execute_payload(pg: Postgres): - with closing(pg.connect()) as conn: + def execute_payload(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -1021,26 +1023,26 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() active_safekeepers = [1, 2, 3] - pg = 
env.postgres.create("test_replace_safekeeper") - pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) - pg.start() + endpoint = env.endpoints.create("test_replace_safekeeper") + endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.start() # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Restart all safekeepers to flush everything") env.safekeepers[0].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[0].start() env.safekeepers[1].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[1].start() env.safekeepers[2].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[2].start() env.safekeepers[0].stop(immediate=True) @@ -1050,27 +1052,27 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): env.safekeepers[1].start() env.safekeepers[2].start() - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3") env.safekeepers[0].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Recreate postgres to replace failed sk1 with new sk4") - pg.stop_and_destroy().create("test_replace_safekeeper") + endpoint.stop_and_destroy().create("test_replace_safekeeper") active_safekeepers = [2, 3, 4] env.safekeepers[3].start() - pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) - pg.start() + 
endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.start() - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work") env.safekeepers[1].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) @@ -1082,13 +1084,13 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): last_lsn = Lsn(0) # returns pg_wal size in MB - def collect_stats(pg: Postgres, cur, enable_logs=True): + def collect_stats(endpoint: Endpoint, cur, enable_logs=True): nonlocal last_lsn - assert pg.pgdata_dir is not None + assert endpoint.pgdata_dir is not None log.info("executing INSERT to generate WAL") current_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) - pg_wal_size_mb = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024 + pg_wal_size_mb = get_dir_size(os.path.join(endpoint.pgdata_dir, "pg_wal")) / 1024 / 1024 if enable_logs: lsn_delta_mb = (current_lsn - last_lsn) / 1024 / 1024 log.info(f"LSN delta: {lsn_delta_mb} MB, current WAL size: {pg_wal_size_mb} MB") @@ -1104,25 +1106,25 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_wal_deleted_after_broadcast") # Adjust checkpoint config to prevent keeping old WAL segments - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_wal_deleted_after_broadcast", config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"], ) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("CREATE TABLE t(key int, value text)") - collect_stats(pg, cur) + collect_stats(endpoint, cur) # generate WAL to simulate normal workload for i in range(5): generate_wal(cur) - collect_stats(pg, cur) + collect_stats(endpoint, cur) log.info("executing checkpoint") 
cur.execute("CHECKPOINT") - wal_size_after_checkpoint = collect_stats(pg, cur) + wal_size_after_checkpoint = collect_stats(endpoint, cur) # there shouldn't be more than 2 WAL segments (but dir may have archive_status files) assert wal_size_after_checkpoint < 16 * 2.5 @@ -1151,13 +1153,13 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): tenant_id_other, timeline_id_other = env.neon_cli.create_tenant() # Populate branches - pg_1 = env.postgres.create_start("br1") - pg_2 = env.postgres.create_start("br2") - pg_3 = env.postgres.create_start("br3") - pg_4 = env.postgres.create_start("br4") - pg_other = env.postgres.create_start("main", tenant_id=tenant_id_other) - for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]: - with closing(pg.connect()) as conn: + endpoint_1 = env.endpoints.create_start("br1") + endpoint_2 = env.endpoints.create_start("br2") + endpoint_3 = env.endpoints.create_start("br3") + endpoint_4 = env.endpoints.create_start("br4") + endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other) + for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key)") sk = env.safekeepers[0] @@ -1178,14 +1180,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() # Stop branches which should be inactive and restart Safekeeper to drop its in-memory state. 
- pg_2.stop_and_destroy() - pg_4.stop_and_destroy() + endpoint_2.stop_and_destroy() + endpoint_4.stop_and_destroy() sk.stop() sk.start() # Ensure connections to Safekeeper are established - for pg in [pg_1, pg_3, pg_other]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_1, endpoint_3, endpoint_other]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("INSERT INTO t (key) VALUES (1)") @@ -1244,6 +1246,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Ensure the other tenant still works sk_http_other.timeline_status(tenant_id_other, timeline_id_other) - with closing(pg_other.connect()) as conn: + with closing(endpoint_other.connect()) as conn: with conn.cursor() as cur: cur.execute("INSERT INTO t (key) VALUES (123)") diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index f10a40690e..7debeed140 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -6,7 +6,7 @@ from typing import List, Optional import asyncpg from fixtures.log_helper import getLogger -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper +from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper from fixtures.types import Lsn, TenantId, TimelineId log = getLogger("root.safekeeper_async") @@ -82,8 +82,10 @@ class WorkerStats(object): log.info("All workers made {} transactions".format(progress)) -async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer): - pg_conn = await pg.connect_async() +async def run_random_worker( + stats: WorkerStats, endpoint: Endpoint, worker_id, n_accounts, max_transfer +): + pg_conn = await endpoint.connect_async() log.debug("Started worker {}".format(worker_id)) while stats.running: @@ -141,7 +143,7 @@ async def wait_for_lsn( # consistent. 
async def run_restarts_under_load( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, acceptors: List[Safekeeper], n_workers=10, n_accounts=100, @@ -154,7 +156,7 @@ async def run_restarts_under_load( # taking into account that this timeout is checked only at the beginning of every iteration. test_timeout_at = time.monotonic() + 5 * 60 - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() tenant_id = TenantId(await pg_conn.fetchval("show neon.tenant_id")) timeline_id = TimelineId(await pg_conn.fetchval("show neon.timeline_id")) @@ -165,7 +167,7 @@ async def run_restarts_under_load( stats = WorkerStats(n_workers) workers = [] for worker_id in range(n_workers): - worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer) + worker = run_random_worker(stats, endpoint, worker_id, bank.n_accounts, max_transfer) workers.append(asyncio.create_task(worker)) for it in range(iterations): @@ -212,11 +214,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_safekeepers_restarts_under_load") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"] ) - asyncio.run(run_restarts_under_load(env, pg, env.safekeepers)) + asyncio.run(run_restarts_under_load(env, endpoint, env.safekeepers)) # Restart acceptors one by one and test that everything is working as expected @@ -228,7 +230,7 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_restarts_frequent_checkpoints") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_restarts_frequent_checkpoints", config_lines=[ 
"max_replication_write_lag=1MB", @@ -240,11 +242,13 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): # we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments # are not removed before broadcasted to all safekeepers, with the help of replication slot - asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5)) + asyncio.run( + run_restarts_under_load(env, endpoint, env.safekeepers, period_time=15, iterations=5) + ) -def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): - pg = Postgres( +def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): + endpoint = Endpoint( env, tenant_id=env.initial_tenant, port=env.port_distributor.get_port(), @@ -253,19 +257,19 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): check_stop_result=False, ) - # embed current time in node name - node_name = pgdir_name or f"pg_node_{time.time()}" - return pg.create_start( - branch_name=branch, node_name=node_name, config_lines=["log_statement=all"] + # embed current time in endpoint ID + endpoint_id = pgdir_name or f"ep-{time.time()}" + return endpoint.create_start( + branch_name=branch, endpoint_id=endpoint_id, config_lines=["log_statement=all"] ) async def exec_compute_query( env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None ): - with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg: + with endpoint_create_start(env, branch=branch, pgdir_name=pgdir_name) as endpoint: before_conn = time.time() - conn = await pg.connect_async() + conn = await endpoint.connect_async() res = await conn.fetch(query) await conn.close() after_conn = time.time() @@ -436,8 +440,8 @@ async def check_unavailability( assert bg_query.done() -async def run_unavailability(env: NeonEnv, pg: Postgres): - conn = await pg.connect_async() +async def run_unavailability(env: NeonEnv, endpoint: Endpoint): + conn = 
await endpoint.connect_async() # check basic work with table await conn.execute("CREATE TABLE t(key int primary key, value text)") @@ -462,9 +466,9 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_unavailability") - pg = env.postgres.create_start("test_safekeepers_unavailability") + endpoint = env.endpoints.create_start("test_safekeepers_unavailability") - asyncio.run(run_unavailability(env, pg)) + asyncio.run(run_unavailability(env, endpoint)) @dataclass @@ -493,8 +497,8 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest): await asyncio.sleep(1) -async def run_race_conditions(env: NeonEnv, pg: Postgres): - conn = await pg.connect_async() +async def run_race_conditions(env: NeonEnv, endpoint: Endpoint): + conn = await endpoint.connect_async() await conn.execute("CREATE TABLE t(key int primary key, value text)") data = RaceConditionTest(0, False) @@ -525,14 +529,14 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_race_conditions") - pg = env.postgres.create_start("test_safekeepers_race_conditions") + endpoint = env.endpoints.create_start("test_safekeepers_race_conditions") - asyncio.run(run_race_conditions(env, pg)) + asyncio.run(run_race_conditions(env, endpoint)) # Check that pageserver can select safekeeper with largest commit_lsn # and switch if LSN is not updated for some time (NoWalTimeout). 
-async def run_wal_lagging(env: NeonEnv, pg: Postgres): +async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str: # use ports 10, 11 and 12 to simulate unavailable safekeepers return ",".join( @@ -542,10 +546,10 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): ] ) - conn = await pg.connect_async() + conn = await endpoint.connect_async() await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.close() - pg.stop() + endpoint.stop() n_iterations = 20 n_txes = 10000 @@ -561,11 +565,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): it -= 1 continue - pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) + endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) log.info(f"Iteration {it}: {active_sk}") - pg.start() - conn = await pg.connect_async() + endpoint.start() + conn = await endpoint.connect_async() for _ in range(n_txes): await conn.execute(f"INSERT INTO t values ({i}, 'payload')") @@ -573,11 +577,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): i += 1 await conn.close() - pg.stop() + endpoint.stop() - pg.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers))) - pg.start() - conn = await pg.connect_async() + endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers))) + endpoint.start() + conn = await endpoint.connect_async() log.info(f"Executed {i-1} queries") @@ -591,6 +595,6 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_lagging") - pg = env.postgres.create_start("test_wal_lagging") + endpoint = env.endpoints.create_start("test_wal_lagging") - asyncio.run(run_wal_lagging(env, pg)) + asyncio.run(run_wal_lagging(env, endpoint)) diff --git a/test_runner/regress/test_wal_restore.py b/test_runner/regress/test_wal_restore.py index 63d0b46f63..dd944af7eb 100644 --- 
a/test_runner/regress/test_wal_restore.py +++ b/test_runner/regress/test_wal_restore.py @@ -19,9 +19,9 @@ def test_wal_restore( ): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_restore") - pg = env.postgres.create_start("test_wal_restore") - pg.safe_psql("create table t as select generate_series(1,300000)") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) + endpoint = env.endpoints.create_start("test_wal_restore") + endpoint.safe_psql("create table t as select generate_series(1,300000)") + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) env.neon_cli.pageserver_stop() port = port_distributor.get_port() data_dir = test_output_dir / "pgsql.restored" diff --git a/test_runner/regress/test_walredo_not_left_behind_on_detach.py b/test_runner/regress/test_walredo_not_left_behind_on_detach.py index d6302f8632..7d944bebb3 100644 --- a/test_runner/regress/test_walredo_not_left_behind_on_detach.py +++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py @@ -45,9 +45,9 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. 
Make it big enough that it doesn't fit in diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py index 0281f4f48b..7e8aef5a5f 100644 --- a/test_runner/test_broken.py +++ b/test_runner/test_broken.py @@ -24,7 +24,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin): env = neon_simple_env env.neon_cli.create_branch("test_broken", "empty") - env.postgres.create_start("test_broken") + env.endpoints.create_start("test_broken") log.info("postgres is running") log.info("THIS NEXT COMMAND WILL FAIL:") From 89b5589b1b7c766100491be3d885783f913cdf54 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 13 Apr 2023 14:59:11 +0300 Subject: [PATCH 21/77] Tenant size should never be zero. Simplify test. Looking at the git history of this test, I think "size == 0" used to have a special meaning earlier, but now it should never happen. --- test_runner/regress/test_tenant_size.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index e8d534142e..2d905910f8 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -386,6 +386,7 @@ def test_single_branch_get_tenant_size_grows( consistent = current_lsn == after_lsn current_lsn = after_lsn size_debug_file.write(size_debug) + assert size > 0 return (current_lsn, size) with env.endpoints.create_start( @@ -423,17 +424,15 @@ def test_single_branch_get_tenant_size_grows( ) prev_size = collected_responses[-1][2] - if size == 0: - assert prev_size == 0 - else: - # branch start shouldn't be past gc_horizon yet - # thus the size should grow as we insert more data - # "gc_horizon" is tuned so that it kicks in _after_ the - # insert phase, but before the update phase ends. 
- assert ( - current_lsn - initdb_lsn <= gc_horizon - ), "Tuning of GC window is likely out-of-date" - assert size > prev_size + + # branch start shouldn't be past gc_horizon yet + # thus the size should grow as we insert more data + # "gc_horizon" is tuned so that it kicks in _after_ the + # insert phase, but before the update phase ends. + assert ( + current_lsn - initdb_lsn <= gc_horizon + ), "Tuning of GC window is likely out-of-date" + assert size > prev_size collected_responses.append(("INSERT", current_lsn, size)) @@ -491,6 +490,9 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("DROP", current_lsn, size)) + # Should have gone past gc_horizon, otherwise gc_horizon is too large + assert current_lsn - initdb_lsn > gc_horizon + # this isn't too many lines to forget for a while. observed while # developing these tests that locally the value is a bit more than what we # get in the ci. From 36c20946b44dbd305e9bce3ab0a8bf3e13e4386b Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 13 Apr 2023 15:25:09 +0100 Subject: [PATCH 22/77] Verify extensions checksums (#4014) To not be taken by surprise by upstream git re-tag or by malicious activity, let's verify the checksum for extensions we download Also, unify the installation of `pg_graphql` and `pg_tiktoken` with other extensions. 
--- Dockerfile.compute-node | 44 +++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 5a223ae432..742f2e18a1 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -60,6 +60,7 @@ RUN apt update && \ # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2 RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \ + echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \ mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \ DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ @@ -68,6 +69,7 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar ENV PATH "/usr/local/pgsql/bin:$PATH" RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \ + echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \ mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \ ./autogen.sh && \ ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \ @@ -84,6 +86,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \ + echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \ mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . 
&& \ mkdir build && \ cd build && \ @@ -104,6 +107,7 @@ RUN apt update && \ apt install -y ninja-build python3-dev libncurses5 binutils clang RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \ + echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \ mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \ export PATH="/usr/local/pgsql/bin:$PATH" && \ make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \ @@ -125,11 +129,13 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ # packaged cmake is too old RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \ -q -O /tmp/cmake-install.sh \ + && echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \ && chmod u+x /tmp/cmake-install.sh \ && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \ && rm /tmp/cmake-install.sh RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \ + echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ cmake .. -DCMAKE_BUILD_TYPE=Release && \ @@ -139,6 +145,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz rm -rf build RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \ + echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \ mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . 
&& \ export PATH="/usr/local/pgsql/bin:$PATH" && \ make -j $(getconf _NPROCESSORS_ONLN) && \ @@ -156,6 +163,7 @@ FROM build-deps AS unit-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \ + echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \ mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -176,6 +184,7 @@ FROM build-deps AS vector-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \ + echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \ mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -192,6 +201,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021 RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \ + echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \ mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control @@ -206,6 +216,7 @@ FROM build-deps AS hypopg-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \ + echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \ mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -221,6 +232,7 @@ FROM build-deps AS pg-hashids-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \ + echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \ mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -236,6 +248,7 @@ FROM build-deps AS rum-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \ + echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \ mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -251,6 +264,7 @@ FROM build-deps AS pgtap-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \ + echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \ mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -266,6 +280,7 @@ FROM build-deps AS ip4r-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \ + echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \ mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -281,6 +296,7 @@ FROM build-deps AS prefix-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \ + echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \ mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -296,6 +312,7 @@ FROM build-deps AS hll-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \ + echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \ mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -311,6 +328,7 @@ FROM build-deps AS plpgsql-check-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \ + echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \ mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -330,6 +348,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH" RUN apt-get update && \ apt-get install -y cmake && \ wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \ + echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \ mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . 
&& \ ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \ cd build && \ @@ -352,22 +371,25 @@ ENV PATH "/usr/local/pgsql/bin:$PATH" RUN case "${PG_VERSION}" in \ "v14") \ export PG_HINT_PLAN_VERSION=14_1_4_1 \ + export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \ ;; \ "v15") \ export PG_HINT_PLAN_VERSION=15_1_5_0 \ + export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \ ;; \ *) \ echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \ ;; \ esac && \ wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \ + echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \ mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) && \ make install -j $(getconf _NPROCESSORS_ONLN) && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control ######################################################################################### -# +# # Layer "rust extensions" # This layer is used to build `pgx` deps # @@ -395,7 +417,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux USER root ######################################################################################### -# +# # Layer "pg-jsonschema-pg-build" # Compile "pg_jsonschema" extension # @@ -403,15 +425,17 @@ USER root FROM rust-extensions-build AS pg-jsonschema-pg-build -# there is no release tag yet, but we need it due to the superuser fix in the control file +# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023 +# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5 RUN wget 
https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \ + echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \ mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \ sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ cargo pgx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control ######################################################################################### -# +# # Layer "pg-graphql-pg-build" # Compile "pg_graphql" extension # @@ -419,11 +443,13 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421e FROM rust-extensions-build AS pg-graphql-pg-build +# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch) # Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in # pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the # same 1.1 version we've used before. -RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yrashk/pg_graphql && \ - cd pg_graphql && \ +RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \ + echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \ + mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . 
&& \ sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \ cargo pgx install --release && \ @@ -440,8 +466,10 @@ RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yr FROM rust-extensions-build AS pg-tiktoken-pg-build -RUN git clone --depth=1 --single-branch https://github.com/kelvich/pg_tiktoken && \ - cd pg_tiktoken && \ +# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023 +RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \ + echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \ + mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \ cargo pgx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control From db8dd6f380c097ab03740ed40dccc9e8ab311b4c Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 13 Apr 2023 18:07:29 +0200 Subject: [PATCH 23/77] [compute_ctl] Implement live reconfiguration (#3980) With this commit one can request compute reconfiguration from the running `compute_ctl` with compute in `Running` state by sending a new spec: ```shell curl -d "{\"spec\": $(cat ./compute-spec-new.json)}" http://localhost:3080/configure ``` Internally, we start a separate configurator thread that is waiting on `Condvar` for `ConfigurationPending` compute state in a loop. Then it does reconfiguration, sets compute back to `Running` state and notifies other waiters. It will need some follow-ups, e.g. for retry logic for control-plane requests, but should be useful for testing in the current state. This shouldn't affect any existing environment, since computes are configured in a different way there. 
Resolves neondatabase/cloud#4433 --- compute_tools/src/bin/compute_ctl.rs | 3 ++ compute_tools/src/compute.rs | 42 ++++++++++++++++++++++ compute_tools/src/configurator.rs | 54 ++++++++++++++++++++++++++++ compute_tools/src/http/api.rs | 2 +- compute_tools/src/lib.rs | 1 + compute_tools/src/spec.rs | 17 ++++++--- libs/compute_api/src/responses.rs | 14 +++++++- 7 files changed, 126 insertions(+), 7 deletions(-) create mode 100644 compute_tools/src/configurator.rs diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 633e603f6b..309310407d 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -46,6 +46,7 @@ use url::Url; use compute_api::responses::ComputeStatus; use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec}; +use compute_tools::configurator::launch_configurator; use compute_tools::http::api::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; @@ -175,6 +176,8 @@ fn main() -> Result<()> { // Launch remaining service threads let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread"); + let _configurator_handle = + launch_configurator(&compute).expect("cannot launch configurator thread"); // Start Postgres let mut delay_exit = false; diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 07ede44c9b..6ddfcf86c2 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -356,6 +356,48 @@ impl ComputeNode { Ok(()) } + // We could've wrapped this around `pg_ctl reload`, but right now we don't use + // `pg_ctl` for start / stop, so this just seems much easier to do as we already + // have opened connection to Postgres and superuser access. 
+ #[instrument(skip(self, client))] + fn pg_reload_conf(&self, client: &mut Client) -> Result<()> { + client.simple_query("SELECT pg_reload_conf()")?; + Ok(()) + } + + /// Similar to `apply_config()`, but does a bit different sequence of operations, + /// as it's used to reconfigure a previously started and configured Postgres node. + #[instrument(skip(self))] + pub fn reconfigure(&self) -> Result<()> { + let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec; + + // Write new config + let pgdata_path = Path::new(&self.pgdata); + config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?; + + let mut client = Client::connect(self.connstr.as_str(), NoTls)?; + self.pg_reload_conf(&mut client)?; + + // Proceed with post-startup configuration. Note, that order of operations is important. + handle_roles(&spec, &mut client)?; + handle_databases(&spec, &mut client)?; + handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; + handle_grants(&spec, self.connstr.as_str(), &mut client)?; + handle_extensions(&spec, &mut client)?; + + // 'Close' connection + drop(client); + + let unknown_op = "unknown".to_string(); + let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op); + info!( + "finished reconfiguration of compute node for operation {}", + op_id + ); + + Ok(()) + } + #[instrument(skip(self))] pub fn start_compute(&self) -> Result { let compute_state = self.state.lock().unwrap().clone(); diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs new file mode 100644 index 0000000000..a07fd0b8cd --- /dev/null +++ b/compute_tools/src/configurator.rs @@ -0,0 +1,54 @@ +use std::sync::Arc; +use std::thread; + +use anyhow::Result; +use tracing::{error, info, instrument}; + +use compute_api::responses::ComputeStatus; + +use crate::compute::ComputeNode; + +#[instrument(skip(compute))] +fn configurator_main_loop(compute: &Arc) { + info!("waiting for reconfiguration requests"); + loop { + let state = 
compute.state.lock().unwrap(); + let mut state = compute.state_changed.wait(state).unwrap(); + + if state.status == ComputeStatus::ConfigurationPending { + info!("got configuration request"); + state.status = ComputeStatus::Configuration; + compute.state_changed.notify_all(); + drop(state); + + let mut new_status = ComputeStatus::Failed; + if let Err(e) = compute.reconfigure() { + error!("could not configure compute node: {}", e); + } else { + new_status = ComputeStatus::Running; + info!("compute node configured"); + } + + // XXX: used to test that API is blocking + // std::thread::sleep(std::time::Duration::from_millis(10000)); + + compute.set_status(new_status); + } else if state.status == ComputeStatus::Failed { + info!("compute node is now in Failed state, exiting"); + break; + } else { + info!("woken up for compute status: {:?}, sleeping", state.status); + } + } +} + +pub fn launch_configurator(compute: &Arc) -> Result> { + let compute = Arc::clone(compute); + + Ok(thread::Builder::new() + .name("compute-configurator".into()) + .spawn(move || { + configurator_main_loop(&compute); + info!("configurator thread is exited"); + })?) +} diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 81d4953345..92d058fbd1 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -155,7 +155,7 @@ async fn handle_configure_request( // ``` { let mut state = compute.state.lock().unwrap(); - if state.status != ComputeStatus::Empty { + if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { let msg = format!( "invalid compute status for configuration request: {:?}", state.status.clone() diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index aee6b53e6a..24811f75ee 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -4,6 +4,7 @@ //! 
pub mod checker; pub mod config; +pub mod configurator; pub mod http; #[macro_use] pub mod logger; diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 2350113c39..088f74335a 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,7 +1,7 @@ use std::path::Path; use std::str::FromStr; -use anyhow::Result; +use anyhow::{anyhow, bail, Result}; use postgres::config::Config; use postgres::{Client, NoTls}; use tracing::{info, info_span, instrument, span_enabled, warn, Level}; @@ -10,6 +10,7 @@ use crate::config; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; +use compute_api::responses::ControlPlaneSpecResponse; use compute_api::spec::{ComputeSpec, Database, PgIdent, Role}; /// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT` @@ -26,13 +27,19 @@ pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result, +} From fd31fafeeeb46ab1f6a68d888dd9b1bf2c1db816 Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Thu, 13 Apr 2023 09:31:30 -0700 Subject: [PATCH 24/77] Make proxy shutdown when all connections are closed (#3764) ## Describe your changes Makes Proxy start draining connections on SIGTERM. 
## Issue ticket number and link #3333 --- Cargo.lock | 1 + proxy/Cargo.toml | 1 + proxy/src/http/websocket.rs | 3 ++ proxy/src/main.rs | 42 ++++++++++++--------- proxy/src/proxy.rs | 53 +++++++++++++++++++-------- test_runner/fixtures/neon_fixtures.py | 11 ++++++ test_runner/regress/test_proxy.py | 18 +++++++++ 7 files changed, 96 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc587c57bf..f67311cf09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2965,6 +2965,7 @@ dependencies = [ "tokio-postgres", "tokio-postgres-rustls", "tokio-rustls", + "tokio-util", "tracing", "tracing-opentelemetry", "tracing-subscriber", diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index add8b14c95..9d702b29c3 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -64,6 +64,7 @@ webpki-roots.workspace = true x509-parser.workspace = true workspace_hack.workspace = true +tokio-util.workspace = true [dev-dependencies] rcgen.workspace = true diff --git a/proxy/src/http/websocket.rs b/proxy/src/http/websocket.rs index 1757652a90..c7676e8e14 100644 --- a/proxy/src/http/websocket.rs +++ b/proxy/src/http/websocket.rs @@ -22,6 +22,7 @@ use tokio::{ io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf}, net::TcpListener, }; +use tokio_util::sync::CancellationToken; use tracing::{error, info, info_span, warn, Instrument}; use utils::http::{error::ApiError, json::json_response}; @@ -188,6 +189,7 @@ async fn ws_handler( pub async fn task_main( config: &'static ProxyConfig, ws_listener: TcpListener, + cancellation_token: CancellationToken, ) -> anyhow::Result<()> { scopeguard::defer! 
{ info!("websocket server has shut down"); @@ -231,6 +233,7 @@ pub async fn task_main( hyper::Server::builder(accept::from_stream(tls_listener)) .serve(make_svc) + .with_graceful_shutdown(cancellation_token.cancelled()) .await?; Ok(()) diff --git a/proxy/src/main.rs b/proxy/src/main.rs index c6526e9aff..1fd13c9f68 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -28,6 +28,7 @@ use config::ProxyConfig; use futures::FutureExt; use std::{borrow::Cow, future::Future, net::SocketAddr}; use tokio::{net::TcpListener, task::JoinError}; +use tokio_util::sync::CancellationToken; use tracing::{info, warn}; use utils::{project_git_version, sentry_init::init_sentry}; @@ -66,39 +67,48 @@ async fn main() -> anyhow::Result<()> { let proxy_address: SocketAddr = args.get_one::("proxy").unwrap().parse()?; info!("Starting proxy on {proxy_address}"); let proxy_listener = TcpListener::bind(proxy_address).await?; + let cancellation_token = CancellationToken::new(); - let mut tasks = vec![ - tokio::spawn(handle_signals()), - tokio::spawn(http::server::task_main(http_listener)), - tokio::spawn(proxy::task_main(config, proxy_listener)), - tokio::spawn(console::mgmt::task_main(mgmt_listener)), - ]; + let mut client_tasks = vec![tokio::spawn(proxy::task_main( + config, + proxy_listener, + cancellation_token.clone(), + ))]; if let Some(wss_address) = args.get_one::("wss") { let wss_address: SocketAddr = wss_address.parse()?; info!("Starting wss on {wss_address}"); let wss_listener = TcpListener::bind(wss_address).await?; - tasks.push(tokio::spawn(http::websocket::task_main( + client_tasks.push(tokio::spawn(http::websocket::task_main( config, wss_listener, + cancellation_token.clone(), ))); } + let mut tasks = vec![ + tokio::spawn(handle_signals(cancellation_token)), + tokio::spawn(http::server::task_main(http_listener)), + tokio::spawn(console::mgmt::task_main(mgmt_listener)), + ]; + if let Some(metrics_config) = &config.metric_collection { 
tasks.push(tokio::spawn(metrics::task_main(metrics_config))); } - // This combinator will block until either all tasks complete or - // one of them finishes with an error (others will be cancelled). - let tasks = tasks.into_iter().map(flatten_err); - let _: Vec<()> = futures::future::try_join_all(tasks).await?; - + let tasks = futures::future::try_join_all(tasks.into_iter().map(flatten_err)); + let client_tasks = futures::future::try_join_all(client_tasks.into_iter().map(flatten_err)); + tokio::select! { + // We are only expecting an error from these forever tasks + res = tasks => { res?; }, + res = client_tasks => { res?; }, + } Ok(()) } /// Handle unix signals appropriately. -async fn handle_signals() -> anyhow::Result<()> { +async fn handle_signals(token: CancellationToken) -> anyhow::Result<()> { use tokio::signal::unix::{signal, SignalKind}; let mut hangup = signal(SignalKind::hangup())?; @@ -116,11 +126,9 @@ async fn handle_signals() -> anyhow::Result<()> { warn!("received SIGINT, exiting immediately"); bail!("interrupted"); } - // TODO: Don't accept new proxy connections. - // TODO: Shut down once all exisiting connections have been closed. 
_ = terminate.recv() => { - warn!("received SIGTERM, exiting immediately"); - bail!("terminated"); + warn!("received SIGTERM, shutting down once all existing connections have closed"); + token.cancel(); } } } diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 70fb25474e..9945e3697f 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -17,6 +17,7 @@ use once_cell::sync::Lazy; use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams}; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; +use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; use utils::measured_stream::MeasuredStream; @@ -63,6 +64,7 @@ static NUM_BYTES_PROXIED_COUNTER: Lazy = Lazy::new(|| { pub async fn task_main( config: &'static ProxyConfig, listener: tokio::net::TcpListener, + cancellation_token: CancellationToken, ) -> anyhow::Result<()> { scopeguard::defer! { info!("proxy has shut down"); @@ -72,29 +74,48 @@ pub async fn task_main( // will be inherited by all accepted client sockets. socket2::SockRef::from(&listener).set_keepalive(true)?; + let mut connections = tokio::task::JoinSet::new(); let cancel_map = Arc::new(CancelMap::default()); + loop { - let (socket, peer_addr) = listener.accept().await?; - info!("accepted postgres client connection from {peer_addr}"); + tokio::select! 
{ + accept_result = listener.accept() => { + let (socket, peer_addr) = accept_result?; + info!("accepted postgres client connection from {peer_addr}"); - let session_id = uuid::Uuid::new_v4(); - let cancel_map = Arc::clone(&cancel_map); - tokio::spawn( - async move { - info!("spawned a task for {peer_addr}"); + let session_id = uuid::Uuid::new_v4(); + let cancel_map = Arc::clone(&cancel_map); + connections.spawn( + async move { + info!("spawned a task for {peer_addr}"); - socket - .set_nodelay(true) - .context("failed to set socket option")?; + socket + .set_nodelay(true) + .context("failed to set socket option")?; - handle_client(config, &cancel_map, session_id, socket).await + handle_client(config, &cancel_map, session_id, socket).await + } + .unwrap_or_else(|e| { + // Acknowledge that the task has finished with an error. + error!("per-client task finished with an error: {e:#}"); + }), + ); } - .unwrap_or_else(|e| { - // Acknowledge that the task has finished with an error. - error!("per-client task finished with an error: {e:#}"); - }), - ); + _ = cancellation_token.cancelled() => { + drop(listener); + break; + } + } } + // Drain connections + while let Some(res) = connections.join_next().await { + if let Err(e) = res { + if !e.is_panic() && !e.is_cancelled() { + warn!("unexpected error from joined connection task: {e:?}"); + } + } + } + Ok(()) } // TODO(tech debt): unite this with its twin below. diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index e9f0363843..fb12752d3c 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2041,6 +2041,17 @@ class NeonProxy(PgProtocol): self._wait_until_ready() return self + # Sends SIGTERM to the proxy if it has been started + def terminate(self): + if self._popen: + self._popen.terminate() + + # Waits for proxy to exit if it has been opened with a default timeout of + # two seconds. 
Raises subprocess.TimeoutExpired if the proxy does not exit in time. + def wait_for_exit(self, timeout=2): + if self._popen: + self._popen.wait(timeout=2) + @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_time=10) def _wait_until_ready(self): requests.get(f"http://{self.host}:{self.http_port}/v1/status") diff --git a/test_runner/regress/test_proxy.py b/test_runner/regress/test_proxy.py index 51fabdd2a1..ee6349436b 100644 --- a/test_runner/regress/test_proxy.py +++ b/test_runner/regress/test_proxy.py @@ -1,3 +1,5 @@ +import subprocess + import psycopg2 import pytest from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres @@ -134,3 +136,19 @@ def test_forward_params_to_client(static_proxy: NeonProxy): for name, value in cur.fetchall(): # Check that proxy has forwarded this parameter. assert conn.get_parameter_status(name) == value + + +@pytest.mark.timeout(5) +def test_close_on_connections_exit(static_proxy: NeonProxy): + # Open two connections, send SIGTERM, then ensure that proxy doesn't exit + # until after connections close. 
+ with static_proxy.connect(options="project=irrelevant"), static_proxy.connect( + options="project=irrelevant" + ): + static_proxy.terminate() + with pytest.raises(subprocess.TimeoutExpired): + static_proxy.wait_for_exit(timeout=2) + # Ensure we don't accept any more connections + with pytest.raises(psycopg2.OperationalError): + static_proxy.connect(options="project=irrelevant") + static_proxy.wait_for_exit() From b6c7c3290f795d918f726de2df0015bb4a3cd260 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 13 Apr 2023 20:03:24 +0100 Subject: [PATCH 25/77] Bump h2 from 0.3.15 to 0.3.17 (#4020) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f67311cf09..86787b8f6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1572,9 +1572,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum = "66b91535aa35fea1523ad1b86cb6b53c28e0ae566ba4a460f4457e936cad7c6f" dependencies = [ "bytes", "fnv", From 8895f28dae229d84bf58d3660968b404a3f0c2e0 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 14 Apr 2023 12:25:45 +0200 Subject: [PATCH 26/77] make evictions_low_residence_duration_metric_threshold per-tenant (#3949) Before this patch, if a tenant would override its eviction_policy setting to use a lower LayerAccessThreshold::threshold than the `evictions_low_residence_duration_metric_threshold`, the evictions done for that tenant would count towards the `evictions_with_low_residence_duration` metric. That metric is used to identify pre-mature evictions, commonly triggered by disk-usage-based eviction under disk pressure. 
We don't want that to happen for the legitimate evictions of the tenant that overrides its eviction_policy. So, this patch - moves the setting into TenantConf - adds test coverage - updates the staging & prod yamls Forward Compatibility: Software before this patch will ignore the new tenant conf field and use the global one instead. So we can roll back safely. Backward Compatibility: Parsing old configs with software as of this patch will fail in `PageServerConf::parse_and_validate` with error `unrecognized pageserver option 'evictions_low_residence_duration_metric_threshold'` if the option is still present in the global section. We deal with this by updating the configs in Ansible. fixes https://github.com/neondatabase/neon/issues/3940 --- .../ansible/prod.ap-southeast-1.hosts.yaml | 2 +- .github/ansible/prod.eu-central-1.hosts.yaml | 2 +- .github/ansible/prod.us-east-2.hosts.yaml | 2 +- .github/ansible/prod.us-west-2.hosts.yaml | 8 +- .github/ansible/staging.eu-west-1.hosts.yaml | 2 +- .github/ansible/staging.us-east-2.hosts.yaml | 2 +- control_plane/src/pageserver.rs | 6 ++ libs/pageserver_api/src/models.rs | 3 + pageserver/src/config.rs | 40 ++------ pageserver/src/http/routes.rs | 26 +++++ pageserver/src/metrics.rs | 24 ++++- pageserver/src/tenant.rs | 10 ++ pageserver/src/tenant/config.rs | 16 ++++ pageserver/src/tenant/timeline.rs | 41 +++++++- test_runner/fixtures/pageserver/http.py | 7 ++ test_runner/regress/test_tenant_conf.py | 94 ++++++++++++++++++- 16 files changed, 239 insertions(+), 46 deletions(-) diff --git a/.github/ansible/prod.ap-southeast-1.hosts.yaml b/.github/ansible/prod.ap-southeast-1.hosts.yaml index c185086eef..9c53733491 100644 --- a/.github/ansible/prod.ap-southeast-1.hosts.yaml +++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + 
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.eu-central-1.hosts.yaml b/.github/ansible/prod.eu-central-1.hosts.yaml index 0a0f974ea4..3186519ca8 100644 --- a/.github/ansible/prod.eu-central-1.hosts.yaml +++ b/.github/ansible/prod.eu-central-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.us-east-2.hosts.yaml b/.github/ansible/prod.us-east-2.hosts.yaml index 4427bb344e..3062475b20 100644 --- a/.github/ansible/prod.us-east-2.hosts.yaml +++ b/.github/ansible/prod.us-east-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.us-west-2.hosts.yaml b/.github/ansible/prod.us-west-2.hosts.yaml index 53626b4f59..9cf847bcb1 100644 --- a/.github/ansible/prod.us-west-2.hosts.yaml +++ b/.github/ansible/prod.us-west-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" @@ -34,7 +34,7 @@ storage: pageservers: 
hosts: pageserver-0.us-west-2.aws.neon.tech: - ansible_host: i-0d9f6dfae0e1c780d + ansible_host: i-0d9f6dfae0e1c780d pageserver-1.us-west-2.aws.neon.tech: ansible_host: i-0c834be1dddba8b3f pageserver-2.us-west-2.aws.neon.tech: @@ -49,5 +49,5 @@ storage: safekeeper-1.us-west-2.aws.neon.tech: ansible_host: i-074682f9d3c712e7c safekeeper-2.us-west-2.aws.neon.tech: - ansible_host: i-042b7efb1729d7966 - + ansible_host: i-042b7efb1729d7966 + diff --git a/.github/ansible/staging.eu-west-1.hosts.yaml b/.github/ansible/staging.eu-west-1.hosts.yaml index 34c8e77280..39f5613935 100644 --- a/.github/ansible/staging.eu-west-1.hosts.yaml +++ b/.github/ansible/staging.eu-west-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "20m" threshold: &default_eviction_threshold "20m" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/staging.us-east-2.hosts.yaml b/.github/ansible/staging.us-east-2.hosts.yaml index 94f2be83a4..e63ed6e639 100644 --- a/.github/ansible/staging.us-east-2.hosts.yaml +++ b/.github/ansible/staging.us-east-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "20m" threshold: &default_eviction_threshold "20m" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 094069e4c0..b700d426ba 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -368,6 +368,9 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'min_resident_size_override' as integer")?, + 
evictions_low_residence_duration_metric_threshold: settings + .remove("evictions_low_residence_duration_metric_threshold") + .map(|x| x.to_string()), }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") @@ -445,6 +448,9 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'min_resident_size_override' as an integer")?, + evictions_low_residence_duration_metric_threshold: settings + .get("evictions_low_residence_duration_metric_threshold") + .map(|x| x.to_string()), }) .send()? .error_from_body()?; diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index a351761f4a..15c37b9453 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -135,6 +135,7 @@ pub struct TenantCreateRequest { // For now, this field is not even documented in the openapi_spec.yml. pub eviction_policy: Option, pub min_resident_size_override: Option, + pub evictions_low_residence_duration_metric_threshold: Option, } #[serde_as] @@ -181,6 +182,7 @@ pub struct TenantConfigRequest { // For now, this field is not even documented in the openapi_spec.yml. 
pub eviction_policy: Option, pub min_resident_size_override: Option, + pub evictions_low_residence_duration_metric_threshold: Option, } impl TenantConfigRequest { @@ -202,6 +204,7 @@ impl TenantConfigRequest { trace_read_requests: None, eviction_policy: None, min_resident_size_override: None, + evictions_low_residence_duration_metric_threshold: None, } } } diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 19f0f22815..826cf1aab3 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -62,7 +62,6 @@ pub mod defaults { pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour"; pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option = None; pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min"; - pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour"; /// /// Default built-in configuration file. @@ -91,7 +90,6 @@ pub mod defaults { #cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}' #synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}' -#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' #disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}} @@ -108,6 +106,7 @@ pub mod defaults { #pitr_interval = '{DEFAULT_PITR_INTERVAL}' #min_resident_size_override = .. # in bytes +#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' # [remote_storage] @@ -182,9 +181,6 @@ pub struct PageServerConf { pub metric_collection_endpoint: Option, pub synthetic_size_calculation_interval: Duration, - // See the corresponding metric's help string. 
- pub evictions_low_residence_duration_metric_threshold: Duration, - pub disk_usage_based_eviction: Option, pub test_remote_failures: u64, @@ -257,8 +253,6 @@ struct PageServerConfigBuilder { metric_collection_endpoint: BuilderValue>, synthetic_size_calculation_interval: BuilderValue, - evictions_low_residence_duration_metric_threshold: BuilderValue, - disk_usage_based_eviction: BuilderValue>, test_remote_failures: BuilderValue, @@ -316,11 +310,6 @@ impl Default for PageServerConfigBuilder { .expect("cannot parse default synthetic size calculation interval")), metric_collection_endpoint: Set(DEFAULT_METRIC_COLLECTION_ENDPOINT), - evictions_low_residence_duration_metric_threshold: Set(humantime::parse_duration( - DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, - ) - .expect("cannot parse DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD")), - disk_usage_based_eviction: Set(None), test_remote_failures: Set(0), @@ -438,10 +427,6 @@ impl PageServerConfigBuilder { self.test_remote_failures = BuilderValue::Set(fail_first); } - pub fn evictions_low_residence_duration_metric_threshold(&mut self, value: Duration) { - self.evictions_low_residence_duration_metric_threshold = BuilderValue::Set(value); - } - pub fn disk_usage_based_eviction(&mut self, value: Option) { self.disk_usage_based_eviction = BuilderValue::Set(value); } @@ -525,11 +510,6 @@ impl PageServerConfigBuilder { synthetic_size_calculation_interval: self .synthetic_size_calculation_interval .ok_or(anyhow!("missing synthetic_size_calculation_interval"))?, - evictions_low_residence_duration_metric_threshold: self - .evictions_low_residence_duration_metric_threshold - .ok_or(anyhow!( - "missing evictions_low_residence_duration_metric_threshold" - ))?, disk_usage_based_eviction: self .disk_usage_based_eviction .ok_or(anyhow!("missing disk_usage_based_eviction"))?, @@ -721,7 +701,6 @@ impl PageServerConf { "synthetic_size_calculation_interval" => 
builder.synthetic_size_calculation_interval(parse_toml_duration(key, item)?), "test_remote_failures" => builder.test_remote_failures(parse_toml_u64(key, item)?), - "evictions_low_residence_duration_metric_threshold" => builder.evictions_low_residence_duration_metric_threshold(parse_toml_duration(key, item)?), "disk_usage_based_eviction" => { tracing::info!("disk_usage_based_eviction: {:#?}", &item); builder.disk_usage_based_eviction( @@ -839,6 +818,13 @@ impl PageServerConf { ); } + if let Some(item) = item.get("evictions_low_residence_duration_metric_threshold") { + t_conf.evictions_low_residence_duration_metric_threshold = Some(parse_toml_duration( + "evictions_low_residence_duration_metric_threshold", + item, + )?); + } + Ok(t_conf) } @@ -877,10 +863,6 @@ impl PageServerConf { cached_metric_collection_interval: Duration::from_secs(60 * 60), metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT, synthetic_size_calculation_interval: Duration::from_secs(60), - evictions_low_residence_duration_metric_threshold: humantime::parse_duration( - defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, - ) - .unwrap(), disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, @@ -1029,8 +1011,6 @@ cached_metric_collection_interval = '22200 s' metric_collection_endpoint = 'http://localhost:80/metrics' synthetic_size_calculation_interval = '333 s' -evictions_low_residence_duration_metric_threshold = '444 s' - log_format = 'json' "#; @@ -1087,9 +1067,6 @@ log_format = 'json' synthetic_size_calculation_interval: humantime::parse_duration( defaults::DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL )?, - evictions_low_residence_duration_metric_threshold: humantime::parse_duration( - defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD - )?, disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, @@ -1144,7 +1121,6 @@ log_format 
= 'json' cached_metric_collection_interval: Duration::from_secs(22200), metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?), synthetic_size_calculation_interval: Duration::from_secs(333), - evictions_low_residence_duration_metric_threshold: Duration::from_secs(444), disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index e7a86e4822..06a97f6dff 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -781,6 +781,19 @@ async fn tenant_create_handler(mut request: Request) -> Result, } impl TimelineMetrics { @@ -656,7 +672,9 @@ impl TimelineMetrics { num_persistent_files_created, persistent_bytes_written, evictions, - evictions_with_low_residence_duration, + evictions_with_low_residence_duration: std::sync::RwLock::new( + evictions_with_low_residence_duration, + ), } } } @@ -675,6 +693,8 @@ impl Drop for TimelineMetrics { let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]); self.evictions_with_low_residence_duration + .write() + .unwrap() .remove(tenant_id, timeline_id); for op in STORAGE_TIME_OPERATIONS { let _ = diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index d98aa5c566..18a4d7617b 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1735,6 +1735,13 @@ impl Tenant { pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) { *self.tenant_conf.write().unwrap() = new_tenant_conf; + // Don't hold self.timelines.lock() during the notifies. + // There's no risk of deadlock right now, but there could be if we consolidate + // mutexes in struct Timeline in the future. 
+ let timelines = self.list_timelines(); + for timeline in timelines { + timeline.tenant_conf_updated(); + } } fn create_timeline_data( @@ -2815,6 +2822,9 @@ pub mod harness { trace_read_requests: Some(tenant_conf.trace_read_requests), eviction_policy: Some(tenant_conf.eviction_policy), min_resident_size_override: tenant_conf.min_resident_size_override, + evictions_low_residence_duration_metric_threshold: Some( + tenant_conf.evictions_low_residence_duration_metric_threshold, + ), } } } diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index cdabb23a7b..c01a8aa8c0 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -39,6 +39,7 @@ pub mod defaults { pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds"; pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds"; pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024; + pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour"; } /// Per-tenant configuration options @@ -93,6 +94,9 @@ pub struct TenantConf { pub trace_read_requests: bool, pub eviction_policy: EvictionPolicy, pub min_resident_size_override: Option, + // See the corresponding metric's help string. 
+ #[serde(with = "humantime_serde")] + pub evictions_low_residence_duration_metric_threshold: Duration, } /// Same as TenantConf, but this struct preserves the information about @@ -164,6 +168,11 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] #[serde(default)] pub min_resident_size_override: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(with = "humantime_serde")] + #[serde(default)] + pub evictions_low_residence_duration_metric_threshold: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -228,6 +237,9 @@ impl TenantConfOpt { min_resident_size_override: self .min_resident_size_override .or(global_conf.min_resident_size_override), + evictions_low_residence_duration_metric_threshold: self + .evictions_low_residence_duration_metric_threshold + .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold), } } } @@ -260,6 +272,10 @@ impl Default for TenantConf { trace_read_requests: false, eviction_policy: EvictionPolicy::NoEviction, min_resident_size_override: None, + evictions_low_residence_duration_metric_threshold: humantime::parse_duration( + DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, + ) + .expect("cannot parse default evictions_low_residence_duration_metric_threshold"), } } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 29d8b544cc..b8b1f963e5 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -77,6 +77,7 @@ pub(super) use self::eviction_task::EvictionTaskTenantState; use self::eviction_task::EvictionTaskTimelineState; use self::walreceiver::{WalReceiver, WalReceiverConf}; +use super::config::TenantConf; use super::layer_map::BatchedUpdates; use super::remote_timeline_client::index::IndexPart; use super::remote_timeline_client::RemoteTimelineClient; @@ -161,7 +162,7 @@ pub struct Timeline { ancestor_timeline: Option>, ancestor_lsn: Lsn, - metrics: TimelineMetrics, + 
pub(super) metrics: TimelineMetrics, /// Ensures layers aren't frozen by checkpointer between /// [`Timeline::get_layer_for_write`] and layer reads. @@ -1136,6 +1137,8 @@ impl Timeline { if let Some(delta) = local_layer_residence_duration { self.metrics .evictions_with_low_residence_duration + .read() + .unwrap() .observe(delta); info!(layer=%local_layer.short_id(), residence_millis=delta.as_millis(), "evicted layer after known residence period"); } else { @@ -1209,6 +1212,35 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.eviction_policy) } + fn get_evictions_low_residence_duration_metric_threshold( + tenant_conf: &TenantConfOpt, + default_tenant_conf: &TenantConf, + ) -> Duration { + tenant_conf + .evictions_low_residence_duration_metric_threshold + .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold) + } + + pub(super) fn tenant_conf_updated(&self) { + // NB: Most tenant conf options are read by background loops, so, + // changes will automatically be picked up. + + // The threshold is embedded in the metric. So, we need to update it. + { + let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold( + &self.tenant_conf.read().unwrap(), + &self.conf.default_tenant_conf, + ); + let tenant_id_str = self.tenant_id.to_string(); + let timeline_id_str = self.timeline_id.to_string(); + self.metrics + .evictions_with_low_residence_duration + .write() + .unwrap() + .change_threshold(&tenant_id_str, &timeline_id_str, new_threshold); + } + } + /// Open a Timeline handle. /// /// Loads the metadata for the timeline into memory, but not the layer map. 
@@ -1240,6 +1272,11 @@ impl Timeline { let max_lsn_wal_lag = tenant_conf_guard .max_lsn_wal_lag .unwrap_or(conf.default_tenant_conf.max_lsn_wal_lag); + let evictions_low_residence_duration_metric_threshold = + Self::get_evictions_low_residence_duration_metric_threshold( + &tenant_conf_guard, + &conf.default_tenant_conf, + ); drop(tenant_conf_guard); Arc::new_cyclic(|myself| { @@ -1287,7 +1324,7 @@ impl Timeline { &timeline_id, crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( "mtime", - conf.evictions_low_residence_duration_metric_threshold, + evictions_low_residence_duration_metric_threshold, ), ), diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 1e1effe295..69042478c7 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -519,6 +519,13 @@ class PageserverHttpClient(requests.Session): assert res.status_code == 200 + def download_all_layers(self, tenant_id: TenantId, timeline_id: TimelineId): + info = self.layer_map_info(tenant_id, timeline_id) + for layer in info.historic_layers: + if not layer.remote: + continue + self.download_layer(tenant_id, timeline_id, layer.layer_file_name) + def evict_layer(self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: str): res = self.delete( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}", diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 28f1a960df..1ed86d19a2 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -18,7 +18,11 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = """ page_cache_size=444; wait_lsn_timeout='111 s'; -tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" +[tenant_config] +checkpoint_distance = 10000 +compaction_target_size = 1048576 
+evictions_low_residence_duration_metric_threshold = "2 days" +""" env = neon_env_builder.init_start() http_client = env.pageserver.http_client() @@ -39,6 +43,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" new_conf = { "checkpoint_distance": "20000", "gc_period": "30sec", + "evictions_low_residence_duration_metric_threshold": "42s", } tenant, _ = env.neon_cli.create_tenant(conf=new_conf) @@ -78,6 +83,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" assert effective_config["gc_period"] == "1h" assert effective_config["image_creation_threshold"] == 3 assert effective_config["pitr_interval"] == "7days" + assert effective_config["evictions_low_residence_duration_metric_threshold"] == "2days" # check the configuration of the new tenant with closing(env.pageserver.connect()) as psconn: @@ -112,6 +118,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" assert ( new_effective_config["gc_period"] == "30s" ), "Specific 'gc_period' config should override the default value" + assert ( + new_effective_config["evictions_low_residence_duration_metric_threshold"] == "42s" + ), "Should override default value" assert new_effective_config["compaction_target_size"] == 1048576 assert new_effective_config["compaction_period"] == "20s" assert new_effective_config["compaction_threshold"] == 10 @@ -125,6 +134,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" "gc_period": "80sec", "compaction_period": "80sec", "image_creation_threshold": "2", + "evictions_low_residence_duration_metric_threshold": "23h", } env.neon_cli.config_tenant( tenant_id=tenant, @@ -167,6 +177,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" assert ( updated_effective_config["compaction_period"] == "1m 20s" ), "Specific 'compaction_period' config should override the default value" + assert ( + 
updated_effective_config["evictions_low_residence_duration_metric_threshold"] == "23h" + ), "Should override default value" assert updated_effective_config["compaction_target_size"] == 1048576 assert updated_effective_config["compaction_threshold"] == 10 assert updated_effective_config["gc_horizon"] == 67108864 @@ -225,6 +238,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" assert final_effective_config["gc_horizon"] == 67108864 assert final_effective_config["gc_period"] == "1h" assert final_effective_config["image_creation_threshold"] == 3 + assert final_effective_config["evictions_low_residence_duration_metric_threshold"] == "2days" # restart the pageserver and ensure that the config is still correct env.pageserver.stop() @@ -285,3 +299,81 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder): # dont test applying the setting here, we have that another test case to show it # we just care about being able to create the file assert len(contents_first) > len(contents_later) + + +def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold( + neon_env_builder: NeonEnvBuilder, +): + neon_env_builder.enable_remote_storage( + remote_storage_kind=RemoteStorageKind.LOCAL_FS, + test_name="test_live_reconfig_get_evictions_low_residence_duration_metric_threshold", + ) + + env = neon_env_builder.init_start() + assert isinstance(env.remote_storage, LocalFsStorage) + + (tenant_id, timeline_id) = env.neon_cli.create_tenant() + ps_http = env.pageserver.http_client() + + def get_metric(): + metrics = ps_http.get_metrics() + metric = metrics.query_one( + "pageserver_evictions_with_low_residence_duration_total", + { + "tenant_id": str(tenant_id), + "timeline_id": str(timeline_id), + }, + ) + return metric + + default_value = ps_http.tenant_config(tenant_id).effective_config[ + "evictions_low_residence_duration_metric_threshold" + ] + metric = get_metric() + assert int(metric.value) == 0, "metric is 
present with default value" + + assert default_value == "1day" + + ps_http.download_all_layers(tenant_id, timeline_id) + ps_http.evict_all_layers(tenant_id, timeline_id) + metric = get_metric() + assert int(metric.value) > 0, "metric is updated" + + env.neon_cli.config_tenant( + tenant_id, {"evictions_low_residence_duration_metric_threshold": default_value} + ) + updated_metric = get_metric() + assert int(updated_metric.value) == int( + metric.value + ), "metric is unchanged when setting same value" + + env.neon_cli.config_tenant( + tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"} + ) + metric = get_metric() + assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60 + assert int(metric.value) == 0 + + ps_http.download_all_layers(tenant_id, timeline_id) + ps_http.evict_all_layers(tenant_id, timeline_id) + metric = get_metric() + assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60 + assert int(metric.value) > 0 + + env.neon_cli.config_tenant( + tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"} + ) + metric = get_metric() + assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60 + assert int(metric.value) == 0, "value resets if label changes" + + ps_http.download_all_layers(tenant_id, timeline_id) + ps_http.evict_all_layers(tenant_id, timeline_id) + metric = get_metric() + assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60 + assert int(metric.value) > 0, "set a non-zero value for next step" + + env.neon_cli.config_tenant(tenant_id, {}) + metric = get_metric() + assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default" + assert int(metric.value) == 0, "value resets to default" From 0c82ff3d989e592f9c6ea848e2d3538c42feac7a Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 14 Apr 2023 11:46:47 +0100 Subject: [PATCH 27/77] test_runner: add Timeline Inspector to Grafana links (#4021) --- test_runner/fixtures/neon_fixtures.py | 17 
++++++++++++--- test_runner/fixtures/utils.py | 30 ++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index fb12752d3c..c6610ba062 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1913,15 +1913,26 @@ def remote_pg( connstr = os.getenv("BENCHMARK_CONNSTR") if connstr is None: raise ValueError("no connstr provided, use BENCHMARK_CONNSTR environment variable") + + host = parse_dsn(connstr).get("host", "") + is_neon = host.endswith(".neon.build") + start_ms = int(datetime.utcnow().timestamp() * 1000) with RemotePostgres(pg_bin, connstr) as remote_pg: + if is_neon: + timeline_id = TimelineId(remote_pg.safe_psql("SHOW neon.timeline_id")[0][0]) + yield remote_pg end_ms = int(datetime.utcnow().timestamp() * 1000) - host = parse_dsn(connstr).get("host", "") - if host.endswith(".neon.build"): + if is_neon: # Add 10s margin to the start and end times - allure_add_grafana_links(host, start_ms - 10_000, end_ms + 10_000) + allure_add_grafana_links( + host, + timeline_id, + start_ms - 10_000, + end_ms + 10_000, + ) class PSQL: diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 71df74dfba..30acd3f637 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -13,6 +13,7 @@ import allure from psycopg2.extensions import cursor from fixtures.log_helper import log +from fixtures.types import TimelineId Fn = TypeVar("Fn", bound=Callable[..., Any]) @@ -186,11 +187,15 @@ def allure_attach_from_dir(dir: Path): allure.attach.file(source, name, attachment_type, extension) -DATASOURCE_ID = "xHHYY0dVz" +GRAFANA_URL = "https://neonprod.grafana.net" +GRAFANA_EXPLORE_URL = f"{GRAFANA_URL}/explore" +GRAFANA_TIMELINE_INSPECTOR_DASHBOARD_URL = f"{GRAFANA_URL}/d/8G011dlnk/timeline-inspector" +LOGS_STAGING_DATASOURCE_ID = "xHHYY0dVz" -def allure_add_grafana_links(host: str, start_ms: 
int, end_ms: int): +def allure_add_grafana_links(host: str, timeline_id: TimelineId, start_ms: int, end_ms: int): """Add links to server logs in Grafana to Allure report""" + links = {} # We expect host to be in format like ep-divine-night-159320.us-east-2.aws.neon.build endpoint_id, region_id, _ = host.split(".", 2) @@ -202,12 +207,12 @@ def allure_add_grafana_links(host: str, start_ms: int, end_ms: int): } params: Dict[str, Any] = { - "datasource": DATASOURCE_ID, + "datasource": LOGS_STAGING_DATASOURCE_ID, "queries": [ { "expr": "", "refId": "A", - "datasource": {"type": "loki", "uid": DATASOURCE_ID}, + "datasource": {"type": "loki", "uid": LOGS_STAGING_DATASOURCE_ID}, "editorMode": "code", "queryType": "range", } @@ -220,8 +225,23 @@ def allure_add_grafana_links(host: str, start_ms: int, end_ms: int): for name, expr in expressions.items(): params["queries"][0]["expr"] = expr query_string = urlencode({"orgId": 1, "left": json.dumps(params)}) - link = f"https://neonprod.grafana.net/explore?{query_string}" + links[name] = f"{GRAFANA_EXPLORE_URL}?{query_string}" + timeline_qs = urlencode( + { + "orgId": 1, + "var-environment": "victoria-metrics-aws-dev", + "var-timeline_id": timeline_id, + "var-endpoint_id": endpoint_id, + "var-log_datasource": "grafanacloud-neonstaging-logs", + "from": start_ms, + "to": end_ms, + } + ) + link = f"{GRAFANA_TIMELINE_INSPECTOR_DASHBOARD_URL}?{timeline_qs}" + links["Timeline Inspector"] = link + + for name, link in links.items(): allure.dynamic.link(link, name=name) log.info(f"{name}: {link}") From 589cf1ed21148035d033701ee911fee79a4cea6f Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 14 Apr 2023 13:05:07 +0200 Subject: [PATCH 28/77] [compute_ctl] Do not create availability checker data on each start (#4019) Initially, the idea was to ensure that when we come and check data availability, special service table already contains one row. So if we lose it for some reason, we will error out.
Yet, to do availability check we anyway start compute first! So it doesn't really add some value, but we affect each compute start as we update at least one row in the database. Also this writes some WAL, so if timeline is close to `neon.max_cluster_size` it could prevent compute from starting up. That said, do CREATE TABLE IF NOT EXISTS + UPSERT right in the `/check_writability` handler. --- compute_tools/src/checker.rs | 54 +++++++++++++++++------------------ compute_tools/src/compute.rs | 2 -- compute_tools/src/http/api.rs | 5 +++- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index b8413de516..b6a287bdeb 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,12 +1,28 @@ use anyhow::{anyhow, Result}; -use postgres::Client; use tokio_postgres::NoTls; use tracing::{error, instrument}; use crate::compute::ComputeNode; +/// Update timestamp in a row in a special service table to check +/// that we can actually write some data in this particular timeline. +/// Create table if it's missing. #[instrument(skip_all)] -pub fn create_writability_check_data(client: &mut Client) -> Result<()> { +pub async fn check_writability(compute: &ComputeNode) -> Result<()> { + // Connect to the database. + let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?; + if client.is_closed() { + return Err(anyhow!("connection to postgres closed")); + } + + // The connection object performs the actual communication with the database, + // so spawn it off to run on its own. 
+ tokio::spawn(async move { + if let Err(e) = connection.await { + error!("connection error: {}", e); + } + }); + let query = " CREATE TABLE IF NOT EXISTS health_check ( id serial primary key, @@ -15,31 +31,15 @@ pub fn create_writability_check_data(client: &mut Client) -> Result<()> { INSERT INTO health_check VALUES (1, now()) ON CONFLICT (id) DO UPDATE SET updated_at = now();"; - let result = client.simple_query(query)?; - if result.len() < 2 { - return Err(anyhow::format_err!("executed {} queries", result.len())); - } - Ok(()) -} - -#[instrument(skip_all)] -pub async fn check_writability(compute: &ComputeNode) -> Result<()> { - let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?; - if client.is_closed() { - return Err(anyhow!("connection to postgres closed")); - } - tokio::spawn(async move { - if let Err(e) = connection.await { - error!("connection error: {}", e); - } - }); - - let result = client - .simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;") - .await?; - - if result.len() != 1 { - return Err(anyhow!("statement can't be executed")); + + let result = client.simple_query(query).await?; + + if result.len() != 2 { + return Err(anyhow::format_err!( + "expected 2 query results, but got {}", + result.len() + )); } + Ok(()) } diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 6ddfcf86c2..51de2b6e0a 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -32,7 +32,6 @@ use utils::lsn::Lsn; use compute_api::responses::{ComputeMetrics, ComputeStatus}; use compute_api::spec::ComputeSpec; -use crate::checker::create_writability_check_data; use crate::config; use crate::pg_helpers::*; use crate::spec::*; @@ -342,7 +341,6 @@ impl ComputeNode { handle_databases(spec, &mut client)?; handle_role_deletions(spec, self.connstr.as_str(), &mut client)?; handle_grants(spec, self.connstr.as_str(), &mut client)?; - create_writability_check_data(&mut client)?; 
handle_extensions(spec, &mut client)?; // 'Close' connection diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 92d058fbd1..3ca688de69 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -85,7 +85,10 @@ async fn routes(req: Request, compute: &Arc) -> Response Response::new(Body::from("true")), - Err(e) => Response::new(Body::from(e.to_string())), + Err(e) => { + error!("check_writability failed: {}", e); + Response::new(Body::from(e.to_string())) + } } } From 017d3a390dd17313612d23eb7d757635d42f6365 Mon Sep 17 00:00:00 2001 From: Vadim Kharitonov Date: Fri, 14 Apr 2023 14:00:13 +0300 Subject: [PATCH 29/77] Compile postgres with lz4 and zstd support --- Dockerfile.compute-node | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 742f2e18a1..2b1d8d63ae 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -12,7 +12,7 @@ FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \ - libicu-dev libxslt1-dev + libicu-dev libxslt1-dev liblz4-dev libzstd-dev ######################################################################################### # @@ -24,8 +24,13 @@ FROM build-deps AS pg-build ARG PG_VERSION COPY vendor/postgres-${PG_VERSION} postgres RUN cd postgres && \ - ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \ - --with-libxml --with-libxslt && \ + export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \ + --with-icu --with-libxml --with-libxslt --with-lz4" && \ + if [ "${PG_VERSION}" != "v14" ]; then \ + # zstd is available only from PG15 + export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \ + fi && \ + eval $CONFIGURE_CMD && \ make 
MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ # Install headers @@ -565,13 +570,17 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb # Install: # libreadline8 for psql # libicu67, locales for collations (including ICU and plpgsql_check) +# liblz4-1 for lz4 # libossp-uuid16 for extension ossp-uuid # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS # libxml2, libxslt1.1 for xml2 +# libzstd1 for zstd RUN apt update && \ apt install --no-install-recommends -y \ + gdb \ locales \ libicu67 \ + liblz4-1 \ libreadline8 \ libossp-uuid16 \ libgeos-c1v5 \ @@ -581,7 +590,7 @@ RUN apt update && \ libsfcgal1 \ libxml2 \ libxslt1.1 \ - gdb && \ + libzstd1 && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 From 75ea8106ece7c7a8f1abedbeb382f322e070c686 Mon Sep 17 00:00:00 2001 From: Vadim Kharitonov Date: Fri, 14 Apr 2023 14:01:24 +0300 Subject: [PATCH 30/77] Add `procps` into compute containers --- Dockerfile.compute-node | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 2b1d8d63ae..229e09aa98 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -590,7 +590,8 @@ RUN apt update && \ libsfcgal1 \ libxml2 \ libxslt1.1 \ - libzstd1 && \ + libzstd1 \ + procps && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 From 5ffa20dd822f989019df7013db5459e8238ab1a2 Mon Sep 17 00:00:00 2001 From: Vadim Kharitonov Date: Fri, 14 Apr 2023 13:54:34 +0300 Subject: [PATCH 31/77] [proxy] adjust proxy sleep timeout --- .../dev-eu-west-1-zeta.neon-proxy-scram.yaml | 4 ++-- ...v-us-east-2-beta.neon-proxy-scram-legacy.yaml | 16 ++++++++++++++++ .../dev-us-east-2-beta.neon-proxy-scram.yaml | 5 +++-- 
...-ap-southeast-1-epsilon.neon-proxy-scram.yaml | 4 ++-- ...prod-eu-central-1-gamma.neon-proxy-scram.yaml | 4 ++-- .../prod-us-east-2-delta.neon-proxy-scram.yaml | 4 ++-- ...od-us-west-2-eta.neon-proxy-scram-legacy.yaml | 4 ++-- .../prod-us-west-2-eta.neon-proxy-scram.yaml | 4 ++-- 8 files changed, 31 insertions(+), 14 deletions(-) diff --git a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml index 2307856464..a8567665d3 100644 --- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml +++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 image: diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml index feee1b369a..46cfdd2e69 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml @@ -1,6 +1,22 @@ # Helm chart values for neon-proxy-scram. # This is a YAML-formatted file. 
+deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +# Delay the kill signal by 5 minutes (5 * 60) +# The pod(s) will stay in Terminating, keeps the existing connections +# but doesn't receive new ones +containerLifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 300"] +terminationGracePeriodSeconds: 604800 + + image: repository: neondatabase/neon diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml index 2a8f028f3b..fdd869c122 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml @@ -7,15 +7,16 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 + image: repository: neondatabase/neon diff --git a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml index 5a98217bae..6088d62fba 100644 --- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git 
a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml index a9ee49d82f..7d26f2e02f 100644 --- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml index 239a9911c7..ae239fd3c1 100644 --- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml index a186fb833f..7378e8abda 100644 --- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml +++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 
minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml index c987ae236a..d9d458f081 100644 --- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 From ebea29841517cd189c800e09e67a85202376dcdc Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 14 Apr 2023 18:28:54 +0300 Subject: [PATCH 32/77] Update most of the dependencies to their latest versions (#4026) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/neondatabase/neon/pull/3991 Brings the changes back with the right way to use new `toml_edit` to deserialize values and a unit test for this. All non-trivial updates extracted into separate commits, also `cargo hakari` data and its manifest format were updated. 3 sets of crates remain unupdated: * `base64` — touches proxy in a lot of places and changed its api (by 0.21 version) quite strongly since our version (0.13).
* `opentelemetry` and `opentelemetry-*` crates ``` error[E0308]: mismatched types --> libs/tracing-utils/src/http.rs:65:21 | 65 | span.set_parent(parent_ctx); | ---------- ^^^^^^^^^^ expected struct `opentelemetry_api::context::Context`, found struct `opentelemetry::Context` | | | arguments to this method are incorrect | = note: struct `opentelemetry::Context` and struct `opentelemetry_api::context::Context` have similar names, but are actually distinct types note: struct `opentelemetry::Context` is defined in crate `opentelemetry_api` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.19.0/src/context.rs:77:1 | 77 | pub struct Context { | ^^^^^^^^^^^^^^^^^^ note: struct `opentelemetry_api::context::Context` is defined in crate `opentelemetry_api` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.18.0/src/context.rs:77:1 | 77 | pub struct Context { | ^^^^^^^^^^^^^^^^^^ = note: perhaps two different versions of crate `opentelemetry_api` are being used? note: associated function defined here --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-opentelemetry-0.18.0/src/span_ext.rs:43:8 | 43 | fn set_parent(&self, cx: Context); | ^^^^^^^^^^ For more information about this error, try `rustc --explain E0308`. error: could not compile `tracing-utils` due to previous error warning: build failed, waiting for other jobs to finish... error: could not compile `tracing-utils` due to previous error ``` `tracing-opentelemetry` of version `0.19` is not yet released, that is supposed to have the update we need. 
* similarly, `rustls`, `tokio-rustls`, `rustls-*` and `tls-listener` crates have similar issue: ``` error[E0308]: mismatched types --> libs/postgres_backend/tests/simple_select.rs:112:78 | 112 | let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg); | --------------------------------------------- ^^^^^^^^^^ expected struct `rustls::client::client_conn::ClientConfig`, found struct `ClientConfig` | | | arguments to this function are incorrect | = note: struct `ClientConfig` and struct `rustls::client::client_conn::ClientConfig` have similar names, but are actually distinct types note: struct `ClientConfig` is defined in crate `rustls` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.21.0/src/client/client_conn.rs:125:1 | 125 | pub struct ClientConfig { | ^^^^^^^^^^^^^^^^^^^^^^^ note: struct `rustls::client::client_conn::ClientConfig` is defined in crate `rustls` --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.20.8/src/client/client_conn.rs:91:1 | 91 | pub struct ClientConfig { | ^^^^^^^^^^^^^^^^^^^^^^^ = note: perhaps two different versions of crate `rustls` are being used? note: associated function defined here --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-postgres-rustls-0.9.0/src/lib.rs:23:12 | 23 | pub fn new(config: ClientConfig) -> Self { | ^^^ For more information about this error, try `rustc --explain E0308`. error: could not compile `postgres_backend` due to previous error warning: build failed, waiting for other jobs to finish... ``` * aws crates: I could not make new API to work with bucket endpoint overload, and console e2e tests failed. 
Other our tests passed, further investigation is worth to be done in https://github.com/neondatabase/neon/issues/4008 --- .config/hakari.toml | 2 +- Cargo.lock | 1406 +++++++++++------ Cargo.toml | 26 +- libs/consumption_metrics/Cargo.toml | 17 +- libs/postgres_ffi/build.rs | 6 +- libs/remote_storage/tests/pagination_tests.rs | 7 +- libs/tracing-utils/Cargo.toml | 3 +- libs/utils/Cargo.toml | 2 +- pageserver/src/config.rs | 90 +- pageserver/src/page_service.rs | 2 +- pageserver/src/tenant.rs | 2 +- pageserver/src/tenant/config.rs | 4 +- .../tenant/remote_timeline_client/upload.rs | 2 +- storage_broker/src/bin/storage_broker.rs | 3 +- trace/Cargo.toml | 2 - workspace_hack/Cargo.toml | 9 +- 16 files changed, 1023 insertions(+), 560 deletions(-) diff --git a/.config/hakari.toml b/.config/hakari.toml index 12d2d1bf9c..15b939e86f 100644 --- a/.config/hakari.toml +++ b/.config/hakari.toml @@ -4,7 +4,7 @@ hakari-package = "workspace_hack" # Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above. -dep-format-version = "3" +dep-format-version = "4" # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. # Hakari works much better with the new feature resolver. 
diff --git a/Cargo.lock b/Cargo.lock index 86787b8f6a..a18f4490da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,28 +64,77 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] -name = "anyhow" -version = "1.0.68" +name = "anstream" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" +checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "anyhow" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" dependencies = [ "backtrace", ] [[package]] name = "archery" -version = "0.4.0" +version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" +checksum = "b6cd774058b1b415c4855d8b86436c04bf050c003156fe24bc326fb3fe75c343" dependencies = [ "static_assertions", ] [[package]] name = "asn1-rs" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf6690c370453db30743b373a60ba498fc0d6d83b11f4abfd87a84a075db5dd4" +checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0" dependencies = [ "asn1-rs-derive", "asn1-rs-impl", @@ -105,7 +154,7 @@ checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "synstructure", ] @@ -117,46 +166,47 @@ checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "async-stream" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ "async-stream-impl", "futures-core", + "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = 
[ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "atomic-polyfill" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d299f547288d6db8d5c3a2916f7b2f66134b15b8c1ac1c4357dd3b8752af7bb2" +checksum = "c314e70d181aa6053b26e3f7fbf86d1dfff84f816a6175b967666b3506ef7289" dependencies = [ "critical-section", ] @@ -187,13 +237,13 @@ dependencies = [ "aws-http", "aws-sdk-sso", "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "hex", "http", @@ -206,15 +256,29 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-credential-types" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4232d3729eefc287adc0d5a8adc97b7d94eefffe6bbe94312cc86c7ab6b06ce" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-types 0.55.1", + "fastrand", + "tokio", + "tracing", + "zeroize", +] + [[package]] name = "aws-endpoint" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "http", "regex", "tracing", @@ -226,9 +290,9 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "http-body", @@ -248,15 +312,15 @@ dependencies = [ 
"aws-http", "aws-sig-auth", "aws-sigv4", - "aws-smithy-async", + "aws-smithy-async 0.51.0", "aws-smithy-checksums", - "aws-smithy-client", + "aws-smithy-client 0.51.0", "aws-smithy-eventstream", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - "aws-types", + "aws-types 0.51.0", "bytes", "bytes-utils", "http", @@ -275,13 +339,13 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "tokio-stream", @@ -297,14 +361,14 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-query", - "aws-smithy-types", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - "aws-types", + "aws-types 0.51.0", "bytes", "http", "tower", @@ -318,20 +382,20 @@ checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" dependencies = [ "aws-sigv4", "aws-smithy-eventstream", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.51.0", "http", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.51.0" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" +checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-http", + "aws-smithy-http 0.51.0", "bytes", 
"form_urlencoded", "hex", @@ -356,14 +420,26 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "aws-smithy-async" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", + "tokio-stream", +] + [[package]] name = "aws-smithy-checksums" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "crc32c", "crc32fast", @@ -383,10 +459,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "fastrand", "http", @@ -400,13 +476,33 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-client" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-http 0.55.1", + "aws-smithy-http-tower 0.55.1", + "aws-smithy-types 0.55.1", + "bytes", + "fastrand", + "http", + "http-body", + "pin-project-lite", + "tokio", + "tower", + "tracing", +] + [[package]] name = "aws-smithy-eventstream" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "bytes", "crc32fast", ] @@ -418,7 +514,7 
@@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-types", + "aws-smithy-types 0.51.0", "bytes", "bytes-utils", "futures-core", @@ -434,13 +530,49 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-http" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8" +dependencies = [ + "aws-smithy-types 0.55.1", + "bytes", + "bytes-utils", + "futures-core", + "http", + "http-body", + "hyper", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + [[package]] name = "aws-smithy-http-tower" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" dependencies = [ - "aws-smithy-http", + "aws-smithy-http 0.51.0", + "bytes", + "http", + "http-body", + "pin-project-lite", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-http-tower" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a" +dependencies = [ + "aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", "bytes", "http", "http-body", @@ -455,7 +587,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", ] [[package]] @@ -464,7 +596,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "urlencoding", ] @@ -480,6 +612,19 @@ dependencies = [ 
"time", ] +[[package]] +name = "aws-smithy-types" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1" +dependencies = [ + "base64-simd", + "itoa", + "num-integer", + "ryu", + "time", +] + [[package]] name = "aws-smithy-xml" version = "0.51.0" @@ -495,10 +640,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" dependencies = [ - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "http", "rustc_version", "tracing", @@ -506,10 +651,26 @@ dependencies = [ ] [[package]] -name = "axum" -version = "0.6.4" +name = "aws-types" +version = "0.55.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" +checksum = "b9b082e329d9a304d39e193ad5c7ab363a0d6507aca6965e0673a746686fb0cc" +dependencies = [ + "aws-credential-types", + "aws-smithy-async 0.55.1", + "aws-smithy-client 0.55.1", + "aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", + "http", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b32c5ea3aabaf4deb5f5ced2d688ec0844c881c9e6c696a8b769a05fc691e62" dependencies = [ "async-trait", "axum-core", @@ -529,16 +690,15 @@ dependencies = [ "serde", "sync_wrapper", "tower", - "tower-http", "tower-layer", "tower-service", ] [[package]] name = "axum-core" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" +checksum = 
"759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" dependencies = [ "async-trait", "bytes", @@ -584,6 +744,16 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "bincode" version = "1.3.3" @@ -595,9 +765,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.61.0" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a022e58a142a46fea340d68012b9201c094e93ec3d033a944a24f8fd4a4f09a" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", "cexpr", @@ -606,12 +776,13 @@ dependencies = [ "lazycell", "log", "peeking_take_while", + "prettyplease 0.2.4", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.15", "which", ] @@ -623,18 +794,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] [[package]] name = "bstr" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" dependencies = [ "memchr", "once_cell", @@ -702,9 +873,9 @@ checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "num-integer", @@ -742,9 +913,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.4.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" dependencies = [ "glob", "libc", @@ -765,30 +936,38 @@ dependencies = [ [[package]] name = "clap" -version = "4.1.4" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" dependencies = [ - "bitflags", + "clap_builder", "clap_derive", - "clap_lex 0.3.1", - "is-terminal", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex 0.4.1", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.1.0" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -802,12 +981,9 @@ dependencies = [ [[package]] name = "clap_lex" 
-version = "0.3.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" -dependencies = [ - "os_str_bytes", -] +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "close_fds" @@ -829,6 +1005,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "comfy-table" version = "6.1.4" @@ -859,7 +1041,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.1.4", + "clap 4.2.2", "compute_api", "futures", "hyper", @@ -921,7 +1103,7 @@ name = "control_plane" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "comfy-table", "git-version", "nix", @@ -957,15 +1139,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -1032,9 +1214,9 @@ checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = 
"a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1042,9 +1224,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1053,22 +1235,22 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset 0.7.1", + "memoffset 0.8.0", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", ] @@ -1110,9 +1292,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -1122,9 +1304,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ 
"cc", "codespan-reporting", @@ -1132,31 +1314,31 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 2.0.15", ] [[package]] name = "cxxbridge-flags" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "darling" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ "darling_core", "darling_macro", @@ -1164,27 +1346,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn", + "syn 1.0.109", ] [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1218,9 +1400,9 @@ dependencies = [ 
[[package]] name = "der-parser" -version = "8.1.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d4bc9b0db0a0df9ae64634ac5bdefb7afcb534e182275ca0beadbe486701c1" +checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e" dependencies = [ "asn1-rs", "displaydoc", @@ -1249,7 +1431,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1269,9 +1451,9 @@ dependencies = [ [[package]] name = "enum-map" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c25992259941eb7e57b936157961b217a4fc8597829ddef0596d6c3cd86e1a" +checksum = "988f0d17a0fa38291e5f41f71ea8d46a5d5497b9054d5a759fae2cbb819f2356" dependencies = [ "enum-map-derive", ] @@ -1284,7 +1466,7 @@ checksum = "2a4da76b3b6116d758c7ba93f7ec6a35d2e2cf24feda76c6e38a375f4d5c59f2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1305,7 +1487,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1323,13 +1505,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys 0.48.0", ] [[package]] @@ -1361,23 +1543,23 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" 
dependencies = [ "instant", ] [[package]] name = "filetime" -version = "0.2.19" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", - "redox_syscall", - "windows-sys 0.42.0", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", ] [[package]] @@ -1422,9 +1604,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1437,9 +1619,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1447,15 +1629,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1464,32 +1646,32 @@ dependencies = [ [[package]] name = "futures-io" 
-version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-timer" @@ -1499,9 +1681,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1517,9 +1699,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = 
"85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1527,20 +1709,22 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] name = "gimli" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" [[package]] name = "git-version" @@ -1561,7 +1745,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1639,7 +1823,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.4", + "spin 0.9.8", "stable_deref_trait", ] @@ -1667,6 +1851,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -1678,9 +1868,9 @@ dependencies = [ [[package]] name = "hex-literal" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "hmac" @@ -1704,9 +1894,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ "bytes", "fnv", @@ -1724,12 +1914,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" - [[package]] name = "httparse" version = "1.8.0" @@ -1760,9 +1944,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.23" +version = "0.14.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" dependencies = [ "bytes", "futures-channel", @@ -1775,7 +1959,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -1791,10 +1975,10 @@ dependencies = [ "http", "hyper", "log", - "rustls", + "rustls 0.20.8", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -1824,16 +2008,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "winapi", + "windows", ] [[package]] @@ -1864,9 +2048,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = 
"bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1904,30 +2088,31 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.4" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ + "hermit-abi 0.3.1", "libc", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "is-terminal" -version = "0.4.2" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.1", "io-lifetimes", - "rustix", - "windows-sys 0.42.0", + "rustix 0.37.11", + "windows-sys 0.48.0", ] [[package]] @@ -1941,9 +2126,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "js-sys" @@ -1956,11 +2141,11 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.2.0" +version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828" +checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" 
dependencies = [ - "base64 0.13.1", + "base64 0.21.0", "pem", "ring", "serde", @@ -2002,9 +2187,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libloading" @@ -2031,6 +2216,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "linux-raw-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" + [[package]] name = "lock_api" version = "0.4.9" @@ -2123,9 +2314,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" @@ -2145,23 +2336,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.4" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2e212582ede878b109755efd0773a4f0f4ec851584cf0aefbeb4d9ecc114822" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = 
"5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2194,15 +2385,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" -dependencies = [ - "memchr", -] - [[package]] name = "notify" version = "5.1.0" @@ -2291,9 +2473,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "oorandom" @@ -2358,8 +2540,8 @@ dependencies = [ "futures-util", "opentelemetry", "prost", - "tonic", - "tonic-build", + "tonic 0.8.3", + "tonic-build 0.8.4", ] [[package]] @@ -2411,9 +2593,9 @@ dependencies = [ [[package]] name = "os_info" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c424bc68d15e0778838ac013b5b3449544d8133633d8016319e7e05a820b8c0" +checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e" dependencies = [ "log", "serde", @@ -2422,9 +2604,15 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.4.1" +version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" + +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" [[package]] name = "overload" @@ -2442,7 +2630,7 @@ dependencies = [ 
"byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "close_fds", "const_format", "consumption_metrics", @@ -2541,7 +2729,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", "windows-sys 0.45.0", ] @@ -2569,9 +2757,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -2612,7 +2800,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2706,14 +2894,14 @@ dependencies = [ "futures", "once_cell", "pq_proto", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "thiserror", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", "tracing", "workspace_hack", ] @@ -2779,36 +2967,22 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.23" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" dependencies = [ "proc-macro2", - "syn", + "syn 1.0.109", ] [[package]] -name = "proc-macro-error" -version = "1.0.4" +name = "prettyplease" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn", - "version_check", -] - -[[package]] -name = 
"proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "quote", - "version_check", + "syn 2.0.15", ] [[package]] @@ -2819,9 +2993,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -2836,7 +3010,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix", + "rustix 0.36.12", ] [[package]] @@ -2857,9 +3031,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ "bytes", "prost-derive", @@ -2867,9 +3041,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", @@ -2878,35 +3052,34 @@ dependencies = [ "log", "multimap", "petgraph", - "prettyplease", + "prettyplease 0.1.25", "prost", "prost-types", "regex", - "syn", + "syn 1.0.109", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" dependencies = [ - "bytes", "prost", ] @@ -2921,7 +3094,7 @@ dependencies = [ "bstr", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "consumption_metrics", "futures", "git-version", @@ -2951,20 +3124,20 @@ dependencies = [ "reqwest-tracing", "routerify", "rstest", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "scopeguard", "serde", "serde_json", "sha2", - "socket2", + "socket2 0.5.2", "sync_wrapper", "thiserror", "tls-listener", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", "tracing", "tracing-opentelemetry", @@ -2973,16 +3146,16 @@ dependencies = [ "url", "utils", "uuid", - "webpki-roots", + "webpki-roots 0.23.0", "workspace_hack", "x509-parser", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -3019,9 +3192,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ "either", "rayon-core", @@ -3029,9 +3202,9 @@ dependencies = [ 
[[package]] name = "rayon-core" -version = "1.10.2" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -3061,10 +3234,19 @@ dependencies = [ ] [[package]] -name = "regex" -version = "1.7.1" +name = "redox_syscall" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -3082,9 +3264,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "remote_storage" @@ -3094,8 +3276,8 @@ dependencies = [ "async-trait", "aws-config", "aws-sdk-s3", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.55.1", "hyper", "metrics", "once_cell", @@ -3114,9 +3296,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.14" +version = "0.11.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64 0.21.0", "bytes", @@ -3136,27 +3318,27 @@ dependencies = [ "once_cell", 
"percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.22.6", "winreg", ] [[package]] name = "reqwest-middleware" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1c03e9011a8c59716ad13115550469e081e2e9892656b0ba6a47c907921894" +checksum = "99c50db2c7ccd815f976473dd7d0bde296f8c3b77c383acf4fc021cdcf10852b" dependencies = [ "anyhow", "async-trait", @@ -3169,11 +3351,12 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b739d87a6b2cf4743968ad2b4cef648fbe0204c19999509824425babb2097bce" +checksum = "8a71d77945a1c5ae9604f0504901e77a1e2e71f2932b1cb8103078179ca62ff8" dependencies = [ "async-trait", + "getrandom", "opentelemetry", "reqwest", "reqwest-middleware", @@ -3212,18 +3395,18 @@ dependencies = [ [[package]] name = "rpds" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000" +checksum = "9bd6ce569b15c331b1e5fd8cf6adb0bf240678b5f0cdc4d0f41e11683f6feba9" dependencies = [ "archery", ] [[package]] name = "rstest" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f2d176c472198ec1e6551dc7da28f1c089652f66a7b722676c2238ebc0edf" +checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" dependencies = [ "futures", "futures-timer", @@ -3233,23 +3416,23 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" +checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8" dependencies = [ "cfg-if", "proc-macro2", "quote", "rustc_version", - "syn", + "syn 1.0.109", "unicode-ident", ] [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" [[package]] name = "rustc-hash" @@ -3277,16 +3460,30 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.7" +version = "0.36.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" +checksum = "e0af200a3324fa5bcd922e84e9b55a298ea9f431a489f01961acdebc6e908f25" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys 0.42.0", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.1", + "windows-sys 0.48.0", ] [[package]] @@ -3301,6 +3498,18 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -3323,16 +3532,26 @@ dependencies = [ ] [[package]] -name = "rustversion" -version = "1.0.11" +name = "rustls-webpki" +version = "0.100.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "ryu" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "safekeeper" @@ -3344,7 +3563,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "const_format", "crc32c", "fs2", @@ -3417,9 +3636,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" [[package]] name = "sct" @@ -3456,33 +3675,33 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "sentry" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6097dc270a9c4555c5d6222ed243eaa97ff38e29299ed7c5cb36099033c604e" +checksum = "b5ce6d3512e2617c209ec1e86b0ca2fea06454cd34653c91092bf0f3ec41f8e3" dependencies = [ "httpdate", "reqwest", - "rustls", + "rustls 0.20.8", "sentry-backtrace", 
"sentry-contexts", "sentry-core", "sentry-panic", "tokio", "ureq", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] name = "sentry-backtrace" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d92d1e4d591534ae4f872d6142f3b500f4ffc179a6aed8a3e86c7cc96d10a6a" +checksum = "0e7fe408d4d1f8de188a9309916e02e129cbe51ca19e55badea5a64899399b1a" dependencies = [ "backtrace", "once_cell", @@ -3492,9 +3711,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3afa877b1898ff67dd9878cf4bec4e53cef7d3be9f14b1fc9e4fcdf36f8e4259" +checksum = "5695096a059a89973ec541062d331ff4c9aeef9c2951416c894f0fff76340e7d" dependencies = [ "hostname", "libc", @@ -3506,9 +3725,9 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc43eb7e4e3a444151a0fe8a0e9ce60eabd905dae33d66e257fa26f1b509c1bd" +checksum = "5b22828bfd118a7b660cf7a155002a494755c0424cebb7061e4743ecde9c7dbc" dependencies = [ "once_cell", "rand", @@ -3519,9 +3738,9 @@ dependencies = [ [[package]] name = "sentry-panic" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccab4fab11e3e63c45f4524bee2e75cde39cdf164cb0b0cbe6ccd1948ceddf66" +checksum = "1f4ced2a7a8c14899d58eec402d946f69d5ed26a3fc363a7e8b1e5cb88473a01" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3529,9 +3748,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63708ec450b6bdcb657af760c447416d69c38ce421f34e5e2e9ce8118410bc7" +checksum = "360ee3270f7a4a1eee6c667f7d38360b995431598a73b740dfe420da548d9cc9" dependencies = [ "debugid", "getrandom", @@ -3546,35 +3765,44 @@ 
dependencies = [ [[package]] name = "serde" -version = "1.0.152" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "serde_json" -version = "1.0.91" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "itoa", "ryu", "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3589,9 +3817,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d904179146de381af4c93d3af6ca4984b3152db687dacb9c3c35e86f39809c" +checksum = "331bb8c3bf9b92457ab7abecf07078c13f7d270ba490103e84e8b014490cd0b0" dependencies = [ "base64 0.13.1", "chrono", @@ -3605,14 +3833,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1966009f3c05f095697c537312f5415d1e3ed31ce0a56942bac4c771c5c335e" +checksum = 
"859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3654,9 +3882,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" dependencies = [ "libc", "signal-hook-registry", @@ -3675,9 +3903,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] @@ -3702,9 +3930,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" dependencies = [ "autocfg", ] @@ -3717,14 +3945,24 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", ] +[[package]] +name = "socket2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6d283f86695ae989d1e18440a943880967156325ba025f05049946bff47bcc2b" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -3733,9 +3971,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.4" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ "lock_api", ] @@ -3759,7 +3997,7 @@ dependencies = [ "anyhow", "async-stream", "bytes", - "clap 4.1.4", + "clap 4.2.2", "const_format", "futures", "futures-core", @@ -3773,8 +4011,8 @@ dependencies = [ "prost", "tokio", "tokio-stream", - "tonic", - "tonic-build", + "tonic 0.9.1", + "tonic-build 0.9.1", "tracing", "utils", "workspace_hack", @@ -3812,7 +4050,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", ] [[package]] @@ -3829,9 +4067,20 @@ checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" [[package]] name = "syn" -version = "1.0.107" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ -3852,7 +4101,7 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "unicode-xid", ] @@ -3869,24 +4118,24 @@ dependencies = [ [[package]] name = "task-local-extensions" 
-version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4167afbec18ae012de40f8cf1b9bf48420abb390678c34821caa07d924941cc4" +checksum = "ba323866e5d033818e3240feeb9f7db2c4296674e4d9e16b97b7bf8f490434e8" dependencies = [ - "tokio", + "pin-utils", ] [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", - "rustix", - "windows-sys 0.42.0", + "redox_syscall 0.3.5", + "rustix 0.37.11", + "windows-sys 0.45.0", ] [[package]] @@ -3926,7 +4175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3937,38 +4186,39 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] [[package]] name = "time" -version = "0.3.17" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa", "serde", @@ -3984,9 +4234,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" dependencies = [ "time-core", ] @@ -4012,9 +4262,9 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tls-listener" @@ -4027,26 +4277,25 @@ dependencies = [ "pin-project-lite", "thiserror", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] name = "tokio" -version = "1.25.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", "num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.4.9", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -4061,13 +4310,13 @@ dependencies = [ 
[[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -4088,7 +4337,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "socket2", + "socket2 0.4.9", "tokio", "tokio-util", ] @@ -4101,10 +4350,10 @@ checksum = "606f2b73660439474394432239c82249c0d45eb5f23d91f401be1e33590444a7" dependencies = [ "futures", "ring", - "rustls", + "rustls 0.20.8", "tokio", "tokio-postgres", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -4113,16 +4362,26 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] [[package]] -name = "tokio-stream" -version = "0.1.11" +name = "tokio-rustls" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +dependencies = [ + "rustls 0.21.0", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" dependencies = [ "futures-core", "pin-project-lite", @@ -4137,7 +4396,7 @@ dependencies = [ "filetime", "futures-core", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "tokio", "tokio-stream", "xattr", @@ -4157,9 +4416,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.4" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" dependencies = [ "bytes", "futures-core", @@ -4171,33 +4430,36 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" dependencies = [ "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" +checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.17.1" +version = "0.19.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34cc558345efd7e88b9eda9626df2138b80bb46a7606f695e751c892bc7dac6" +checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" dependencies = [ "indexmap", - "itertools", - "nom8", "serde", + "serde_spanned", "toml_datetime", + "winnow", ] [[package]] @@ -4222,10 +4484,7 @@ dependencies = [ "pin-project", "prost", "prost-derive", - "rustls-native-certs", - "rustls-pemfile", "tokio", - "tokio-rustls", "tokio-stream", "tokio-util", "tower", @@ -4235,17 +4494,62 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "tonic" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bd8e87955eb13c1986671838177d6792cdc52af9bffced0d2c8a9a7f741ab3" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.21.0", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-timeout", + 
"percent-encoding", + "pin-project", + "prost", + "rustls-native-certs", + "rustls-pemfile", + "tokio", + "tokio-rustls 0.24.0", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic-build" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" dependencies = [ - "prettyplease", + "prettyplease 0.1.25", "proc-macro2", "prost-build", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "tonic-build" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f60a933bbea70c95d633c04c951197ddf084958abaa2ed502a3743bdd8d8dd7" +dependencies = [ + "prettyplease 0.1.25", + "proc-macro2", + "prost-build", + "quote", + "syn 1.0.109", ] [[package]] @@ -4268,25 +4572,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tower-http" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" -dependencies = [ - "bitflags", - "bytes", - "futures-core", - "futures-util", - "http", - "http-body", - "http-range-header", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-layer" version = "0.3.2" @@ -4304,7 +4589,7 @@ name = "trace" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "pageserver_api", "utils", "workspace_hack", @@ -4331,7 +4616,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -4477,15 +4762,15 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = 
"92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-normalization" @@ -4523,10 +4808,10 @@ dependencies = [ "base64 0.13.1", "log", "once_cell", - "rustls", + "rustls 0.20.8", "url", "webpki", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] @@ -4553,6 +4838,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "utils" version = "0.1.0" @@ -4596,9 +4887,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" dependencies = [ "getrandom", "serde", @@ -4616,12 +4907,18 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "wal_craft" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "env_logger", "log", "once_cell", @@ -4633,12 +4930,11 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.2" 
+version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -4679,7 +4975,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -4713,7 +5009,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4753,6 +5049,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "webpki-roots" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125" +dependencies = [ + "rustls-webpki", +] + [[package]] name = "which" version = "4.4.0" @@ -4795,19 +5100,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", 
+ "windows_x86_64_msvc 0.42.2", ] [[package]] @@ -4816,65 +5130,140 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", ] [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = 
"597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +dependencies = [ + "memchr", +] [[package]] name = "winreg" @@ -4893,7 +5282,8 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", + "clap_builder", "crossbeam-utils", "digest", "either", @@ -4905,7 +5295,6 @@ dependencies = [ "futures-sink", "futures-util", "hashbrown 0.12.3", - "indexmap", "itertools", "libc", "log", @@ -4920,16 +5309,18 @@ dependencies = [ "regex-syntax", 
"reqwest", "ring", - "rustls", + "rustls 0.20.8", "scopeguard", "serde", "serde_json", - "socket2", - "syn", + "socket2 0.4.9", + "syn 1.0.109", + "syn 2.0.15", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", - "tonic", + "toml_datetime", + "toml_edit", "tower", "tracing", "tracing-core", @@ -4939,12 +5330,11 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" +checksum = "bab0c2f54ae1d92f4fcb99c0b7ccf0b1e3451cbd395e5f115ccbdbcb18d4f634" dependencies = [ "asn1-rs", - "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", @@ -4972,15 +5362,15 @@ checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" [[package]] name = "yasna" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aed2e7a52e3744ab4d0c05c20aa065258e84c49fd4226f5191b2ed29712710b4" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ "time", ] [[package]] name = "zeroize" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" +checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" diff --git a/Cargo.toml b/Cargo.toml index 679605dc1d..0b545e6190 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,10 @@ atty = "0.2.14" aws-config = { version = "0.51.0", default-features = false, features=["rustls"] } aws-sdk-s3 = "0.21.0" aws-smithy-http = "0.51.0" -aws-types = "0.51.0" +aws-types = "0.55" base64 = "0.13.0" bincode = "1.3" -bindgen = "0.61" +bindgen = "0.65" bstr = "1.0" byteorder = "1.4" bytes = "1.0" @@ -50,7 +50,7 @@ git-version = "0.3" hashbrown = "0.13" hashlink = "0.8.1" hex = "0.4" -hex-literal = "0.3" +hex-literal 
= "0.4" hmac = "0.12.1" hostname = "0.3.1" humantime = "2.1" @@ -80,18 +80,18 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls" reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] } reqwest-middleware = "0.2.0" routerify = "3" -rpds = "0.12.0" +rpds = "0.13" rustls = "0.20" rustls-pemfile = "1" rustls-split = "0.3" scopeguard = "1.1" -sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } +sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" serde_with = "2.0" sha2 = "0.10.2" signal-hook = "0.3" -socket2 = "0.4.4" +socket2 = "0.5" strum = "0.24" strum_macros = "0.24" svg_fmt = "0.4.1" @@ -106,17 +106,17 @@ tokio-postgres-rustls = "0.9.0" tokio-rustls = "0.23" tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["io"] } -toml = "0.5" -toml_edit = { version = "0.17", features = ["easy"] } -tonic = {version = "0.8", features = ["tls", "tls-roots"]} +toml = "0.7" +toml_edit = "0.19" +tonic = {version = "0.9", features = ["tls", "tls-roots"]} tracing = "0.1" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } url = "2.2" uuid = { version = "1.2", features = ["v4", "serde"] } walkdir = "2.3.2" -webpki-roots = "0.22.5" -x509-parser = "0.14" +webpki-roots = "0.23" +x509-parser = "0.15" ## TODO replace this with tracing env_logger = "0.10" @@ -154,9 +154,9 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" } ## Build dependencies criterion = "0.4" rcgen = "0.10" -rstest = "0.16" +rstest = "0.17" tempfile = "3.4" -tonic-build = "0.8" +tonic-build = "0.9" # This is only needed for proxy's tests. # TODO: we should probably fork `tokio-postgres-rustls` instead. 
diff --git a/libs/consumption_metrics/Cargo.toml b/libs/consumption_metrics/Cargo.toml index f26aa2fbc5..3f290821c2 100644 --- a/libs/consumption_metrics/Cargo.toml +++ b/libs/consumption_metrics/Cargo.toml @@ -4,13 +4,12 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -anyhow = "1.0.68" -chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } -rand = "0.8.3" -serde = "1.0.152" -serde_with = "2.1.0" -utils = { version = "0.1.0", path = "../utils" } -workspace_hack = { version = "0.1.0", path = "../../workspace_hack" } +anyhow.workspace = true +chrono.workspace = true +rand.workspace = true +serde.workspace = true +serde_with.workspace = true +utils.workspace = true + +workspace_hack.workspace = true diff --git a/libs/postgres_ffi/build.rs b/libs/postgres_ffi/build.rs index 66221af522..f7e39751ef 100644 --- a/libs/postgres_ffi/build.rs +++ b/libs/postgres_ffi/build.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use std::process::Command; use anyhow::{anyhow, Context}; -use bindgen::callbacks::ParseCallbacks; +use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; #[derive(Debug)] struct PostgresFfiCallbacks; @@ -20,7 +20,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { // Add any custom #[derive] attributes to the data structures that bindgen // creates. - fn add_derives(&self, name: &str) -> Vec { + fn add_derives(&self, derive_info: &DeriveInfo) -> Vec { // This is the list of data structures that we want to serialize/deserialize. let serde_list = [ "XLogRecord", @@ -31,7 +31,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { "ControlFileData", ]; - if serde_list.contains(&name) { + if serde_list.contains(&derive_info.name) { vec![ "Default".into(), // Default allows us to easily fill the padding fields with 0. 
"Serialize".into(), diff --git a/libs/remote_storage/tests/pagination_tests.rs b/libs/remote_storage/tests/pagination_tests.rs index eb52409c44..048e99d841 100644 --- a/libs/remote_storage/tests/pagination_tests.rs +++ b/libs/remote_storage/tests/pagination_tests.rs @@ -204,12 +204,7 @@ async fn upload_s3_data( let data = format!("remote blob data {i}").into_bytes(); let data_len = data.len(); task_client - .upload( - Box::new(std::io::Cursor::new(data)), - data_len, - &blob_path, - None, - ) + .upload(std::io::Cursor::new(data), data_len, &blob_path, None) .await?; Ok::<_, anyhow::Error>((blob_prefix, blob_path)) diff --git a/libs/tracing-utils/Cargo.toml b/libs/tracing-utils/Cargo.toml index 8c3d3f9063..b285c9b5b0 100644 --- a/libs/tracing-utils/Cargo.toml +++ b/libs/tracing-utils/Cargo.toml @@ -14,4 +14,5 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tracing.workspace = true tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true -workspace_hack = { version = "0.1", path = "../../workspace_hack" } + +workspace_hack.workspace = true diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 391bc52a80..dc6326e73e 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -33,7 +33,7 @@ serde_with.workspace = true strum.workspace = true strum_macros.workspace = true url.workspace = true -uuid = { version = "1.2", features = ["v4", "serde"] } +uuid.workspace = true metrics.workspace = true workspace_hack.workspace = true diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 826cf1aab3..9e341230cf 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -6,6 +6,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; use remote_storage::{RemotePath, RemoteStorageConfig}; +use serde::de::IntoDeserializer; use std::env; use storage_broker::Uri; use utils::crashsafe::path_with_suffix_extension; @@ -704,8 +705,9 @@ impl PageServerConf { "disk_usage_based_eviction" => { 
tracing::info!("disk_usage_based_eviction: {:#?}", &item); builder.disk_usage_based_eviction( - toml_edit::de::from_item(item.clone()) - .context("parse disk_usage_based_eviction")?) + deserialize_from_item("disk_usage_based_eviction", item) + .context("parse disk_usage_based_eviction")? + ) }, "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?), _ => bail!("unrecognized pageserver option '{key}'"), @@ -806,14 +808,14 @@ impl PageServerConf { if let Some(eviction_policy) = item.get("eviction_policy") { t_conf.eviction_policy = Some( - toml_edit::de::from_item(eviction_policy.clone()) + deserialize_from_item("eviction_policy", eviction_policy) .context("parse eviction_policy")?, ); } if let Some(item) = item.get("min_resident_size_override") { t_conf.min_resident_size_override = Some( - toml_edit::de::from_item(item.clone()) + deserialize_from_item("min_resident_size_override", item) .context("parse min_resident_size_override")?, ); } @@ -920,6 +922,18 @@ where }) } +fn deserialize_from_item(name: &str, item: &Item) -> anyhow::Result +where + T: serde::de::DeserializeOwned, +{ + // ValueDeserializer::new is not public, so use the ValueDeserializer's documented way + let deserializer = match item.clone().into_value() { + Ok(value) => value.into_deserializer(), + Err(item) => anyhow::bail!("toml_edit::Item '{item}' is not a toml_edit::Value"), + }; + T::deserialize(deserializer).with_context(|| format!("deserializing item for node {name}")) +} + /// Configurable semaphore permits setting. 
/// /// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty @@ -986,9 +1000,10 @@ mod tests { use remote_storage::{RemoteStorageKind, S3Config}; use tempfile::{tempdir, TempDir}; + use utils::serde_percent::Percent; use super::*; - use crate::DEFAULT_PG_VERSION; + use crate::{tenant::config::EvictionPolicy, DEFAULT_PG_VERSION}; const ALL_BASE_VALUES_TOML: &str = r#" # Initial configuration file created by 'pageserver --init' @@ -1286,6 +1301,71 @@ trace_read_requests = {trace_read_requests}"#, Ok(()) } + #[test] + fn eviction_pageserver_config_parse() -> anyhow::Result<()> { + let tempdir = tempdir()?; + let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?; + + let pageserver_conf_toml = format!( + r#"pg_distrib_dir = "{}" +metric_collection_endpoint = "http://sample.url" +metric_collection_interval = "10min" +id = 222 + +[disk_usage_based_eviction] +max_usage_pct = 80 +min_avail_bytes = 0 +period = "10s" + +[tenant_config] +evictions_low_residence_duration_metric_threshold = "20m" + +[tenant_config.eviction_policy] +kind = "LayerAccessThreshold" +period = "20m" +threshold = "20m" +"#, + pg_distrib_dir.display(), + ); + let toml: Document = pageserver_conf_toml.parse()?; + let conf = PageServerConf::parse_and_validate(&toml, &workdir)?; + + assert_eq!(conf.pg_distrib_dir, pg_distrib_dir); + assert_eq!( + conf.metric_collection_endpoint, + Some("http://sample.url".parse().unwrap()) + ); + assert_eq!( + conf.metric_collection_interval, + Duration::from_secs(10 * 60) + ); + assert_eq!( + conf.default_tenant_conf + .evictions_low_residence_duration_metric_threshold, + Duration::from_secs(20 * 60) + ); + assert_eq!(conf.id, NodeId(222)); + assert_eq!( + conf.disk_usage_based_eviction, + Some(DiskUsageEvictionTaskConfig { + max_usage_pct: Percent::new(80).unwrap(), + min_avail_bytes: 0, + period: Duration::from_secs(10), + #[cfg(feature = "testing")] + mock_statvfs: None, + }) + ); + match 
&conf.default_tenant_conf.eviction_policy { + EvictionPolicy::NoEviction => panic!("Unexpected eviction opolicy tenant settings"), + EvictionPolicy::LayerAccessThreshold(eviction_thresold) => { + assert_eq!(eviction_thresold.period, Duration::from_secs(20 * 60)); + assert_eq!(eviction_thresold.threshold, Duration::from_secs(20 * 60)); + } + } + + Ok(()) + } + fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> { let tempdir_path = tempdir.path(); diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index c0e4a2a9cf..bd38a7a2f3 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -65,7 +65,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream { // We were requested to shut down. - let msg = format!("pageserver is shutting down"); + let msg = "pageserver is shutting down".to_string(); let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)); Err(QueryError::Other(anyhow::anyhow!(msg))) } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 18a4d7617b..11415b47c4 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1894,7 +1894,7 @@ impl Tenant { .to_string(); // Convert the config to a toml file. 
- conf_content += &toml_edit::easy::to_string(&tenant_conf)?; + conf_content += &toml_edit::ser::to_string(&tenant_conf)?; let mut target_config_file = VirtualFile::open_with_options( target_config_path, diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index c01a8aa8c0..34f57840fb 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -291,9 +291,9 @@ mod tests { ..TenantConfOpt::default() }; - let toml_form = toml_edit::easy::to_string(&small_conf).unwrap(); + let toml_form = toml_edit::ser::to_string(&small_conf).unwrap(); assert_eq!(toml_form, "gc_horizon = 42\n"); - assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap()); + assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap()); let json_form = serde_json::to_string(&small_conf).unwrap(); assert_eq!(json_form, "{\"gc_horizon\":42}"); diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index ce9f4d9bf8..699121ccd9 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -74,7 +74,7 @@ pub(super) async fn upload_timeline_layer<'a>( })?; storage - .upload(Box::new(source_file), fs_size, &storage_path, None) + .upload(source_file, fs_size, &storage_path, None) .await .with_context(|| { format!( diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index d7ace28426..de7b634ba0 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -23,7 +23,6 @@ use std::convert::Infallible; use std::net::SocketAddr; use std::pin::Pin; use std::sync::Arc; -use std::task::Poll; use std::time::Duration; use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; @@ -374,7 +373,7 @@ impl BrokerService for Broker { Ok(info) => yield info, Err(RecvError::Lagged(skipped_msg)) => { missed_msgs += 
skipped_msg; - if let Poll::Ready(_) = futures::poll!(Box::pin(warn_interval.tick())) { + if (futures::poll!(Box::pin(warn_interval.tick()))).is_ready() { warn!("subscription id={}, key={:?} addr={:?} dropped {} messages, channel is full", subscriber.id, subscriber.key, subscriber.remote_addr, missed_msgs); missed_msgs = 0; diff --git a/trace/Cargo.toml b/trace/Cargo.toml index 6ced992d4c..d6eed3f49c 100644 --- a/trace/Cargo.toml +++ b/trace/Cargo.toml @@ -4,8 +4,6 @@ version = "0.1.0" edition.workspace = true license.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] clap.workspace = true anyhow.workspace = true diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index f885f4a94d..f735ffed4c 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -18,6 +18,7 @@ byteorder = { version = "1" } bytes = { version = "1", features = ["serde"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } clap = { version = "4", features = ["derive", "string"] } +clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] } crossbeam-utils = { version = "0.8" } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } @@ -29,7 +30,6 @@ futures-executor = { version = "0.3" } futures-sink = { version = "0.3" } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } hashbrown = { version = "0.12", features = ["raw"] } -indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -52,7 +52,8 @@ socket2 = { version = "0.4", default-features = false, features = ["all"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", 
"process", "rt-multi-thread", "signal", "sync", "time"] } tokio-rustls = { version = "0.23" } tokio-util = { version = "0.7", features = ["codec", "io"] } -tonic = { version = "0.8", features = ["tls-roots"] } +toml_datetime = { version = "0.6", default-features = false, features = ["serde"] } +toml_edit = { version = "0.19", features = ["serde"] } tower = { version = "0.4", features = ["balance", "buffer", "limit", "retry", "timeout", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } @@ -64,7 +65,6 @@ anyhow = { version = "1", features = ["backtrace"] } bytes = { version = "1", features = ["serde"] } either = { version = "1" } hashbrown = { version = "0.12", features = ["raw"] } -indexmap = { version = "1", default-features = false, features = ["std"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -74,6 +74,7 @@ prost = { version = "0.11" } regex = { version = "1" } regex-syntax = { version = "0.6" } serde = { version = "1", features = ["alloc", "derive"] } -syn = { version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } +syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } ### END HAKARI SECTION From c2496c7ef261150d5c79c46c5846a83e78d3e226 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 14 Apr 2023 12:22:43 -0400 Subject: [PATCH 33/77] Added black_box in layer_map benches (fix #3396) --- pageserver/benches/bench_layer_map.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 4882fc518f..8f139a6596 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ 
-13,7 +13,7 @@ use std::time::Instant; use utils::lsn::Lsn; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; fn build_layer_map(filename_dump: PathBuf) -> LayerMap { let mut layer_map = LayerMap::::default(); @@ -114,7 +114,7 @@ fn bench_from_captest_env(c: &mut Criterion) { c.bench_function("captest_uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); @@ -122,11 +122,11 @@ fn bench_from_captest_env(c: &mut Criterion) { // test with a key that corresponds to the RelDir entry. See pgdatadir_mapping.rs. c.bench_function("captest_rel_dir_query", |b| { b.iter(|| { - let result = layer_map.search( + let result = black_box(layer_map.search( Key::from_hex("000000067F00008000000000000000000001").unwrap(), // This LSN is higher than any of the LSNs in the tree Lsn::from_str("D0/80208AE1").unwrap(), - ); + )); result.unwrap(); }); }); @@ -183,7 +183,7 @@ fn bench_from_real_project(c: &mut Criterion) { group.bench_function("uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); @@ -232,7 +232,7 @@ fn bench_sequential(c: &mut Criterion) { group.bench_function("uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); From 73f34eaa5e3632f978a19e3db85e555124920651 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Mon, 17 Apr 2023 11:24:57 +0300 Subject: [PATCH 34/77] Send AppendResponse keepalive once per second (#4036) Walproposer sends AppendRequest at least once per second. This patch adds a response to these requests once per second. 
Fixes https://github.com/neondatabase/neon/issues/4017 --- safekeeper/src/receive_wal.rs | 42 +++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 61e4c5f0fa..195470e3ca 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -27,6 +27,8 @@ use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::mpsc::Receiver; use tokio::sync::mpsc::Sender; use tokio::task::spawn_blocking; +use tokio::time::Duration; +use tokio::time::Instant; use tracing::*; use utils::id::TenantTimelineId; use utils::lsn::Lsn; @@ -206,6 +208,10 @@ async fn network_write( } } +// Send keepalive messages to walproposer, to make sure it receives updates +// even when it writes a steady stream of messages. +const KEEPALIVE_INTERVAL: Duration = Duration::from_secs(1); + /// Takes messages from msg_rx, processes and pushes replies to reply_tx. struct WalAcceptor { tli: Arc, @@ -253,18 +259,25 @@ impl WalAcceptor { timeline: Arc::clone(&self.tli), }; - let mut next_msg: ProposerAcceptorMessage; + // After this timestamp we will stop processing AppendRequests and send a response + // to the walproposer. walproposer sends at least one AppendRequest per second, + // we will send keepalives by replying to these requests once per second. + let mut next_keepalive = Instant::now(); loop { let opt_msg = self.msg_rx.recv().await; if opt_msg.is_none() { return Ok(()); // chan closed, streaming terminated } - next_msg = opt_msg.unwrap(); + let mut next_msg = opt_msg.unwrap(); - if matches!(next_msg, ProposerAcceptorMessage::AppendRequest(_)) { + let reply_msg = if matches!(next_msg, ProposerAcceptorMessage::AppendRequest(_)) { // loop through AppendRequest's while it's readily available to // write as many WAL as possible without fsyncing + // + // Note: this will need to be rewritten if we want to read non-AppendRequest messages here. 
+ // Otherwise, we might end up in a situation where we read a message, but don't + // process it. while let ProposerAcceptorMessage::AppendRequest(append_request) = next_msg { let noflush_msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); @@ -274,6 +287,11 @@ impl WalAcceptor { } } + // get out of this loop if keepalive time is reached + if Instant::now() >= next_keepalive { + break; + } + match self.msg_rx.try_recv() { Ok(msg) => next_msg = msg, Err(TryRecvError::Empty) => break, @@ -282,18 +300,18 @@ impl WalAcceptor { } // flush all written WAL to the disk - if let Some(reply) = self.tli.process_msg(&ProposerAcceptorMessage::FlushWAL)? { - if self.reply_tx.send(reply).await.is_err() { - return Ok(()); // chan closed, streaming terminated - } - } + self.tli.process_msg(&ProposerAcceptorMessage::FlushWAL)? } else { // process message other than AppendRequest - if let Some(reply) = self.tli.process_msg(&next_msg)? { - if self.reply_tx.send(reply).await.is_err() { - return Ok(()); // chan closed, streaming terminated - } + self.tli.process_msg(&next_msg)? 
+ }; + + if let Some(reply) = reply_msg { + if self.reply_tx.send(reply).await.is_err() { + return Ok(()); // chan closed, streaming terminated } + // reset keepalive time + next_keepalive = Instant::now() + KEEPALIVE_INTERVAL; } } } From d8dd60dc811eade6fbf89b4416f1860f0000fb3d Mon Sep 17 00:00:00 2001 From: fcdm <128653800+fcdm@users.noreply.github.com> Date: Mon, 17 Apr 2023 10:58:53 +0100 Subject: [PATCH 35/77] Add helm values for us-east-1 --- ...prod-us-east-1-theta.neon-proxy-scram.yaml | 69 +++++++++++++++++++ ...d-us-east-1-theta.neon-storage-broker.yaml | 52 ++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 .github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml create mode 100644 .github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml diff --git a/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml new file mode 100644 index 0000000000..f113d1f861 --- /dev/null +++ b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml @@ -0,0 +1,69 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. + +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +# Delay the kill signal by 5 minutes (5 * 60) +# The pod(s) will stay in Terminating, keeps the existing connections +# but doesn't receive new ones +containerLifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 300"] +terminationGracePeriodSeconds: 604800 + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" + domain: "*.us-east-1.aws.neon.tech" + # These domains haven't been delegated yet. 
+ # extraDomains: ["*.us-east-1.retooldb.com", "*.us-east-1.postgres.vercel-storage.com"] + sentryEnvironment: "production" + wssPort: 8443 + metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" + metricCollectionInterval: "10min" + +podLabels: + neon_service: proxy-scram + neon_env: prod + neon_region: us-east-1 + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: us-east-1.aws.neon.tech + httpsPort: 443 + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-proxy.fullname\" . }}" + labels: + helm.sh/chart: neon-proxy-{{ .Chart.Version }} + app.kubernetes.io/name: neon-proxy + app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}" + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-proxy" + endpoints: + - port: http + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" diff --git a/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml b/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml new file mode 100644 index 0000000000..7c16911b5e --- /dev/null +++ b/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml @@ -0,0 +1,52 @@ +# Helm chart values for neon-storage-broker +podLabels: + neon_env: production + neon_service: storage-broker + +# Use L4 LB +service: + # service.annotations -- Annotations to add to the service + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller + 
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet + # assign service to this name at external-dns + external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.theta.us-east-1.internal.aws.neon.tech + # service.type -- Service type + type: LoadBalancer + # service.port -- broker listen port + port: 50051 + +ingress: + enabled: false + +metrics: + enabled: false + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-storage-broker.fullname\" . }}" + labels: + helm.sh/chart: neon-storage-broker-{{ .Chart.Version }} + app.kubernetes.io/name: neon-storage-broker + app.kubernetes.io/instance: neon-storage-broker + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-storage-broker" + endpoints: + - port: broker + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" + +settings: + sentryEnvironment: "production" From 0c083564ce7f526d7950be757d2d0c6f84afd096 Mon Sep 17 00:00:00 2001 From: Cihan Demirci <128653800+fcdm@users.noreply.github.com> Date: Mon, 17 Apr 2023 13:25:27 +0100 Subject: [PATCH 36/77] Add us-east-1 hosts file and update regions (#4042) ## Describe your changes ## Issue ticket number and link ## Checklist before requesting a review - [x] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. 
## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --- .github/ansible/prod.us-east-1.hosts.yaml | 50 +++++++++++++++++++++++ .github/workflows/deploy-prod.yml | 6 ++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 .github/ansible/prod.us-east-1.hosts.yaml diff --git a/.github/ansible/prod.us-east-1.hosts.yaml b/.github/ansible/prod.us-east-1.hosts.yaml new file mode 100644 index 0000000000..fcf472432b --- /dev/null +++ b/.github/ansible/prod.us-east-1.hosts.yaml @@ -0,0 +1,50 @@ +storage: + vars: + bucket_name: neon-prod-storage-us-east-1 + bucket_region: us-east-1 + console_mgmt_base_url: http://neon-internal-api.aws.neon.tech + broker_endpoint: http://storage-broker-lb.theta.us-east-1.internal.aws.neon.tech:50051 + pageserver_config_stub: + pg_distrib_dir: /usr/local + metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events + metric_collection_interval: 10min + disk_usage_based_eviction: + max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80 + min_avail_bytes: 0 + period: "10s" + tenant_config: + eviction_policy: + kind: "LayerAccessThreshold" + period: "10m" + threshold: &default_eviction_threshold "24h" + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: us-east-1 + ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-1 + console_region_id: aws-us-east-1 + sentry_environment: production + + children: + pageservers: + hosts: + pageserver-0.us-east-1.aws.neon.tech: + ansible_host: i-0f58137883429f55a + pageserver-1.us-east-1.aws.neon.tech: + ansible_host: i-08e7ee6190a099019 + pageserver-2.us-east-1.aws.neon.tech: + ansible_host: i-0686a4e5e208e31a1 + + 
safekeepers: + hosts: + safekeeper-0.us-east-1.aws.neon.tech: + ansible_host: i-04ce739e88793d864 + safekeeper-1.us-east-1.aws.neon.tech: + ansible_host: i-0e9e6c9227fb81410 + safekeeper-2.us-east-1.aws.neon.tech: + ansible_host: i-072f4dd86a327d52f diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 6096ac8ab9..92c7eb2492 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -49,7 +49,7 @@ jobs: shell: bash strategy: matrix: - target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ] + target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1, us-east-1 ] environment: name: prod-${{ matrix.target_region }} steps: @@ -97,6 +97,10 @@ jobs: target_cluster: prod-ap-southeast-1-epsilon deploy_link_proxy: false deploy_legacy_scram_proxy: false + - target_region: us-east-1 + target_cluster: prod-us-east-1-theta + deploy_link_proxy: false + deploy_legacy_scram_proxy: false environment: name: prod-${{ matrix.target_region }} steps: From e2a5177e8915db126ae5d033dd83bfb1c7458fc4 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 18 Apr 2023 16:04:10 +0300 Subject: [PATCH 37/77] Bump h2 from 0.3.17 to 0.3.18 (#4045) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a18f4490da..ce24bbcee8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1756,9 +1756,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b91535aa35fea1523ad1b86cb6b53c28e0ae566ba4a460f4457e936cad7c6f" +checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" dependencies = [ "bytes", "fnv", From f1b7dc40649a044cb614a08de57258fba73d6aa4 Mon Sep 17 00:00:00 2001 From: fcdm <128653800+fcdm@users.noreply.github.com> Date: Tue, 18 Apr 2023 13:25:27 +0100 Subject: 
[PATCH 38/77] Update pageserver instances in us-east-1 --- .github/ansible/prod.us-east-1.hosts.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ansible/prod.us-east-1.hosts.yaml b/.github/ansible/prod.us-east-1.hosts.yaml index fcf472432b..b5b2b076bb 100644 --- a/.github/ansible/prod.us-east-1.hosts.yaml +++ b/.github/ansible/prod.us-east-1.hosts.yaml @@ -34,11 +34,11 @@ storage: pageservers: hosts: pageserver-0.us-east-1.aws.neon.tech: - ansible_host: i-0f58137883429f55a + ansible_host: i-085222088b0d2e0c7 pageserver-1.us-east-1.aws.neon.tech: - ansible_host: i-08e7ee6190a099019 + ansible_host: i-0969d4f684d23a21e pageserver-2.us-east-1.aws.neon.tech: - ansible_host: i-0686a4e5e208e31a1 + ansible_host: i-05dee87895da58dad safekeepers: hosts: From 0bfbae2d7302cf8753a999ecde4da9f921668832 Mon Sep 17 00:00:00 2001 From: Cihan Demirci <128653800+fcdm@users.noreply.github.com> Date: Tue, 18 Apr 2023 16:41:09 +0100 Subject: [PATCH 39/77] Add storage broker deployment to us-east-1 (#4048) --- .github/workflows/deploy-prod.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 92c7eb2492..9fa31b3225 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -151,6 +151,8 @@ jobs: target_cluster: prod-eu-central-1-gamma - target_region: ap-southeast-1 target_cluster: prod-ap-southeast-1-epsilon + - target_region: us-east-1 + target_cluster: prod-us-east-1-theta environment: name: prod-${{ matrix.target_region }} steps: From 02b28ae0b107f0fa2cc5b650d36de04bd7bc78e7 Mon Sep 17 00:00:00 2001 From: sharnoff Date: Tue, 18 Apr 2023 18:54:32 +0300 Subject: [PATCH 40/77] fix vm-informant dbname: "neondb" -> "postgres" (#4046) Changes the vm-informant's postgres connection string's dbname from "neondb" (which sometimes doesn't exist) to "postgres" (which _hopefully_ should exist more often?). 
Currently there are a handful of VMs in prod that aren't working with autoscaling because they don't have the "neondb" database. The vm-informant doesn't require any database in particular; it's just connecting as `cloud_admin` to be able to adjust the file cache settings. --- Dockerfile.vm-compute-node | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.vm-compute-node b/Dockerfile.vm-compute-node index 957166ecd1..aabb3c9953 100644 --- a/Dockerfile.vm-compute-node +++ b/Dockerfile.vm-compute-node @@ -54,7 +54,7 @@ RUN set -e \ RUN set -e \ && echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \ - && CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \ + && CONNSTR="dbname=postgres user=cloud_admin sslmode=disable" \ && ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \ && echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab From 7ba5c286b7c023e39162c6c6bcdad9353a3b5194 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 21 Apr 2023 11:10:48 +0200 Subject: [PATCH 41/77] [compute_ctl] Improve 'empty' compute startup sequence (#4034) Do several attempts to get spec from the control-plane and retry network errors and all reasonable HTTP response codes. Do not hang waiting for spec without confirmation from the control-plane that compute is known and is in the `Empty` state. Adjust the way we track `total_startup_ms` metric, it should be calculated since the moment we received spec, not from the moment `compute_ctl` started. Also introduce a new `wait_for_spec_ms` metric to track the time spent sleeping and waiting for spec to be delivered from control-plane. 
Part of neondatabase/cloud#3533 --- compute_tools/src/bin/compute_ctl.rs | 24 ++++- compute_tools/src/compute.rs | 5 +- compute_tools/src/http/api.rs | 1 + compute_tools/src/http/openapi_spec.yaml | 10 ++ compute_tools/src/spec.rs | 113 +++++++++++++++++++---- libs/compute_api/src/responses.rs | 14 +++ 6 files changed, 141 insertions(+), 26 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 309310407d..36dbc382b5 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -73,7 +73,7 @@ fn main() -> Result<()> { // Try to use just 'postgres' if no path is provided let pgbin = matches.get_one::("pgbin").unwrap(); - let mut spec = None; + let spec; let mut live_config_allowed = false; match spec_json { // First, try to get cluster spec from the cli argument @@ -89,9 +89,13 @@ fn main() -> Result<()> { } else if let Some(id) = compute_id { if let Some(cp_base) = control_plane_uri { live_config_allowed = true; - if let Ok(s) = get_spec_from_control_plane(cp_base, id) { - spec = Some(s); - } + spec = match get_spec_from_control_plane(cp_base, id) { + Ok(s) => s, + Err(e) => { + error!("cannot get response from control plane: {}", e); + panic!("neither spec nor confirmation that compute is in the Empty state was received"); + } + }; } else { panic!("must specify both --control-plane-uri and --compute-id or none"); } @@ -114,7 +118,6 @@ fn main() -> Result<()> { spec_set = false; } let compute_node = ComputeNode { - start_time: Utc::now(), connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?, pgdata: pgdata.to_string(), pgbin: pgbin.to_string(), @@ -147,6 +150,17 @@ fn main() -> Result<()> { let mut state = compute.state.lock().unwrap(); let pspec = state.pspec.as_ref().expect("spec must be set"); let startup_tracing_context = pspec.spec.startup_tracing_context.clone(); + + // Record for how long we slept waiting for the spec. 
+ state.metrics.wait_for_spec_ms = Utc::now() + .signed_duration_since(state.start_time) + .to_std() + .unwrap() + .as_millis() as u64; + // Reset start time to the actual start of the configuration, so that + // total startup time was properly measured at the end. + state.start_time = Utc::now(); + state.status = ComputeStatus::Init; compute.state_changed.notify_all(); drop(state); diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 51de2b6e0a..507dac9c0d 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -38,7 +38,6 @@ use crate::spec::*; /// Compute node info shared across several `compute_ctl` threads. pub struct ComputeNode { - pub start_time: DateTime, // Url type maintains proper escaping pub connstr: url::Url, pub pgdata: String, @@ -66,6 +65,7 @@ pub struct ComputeNode { #[derive(Clone, Debug)] pub struct ComputeState { + pub start_time: DateTime, pub status: ComputeStatus, /// Timestamp of the last Postgres activity pub last_active: DateTime, @@ -77,6 +77,7 @@ pub struct ComputeState { impl ComputeState { pub fn new() -> Self { Self { + start_time: Utc::now(), status: ComputeStatus::Empty, last_active: Utc::now(), error: None, @@ -425,7 +426,7 @@ impl ComputeNode { .unwrap() .as_millis() as u64; state.metrics.total_startup_ms = startup_end_time - .signed_duration_since(self.start_time) + .signed_duration_since(compute_state.start_time) .to_std() .unwrap() .as_millis() as u64; diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 3ca688de69..4468f6f5e4 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -18,6 +18,7 @@ use tracing_utils::http::OtelName; fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { ComputeStatusResponse { + start_time: state.start_time, tenant: state .pspec .as_ref() diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index bdb09d4a6b..cc8f074a50 100644 
--- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -152,11 +152,14 @@ components: type: object description: Compute startup metrics. required: + - wait_for_spec_ms - sync_safekeepers_ms - basebackup_ms - config_ms - total_startup_ms properties: + wait_for_spec_ms: + type: integer sync_safekeepers_ms: type: integer basebackup_ms: @@ -181,6 +184,13 @@ components: - status - last_active properties: + start_time: + type: string + description: | + Time when compute was started. If initially compute was started in the `empty` + state and then provided with valid spec, `start_time` will be reset to the + moment, when spec was received. + example: "2022-10-12T07:20:50.52Z" status: $ref: '#/components/schemas/ComputeStatus' last_active: diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 088f74335a..28e0ef41b7 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -4,42 +4,117 @@ use std::str::FromStr; use anyhow::{anyhow, bail, Result}; use postgres::config::Config; use postgres::{Client, NoTls}; -use tracing::{info, info_span, instrument, span_enabled, warn, Level}; +use reqwest::StatusCode; +use tracing::{error, info, info_span, instrument, span_enabled, warn, Level}; use crate::config; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; -use compute_api::responses::ControlPlaneSpecResponse; +use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse}; use compute_api::spec::{ComputeSpec, Database, PgIdent, Role}; +// Do control plane request and return response if any. In case of error it +// returns a bool flag indicating whether it makes sense to retry the request +// and a string with error message. 
+fn do_control_plane_request( + uri: &str, + jwt: &str, +) -> Result { + let resp = reqwest::blocking::Client::new() + .get(uri) + .header("Authorization", jwt) + .send() + .map_err(|e| { + ( + true, + format!("could not perform spec request to control plane: {}", e), + ) + })?; + + match resp.status() { + StatusCode::OK => match resp.json::() { + Ok(spec_resp) => Ok(spec_resp), + Err(e) => Err(( + true, + format!("could not deserialize control plane response: {}", e), + )), + }, + StatusCode::SERVICE_UNAVAILABLE => { + Err((true, "control plane is temporarily unavailable".to_string())) + } + StatusCode::BAD_GATEWAY => { + // We have a problem with intermittent 502 errors now + // https://github.com/neondatabase/cloud/issues/2353 + // It's fine to retry GET request in this case. + Err((true, "control plane request failed with 502".to_string())) + } + // Another code, likely 500 or 404, means that compute is unknown to the control plane + // or some internal failure happened. Doesn't make much sense to retry in this case. + _ => Err(( + false, + format!( + "unexpected control plane response status code: {}", + resp.status() + ), + )), + } +} + /// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT` /// env variable is set, it will be used for authorization. -pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result { +pub fn get_spec_from_control_plane( + base_uri: &str, + compute_id: &str, +) -> Result> { let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec"); - let jwt: String = match std::env::var("NEON_CONSOLE_JWT") { + let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") { Ok(v) => v, Err(_) => "".to_string(), }; + let mut attempt = 1; + let mut spec: Result> = Ok(None); + info!("getting spec from control plane: {}", cp_uri); - // TODO: check the response. 
We should distinguish cases when it's - // - network error, then retry - // - no spec for compute yet, then wait - // - compute id is unknown or any other error, then bail out - let resp: ControlPlaneSpecResponse = reqwest::blocking::Client::new() - .get(cp_uri) - .header("Authorization", jwt) - .send() - .map_err(|e| anyhow!("could not send spec request to control plane: {}", e))? - .json() - .map_err(|e| anyhow!("could not get compute spec from control plane: {}", e))?; + // Do 3 attempts to get spec from the control plane using the following logic: + // - network error -> then retry + // - compute id is unknown or any other error -> bail out + // - no spec for compute yet (Empty state) -> return Ok(None) + // - got spec -> return Ok(Some(spec)) + while attempt < 4 { + spec = match do_control_plane_request(&cp_uri, &jwt) { + Ok(spec_resp) => match spec_resp.status { + ControlPlaneComputeStatus::Empty => Ok(None), + ControlPlaneComputeStatus::Attached => { + if let Some(spec) = spec_resp.spec { + Ok(Some(spec)) + } else { + bail!("compute is attached, but spec is empty") + } + } + }, + Err((retry, msg)) => { + if retry { + Err(anyhow!(msg)) + } else { + bail!(msg); + } + } + }; - if let Some(spec) = resp.spec { - Ok(spec) - } else { - bail!("could not get compute spec from control plane") + if let Err(e) = &spec { + error!("attempt {} to get spec failed with: {}", attempt, e); + } else { + return spec; + } + + attempt += 1; + std::thread::sleep(std::time::Duration::from_millis(100)); } + + // All attempts failed, return error. 
+ spec } /// It takes cluster specification and does the following: diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 370b2c5626..c409563b56 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -14,6 +14,7 @@ pub struct GenericAPIError { #[derive(Serialize, Debug)] #[serde(rename_all = "snake_case")] pub struct ComputeStatusResponse { + pub start_time: DateTime, pub tenant: Option, pub timeline: Option, pub status: ComputeStatus, @@ -63,6 +64,7 @@ where /// Response of the /metrics.json API #[derive(Clone, Debug, Default, Serialize)] pub struct ComputeMetrics { + pub wait_for_spec_ms: u64, pub sync_safekeepers_ms: u64, pub basebackup_ms: u64, pub config_ms: u64, @@ -75,4 +77,16 @@ pub struct ComputeMetrics { #[derive(Deserialize, Debug)] pub struct ControlPlaneSpecResponse { pub spec: Option, + pub status: ControlPlaneComputeStatus, +} + +#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ControlPlaneComputeStatus { + // Compute is known to control-plane, but it's not + // yet attached to any timeline / endpoint. + Empty, + // Compute is attached to some timeline / endpoint and + // should be able to start with provided spec. + Attached, } From afbbc6103612819058db63dc24829ec5eccccef7 Mon Sep 17 00:00:00 2001 From: Eduard Dyckman Date: Mon, 24 Apr 2023 22:19:25 +0900 Subject: [PATCH 42/77] Adding synthetic size to pageserver swagger (#4049) ## Describe your changes I added synthetic size response to the console swagger. 
Now I am syncing it back to neon --- pageserver/src/http/openapi_spec.yml | 115 +++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index b0e4e1ca85..95f6e96a5b 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -520,6 +520,43 @@ paths: schema: $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/synthetic_size: + parameters: + - name: tenant_id + in: path + required: true + schema: + type: string + format: hex + get: + description: | + Calculate tenant's synthetic size + responses: + "200": + description: Tenant's synthetic size + content: + application/json: + schema: + $ref: "#/components/schemas/SyntheticSizeResponse" + "401": + description: Unauthorized Error + content: + application/json: + schema: + $ref: "#/components/schemas/UnauthorizedError" + "403": + description: Forbidden Error + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenError" + "500": + description: Generic operation error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/size: parameters: - name: tenant_id @@ -948,6 +985,84 @@ components: latest_gc_cutoff_lsn: type: string format: hex + + SyntheticSizeResponse: + type: object + required: + - id + - size + - segment_sizes + - inputs + properties: + id: + type: string + format: hex + size: + type: integer + segment_sizes: + type: array + items: + $ref: "#/components/schemas/SegmentSize" + inputs: + type: object + properties: + segments: + type: array + items: + $ref: "#/components/schemas/SegmentData" + timeline_inputs: + type: array + items: + $ref: "#/components/schemas/TimelineInput" + + SegmentSize: + type: object + required: + - method + - accum_size + properties: + method: + type: string + accum_size: + type: integer + + SegmentData: + type: object + required: + - segment + properties: + segment: + type: 
object + required: + - lsn + properties: + parent: + type: integer + lsn: + type: integer + size: + type: integer + needed: + type: boolean + timeline_id: + type: string + format: hex + kind: + type: string + + TimelineInput: + type: object + required: + - timeline_id + properties: + ancestor_id: + type: string + ancestor_lsn: + type: string + timeline_id: + type: string + format: hex + Error: type: object required: From e83684b8683847a5f467809cd7dd8e2ccdc9bffa Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 25 Apr 2023 14:10:18 +0200 Subject: [PATCH 43/77] add libmetric metric for each logged log message (#4055) This patch extends the libmetrics logging setup functionality with a `tracing` layer that increments a Prometheus counter each time we log a log message. We have the counter per tracing event level. This allows for monitoring WARN and ERR log volume without parsing the log. Also, it would allow cross-checking whether logs got dropped on the way into Loki. It would be nicer if we could hook deeper into the tracing logging layer, to avoid evaluating the filter twice. But I don't know how to do it. 
--- libs/utils/src/logging.rs | 100 ++++++++++++++++++++---- pageserver/src/http/routes.rs | 34 ++++++++ test_runner/fixtures/metrics.py | 1 + test_runner/fixtures/pageserver/http.py | 10 +++ test_runner/regress/test_logging.py | 49 ++++++++++++ 5 files changed, 179 insertions(+), 15 deletions(-) create mode 100644 test_runner/regress/test_logging.py diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index f770622a60..ed856b6804 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,6 +1,7 @@ use std::str::FromStr; use anyhow::Context; +use once_cell::sync::Lazy; use strum_macros::{EnumString, EnumVariantNames}; #[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)] @@ -23,25 +24,64 @@ impl LogFormat { } } -pub fn init(log_format: LogFormat) -> anyhow::Result<()> { - let default_filter_str = "info"; +static TRACING_EVENT_COUNT: Lazy = Lazy::new(|| { + metrics::register_int_counter_vec!( + "libmetrics_tracing_event_count", + "Number of tracing events, by level", + &["level"] + ) + .expect("failed to define metric") +}); +struct TracingEventCountLayer(&'static metrics::IntCounterVec); + +impl tracing_subscriber::layer::Layer for TracingEventCountLayer +where + S: tracing::Subscriber, +{ + fn on_event( + &self, + event: &tracing::Event<'_>, + _ctx: tracing_subscriber::layer::Context<'_, S>, + ) { + let level = event.metadata().level(); + let level = match *level { + tracing::Level::ERROR => "error", + tracing::Level::WARN => "warn", + tracing::Level::INFO => "info", + tracing::Level::DEBUG => "debug", + tracing::Level::TRACE => "trace", + }; + self.0.with_label_values(&[level]).inc(); + } +} + +pub fn init(log_format: LogFormat) -> anyhow::Result<()> { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. 
- let env_filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str)); + let rust_log_env_filter = || { + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")) + }; - let base_logger = tracing_subscriber::fmt() - .with_env_filter(env_filter) - .with_target(false) - .with_ansi(atty::is(atty::Stream::Stdout)) - .with_writer(std::io::stdout); - - match log_format { - LogFormat::Json => base_logger.json().init(), - LogFormat::Plain => base_logger.init(), - LogFormat::Test => base_logger.with_test_writer().init(), - } + // NB: the order of the with() calls does not matter. + // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering + use tracing_subscriber::prelude::*; + tracing_subscriber::registry() + .with({ + let log_layer = tracing_subscriber::fmt::layer() + .with_target(false) + .with_ansi(atty::is(atty::Stream::Stdout)) + .with_writer(std::io::stdout); + let log_layer = match log_format { + LogFormat::Json => log_layer.json().boxed(), + LogFormat::Plain => log_layer.boxed(), + LogFormat::Test => log_layer.with_test_writer().boxed(), + }; + log_layer.with_filter(rust_log_env_filter()) + }) + .with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())) + .init(); Ok(()) } @@ -157,3 +197,33 @@ impl std::fmt::Debug for PrettyLocation<'_, '_> { ::fmt(self, f) } } + +#[cfg(test)] +mod tests { + use metrics::{core::Opts, IntCounterVec}; + + use super::TracingEventCountLayer; + + #[test] + fn tracing_event_count_metric() { + let counter_vec = + IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap(); + let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static + let layer = TracingEventCountLayer(counter_vec); + use tracing_subscriber::prelude::*; + + 
tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || { + tracing::trace!("foo"); + tracing::debug!("foo"); + tracing::info!("foo"); + tracing::warn!("foo"); + tracing::error!("foo"); + }); + + assert_eq!(counter_vec.with_label_values(&["trace"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["debug"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["info"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["warn"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["error"]).get(), 1); + } +} diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 06a97f6dff..3318e5263c 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1201,6 +1201,36 @@ async fn handler_404(_: Request) -> Result, ApiError> { ) } +async fn post_tracing_event_handler(mut r: Request) -> Result, ApiError> { + #[derive(Debug, serde::Deserialize)] + #[serde(rename_all = "lowercase")] + enum Level { + Error, + Warn, + Info, + Debug, + Trace, + } + #[derive(Debug, serde::Deserialize)] + struct Request { + level: Level, + message: String, + } + let body: Request = json_request(&mut r) + .await + .map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?; + + match body.level { + Level::Error => tracing::error!(?body.message), + Level::Warn => tracing::warn!(?body.message), + Level::Info => tracing::info!(?body.message), + Level::Debug => tracing::debug!(?body.message), + Level::Trace => tracing::trace!(?body.message), + } + + json_response(StatusCode::OK, ()) +} + pub fn make_router( conf: &'static PageServerConf, launch_ts: &'static LaunchTimestamp, @@ -1341,5 +1371,9 @@ pub fn make_router( testing_api!("set tenant state to broken", handle_tenant_break), ) .get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r)) + .post( + "/v1/tracing/event", + testing_api!("emit a tracing event", post_tracing_event_handler), + ) .any(handler_404)) } diff --git 
a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 2984f2c7d3..c88b985c8e 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -53,6 +53,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = ( "pageserver_storage_operations_seconds_global_bucket", "libmetrics_launch_timestamp", "libmetrics_build_info", + "libmetrics_tracing_event_count_total", ) PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 69042478c7..cf92aeb6c0 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -550,3 +550,13 @@ class PageserverHttpClient(requests.Session): def tenant_break(self, tenant_id: TenantId): res = self.put(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/break") self.verbose_error(res) + + def post_tracing_event(self, level: str, message: str): + res = self.post( + f"http://localhost:{self.port}/v1/tracing/event", + json={ + "level": level, + "message": message, + }, + ) + self.verbose_error(res) diff --git a/test_runner/regress/test_logging.py b/test_runner/regress/test_logging.py new file mode 100644 index 0000000000..d559be0a8f --- /dev/null +++ b/test_runner/regress/test_logging.py @@ -0,0 +1,49 @@ +import uuid + +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import wait_until + + +@pytest.mark.parametrize("level", ["trace", "debug", "info", "warn", "error"]) +def test_logging_event_count(neon_env_builder: NeonEnvBuilder, level: str): + # self-test: make sure the event is logged (i.e., our testing endpoint works) + log_expected = { + "trace": False, + "debug": False, + "info": True, + "warn": True, + "error": True, + }[level] + + env = neon_env_builder.init_start() + ps_http = env.pageserver.http_client() + msg_id = uuid.uuid4().hex + + # NB: the _total suffix is added by our prometheus client + 
before = ps_http.get_metric_value("libmetrics_tracing_event_count_total", {"level": level}) + + # post the event + ps_http.post_tracing_event(level, msg_id) + if log_expected: + env.pageserver.allowed_errors.append(f".*{msg_id}.*") + + def assert_logged(): + if not log_expected: + return + assert env.pageserver.log_contains(f".*{msg_id}.*") + + wait_until(10, 0.5, assert_logged) + + # make sure it's counted + def assert_metric_value(): + if not log_expected: + return + # NB: the _total suffix is added by our prometheus client + val = ps_http.get_metric_value("libmetrics_tracing_event_count_total", {"level": level}) + val = val or 0.0 + log.info("libmetrics_tracing_event_count: %s", val) + assert val > (before or 0.0) + + wait_until(10, 1, assert_metric_value) From 4911d7ce6f6ab1f89ac1b026add8514e8e84979d Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 25 Apr 2023 15:22:23 +0300 Subject: [PATCH 44/77] feat: warn when requests get cancelled (#4064) Add a simple disarmable dropguard to log if request is cancelled before it is completed. We currently don't have this, and it makes for difficult to know when the request was dropped. --- libs/utils/src/http/endpoint.rs | 39 ++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index 616f2b8468..b11aef9892 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -76,6 +76,7 @@ where let log_quietly = method == Method::GET; async move { + let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding(); if log_quietly { debug!("Handling request"); } else { @@ -87,7 +88,11 @@ where // Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call. // // Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally. 
- match (self.0)(request).await { + let res = (self.0)(request).await; + + cancellation_guard.disarm(); + + match res { Ok(response) => { let response_status = response.status(); if log_quietly && response_status.is_success() { @@ -105,6 +110,38 @@ where } } +/// Drop guard to WARN in case the request was dropped before completion. +struct RequestCancelled { + warn: Option, +} + +impl RequestCancelled { + /// Create the drop guard using the [`tracing::Span::current`] as the span. + fn warn_when_dropped_without_responding() -> Self { + RequestCancelled { + warn: Some(tracing::Span::current()), + } + } + + /// Consume the drop guard without logging anything. + fn disarm(mut self) { + self.warn = None; + } +} + +impl Drop for RequestCancelled { + fn drop(&mut self) { + if let Some(span) = self.warn.take() { + // the span has all of the info already, but the outer `.instrument(span)` has already + // been dropped, so we need to manually re-enter it for this message. + // + // this is what the instrument would do before polling so it is fine. + let _g = span.entered(); + warn!("request was dropped before completing"); + } + } +} + async fn prometheus_metrics_handler(_req: Request) -> Result, ApiError> { SERVE_METRICS_COUNT.inc(); From fa20e3757432a0b900f33a89441f7fee02fc06c9 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 25 Apr 2023 14:22:48 +0200 Subject: [PATCH 45/77] add gauge for in-flight layer uploads (#3951) For the "worst-case /storage usage panel", we need to compute ``` remote size + local-only size ``` We currently don't have a metric for local-only layers. The number of in-flight layers in the upload queue is just that, so, let Prometheus scrape it. The metric is two counters (started and finished). The delta is the amount of in-flight uploads in the queue. The metrics are incremented in the respective `call_unfinished_metric_*` functions. These track ongoing operations by file_kind and op_kind. 
We only need this metric for layer uploads, so, there's the new RemoteTimelineClientMetricsCallTrackSize type that forces all call sites to decide whether they want the size tracked or not. If we find that other file_kinds or op_kinds are interesting (metadata uploads, layer downloads, layer deletes) are interesting, we can just enable them, and they'll be just another label combination within the metrics that this PR adds. fixes https://github.com/neondatabase/neon/issues/3922 --- pageserver/src/metrics.rs | 195 +++++++++++- .../src/tenant/remote_timeline_client.rs | 285 ++++++++++++++---- test_runner/fixtures/metrics.py | 2 + 3 files changed, 405 insertions(+), 77 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index c075315683..cf60a1a404 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -385,6 +385,26 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy = Lazy::new .expect("failed to define a metric") }); +static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_remote_timeline_client_bytes_started", + "Incremented by the number of bytes associated with a remote timeline client operation. \ + The increment happens when the operation is scheduled.", + &["tenant_id", "timeline_id", "file_kind", "op_kind"], + ) + .expect("failed to define a metric") +}); + +static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_remote_timeline_client_bytes_finished", + "Incremented by the number of bytes associated with a remote timeline client operation. 
\ + The increment happens when the operation finishes (regardless of success/failure/shutdown).", + &["tenant_id", "timeline_id", "file_kind", "op_kind"], + ) + .expect("failed to define a metric") +}); + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RemoteOpKind { Upload, @@ -739,6 +759,8 @@ pub struct RemoteTimelineClientMetrics { remote_operation_time: Mutex>, calls_unfinished_gauge: Mutex>, calls_started_hist: Mutex>, + bytes_started_counter: Mutex>, + bytes_finished_counter: Mutex>, } impl RemoteTimelineClientMetrics { @@ -749,6 +771,8 @@ impl RemoteTimelineClientMetrics { remote_operation_time: Mutex::new(HashMap::default()), calls_unfinished_gauge: Mutex::new(HashMap::default()), calls_started_hist: Mutex::new(HashMap::default()), + bytes_started_counter: Mutex::new(HashMap::default()), + bytes_finished_counter: Mutex::new(HashMap::default()), remote_physical_size_gauge: Mutex::new(None), } } @@ -787,6 +811,7 @@ impl RemoteTimelineClientMetrics { }); metric.clone() } + fn calls_unfinished_gauge( &self, file_kind: &RemoteOpFileKind, @@ -828,32 +853,125 @@ impl RemoteTimelineClientMetrics { }); metric.clone() } + + fn bytes_started_counter( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> IntCounter { + // XXX would be nice to have an upgradable RwLock + let mut guard = self.bytes_started_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + let metric = guard.entry(key).or_insert_with(move || { + REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER + .get_metric_with_label_values(&[ + &self.tenant_id.to_string(), + &self.timeline_id.to_string(), + key.0, + key.1, + ]) + .unwrap() + }); + metric.clone() + } + + fn bytes_finished_counter( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> IntCounter { + // XXX would be nice to have an upgradable RwLock + let mut guard = self.bytes_finished_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + let metric = 
guard.entry(key).or_insert_with(move || { + REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER + .get_metric_with_label_values(&[ + &self.tenant_id.to_string(), + &self.timeline_id.to_string(), + key.0, + key.1, + ]) + .unwrap() + }); + metric.clone() + } +} + +#[cfg(test)] +impl RemoteTimelineClientMetrics { + pub fn get_bytes_started_counter_value( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> Option { + let guard = self.bytes_started_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + guard.get(&key).map(|counter| counter.get()) + } + + pub fn get_bytes_finished_counter_value( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> Option { + let guard = self.bytes_finished_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + guard.get(&key).map(|counter| counter.get()) + } } /// See [`RemoteTimelineClientMetrics::call_begin`]. #[must_use] -pub(crate) struct RemoteTimelineClientCallMetricGuard(Option); +pub(crate) struct RemoteTimelineClientCallMetricGuard { + /// Decremented on drop. + calls_unfinished_metric: Option, + /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop. + bytes_finished: Option<(IntCounter, u64)>, +} impl RemoteTimelineClientCallMetricGuard { - /// Consume this guard object without decrementing the metric. - /// The caller vouches to do this manually, so that the prior increment of the gauge will cancel out. + /// Consume this guard object without performing the metric updates it would do on `drop()`. + /// The caller vouches to do the metric updates manually. 
pub fn will_decrement_manually(mut self) { - self.0 = None; // prevent drop() from decrementing + let RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric, + bytes_finished, + } = &mut self; + calls_unfinished_metric.take(); + bytes_finished.take(); } } impl Drop for RemoteTimelineClientCallMetricGuard { fn drop(&mut self) { - if let RemoteTimelineClientCallMetricGuard(Some(guard)) = self { + let RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric, + bytes_finished, + } = self; + if let Some(guard) = calls_unfinished_metric.take() { guard.dec(); } + if let Some((bytes_finished_metric, value)) = bytes_finished { + bytes_finished_metric.inc_by(*value); + } } } +/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to +/// track the byte size of this call in applicable metric(s). +pub(crate) enum RemoteTimelineClientMetricsCallTrackSize { + /// Do not account for this call's byte size in any metrics. + /// The `reason` field is there to make the call sites self-documenting + /// about why they don't need the metric. + DontTrackSize { reason: &'static str }, + /// Track the byte size of the call in applicable metric(s). + Bytes(u64), +} + impl RemoteTimelineClientMetrics { - /// Increment the metrics that track ongoing calls to the remote timeline client instance. + /// Update the metrics that change when a call to the remote timeline client instance starts. /// - /// Drop the returned guard object once the operation is finished to decrement the values. + /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions. /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`] if that /// is more suitable. /// Never do both. 
@@ -861,24 +979,51 @@ impl RemoteTimelineClientMetrics { &self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind, + size: RemoteTimelineClientMetricsCallTrackSize, ) -> RemoteTimelineClientCallMetricGuard { - let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); + let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); self.calls_started_hist(file_kind, op_kind) - .observe(unfinished_metric.get() as f64); - unfinished_metric.inc(); - RemoteTimelineClientCallMetricGuard(Some(unfinished_metric)) + .observe(calls_unfinished_metric.get() as f64); + calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric + + let bytes_finished = match size { + RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => { + // nothing to do + None + } + RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => { + self.bytes_started_counter(file_kind, op_kind).inc_by(size); + let finished_counter = self.bytes_finished_counter(file_kind, op_kind); + Some((finished_counter, size)) + } + }; + RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric: Some(calls_unfinished_metric), + bytes_finished, + } } - /// Manually decrement the metric instead of using the guard object. + /// Manually udpate the metrics that track completions, instead of using the guard object. /// Using the guard object is generally preferable. /// See [`call_begin`] for more context. 
- pub(crate) fn call_end(&self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind) { - let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); + pub(crate) fn call_end( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + size: RemoteTimelineClientMetricsCallTrackSize, + ) { + let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); debug_assert!( - unfinished_metric.get() > 0, + calls_unfinished_metric.get() > 0, "begin and end should cancel out" ); - unfinished_metric.dec(); + calls_unfinished_metric.dec(); + match size { + RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {} + RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => { + self.bytes_finished_counter(file_kind, op_kind).inc_by(size); + } + } } } @@ -891,6 +1036,8 @@ impl Drop for RemoteTimelineClientMetrics { remote_operation_time, calls_unfinished_gauge, calls_started_hist, + bytes_started_counter, + bytes_finished_counter, } = self; for ((a, b, c), _) in remote_operation_time.get_mut().unwrap().drain() { let _ = REMOTE_OPERATION_TIME.remove_label_values(&[tenant_id, timeline_id, a, b, c]); @@ -911,6 +1058,22 @@ impl Drop for RemoteTimelineClientMetrics { b, ]); } + for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() { + let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[ + tenant_id, + timeline_id, + a, + b, + ]); + } + for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() { + let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[ + tenant_id, + timeline_id, + a, + b, + ]); + } { let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 28c4943dbd..c42824a8b5 100644 --- 
a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -219,7 +219,8 @@ use utils::lsn::Lsn; use crate::metrics::{ MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, - REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES, + REMOTE_ONDEMAND_DOWNLOADED_LAYERS, }; use crate::tenant::remote_timeline_client::index::LayerFileMetadata; use crate::{ @@ -367,9 +368,13 @@ impl RemoteTimelineClient { /// Download index file pub async fn download_index_file(&self) -> Result { - let _unfinished_gauge_guard = self - .metrics - .call_begin(&RemoteOpFileKind::Index, &RemoteOpKind::Download); + let _unfinished_gauge_guard = self.metrics.call_begin( + &RemoteOpFileKind::Index, + &RemoteOpKind::Download, + crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { + reason: "no need for a downloads gauge", + }, + ); download::download_index_part( self.conf, @@ -398,9 +403,13 @@ impl RemoteTimelineClient { layer_metadata: &LayerFileMetadata, ) -> anyhow::Result { let downloaded_size = { - let _unfinished_gauge_guard = self - .metrics - .call_begin(&RemoteOpFileKind::Layer, &RemoteOpKind::Download); + let _unfinished_gauge_guard = self.metrics.call_begin( + &RemoteOpFileKind::Layer, + &RemoteOpKind::Download, + crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { + reason: "no need for a downloads gauge", + }, + ); download::download_layer_file( self.conf, &self.storage_impl, @@ -886,11 +895,32 @@ impl RemoteTimelineClient { fn calls_unfinished_metric_impl( &self, op: &UploadOp, - ) -> Option<(RemoteOpFileKind, RemoteOpKind)> { + ) -> Option<( + RemoteOpFileKind, + RemoteOpKind, + RemoteTimelineClientMetricsCallTrackSize, + )> { + use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize; let res = match op { - UploadOp::UploadLayer(_, _) => (RemoteOpFileKind::Layer, 
RemoteOpKind::Upload), - UploadOp::UploadMetadata(_, _) => (RemoteOpFileKind::Index, RemoteOpKind::Upload), - UploadOp::Delete(file_kind, _) => (*file_kind, RemoteOpKind::Delete), + UploadOp::UploadLayer(_, m) => ( + RemoteOpFileKind::Layer, + RemoteOpKind::Upload, + RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size()), + ), + UploadOp::UploadMetadata(_, _) => ( + RemoteOpFileKind::Index, + RemoteOpKind::Upload, + DontTrackSize { + reason: "metadata uploads are tiny", + }, + ), + UploadOp::Delete(file_kind, _) => ( + *file_kind, + RemoteOpKind::Delete, + DontTrackSize { + reason: "should we track deletes? positive or negative sign?", + }, + ), UploadOp::Barrier(_) => { // we do not account these return None; @@ -900,20 +930,20 @@ impl RemoteTimelineClient { } fn calls_unfinished_metric_begin(&self, op: &UploadOp) { - let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) { + let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) { Some(x) => x, None => return, }; - let guard = self.metrics.call_begin(&file_kind, &op_kind); + let guard = self.metrics.call_begin(&file_kind, &op_kind, track_bytes); guard.will_decrement_manually(); // in unfinished_ops_metric_end() } fn calls_unfinished_metric_end(&self, op: &UploadOp) { - let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) { + let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) { Some(x) => x, None => return, }; - self.metrics.call_end(&file_kind, &op_kind); + self.metrics.call_end(&file_kind, &op_kind, track_bytes); } fn stop(&self) { @@ -981,11 +1011,19 @@ impl RemoteTimelineClient { mod tests { use super::*; use crate::{ - tenant::harness::{TenantHarness, TIMELINE_ID}, + context::RequestContext, + tenant::{ + harness::{TenantHarness, TIMELINE_ID}, + Tenant, + }, DEFAULT_PG_VERSION, }; use remote_storage::{RemoteStorageConfig, RemoteStorageKind}; - use std::{collections::HashSet, path::Path}; + use std::{ + 
collections::HashSet, + path::{Path, PathBuf}, + }; + use tokio::runtime::EnterGuard; use utils::lsn::Lsn; pub(super) fn dummy_contents(name: &str) -> Vec { @@ -1034,39 +1072,80 @@ mod tests { assert_eq!(found, expected); } + struct TestSetup { + runtime: &'static tokio::runtime::Runtime, + entered_runtime: EnterGuard<'static>, + harness: TenantHarness<'static>, + tenant: Arc, + tenant_ctx: RequestContext, + remote_fs_dir: PathBuf, + client: Arc, + } + + impl TestSetup { + fn new(test_name: &str) -> anyhow::Result { + // Use a current-thread runtime in the test + let runtime = Box::leak(Box::new( + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?, + )); + let entered_runtime = runtime.enter(); + + let test_name = Box::leak(Box::new(format!("remote_timeline_client__{test_name}"))); + let harness = TenantHarness::create(test_name)?; + let (tenant, ctx) = runtime.block_on(harness.load()); + // create an empty timeline directory + let timeline = + tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?; + let _ = timeline.initialize(&ctx).unwrap(); + + let remote_fs_dir = harness.conf.workdir.join("remote_fs"); + std::fs::create_dir_all(remote_fs_dir)?; + let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?; + + let storage_config = RemoteStorageConfig { + max_concurrent_syncs: std::num::NonZeroUsize::new( + remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS, + ) + .unwrap(), + max_sync_errors: std::num::NonZeroU32::new( + remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS, + ) + .unwrap(), + storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()), + }; + + let storage = GenericRemoteStorage::from_config(&storage_config).unwrap(); + + let client = Arc::new(RemoteTimelineClient { + conf: harness.conf, + runtime, + tenant_id: harness.tenant_id, + timeline_id: TIMELINE_ID, + storage_impl: storage, + upload_queue: Mutex::new(UploadQueue::Uninitialized), + metrics: 
Arc::new(RemoteTimelineClientMetrics::new( + &harness.tenant_id, + &TIMELINE_ID, + )), + }); + + Ok(Self { + runtime, + entered_runtime, + harness, + tenant, + tenant_ctx: ctx, + remote_fs_dir, + client, + }) + } + } + // Test scheduling #[test] fn upload_scheduling() -> anyhow::Result<()> { - // Use a current-thread runtime in the test - let runtime = Box::leak(Box::new( - tokio::runtime::Builder::new_current_thread() - .enable_all() - .build()?, - )); - let _entered = runtime.enter(); - - let harness = TenantHarness::create("upload_scheduling")?; - let (tenant, ctx) = runtime.block_on(harness.load()); - let _timeline = - tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?; - let timeline_path = harness.timeline_path(&TIMELINE_ID); - - let remote_fs_dir = harness.conf.workdir.join("remote_fs"); - std::fs::create_dir_all(remote_fs_dir)?; - let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?; - - let storage_config = RemoteStorageConfig { - max_concurrent_syncs: std::num::NonZeroUsize::new( - remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS, - ) - .unwrap(), - max_sync_errors: std::num::NonZeroU32::new( - remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS, - ) - .unwrap(), - storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()), - }; - // Test outline: // // Schedule upload of a bunch of layers. Check that they are started immediately, not queued @@ -1081,21 +1160,19 @@ mod tests { // Schedule another deletion. Check that it's launched immediately. // Schedule index upload. 
Check that it's queued - println!("workdir: {}", harness.conf.workdir.display()); - - let storage_impl = GenericRemoteStorage::from_config(&storage_config)?; - let client = Arc::new(RemoteTimelineClient { - conf: harness.conf, + let TestSetup { runtime, - tenant_id: harness.tenant_id, - timeline_id: TIMELINE_ID, - storage_impl, - upload_queue: Mutex::new(UploadQueue::Uninitialized), - metrics: Arc::new(RemoteTimelineClientMetrics::new( - &harness.tenant_id, - &TIMELINE_ID, - )), - }); + entered_runtime: _entered_runtime, + harness, + tenant: _tenant, + tenant_ctx: _tenant_ctx, + remote_fs_dir, + client, + } = TestSetup::new("upload_scheduling").unwrap(); + + let timeline_path = harness.timeline_path(&TIMELINE_ID); + + println!("workdir: {}", harness.conf.workdir.display()); let remote_timeline_dir = remote_fs_dir.join(timeline_path.strip_prefix(&harness.conf.workdir)?); @@ -1216,4 +1293,90 @@ mod tests { Ok(()) } + + #[test] + fn bytes_unfinished_gauge_for_layer_file_uploads() -> anyhow::Result<()> { + // Setup + + let TestSetup { + runtime, + harness, + client, + .. 
+ } = TestSetup::new("metrics")?; + + let metadata = dummy_metadata(Lsn(0x10)); + client.init_upload_queue_for_empty_remote(&metadata)?; + + let timeline_path = harness.timeline_path(&TIMELINE_ID); + + let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); + let content_1 = dummy_contents("foo"); + std::fs::write( + timeline_path.join(layer_file_name_1.file_name()), + &content_1, + )?; + + #[derive(Debug, PartialEq)] + struct BytesStartedFinished { + started: Option, + finished: Option, + } + let get_bytes_started_stopped = || { + let started = client + .metrics + .get_bytes_started_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload) + .map(|v| v.try_into().unwrap()); + let stopped = client + .metrics + .get_bytes_finished_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload) + .map(|v| v.try_into().unwrap()); + BytesStartedFinished { + started, + finished: stopped, + } + }; + + // Test + + let init = get_bytes_started_stopped(); + + client.schedule_layer_file_upload( + &layer_file_name_1, + &LayerFileMetadata::new(content_1.len() as u64), + )?; + + let pre = get_bytes_started_stopped(); + + runtime.block_on(client.wait_completion())?; + + let post = get_bytes_started_stopped(); + + // Validate + + assert_eq!( + init, + BytesStartedFinished { + started: None, + finished: None + } + ); + assert_eq!( + pre, + BytesStartedFinished { + started: Some(content_1.len()), + // assert that the _finished metric is created eagerly so that subtractions work on first sample + finished: Some(0), + } + ); + assert_eq!( + post, + BytesStartedFinished { + started: Some(content_1.len()), + finished: Some(content_1.len()) + } + ); + + Ok(()) + } } diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index c88b985c8e..5fed6fcf84 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -45,6 +45,8 @@ 
PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = ( *[f"pageserver_remote_timeline_client_calls_started_{x}" for x in ["bucket", "count", "sum"]], *[f"pageserver_remote_operation_seconds_{x}" for x in ["bucket", "count", "sum"]], "pageserver_remote_physical_size", + "pageserver_remote_timeline_client_bytes_started_total", + "pageserver_remote_timeline_client_bytes_finished_total", ) PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = ( From cb9473928df94148b42297fe30b0b99682609249 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 25 Apr 2023 16:22:16 +0300 Subject: [PATCH 46/77] feat: add rough timings for basebackup (#4062) just record the time needed for waiting for the lsn and then the basebackup in a log message in millis. this is related to ongoing investigations into cold start performance. this could also be a counter. it cannot be added next to smgr histograms, because we don't want another histogram per timeline. the aim is to allow drilling deeper into which timelines were slow, and to understand why some need two basebackups. 
--- pageserver/src/page_service.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index bd38a7a2f3..135f08e846 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -700,6 +700,8 @@ impl PageServerHandler { full_backup: bool, ctx: RequestContext, ) -> anyhow::Result<()> { + let started = std::time::Instant::now(); + // check that the timeline exists let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); @@ -712,6 +714,8 @@ impl PageServerHandler { .context("invalid basebackup lsn")?; } + let lsn_awaited_after = started.elapsed(); + // switch client to COPYOUT pgb.write_message_noflush(&BeMessage::CopyOutResponse)?; pgb.flush().await?; @@ -732,7 +736,17 @@ impl PageServerHandler { pgb.write_message_noflush(&BeMessage::CopyDone)?; pgb.flush().await?; - info!("basebackup complete"); + + let basebackup_after = started + .elapsed() + .checked_sub(lsn_awaited_after) + .unwrap_or(Duration::ZERO); + + info!( + lsn_await_millis = lsn_awaited_after.as_millis(), + basebackup_millis = basebackup_after.as_millis(), + "basebackup complete" + ); Ok(()) } From dbbe032c395f7f4a8a13e4e4631adb801a09c1bd Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 25 Apr 2023 15:33:30 +0200 Subject: [PATCH 47/77] neon_local: fix `tenant create -c eviction_policy:...` (#4004) And add corresponding unit test. The fix is to use `.remove()` instead of `.get()` when processing the arguments hash map. The code uses emptiness of the hash map to determine whether all arguments have been processed. This was likely a copy-paste error. 
refs https://github.com/neondatabase/neon/issues/3942 --- control_plane/src/pageserver.rs | 4 ++-- test_runner/regress/test_tenant_conf.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index b700d426ba..75991045a4 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -359,8 +359,8 @@ impl PageServerNode { .transpose() .context("Failed to parse 'trace_read_requests' as bool")?, eviction_policy: settings - .get("eviction_policy") - .map(|x| serde_json::from_str(x)) + .remove("eviction_policy") + .map(serde_json::from_str) .transpose() .context("Failed to parse 'eviction_policy' json")?, min_resident_size_override: settings diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 1ed86d19a2..b83bd5fc99 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -1,3 +1,4 @@ +import json from contextlib import closing import psycopg2.extras @@ -22,6 +23,7 @@ wait_lsn_timeout='111 s'; checkpoint_distance = 10000 compaction_target_size = 1048576 evictions_low_residence_duration_metric_threshold = "2 days" +eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold = "23 hours" } """ env = neon_env_builder.init_start() @@ -44,6 +46,7 @@ evictions_low_residence_duration_metric_threshold = "2 days" "checkpoint_distance": "20000", "gc_period": "30sec", "evictions_low_residence_duration_metric_threshold": "42s", + "eviction_policy": json.dumps({"kind": "NoEviction"}), } tenant, _ = env.neon_cli.create_tenant(conf=new_conf) @@ -84,6 +87,11 @@ evictions_low_residence_duration_metric_threshold = "2 days" assert effective_config["image_creation_threshold"] == 3 assert effective_config["pitr_interval"] == "7days" assert effective_config["evictions_low_residence_duration_metric_threshold"] == "2days" + assert 
effective_config["eviction_policy"] == { + "kind": "LayerAccessThreshold", + "period": "20s", + "threshold": "23h", + } # check the configuration of the new tenant with closing(env.pageserver.connect()) as psconn: @@ -121,6 +129,9 @@ evictions_low_residence_duration_metric_threshold = "2 days" assert ( new_effective_config["evictions_low_residence_duration_metric_threshold"] == "42s" ), "Should override default value" + assert new_effective_config["eviction_policy"] == { + "kind": "NoEviction" + }, "Specific 'eviction_policy' config should override the default value" assert new_effective_config["compaction_target_size"] == 1048576 assert new_effective_config["compaction_period"] == "20s" assert new_effective_config["compaction_threshold"] == 10 @@ -135,6 +146,9 @@ evictions_low_residence_duration_metric_threshold = "2 days" "compaction_period": "80sec", "image_creation_threshold": "2", "evictions_low_residence_duration_metric_threshold": "23h", + "eviction_policy": json.dumps( + {"kind": "LayerAccessThreshold", "period": "80s", "threshold": "42h"} + ), } env.neon_cli.config_tenant( tenant_id=tenant, @@ -180,6 +194,11 @@ evictions_low_residence_duration_metric_threshold = "2 days" assert ( updated_effective_config["evictions_low_residence_duration_metric_threshold"] == "23h" ), "Should override default value" + assert updated_effective_config["eviction_policy"] == { + "kind": "LayerAccessThreshold", + "period": "1m 20s", + "threshold": "1day 18h", + }, "Specific 'eviction_policy' config should override the default value" assert updated_effective_config["compaction_target_size"] == 1048576 assert updated_effective_config["compaction_threshold"] == 10 assert updated_effective_config["gc_horizon"] == 67108864 @@ -239,6 +258,11 @@ evictions_low_residence_duration_metric_threshold = "2 days" assert final_effective_config["gc_period"] == "1h" assert final_effective_config["image_creation_threshold"] == 3 assert 
final_effective_config["evictions_low_residence_duration_metric_threshold"] == "2days" + assert final_effective_config["eviction_policy"] == { + "kind": "LayerAccessThreshold", + "period": "20s", + "threshold": "23h", + } # restart the pageserver and ensure that the config is still correct env.pageserver.stop() From 78bbbccadbc66bef6715a5a2ad1324ccacb94587 Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Tue, 25 Apr 2023 16:46:52 +0200 Subject: [PATCH 48/77] Deploy proxies for preview enviroments (#4052) ## Describe your changes Deploy `main` proxies to the preview environments We don't deploy storage there yet, as it's tricky. ## Issue ticket number and link https://github.com/neondatabase/cloud/issues/4737 --- .../ansible/staging.eu-central-1.hosts.yaml | 47 +++++++++++++ ...u-central-1-alpha.neon-storage-broker.yaml | 52 ++++++++++++++ .../preview-template.neon-proxy-scram.yaml | 67 +++++++++++++++++++ .github/workflows/deploy-dev.yml | 52 +++++++++++++- 4 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 .github/ansible/staging.eu-central-1.hosts.yaml create mode 100644 .github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml create mode 100644 .github/helm-values/preview-template.neon-proxy-scram.yaml diff --git a/.github/ansible/staging.eu-central-1.hosts.yaml b/.github/ansible/staging.eu-central-1.hosts.yaml new file mode 100644 index 0000000000..db1d1adcff --- /dev/null +++ b/.github/ansible/staging.eu-central-1.hosts.yaml @@ -0,0 +1,47 @@ +storage: + vars: + bucket_name: neon-dev-storage-eu-central-1 + bucket_region: eu-central-1 + # We only register/update storage in one preview console and manually copy to other instances + console_mgmt_base_url: http://neon-internal-api.helium.aws.neon.build + broker_endpoint: http://storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build:50051 + pageserver_config_stub: + pg_distrib_dir: /usr/local + metric_collection_endpoint: 
http://neon-internal-api.helium.aws.neon.build/billing/api/v1/usage_events + metric_collection_interval: 10min + disk_usage_based_eviction: + max_usage_pct: 80 + min_avail_bytes: 0 + period: "10s" + tenant_config: + eviction_policy: + kind: "LayerAccessThreshold" + period: "20m" + threshold: &default_eviction_threshold "20m" + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: eu-central-1 + ansible_aws_ssm_bucket_name: neon-dev-storage-eu-central-1 + console_region_id: aws-eu-central-1 + sentry_environment: staging + + children: + pageservers: + hosts: + pageserver-0.eu-central-1.aws.neon.build: + ansible_host: i-011f93ec26cfba2d4 + + safekeepers: + hosts: + safekeeper-0.eu-central-1.aws.neon.build: + ansible_host: i-0ff026d27babf8ddd + safekeeper-1.eu-central-1.aws.neon.build: + ansible_host: i-03983a49ee54725d9 + safekeeper-2.eu-central-1.aws.neon.build: + ansible_host: i-0bd025ecdb61b0db3 diff --git a/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml b/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml new file mode 100644 index 0000000000..aaa1ec59b4 --- /dev/null +++ b/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml @@ -0,0 +1,52 @@ +# Helm chart values for neon-storage-broker +podLabels: + neon_env: staging + neon_service: storage-broker + +# Use L4 LB +service: + # service.annotations -- Annotations to add to the service + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet + # assign service to this name at 
external-dns + external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build + # service.type -- Service type + type: LoadBalancer + # service.port -- broker listen port + port: 50051 + +ingress: + enabled: false + +metrics: + enabled: false + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-storage-broker.fullname\" . }}" + labels: + helm.sh/chart: neon-storage-broker-{{ .Chart.Version }} + app.kubernetes.io/name: neon-storage-broker + app.kubernetes.io/instance: neon-storage-broker + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-storage-broker" + endpoints: + - port: broker + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" + +settings: + sentryEnvironment: "staging" diff --git a/.github/helm-values/preview-template.neon-proxy-scram.yaml b/.github/helm-values/preview-template.neon-proxy-scram.yaml new file mode 100644 index 0000000000..f4bd418e28 --- /dev/null +++ b/.github/helm-values/preview-template.neon-proxy-scram.yaml @@ -0,0 +1,67 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. 
+ +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/management/api/v2" + domain: "*.cloud.${PREVIEW_NAME}.aws.neon.build" + sentryEnvironment: "staging" + wssPort: 8443 + metricCollectionEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/billing/api/v1/usage_events" + metricCollectionInterval: "1min" + +# -- Additional labels for neon-proxy pods +podLabels: + neon_service: proxy-scram + neon_env: test + neon_region: ${PREVIEW_NAME}.eu-central-1 + + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: cloud.${PREVIEW_NAME}.aws.neon.build + httpsPort: 443 + +#metrics: +# enabled: true +# serviceMonitor: +# enabled: true +# selector: +# release: kube-prometheus-stack + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-proxy.fullname\" . }}" + labels: + helm.sh/chart: neon-proxy-{{ .Chart.Version }} + app.kubernetes.io/name: neon-proxy + app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . 
}}" + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-proxy" + endpoints: + - port: http + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index b080a29f7c..fba292f0f9 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -48,7 +48,8 @@ jobs: shell: bash strategy: matrix: - target_region: [ eu-west-1, us-east-2 ] + # TODO(sergey): Fix storage deploy in eu-central-1 + target_region: [ eu-west-1, us-east-2] environment: name: dev-${{ matrix.target_region }} steps: @@ -133,6 +134,53 @@ jobs: - name: Cleanup helm folder run: rm -rf ~/.cache + + deploy-preview-proxy-new: + runs-on: [ self-hosted, gen3, small ] + container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned + if: inputs.deployProxy + defaults: + run: + shell: bash + strategy: + matrix: + include: + - target_region: eu-central-1 + target_cluster: dev-eu-central-1-alpha + environment: + name: dev-${{ matrix.target_region }} + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + fetch-depth: 0 + ref: ${{ inputs.branch }} + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1-node16 + with: + role-to-assume: arn:aws:iam::369495373322:role/github-runner + aws-region: eu-central-1 + role-skip-session-tagging: true + role-duration-seconds: 1800 + + - name: Configure environment + run: | + helm repo add neondatabase https://neondatabase.github.io/helm-charts + aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }} + + - name: Re-deploy preview proxies + run: | + DOCKER_TAG=${{ inputs.dockerTag }} + for PREVIEW_NAME in helium argon krypton xenon radon oganesson hydrogen 
nitrogen oxygen fluorine chlorine; do + export PREVIEW_NAME + envsubst <.github/helm-values/preview-template.neon-proxy-scram.yaml >preview-${PREVIEW_NAME}.neon-proxy-scram.yaml + helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s + done + + - name: Cleanup helm folder + run: rm -rf ~/.cache deploy-storage-broker-new: runs-on: [ self-hosted, gen3, small ] @@ -148,6 +196,8 @@ jobs: target_cluster: dev-us-east-2-beta - target_region: eu-west-1 target_cluster: dev-eu-west-1-zeta + - target_region: eu-central-1 + target_cluster: dev-central-1-alpha environment: name: dev-${{ matrix.target_region }} steps: From 7f80230fd21cacfb20cae09befc7725abb9c0efe Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 25 Apr 2023 18:07:04 +0300 Subject: [PATCH 49/77] fix: stop dead_code rustc lint (#4070) only happens without `--all-features` which is what `./run_clippy.sh` uses. 
--- pageserver/src/http/routes.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 3318e5263c..b1251123b2 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1201,6 +1201,7 @@ async fn handler_404(_: Request) -> Result, ApiError> { ) } +#[cfg(feature = "testing")] async fn post_tracing_event_handler(mut r: Request) -> Result, ApiError> { #[derive(Debug, serde::Deserialize)] #[serde(rename_all = "lowercase")] From bfd45dd6713a2e9038954cf0368b1a082937b045 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 25 Apr 2023 18:41:09 +0300 Subject: [PATCH 50/77] test_tenant_config: allow ERROR from eviction task (#4074) --- test_runner/regress/test_tenant_conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index b83bd5fc99..8677a554f7 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -27,6 +27,8 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold = """ env = neon_env_builder.init_start() + # we configure eviction but no remote storage, there might be error lines + env.pageserver.allowed_errors.append(".* no remote storage configured, cannot evict layers .*") http_client = env.pageserver.http_client() # Check that we raise on misspelled configs From 05ac0e2493ce18992aab525c5c7419b954c1649a Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 25 Apr 2023 17:54:10 +0100 Subject: [PATCH 51/77] Login to ECR and Docker Hub at once (#4067) - Update kaniko to 1.9.2 (from 1.7.0), problem with reproducible build is fixed - Login to ECR and Docker Hub at once, so we can push to several registries, it makes job `push-docker-hub` unneeded - `push-docker-hub` replaced with `promote-images` in `needs:` clause, Pushing images to production ECR moved to `promote-images` job --- 
.github/workflows/build_and_test.yml | 216 ++++++++++++++------------- 1 file changed, 115 insertions(+), 101 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3212b76731..bdcf2463bc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -541,7 +541,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned options: --init - needs: [ push-docker-hub, tag ] + needs: [ promote-images, tag ] steps: - name: Set PR's status to pending and request a remote CI test run: | @@ -584,8 +584,7 @@ jobs: neon-image: runs-on: [ self-hosted, gen3, large ] needs: [ tag ] - # https://github.com/GoogleContainerTools/kaniko/issues/2005 - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug defaults: run: shell: sh -eu {0} @@ -597,11 +596,32 @@ jobs: submodules: true fetch-depth: 0 - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build neon - run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . 
--build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} + --destination neondatabase/neon:${{needs.tag.outputs.build-tag}} # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder @@ -652,7 +672,7 @@ jobs: compute-tools-image: runs-on: [ self-hosted, gen3, large ] needs: [ tag ] - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug defaults: run: shell: sh -eu {0} @@ -661,18 +681,41 @@ jobs: - name: Checkout uses: actions/checkout@v1 # v3 won't work with kaniko - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build compute tools - run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . 
--build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --dockerfile Dockerfile.compute-tools + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} + --destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} + # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder run: rm -rf ~/.ecr compute-node-image: runs-on: [ self-hosted, gen3, large ] - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug needs: [ tag ] strategy: fail-fast: false @@ -689,12 +732,36 @@ jobs: submodules: true fetch-depth: 0 - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build compute node with extensions - run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . 
--build-arg GIT_VERSION=${{ github.sha }} --build-arg PG_VERSION=${{ matrix.version }} --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --build-arg PG_VERSION=${{ matrix.version }} + --dockerfile Dockerfile.compute-node + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + --destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder run: rm -rf ~/.ecr @@ -786,26 +853,45 @@ jobs: runs-on: [ self-hosted, gen3, small ] needs: [ tag, test-images, vm-compute-node-image ] container: golang:1.19-bullseye - if: github.event_name != 'workflow_dispatch' + # Don't add if-condition here. 
+ # The job should always be run because we have dependant other jobs that shouldn't be skipped steps: - name: Install Crane & ECR helper if: | (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + github.event_name != 'workflow_dispatch' run: | go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 - - name: Configure ECR login + - name: Configure ECR and Docker Hub login + if: | + (github.ref_name == 'main' || github.ref_name == 'release') && + github.event_name != 'workflow_dispatch' run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + mkdir /github/home/.docker/ - echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json + cat <<-EOF > /github/home/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login", + "093970136003.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Add latest tag to images if: | (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + github.event_name != 'workflow_dispatch' run: | crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} latest crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest @@ -814,50 +900,17 @@ jobs: crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - - 
name: Cleanup ECR folder - run: rm -rf ~/.ecr - - push-docker-hub: - runs-on: [ self-hosted, dev, x64 ] - needs: [ promote-images, tag ] - container: golang:1.19-bullseye - - steps: - - name: Install Crane & ECR helper - run: | - go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 - go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 - - - name: Configure ECR login - run: | - mkdir /github/home/.docker/ - echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json - - - name: Pull neon image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon - - - name: Pull compute tools image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools - - - name: Pull compute node v14 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14 - - - name: Pull vm compute node v14 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14 - - - name: Pull compute node v15 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15 - - - name: Pull vm compute node v15 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15 - - - name: Pull rust image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust + crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest + crane tag 
neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - name: Push images to production ECR if: | (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + github.event_name != 'workflow_dispatch' run: | crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest @@ -866,45 +919,6 @@ jobs: crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest - - name: Configure Docker Hub login - run: | - # ECR Credential Helper & Docker Hub don't work together in config, hence reset - echo "" > /github/home/.docker/config.json - crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io - - - name: Push neon image to Docker Hub - run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}} - - - name: Push compute tools image to Docker Hub - run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} - - - name: Push compute node v14 image to Docker Hub - run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} - - - name: Push vm compute node v14 image to 
Docker Hub - run: crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} - - - name: Push compute node v15 image to Docker Hub - run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} - - - name: Push vm compute node v15 image to Docker Hub - run: crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} - - - name: Push rust image to Docker Hub - run: crane push rust neondatabase/rust:pinned - - - name: Add latest tag to images in Docker Hub - if: | - (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' - run: | - crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - - name: Cleanup ECR folder run: rm -rf ~/.ecr @@ -913,7 +927,7 @@ jobs: container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version. # If it notices a fresh storage it may bump the compute version. 
And if compute image failed to build it may break things badly - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: | contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') && github.event_name != 'workflow_dispatch' @@ -947,7 +961,7 @@ jobs: deploy: runs-on: [ self-hosted, gen3, small ] container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch' steps: - name: Fix git ownership @@ -984,7 +998,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned options: --init - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch' steps: - name: Promote compatibility snapshot for the release From 8945fbdb31d9d28aa88194153b56eee6e4a39605 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 25 Apr 2023 20:45:36 +0300 Subject: [PATCH 52/77] Enable OpenTelemetry tracing in proxy in staging. 
(#4065) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depends on https://github.com/neondatabase/helm-charts/pull/32 Co-authored-by: Lassi Pölönen --- .github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml | 1 + .github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml | 1 + .../helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml | 1 + .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml | 1 + 4 files changed, 4 insertions(+) diff --git a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml index a8567665d3..a7d8587ec2 100644 --- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml +++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml @@ -23,6 +23,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.eu-west-1.aws.neon.build" + otelExporterOtlpEndpoint: "https://otel-collector.zeta.eu-west-1.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml index feca05aff6..893e0fab10 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml @@ -9,6 +9,7 @@ settings: authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/" uri: "https://console.stage.neon.tech/psql_session/" domain: "pg.neon.build" + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" metricCollectionInterval: "1min" diff --git 
a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml index 46cfdd2e69..77f6cf080e 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml @@ -24,6 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.cloud.stage.neon.tech" + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml index fdd869c122..2510d624cd 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml @@ -25,6 +25,7 @@ settings: authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.us-east-2.aws.neon.build" extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"] + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" From 2d6fd72177c89645b2b718880796a6e04ff4ebfa Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 25 Apr 2023 23:58:59 +0100 Subject: [PATCH 53/77] GitHub Workflows: Fix crane for several registries (#4076) Follow-up fix after https://github.com/neondatabase/neon/pull/4067 ``` + crane tag neondatabase/vm-compute-node-v14:3064 latest Error: fetching "neondatabase/vm-compute-node-v14:3064": GET https://index.docker.io/v2/neondatabase/vm-compute-node-v14/manifests/3064: MANIFEST_UNKNOWN: manifest unknown; 
unknown tag=3064 ``` I reverted back the previous approach for promoting images (login to one registry, save images to local fs, logout and login to another registry, and push images from local fs). It turns out what works for one Google project (kaniko), doesn't work for another (crane) [sigh] --- .github/workflows/build_and_test.yml | 60 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index bdcf2463bc..15a6a611b1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -858,35 +858,19 @@ jobs: steps: - name: Install Crane & ECR helper - if: | - (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' run: | go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 - - name: Configure ECR and Docker Hub login - if: | - (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + - name: Configure ECR login run: | - DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) - echo "::add-mask::${DOCKERHUB_AUTH}" - mkdir /github/home/.docker/ - cat <<-EOF > /github/home/.docker/config.json - { - "auths": { - "https://index.docker.io/v1/": { - "auth": "${DOCKERHUB_AUTH}" - } - }, - "credHelpers": { - "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login", - "093970136003.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" - } - } - EOF + echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json + + - name: Copy vm-compute-node images to Docker Hub + run: | + crane pull 
369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14 + crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15 - name: Add latest tag to images if: | @@ -900,13 +884,6 @@ jobs: crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest - crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - - name: Push images to production ECR if: | (github.ref_name == 'main' || github.ref_name == 'release') && @@ -919,6 +896,29 @@ jobs: crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest + - name: Configure Docker Hub login + run: | + # ECR Credential Helper & Docker Hub don't work together in config, hence reset + echo "" > /github/home/.docker/config.json + crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io + + - name: Push vm-compute-node to Docker Hub + run: | + crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} + crane 
push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} + + - name: Push latest tags to Docker Hub + if: | + (github.ref_name == 'main' || github.ref_name == 'release') && + github.event_name != 'workflow_dispatch' + run: | + crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest + crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest + - name: Cleanup ECR folder run: rm -rf ~/.ecr From 9d0cf08d5f26ba63691335f7169409454e3e608f Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Wed, 26 Apr 2023 09:29:44 +0200 Subject: [PATCH 54/77] Fix new storage-broker deploy for eu-central-1 (#4079) --- .github/workflows/deploy-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index fba292f0f9..5d1c6e0e16 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -197,7 +197,7 @@ jobs: - target_region: eu-west-1 target_cluster: dev-eu-west-1-zeta - target_region: eu-central-1 - target_cluster: dev-central-1-alpha + target_cluster: dev-eu-central-1-alpha environment: name: dev-${{ matrix.target_region }} steps: From f19b70b379f426bc48fe692f368dab94f4a6af25 Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Wed, 26 Apr 2023 09:36:26 +0200 Subject: [PATCH 55/77] Configure extra domain for us-east-1 (#4078) --- .../helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml 
index f113d1f861..1c7e646810 100644 --- a/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml @@ -23,8 +23,8 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" domain: "*.us-east-1.aws.neon.tech" - # These domains haven't been delegated yet. - # extraDomains: ["*.us-east-1.retooldb.com", "*.us-east-1.postgres.vercel-storage.com"] + # *.us-east-1.retooldb.com hasn't been delegated yet. + extraDomains: ["*.us-east-1.postgres.vercel-storage.com"] sentryEnvironment: "production" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" From 850f6b1cb9baae004a879027d91858237546c56f Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Wed, 26 Apr 2023 11:49:29 +0300 Subject: [PATCH 56/77] refactor: drop pageserver_ondisk_layers (#4071) I didn't get through #3775 fast enough so we wanted to remove this metric. Fixes #3705. 
--- pageserver/src/metrics.rs | 11 +++-------- pageserver/src/tenant/layer_map.rs | 4 ---- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index cf60a1a404..d6978a8cf6 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1,9 +1,9 @@ use metrics::core::{AtomicU64, GenericCounter}; use metrics::{ register_counter_vec, register_histogram, register_histogram_vec, register_int_counter, - register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, - Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, - UIntGauge, UIntGaugeVec, + register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec, Counter, CounterVec, + Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, + UIntGaugeVec, }; use once_cell::sync::Lazy; use pageserver_api::models::TenantState; @@ -350,11 +350,6 @@ pub static LIVE_CONNECTIONS_COUNT: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -pub static NUM_ONDISK_LAYERS: Lazy = Lazy::new(|| { - register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk") - .expect("failed to define a metric") -}); - // remote storage metrics /// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`]. 
diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 02159ee291..0ee0c6f77d 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -48,7 +48,6 @@ mod layer_coverage; use crate::context::RequestContext; use crate::keyspace::KeyPartitioning; -use crate::metrics::NUM_ONDISK_LAYERS; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use crate::tenant::storage_layer::Layer; @@ -288,7 +287,6 @@ where self.l0_delta_layers.push(layer); } - NUM_ONDISK_LAYERS.inc(); Ok(()) } @@ -314,8 +312,6 @@ where "failed to locate removed historic layer from l0_delta_layers" ); } - - NUM_ONDISK_LAYERS.dec(); } pub(self) fn replace_historic_noflush( From 4625da316447a6bf8e345fefbfd30f860fb51074 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Wed, 26 Apr 2023 12:07:45 +0300 Subject: [PATCH 57/77] build: remove busted sk-1.us-east-2 from staging hosts (#4082) this should give us complete deployments while a new one is being brought up. --- .github/ansible/staging.us-east-2.hosts.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/ansible/staging.us-east-2.hosts.yaml b/.github/ansible/staging.us-east-2.hosts.yaml index e63ed6e639..dacc5567c3 100644 --- a/.github/ansible/staging.us-east-2.hosts.yaml +++ b/.github/ansible/staging.us-east-2.hosts.yaml @@ -48,8 +48,6 @@ storage: hosts: safekeeper-0.us-east-2.aws.neon.build: ansible_host: i-027662bd552bf5db0 - safekeeper-1.us-east-2.aws.neon.build: - ansible_host: i-0171efc3604a7b907 safekeeper-2.us-east-2.aws.neon.build: ansible_host: i-0de0b03a51676a6ce safekeeper-99.us-east-2.aws.neon.build: From 381c8fca4f1d700ad5118800e6b2b3f9e33a07b5 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Wed, 26 Apr 2023 12:39:17 +0300 Subject: [PATCH 58/77] feat: log how long tenant activation takes (#4080) Adds just a counter counting up from the creation to the tenant, logged after activation. 
Might help guide us with the investigation of #4025. --- pageserver/src/tenant.rs | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 11415b47c4..b5966b4618 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -118,6 +118,10 @@ pub struct Tenant { // Global pageserver config parameters pub conf: &'static PageServerConf, + /// The value creation timestamp, used to measure activation delay, see: + /// + loading_started_at: Instant, + state: watch::Sender, // Overridden tenant-specific config parameters. @@ -1476,7 +1480,7 @@ impl Tenant { TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Active; - info!("Activating tenant {}", self.tenant_id); + debug!(tenant_id = %self.tenant_id, "Activating tenant"); let timelines_accessor = self.timelines.lock().unwrap(); let not_broken_timelines = timelines_accessor @@ -1487,12 +1491,17 @@ impl Tenant { // down when they notice that the tenant is inactive. tasks::start_background_loops(self.tenant_id); + let mut activated_timelines = 0; + let mut timelines_broken_during_activation = 0; + for timeline in not_broken_timelines { match timeline .activate(ctx) .context("timeline activation for activating tenant") { - Ok(()) => {} + Ok(()) => { + activated_timelines += 1; + } Err(e) => { error!( "Failed to activate timeline {}: {:#}", @@ -1503,9 +1512,26 @@ impl Tenant { "failed to activate timeline {}: {}", timeline.timeline_id, e )); + + timelines_broken_during_activation += 1; } } } + + let elapsed = self.loading_started_at.elapsed(); + let total_timelines = timelines_accessor.len(); + + // log a lot of stuff, because some tenants sometimes suffer from user-visible + // times to activate. 
see https://github.com/neondatabase/neon/issues/4025 + info!( + since_creation_millis = elapsed.as_millis(), + tenant_id = %self.tenant_id, + activated_timelines, + timelines_broken_during_activation, + total_timelines, + post_state = <&'static str>::from(&*current_state), + "activation attempt finished" + ); } } }); @@ -1812,6 +1838,9 @@ impl Tenant { Tenant { tenant_id, conf, + // using now here is good enough approximation to catch tenants with really long + // activation times. + loading_started_at: Instant::now(), tenant_conf: Arc::new(RwLock::new(tenant_conf)), timelines: Mutex::new(HashMap::new()), gc_cs: tokio::sync::Mutex::new(()), From 31a3910fd9b60043651380d58771f97558f10771 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 14 Apr 2023 12:59:08 +0400 Subject: [PATCH 59/77] Remove wait_for_sk_commit_lsn_to_reach_remote_storage. It had a couple of inherent races: 1) Even if compute is killed before the call, some more data might still arrive to safekeepers after commit_lsn on them is polled, advancing it. Then checkpoint on pageserver might not include this tail, and so upload of expected LSN won't happen until one more checkpoint. 2) commit_lsn is updated asynchronously -- compute can commit transaction before communicating commit_lsn to even a single safekeeper (sync-safekeepers can be used to force the advancement). This makes semantics of wait_for_sk_commit_lsn_to_reach_remote_storage quite complicated. Replace it with last_flush_lsn_upload which 1) Learns last flush LSN on compute; 2) Waits for it to arrive to pageserver; 3) Checkpoints it; 4) Waits for the upload. In some tests this keeps compute alive longer than before, but this doesn't seem to be important. 
There is a chance this fixes https://github.com/neondatabase/neon/issues/3209 --- test_runner/fixtures/neon_fixtures.py | 40 ++++++------------- test_runner/fixtures/pageserver/utils.py | 5 +-- test_runner/regress/test_layer_eviction.py | 9 ++--- test_runner/regress/test_ondemand_download.py | 11 ++--- .../test_tenants_with_remote_storage.py | 7 +--- 5 files changed, 23 insertions(+), 49 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index c6610ba062..f209dca560 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2928,32 +2928,18 @@ def fork_at_current_lsn( return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn) -def wait_for_sk_commit_lsn_to_arrive_at_pageserver_last_record_lsn( - tenant_id: TenantId, - timeline_id: TimelineId, - safekeepers: List[Safekeeper], - pageserver: NeonPageserver, -): - sk_commit_lsns = [ - sk.http_client().timeline_status(tenant_id, timeline_id).commit_lsn for sk in safekeepers - ] - lsn = max(sk_commit_lsns) - ps_http = pageserver.http_client() - wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, lsn) - return lsn - - -def wait_for_sk_commit_lsn_to_reach_remote_storage( - tenant_id: TenantId, - timeline_id: TimelineId, - safekeepers: List[Safekeeper], - pageserver: NeonPageserver, -): - lsn = wait_for_sk_commit_lsn_to_arrive_at_pageserver_last_record_lsn( - tenant_id, timeline_id, safekeepers, pageserver - ) - ps_http = pageserver.http_client() +def last_flush_lsn_upload( + env: NeonEnv, endpoint: Endpoint, tenant_id: TenantId, timeline_id: TimelineId +) -> Lsn: + """ + Wait for pageserver to catch to the latest flush LSN of given endpoint, + checkpoint pageserver, and wait for it to be uploaded (remote_consistent_lsn + reaching flush LSN). 
+ """ + last_flush_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + ps_http = env.pageserver.http_client() + wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_flush_lsn) # force a checkpoint to trigger upload ps_http.timeline_checkpoint(tenant_id, timeline_id) - wait_for_upload(ps_http, tenant_id, timeline_id, lsn) - return lsn + wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn) + return last_flush_lsn diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py index c060fc8dea..7f8bb40bda 100644 --- a/test_runner/fixtures/pageserver/utils.py +++ b/test_runner/fixtures/pageserver/utils.py @@ -54,10 +54,9 @@ def wait_for_upload( if current_lsn >= lsn: log.info("wait finished") return + lr_lsn = last_record_lsn(pageserver_http, tenant, timeline) log.info( - "waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( - lsn, current_lsn, i + 1 - ) + f"waiting for remote_consistent_lsn to reach {lsn}, now {current_lsn}, last_record_lsn={lr_lsn}, iteration {i + 1}" ) time.sleep(1) raise Exception( diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index 1ae32fb398..a96532c0d8 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -6,7 +6,6 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, RemoteStorageKind, wait_for_last_flush_lsn, - wait_for_sk_commit_lsn_to_reach_remote_storage, ) from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload from fixtures.types import Lsn, TenantId, TimelineId @@ -199,7 +198,7 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): # with image_creation_threshold=1 which we will use on the last compaction cur.execute("vacuum") - wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + last_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) if i == 1 and j == 2 and k == 1: 
# last iteration; stop before checkpoint to avoid leaving an inmemory layer @@ -222,10 +221,8 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): tenant_update_config({"image_creation_threshold": "1"}) ps_http.timeline_compact(tenant_id, timeline_id) - # wait for all uploads to finish - wait_for_sk_commit_lsn_to_reach_remote_storage( - tenant_id, timeline_id, env.safekeepers, env.pageserver - ) + # wait for all uploads to finish (checkpoint has been done above) + wait_for_upload(ps_http, tenant_id, timeline_id, last_lsn) # shutdown safekeepers to avoid on-demand downloads from walreceiver for sk in env.safekeepers: diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index cb08b014fd..5c02708457 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -12,8 +12,8 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, RemoteStorageKind, available_remote_storages, + last_flush_lsn_upload, wait_for_last_flush_lsn, - wait_for_sk_commit_lsn_to_reach_remote_storage, ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pageserver.utils import ( @@ -207,9 +207,7 @@ def test_ondemand_download_timetravel( env.endpoints.stop_all() # wait until pageserver has successfully uploaded all the data to remote storage - wait_for_sk_commit_lsn_to_reach_remote_storage( - tenant_id, timeline_id, env.safekeepers, env.pageserver - ) + wait_for_upload(client, tenant_id, timeline_id, current_lsn) def get_api_current_physical_size(): d = client.timeline_detail(tenant_id, timeline_id) @@ -347,12 +345,9 @@ def test_download_remote_layers_api( """ ) + last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id) env.endpoints.stop_all() - wait_for_sk_commit_lsn_to_reach_remote_storage( - tenant_id, timeline_id, env.safekeepers, env.pageserver - ) - def get_api_current_physical_size(): d = client.timeline_detail(tenant_id, 
timeline_id) return d["current_physical_size"] diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index d7c0814570..dca2cd3d28 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -21,7 +21,7 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, RemoteStorageKind, available_remote_storages, - wait_for_sk_commit_lsn_to_reach_remote_storage, + last_flush_lsn_upload, ) from fixtures.pageserver.utils import ( assert_tenant_state, @@ -174,12 +174,9 @@ def test_tenants_attached_after_download( ) ##### Stop the pageserver, erase its layer file to force it being downloaded from S3 + last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id) env.endpoints.stop_all() - wait_for_sk_commit_lsn_to_reach_remote_storage( - tenant_id, timeline_id, env.safekeepers, env.pageserver - ) - env.pageserver.stop() timeline_dir = Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) From 11df2ee5d70d23bac233051d5e974d830222a967 Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Wed, 26 Apr 2023 13:40:36 +0200 Subject: [PATCH 60/77] Add safekeeper-3.us-east-2.aws.neon.build (#4085) --- .github/ansible/staging.us-east-2.hosts.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ansible/staging.us-east-2.hosts.yaml b/.github/ansible/staging.us-east-2.hosts.yaml index dacc5567c3..fb218c443d 100644 --- a/.github/ansible/staging.us-east-2.hosts.yaml +++ b/.github/ansible/staging.us-east-2.hosts.yaml @@ -50,5 +50,7 @@ storage: ansible_host: i-027662bd552bf5db0 safekeeper-2.us-east-2.aws.neon.build: ansible_host: i-0de0b03a51676a6ce + safekeeper-3.us-east-2.aws.neon.build: + ansible_host: i-05f8ba2cda243bd18 safekeeper-99.us-east-2.aws.neon.build: ansible_host: i-0d61b6a2ea32028d5 From 6861259be7ee63f6a4bb2a9fdb5546147bf20389 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 26 Apr 2023 
15:18:26 +0200 Subject: [PATCH 61/77] add global metric for unexpected on-demand downloads (#4069) Until we have toned down the prod logs to zero WARN and ERROR, we want a dedicated metric for which we can have a dedicated alert. fixes https://github.com/neondatabase/neon/issues/3924 --- pageserver/src/bin/pageserver.rs | 1 + pageserver/src/lib.rs | 2 ++ pageserver/src/metrics.rs | 16 ++++++++++++++++ pageserver/src/tenant/timeline.rs | 3 ++- test_runner/fixtures/metrics.py | 1 + 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index ed23a18ee0..8e4897c09c 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -226,6 +226,7 @@ fn start_pageserver( ); set_build_info_metric(GIT_VERSION); set_launch_timestamp_metric(launch_ts); + pageserver::preinitialize_metrics(); // If any failpoints were set from FAILPOINTS environment variable, // print them to the log for debugging purposes diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 278658eba3..04863886cb 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -44,6 +44,8 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61; static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]); +pub use crate::metrics::preinitialize_metrics; + pub async fn shutdown_pageserver(exit_code: i32) { // Shut down the libpq endpoint task. This prevents new connections from // being accepted. diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index d6978a8cf6..deb20f21f8 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -205,6 +205,15 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy = Lazy::new(|| .expect("failed to define a metric") }); +pub static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy = Lazy::new(|| { + register_int_counter!( + "pageserver_unexpected_ondemand_downloads_count", + "Number of unexpected on-demand downloads. 
\ + We log more context for each increment, so, forgo any labels in this metric.", + ) + .expect("failed to define a metric") +}); + /// Each [`Timeline`]'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric. #[derive(Debug)] pub struct EvictionsWithLowResidenceDuration { @@ -1132,3 +1141,10 @@ impl>, O, E> Future for MeasuredRemoteOp { poll_result } } + +pub fn preinitialize_metrics() { + // We want to alert on this metric increasing. + // Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0. + assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0); + UNEXPECTED_ONDEMAND_DOWNLOADS.reset(); +} diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b8b1f963e5..6c34f5a5b5 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -48,7 +48,7 @@ use crate::tenant::{ use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::TimelineMetrics; +use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS}; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key}; use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError}; @@ -2355,6 +2355,7 @@ impl Timeline { id, ctx.task_kind() ); + UNEXPECTED_ONDEMAND_DOWNLOADS.inc(); timeline.download_remote_layer(remote_layer).await?; continue 'layer_map_search; } diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 5fed6fcf84..0e958ddd06 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -53,6 +53,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] 
= ( "pageserver_storage_operations_seconds_global_count", "pageserver_storage_operations_seconds_global_sum", "pageserver_storage_operations_seconds_global_bucket", + "pageserver_unexpected_ondemand_downloads_count_total", "libmetrics_launch_timestamp", "libmetrics_build_info", "libmetrics_tracing_event_count_total", From 92214578af3311c8d2ea6885f59562c9b53df628 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Wed, 26 Apr 2023 17:47:54 +0300 Subject: [PATCH 62/77] Fix proxy_io_bytes_per_client metric: use branch_id identifier properly. (#4084) It fixes the miscalculation of the metric for projects that use multiple branches for the same endpoint. We were underbilling users with such projects. So we need to communicate the change in Release Notes. --- proxy/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 445c2e930c..6ae1e3a447 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -95,7 +95,7 @@ fn gather_proxy_io_bytes_per_client() -> Vec<(Ids, (u64, DateTime))> { current_metrics.push(( Ids { endpoint_id: endpoint_id.to_string(), - branch_id: "".to_string(), + branch_id: branch_id.to_string(), }, (value, Utc::now()), )); From 0112a602e1b748b959bf578e7eaaecef392c09a3 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Tue, 25 Apr 2023 12:22:58 +0400 Subject: [PATCH 63/77] Add timeout on proxy -> compute connection establishment. Otherwise we sit up to default tcp_syn_retries (about 2+ min) before getting os error 110 if compute has been migrated to another pod. 
--- proxy/src/compute.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index b5efc72803..0465703ae6 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,8 +1,8 @@ use crate::{cancellation::CancelClosure, error::UserFacingError}; -use futures::TryFutureExt; +use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; use pq_proto::StartupMessageParams; -use std::{io, net::SocketAddr}; +use std::{io, net::SocketAddr, time::Duration}; use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::NoTls; @@ -130,9 +130,23 @@ impl ConnCfg { async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> { use tokio_postgres::config::Host; + // wrap TcpStream::connect with timeout + let connect_with_timeout = |host, port| { + let connection_timeout = Duration::from_millis(10000); + tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map( + move |res| match res { + Ok(tcpstream_connect_res) => tcpstream_connect_res, + Err(_) => Err(io::Error::new( + io::ErrorKind::TimedOut, + format!("exceeded connection timeout {connection_timeout:?}"), + )), + }, + ) + }; + let connect_once = |host, port| { info!("trying to connect to compute node at {host}:{port}"); - TcpStream::connect((host, port)).and_then(|socket| async { + connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. socket2::SockRef::from(&socket).set_keepalive(true)?; @@ -165,7 +179,6 @@ impl ConnCfg { Host::Unix(_) => continue, // unix sockets are not welcome here }; - // TODO: maybe we should add a timeout. 
match connect_once(host, *port).await { Ok(socket) => return Ok(socket), Err(err) => { From 9ea7b5dd38cd1fc89311eba3fcb6e8987d51e787 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 27 Apr 2023 11:54:48 +0200 Subject: [PATCH 64/77] clean up logging around on-demand downloads (#4030) - Remove repeated tenant & timeline from span - Demote logging of the path to debug level - Log completion at info level, in the same function where we log errors - distinguish between layer file download success & on-demand download succeeding as a whole in the log message wording - Assert that the span contains a tenant id and a timeline id fixes https://github.com/neondatabase/neon/issues/3945 Before: ``` INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: download complete: /storage/pageserver/data/tenants/$TENANT_ID/timelines/$TIMELINE_ID/000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91 INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates. 
``` After: ``` INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: layer file download finished INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates. INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: on-demand download successful ``` --- Cargo.lock | 11 + Cargo.toml | 1 + libs/remote_storage/tests/pagination_tests.rs | 6 +- libs/utils/Cargo.toml | 3 +- libs/utils/src/lib.rs | 2 + libs/utils/src/logging.rs | 50 ++- libs/utils/src/tracing_span_assert.rs | 287 ++++++++++++++++++ pageserver/src/bin/pageserver.rs | 16 +- pageserver/src/tenant.rs | 8 +- .../tenant/remote_timeline_client/download.rs | 5 +- pageserver/src/tenant/timeline.rs | 39 ++- .../walreceiver/connection_manager.rs | 2 +- safekeeper/src/bin/safekeeper.rs | 5 +- storage_broker/src/bin/storage_broker.rs | 5 +- 14 files changed, 413 insertions(+), 27 deletions(-) create mode 100644 libs/utils/src/tracing_span_assert.rs diff --git a/Cargo.lock b/Cargo.lock index ce24bbcee8..08b24d263c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4629,6 +4629,16 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-futures" version = "0.2.5" @@ -4879,6 +4889,7 @@ dependencies = [ 
"thiserror", "tokio", "tracing", + "tracing-error", "tracing-subscriber", "url", "uuid", diff --git a/Cargo.toml b/Cargo.toml index 0b545e6190..f4872433cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,6 +110,7 @@ toml = "0.7" toml_edit = "0.19" tonic = {version = "0.9", features = ["tls", "tls-roots"]} tracing = "0.1" +tracing-error = "0.2.0" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } url = "2.2" diff --git a/libs/remote_storage/tests/pagination_tests.rs b/libs/remote_storage/tests/pagination_tests.rs index 048e99d841..86a6888f98 100644 --- a/libs/remote_storage/tests/pagination_tests.rs +++ b/libs/remote_storage/tests/pagination_tests.rs @@ -99,7 +99,11 @@ struct S3WithTestBlobs { #[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledS3 { async fn setup() -> Self { - utils::logging::init(utils::logging::LogFormat::Test).expect("logging init failed"); + utils::logging::init( + utils::logging::LogFormat::Test, + utils::logging::TracingErrorLayerEnablement::Disabled, + ) + .expect("logging init failed"); if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() { info!( "`{}` env variable is not set, skipping the test", diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index dc6326e73e..2b04dfdef6 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -27,7 +27,8 @@ signal-hook.workspace = true thiserror.workspace = true tokio.workspace = true tracing.workspace = true -tracing-subscriber = { workspace = true, features = ["json"] } +tracing-error.workspace = true +tracing-subscriber = { workspace = true, features = ["json", "registry"] } rand.workspace = true serde_with.workspace = true strum.workspace = true diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index d4176911ac..9b52aa75b7 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -54,6 +54,8 @@ pub mod measured_stream; pub mod serde_percent; pub mod serde_regex; +pub mod 
tracing_span_assert; + /// use with fail::cfg("$name", "return(2000)") #[macro_export] macro_rules! failpoint_sleep_millis_async { diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index ed856b6804..2b8c852d86 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -56,7 +56,20 @@ where } } -pub fn init(log_format: LogFormat) -> anyhow::Result<()> { +/// Whether to add the `tracing_error` crate's `ErrorLayer` +/// to the global tracing subscriber. +/// +pub enum TracingErrorLayerEnablement { + /// Do not add the `ErrorLayer`. + Disabled, + /// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset. + EnableWithRustLogFilter, +} + +pub fn init( + log_format: LogFormat, + tracing_error_layer_enablement: TracingErrorLayerEnablement, +) -> anyhow::Result<()> { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. let rust_log_env_filter = || { @@ -67,21 +80,26 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> { // NB: the order of the with() calls does not matter. 
// See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering use tracing_subscriber::prelude::*; - tracing_subscriber::registry() - .with({ - let log_layer = tracing_subscriber::fmt::layer() - .with_target(false) - .with_ansi(atty::is(atty::Stream::Stdout)) - .with_writer(std::io::stdout); - let log_layer = match log_format { - LogFormat::Json => log_layer.json().boxed(), - LogFormat::Plain => log_layer.boxed(), - LogFormat::Test => log_layer.with_test_writer().boxed(), - }; - log_layer.with_filter(rust_log_env_filter()) - }) - .with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())) - .init(); + let r = tracing_subscriber::registry(); + let r = r.with({ + let log_layer = tracing_subscriber::fmt::layer() + .with_target(false) + .with_ansi(atty::is(atty::Stream::Stdout)) + .with_writer(std::io::stdout); + let log_layer = match log_format { + LogFormat::Json => log_layer.json().boxed(), + LogFormat::Plain => log_layer.boxed(), + LogFormat::Test => log_layer.with_test_writer().boxed(), + }; + log_layer.with_filter(rust_log_env_filter()) + }); + let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())); + match tracing_error_layer_enablement { + TracingErrorLayerEnablement::EnableWithRustLogFilter => r + .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) + .init(), + TracingErrorLayerEnablement::Disabled => r.init(), + } Ok(()) } diff --git a/libs/utils/src/tracing_span_assert.rs b/libs/utils/src/tracing_span_assert.rs new file mode 100644 index 0000000000..b9f7986442 --- /dev/null +++ b/libs/utils/src/tracing_span_assert.rs @@ -0,0 +1,287 @@ +//! Assert that the current [`tracing::Span`] has a given set of fields. +//! +//! # Usage +//! +//! ``` +//! use tracing_subscriber::prelude::*; +//! let registry = tracing_subscriber::registry() +//! .with(tracing_error::ErrorLayer::default()); +//! +//! 
// Register the registry as the global subscriber. +//! // In this example, we'll only use it as a thread-local subscriber. +//! let _guard = tracing::subscriber::set_default(registry); +//! +//! // Then, in the main code: +//! +//! let span = tracing::info_span!("TestSpan", test_id = 1); +//! let _guard = span.enter(); +//! +//! // ... down the call stack +//! +//! use utils::tracing_span_assert::{check_fields_present, MultiNameExtractor}; +//! let extractor = MultiNameExtractor::new("TestExtractor", ["test", "test_id"]); +//! match check_fields_present([&extractor]) { +//! Ok(()) => {}, +//! Err(missing) => { +//! panic!("Missing fields: {:?}", missing.into_iter().map(|f| f.name() ).collect::>()); +//! } +//! } +//! ``` +//! +//! Recommended reading: https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering +//! + +use std::{ + collections::HashSet, + fmt::{self}, + hash::{Hash, Hasher}, +}; + +pub enum ExtractionResult { + Present, + Absent, +} + +pub trait Extractor: Send + Sync + std::fmt::Debug { + fn name(&self) -> &str; + fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult; +} + +#[derive(Debug)] +pub struct MultiNameExtractor { + name: &'static str, + field_names: [&'static str; L], +} + +impl MultiNameExtractor { + pub fn new(name: &'static str, field_names: [&'static str; L]) -> MultiNameExtractor { + MultiNameExtractor { name, field_names } + } +} +impl Extractor for MultiNameExtractor { + fn name(&self) -> &str { + self.name + } + fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult { + if fields.iter().any(|f| self.field_names.contains(&f.name())) { + ExtractionResult::Present + } else { + ExtractionResult::Absent + } + } +} + +struct MemoryIdentity<'a>(&'a dyn Extractor); + +impl<'a> MemoryIdentity<'a> { + fn as_ptr(&self) -> *const () { + self.0 as *const _ as *const () + } +} +impl<'a> PartialEq for MemoryIdentity<'a> { + fn eq(&self, other: &Self) -> bool { + 
self.as_ptr() == other.as_ptr() + } +} +impl<'a> Eq for MemoryIdentity<'a> {} +impl<'a> Hash for MemoryIdentity<'a> { + fn hash(&self, state: &mut H) { + self.as_ptr().hash(state); + } +} +impl<'a> fmt::Debug for MemoryIdentity<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:p}: {}", self.as_ptr(), self.0.name()) + } +} + +/// The extractor names passed as keys to [`new`]. +pub fn check_fields_present( + must_be_present: [&dyn Extractor; L], +) -> Result<(), Vec<&dyn Extractor>> { + let mut missing: HashSet = + HashSet::from_iter(must_be_present.into_iter().map(|r| MemoryIdentity(r))); + let trace = tracing_error::SpanTrace::capture(); + trace.with_spans(|md, _formatted_fields| { + missing.retain(|extractor| match extractor.0.extract(md.fields()) { + ExtractionResult::Present => false, + ExtractionResult::Absent => true, + }); + !missing.is_empty() // continue walking up until we've found all missing + }); + if missing.is_empty() { + Ok(()) + } else { + Err(missing.into_iter().map(|mi| mi.0).collect()) + } +} + +#[cfg(test)] +mod tests { + + use tracing_subscriber::prelude::*; + + use super::*; + + struct Setup { + _current_thread_subscriber_guard: tracing::subscriber::DefaultGuard, + tenant_extractor: MultiNameExtractor<2>, + timeline_extractor: MultiNameExtractor<2>, + } + + fn setup_current_thread() -> Setup { + let tenant_extractor = MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]); + let timeline_extractor = MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]); + + let registry = tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .with(tracing_error::ErrorLayer::default()); + + let guard = tracing::subscriber::set_default(registry); + + Setup { + _current_thread_subscriber_guard: guard, + tenant_extractor, + timeline_extractor, + } + } + + fn assert_missing(missing: Vec<&dyn Extractor>, expected: Vec<&dyn Extractor>) { + let missing: HashSet = + 
HashSet::from_iter(missing.into_iter().map(MemoryIdentity)); + let expected: HashSet = + HashSet::from_iter(expected.into_iter().map(MemoryIdentity)); + assert_eq!(missing, expected); + } + + #[test] + fn positive_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1"); + let _guard = span.enter(); + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap(); + } + + #[test] + fn negative_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", timeline_id = "timeline-1"); + let _guard = span.enter(); + let missing = + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn positive_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", tenant_id = "tenant-1"); + let _guard = span.enter(); + + let span = tracing::info_span!("grandchild", timeline_id = "timeline-1"); + let _guard = span.enter(); + + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap(); + } + + #[test] + fn negative_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", timeline_id = "timeline-1"); + let _guard = span.enter(); + + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn positive_subset_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1"); + let _guard = span.enter(); + check_fields_present([&setup.tenant_extractor]).unwrap(); + } + + #[test] + fn 
positive_subset_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", tenant_id = "tenant-1"); + let _guard = span.enter(); + + let span = tracing::info_span!("grandchild", timeline_id = "timeline-1"); + let _guard = span.enter(); + + check_fields_present([&setup.tenant_extractor]).unwrap(); + } + + #[test] + fn negative_subset_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", timeline_id = "timeline-1"); + let _guard = span.enter(); + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn negative_subset_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", timeline_id = "timeline-1"); + let _guard = span.enter(); + + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn tracing_error_subscriber_not_set_up() { + // no setup + + let span = tracing::info_span!("foo", e = "some value"); + let _guard = span.enter(); + + let extractor = MultiNameExtractor::new("E", ["e"]); + let missing = check_fields_present([&extractor]).unwrap_err(); + assert_missing(missing, vec![&extractor]); + } + + #[test] + #[should_panic] + fn panics_if_tracing_error_subscriber_has_wrong_filter() { + let r = tracing_subscriber::registry().with({ + tracing_error::ErrorLayer::default().with_filter( + tracing_subscriber::filter::dynamic_filter_fn(|md, _| { + if md.is_span() && *md.level() == tracing::Level::INFO { + return false; + } + true + }), + ) + }); + + let _guard = tracing::subscriber::set_default(r); + + let span = tracing::info_span!("foo", e = "some value"); + let _guard = span.enter(); + + let extractor = 
MultiNameExtractor::new("E", ["e"]); + let missing = check_fields_present([&extractor]).unwrap_err(); + assert_missing(missing, vec![&extractor]); + } +} diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 8e4897c09c..d843b01ed7 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -25,6 +25,7 @@ use pageserver::{ virtual_file, }; use postgres_backend::AuthType; +use utils::logging::TracingErrorLayerEnablement; use utils::signals::ShutdownSignals; use utils::{ auth::JwtAuth, logging, project_git_version, sentry_init::init_sentry, signals::Signal, @@ -86,8 +87,19 @@ fn main() -> anyhow::Result<()> { } }; - // Initialize logging, which must be initialized before the custom panic hook is installed. - logging::init(conf.log_format)?; + // Initialize logging. + // + // It must be initialized before the custom panic hook is installed below. + // + // Regarding tracing_error enablement: at this time, we only use the + // tracing_error crate to debug_assert that log spans contain tenant and timeline ids. + // See `debug_assert_current_span_has_tenant_and_timeline_id` in the timeline module + let tracing_error_layer_enablement = if cfg!(debug_assertions) { + TracingErrorLayerEnablement::EnableWithRustLogFilter + } else { + TracingErrorLayerEnablement::Disabled + }; + logging::init(conf.log_format, tracing_error_layer_enablement)?; // mind the order required here: 1. logging, 2. panic_hook, 3. sentry. // disarming this hook on pageserver, because we never tear down tracing. 
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index b5966b4618..d69d5e4b45 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2886,7 +2886,13 @@ pub mod harness { }; LOG_HANDLE.get_or_init(|| { - logging::init(logging::LogFormat::Test).expect("Failed to init test logging") + logging::init( + logging::LogFormat::Test, + // enable it in case in case the tests exercise code paths that use + // debug_assert_current_span_has_tenant_and_timeline_id + logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, + ) + .expect("Failed to init test logging") }); let repo_dir = PageServerConf::test_repo_dir(test_name); diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index bda095d850..a0d8c0193a 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -16,6 +16,7 @@ use tracing::{info, warn}; use crate::config::PageServerConf; use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id; use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; use remote_storage::{DownloadError, GenericRemoteStorage}; use utils::crashsafe::path_with_suffix_extension; @@ -43,6 +44,8 @@ pub async fn download_layer_file<'a>( layer_file_name: &'a LayerFileName, layer_metadata: &'a LayerFileMetadata, ) -> Result { + debug_assert_current_span_has_tenant_and_timeline_id(); + let timeline_path = conf.timeline_path(&timeline_id, &tenant_id); let local_path = timeline_path.join(layer_file_name.file_name()); @@ -154,7 +157,7 @@ pub async fn download_layer_file<'a>( .with_context(|| format!("Could not fsync layer file {}", local_path.display(),)) .map_err(DownloadError::Other)?; - tracing::info!("download complete: {}", local_path.display()); + tracing::debug!("download complete: {}", 
local_path.display()); Ok(bytes_amount) } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6c34f5a5b5..87f03f30b6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -19,6 +19,7 @@ use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError}; use tokio_util::sync::CancellationToken; use tracing::*; use utils::id::TenantTimelineId; +use utils::tracing_span_assert; use std::cmp::{max, min, Ordering}; use std::collections::BinaryHeap; @@ -936,6 +937,7 @@ impl Timeline { } } + #[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))] pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result> { let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) }; let Some(remote_layer) = layer.downcast_remote_layer() else { return Ok(Some(false)) }; @@ -3819,11 +3821,13 @@ impl Timeline { /// If the caller has a deadline or needs a timeout, they can simply stop polling: /// we're **cancellation-safe** because the download happens in a separate task_mgr task. /// So, the current download attempt will run to completion even if we stop polling. - #[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%remote_layer.short_id()))] + #[instrument(skip_all, fields(layer=%remote_layer.short_id()))] pub async fn download_remote_layer( &self, remote_layer: Arc, ) -> anyhow::Result<()> { + debug_assert_current_span_has_tenant_and_timeline_id(); + use std::sync::atomic::Ordering::Relaxed; let permit = match Arc::clone(&remote_layer.ongoing_download) @@ -3867,6 +3871,8 @@ impl Timeline { .await; if let Ok(size) = &result { + info!("layer file download finished"); + // XXX the temp file is still around in Err() case // and consumes space until we clean up upon pageserver restart. 
self_clone.metrics.resident_physical_size_gauge.add(*size); @@ -3938,6 +3944,8 @@ impl Timeline { updates.flush(); drop(layers); + info!("on-demand download successful"); + // Now that we've inserted the download into the layer map, // close the semaphore. This will make other waiters for // this download return Ok(()). @@ -3945,7 +3953,7 @@ impl Timeline { remote_layer.ongoing_download.close(); } else { // Keep semaphore open. We'll drop the permit at the end of the function. - error!("on-demand download failed: {:?}", result.as_ref().unwrap_err()); + error!("layer file download failed: {:?}", result.as_ref().unwrap_err()); } // Don't treat it as an error if the task that triggered the download @@ -4256,3 +4264,30 @@ fn rename_to_backup(path: &Path) -> anyhow::Result<()> { bail!("couldn't find an unused backup number for {:?}", path) } + +#[inline] +pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() { + pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy< + tracing_span_assert::MultiNameExtractor<2>, + > = once_cell::sync::Lazy::new(|| { + tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]) + }); + + pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy< + tracing_span_assert::MultiNameExtractor<2>, + > = once_cell::sync::Lazy::new(|| { + tracing_span_assert::MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]) + }); + + #[cfg(debug_assertions)] + match tracing_span_assert::check_fields_present([ + &*TENANT_ID_EXTRACTOR, + &*TIMELINE_ID_EXTRACTOR, + ]) { + Ok(()) => (), + Err(missing) => panic!( + "missing extractors: {:?}", + missing.into_iter().map(|e| e.name()).collect::>() + ), + } +} diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index efcbfbce3d..731c5c4644 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ 
b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -348,7 +348,7 @@ impl ConnectionManagerState { .context("walreceiver connection handling failure") } .instrument( - info_span!("walreceiver_connection", id = %id, node_id = %new_sk.safekeeper_id), + info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, node_id = %new_sk.safekeeper_id), ) }); diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index ace921a26d..3699a2a74c 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -134,7 +134,10 @@ fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::init( + LogFormat::from_config(&args.log_format)?, + logging::TracingErrorLayerEnablement::Disabled, + )?; logging::replace_panic_hook_with_tracing_panic_hook().forget(); info!("version: {GIT_VERSION}"); diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index de7b634ba0..597d9860d8 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -430,7 +430,10 @@ async fn main() -> Result<(), Box> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::init( + LogFormat::from_config(&args.log_format)?, + logging::TracingErrorLayerEnablement::Disabled, + )?; logging::replace_panic_hook_with_tracing_panic_hook().forget(); // initialize sentry if SENTRY_DSN is provided let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); From 5b911e1f9f6f5e49fabfb3fde12084b1e69bd4a2 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 27 Apr 2023 14:01:27 +0200 Subject: [PATCH 65/77] build: run clippy for powerset of features (#4077) This will catch compiler & clippy warnings in all feature combinations. 
We should probably use cargo hack for build and test as well, but, that's quite expensive and would add to overall CI wait times. obsoletes https://github.com/neondatabase/neon/pull/4073 refs https://github.com/neondatabase/neon/pull/4070 --- .github/workflows/build_and_test.yml | 17 +++++++++++++++-- .neon_clippy_args | 4 ++++ run_clippy.sh | 15 ++++++++++----- 3 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 .neon_clippy_args diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 15a6a611b1..e5ba7aa3eb 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -111,8 +111,21 @@ jobs: - name: Get postgres headers run: make postgres-headers -j$(nproc) - - name: Run cargo clippy - run: ./run_clippy.sh + # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations. + # This will catch compiler & clippy warnings in all feature combinations. + # TODO: use cargo hack for build and test as well, but, that's quite expensive. 
+ # NB: keep clippy args in sync with ./run_clippy.sh + - run: | + CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")" + if [ "$CLIPPY_COMMON_ARGS" = "" ]; then + echo "No clippy args found in .neon_clippy_args" + exit 1 + fi + echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV + - name: Run cargo clippy (debug) + run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS + - name: Run cargo clippy (release) + run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run - name: Check formatting diff --git a/.neon_clippy_args b/.neon_clippy_args new file mode 100644 index 0000000000..25e09c61a6 --- /dev/null +++ b/.neon_clippy_args @@ -0,0 +1,4 @@ +# * `-A unknown_lints` – do not warn about unknown lint suppressions +# that people with newer toolchains might use +# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) +export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings" diff --git a/run_clippy.sh b/run_clippy.sh index 9adfddedc2..ae2a17ec0c 100755 --- a/run_clippy.sh +++ b/run_clippy.sh @@ -1,4 +1,5 @@ -#!/bin/bash +#!/usr/bin/env bash +set -euo pipefail # If you save this in your path under the name "cargo-zclippy" (or whatever # name you like), then you can run it as "cargo zclippy" from the shell prompt. @@ -8,7 +9,11 @@ # warnings and errors right in the editor. # In vscode, this setting is Rust-analyzer>Check On Save:Command -# * `-A unknown_lints` – do not warn about unknown lint suppressions -# that people with newer toolchains might use -# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) -cargo clippy --locked --all --all-targets --all-features -- -A unknown_lints -D warnings +# NB: the CI runs the full feature powerset, so, it catches slightly more errors +# at the expense of longer runtime. 
This script is used by developers, so, don't +# do that here. + +thisscript="${BASH_SOURCE[0]}" +thisscript_dir="$(dirname "$thisscript")" +CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")" +exec cargo clippy --all-features $CLIPPY_COMMON_ARGS From e6ec2400fc0a8c2975b96f23c5b391064362e0da Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 27 Apr 2023 15:26:44 +0200 Subject: [PATCH 66/77] Enable hot standby PostgreSQL replicas. Notes: - This still needs UI support from the Console - I've not tuned any GUCs for PostgreSQL to make this work better - Safekeeper has gotten a tweak in which WAL is sent and how: It now sends zero-ed WAL data from the start of the timeline's first segment up to the first byte of the timeline to be compatible with normal PostgreSQL WAL streaming. - This includes the commits of #3714 Fixes one part of https://github.com/neondatabase/neon/issues/769 Co-authored-by: Anastasia Lubennikova --- compute_tools/src/compute.rs | 62 +++++- compute_tools/src/pg_helpers.rs | 7 + compute_tools/src/spec.rs | 16 ++ control_plane/src/bin/neon_local.rs | 79 ++++++- control_plane/src/endpoint.rs | 182 ++++++++++------ control_plane/src/postgresql_conf.rs | 2 +- .../var/db/postgres/specs/spec.json | 5 - libs/postgres_ffi/src/lib.rs | 7 +- libs/postgres_ffi/src/pg_constants.rs | 1 + libs/postgres_ffi/src/xlog_utils.rs | 57 ++++- libs/utils/src/lsn.rs | 19 ++ pageserver/src/basebackup.rs | 10 +- pgxn/neon/file_cache.c | 69 ++++++- pgxn/neon/libpagestore.c | 6 + pgxn/neon/neon.c | 1 + pgxn/neon/neon.h | 8 + pgxn/neon/pagestore_client.h | 1 + pgxn/neon/pagestore_smgr.c | 194 +++++++++++++++++- pgxn/neon/walproposer.c | 20 +- safekeeper/src/handler.rs | 9 +- safekeeper/src/wal_storage.rs | 86 +++++++- test_runner/fixtures/neon_fixtures.py | 43 ++++ test_runner/regress/test_compute_ctl.py | 5 - test_runner/regress/test_hot_standby.py | 79 +++++++ vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- 26 files changed, 851 insertions(+), 121 
deletions(-) create mode 100644 test_runner/regress/test_hot_standby.py diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 507dac9c0d..b6bc234beb 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -249,18 +249,63 @@ impl ComputeNode { /// safekeepers sync, basebackup, etc. #[instrument(skip(self, compute_state))] pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { + #[derive(Clone)] + enum Replication { + Primary, + Static { lsn: Lsn }, + HotStandby, + } + let pspec = compute_state.pspec.as_ref().expect("spec must be set"); + let spec = &pspec.spec; let pgdata_path = Path::new(&self.pgdata); + let hot_replica = if let Some(option) = spec.cluster.settings.find_ref("hot_standby") { + if let Some(value) = &option.value { + anyhow::ensure!(option.vartype == "bool"); + matches!(value.as_str(), "on" | "yes" | "true") + } else { + false + } + } else { + false + }; + + let replication = if hot_replica { + Replication::HotStandby + } else if let Some(lsn) = spec.cluster.settings.find("recovery_target_lsn") { + Replication::Static { + lsn: Lsn::from_str(&lsn)?, + } + } else { + Replication::Primary + }; + // Remove/create an empty pgdata directory and put configuration there. self.create_pgdata()?; config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?; - info!("starting safekeepers syncing"); - let lsn = self - .sync_safekeepers(pspec.storage_auth_token.clone()) - .with_context(|| "failed to sync safekeepers")?; - info!("safekeepers synced at LSN {}", lsn); + // Syncing safekeepers is only safe with primary nodes: if a primary + // is already connected it will be kicked out, so a secondary (standby) + // cannot sync safekeepers. 
+ let lsn = match &replication { + Replication::Primary => { + info!("starting safekeepers syncing"); + let lsn = self + .sync_safekeepers(pspec.storage_auth_token.clone()) + .with_context(|| "failed to sync safekeepers")?; + info!("safekeepers synced at LSN {}", lsn); + lsn + } + Replication::Static { lsn } => { + info!("Starting read-only node at static LSN {}", lsn); + *lsn + } + Replication::HotStandby => { + info!("Initializing standby from latest Pageserver LSN"); + Lsn(0) + } + }; info!( "getting basebackup@{} from pageserver {}", @@ -276,6 +321,13 @@ impl ComputeNode { // Update pg_hba.conf received with basebackup. update_pg_hba(pgdata_path)?; + match &replication { + Replication::Primary | Replication::Static { .. } => {} + Replication::HotStandby => { + add_standby_signal(pgdata_path)?; + } + } + Ok(()) } diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index bb787d0506..40dbea6907 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -94,6 +94,7 @@ impl PgOptionsSerialize for GenericOptions { pub trait GenericOptionsSearch { fn find(&self, name: &str) -> Option; + fn find_ref(&self, name: &str) -> Option<&GenericOption>; } impl GenericOptionsSearch for GenericOptions { @@ -103,6 +104,12 @@ impl GenericOptionsSearch for GenericOptions { let op = ops.iter().find(|s| s.name == name)?; op.value.clone() } + + /// Lookup option by name, returning ref + fn find_ref(&self, name: &str) -> Option<&GenericOption> { + let ops = self.as_ref()?; + ops.iter().find(|s| s.name == name) + } } pub trait RoleExt { diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 28e0ef41b7..bf3c407202 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,3 +1,4 @@ +use std::fs::File; use std::path::Path; use std::str::FromStr; @@ -145,6 +146,21 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { Ok(()) } +/// Create a standby.signal file +pub fn 
add_standby_signal(pgdata_path: &Path) -> Result<()> { + // XXX: consider making it a part of spec.json + info!("adding standby.signal"); + let signalfile = pgdata_path.join("standby.signal"); + + if !signalfile.exists() { + info!("created standby.signal"); + File::create(signalfile)?; + } else { + info!("reused pre-existing standby.signal"); + } + Ok(()) +} + /// Given a cluster spec json and open transaction it handles roles creation, /// deletion and update. #[instrument(skip_all)] diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 665cad8783..09278e1726 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -8,6 +8,7 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; use control_plane::endpoint::ComputeControlPlane; +use control_plane::endpoint::Replication; use control_plane::local_env::LocalEnv; use control_plane::pageserver::PageServerNode; use control_plane::safekeeper::SafekeeperNode; @@ -474,7 +475,14 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?; println!("Creating endpoint for imported timeline ..."); - cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?; + cplane.new_endpoint( + tenant_id, + name, + timeline_id, + None, + pg_version, + Replication::Primary, + )?; println!("Done"); } Some(("branch", branch_match)) => { @@ -560,20 +568,20 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( .iter() .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id) { - let lsn_str = match endpoint.lsn { - None => { - // -> primary endpoint + let lsn_str = match endpoint.replication { + Replication::Static(lsn) => { + // -> read-only endpoint + // Use the node's LSN. 
+ lsn.to_string() + } + _ => { + // -> primary endpoint or hot replica // Use the LSN at the end of the timeline. timeline_infos .get(&endpoint.timeline_id) .map(|bi| bi.last_record_lsn.to_string()) .unwrap_or_else(|| "?".to_string()) } - Some(lsn) => { - // -> read-only endpoint - // Use the endpoint's LSN. - lsn.to_string() - } }; let branch_name = timeline_name_mappings @@ -619,7 +627,26 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( .copied() .context("Failed to parse postgres version from the argument string")?; - cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?; + let hot_standby = sub_args + .get_one::("hot-standby") + .copied() + .unwrap_or(false); + + let replication = match (lsn, hot_standby) { + (Some(lsn), false) => Replication::Static(lsn), + (None, true) => Replication::Replica, + (None, false) => Replication::Primary, + (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), + }; + + cplane.new_endpoint( + tenant_id, + &endpoint_id, + timeline_id, + port, + pg_version, + replication, + )?; } "start" => { let port: Option = sub_args.get_one::("port").copied(); @@ -637,7 +664,21 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( None }; + let hot_standby = sub_args + .get_one::("hot-standby") + .copied() + .unwrap_or(false); + if let Some(endpoint) = endpoint { + match (&endpoint.replication, hot_standby) { + (Replication::Static(_), true) => { + bail!("Cannot start a node in hot standby mode when it is already configured as a static replica") + } + (Replication::Primary, true) => { + bail!("Cannot start a node as a hot standby replica, it is already configured as primary node") + } + _ => {} + } println!("Starting existing endpoint {endpoint_id}..."); endpoint.start(&auth_token)?; } else { @@ -659,6 +700,14 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( .get_one::("pg-version") .copied() 
.context("Failed to `pg-version` from the argument string")?; + + let replication = match (lsn, hot_standby) { + (Some(lsn), false) => Replication::Static(lsn), + (None, true) => Replication::Replica, + (None, false) => Replication::Primary, + (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), + }; + // when used with custom port this results in non obvious behaviour // port is remembered from first start command, i e // start --port X @@ -670,9 +719,9 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<( tenant_id, endpoint_id, timeline_id, - lsn, port, pg_version, + replication, )?; ep.start(&auth_token)?; } @@ -928,6 +977,12 @@ fn cli() -> Command { .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.") .required(false); + let hot_standby_arg = Arg::new("hot-standby") + .value_parser(value_parser!(bool)) + .long("hot-standby") + .help("If set, the node will be a hot replica on the specified timeline") + .required(false); + Command::new("Neon CLI") .arg_required_else_help(true) .version(GIT_VERSION) @@ -1052,6 +1107,7 @@ fn cli() -> Command { .long("config-only") .required(false)) .arg(pg_version_arg.clone()) + .arg(hot_standby_arg.clone()) ) .subcommand(Command::new("start") .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") @@ -1062,6 +1118,7 @@ fn cli() -> Command { .arg(lsn_arg) .arg(port_arg) .arg(pg_version_arg) + .arg(hot_standby_arg) ) .subcommand( Command::new("stop") diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 9e85138e68..7d3485518f 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -68,18 +68,19 @@ impl ComputeControlPlane { tenant_id: TenantId, name: &str, timeline_id: TimelineId, - lsn: Option, port: Option, pg_version: u32, + replication: Replication, ) -> Result> { let port = port.unwrap_or_else(|| self.get_port()); + let ep = Arc::new(Endpoint { name: 
name.to_owned(), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), env: self.env.clone(), pageserver: Arc::clone(&self.pageserver), timeline_id, - lsn, + replication, tenant_id, pg_version, }); @@ -95,6 +96,18 @@ impl ComputeControlPlane { /////////////////////////////////////////////////////////////////////////////// +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Replication { + // Regular read-write node + Primary, + // if recovery_target_lsn is provided, and we want to pin the node to a specific LSN + Static(Lsn), + // Hot standby; read-only replica. + // Future versions may want to distinguish between replicas with hot standby + // feedback and other kinds of replication configurations. + Replica, +} + #[derive(Debug)] pub struct Endpoint { /// used as the directory name @@ -102,7 +115,7 @@ pub struct Endpoint { pub tenant_id: TenantId, pub timeline_id: TimelineId, // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary. - pub lsn: Option, + pub replication: Replication, // port and address of the Postgres server pub address: SocketAddr, @@ -153,9 +166,17 @@ impl Endpoint { fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string()); let pg_version = u32::from_str(&pg_version_str)?; - // parse recovery_target_lsn, if any - let recovery_target_lsn: Option = - conf.parse_field_optional("recovery_target_lsn", &context)?; + // parse recovery_target_lsn and primary_conninfo into Recovery Target, if any + let replication = if let Some(lsn_str) = conf.get("recovery_target_lsn") { + Replication::Static(Lsn::from_str(lsn_str)?) 
+ } else if let Some(slot_name) = conf.get("primary_slot_name") { + let slot_name = slot_name.to_string(); + let prefix = format!("repl_{}_", timeline_id); + assert!(slot_name.starts_with(&prefix)); + Replication::Replica + } else { + Replication::Primary + }; // ok now Ok(Endpoint { @@ -164,7 +185,7 @@ impl Endpoint { env: env.clone(), pageserver: Arc::clone(pageserver), timeline_id, - lsn: recovery_target_lsn, + replication, tenant_id, pg_version, }) @@ -299,50 +320,83 @@ impl Endpoint { conf.append("neon.pageserver_connstring", &pageserver_connstr); conf.append("neon.tenant_id", &self.tenant_id.to_string()); conf.append("neon.timeline_id", &self.timeline_id.to_string()); - if let Some(lsn) = self.lsn { - conf.append("recovery_target_lsn", &lsn.to_string()); - } conf.append_line(""); - // Configure backpressure - // - Replication write lag depends on how fast the walreceiver can process incoming WAL. - // This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec, - // so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB. - // Actually latency should be much smaller (better if < 1sec). But we assume that recently - // updates pages are not requested from pageserver. - // - Replication flush lag depends on speed of persisting data by checkpointer (creation of - // delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to - // remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long - // recovery time (in case of pageserver crash) and disk space overflow at safekeepers. - // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread. - // To be able to restore database in case of pageserver node crash, safekeeper should not - // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers - // (if they are not able to upload WAL to S3). 
- conf.append("max_replication_write_lag", "15MB"); - conf.append("max_replication_flush_lag", "10GB"); + // Replication-related configurations, such as WAL sending + match &self.replication { + Replication::Primary => { + // Configure backpressure + // - Replication write lag depends on how fast the walreceiver can process incoming WAL. + // This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec, + // so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB. + // Actually latency should be much smaller (better if < 1sec). But we assume that recently + // updates pages are not requested from pageserver. + // - Replication flush lag depends on speed of persisting data by checkpointer (creation of + // delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to + // remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long + // recovery time (in case of pageserver crash) and disk space overflow at safekeepers. + // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread. + // To be able to restore database in case of pageserver node crash, safekeeper should not + // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers + // (if they are not able to upload WAL to S3). 
+ conf.append("max_replication_write_lag", "15MB"); + conf.append("max_replication_flush_lag", "10GB"); - if !self.env.safekeepers.is_empty() { - // Configure Postgres to connect to the safekeepers - conf.append("synchronous_standby_names", "walproposer"); + if !self.env.safekeepers.is_empty() { + // Configure Postgres to connect to the safekeepers + conf.append("synchronous_standby_names", "walproposer"); - let safekeepers = self - .env - .safekeepers - .iter() - .map(|sk| format!("localhost:{}", sk.pg_port)) - .collect::>() - .join(","); - conf.append("neon.safekeepers", &safekeepers); - } else { - // We only use setup without safekeepers for tests, - // and don't care about data durability on pageserver, - // so set more relaxed synchronous_commit. - conf.append("synchronous_commit", "remote_write"); + let safekeepers = self + .env + .safekeepers + .iter() + .map(|sk| format!("localhost:{}", sk.pg_port)) + .collect::>() + .join(","); + conf.append("neon.safekeepers", &safekeepers); + } else { + // We only use setup without safekeepers for tests, + // and don't care about data durability on pageserver, + // so set more relaxed synchronous_commit. + conf.append("synchronous_commit", "remote_write"); - // Configure the node to stream WAL directly to the pageserver - // This isn't really a supported configuration, but can be useful for - // testing. - conf.append("synchronous_standby_names", "pageserver"); + // Configure the node to stream WAL directly to the pageserver + // This isn't really a supported configuration, but can be useful for + // testing. + conf.append("synchronous_standby_names", "pageserver"); + } + } + Replication::Static(lsn) => { + conf.append("recovery_target_lsn", &lsn.to_string()); + } + Replication::Replica => { + assert!(!self.env.safekeepers.is_empty()); + + // TODO: use future host field from safekeeper spec + // Pass the list of safekeepers to the replica so that it can connect to any of them, + // whichever is available.
+ let sk_ports = self + .env + .safekeepers + .iter() + .map(|x| x.pg_port.to_string()) + .collect::>() + .join(","); + let sk_hosts = vec!["localhost"; self.env.safekeepers.len()].join(","); + + let connstr = format!( + "host={} port={} options='-c timeline_id={} tenant_id={}' application_name=replica replication=true", + sk_hosts, + sk_ports, + &self.timeline_id.to_string(), + &self.tenant_id.to_string(), + ); + + let slot_name = format!("repl_{}_", self.timeline_id); + conf.append("primary_conninfo", connstr.as_str()); + conf.append("primary_slot_name", slot_name.as_str()); + conf.append("hot_standby", "on"); + } } let mut file = File::create(self.pgdata().join("postgresql.conf"))?; @@ -355,21 +409,27 @@ impl Endpoint { } fn load_basebackup(&self, auth_token: &Option) -> Result<()> { - let backup_lsn = if let Some(lsn) = self.lsn { - Some(lsn) - } else if !self.env.safekeepers.is_empty() { - // LSN 0 means that it is bootstrap and we need to download just - // latest data from the pageserver. That is a bit clumsy but whole bootstrap - // procedure evolves quite actively right now, so let's think about it again - // when things would be more stable (TODO). - let lsn = self.sync_safekeepers(auth_token, self.pg_version)?; - if lsn == Lsn(0) { - None - } else { - Some(lsn) + let backup_lsn = match &self.replication { + Replication::Primary => { + if !self.env.safekeepers.is_empty() { + // LSN 0 means that it is bootstrap and we need to download just + // latest data from the pageserver. That is a bit clumsy but whole bootstrap + // procedure evolves quite actively right now, so let's think about it again + // when things would be more stable (TODO). 
+ let lsn = self.sync_safekeepers(auth_token, self.pg_version)?; + if lsn == Lsn(0) { + None + } else { + Some(lsn) + } + } else { + None + } + } + Replication::Static(lsn) => Some(*lsn), + Replication::Replica => { + None // Take the latest snapshot available to start with } - } else { - None }; self.do_basebackup(backup_lsn)?; @@ -466,7 +526,7 @@ impl Endpoint { // 3. Load basebackup self.load_basebackup(auth_token)?; - if self.lsn.is_some() { + if self.replication != Replication::Primary { File::create(self.pgdata().join("standby.signal"))?; } diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs index 34dc769e78..638575eb82 100644 --- a/control_plane/src/postgresql_conf.rs +++ b/control_plane/src/postgresql_conf.rs @@ -13,7 +13,7 @@ use std::io::BufRead; use std::str::FromStr; /// In-memory representation of a postgresql.conf file -#[derive(Default)] +#[derive(Default, Debug)] pub struct PostgresConf { lines: Vec, hash: HashMap, diff --git a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json index 10ae0b0ecf..565e5e368e 100644 --- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json +++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json @@ -28,11 +28,6 @@ "value": "replica", "vartype": "enum" }, - { - "name": "hot_standby", - "value": "on", - "vartype": "bool" - }, { "name": "wal_log_hints", "value": "on", diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 492ec9748a..b8eb469cb0 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -95,10 +95,13 @@ pub fn generate_wal_segment( segno: u64, system_id: u64, pg_version: u32, + lsn: Lsn, ) -> Result { + assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE)); + match pg_version { - 14 => v14::xlog_utils::generate_wal_segment(segno, system_id), - 15 => v15::xlog_utils::generate_wal_segment(segno, system_id), + 14 => 
v14::xlog_utils::generate_wal_segment(segno, system_id, lsn), + 15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn), _ => Err(SerializeError::BadInput), } } diff --git a/libs/postgres_ffi/src/pg_constants.rs b/libs/postgres_ffi/src/pg_constants.rs index 09678353af..6bc89ed37e 100644 --- a/libs/postgres_ffi/src/pg_constants.rs +++ b/libs/postgres_ffi/src/pg_constants.rs @@ -195,6 +195,7 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384; pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00; pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10; +pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001; pub const XLP_LONG_HEADER: u16 = 0x0002; /* From fsm_internals.h */ diff --git a/libs/postgres_ffi/src/xlog_utils.rs b/libs/postgres_ffi/src/xlog_utils.rs index 272c4d6dcc..8ed00a9e13 100644 --- a/libs/postgres_ffi/src/xlog_utils.rs +++ b/libs/postgres_ffi/src/xlog_utils.rs @@ -270,6 +270,11 @@ impl XLogPageHeaderData { use utils::bin_ser::LeSer; XLogPageHeaderData::des_from(&mut buf.reader()) } + + pub fn encode(&self) -> Result { + use utils::bin_ser::LeSer; + self.ser().map(|b| b.into()) + } } impl XLogLongPageHeaderData { @@ -328,22 +333,32 @@ impl CheckPoint { } } -// -// Generate new, empty WAL segment. -// We need this segment to start compute node. -// -pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result { +/// Generate new, empty WAL segment, with correct block headers at the first +/// page of the segment and the page that contains the given LSN. +/// We need this segment to start compute node. 
+pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result { let mut seg_buf = BytesMut::with_capacity(WAL_SEGMENT_SIZE); let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE); + + let page_off = lsn.block_offset(); + let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE); + + let first_page_only = seg_off < XLOG_BLCKSZ; + let (shdr_rem_len, infoflags) = if first_page_only { + (seg_off, pg_constants::XLP_FIRST_IS_CONTRECORD) + } else { + (0, 0) + }; + let hdr = XLogLongPageHeaderData { std: { XLogPageHeaderData { xlp_magic: XLOG_PAGE_MAGIC as u16, - xlp_info: pg_constants::XLP_LONG_HEADER, + xlp_info: pg_constants::XLP_LONG_HEADER | infoflags, xlp_tli: PG_TLI, xlp_pageaddr: pageaddr, - xlp_rem_len: 0, + xlp_rem_len: shdr_rem_len as u32, ..Default::default() // Put 0 in padding fields. } }, @@ -357,9 +372,37 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result= pg_constants::SIZE_OF_PAGE_HEADER as u64 { + pg_constants::XLP_FIRST_IS_CONTRECORD + } else { + 0 + }, + xlp_tli: PG_TLI, + xlp_pageaddr: lsn.page_lsn().0, + xlp_rem_len: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 { + page_off as u32 + } else { + 0u32 + }, + ..Default::default() // Put 0 in padding fields. + }; + let hdr_bytes = header.encode()?; + + debug_assert!(seg_buf.len() > block_offset + hdr_bytes.len()); + debug_assert_ne!(block_offset, 0); + + seg_buf[block_offset..block_offset + hdr_bytes.len()].copy_from_slice(&hdr_bytes[..]); + } + Ok(seg_buf.freeze()) } + #[repr(C)] #[derive(Serialize)] struct XlLogicalMessage { diff --git a/libs/utils/src/lsn.rs b/libs/utils/src/lsn.rs index acf5ea28d7..0493d43088 100644 --- a/libs/utils/src/lsn.rs +++ b/libs/utils/src/lsn.rs @@ -62,29 +62,48 @@ impl Lsn { } /// Compute the offset into a segment + #[inline] pub fn segment_offset(self, seg_sz: usize) -> usize { (self.0 % seg_sz as u64) as usize } /// Compute LSN of the segment start. 
+ #[inline] pub fn segment_lsn(self, seg_sz: usize) -> Lsn { Lsn(self.0 - (self.0 % seg_sz as u64)) } /// Compute the segment number + #[inline] pub fn segment_number(self, seg_sz: usize) -> u64 { self.0 / seg_sz as u64 } /// Compute the offset into a block + #[inline] pub fn block_offset(self) -> u64 { const BLCKSZ: u64 = XLOG_BLCKSZ as u64; self.0 % BLCKSZ } + /// Compute the LSN of the start of the block (page) that contains + /// this Lsn + #[inline] + pub fn page_lsn(self) -> Lsn { + Lsn(self.0 - self.block_offset()) + } + + /// Compute the offset, within its segment, of the start of the block + /// (page) that contains this Lsn + #[inline] + pub fn page_offset_in_segment(self, seg_sz: usize) -> u64 { + (self.0 - self.block_offset()) - self.segment_lsn(seg_sz).0 + } + /// Compute the bytes remaining in this block /// /// If the LSN is already at the block boundary, it will return `XLOG_BLCKSZ`. + #[inline] pub fn remaining_in_block(self) -> u64 { const BLCKSZ: u64 = XLOG_BLCKSZ as u64; BLCKSZ - (self.0 % BLCKSZ) diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 41fa0a67bb..c666fc785c 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -463,9 +463,13 @@ where let wal_file_path = format!("pg_wal/{}", wal_file_name); let header = new_tar_header(&wal_file_path, WAL_SEGMENT_SIZE as u64)?; - let wal_seg = - postgres_ffi::generate_wal_segment(segno, system_identifier, self.timeline.pg_version) - .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?; + let wal_seg = postgres_ffi::generate_wal_segment( + segno, + system_identifier, + self.timeline.pg_version, + self.lsn, + ) + .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?; ensure!(wal_seg.len() == WAL_SEGMENT_SIZE); self.ar.append(&header, &wal_seg[..]).await?; Ok(()) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 8dff259f02..cc46fb5a25 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -370,6 +370,74 @@
lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno) return found; } +/* + * Evict a page (if present) from the local file cache + */ +void +lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno) +{ + BufferTag tag; + FileCacheEntry* entry; + ssize_t rc; + bool found; + int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1); + uint32 hash; + + if (lfc_size_limit == 0) /* fast exit if file cache is disabled */ + return; + + INIT_BUFFERTAG(tag, rnode, forkNum, (blkno & ~(BLOCKS_PER_CHUNK-1))); + + hash = get_hash_value(lfc_hash, &tag); + + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, &found); + + if (!found) + { + /* nothing to do */ + LWLockRelease(lfc_lock); + return; + } + + /* remove the page from the cache */ + entry->bitmap[chunk_offs >> 5] &= ~(1 << (chunk_offs & (32 - 1))); + + /* + * If the chunk has no live entries, we can position the chunk to be + * recycled first. + */ + if (entry->bitmap[chunk_offs >> 5] == 0) + { + bool has_remaining_pages; + + for (int i = 0; i < (BLOCKS_PER_CHUNK / 32); i++) { + if (entry->bitmap[i] != 0) + { + has_remaining_pages = true; + break; + } + } + + /* + * Put the entry at the position that is first to be reclaimed when + * we have no cached pages remaining in the chunk + */ + if (!has_remaining_pages) + { + dlist_delete(&entry->lru_node); + dlist_push_head(&lfc_ctl->lru, &entry->lru_node); + } + } + + /* + * Done: apart from empty chunks, we don't move chunks in the LRU when + * they're empty because eviction isn't usage. + */ + + LWLockRelease(lfc_lock); +} + /* * Try to read page from local cache. * Returns true if page is found in local cache. @@ -528,7 +596,6 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, LWLockRelease(lfc_lock); } - /* * Record structure holding the to be exposed cache data. 
*/ diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index c44e8fcda5..21330c018f 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -17,6 +17,8 @@ #include "pagestore_client.h" #include "fmgr.h" #include "access/xlog.h" +#include "access/xlogutils.h" +#include "storage/buf_internals.h" #include "libpq-fe.h" #include "libpq/pqformat.h" @@ -57,6 +59,8 @@ int n_unflushed_requests = 0; int flush_every_n_requests = 8; int readahead_buffer_size = 128; +bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL; + static void pageserver_flush(void); static bool @@ -467,6 +471,8 @@ pg_init_libpagestore(void) smgr_hook = smgr_neon; smgr_init_hook = smgr_init_neon; dbsize_hook = neon_dbsize; + old_redo_read_buffer_filter = redo_read_buffer_filter; + redo_read_buffer_filter = neon_redo_read_buffer_filter; } lfc_init(); } diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 5c98902554..217c1974a0 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -24,6 +24,7 @@ #include "neon.h" #include "walproposer.h" +#include "pagestore_client.h" PG_MODULE_MAGIC; void _PG_init(void); diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 3eac8f4570..60d321a945 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -11,6 +11,7 @@ #ifndef NEON_H #define NEON_H +#include "access/xlogreader.h" /* GUCs */ extern char *neon_auth_token; @@ -20,4 +21,11 @@ extern char *neon_tenant; extern void pg_init_libpagestore(void); extern void pg_init_walproposer(void); +/* + * Returns true if we shouldn't do REDO on that block in record indicated by + * block_id; false otherwise. 
+ */ +extern bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id); +extern bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id); + #endif /* NEON_H */ diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index a1f05ac685..22f5cdb73a 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -207,6 +207,7 @@ extern void forget_cached_relsize(RelFileNode rnode, ForkNumber forknum); extern void lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer); extern bool lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer); extern bool lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno); +extern void lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno); extern void lfc_init(void); diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 5b30641856..528d4eb051 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -189,6 +189,7 @@ typedef struct PrfHashEntry { #define SH_DEFINE #define SH_DECLARE #include "lib/simplehash.h" +#include "neon.h" /* * PrefetchState maintains the state of (prefetch) getPage@LSN requests. @@ -1209,6 +1210,9 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch if (ShutdownRequestPending) return; + /* Don't log any pages if we're not allowed to do so. */ + if (!XLogInsertAllowed()) + return; /* * Whenever a VM or FSM page is evicted, WAL-log it. FSM and (some) VM @@ -1375,8 +1379,18 @@ neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockN if (RecoveryInProgress()) { + /* + * We don't know if WAL has been generated but not yet replayed, so + * we're conservative in our estimates about latest pages. + */ *latest = false; - lsn = GetXLogReplayRecPtr(NULL); + + /* + * Get the last written LSN of this page. 
+ */ + lsn = GetLastWrittenLSN(rnode, forknum, blkno); + lsn = nm_adjust_lsn(lsn); + elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ", (uint32) ((lsn) >> 32), (uint32) (lsn)); } @@ -1559,6 +1573,15 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) /* * Newly created relation is empty, remember that in the relsize cache. * + * Note that in REDO, this is called to make sure the relation fork exists, + * but it does not truncate the relation. So, we can only update the + * relsize if it didn't exist before. + * + * Also, in redo, we must make sure to update the cached size of the + * relation, as that is the primary source of truth for REDO's + * file length considerations, and as file extension isn't (perfectly) + * logged, we need to take care of that before we hit file size checks. + * * FIXME: This is currently not just an optimization, but required for * correctness. Postgres can call smgrnblocks() on the newly-created * relation. Currently, we don't call SetLastWrittenLSN() when a new @@ -1566,7 +1589,14 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * cache, we might call smgrnblocks() on the newly-created relation before * the creation WAL record hass been received by the page server. */ - set_cached_relsize(reln->smgr_rnode.node, forkNum, 0); + if (isRedo) + { + update_cached_relsize(reln->smgr_rnode.node, forkNum, 0); + get_cached_relsize(reln->smgr_rnode.node, forkNum, + &reln->smgr_cached_nblocks[forkNum]); + } + else + set_cached_relsize(reln->smgr_rnode.node, forkNum, 0); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -1831,6 +1861,26 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, .blockNum = blkno, }; + /* + * The redo process does not lock pages that it needs to replay but are + * not in the shared buffers, so a concurrent process may request the + * page after redo has decided it won't redo that page and updated the + * LwLSN for that page. 
+ * If we're in hot standby we need to take care that we don't return + * until after REDO has finished replaying up to that LwLSN, as the page + * should have been locked up to that point. + * + * See also the description on neon_redo_read_buffer_filter below. + * + * NOTE: It is possible that the WAL redo process will still do IO due to + * concurrent failed read IOs. Those IOs should never have a request_lsn + * that is as large as the WAL record we're currently replaying, if it + * weren't for the behaviour of the LwLsn cache that uses the highest + * value of the LwLsn cache when the entry is not found. + */ + if (RecoveryInProgress() && !(MyBackendType == B_STARTUP)) + XLogWaitForReplayOf(request_lsn); + /* * Try to find prefetched page in the list of received pages. */ @@ -2584,3 +2634,143 @@ smgr_init_neon(void) smgr_init_standard(); neon_init(); } + + +/* + * Return whether we can skip the redo for this block. + * + * The conditions for skipping the IO are: + * + * - The block is not in the shared buffers, and + * - The block is not in the local file cache + * + * ... because any subsequent read of the page requires us to read + * the new version of the page from the PageServer. We do not + * check the local file cache; we instead evict the page from LFC: it + * is cheaper than going through the FS calls to read the page, and + * limits the number of lock operations used in the REDO process. + * + * We have one exception to the rules for skipping IO: We always apply + * changes to shared catalogs' pages. Although this is mostly out of caution, + * catalog updates usually result in backends rebuilding their catalog snapshot, + * which means it's quite likely the modified page is going to be used soon. + * + * It is important to note that skipping WAL redo for a page also means + * the page isn't locked by the redo process, as there is no Buffer + * being returned, nor is there a buffer descriptor to lock. 
+ * This means that any IO that wants to read this block needs to wait + * for the WAL REDO process to finish processing the WAL record before + * it allows the system to start reading the block, as releasing the + * block early could lead to phantom reads. + * + * For example, REDO for a WAL record that modifies 3 blocks could skip + * the first block, wait for a lock on the second, and then modify the + * third block. Without skipping, all blocks would be locked and phantom + * reads would not occur, but with skipping, a concurrent process could + * read block 1 with post-REDO contents and read block 3 with pre-REDO + * contents, where with REDO locking it would wait on block 1 and see + * block 3 with post-REDO contents only. + */ +bool +neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id) +{ + XLogRecPtr end_recptr = record->EndRecPtr; + XLogRecPtr prev_end_recptr = record->ReadRecPtr - 1; + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + BufferTag tag; + uint32 hash; + LWLock *partitionLock; + Buffer buffer; + bool no_redo_needed; + BlockNumber relsize; + + if (old_redo_read_buffer_filter && old_redo_read_buffer_filter(record, block_id)) + return true; + +#if PG_VERSION_NUM < 150000 + if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) + elog(PANIC, "failed to locate backup block with ID %d", block_id); +#else + XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno); +#endif + + /* + * Out of an abundance of caution, we always run redo on shared catalogs, + * regardless of whether the block is stored in shared buffers. + * See also this function's top comment. + */ + if (!OidIsValid(rnode.dbNode)) + return false; + + INIT_BUFFERTAG(tag, rnode, forknum, blkno); + hash = BufTableHashCode(&tag); + partitionLock = BufMappingPartitionLock(hash); + + /* + * Lock the partition of shared_buffers so that it can't be updated + * concurrently. 
+ */ + LWLockAcquire(partitionLock, LW_SHARED); + + /* Try to find the relevant buffer */ + buffer = BufTableLookup(&tag, hash); + + no_redo_needed = buffer < 0; + + /* we don't have the buffer in memory, update lwLsn past this record */ + if (no_redo_needed) + { + SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno); + lfc_evict(rnode, forknum, blkno); + } + else + { + SetLastWrittenLSNForBlock(prev_end_recptr, rnode, forknum, blkno); + } + + LWLockRelease(partitionLock); + + /* Extend the relation if we know its size */ + if (get_cached_relsize(rnode, forknum, &relsize)) + { + if (relsize < blkno + 1) + update_cached_relsize(rnode, forknum, blkno + 1); + } + else + { + /* + * Size was not cached. We populate the cache now, with the size of the + * relation measured after this WAL record is applied. + * + * This length is later reused when we open the smgr to read the block, + * which is fine and expected. + */ + + NeonResponse *response; + NeonNblocksResponse *nbresponse; + NeonNblocksRequest request = { + .req = (NeonRequest) { + .lsn = end_recptr, + .latest = false, + .tag = T_NeonNblocksRequest, + }, + .rnode = rnode, + .forknum = forknum, + }; + + response = page_server_request(&request); + + Assert(response->tag == T_NeonNblocksResponse); + nbresponse = (NeonNblocksResponse *) response; + + Assert(nbresponse->n_blocks > blkno); + + set_cached_relsize(rnode, forknum, nbresponse->n_blocks); + + elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks); + } + + return no_redo_needed; +} diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 45037a8c01..a99be40955 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -1964,18 +1964,26 @@ CombineHotStanbyFeedbacks(HotStandbyFeedback * hs) { if (safekeeper[i].appendResponse.hs.ts != 0) { - if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.xmin, hs->xmin)) + HotStandbyFeedback *skhs = &safekeeper[i].appendResponse.hs; + if (FullTransactionIdIsNormal(skhs->xmin) + && 
FullTransactionIdPrecedes(skhs->xmin, hs->xmin)) { - hs->xmin = safekeeper[i].appendResponse.hs.xmin; - hs->ts = safekeeper[i].appendResponse.hs.ts; + hs->xmin = skhs->xmin; + hs->ts = skhs->ts; } - if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.catalog_xmin, hs->catalog_xmin)) + if (FullTransactionIdIsNormal(skhs->catalog_xmin) + && FullTransactionIdPrecedes(skhs->catalog_xmin, hs->xmin)) { - hs->catalog_xmin = safekeeper[i].appendResponse.hs.catalog_xmin; - hs->ts = safekeeper[i].appendResponse.hs.ts; + hs->catalog_xmin = skhs->catalog_xmin; + hs->ts = skhs->ts; } } } + + if (hs->xmin.value == ~0) + hs->xmin = InvalidFullTransactionId; + if (hs->catalog_xmin.value == ~0) + hs->catalog_xmin = InvalidFullTransactionId; } /* diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index a589fe1869..2c3d1cea0e 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -3,6 +3,7 @@ use anyhow::Context; use std::str; +use std::str::FromStr; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, info_span, Instrument}; @@ -49,12 +50,14 @@ fn parse_cmd(cmd: &str) -> anyhow::Result { if cmd.starts_with("START_WAL_PUSH") { Ok(SafekeeperPostgresCommand::StartWalPush) } else if cmd.starts_with("START_REPLICATION") { - let re = - Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap(); + let re = Regex::new( + r"START_REPLICATION(?: SLOT [^ ]+)?(?: PHYSICAL)? 
([[:xdigit:]]+/[[:xdigit:]]+)", + ) + .unwrap(); let mut caps = re.captures_iter(cmd); let start_lsn = caps .next() - .map(|cap| cap[1].parse::()) + .map(|cap| Lsn::from_str(&cap[1])) .context("parse start LSN from START_REPLICATION command")??; Ok(SafekeeperPostgresCommand::StartReplication { start_lsn }) } else if cmd.starts_with("IDENTIFY_SYSTEM") { diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index 9b385630c2..54e27714ea 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -18,6 +18,7 @@ use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogF use postgres_ffi::{XLogSegNo, PG_TLI}; use std::cmp::{max, min}; +use bytes::Bytes; use std::fs::{self, remove_file, File, OpenOptions}; use std::io::Write; use std::path::{Path, PathBuf}; @@ -36,6 +37,7 @@ use postgres_ffi::XLOG_BLCKSZ; use postgres_ffi::waldecoder::WalStreamDecoder; +use pq_proto::SystemId; use tokio::io::{AsyncReadExt, AsyncSeekExt}; pub trait Storage { @@ -478,6 +480,13 @@ pub struct WalReader { // We don't have WAL locally if LSN is less than local_start_lsn local_start_lsn: Lsn, + // We will respond with zero-ed bytes before this Lsn as long as + // pos is in the same segment as timeline_start_lsn. + timeline_start_lsn: Lsn, + // integer version number of PostgreSQL, e.g. 
14; 15; 16 + pg_version: u32, + system_id: SystemId, + timeline_start_segment: Option, } impl WalReader { @@ -488,19 +497,27 @@ impl WalReader { start_pos: Lsn, enable_remote_read: bool, ) -> Result { - if start_pos < state.timeline_start_lsn { + if state.server.wal_seg_size == 0 || state.local_start_lsn == Lsn(0) { + bail!("state uninitialized, no data to read"); + } + + // TODO: Upgrade to bail!() once we know this couldn't possibly happen + if state.timeline_start_lsn == Lsn(0) { + warn!("timeline_start_lsn uninitialized before initializing wal reader"); + } + + if start_pos + < state + .timeline_start_lsn + .segment_lsn(state.server.wal_seg_size as usize) + { bail!( - "Requested streaming from {}, which is before the start of the timeline {}", + "Requested streaming from {}, which is before the start of the timeline {}, and also doesn't start at the first segment of that timeline", start_pos, state.timeline_start_lsn ); } - // TODO: add state.timeline_start_lsn == Lsn(0) check - if state.server.wal_seg_size == 0 || state.local_start_lsn == Lsn(0) { - bail!("state uninitialized, no data to read"); - } - Ok(Self { workdir, timeline_dir, @@ -509,10 +526,65 @@ impl WalReader { wal_segment: None, enable_remote_read, local_start_lsn: state.local_start_lsn, + timeline_start_lsn: state.timeline_start_lsn, + pg_version: state.server.pg_version / 10000, + system_id: state.server.system_id, + timeline_start_segment: None, }) } pub async fn read(&mut self, buf: &mut [u8]) -> Result { + // If this timeline is new, we may not have a full segment yet, so + // we pad the first bytes of the timeline's first WAL segment with 0s + if self.pos < self.timeline_start_lsn { + debug_assert_eq!( + self.pos.segment_number(self.wal_seg_size), + self.timeline_start_lsn.segment_number(self.wal_seg_size) + ); + + // All bytes after timeline_start_lsn are in WAL, but those before + // are not, so we manually construct an empty segment for the bytes + // not available in this timeline. 
+ if self.timeline_start_segment.is_none() { + let it = postgres_ffi::generate_wal_segment( + self.timeline_start_lsn.segment_number(self.wal_seg_size), + self.system_id, + self.pg_version, + self.timeline_start_lsn, + )?; + self.timeline_start_segment = Some(it); + } + + assert!(self.timeline_start_segment.is_some()); + let segment = self.timeline_start_segment.take().unwrap(); + + let seg_bytes = &segment[..]; + + // How much of the current segment have we already consumed? + let pos_seg_offset = self.pos.segment_offset(self.wal_seg_size); + + // How many bytes may we consume in total? + let tl_start_seg_offset = self.timeline_start_lsn.segment_offset(self.wal_seg_size); + + debug_assert!(seg_bytes.len() > pos_seg_offset); + debug_assert!(seg_bytes.len() > tl_start_seg_offset); + + // Copy as many bytes as possible into the buffer + let len = (tl_start_seg_offset - pos_seg_offset).min(buf.len()); + buf[0..len].copy_from_slice(&seg_bytes[pos_seg_offset..pos_seg_offset + len]); + + self.pos += len as u64; + + // If we're done with the segment, we can release it's memory. + // However, if we're not yet done, store it so that we don't have to + // construct the segment the next time this function is called. 
+ if self.pos < self.timeline_start_lsn { + self.timeline_start_segment = Some(segment); + } + + return Ok(len); + } + let mut wal_segment = match self.wal_segment.take() { Some(reader) => reader, None => self.open_segment().await?, diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index f209dca560..a46c19d7fd 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1451,6 +1451,7 @@ class NeonCli(AbstractNeonCli): branch_name: str, endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, + hot_standby: bool = False, lsn: Optional[Lsn] = None, port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": @@ -1470,6 +1471,8 @@ class NeonCli(AbstractNeonCli): args.extend(["--port", str(port)]) if endpoint_id is not None: args.append(endpoint_id) + if hot_standby: + args.extend(["--hot-standby", "true"]) res = self.raw_cli(args) res.check_returncode() @@ -2206,6 +2209,7 @@ class Endpoint(PgProtocol): super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") self.env = env self.running = False + self.branch_name: Optional[str] = None # dubious self.endpoint_id: Optional[str] = None # dubious, see asserts below self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA self.tenant_id = tenant_id @@ -2217,6 +2221,7 @@ class Endpoint(PgProtocol): self, branch_name: str, endpoint_id: Optional[str] = None, + hot_standby: bool = False, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, ) -> "Endpoint": @@ -2231,12 +2236,14 @@ class Endpoint(PgProtocol): if endpoint_id is None: endpoint_id = self.env.generate_endpoint_id() self.endpoint_id = endpoint_id + self.branch_name = branch_name self.env.neon_cli.endpoint_create( branch_name, endpoint_id=self.endpoint_id, tenant_id=self.tenant_id, lsn=lsn, + hot_standby=hot_standby, port=self.port, ) path = Path("endpoints") / self.endpoint_id / "pgdata" @@ -2361,6 +2368,7 @@ 
class Endpoint(PgProtocol): self, branch_name: str, endpoint_id: Optional[str] = None, + hot_standby: bool = False, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, ) -> "Endpoint": @@ -2375,6 +2383,7 @@ class Endpoint(PgProtocol): branch_name=branch_name, endpoint_id=endpoint_id, config_lines=config_lines, + hot_standby=hot_standby, lsn=lsn, ).start() @@ -2408,6 +2417,7 @@ class EndpointFactory: endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, + hot_standby: bool = False, config_lines: Optional[List[str]] = None, ) -> Endpoint: ep = Endpoint( @@ -2421,6 +2431,7 @@ class EndpointFactory: return ep.create_start( branch_name=branch_name, endpoint_id=endpoint_id, + hot_standby=hot_standby, config_lines=config_lines, lsn=lsn, ) @@ -2431,6 +2442,7 @@ class EndpointFactory: endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, + hot_standby: bool = False, config_lines: Optional[List[str]] = None, ) -> Endpoint: ep = Endpoint( @@ -2449,6 +2461,7 @@ class EndpointFactory: branch_name=branch_name, endpoint_id=endpoint_id, lsn=lsn, + hot_standby=hot_standby, config_lines=config_lines, ) @@ -2458,6 +2471,36 @@ class EndpointFactory: return self + def new_replica(self, origin: Endpoint, endpoint_id: str, config_lines: Optional[List[str]]): + branch_name = origin.branch_name + assert origin in self.endpoints + assert branch_name is not None + + return self.create( + branch_name=branch_name, + endpoint_id=endpoint_id, + tenant_id=origin.tenant_id, + lsn=None, + hot_standby=True, + config_lines=config_lines, + ) + + def new_replica_start( + self, origin: Endpoint, endpoint_id: str, config_lines: Optional[List[str]] = None + ): + branch_name = origin.branch_name + assert origin in self.endpoints + assert branch_name is not None + + return self.create_start( + branch_name=branch_name, + endpoint_id=endpoint_id, + tenant_id=origin.tenant_id, + lsn=None, + 
hot_standby=True, + config_lines=config_lines, + ) + @dataclass class SafekeeperPort: diff --git a/test_runner/regress/test_compute_ctl.py b/test_runner/regress/test_compute_ctl.py index aa99a01c83..d72ffe078d 100644 --- a/test_runner/regress/test_compute_ctl.py +++ b/test_runner/regress/test_compute_ctl.py @@ -59,11 +59,6 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): "value": "replica", "vartype": "enum" }, - { - "name": "hot_standby", - "value": "on", - "vartype": "bool" - }, { "name": "neon.safekeepers", "value": """ diff --git a/test_runner/regress/test_hot_standby.py b/test_runner/regress/test_hot_standby.py new file mode 100644 index 0000000000..12e034cea2 --- /dev/null +++ b/test_runner/regress/test_hot_standby.py @@ -0,0 +1,79 @@ +import pytest +from fixtures.neon_fixtures import NeonEnv + + +@pytest.mark.timeout(1800) +def test_hot_standby(neon_simple_env: NeonEnv): + env = neon_simple_env + + with env.endpoints.create_start( + branch_name="main", + endpoint_id="primary", + ) as primary: + with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") as secondary: + primary_lsn = None + cought_up = False + queries = [ + "SHOW neon.timeline_id", + "SHOW neon.tenant_id", + "SELECT relname FROM pg_class WHERE relnamespace = current_schema()::regnamespace::oid", + "SELECT COUNT(*), SUM(i) FROM test", + ] + responses = dict() + + with primary.connect() as p_con: + with p_con.cursor() as p_cur: + p_cur.execute("CREATE TABLE test AS SELECT generate_series(1, 100) AS i") + + # Explicit commit to make sure other connections (and replicas) can + # see the changes of this commit. + p_con.commit() + + with p_con.cursor() as p_cur: + p_cur.execute("SELECT pg_current_wal_insert_lsn()::text") + res = p_cur.fetchone() + assert res is not None + (lsn,) = res + primary_lsn = lsn + + # Explicit commit to make sure other connections (and replicas) can + # see the changes of this commit. 
+ # Note that this may generate more WAL if the transaction has changed + # things, but we don't care about that. + p_con.commit() + + for query in queries: + with p_con.cursor() as p_cur: + p_cur.execute(query) + res = p_cur.fetchone() + assert res is not None + response = res + responses[query] = response + + with secondary.connect() as s_con: + with s_con.cursor() as s_cur: + s_cur.execute("SELECT 1 WHERE pg_is_in_recovery()") + res = s_cur.fetchone() + assert res is not None + + while not cought_up: + with s_con.cursor() as secondary_cursor: + secondary_cursor.execute("SELECT pg_last_wal_replay_lsn()") + res = secondary_cursor.fetchone() + assert res is not None + (secondary_lsn,) = res + # There may be more changes on the primary after we got our LSN + # due to e.g. autovacuum, but that shouldn't impact the content + # of the tables, so we check whether we've replayed up to at + # least after the commit of the `test` table. + cought_up = secondary_lsn >= primary_lsn + + # Explicit commit to flush any transient transaction-level state. 
+ s_con.commit() + + for query in queries: + with s_con.cursor() as secondary_cursor: + secondary_cursor.execute(query) + response = secondary_cursor.fetchone() + assert response is not None + assert response == responses[query] diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index 3e70693c91..a2daebc6b4 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit 3e70693c9178878404d14a61c96b15b74eb02688 +Subproject commit a2daebc6b445dcbcca9c18e1711f47c1db7ffb04 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 4ad87b0f36..aee72b7be9 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 4ad87b0f364a2313600c1d9774ca33df00e606f4 +Subproject commit aee72b7be903e52d9bdc6449aa4c17fb852d8708 From 3be81dd36bcda1288ad25ea2ff5d3acd8b26b24f Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 27 Apr 2023 16:07:25 +0200 Subject: [PATCH 67/77] fix `clippy --release` failure introduced in #4030 (#4095) PR `build: run clippy for powerset of features (#4077)` brought us a `clippy --release` pass. It was merged after #4030, which fails under `clippy --release` with ``` error: static `TENANT_ID_EXTRACTOR` is never used --> pageserver/src/tenant/timeline.rs:4270:16 | 4270 | pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy< | ^^^^^^^^^^^^^^^^^^^ | = note: `-D dead-code` implied by `-D warnings` error: static `TIMELINE_ID_EXTRACTOR` is never used --> pageserver/src/tenant/timeline.rs:4276:16 | 4276 | pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy< | ^^^^^^^^^^^^^^^^^^^^^ ``` A merge queue would have prevented this. 
--- pageserver/src/tenant/timeline.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 87f03f30b6..5c671ffd63 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -19,7 +19,6 @@ use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError}; use tokio_util::sync::CancellationToken; use tracing::*; use utils::id::TenantTimelineId; -use utils::tracing_span_assert; use std::cmp::{max, min, Ordering}; use std::collections::BinaryHeap; @@ -4265,8 +4264,15 @@ fn rename_to_backup(path: &Path) -> anyhow::Result<()> { bail!("couldn't find an unused backup number for {:?}", path) } +#[cfg(not(debug_assertions))] +#[inline] +pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {} + +#[cfg(debug_assertions)] #[inline] pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() { + use utils::tracing_span_assert; + pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy< tracing_span_assert::MultiNameExtractor<2>, > = once_cell::sync::Lazy::new(|| { @@ -4279,7 +4285,6 @@ pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() { tracing_span_assert::MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]) }); - #[cfg(debug_assertions)] match tracing_span_assert::check_fields_present([ &*TENANT_ID_EXTRACTOR, &*TIMELINE_ID_EXTRACTOR, From f5b4697c90cb37cc1386b77b03cc2a013fde1af3 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Wed, 5 Apr 2023 14:55:55 +0400 Subject: [PATCH 68/77] Log session_id when proxy per client task errors out. 
--- proxy/src/proxy.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 9945e3697f..1169d76160 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -95,9 +95,9 @@ pub async fn task_main( handle_client(config, &cancel_map, session_id, socket).await } - .unwrap_or_else(|e| { + .unwrap_or_else(move |e| { // Acknowledge that the task has finished with an error. - error!("per-client task finished with an error: {e:#}"); + error!(?session_id, "per-client task finished with an error: {e:#}"); }), ); } From d1e86d65dc64635bf0a3cef1aaa26766e683cd4c Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Apr 2023 17:27:21 +0300 Subject: [PATCH 69/77] Run rustfmt to fix whitespace. Commit e6ec2400fc introduced some trivial whitespace issues. --- libs/postgres_ffi/src/xlog_utils.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/postgres_ffi/src/xlog_utils.rs b/libs/postgres_ffi/src/xlog_utils.rs index 8ed00a9e13..4d7bb61883 100644 --- a/libs/postgres_ffi/src/xlog_utils.rs +++ b/libs/postgres_ffi/src/xlog_utils.rs @@ -346,7 +346,7 @@ pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result Result Result Date: Thu, 27 Apr 2023 18:51:57 +0300 Subject: [PATCH 70/77] refactor: Cleanup page service (#4097) Refactoring part of #4093. Numerious `Send + Sync` bounds were a distraction, that were not needed at all. The proper `Bytes` usage and one `"error_message".to_string()` are just drive-by fixes. Not using the `PostgresBackendTCP` allows us to start setting read timeouts (and more). `PostgresBackendTCP` is still used from proxy, so it cannot be removed. 
--- pageserver/src/import_datadir.rs | 8 ++-- pageserver/src/page_service.rs | 64 +++++++++++++++++++++----------- 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 39e434a023..936de35eb9 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -114,7 +114,7 @@ async fn import_rel( path: &Path, spcoid: Oid, dboid: Oid, - reader: &mut (impl AsyncRead + Send + Sync + Unpin), + reader: &mut (impl AsyncRead + Unpin), len: usize, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -200,7 +200,7 @@ async fn import_slru( modification: &mut DatadirModification<'_>, slru: SlruKind, path: &Path, - reader: &mut (impl AsyncRead + Send + Sync + Unpin), + reader: &mut (impl AsyncRead + Unpin), len: usize, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -612,8 +612,8 @@ async fn import_file( Ok(None) } -async fn read_all_bytes(reader: &mut (impl AsyncRead + Send + Sync + Unpin)) -> Result { +async fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result { let mut buf: Vec = vec![]; reader.read_to_end(&mut buf).await?; - Ok(Bytes::copy_from_slice(&buf[..])) + Ok(Bytes::from(buf)) } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 135f08e846..3610704f2c 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -20,7 +20,6 @@ use pageserver_api::models::{ PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, }; -use postgres_backend::PostgresBackendTCP; use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError}; use pq_proto::framed::ConnectionError; use pq_proto::FeStartupPacket; @@ -32,6 +31,7 @@ use std::str; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; +use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::io::StreamReader; use tracing::*; use 
utils::id::ConnectionId; @@ -57,7 +57,10 @@ use crate::trace::Tracer; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; -fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream> + '_ { +fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream> + '_ +where + IO: AsyncRead + AsyncWrite + Unpin, +{ async_stream::try_stream! { loop { let msg = tokio::select! { @@ -65,8 +68,8 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream { // We were requested to shut down. - let msg = "pageserver is shutting down".to_string(); - let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)); + let msg = "pageserver is shutting down"; + let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, None)); Err(QueryError::Other(anyhow::anyhow!(msg))) } @@ -125,7 +128,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream anyhow::Result<()> { +async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()> { use tokio::io::AsyncReadExt; let mut buf = [0u8; 512]; @@ -245,12 +248,14 @@ async fn page_service_conn_main( .set_nodelay(true) .context("could not set TCP_NODELAY")?; + let peer_addr = socket.peer_addr().context("get peer address")?; + // XXX: pgbackend.run() should take the connection_ctx, // and create a child per-query context when it invokes process_query. // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler // and create the per-query context in process_query ourselves. 
let mut conn_handler = PageServerHandler::new(conf, auth, connection_ctx); - let pgbackend = PostgresBackend::new(socket, auth_type, None)?; + let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?; match pgbackend .run(&mut conn_handler, task_mgr::shutdown_watcher) @@ -332,13 +337,16 @@ impl PageServerHandler { } #[instrument(skip(self, pgb, ctx))] - async fn handle_pagerequests( + async fn handle_pagerequests( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, ctx: RequestContext, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<()> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { // NOTE: pagerequests handler exits when connection is closed, // so there is no need to reset the association task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); @@ -436,16 +444,19 @@ impl PageServerHandler { #[allow(clippy::too_many_arguments)] #[instrument(skip(self, pgb, ctx))] - async fn handle_import_basebackup( + async fn handle_import_basebackup( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, base_lsn: Lsn, _end_lsn: Lsn, pg_version: u32, ctx: RequestContext, - ) -> Result<(), QueryError> { + ) -> Result<(), QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); // Create empty timeline info!("creating new timeline"); @@ -486,15 +497,18 @@ impl PageServerHandler { } #[instrument(skip(self, pgb, ctx))] - async fn handle_import_wal( + async fn handle_import_wal( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, start_lsn: Lsn, end_lsn: Lsn, ctx: RequestContext, - ) -> Result<(), QueryError> { + ) -> Result<(), QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); let 
timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; @@ -690,16 +704,19 @@ impl PageServerHandler { #[allow(clippy::too_many_arguments)] #[instrument(skip(self, pgb, ctx))] - async fn handle_basebackup_request( + async fn handle_basebackup_request( &mut self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, lsn: Option, prev_lsn: Option, full_backup: bool, ctx: RequestContext, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<()> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { let started = std::time::Instant::now(); // check that the timeline exists @@ -770,10 +787,13 @@ impl PageServerHandler { } #[async_trait::async_trait] -impl postgres_backend::Handler for PageServerHandler { +impl postgres_backend::Handler for PageServerHandler +where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, +{ fn check_auth_jwt( &mut self, - _pgb: &mut PostgresBackendTCP, + _pgb: &mut PostgresBackend, jwt_response: &[u8], ) -> Result<(), QueryError> { // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT @@ -801,7 +821,7 @@ impl postgres_backend::Handler for PageServerHandler { fn startup( &mut self, - _pgb: &mut PostgresBackendTCP, + _pgb: &mut PostgresBackend, _sm: &FeStartupPacket, ) -> Result<(), QueryError> { Ok(()) @@ -809,7 +829,7 @@ impl postgres_backend::Handler for PageServerHandler { async fn process_query( &mut self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, query_string: &str, ) -> Result<(), QueryError> { let ctx = self.connection_ctx.attached_child(); From c4e1cafb6304f4cc7e2c65b77b6e3b4ef4afb17b Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 27 Apr 2023 17:08:00 +0100 Subject: [PATCH 71/77] scripts/flaky_tests.py: handle connection error (#4096) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase `connect_timeout` to 30s, which should be enough for 
most of the cases - If the script cannot connect to the DB (or any other `psycopg2.OperationalError` occur) — do not fail the script, log the error and proceed. Problems with fetching flaky tests shouldn't block the PR --- scripts/flaky_tests.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/flaky_tests.py b/scripts/flaky_tests.py index 829cc814e8..262950b61d 100755 --- a/scripts/flaky_tests.py +++ b/scripts/flaky_tests.py @@ -42,12 +42,16 @@ def main(args: argparse.Namespace): res: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] res = defaultdict(lambda: defaultdict(dict)) - logging.info("connecting to the database...") - with psycopg2.connect(connstr, connect_timeout=10) as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - logging.info("fetching flaky tests...") - cur.execute(FLAKY_TESTS_QUERY, (interval_days,)) - rows = cur.fetchall() + try: + logging.info("connecting to the database...") + with psycopg2.connect(connstr, connect_timeout=30) as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + logging.info("fetching flaky tests...") + cur.execute(FLAKY_TESTS_QUERY, (interval_days,)) + rows = cur.fetchall() + except psycopg2.OperationalError as exc: + logging.error("cannot fetch flaky tests from the DB due to an error", exc) + rows = [] for row in rows: logging.info(f"\t{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}") From fe0b6162992b32f874fe9c21d48b0580013a556f Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Thu, 27 Apr 2023 20:55:35 +0300 Subject: [PATCH 72/77] feat(page_service): read timeouts (#4093) Introduce read timeouts to our `page_service` connections. Without read timeouts, we essentially leak connections. This is a port of #3995. Split the refactorings to the other PR: #4097. Fixes #4028. 
--- Cargo.lock | 1 + pageserver/Cargo.toml | 1 + pageserver/src/page_service.rs | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 08b24d263c..2f5878dc6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2681,6 +2681,7 @@ dependencies = [ "tenant_size_model", "thiserror", "tokio", + "tokio-io-timeout", "tokio-postgres", "tokio-tar", "tokio-util", diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 0bc7eba95e..ea81544cbe 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -52,6 +52,7 @@ sync_wrapper.workspace = true tokio-tar.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] } +tokio-io-timeout.workspace = true tokio-postgres.workspace = true tokio-util.workspace = true toml_edit = { workspace = true, features = [ "serde" ] } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 3610704f2c..8b0795db3c 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -250,6 +250,15 @@ async fn page_service_conn_main( let peer_addr = socket.peer_addr().context("get peer address")?; + // setup read timeout of 10 minutes. the timeout is rather arbitrary for requirements: + // - long enough for most valid compute connections + // - less than infinite to stop us from "leaking" connections to long-gone computes + // + // no write timeout is used, because the kernel is assumed to error writes after some time. + let mut socket = tokio_io_timeout::TimeoutReader::new(socket); + socket.set_timeout(Some(std::time::Duration::from_secs(60 * 10))); + let socket = std::pin::pin!(socket); + // XXX: pgbackend.run() should take the connection_ctx, // and create a child per-query context when it invokes process_query. 
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler From b2a3981eaded4a1c277068563b0f69c5d6f6f986 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Wed, 12 Apr 2023 13:26:07 +0400 Subject: [PATCH 73/77] Move tracking of walsenders out of Timeline. Refactors walsenders out of timeline.rs, to make it less convoluted, into a separate WalSenders with its own lock, but otherwise having the same structure. Tracking of in-memory remote_consistent_lsn is also moved there as it is mainly received from the pageserver. State of walsender (feedback) is also restructured to be cleaner; now it is either PageserverFeedback or StandbyFeedback(StandbyReply, HotStandbyFeedback), but not both. --- libs/pq_proto/src/lib.rs | 5 +- safekeeper/src/broker.rs | 2 +- safekeeper/src/debug_dump.rs | 4 +- safekeeper/src/http/routes.rs | 4 +- safekeeper/src/metrics.rs | 38 +- safekeeper/src/safekeeper.rs | 22 +- safekeeper/src/send_wal.rs | 472 ++++++++++++++++++++--- safekeeper/src/timeline.rs | 218 +++-------- test_runner/fixtures/neon_fixtures.py | 2 + test_runner/regress/test_wal_acceptor.py | 11 +- 10 files changed, 514 insertions(+), 264 deletions(-) diff --git a/libs/pq_proto/src/lib.rs b/libs/pq_proto/src/lib.rs index ed0239072a..1e7afa9bc0 100644 --- a/libs/pq_proto/src/lib.rs +++ b/libs/pq_proto/src/lib.rs @@ -947,9 +947,10 @@ impl<'a> BeMessage<'a> { pub struct PageserverFeedback { /// Last known size of the timeline. Used to enforce timeline size limit. pub current_timeline_size: u64, - /// LSN last received and ingested by the pageserver. + /// LSN last received and ingested by the pageserver. Controls backpressure. pub last_received_lsn: u64, /// LSN up to which data is persisted by the pageserver to its local disc. + /// Controls backpressure. pub disk_consistent_lsn: u64, /// LSN up to which data is persisted by the pageserver on s3; safekeepers /// consider WAL before it can be removed.
@@ -968,7 +969,7 @@ impl PageserverFeedback { last_received_lsn: 0, remote_consistent_lsn: 0, disk_consistent_lsn: 0, - replytime: SystemTime::now(), + replytime: *PG_EPOCH, } } diff --git a/safekeeper/src/broker.rs b/safekeeper/src/broker.rs index 92f35bf51f..6a98d8fd84 100644 --- a/safekeeper/src/broker.rs +++ b/safekeeper/src/broker.rs @@ -91,7 +91,7 @@ async fn pull_loop(conf: SafeKeeperConf) -> Result<()> { // connection to the broker. // note: there are blocking operations below, but it's considered fine for now - tli.record_safekeeper_info(&msg).await? + tli.record_safekeeper_info(msg).await? } } bail!("end of stream"); diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 674cf9f6eb..954fbfc438 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -22,7 +22,7 @@ use crate::safekeeper::SafekeeperMemState; use crate::safekeeper::TermHistory; use crate::SafeKeeperConf; -use crate::timeline::ReplicaState; +use crate::send_wal::WalSenderState; use crate::GlobalTimelines; /// Various filters that influence the resulting JSON output. @@ -87,7 +87,7 @@ pub struct Timeline { pub struct Memory { pub is_cancelled: bool, pub peers_info_len: usize, - pub replicas: Vec>, + pub walsenders: Vec, pub wal_backup_active: bool, pub active: bool, pub num_computes: u32, diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index cdec45c148..ef691c5fe6 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -144,7 +144,7 @@ async fn timeline_status_handler(request: Request) -> Result) -> Result Result<()>) -> Result { /// Metrics for a single timeline. 
pub struct FullTimelineInfo { pub ttid: TenantTimelineId, - pub replicas: Vec, + pub ps_feedback: PageserverFeedback, pub wal_backup_active: bool, pub timeline_is_active: bool, pub num_computes: u32, @@ -242,6 +242,7 @@ pub struct FullTimelineInfo { pub persisted_state: SafeKeeperState, pub flush_lsn: Lsn, + pub remote_consistent_lsn: Lsn, pub wal_storage: WalStorageMetrics, } @@ -514,19 +515,6 @@ impl Collector for TimelineCollector { let timeline_id = tli.ttid.timeline_id.to_string(); let labels = &[tenant_id.as_str(), timeline_id.as_str()]; - let mut most_advanced: Option = None; - for replica in tli.replicas.iter() { - if let Some(replica_feedback) = replica.pageserver_feedback { - if let Some(current) = most_advanced { - if current.last_received_lsn < replica_feedback.last_received_lsn { - most_advanced = Some(replica_feedback); - } - } else { - most_advanced = Some(replica_feedback); - } - } - } - self.commit_lsn .with_label_values(labels) .set(tli.mem_state.commit_lsn.into()); @@ -544,7 +532,7 @@ impl Collector for TimelineCollector { .set(tli.mem_state.peer_horizon_lsn.into()); self.remote_consistent_lsn .with_label_values(labels) - .set(tli.mem_state.remote_consistent_lsn.into()); + .set(tli.remote_consistent_lsn.into()); self.timeline_active .with_label_values(labels) .set(tli.timeline_is_active as u64); @@ -567,15 +555,17 @@ impl Collector for TimelineCollector { .with_label_values(labels) .set(tli.wal_storage.flush_wal_seconds); - if let Some(feedback) = most_advanced { - self.ps_last_received_lsn + self.ps_last_received_lsn + .with_label_values(labels) + .set(tli.ps_feedback.last_received_lsn); + if let Ok(unix_time) = tli + .ps_feedback + .replytime + .duration_since(SystemTime::UNIX_EPOCH) + { + self.feedback_last_time_seconds .with_label_values(labels) - .set(feedback.last_received_lsn); - if let Ok(unix_time) = feedback.replytime.duration_since(SystemTime::UNIX_EPOCH) { - self.feedback_last_time_seconds - .with_label_values(labels) - 
.set(unix_time.as_secs()); - } + .set(unix_time.as_secs()); } if tli.last_removed_segno != 0 { diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 10b4842cbd..6864a9713d 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -212,7 +212,6 @@ pub struct SafekeeperMemState { pub commit_lsn: Lsn, pub backup_lsn: Lsn, pub peer_horizon_lsn: Lsn, - pub remote_consistent_lsn: Lsn, #[serde(with = "hex")] pub proposer_uuid: PgUuid, } @@ -540,7 +539,6 @@ where commit_lsn: state.commit_lsn, backup_lsn: state.backup_lsn, peer_horizon_lsn: state.peer_horizon_lsn, - remote_consistent_lsn: state.remote_consistent_lsn, proposer_uuid: state.proposer_uuid, }, state, @@ -781,10 +779,6 @@ where // Initializing backup_lsn is useful to avoid making backup think it should upload 0 segment. self.inmem.backup_lsn = max(self.inmem.backup_lsn, state.timeline_start_lsn); - // Initializing remote_consistent_lsn sets that we have nothing to - // stream to pageserver(s) immediately after creation. - self.inmem.remote_consistent_lsn = - max(self.inmem.remote_consistent_lsn, state.timeline_start_lsn); state.acceptor_state.term_history = msg.term_history.clone(); self.persist_control_file(state)?; @@ -837,7 +831,6 @@ where state.commit_lsn = self.inmem.commit_lsn; state.backup_lsn = self.inmem.backup_lsn; state.peer_horizon_lsn = self.inmem.peer_horizon_lsn; - state.remote_consistent_lsn = self.inmem.remote_consistent_lsn; state.proposer_uuid = self.inmem.proposer_uuid; self.state.persist(&state) } @@ -940,14 +933,12 @@ where self.state.backup_lsn + (self.state.server.wal_seg_size as u64) < new_backup_lsn; self.inmem.backup_lsn = new_backup_lsn; - let new_remote_consistent_lsn = max( - Lsn(sk_info.remote_consistent_lsn), - self.inmem.remote_consistent_lsn, - ); + // value in sk_info should be maximized over our local in memory value. 
+ let new_remote_consistent_lsn = Lsn(sk_info.remote_consistent_lsn); + assert!(self.state.remote_consistent_lsn <= new_remote_consistent_lsn); sync_control_file |= self.state.remote_consistent_lsn + (self.state.server.wal_seg_size as u64) < new_remote_consistent_lsn; - self.inmem.remote_consistent_lsn = new_remote_consistent_lsn; let new_peer_horizon_lsn = max(Lsn(sk_info.peer_horizon_lsn), self.inmem.peer_horizon_lsn); sync_control_file |= self.state.peer_horizon_lsn + (self.state.server.wal_seg_size as u64) @@ -955,7 +946,12 @@ where self.inmem.peer_horizon_lsn = new_peer_horizon_lsn; if sync_control_file { - self.persist_control_file(self.state.clone())?; + let mut state = self.state.clone(); + // Note: we do not persist remote_consistent_lsn in other paths of + // persisting cf -- that is not much needed currently. We could do + // that by storing Arc to walsenders in Safekeeper. + state.remote_consistent_lsn = new_remote_consistent_lsn; + self.persist_control_file(state)?; } Ok(()) } diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index a6ca89efa4..abd213deff 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -1,12 +1,14 @@ //! This module implements the streaming side of replication protocol, starting -//! with the "START_REPLICATION" message. +//! with the "START_REPLICATION" message, and registry of walsenders. 
use crate::handler::SafekeeperPostgresHandler; -use crate::timeline::{ReplicaState, Timeline}; +use crate::timeline::Timeline; +use crate::wal_service::ConnectionId; use crate::wal_storage::WalReader; use crate::GlobalTimelines; use anyhow::Context as AnyhowContext; use bytes::Bytes; +use parking_lot::Mutex; use postgres_backend::PostgresBackend; use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; use postgres_ffi::get_current_timestamp; @@ -14,8 +16,12 @@ use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; use pq_proto::{BeMessage, PageserverFeedback, WalSndKeepAlive, XLogDataBody}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; +use utils::http::json::display_serialize; +use utils::id::TenantTimelineId; +use utils::lsn::AtomicLsn; -use std::cmp::min; +use std::cmp::{max, min}; +use std::net::SocketAddr; use std::str; use std::sync::Arc; use std::time::Duration; @@ -40,6 +46,8 @@ pub struct HotStandbyFeedback { pub catalog_xmin: FullTransactionId, } +const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0; + impl HotStandbyFeedback { pub fn empty() -> HotStandbyFeedback { HotStandbyFeedback { @@ -51,24 +59,293 @@ impl HotStandbyFeedback { } /// Standby status update -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct StandbyReply { - pub write_lsn: Lsn, // last lsn received by pageserver - pub flush_lsn: Lsn, // pageserver's disk consistent lSN - pub apply_lsn: Lsn, // pageserver's remote consistent lSN - pub reply_ts: TimestampTz, + pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby. + pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby. + pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby. + pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01. 
pub reply_requested: bool, } -/// Scope guard to unregister replication connection from timeline -struct ReplicationConnGuard { - replica: usize, // replica internal ID assigned by timeline - timeline: Arc, +impl StandbyReply { + fn empty() -> Self { + StandbyReply { + write_lsn: Lsn::INVALID, + flush_lsn: Lsn::INVALID, + apply_lsn: Lsn::INVALID, + reply_ts: 0, + reply_requested: false, + } + } } -impl Drop for ReplicationConnGuard { +#[derive(Debug, Clone, Copy, Serialize)] +pub struct StandbyFeedback { + reply: StandbyReply, + hs_feedback: HotStandbyFeedback, +} + +/// WalSenders registry. Timeline holds it (wrapped in Arc). +pub struct WalSenders { + /// Lsn maximized over all walsenders *and* peer data, so might be higher + /// than what we receive from replicas. + remote_consistent_lsn: AtomicLsn, + mutex: Mutex, +} + +impl WalSenders { + pub fn new(remote_consistent_lsn: Lsn) -> Arc { + Arc::new(WalSenders { + remote_consistent_lsn: AtomicLsn::from(remote_consistent_lsn), + mutex: Mutex::new(WalSendersShared::new()), + }) + } + + /// Register new walsender. Returned guard provides access to the slot and + /// automatically deregisters in Drop. + fn register( + self: &Arc, + ttid: TenantTimelineId, + addr: SocketAddr, + conn_id: ConnectionId, + appname: Option, + ) -> WalSenderGuard { + let slots = &mut self.mutex.lock().slots; + let walsender_state = WalSenderState { + ttid, + addr, + conn_id, + appname, + feedback: ReplicationFeedback::Pageserver(PageserverFeedback::empty()), + }; + // find empty slot or create new one + let pos = if let Some(pos) = slots.iter().position(|s| s.is_none()) { + slots[pos] = Some(walsender_state); + pos + } else { + let pos = slots.len(); + slots.push(Some(walsender_state)); + pos + }; + WalSenderGuard { + id: pos, + walsenders: self.clone(), + } + } + + /// Get state of all walsenders. 
+ pub fn get_all(self: &Arc) -> Vec { + self.mutex.lock().slots.iter().flatten().cloned().collect() + } + + /// Get aggregated pageserver feedback. + pub fn get_ps_feedback(self: &Arc) -> PageserverFeedback { + self.mutex.lock().agg_ps_feedback + } + + /// Get aggregated pageserver and hot standby feedback (we send them to compute). + pub fn get_feedbacks(self: &Arc) -> (PageserverFeedback, HotStandbyFeedback) { + let shared = self.mutex.lock(); + (shared.agg_ps_feedback, shared.agg_hs_feedback) + } + + /// Record new pageserver feedback, update aggregated values. + fn record_ps_feedback(self: &Arc, id: WalSenderId, feedback: &PageserverFeedback) { + let mut shared = self.mutex.lock(); + shared.get_slot_mut(id).feedback = ReplicationFeedback::Pageserver(*feedback); + shared.update_ps_feedback(); + self.update_remote_consistent_lsn(Lsn(shared.agg_ps_feedback.remote_consistent_lsn)); + } + + /// Record standby reply. + fn record_standby_reply(self: &Arc, id: WalSenderId, reply: &StandbyReply) { + let mut shared = self.mutex.lock(); + let slot = shared.get_slot_mut(id); + match &mut slot.feedback { + ReplicationFeedback::Standby(sf) => sf.reply = *reply, + ReplicationFeedback::Pageserver(_) => { + slot.feedback = ReplicationFeedback::Standby(StandbyFeedback { + reply: *reply, + hs_feedback: HotStandbyFeedback::empty(), + }) + } + } + } + + /// Record hot standby feedback, update aggregated value. + fn record_hs_feedback(self: &Arc, id: WalSenderId, feedback: &HotStandbyFeedback) { + let mut shared = self.mutex.lock(); + let slot = shared.get_slot_mut(id); + match &mut slot.feedback { + ReplicationFeedback::Standby(sf) => sf.hs_feedback = *feedback, + ReplicationFeedback::Pageserver(_) => { + slot.feedback = ReplicationFeedback::Standby(StandbyFeedback { + reply: StandbyReply::empty(), + hs_feedback: *feedback, + }) + } + } + shared.update_hs_feedback(); + } + + /// Get remote_consistent_lsn reported by the pageserver. Returns None if + /// client is not pageserver. 
+ fn get_ws_remote_consistent_lsn(self: &Arc, id: WalSenderId) -> Option { + let shared = self.mutex.lock(); + let slot = shared.get_slot(id); + match slot.feedback { + ReplicationFeedback::Pageserver(feedback) => Some(Lsn(feedback.remote_consistent_lsn)), + _ => None, + } + } + + /// Get remote_consistent_lsn maximized across all walsenders and peers. + pub fn get_remote_consistent_lsn(self: &Arc) -> Lsn { + self.remote_consistent_lsn.load() + } + + /// Update maximized remote_consistent_lsn, return new (potentially) value. + pub fn update_remote_consistent_lsn(self: &Arc, candidate: Lsn) -> Lsn { + self.remote_consistent_lsn + .fetch_max(candidate) + .max(candidate) + } + + /// Unregister walsender. + fn unregister(self: &Arc, id: WalSenderId) { + let mut shared = self.mutex.lock(); + shared.slots[id] = None; + shared.update_hs_feedback(); + } +} + +struct WalSendersShared { + // aggregated over all walsenders value + agg_hs_feedback: HotStandbyFeedback, + // aggregated over all walsenders value + agg_ps_feedback: PageserverFeedback, + slots: Vec>, +} + +impl WalSendersShared { + fn new() -> Self { + WalSendersShared { + agg_hs_feedback: HotStandbyFeedback::empty(), + agg_ps_feedback: PageserverFeedback::empty(), + slots: Vec::new(), + } + } + + /// Get content of provided id slot, it must exist. + fn get_slot(&self, id: WalSenderId) -> &WalSenderState { + self.slots[id].as_ref().expect("walsender doesn't exist") + } + + /// Get mut content of provided id slot, it must exist. + fn get_slot_mut(&mut self, id: WalSenderId) -> &mut WalSenderState { + self.slots[id].as_mut().expect("walsender doesn't exist") + } + + /// Update aggregated hot standy feedback. We just take min of valid xmins + /// and ts. 
+ fn update_hs_feedback(&mut self) { + let mut agg = HotStandbyFeedback::empty(); + for ws_state in self.slots.iter().flatten() { + if let ReplicationFeedback::Standby(standby_feedback) = ws_state.feedback { + let hs_feedback = standby_feedback.hs_feedback; + // doing Option math like op1.iter().chain(op2.iter()).min() + // would be nicer, but we serialize/deserialize this struct + // directly, so leave as is for now + if hs_feedback.xmin != INVALID_FULL_TRANSACTION_ID { + if agg.xmin != INVALID_FULL_TRANSACTION_ID { + agg.xmin = min(agg.xmin, hs_feedback.xmin); + } else { + agg.xmin = hs_feedback.xmin; + } + agg.ts = min(agg.ts, hs_feedback.ts); + } + if hs_feedback.catalog_xmin != INVALID_FULL_TRANSACTION_ID { + if agg.catalog_xmin != INVALID_FULL_TRANSACTION_ID { + agg.catalog_xmin = min(agg.catalog_xmin, hs_feedback.catalog_xmin); + } else { + agg.catalog_xmin = hs_feedback.catalog_xmin; + } + agg.ts = min(agg.ts, hs_feedback.ts); + } + } + } + self.agg_hs_feedback = agg; + } + + /// Update aggregated pageserver feedback. LSNs (last_received, + /// disk_consistent, remote_consistent) and reply timestamp are just + /// maximized; timeline_size if taken from feedback with highest + /// last_received lsn. This is generally reasonable, but we might want to + /// implement other policies once multiple pageservers start to be actively + /// used. 
+ fn update_ps_feedback(&mut self) { + let init = PageserverFeedback::empty(); + let acc = + self.slots + .iter() + .flatten() + .fold(init, |mut acc, ws_state| match ws_state.feedback { + ReplicationFeedback::Pageserver(feedback) => { + if feedback.last_received_lsn > acc.last_received_lsn { + acc.current_timeline_size = feedback.current_timeline_size; + } + acc.last_received_lsn = + max(feedback.last_received_lsn, acc.last_received_lsn); + acc.disk_consistent_lsn = + max(feedback.disk_consistent_lsn, acc.disk_consistent_lsn); + acc.remote_consistent_lsn = + max(feedback.remote_consistent_lsn, acc.remote_consistent_lsn); + acc.replytime = max(feedback.replytime, acc.replytime); + acc + } + ReplicationFeedback::Standby(_) => acc, + }); + self.agg_ps_feedback = acc; + } +} + +// Serialized is used only for pretty printing in json. +#[derive(Debug, Clone, Serialize)] +pub struct WalSenderState { + #[serde(serialize_with = "display_serialize")] + ttid: TenantTimelineId, + addr: SocketAddr, + conn_id: ConnectionId, + // postgres application_name + appname: Option, + feedback: ReplicationFeedback, +} + +// Receiver is either pageserver or regular standby, which have different +// feedbacks. +#[derive(Debug, Clone, Copy, Serialize)] +enum ReplicationFeedback { + Pageserver(PageserverFeedback), + Standby(StandbyFeedback), +} + +// id of the occupied slot in WalSenders to access it (and save in the +// WalSenderGuard). We could give Arc directly to the slot, but there is not +// much sense in that as values aggregation which is performed on each feedback +// receival iterates over all walsenders. +pub type WalSenderId = usize; + +/// Scope guard to access slot in WalSenders registry and unregister from it in +/// Drop. 
+pub struct WalSenderGuard { + id: WalSenderId, + walsenders: Arc, +} + +impl Drop for WalSenderGuard { fn drop(&mut self) { - self.timeline.remove_replica(self.replica); + self.walsenders.unregister(self.id); } } @@ -97,16 +374,13 @@ impl SafekeeperPostgresHandler { let tli = GlobalTimelines::get(self.ttid).map_err(|e| CopyStreamHandlerEnd::Other(e.into()))?; - let state = ReplicaState::new(); - // This replica_id is used below to check if it's time to stop replication. - let replica_id = tli.add_replica(state); - - // Use a guard object to remove our entry from the timeline, when the background - // thread and us have both finished using it. - let _guard = Arc::new(ReplicationConnGuard { - replica: replica_id, - timeline: tli.clone(), - }); + // Use a guard object to remove our entry from the timeline when we are done. + let ws_guard = Arc::new(tli.get_walsenders().register( + self.ttid, + *pgb.get_peer_addr(), + self.conn_id, + self.appname.clone(), + )); // Walproposer gets special handling: safekeeper must give proposer all // local WAL till the end, whether committed or not (walproposer will @@ -154,16 +428,11 @@ impl SafekeeperPostgresHandler { end_pos, stop_pos, commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(), - replica_id, + ws_guard: ws_guard.clone(), wal_reader, send_buf: [0; MAX_SEND_SIZE], }; - let mut reply_reader = ReplyReader { - reader, - tli, - replica_id, - feedback: ReplicaState::new(), - }; + let mut reply_reader = ReplyReader { reader, ws_guard }; let res = tokio::select! { // todo: add read|write .context to these errors @@ -190,7 +459,7 @@ struct WalSender<'a, IO> { // in recovery. 
stop_pos: Option, commit_lsn_watch_rx: Receiver, - replica_id: usize, + ws_guard: Arc, wal_reader: WalReader, // buffer for readling WAL into to send it send_buf: [u8; MAX_SEND_SIZE], @@ -264,14 +533,20 @@ impl WalSender<'_, IO> { return Ok(()); } // Timed out waiting for WAL, check for termination and send KA - if self.tli.should_walsender_stop(self.replica_id) { - // Terminate if there is nothing more to send. - // TODO close the stream properly - return Err(CopyStreamHandlerEnd::ServerInitiated(format!( - "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", - self.appname, self.start_pos, - ))); + if let Some(remote_consistent_lsn) = self + .ws_guard + .walsenders + .get_ws_remote_consistent_lsn(self.ws_guard.id) + { + if self.tli.should_walsender_stop(remote_consistent_lsn) { + // Terminate if there is nothing more to send. + return Err(CopyStreamHandlerEnd::ServerInitiated(format!( + "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", + self.appname, self.start_pos, + ))); + } } + self.pgb .write_message(&BeMessage::KeepAlive(WalSndKeepAlive { sent_ptr: self.end_pos.0, @@ -286,9 +561,7 @@ impl WalSender<'_, IO> { /// A half driving receiving replies. struct ReplyReader { reader: PostgresBackendReader, - tli: Arc, - replica_id: usize, - feedback: ReplicaState, + ws_guard: Arc, } impl ReplyReader { @@ -303,29 +576,32 @@ impl ReplyReader { match msg.first().cloned() { Some(HOT_STANDBY_FEEDBACK_TAG_BYTE) => { // Note: deserializing is on m[1..] because we skip the tag byte. 
- self.feedback.hs_feedback = HotStandbyFeedback::des(&msg[1..]) + let hs_feedback = HotStandbyFeedback::des(&msg[1..]) .context("failed to deserialize HotStandbyFeedback")?; - self.tli - .update_replica_state(self.replica_id, self.feedback); + self.ws_guard + .walsenders + .record_hs_feedback(self.ws_guard.id, &hs_feedback); } Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => { - let _reply = + let reply = StandbyReply::des(&msg[1..]).context("failed to deserialize StandbyReply")?; - // This must be a regular postgres replica, - // because pageserver doesn't send this type of messages to safekeeper. - // Currently we just ignore this, tracking progress for them is not supported. + self.ws_guard + .walsenders + .record_standby_reply(self.ws_guard.id, &reply); } Some(NEON_STATUS_UPDATE_TAG_BYTE) => { // pageserver sends this. // Note: deserializing is on m[9..] because we skip the tag byte and len bytes. let buf = Bytes::copy_from_slice(&msg[9..]); - let reply = PageserverFeedback::parse(buf); + let ps_feedback = PageserverFeedback::parse(buf); - trace!("PageserverFeedback is {:?}", reply); - self.feedback.pageserver_feedback = Some(reply); - - self.tli - .update_replica_state(self.replica_id, self.feedback); + trace!("PageserverFeedback is {:?}", ps_feedback); + self.ws_guard + .walsenders + .record_ps_feedback(self.ws_guard.id, &ps_feedback); + // in principle new remote_consistent_lsn could allow to + // deactivate the timeline, but we check that regularly through + // broker updated, not need to do it here } _ => warn!("unexpected message {:?}", msg), } @@ -368,3 +644,89 @@ async fn wait_for_lsn(rx: &mut Receiver, lsn: Lsn) -> anyhow::Result