From 739f627b96b3b0e00479f90621559375b3382582 Mon Sep 17 00:00:00 2001 From: Misha Sakhnov Date: Thu, 12 Dec 2024 10:45:52 +0200 Subject: [PATCH 01/49] Bump vm-builder v0.35.0 -> v0.37.1 (#10015) Bump version to pick up changes introduced in the neonvm-daemon to support sys fs based CPU scaling (https://github.com/neondatabase/autoscaling/issues/1082). Previous update: https://github.com/neondatabase/neon/pull/9208 --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ee22f2ff54..67d59c7da1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -797,7 +797,7 @@ jobs: - pg: v17 debian: bookworm env: - VM_BUILDER_VERSION: v0.35.0 + VM_BUILDER_VERSION: v0.37.1 steps: - uses: actions/checkout@v4 From 0bd8eca9ca98ffbcaa403a47b2e6dfc2961fafa1 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 12 Dec 2024 09:18:50 +0000 Subject: [PATCH 02/49] Storage: create release PRs On Fridays (#10017) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem To give Storage more time on preprod — create a release branch on Friday ## Summary of changes - Automatically create Storage release PR on Friday instead of Monday --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f0273b977f..3c1af1d9c6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,7 +3,7 @@ name: Create Release Branch on: schedule: # It should be kept in sync with if-condition in jobs - - cron: '0 6 * * MON' # Storage release + - cron: '0 6 * * FRI' # Storage release - cron: '0 6 * * THU' # Proxy release workflow_dispatch: inputs: @@ -29,7 +29,7 @@ defaults: jobs: create-storage-release-branch: - if: ${{ github.event.schedule == '0 6 * * MON' || inputs.create-storage-release-branch }} + if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }} permissions: contents: write From ec0ce06c164dcbd301acf1bc96822b6aa542d173 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 12 Dec 2024 10:53:10 +0000 Subject: [PATCH 03/49] tests: default interpreted proto in tests (#10079) ## Problem We aren't using the sharded interpreted wal receiver protocol in all tests. ## Summary of changes Default to the interpreted protocol. --- test_runner/fixtures/neon_fixtures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 0ecc324030..0b7cdec50d 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -435,7 +435,10 @@ class NeonEnvBuilder: self.pageserver_virtual_file_io_mode = pageserver_virtual_file_io_mode - self.pageserver_wal_receiver_protocol = pageserver_wal_receiver_protocol + if pageserver_wal_receiver_protocol is not None: + self.pageserver_wal_receiver_protocol = pageserver_wal_receiver_protocol + else: + self.pageserver_wal_receiver_protocol = PageserverWalReceiverProtocol.INTERPRETED assert test_name.startswith( "test_" From c9a773af37207979955ba08e40edcf2eb927745d Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Thu, 12 Dec 2024 14:57:00 +0300 Subject: [PATCH 04/49] Fix test_subscriber_synchronous_commit flakiness. 
(#10057) 6f7aeaa configured LFC for USE_LFC case, but omitted setting shared_buffers for non USE_LFC, causing flakiness. ref https://github.com/neondatabase/neon/issues/9989 --- .../regress/test_logical_replication.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py index db18e1758c..8908763109 100644 --- a/test_runner/regress/test_logical_replication.py +++ b/test_runner/regress/test_logical_replication.py @@ -573,17 +573,18 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg: Van vanilla_pg.safe_psql("create extension neon;") env.create_branch("subscriber") - # We want all data to fit into shared_buffers because later we stop - # safekeeper and insert more; this shouldn't cause page requests as they - # will be stuck. + # We want all data to fit into shared_buffers or LFC cache because later we + # stop safekeeper and insert more; this shouldn't cause page requests as + # they will be stuck. + if USE_LFC: + config_lines = ["neon.max_file_cache_size = 32MB", "neon.file_cache_size_limit = 32MB"] + else: + config_lines = [ + "shared_buffers = 32MB", + ] sub = env.endpoints.create( "subscriber", - config_lines=[ - "neon.max_file_cache_size = 32MB", - "neon.file_cache_size_limit = 32MB", - ] - if USE_LFC - else [], + config_lines=config_lines, ) sub.start() From e502e880b542e54b13f29a45ec74d36c93361520 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 12 Dec 2024 13:42:50 +0000 Subject: [PATCH 05/49] chore(proxy): remove code for old API (#10109) ## Problem Now that https://github.com/neondatabase/cloud/issues/15245 is done, we can remove the old code. ## Summary of changes Removes support for the ManagementV2 API, in favour of the ProxyV1 API. 
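For reference, the dispatch shape left behind is small. A minimal self-contained sketch (the stand-in struct names and the `backend_name` helper are illustrative, not the proxy's real types; the real variants wrap `cplane_proxy_v1::NeonControlPlaneClient` and `mock::MockControlPlane`):

```rust
// Sketch only: stand-ins for the real client types.
struct ProxyV1Client;
struct MockControlPlane;

enum ControlPlaneClient {
    // Proxy V1 control plane API: the only production backend left.
    ProxyV1(ProxyV1Client),
    // Local mock control plane, compiled in for tests only.
    #[cfg(any(test, feature = "testing"))]
    PostgresMock(MockControlPlane),
}

impl ControlPlaneClient {
    // Every dispatch site in the diff below shrinks the same way: the
    // `Self::Neon(api)` arm disappears and only these arms remain.
    fn backend_name(&self) -> &'static str {
        match self {
            Self::ProxyV1(_) => "cplane-v1",
            #[cfg(any(test, feature = "testing"))]
            Self::PostgresMock(_) => "postgres-mock",
        }
    }
}
```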
--- proxy/src/auth/backend/mod.rs | 4 - proxy/src/bin/proxy.rs | 97 +---- proxy/src/control_plane/client/mod.rs | 9 +- proxy/src/control_plane/client/neon.rs | 511 ------------------------- proxy/src/control_plane/messages.rs | 22 +- test_runner/fixtures/neon_fixtures.py | 14 +- 6 files changed, 12 insertions(+), 645 deletions(-) delete mode 100644 proxy/src/control_plane/client/neon.rs diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 1bad7b3086..f38ecf715f 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -74,10 +74,6 @@ impl std::fmt::Display for Backend<'_, ()> { .debug_tuple("ControlPlane::ProxyV1") .field(&endpoint.url()) .finish(), - ControlPlaneClient::Neon(endpoint) => fmt - .debug_tuple("ControlPlane::Neon") - .field(&endpoint.url()) - .finish(), #[cfg(any(test, feature = "testing"))] ControlPlaneClient::PostgresMock(endpoint) => fmt .debug_tuple("ControlPlane::PostgresMock") diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 99144acef0..97c4037009 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -43,9 +43,6 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; #[derive(Clone, Debug, ValueEnum)] enum AuthBackendType { - #[value(name("console"), alias("cplane"))] - ControlPlane, - #[value(name("cplane-v1"), alias("control-plane"))] ControlPlaneV1, @@ -488,40 +485,7 @@ async fn main() -> anyhow::Result<()> { } if let Either::Left(auth::Backend::ControlPlane(api, _)) = &auth_backend { - if let proxy::control_plane::client::ControlPlaneClient::Neon(api) = &**api { - match (redis_notifications_client, regional_redis_client.clone()) { - (None, None) => {} - (client1, client2) => { - let cache = api.caches.project_info.clone(); - if let Some(client) = client1 { - maintenance_tasks.spawn(notifications::task_main( - client, - cache.clone(), - cancel_map.clone(), - args.region.clone(), - )); - } - if let Some(client) = client2 { - maintenance_tasks.spawn(notifications::task_main( - client, - cache.clone(), - cancel_map.clone(), - args.region.clone(), - )); - } - maintenance_tasks.spawn(async move { cache.clone().gc_worker().await }); - } - } - if let Some(regional_redis_client) = regional_redis_client { - let cache = api.caches.endpoints_cache.clone(); - let con = regional_redis_client; - let span = tracing::info_span!("endpoints_cache"); - maintenance_tasks.spawn( - async move { cache.do_read(con, cancellation_token.clone()).await } - .instrument(span), - ); - } - } else if let proxy::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api { + if let proxy::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api { match (redis_notifications_client, regional_redis_client.clone()) { (None, None) => {} (client1, client2) => { @@ -757,65 +721,6 @@ fn build_auth_backend( Ok(Either::Left(config)) } - AuthBackendType::ControlPlane => { - let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; - let project_info_cache_config: ProjectInfoCacheOptions = - args.project_info_cache.parse()?; - let endpoint_cache_config: config::EndpointCacheConfig = - args.endpoint_cache_config.parse()?; - - info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); - info!( - "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" - ); - info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); - let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( - 
wake_compute_cache_config, - project_info_cache_config, - endpoint_cache_config, - ))); - - let config::ConcurrencyLockOptions { - shards, - limiter, - epoch, - timeout, - } = args.wake_compute_lock.parse()?; - info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); - let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( - "wake_compute_lock", - limiter, - shards, - timeout, - epoch, - &Metrics::get().wake_compute_lock, - )?)); - tokio::spawn(locks.garbage_collect_worker()); - - let url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; - - let endpoint = http::Endpoint::new(url, http::new_client()); - - let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); - RateBucketInfo::validate(&mut wake_compute_rps_limit)?; - let wake_compute_endpoint_rate_limiter = - Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); - - let api = control_plane::client::neon::NeonControlPlaneClient::new( - endpoint, - args.control_plane_token.clone(), - caches, - locks, - wake_compute_endpoint_rate_limiter, - ); - let api = control_plane::client::ControlPlaneClient::Neon(api); - let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); - - let config = Box::leak(Box::new(auth_backend)); - - Ok(Either::Left(config)) - } - #[cfg(feature = "testing")] AuthBackendType::Postgres => { let url = args.auth_endpoint.parse()?; diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index 7ef5a9c9fd..d559d96bbc 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -1,7 +1,6 @@ pub mod cplane_proxy_v1; #[cfg(any(test, feature = "testing"))] pub mod mock; -pub mod neon; use std::hash::Hash; use std::sync::Arc; @@ -28,10 +27,8 @@ use crate::types::EndpointId; #[non_exhaustive] #[derive(Clone)] pub enum ControlPlaneClient { - /// New Proxy V1 control plane API + /// Proxy V1 control plane API ProxyV1(cplane_proxy_v1::NeonControlPlaneClient), - /// Current Management API (V2). - Neon(neon::NeonControlPlaneClient), /// Local mock control plane. 
#[cfg(any(test, feature = "testing"))] PostgresMock(mock::MockControlPlane), @@ -49,7 +46,6 @@ impl ControlPlaneApi for ControlPlaneClient { ) -> Result { match self { Self::ProxyV1(api) => api.get_role_secret(ctx, user_info).await, - Self::Neon(api) => api.get_role_secret(ctx, user_info).await, #[cfg(any(test, feature = "testing"))] Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await, #[cfg(test)] @@ -66,7 +62,6 @@ impl ControlPlaneApi for ControlPlaneClient { ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError> { match self { Self::ProxyV1(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, - Self::Neon(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, #[cfg(any(test, feature = "testing"))] Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, #[cfg(test)] @@ -81,7 +76,6 @@ impl ControlPlaneApi for ControlPlaneClient { ) -> Result, errors::GetEndpointJwksError> { match self { Self::ProxyV1(api) => api.get_endpoint_jwks(ctx, endpoint).await, - Self::Neon(api) => api.get_endpoint_jwks(ctx, endpoint).await, #[cfg(any(test, feature = "testing"))] Self::PostgresMock(api) => api.get_endpoint_jwks(ctx, endpoint).await, #[cfg(test)] @@ -96,7 +90,6 @@ impl ControlPlaneApi for ControlPlaneClient { ) -> Result { match self { Self::ProxyV1(api) => api.wake_compute(ctx, user_info).await, - Self::Neon(api) => api.wake_compute(ctx, user_info).await, #[cfg(any(test, feature = "testing"))] Self::PostgresMock(api) => api.wake_compute(ctx, user_info).await, #[cfg(test)] diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs deleted file mode 100644 index bf62c0d6ab..0000000000 --- a/proxy/src/control_plane/client/neon.rs +++ /dev/null @@ -1,511 +0,0 @@ -//! Stale console backend, remove after migrating to Proxy V1 API (#15245). - -use std::sync::Arc; -use std::time::Duration; - -use ::http::header::AUTHORIZATION; -use ::http::HeaderName; -use futures::TryFutureExt; -use postgres_client::config::SslMode; -use tokio::time::Instant; -use tracing::{debug, info, info_span, warn, Instrument}; - -use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeCompute}; -use crate::auth::backend::jwt::AuthRule; -use crate::auth::backend::ComputeUserInfo; -use crate::cache::Cached; -use crate::context::RequestContext; -use crate::control_plane::caches::ApiCaches; -use crate::control_plane::errors::{ - ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, -}; -use crate::control_plane::locks::ApiLocks; -use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason}; -use crate::control_plane::{ - AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, -}; -use crate::metrics::{CacheOutcome, Metrics}; -use crate::rate_limiter::WakeComputeRateLimiter; -use crate::types::{EndpointCacheKey, EndpointId}; -use crate::{compute, http, scram}; - -const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id"); - -#[derive(Clone)] -pub struct NeonControlPlaneClient { - endpoint: http::Endpoint, - pub caches: &'static ApiCaches, - pub(crate) locks: &'static ApiLocks, - pub(crate) wake_compute_endpoint_rate_limiter: Arc, - // put in a shared ref so we don't copy secrets all over in memory - jwt: Arc, -} - -impl NeonControlPlaneClient { - /// Construct an API object containing the auth parameters. 
- pub fn new( - endpoint: http::Endpoint, - jwt: Arc, - caches: &'static ApiCaches, - locks: &'static ApiLocks, - wake_compute_endpoint_rate_limiter: Arc, - ) -> Self { - Self { - endpoint, - caches, - locks, - wake_compute_endpoint_rate_limiter, - jwt, - } - } - - pub(crate) fn url(&self) -> &str { - self.endpoint.url().as_str() - } - - async fn do_get_auth_info( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> Result { - if !self - .caches - .endpoints_cache - .is_valid(ctx, &user_info.endpoint.normalize()) - { - // TODO: refactor this because it's weird - // this is a failure to authenticate but we return Ok. - info!("endpoint is not valid, skipping the request"); - return Ok(AuthInfo::default()); - } - let request_id = ctx.session_id().to_string(); - let application_name = ctx.console_application_name(); - async { - let request = self - .endpoint - .get_path("proxy_get_role_secret") - .header(X_REQUEST_ID, &request_id) - .header(AUTHORIZATION, format!("Bearer {}", &self.jwt)) - .query(&[("session_id", ctx.session_id())]) - .query(&[ - ("application_name", application_name.as_str()), - ("project", user_info.endpoint.as_str()), - ("role", user_info.user.as_str()), - ]) - .build()?; - - debug!(url = request.url().as_str(), "sending http request"); - let start = Instant::now(); - let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); - let response = self.endpoint.execute(request).await?; - drop(pause); - info!(duration = ?start.elapsed(), "received http response"); - let body = match parse_body::(response).await { - Ok(body) => body, - // Error 404 is special: it's ok not to have a secret. - // TODO(anna): retry - Err(e) => { - return if e.get_reason().is_not_found() { - // TODO: refactor this because it's weird - // this is a failure to authenticate but we return Ok. 
- Ok(AuthInfo::default()) - } else { - Err(e.into()) - }; - } - }; - - let secret = if body.role_secret.is_empty() { - None - } else { - let secret = scram::ServerSecret::parse(&body.role_secret) - .map(AuthSecret::Scram) - .ok_or(GetAuthInfoError::BadSecret)?; - Some(secret) - }; - let allowed_ips = body.allowed_ips.unwrap_or_default(); - Metrics::get() - .proxy - .allowed_ips_number - .observe(allowed_ips.len() as f64); - Ok(AuthInfo { - secret, - allowed_ips, - project_id: body.project_id, - }) - } - .inspect_err(|e| tracing::debug!(error = ?e)) - .instrument(info_span!("do_get_auth_info")) - .await - } - - async fn do_get_endpoint_jwks( - &self, - ctx: &RequestContext, - endpoint: EndpointId, - ) -> Result, GetEndpointJwksError> { - if !self - .caches - .endpoints_cache - .is_valid(ctx, &endpoint.normalize()) - { - return Err(GetEndpointJwksError::EndpointNotFound); - } - let request_id = ctx.session_id().to_string(); - async { - let request = self - .endpoint - .get_with_url(|url| { - url.path_segments_mut() - .push("endpoints") - .push(endpoint.as_str()) - .push("jwks"); - }) - .header(X_REQUEST_ID, &request_id) - .header(AUTHORIZATION, format!("Bearer {}", &self.jwt)) - .query(&[("session_id", ctx.session_id())]) - .build() - .map_err(GetEndpointJwksError::RequestBuild)?; - - debug!(url = request.url().as_str(), "sending http request"); - let start = Instant::now(); - let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); - let response = self - .endpoint - .execute(request) - .await - .map_err(GetEndpointJwksError::RequestExecute)?; - drop(pause); - info!(duration = ?start.elapsed(), "received http response"); - - let body = parse_body::(response).await?; - - let rules = body - .jwks - .into_iter() - .map(|jwks| AuthRule { - id: jwks.id, - jwks_url: jwks.jwks_url, - audience: jwks.jwt_audience, - role_names: jwks.role_names, - }) - .collect(); - - Ok(rules) - } - .inspect_err(|e| tracing::debug!(error = ?e)) - .instrument(info_span!("do_get_endpoint_jwks")) - .await - } - - async fn do_wake_compute( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> Result { - let request_id = ctx.session_id().to_string(); - let application_name = ctx.console_application_name(); - async { - let mut request_builder = self - .endpoint - .get_path("proxy_wake_compute") - .header("X-Request-ID", &request_id) - .header("Authorization", format!("Bearer {}", &self.jwt)) - .query(&[("session_id", ctx.session_id())]) - .query(&[ - ("application_name", application_name.as_str()), - ("project", user_info.endpoint.as_str()), - ]); - - let options = user_info.options.to_deep_object(); - if !options.is_empty() { - request_builder = request_builder.query(&options); - } - - let request = request_builder.build()?; - - debug!(url = request.url().as_str(), "sending http request"); - let start = Instant::now(); - let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); - let response = self.endpoint.execute(request).await?; - drop(pause); - info!(duration = ?start.elapsed(), "received http response"); - let body = parse_body::(response).await?; - - // Unfortunately, ownership won't let us use `Option::ok_or` here. - let (host, port) = match parse_host_port(&body.address) { - None => return Err(WakeComputeError::BadComputeAddress(body.address)), - Some(x) => x, - }; - - // Don't set anything but host and port! This config will be cached. - // We'll set username and such later using the startup message. - // TODO: add more type safety (in progress). 
- let mut config = compute::ConnCfg::new(host.to_owned(), port); - config.ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes. - - let node = NodeInfo { - config, - aux: body.aux, - allow_self_signed_compute: false, - }; - - Ok(node) - } - .inspect_err(|e| tracing::debug!(error = ?e)) - .instrument(info_span!("do_wake_compute")) - .await - } -} - -impl super::ControlPlaneApi for NeonControlPlaneClient { - #[tracing::instrument(skip_all)] - async fn get_role_secret( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> Result { - let normalized_ep = &user_info.endpoint.normalize(); - let user = &user_info.user; - if let Some(role_secret) = self - .caches - .project_info - .get_role_secret(normalized_ep, user) - { - return Ok(role_secret); - } - let auth_info = self.do_get_auth_info(ctx, user_info).await?; - if let Some(project_id) = auth_info.project_id { - let normalized_ep_int = normalized_ep.into(); - self.caches.project_info.insert_role_secret( - project_id, - normalized_ep_int, - user.into(), - auth_info.secret.clone(), - ); - self.caches.project_info.insert_allowed_ips( - project_id, - normalized_ep_int, - Arc::new(auth_info.allowed_ips), - ); - ctx.set_project_id(project_id); - } - // When we just got a secret, we don't need to invalidate it. - Ok(Cached::new_uncached(auth_info.secret)) - } - - async fn get_allowed_ips_and_secret( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { - let normalized_ep = &user_info.endpoint.normalize(); - if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) { - Metrics::get() - .proxy - .allowed_ips_cache_misses - .inc(CacheOutcome::Hit); - return Ok((allowed_ips, None)); - } - Metrics::get() - .proxy - .allowed_ips_cache_misses - .inc(CacheOutcome::Miss); - let auth_info = self.do_get_auth_info(ctx, user_info).await?; - let allowed_ips = Arc::new(auth_info.allowed_ips); - let user = &user_info.user; - if let Some(project_id) = auth_info.project_id { - let normalized_ep_int = normalized_ep.into(); - self.caches.project_info.insert_role_secret( - project_id, - normalized_ep_int, - user.into(), - auth_info.secret.clone(), - ); - self.caches.project_info.insert_allowed_ips( - project_id, - normalized_ep_int, - allowed_ips.clone(), - ); - ctx.set_project_id(project_id); - } - Ok(( - Cached::new_uncached(allowed_ips), - Some(Cached::new_uncached(auth_info.secret)), - )) - } - - #[tracing::instrument(skip_all)] - async fn get_endpoint_jwks( - &self, - ctx: &RequestContext, - endpoint: EndpointId, - ) -> Result, GetEndpointJwksError> { - self.do_get_endpoint_jwks(ctx, endpoint).await - } - - #[tracing::instrument(skip_all)] - async fn wake_compute( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> Result { - let key = user_info.endpoint_cache_key(); - - macro_rules! 
check_cache { - () => { - if let Some(cached) = self.caches.node_info.get(&key) { - let (cached, info) = cached.take_value(); - let info = info.map_err(|c| { - info!(key = &*key, "found cached wake_compute error"); - WakeComputeError::ControlPlane(ControlPlaneError::Message(Box::new(*c))) - })?; - - debug!(key = &*key, "found cached compute node info"); - ctx.set_project(info.aux.clone()); - return Ok(cached.map(|()| info)); - } - }; - } - - // Every time we do a wakeup http request, the compute node will stay up - // for some time (highly depends on the console's scale-to-zero policy); - // The connection info remains the same during that period of time, - // which means that we might cache it to reduce the load and latency. - check_cache!(); - - let permit = self.locks.get_permit(&key).await?; - - // after getting back a permit - it's possible the cache was filled - // double check - if permit.should_check_cache() { - // TODO: if there is something in the cache, mark the permit as success. - check_cache!(); - } - - // check rate limit - if !self - .wake_compute_endpoint_rate_limiter - .check(user_info.endpoint.normalize_intern(), 1) - { - return Err(WakeComputeError::TooManyConnections); - } - - let node = permit.release_result(self.do_wake_compute(ctx, user_info).await); - match node { - Ok(node) => { - ctx.set_project(node.aux.clone()); - debug!(key = &*key, "created a cache entry for woken compute node"); - - let mut stored_node = node.clone(); - // store the cached node as 'warm_cached' - stored_node.aux.cold_start_info = ColdStartInfo::WarmCached; - - let (_, cached) = self.caches.node_info.insert_unit(key, Ok(stored_node)); - - Ok(cached.map(|()| node)) - } - Err(err) => match err { - WakeComputeError::ControlPlane(ControlPlaneError::Message(err)) => { - let Some(status) = &err.status else { - return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( - err, - ))); - }; - - let reason = status - .details - .error_info - .map_or(Reason::Unknown, |x| x.reason); - - // if we can retry this error, do not cache it. - if reason.can_retry() { - return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( - err, - ))); - } - - // at this point, we should only have quota errors. - debug!( - key = &*key, - "created a cache entry for the wake compute error" - ); - - self.caches.node_info.insert_ttl( - key, - Err(err.clone()), - Duration::from_secs(30), - ); - - Err(WakeComputeError::ControlPlane(ControlPlaneError::Message( - err, - ))) - } - err => return Err(err), - }, - } - } -} - -/// Parse http response body, taking status code into account. -async fn parse_body serde::Deserialize<'a>>( - response: http::Response, -) -> Result { - let status = response.status(); - if status.is_success() { - // We shouldn't log raw body because it may contain secrets. - info!("request succeeded, processing the body"); - return Ok(response.json().await?); - } - let s = response.bytes().await?; - // Log plaintext to be able to detect, whether there are some cases not covered by the error struct. - info!("response_error plaintext: {:?}", s); - - // Don't throw an error here because it's not as important - // as the fact that the request itself has failed. 
- let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| { - warn!("failed to parse error body: {e}"); - ControlPlaneErrorMessage { - error: "reason unclear (malformed error message)".into(), - http_status_code: status, - status: None, - } - }); - body.http_status_code = status; - - warn!("console responded with an error ({status}): {body:?}"); - Err(ControlPlaneError::Message(Box::new(body))) -} - -fn parse_host_port(input: &str) -> Option<(&str, u16)> { - let (host, port) = input.rsplit_once(':')?; - let ipv6_brackets: &[_] = &['[', ']']; - Some((host.trim_matches(ipv6_brackets), port.parse().ok()?)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_host_port_v4() { - let (host, port) = parse_host_port("127.0.0.1:5432").expect("failed to parse"); - assert_eq!(host, "127.0.0.1"); - assert_eq!(port, 5432); - } - - #[test] - fn test_parse_host_port_v6() { - let (host, port) = parse_host_port("[2001:db8::1]:5432").expect("failed to parse"); - assert_eq!(host, "2001:db8::1"); - assert_eq!(port, 5432); - } - - #[test] - fn test_parse_host_port_url() { - let (host, port) = parse_host_port("compute-foo-bar-1234.default.svc.cluster.local:5432") - .expect("failed to parse"); - assert_eq!(host, "compute-foo-bar-1234.default.svc.cluster.local"); - assert_eq!(port, 5432); - } -} diff --git a/proxy/src/control_plane/messages.rs b/proxy/src/control_plane/messages.rs index 2662ab85f9..d068614b24 100644 --- a/proxy/src/control_plane/messages.rs +++ b/proxy/src/control_plane/messages.rs @@ -221,15 +221,6 @@ pub(crate) struct UserFacingMessage { pub(crate) message: Box, } -/// Response which holds client's auth secret, e.g. [`crate::scram::ServerSecret`]. -/// Returned by the `/proxy_get_role_secret` API method. -#[derive(Deserialize)] -pub(crate) struct GetRoleSecret { - pub(crate) role_secret: Box, - pub(crate) allowed_ips: Option>, - pub(crate) project_id: Option, -} - /// Response which holds client's auth secret, e.g. [`crate::scram::ServerSecret`]. /// Returned by the `/get_endpoint_access_control` API method. #[derive(Deserialize)] @@ -240,13 +231,6 @@ pub(crate) struct GetEndpointAccessControl { pub(crate) allowed_vpc_endpoint_ids: Option>, } -// Manually implement debug to omit sensitive info. -impl fmt::Debug for GetRoleSecret { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("GetRoleSecret").finish_non_exhaustive() - } -} - /// Response which holds compute node's `host:port` pair. /// Returned by the `/proxy_wake_compute` API method. 
#[derive(Debug, Deserialize)] @@ -477,18 +461,18 @@ mod tests { let json = json!({ "role_secret": "secret", }); - serde_json::from_str::(&json.to_string())?; + serde_json::from_str::(&json.to_string())?; let json = json!({ "role_secret": "secret", "allowed_ips": ["8.8.8.8"], }); - serde_json::from_str::(&json.to_string())?; + serde_json::from_str::(&json.to_string())?; let json = json!({ "role_secret": "secret", "allowed_ips": ["8.8.8.8"], "project_id": "project", }); - serde_json::from_str::(&json.to_string())?; + serde_json::from_str::(&json.to_string())?; Ok(()) } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 0b7cdec50d..13ada1361e 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3222,7 +3222,7 @@ class NeonProxy(PgProtocol): *["--allow-self-signed-compute", "true"], ] - class Console(AuthBackend): + class ProxyV1(AuthBackend): def __init__(self, endpoint: str, fixed_rate_limit: int | None = None): self.endpoint = endpoint self.fixed_rate_limit = fixed_rate_limit @@ -3230,7 +3230,7 @@ class NeonProxy(PgProtocol): def extra_args(self) -> list[str]: args = [ # Console auth backend params - *["--auth-backend", "console"], + *["--auth-backend", "cplane-v1"], *["--auth-endpoint", self.endpoint], *["--sql-over-http-pool-opt-in", "false"], ] @@ -3478,13 +3478,13 @@ class NeonProxy(PgProtocol): class NeonAuthBroker: - class ControlPlane: + class ProxyV1: def __init__(self, endpoint: str): self.endpoint = endpoint def extra_args(self) -> list[str]: args = [ - *["--auth-backend", "console"], + *["--auth-backend", "cplane-v1"], *["--auth-endpoint", self.endpoint], ] return args @@ -3496,7 +3496,7 @@ class NeonAuthBroker: http_port: int, mgmt_port: int, external_http_port: int, - auth_backend: NeonAuthBroker.ControlPlane, + auth_backend: NeonAuthBroker.ProxyV1, ): self.domain = "apiauth.localtest.me" # resolves to 127.0.0.1 self.host = "127.0.0.1" @@ -3682,7 +3682,7 @@ def static_auth_broker( local_proxy_addr = f"{http2_echoserver.host}:{http2_echoserver.port}" # return local_proxy addr on ProxyWakeCompute. - httpserver.expect_request("/cplane/proxy_wake_compute").respond_with_json( + httpserver.expect_request("/cplane/wake_compute").respond_with_json( { "address": local_proxy_addr, "aux": { @@ -3722,7 +3722,7 @@ def static_auth_broker( http_port=http_port, mgmt_port=mgmt_port, external_http_port=external_http_port, - auth_backend=NeonAuthBroker.ControlPlane(httpserver.url_for("/cplane")), + auth_backend=NeonAuthBroker.ProxyV1(httpserver.url_for("/cplane")), ) as proxy: proxy.start() yield proxy From 58d45c6e86b88b9693e8ab4dd1e8d402b51f1a4d Mon Sep 17 00:00:00 2001 From: Rahul Patil Date: Thu, 12 Dec 2024 16:13:08 +0100 Subject: [PATCH 06/49] ci(fix): Use OIDC auth to login on ECR (#10055) ## Problem CI currently uses static credentials in some places. These are less secure and hard to maintain, so we are going to deprecate them and use OIDC auth. 
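The replacement is the same mechanical pattern in every affected job: grant the job `id-token: write` and assume a short-lived role via OIDC instead of reading long-lived keys from repository secrets. A minimal before/after sketch (the job name and runner label are illustrative; the action, region, variable, and duration match what the diffs below use):

```yaml
jobs:
  example:                      # illustrative job name
    runs-on: ubuntu-22.04
    permissions:
      id-token: write           # required for the OIDC token exchange
    steps:
      # Before: long-lived static credentials from repository secrets.
      # env:
      #   AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
      #   AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}

      # After: assume a role through OIDC; credentials expire on their own.
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          role-duration-seconds: 3600

      # ECR logins then piggyback on the assumed role.
      - name: Login to Amazon Dev ECR
        uses: aws-actions/amazon-ecr-login@v2
```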
## Summary of changes - ci(fix): Use OIDC auth to upload artifact on s3 - ci(fix): Use OIDC auth to login on ECR --- .github/actions/download/action.yml | 11 +++ .../actions/run-python-test-set/action.yml | 5 ++ .github/actions/save-coverage-data/action.yml | 2 + .github/actions/upload/action.yml | 11 +++ .../workflows/_benchmarking_preparation.yml | 1 + .github/workflows/_build-and-test-locally.yml | 18 +++- .github/workflows/benchmarking.yml | 6 ++ .github/workflows/build_and_test.yml | 87 ++++++++++++++----- .github/workflows/cloud-regress.yml | 8 +- .github/workflows/ingest_benchmark.yml | 1 + .github/workflows/neon_extra_builds.yml | 13 ++- .github/workflows/periodic_pagebench.yml | 8 +- .github/workflows/pg-clients.yml | 10 ++- .github/workflows/pin-build-tools-image.yml | 14 +-- .github/workflows/pre-merge-checks.yml | 1 + control_plane/src/background_process.rs | 1 + test_runner/fixtures/remote_storage.py | 23 ++++- 17 files changed, 180 insertions(+), 40 deletions(-) diff --git a/.github/actions/download/action.yml b/.github/actions/download/action.yml index 01c216b1ac..d6b1fac9f7 100644 --- a/.github/actions/download/action.yml +++ b/.github/actions/download/action.yml @@ -15,10 +15,21 @@ inputs: prefix: description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" required: false + aws_oicd_role_arn: + description: "the OIDC role arn for aws auth" + required: false + default: "" runs: using: "composite" steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 + - name: Download artifact id: download-artifact shell: bash -euxo pipefail {0} diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 1159627302..dd5c890f5b 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -62,6 +62,7 @@ runs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact path: /tmp/neon + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} - name: Download Neon binaries for the previous release if: inputs.build_type != 'remote' @@ -70,6 +71,7 @@ runs: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact path: /tmp/neon-previous prefix: latest + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} - name: Download compatibility snapshot if: inputs.build_type != 'remote' @@ -81,6 +83,7 @@ runs: # The lack of compatibility snapshot (for example, for the new Postgres version) # shouldn't fail the whole job. Only relevant test should fail. 
skip-if-does-not-exist: true + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} - name: Checkout if: inputs.needs_postgres_source == 'true' @@ -218,6 +221,7 @@ runs: # The lack of compatibility snapshot shouldn't fail the job # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} @@ -232,3 +236,4 @@ runs: with: report-dir: /tmp/test_output/allure/results unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }} + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} diff --git a/.github/actions/save-coverage-data/action.yml b/.github/actions/save-coverage-data/action.yml index 6fbe19a96e..9e3a7cba24 100644 --- a/.github/actions/save-coverage-data/action.yml +++ b/.github/actions/save-coverage-data/action.yml @@ -14,9 +14,11 @@ runs: name: coverage-data-artifact path: /tmp/coverage skip-if-does-not-exist: true # skip if there's no previous coverage to download + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} - name: Upload coverage data uses: ./.github/actions/upload with: name: coverage-data-artifact path: /tmp/coverage + aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} diff --git a/.github/actions/upload/action.yml b/.github/actions/upload/action.yml index 8a4cfe2eff..6616d08899 100644 --- a/.github/actions/upload/action.yml +++ b/.github/actions/upload/action.yml @@ -14,6 +14,10 @@ inputs: prefix: description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" required: false + aws_oicd_role_arn: + description: "the OIDC role arn for aws auth" + required: false + default: "" runs: using: "composite" @@ -53,6 +57,13 @@ runs: echo 'SKIPPED=false' >> $GITHUB_OUTPUT + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 + - name: Upload artifact if: ${{ steps.prepare-artifact.outputs.SKIPPED == 'false' }} shell: bash -euxo pipefail {0} diff --git a/.github/workflows/_benchmarking_preparation.yml b/.github/workflows/_benchmarking_preparation.yml index 5cdc16f248..371d815fc8 100644 --- a/.github/workflows/_benchmarking_preparation.yml +++ b/.github/workflows/_benchmarking_preparation.yml @@ -70,6 +70,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # we create a table that has one row for each database that we want to restore with the status whether the restore is done - name: Create benchmark_restore_status table if it does not exist diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 7d47f78d6b..456399f3c3 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -31,12 +31,13 @@ defaults: env: RUST_BACKTRACE: 1 COPT: '-Werror' - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} jobs: build-neon: runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }} + permissions: + id-token: write # aws-actions/configure-aws-credentials + contents: read container: image: ${{ inputs.build-tools-image }} credentials: @@ -205,6 
+206,13 @@ jobs: done fi + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Run rust tests env: NEXTEST_RETRIES: 3 @@ -256,6 +264,7 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact path: /tmp/neon + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # XXX: keep this after the binaries.list is formed, so the coverage can properly work later - name: Merge and upload coverage data @@ -265,6 +274,10 @@ jobs: regress-tests: # Don't run regression tests on debug arm64 builds if: inputs.build-type != 'debug' || inputs.arch != 'arm64' + permissions: + id-token: write # aws-actions/configure-aws-credentials + contents: read + statuses: write needs: [ build-neon ] runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }} container: @@ -295,6 +308,7 @@ jobs: real_s3_region: eu-central-1 rerun_failed: true pg_version: ${{ matrix.pg_version }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} CHECK_ONDISK_DATA_COMPATIBILITY: nonempty diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 7621d72f64..2d37be8837 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -105,6 +105,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -204,6 +205,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Run Logical Replication benchmarks uses: ./.github/actions/run-python-test-set @@ -405,6 +407,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) @@ -708,6 +711,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -818,6 +822,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get Connstring Secret Name run: | @@ -926,6 +931,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 67d59c7da1..62f190a0c2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -21,8 +21,6 @@ concurrency: env: RUST_BACKTRACE: 1 COPT: '-Werror' - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix 
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} @@ -362,6 +360,10 @@ jobs: create-test-report: needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ] if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }} + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write outputs: report-url: ${{ steps.create-allure-report.outputs.report-url }} @@ -382,6 +384,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -413,6 +416,10 @@ jobs: coverage-report: if: ${{ !startsWith(github.ref_name, 'release') }} needs: [ check-permissions, build-build-tools-image, build-and-test-locally ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write runs-on: [ self-hosted, small ] container: image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm @@ -439,12 +446,14 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact path: /tmp/neon + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get coverage artifact uses: ./.github/actions/download with: name: coverage-data-artifact path: /tmp/coverage + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Merge coverage data run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge @@ -575,6 +584,10 @@ jobs: neon-image: needs: [ neon-image-arch, tag ] runs-on: ubuntu-22.04 + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read steps: - uses: docker/login-action@v3 @@ -589,11 +602,15 @@ jobs: neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \ neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64 - - uses: docker/login-action@v3 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 with: - registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.AWS_ACCESS_KEY_DEV }} - password: ${{ secrets.AWS_SECRET_KEY_DEV }} + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 - name: Push multi-arch image to ECR run: | @@ -602,6 +619,10 @@ jobs: compute-node-image-arch: needs: [ check-permissions, build-build-tools-image, tag ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read strategy: fail-fast: false matrix: @@ -642,11 +663,15 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - uses: docker/login-action@v3 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 with: - registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.AWS_ACCESS_KEY_DEV }} - password: ${{ secrets.AWS_SECRET_KEY_DEV }} + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 - uses: docker/login-action@v3 with: @@ -719,6 +744,10 @@ jobs: compute-node-image: needs: [ 
compute-node-image-arch, tag ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read runs-on: ubuntu-22.04 strategy: @@ -763,11 +792,15 @@ jobs: neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - - uses: docker/login-action@v3 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 with: - registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.AWS_ACCESS_KEY_DEV }} - password: ${{ secrets.AWS_SECRET_KEY_DEV }} + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 - name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR run: | @@ -892,7 +925,9 @@ jobs: runs-on: ubuntu-22.04 permissions: - id-token: write # for `aws-actions/configure-aws-credentials` + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read env: VERSIONS: v14 v15 v16 v17 @@ -903,12 +938,15 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - name: Login to dev ECR - uses: docker/login-action@v3 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 with: - registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.AWS_ACCESS_KEY_DEV }} - password: ${{ secrets.AWS_SECRET_KEY_DEV }} + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 - name: Copy vm-compute-node images to ECR run: | @@ -1062,7 +1100,10 @@ jobs: needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() - + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write runs-on: [ self-hosted, small ] container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest steps: @@ -1184,6 +1225,10 @@ jobs: # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory promote-compatibility-data: needs: [ deploy ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` if: github.ref_name == 'release' && !failure() && !cancelled() diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 57194090cf..457634ddad 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -19,14 +19,15 @@ concurrency: group: ${{ github.workflow }} cancel-in-progress: true +permissions: + id-token: write # aws-actions/configure-aws-credentials + jobs: regress: env: POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install TEST_OUTPUT: 
/tmp/test_output BUILD_TYPE: remote - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} strategy: fail-fast: false matrix: @@ -78,6 +79,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create a new branch id: create-branch @@ -107,6 +109,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index a5810e91a4..6773032263 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -64,6 +64,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: ${{ matrix.target_project == 'new_empty_project' }} diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index 092831adb9..1f85c2e102 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -143,6 +143,10 @@ jobs: gather-rust-build-stats: needs: [ check-permissions, build-build-tools-image ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write if: | contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') || contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || @@ -177,13 +181,18 @@ jobs: - name: Produce the build stats run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc) + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + - name: Upload the build stats id: upload-stats env: BUCKET: neon-github-public-dev SHA: ${{ github.event.pull_request.head.sha || github.sha }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} run: | REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/" diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 6b98bc873f..a04ceb4a24 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -21,6 +21,9 @@ defaults: run: shell: bash -euo pipefail {0} +permissions: + id-token: write # aws-actions/configure-aws-credentials + concurrency: group: ${{ github.workflow }} cancel-in-progress: false @@ -124,11 +127,10 @@ jobs: cat "test_log_${GITHUB_RUN_ID}" - name: Create Allure report - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index 4f5495cbe2..5c999d3810 
100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -25,11 +25,13 @@ defaults: run: shell: bash -euxo pipefail {0} +permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write # require for posting a status update + env: DEFAULT_PG_VERSION: 16 PLATFORM: neon-captest-new - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} AWS_DEFAULT_REGION: eu-central-1 jobs: @@ -94,6 +96,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -126,6 +129,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -159,6 +163,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -191,6 +196,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} diff --git a/.github/workflows/pin-build-tools-image.yml b/.github/workflows/pin-build-tools-image.yml index 5b43d97de6..626de2b0e0 100644 --- a/.github/workflows/pin-build-tools-image.yml +++ b/.github/workflows/pin-build-tools-image.yml @@ -67,7 +67,7 @@ jobs: runs-on: ubuntu-22.04 permissions: - id-token: write # for `azure/login` + id-token: write # for `azure/login` and aws auth steps: - uses: docker/login-action@v3 @@ -75,11 +75,15 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - uses: docker/login-action@v3 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 with: - registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.AWS_ACCESS_KEY_DEV }} - password: ${{ secrets.AWS_SECRET_KEY_DEV }} + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 - name: Azure login uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index d2f9d8a666..b2e00d94f7 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -63,6 +63,7 @@ jobs: if: always() permissions: statuses: write # for `github.repos.createCommitStatus(...)` + contents: write needs: - get-changed-files - check-codestyle-python diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 94a072e394..af312d73a7 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -274,6 +274,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { for env_key in [ "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_PROFILE", // HOME is needed in combination with `AWS_PROFILE` to pick up the SSO sessions. 
"HOME", diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py index 4e1e8a884f..d969971a35 100644 --- a/test_runner/fixtures/remote_storage.py +++ b/test_runner/fixtures/remote_storage.py @@ -70,6 +70,9 @@ class MockS3Server: def secret_key(self) -> str: return "test" + def session_token(self) -> str: + return "test" + def kill(self): self.server.stop() @@ -161,6 +164,7 @@ class S3Storage: bucket_region: str access_key: str | None secret_key: str | None + session_token: str | None aws_profile: str | None prefix_in_bucket: str client: S3Client @@ -181,13 +185,18 @@ class S3Storage: if home is not None: env["HOME"] = home return env - if self.access_key is not None and self.secret_key is not None: + if ( + self.access_key is not None + and self.secret_key is not None + and self.session_token is not None + ): return { "AWS_ACCESS_KEY_ID": self.access_key, "AWS_SECRET_ACCESS_KEY": self.secret_key, + "AWS_SESSION_TOKEN": self.session_token, } raise RuntimeError( - "Either AWS_PROFILE or (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY) have to be set for S3Storage" + "Either AWS_PROFILE or (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN) have to be set for S3Storage" ) def to_string(self) -> str: @@ -352,6 +361,7 @@ class RemoteStorageKind(StrEnum): mock_region = mock_s3_server.region() access_key, secret_key = mock_s3_server.access_key(), mock_s3_server.secret_key() + session_token = mock_s3_server.session_token() client = boto3.client( "s3", @@ -359,6 +369,7 @@ class RemoteStorageKind(StrEnum): region_name=mock_region, aws_access_key_id=access_key, aws_secret_access_key=secret_key, + aws_session_token=session_token, ) bucket_name = to_bucket_name(user, test_name) @@ -372,6 +383,7 @@ class RemoteStorageKind(StrEnum): bucket_region=mock_region, access_key=access_key, secret_key=secret_key, + session_token=session_token, aws_profile=None, prefix_in_bucket="", client=client, @@ -383,9 +395,10 @@ class RemoteStorageKind(StrEnum): env_access_key = os.getenv("AWS_ACCESS_KEY_ID") env_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY") + env_access_token = os.getenv("AWS_SESSION_TOKEN") env_profile = os.getenv("AWS_PROFILE") assert ( - env_access_key and env_secret_key + env_access_key and env_secret_key and env_access_token ) or env_profile, "need to specify either access key and secret access key or profile" bucket_name = bucket_name or os.getenv("REMOTE_STORAGE_S3_BUCKET") @@ -398,6 +411,9 @@ class RemoteStorageKind(StrEnum): client = boto3.client( "s3", region_name=bucket_region, + aws_access_key_id=env_access_key, + aws_secret_access_key=env_secret_key, + aws_session_token=env_access_token, ) return S3Storage( @@ -405,6 +421,7 @@ class RemoteStorageKind(StrEnum): bucket_region=bucket_region, access_key=env_access_key, secret_key=env_secret_key, + session_token=env_access_token, aws_profile=env_profile, prefix_in_bucket=prefix_in_bucket, client=client, From 2f3433876fdce12093ee0c706caf2e04b4de2f51 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:34:07 +0100 Subject: [PATCH 07/49] Change the channel for notification. (#10112) ## Problem Now notifications about failures in `pg_regress` tests run on the staging cloud instance, reach the channel `on-call-staging-stream`, while they should reach `on-call-qa-staging-stream` ## Summary of changes The channel changed. 
--- .github/actionlint.yml | 1 + .github/workflows/cloud-regress.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 27c8fb3c23..9d8389faa5 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -23,3 +23,4 @@ config-variables: - BENCHMARK_INGEST_TARGET_PROJECTID - PGREGRESS_PG16_PROJECT_ID - PGREGRESS_PG17_PROJECT_ID + - SLACK_ON_CALL_QA_STAGING_STREAM diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 457634ddad..2fc26baa21 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -116,7 +116,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # on-call-staging-stream + channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }} slack-message: | Periodic pg_regress on staging: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> From 53721266f1cf6f835541a8b64bbd0b46c60761e2 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 12 Dec 2024 19:05:58 +0200 Subject: [PATCH 08/49] Disable connection logging in pgbouncer by default (#10118) It can produce a lot of logs, making pgbouncer itself consume all CPU in extreme cases. We saw that happen in stress testing. --- compute/etc/pgbouncer.ini | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compute/etc/pgbouncer.ini b/compute/etc/pgbouncer.ini index abcd165636..604b4e41ea 100644 --- a/compute/etc/pgbouncer.ini +++ b/compute/etc/pgbouncer.ini @@ -19,3 +19,10 @@ max_prepared_statements=0 admin_users=postgres unix_socket_dir=/tmp/ unix_socket_mode=0777 + +;; Disable connection logging. It produces a lot of logs that no one looks at, +;; and we can get similar log entries from the proxy too. We had incidents in +;; the past where the logging significantly stressed the log device or pgbouncer +;; itself. +log_connections=0 +log_disconnections=0 From 6d5687521b6eede3166884c0247405ebd59c6f8a Mon Sep 17 00:00:00 2001 From: Rahul Patil Date: Thu, 12 Dec 2024 19:53:35 +0100 Subject: [PATCH 09/49] fix(ci): Allow github-script to post test reports (#10120) Allow github-script to post test reports --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 62f190a0c2..4bfb5077c6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -364,6 +364,7 @@ jobs: id-token: write # aws-actions/configure-aws-credentials statuses: write contents: write + pull-requests: write outputs: report-url: ${{ steps.create-allure-report.outputs.report-url }} From a93e3d31cc5152c279480ded76ea886867fbfea3 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 12 Dec 2024 19:35:38 +0000 Subject: [PATCH 10/49] storcon: refine logic for choosing AZ on tenant creation (#10054) ## Problem When we update our scheduler/optimization code to respect AZs properly (https://github.com/neondatabase/neon/pull/9916), the choice of AZ becomes a much higher-stakes decision. 
We will pretty much always run a tenant in its preferred AZ, and that AZ is fixed for the lifetime of the tenant (unless a human intervenes) Eventually, when we do auto-balancing based on utilization, I anticipate that part of that will be to automatically change the AZ of tenants if our original scheduling decisions have caused imbalance, but as an interim measure, we can at least avoid making this scheduling decision based purely on which AZ contains the emptiest node. This is a precursor to https://github.com/neondatabase/neon/pull/9947 ## Summary of changes - When creating a tenant, instead of scheduling a shard and then reading its preferred AZ back, make the AZ decision first. - Instead of choosing AZ based on which node is emptiest, use the median utilization of nodes in each AZ to pick the AZ to use. This avoids bad AZ decisions during periods when some node has very low utilization (such as after replacing a dead node) I considered also making the selection a weighted pseudo-random choice based on utilization, but wanted to avoid destabilising tests with that for now. --- libs/pageserver_api/src/controller_api.rs | 2 +- storage_controller/src/scheduler.rs | 93 +++++++++++++++++++++++ storage_controller/src/service.rs | 62 +++++---------- storage_controller/src/tenant_shard.rs | 15 ++-- 4 files changed, 118 insertions(+), 54 deletions(-) diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 6839ef69f5..ec7b81423a 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -75,7 +75,7 @@ pub struct TenantPolicyRequest { pub scheduling: Option, } -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] pub struct AvailabilityZone(pub String); impl Display for AvailabilityZone { diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index ecc6b11e47..51a4cf35be 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -742,6 +742,50 @@ impl Scheduler { self.schedule_shard::(&[], &None, &ScheduleContext::default()) } + /// For choosing which AZ to schedule a new shard into, use this. It will return the + /// AZ with the lowest median utilization. + /// + /// We use an AZ-wide measure rather than simply selecting the AZ of the least-loaded + /// node, because while tenants start out single sharded, when they grow and undergo + /// shard-split, they will occupy space on many nodes within an AZ. + /// + /// We use median rather than total free space or mean utilization, because + /// we wish to avoid preferring AZs that have low-load nodes resulting from + /// recent replacements. + /// + /// The practical result is that we will pick an AZ based on its median node, and + /// then actually _schedule_ the new shard onto the lowest-loaded node in that AZ. + pub(crate) fn get_az_for_new_tenant(&self) -> Option { + if self.nodes.is_empty() { + return None; + } + + let mut scores_by_az = HashMap::new(); + for (node_id, node) in &self.nodes { + let az_scores = scores_by_az.entry(&node.az).or_insert_with(Vec::new); + let score = match &node.may_schedule { + MaySchedule::Yes(utilization) => utilization.score(), + MaySchedule::No => PageserverUtilization::full().score(), + }; + az_scores.push((node_id, node, score)); + } + + // Sort by utilization. Also include the node ID to break ties. 
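+        // (Each entry pushed above is `(node_id, node, score)`, so the sort key
+        // `(i.2, i.0)` orders by utilization score first and breaks ties by node id.)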
+ for scores in scores_by_az.values_mut() { + scores.sort_by_key(|i| (i.2, i.0)); + } + + let mut median_by_az = scores_by_az + .iter() + .map(|(az, nodes)| (*az, nodes.get(nodes.len() / 2).unwrap().2)) + .collect::>(); + // Sort by utilization. Also include the AZ to break ties. + median_by_az.sort_by_key(|i| (i.1, i.0)); + + // Return the AZ with the lowest median utilization + Some(median_by_az.first().unwrap().0.clone()) + } + /// Unit test access to internal state #[cfg(test)] pub(crate) fn get_node_shard_count(&self, node_id: NodeId) -> usize { @@ -1087,4 +1131,53 @@ mod tests { intent.clear(&mut scheduler); } } + + #[test] + fn az_scheduling_for_new_tenant() { + let az_a_tag = AvailabilityZone("az-a".to_string()); + let az_b_tag = AvailabilityZone("az-b".to_string()); + let nodes = test_utils::make_test_nodes( + 6, + &[ + az_a_tag.clone(), + az_a_tag.clone(), + az_a_tag.clone(), + az_b_tag.clone(), + az_b_tag.clone(), + az_b_tag.clone(), + ], + ); + + let mut scheduler = Scheduler::new(nodes.values()); + + /// Force the utilization of a node in Scheduler's state to a particular + /// number of bytes used. + fn set_utilization(scheduler: &mut Scheduler, node_id: NodeId, shard_count: u32) { + let mut node = Node::new( + node_id, + "".to_string(), + 0, + "".to_string(), + 0, + scheduler.nodes.get(&node_id).unwrap().az.clone(), + ); + node.set_availability(NodeAvailability::Active(test_utilization::simple( + shard_count, + 0, + ))); + scheduler.node_upsert(&node); + } + + // Initial empty state. Scores are tied, scheduler prefers lower AZ ID. + assert_eq!(scheduler.get_az_for_new_tenant(), Some(az_a_tag.clone())); + + // Put some utilization on one node in AZ A: this should change nothing, as the median hasn't changed + set_utilization(&mut scheduler, NodeId(1), 1000000); + assert_eq!(scheduler.get_az_for_new_tenant(), Some(az_a_tag.clone())); + + // Put some utilization on a second node in AZ A: now the median has changed, so the scheduler + // should prefer the other AZ. + set_utilization(&mut scheduler, NodeId(2), 1000000); + assert_eq!(scheduler.get_az_for_new_tenant(), Some(az_b_tag.clone())); + } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 894b67fdc6..746177c089 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1582,6 +1582,7 @@ impl Service { attach_req.tenant_shard_id, ShardIdentity::unsharded(), PlacementPolicy::Attached(0), + None, ), ); tracing::info!("Inserted shard {} in memory", attach_req.tenant_shard_id); @@ -2109,6 +2110,16 @@ impl Service { ) }; + let preferred_az_id = { + let locked = self.inner.read().unwrap(); + // Idempotency: take the existing value if the tenant already exists + if let Some(shard) = locked.tenants.get(create_ids.first().unwrap()) { + shard.preferred_az().cloned() + } else { + locked.scheduler.get_az_for_new_tenant() + } + }; + // Ordering: we persist tenant shards before creating them on the pageserver. This enables a caller // to clean up after themselves by issuing a tenant deletion if something goes wrong and we restart // during the creation, rather than risking leaving orphan objects in S3. 
@@ -2128,7 +2139,7 @@ impl Service { splitting: SplitState::default(), scheduling_policy: serde_json::to_string(&ShardSchedulingPolicy::default()) .unwrap(), - preferred_az_id: None, + preferred_az_id: preferred_az_id.as_ref().map(|az| az.to_string()), }) .collect(); @@ -2164,6 +2175,7 @@ impl Service { &create_req.shard_parameters, create_req.config.clone(), placement_policy.clone(), + preferred_az_id.as_ref(), &mut schedule_context, ) .await; @@ -2177,44 +2189,6 @@ impl Service { } } - let preferred_azs = { - let locked = self.inner.read().unwrap(); - response_shards - .iter() - .filter_map(|resp| { - let az_id = locked - .nodes - .get(&resp.node_id) - .map(|n| n.get_availability_zone_id().clone())?; - - Some((resp.shard_id, az_id)) - }) - .collect::>() - }; - - // Note that we persist the preferred AZ for the new shards separately. - // In theory, we could "peek" the scheduler to determine where the shard will - // land, but the subsequent "real" call into the scheduler might select a different - // node. Hence, we do this awkward update to keep things consistent. - let updated = self - .persistence - .set_tenant_shard_preferred_azs(preferred_azs) - .await - .map_err(|err| { - ApiError::InternalServerError(anyhow::anyhow!( - "Failed to persist preferred az ids: {err}" - )) - })?; - - { - let mut locked = self.inner.write().unwrap(); - for (tid, az_id) in updated { - if let Some(shard) = locked.tenants.get_mut(&tid) { - shard.set_preferred_az(az_id); - } - } - } - // If we failed to schedule shards, then they are still created in the controller, // but we return an error to the requester to avoid a silent failure when someone // tries to e.g. create a tenant whose placement policy requires more nodes than @@ -2245,6 +2219,7 @@ impl Service { /// Helper for tenant creation that does the scheduling for an individual shard. Covers both the /// case of a new tenant and a pre-existing one. + #[allow(clippy::too_many_arguments)] async fn do_initial_shard_scheduling( &self, tenant_shard_id: TenantShardId, @@ -2252,6 +2227,7 @@ impl Service { shard_params: &ShardParameters, config: TenantConfig, placement_policy: PlacementPolicy, + preferred_az_id: Option<&AvailabilityZone>, schedule_context: &mut ScheduleContext, ) -> InitialShardScheduleOutcome { let mut locked = self.inner.write().unwrap(); @@ -2262,10 +2238,6 @@ impl Service { Entry::Occupied(mut entry) => { tracing::info!("Tenant shard {tenant_shard_id} already exists while creating"); - // TODO: schedule() should take an anti-affinity expression that pushes - // attached and secondary locations (independently) away frorm those - // pageservers also holding a shard for this tenant. 
- if let Err(err) = entry.get_mut().schedule(scheduler, schedule_context) { return InitialShardScheduleOutcome::ShardScheduleError(err); } @@ -2289,6 +2261,7 @@ impl Service { tenant_shard_id, ShardIdentity::from_params(tenant_shard_id.shard_number, shard_params), placement_policy, + preferred_az_id.cloned(), )); state.generation = initial_generation; @@ -4256,7 +4229,8 @@ impl Service { }, ); - let mut child_state = TenantShard::new(child, child_shard, policy.clone()); + let mut child_state = + TenantShard::new(child, child_shard, policy.clone(), preferred_az.clone()); child_state.intent = IntentState::single(scheduler, Some(pageserver)); child_state.observed = ObservedState { locations: child_observed, diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 2eb98ee825..f1b921646f 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -472,6 +472,7 @@ impl TenantShard { tenant_shard_id: TenantShardId, shard: ShardIdentity, policy: PlacementPolicy, + preferred_az_id: Option, ) -> Self { metrics::METRICS_REGISTRY .metrics_group @@ -495,7 +496,7 @@ impl TenantShard { last_error: Arc::default(), pending_compute_notification: false, scheduling_policy: ShardSchedulingPolicy::default(), - preferred_az_id: None, + preferred_az_id, } } @@ -1571,6 +1572,7 @@ pub(crate) mod tests { ) .unwrap(), policy, + None, ) } @@ -1597,7 +1599,7 @@ pub(crate) mod tests { shard_number, shard_count, }; - let mut ts = TenantShard::new( + TenantShard::new( tenant_shard_id, ShardIdentity::new( shard_number, @@ -1606,13 +1608,8 @@ pub(crate) mod tests { ) .unwrap(), policy.clone(), - ); - - if let Some(az) = &preferred_az { - ts.set_preferred_az(az.clone()); - } - - ts + preferred_az.clone(), + ) }) .collect() } From 5ff4b991c74141505927a2498310b20c617a8786 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:23:24 -0500 Subject: [PATCH 11/49] feat(pageserver): gc-compaction split over LSN (#9900) ## Problem part of https://github.com/neondatabase/neon/issues/9114, stacked PR over https://github.com/neondatabase/neon/pull/9897, partially refactored to help with https://github.com/neondatabase/neon/issues/10031 ## Summary of changes * gc-compaction takes `above_lsn` parameter. We only compact the layers above this LSN, and all data below the LSN are treated as if they are on the ancestor branch. * refactored gc-compaction to take `GcCompactJob` that describes the rectangular range to be compacted. * Added unit test for this case. 
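For illustration, a minimal self-contained sketch of the rectangle a job describes (the pageserver `Key` and `Lsn` types are replaced by plain integer stand-ins here, purely so the sketch compiles on its own; the field names follow the `GcCompactJob` struct added in this PR):

```rust
use std::ops::Range;

// Integer stand-ins for the pageserver `Key` and `Lsn` types (an assumption
// made only to keep this sketch self-contained and runnable).
type Key = u128;
type Lsn = u64;

/// The rectangular range a gc-compaction job operates on: a key range
/// crossed with an LSN range. Data below `compact_lsn_range.start` is
/// treated as if it lived on an ancestor branch and is served through the
/// normal read path instead of being rewritten.
struct GcCompactJob {
    dry_run: bool,
    compact_key_range: Range<Key>,
    compact_lsn_range: Range<Lsn>,
}

fn main() {
    // "Compact everything above a given LSN", the shape exercised by the
    // new tests via `CompactLsnRange::above`.
    let job = GcCompactJob {
        dry_run: false,
        compact_key_range: Key::MIN..Key::MAX,
        compact_lsn_range: 0x40..Lsn::MAX,
    };
    println!(
        "dry_run={} keys={:x?} lsns={:x?}",
        job.dry_run, job.compact_key_range, job.compact_lsn_range
    );
}
```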
--------- Signed-off-by: Alex Chi Z Co-authored-by: Christian Schwarz --- pageserver/src/http/routes.rs | 13 +- pageserver/src/tenant.rs | 661 ++++++++++++++++++- pageserver/src/tenant/timeline.rs | 71 +- pageserver/src/tenant/timeline/compaction.rs | 233 ++++--- test_runner/regress/test_compaction.py | 4 +- 5 files changed, 879 insertions(+), 103 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 6e9ee976f4..db7d293856 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2081,13 +2081,20 @@ async fn timeline_compact_handler( .as_ref() .map(|r| r.sub_compaction) .unwrap_or(false); + let sub_compaction_max_job_size_mb = compact_request + .as_ref() + .and_then(|r| r.sub_compaction_max_job_size_mb); + let options = CompactOptions { - compact_range: compact_request + compact_key_range: compact_request .as_ref() - .and_then(|r| r.compact_range.clone()), - compact_below_lsn: compact_request.as_ref().and_then(|r| r.compact_below_lsn), + .and_then(|r| r.compact_key_range.clone()), + compact_lsn_range: compact_request + .as_ref() + .and_then(|r| r.compact_lsn_range.clone()), flags, sub_compaction, + sub_compaction_max_job_size_mb, }; let scheduled = compact_request diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 92078e4b08..99289d5f15 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -44,6 +44,7 @@ use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; +use timeline::compaction::GcCompactJob; use timeline::compaction::ScheduledCompactionTask; use timeline::import_pgdata; use timeline::offload::offload_timeline; @@ -3017,8 +3018,15 @@ impl Tenant { warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", next_scheduled_compaction_task.options); } else if next_scheduled_compaction_task.options.sub_compaction { info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); - let jobs = timeline - .gc_compaction_split_jobs(next_scheduled_compaction_task.options) + let jobs: Vec = timeline + .gc_compaction_split_jobs( + GcCompactJob::from_compact_options( + next_scheduled_compaction_task.options.clone(), + ), + next_scheduled_compaction_task + .options + .sub_compaction_max_job_size_mb, + ) .await .map_err(CompactionError::Other)?; if jobs.is_empty() { @@ -3029,9 +3037,23 @@ impl Tenant { let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); let tline_pending_tasks = guard.entry(*timeline_id).or_default(); for (idx, job) in jobs.into_iter().enumerate() { + // Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions` + // until we do further refactors to allow directly call `compact_with_gc`. 
+ let mut flags: EnumSet = EnumSet::default(); + flags |= CompactFlags::EnhancedGcBottomMostCompaction; + if job.dry_run { + flags |= CompactFlags::DryRun; + } + let options = CompactOptions { + flags, + sub_compaction: false, + compact_key_range: Some(job.compact_key_range.into()), + compact_lsn_range: Some(job.compact_lsn_range.into()), + sub_compaction_max_job_size_mb: None, + }; tline_pending_tasks.push_back(if idx == jobs_len - 1 { ScheduledCompactionTask { - options: job, + options, // The last job in the queue sends the signal and releases the gc guard result_tx: next_scheduled_compaction_task .result_tx @@ -3042,7 +3064,7 @@ impl Tenant { } } else { ScheduledCompactionTask { - options: job, + options, result_tx: None, gc_block: None, } @@ -5742,6 +5764,8 @@ mod tests { #[cfg(feature = "testing")] use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; #[cfg(feature = "testing")] + use timeline::CompactLsnRange; + #[cfg(feature = "testing")] use timeline::GcInfo; static TEST_KEY: Lazy = @@ -9333,7 +9357,6 @@ mod tests { &cancel, CompactOptions { flags: dryrun_flags, - compact_range: None, ..Default::default() }, &ctx, @@ -9582,7 +9605,6 @@ mod tests { &cancel, CompactOptions { flags: dryrun_flags, - compact_range: None, ..Default::default() }, &ctx, @@ -9612,6 +9634,8 @@ mod tests { #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> { + use timeline::CompactLsnRange; + let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?; let (tenant, ctx) = harness.load().await; @@ -9804,6 +9828,22 @@ mod tests { verify_result().await; + // Piggyback a compaction with above_lsn. Ensure it works correctly when the specified LSN intersects with the layer files. + // Now we already have a single large delta layer, so the compaction min_layer_lsn should be the same as ancestor LSN (0x18). 
+ branch_tline + .compact_with_gc( + &cancel, + CompactOptions { + compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))), + ..Default::default() + }, + &ctx, + ) + .await + .unwrap(); + + verify_result().await; + Ok(()) } @@ -10092,7 +10132,7 @@ mod tests { &cancel, CompactOptions { flags: EnumSet::new(), - compact_range: Some((get_key(0)..get_key(2)).into()), + compact_key_range: Some((get_key(0)..get_key(2)).into()), ..Default::default() }, &ctx, @@ -10139,7 +10179,7 @@ mod tests { &cancel, CompactOptions { flags: EnumSet::new(), - compact_range: Some((get_key(2)..get_key(4)).into()), + compact_key_range: Some((get_key(2)..get_key(4)).into()), ..Default::default() }, &ctx, @@ -10191,7 +10231,7 @@ mod tests { &cancel, CompactOptions { flags: EnumSet::new(), - compact_range: Some((get_key(4)..get_key(9)).into()), + compact_key_range: Some((get_key(4)..get_key(9)).into()), ..Default::default() }, &ctx, @@ -10242,7 +10282,7 @@ mod tests { &cancel, CompactOptions { flags: EnumSet::new(), - compact_range: Some((get_key(9)..get_key(10)).into()), + compact_key_range: Some((get_key(9)..get_key(10)).into()), ..Default::default() }, &ctx, @@ -10298,7 +10338,7 @@ mod tests { &cancel, CompactOptions { flags: EnumSet::new(), - compact_range: Some((get_key(0)..get_key(10)).into()), + compact_key_range: Some((get_key(0)..get_key(10)).into()), ..Default::default() }, &ctx, @@ -10327,7 +10367,6 @@ mod tests { }, ], ); - Ok(()) } @@ -10380,4 +10419,602 @@ mod tests { Ok(()) } + + #[cfg(feature = "testing")] + #[tokio::test] + async fn test_simple_bottom_most_compaction_above_lsn() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_simple_bottom_most_compaction_above_lsn").await?; + let (tenant, ctx) = harness.load().await; + + fn get_key(id: u32) -> Key { + // using aux key here b/c they are guaranteed to be inside `collect_keyspace`. 
+ let mut key = Key::from_hex("620000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + let img_layer = (0..10) + .map(|id| (get_key(id), Bytes::from(format!("value {id}@0x10")))) + .collect_vec(); + + let delta1 = vec![( + get_key(1), + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append("@0x20")), + )]; + let delta4 = vec![( + get_key(1), + Lsn(0x28), + Value::WalRecord(NeonWalRecord::wal_append("@0x28")), + )]; + let delta2 = vec![ + ( + get_key(1), + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append("@0x30")), + ), + ( + get_key(1), + Lsn(0x38), + Value::WalRecord(NeonWalRecord::wal_append("@0x38")), + ), + ]; + let delta3 = vec![ + ( + get_key(8), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ( + get_key(9), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ]; + + let tline = tenant + .create_test_timeline_with_layers( + TIMELINE_ID, + Lsn(0x10), + DEFAULT_PG_VERSION, + &ctx, + vec![ + // delta1/2/4 only contain a single key but multiple updates + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta2), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x28)..Lsn(0x30), delta4), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta3), + ], // delta layers + vec![(Lsn(0x10), img_layer)], // image layers + Lsn(0x50), + ) + .await?; + { + tline + .latest_gc_cutoff_lsn + .lock_for_write() + .store_and_unlock(Lsn(0x30)) + .wait() + .await; + // Update GC info + let mut guard = tline.gc_info.write().unwrap(); + *guard = GcInfo { + retain_lsns: vec![ + (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No), + (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), + ], + cutoffs: GcCutoffs { + time: Lsn(0x30), + space: Lsn(0x30), + }, + leases: Default::default(), + within_ancestor_pitr: false, + }; + } + + let expected_result = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20@0x28@0x30@0x38"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10@0x48"), + Bytes::from_static(b"value 9@0x10@0x48"), + ]; + + let expected_result_at_gc_horizon = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20@0x28@0x30"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let expected_result_at_lsn_20 = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let expected_result_at_lsn_10 = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 
5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let verify_result = || async { + let gc_horizon = { + let gc_info = tline.gc_info.read().unwrap(); + gc_info.cutoffs.time + }; + for idx in 0..10 { + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x50), &ctx) + .await + .unwrap(), + &expected_result[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), gc_horizon, &ctx) + .await + .unwrap(), + &expected_result_at_gc_horizon[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x20), &ctx) + .await + .unwrap(), + &expected_result_at_lsn_20[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x10), &ctx) + .await + .unwrap(), + &expected_result_at_lsn_10[idx] + ); + } + }; + + verify_result().await; + + let cancel = CancellationToken::new(); + tline + .compact_with_gc( + &cancel, + CompactOptions { + compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))), + ..Default::default() + }, + &ctx, + ) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The original image layer, not compacted + PersistentLayerKey { + key_range: get_key(0)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // Delta layer below the specified above_lsn not compacted + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x20)..Lsn(0x28), + is_delta: true, + }, + // Delta layer compacted above the LSN + PersistentLayerKey { + key_range: get_key(1)..get_key(10), + lsn_range: Lsn(0x28)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + // compact again + tline + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The compacted image layer (full key range) + PersistentLayerKey { + key_range: Key::MIN..Key::MAX, + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // All other data in the delta layer + PersistentLayerKey { + key_range: get_key(1)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + Ok(()) + } + + #[cfg(feature = "testing")] + #[tokio::test] + async fn test_simple_bottom_most_compaction_rectangle() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_simple_bottom_most_compaction_rectangle").await?; + let (tenant, ctx) = harness.load().await; + + fn get_key(id: u32) -> Key { + // using aux key here b/c they are guaranteed to be inside `collect_keyspace`. 
+ let mut key = Key::from_hex("620000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + let img_layer = (0..10) + .map(|id| (get_key(id), Bytes::from(format!("value {id}@0x10")))) + .collect_vec(); + + let delta1 = vec![( + get_key(1), + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append("@0x20")), + )]; + let delta4 = vec![( + get_key(1), + Lsn(0x28), + Value::WalRecord(NeonWalRecord::wal_append("@0x28")), + )]; + let delta2 = vec![ + ( + get_key(1), + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append("@0x30")), + ), + ( + get_key(1), + Lsn(0x38), + Value::WalRecord(NeonWalRecord::wal_append("@0x38")), + ), + ]; + let delta3 = vec![ + ( + get_key(8), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ( + get_key(9), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ]; + + let tline = tenant + .create_test_timeline_with_layers( + TIMELINE_ID, + Lsn(0x10), + DEFAULT_PG_VERSION, + &ctx, + vec![ + // delta1/2/4 only contain a single key but multiple updates + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta2), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x28)..Lsn(0x30), delta4), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x30)..Lsn(0x50), delta3), + ], // delta layers + vec![(Lsn(0x10), img_layer)], // image layers + Lsn(0x50), + ) + .await?; + { + tline + .latest_gc_cutoff_lsn + .lock_for_write() + .store_and_unlock(Lsn(0x30)) + .wait() + .await; + // Update GC info + let mut guard = tline.gc_info.write().unwrap(); + *guard = GcInfo { + retain_lsns: vec![ + (Lsn(0x10), tline.timeline_id, MaybeOffloaded::No), + (Lsn(0x20), tline.timeline_id, MaybeOffloaded::No), + ], + cutoffs: GcCutoffs { + time: Lsn(0x30), + space: Lsn(0x30), + }, + leases: Default::default(), + within_ancestor_pitr: false, + }; + } + + let expected_result = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20@0x28@0x30@0x38"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10@0x48"), + Bytes::from_static(b"value 9@0x10@0x48"), + ]; + + let expected_result_at_gc_horizon = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20@0x28@0x30"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let expected_result_at_lsn_20 = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let expected_result_at_lsn_10 = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10"), + Bytes::from_static(b"value 2@0x10"), + Bytes::from_static(b"value 3@0x10"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 
5@0x10"), + Bytes::from_static(b"value 6@0x10"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let verify_result = || async { + let gc_horizon = { + let gc_info = tline.gc_info.read().unwrap(); + gc_info.cutoffs.time + }; + for idx in 0..10 { + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x50), &ctx) + .await + .unwrap(), + &expected_result[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), gc_horizon, &ctx) + .await + .unwrap(), + &expected_result_at_gc_horizon[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x20), &ctx) + .await + .unwrap(), + &expected_result_at_lsn_20[idx] + ); + assert_eq!( + tline + .get(get_key(idx as u32), Lsn(0x10), &ctx) + .await + .unwrap(), + &expected_result_at_lsn_10[idx] + ); + } + }; + + verify_result().await; + + let cancel = CancellationToken::new(); + + tline + .compact_with_gc( + &cancel, + CompactOptions { + compact_key_range: Some((get_key(0)..get_key(2)).into()), + compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()), + ..Default::default() + }, + &ctx, + ) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The original image layer, not compacted + PersistentLayerKey { + key_range: get_key(0)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // According the selection logic, we select all layers with start key <= 0x28, so we would merge the layer 0x20-0x28 and + // the layer 0x28-0x30 into one. + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x20)..Lsn(0x30), + is_delta: true, + }, + // Above the upper bound and untouched + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x30)..Lsn(0x50), + is_delta: true, + }, + // This layer is untouched + PersistentLayerKey { + key_range: get_key(8)..get_key(10), + lsn_range: Lsn(0x30)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + tline + .compact_with_gc( + &cancel, + CompactOptions { + compact_key_range: Some((get_key(3)..get_key(8)).into()), + compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()), + ..Default::default() + }, + &ctx, + ) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The original image layer, not compacted + PersistentLayerKey { + key_range: get_key(0)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // Not in the compaction key range, uncompacted + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x20)..Lsn(0x30), + is_delta: true, + }, + // Not in the compaction key range, uncompacted but need rewrite because the delta layer overlaps with the range + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x30)..Lsn(0x50), + is_delta: true, + }, + // Note that when we specify the LSN upper bound to be 0x40, the compaction algorithm will not try to cut the layer + // horizontally in half. Instead, it will include all LSNs that overlap with 0x40. So the real max_lsn of the compaction + // becomes 0x50. 
+ PersistentLayerKey { + key_range: get_key(8)..get_key(10), + lsn_range: Lsn(0x30)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + // compact again + tline + .compact_with_gc( + &cancel, + CompactOptions { + compact_key_range: Some((get_key(0)..get_key(5)).into()), + compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()), + ..Default::default() + }, + &ctx, + ) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The original image layer, not compacted + PersistentLayerKey { + key_range: get_key(0)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // The range gets compacted + PersistentLayerKey { + key_range: get_key(1)..get_key(2), + lsn_range: Lsn(0x20)..Lsn(0x50), + is_delta: true, + }, + // Not touched during this iteration of compaction + PersistentLayerKey { + key_range: get_key(8)..get_key(10), + lsn_range: Lsn(0x30)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + // final full compaction + tline + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) + .await + .unwrap(); + verify_result().await; + + let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; + check_layer_map_key_eq( + all_layers, + vec![ + // The compacted image layer (full key range) + PersistentLayerKey { + key_range: Key::MIN..Key::MAX, + lsn_range: Lsn(0x10)..Lsn(0x11), + is_delta: false, + }, + // All other data in the delta layer + PersistentLayerKey { + key_range: get_key(1)..get_key(10), + lsn_range: Lsn(0x10)..Lsn(0x50), + is_delta: true, + }, + ], + ); + + Ok(()) + } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 8f1d5f6577..b5c7079226 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -780,46 +780,90 @@ pub(crate) enum CompactFlags { #[serde_with::serde_as] #[derive(Debug, Clone, serde::Deserialize)] pub(crate) struct CompactRequest { - pub compact_range: Option, - pub compact_below_lsn: Option, + pub compact_key_range: Option, + pub compact_lsn_range: Option, /// Whether the compaction job should be scheduled. #[serde(default)] pub scheduled: bool, /// Whether the compaction job should be split across key ranges. #[serde(default)] pub sub_compaction: bool, + /// Max job size for each subcompaction job. 
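+    /// Unit: MB. The sub-compaction split logic cuts off a job once the estimated size of its input layers exceeds this value.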
+ pub sub_compaction_max_job_size_mb: Option, } #[serde_with::serde_as] #[derive(Debug, Clone, serde::Deserialize)] -pub(crate) struct CompactRange { +pub(crate) struct CompactLsnRange { + pub start: Lsn, + pub end: Lsn, +} + +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactKeyRange { #[serde_as(as = "serde_with::DisplayFromStr")] pub start: Key, #[serde_as(as = "serde_with::DisplayFromStr")] pub end: Key, } -impl From> for CompactRange { - fn from(range: Range) -> Self { - CompactRange { +impl From> for CompactLsnRange { + fn from(range: Range) -> Self { + Self { start: range.start, end: range.end, } } } +impl From> for CompactKeyRange { + fn from(range: Range) -> Self { + Self { + start: range.start, + end: range.end, + } + } +} + +impl From for Range { + fn from(range: CompactLsnRange) -> Self { + range.start..range.end + } +} + +impl From for Range { + fn from(range: CompactKeyRange) -> Self { + range.start..range.end + } +} + +impl CompactLsnRange { + #[cfg(test)] + #[cfg(feature = "testing")] + pub fn above(lsn: Lsn) -> Self { + Self { + start: lsn, + end: Lsn::MAX, + } + } +} + #[derive(Debug, Clone, Default)] pub(crate) struct CompactOptions { pub flags: EnumSet, /// If set, the compaction will only compact the key range specified by this option. - /// This option is only used by GC compaction. - pub compact_range: Option, - /// If set, the compaction will only compact the LSN below this value. - /// This option is only used by GC compaction. - pub compact_below_lsn: Option, + /// This option is only used by GC compaction. For the full explanation, see [`compaction::GcCompactJob`]. + pub compact_key_range: Option, + /// If set, the compaction will only compact the LSN within this value. + /// This option is only used by GC compaction. For the full explanation, see [`compaction::GcCompactJob`]. + pub compact_lsn_range: Option, /// Enable sub-compaction (split compaction job across key ranges). /// This option is only used by GC compaction. pub sub_compaction: bool, + /// Set job size for the GC compaction. + /// This option is only used by GC compaction. + pub sub_compaction_max_job_size_mb: Option, } impl std::fmt::Debug for Timeline { @@ -1641,9 +1685,10 @@ impl Timeline { cancel, CompactOptions { flags, - compact_range: None, - compact_below_lsn: None, + compact_key_range: None, + compact_lsn_range: None, sub_compaction: false, + sub_compaction_max_job_size_mb: None, }, ctx, ) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index fa924d23b0..701247194b 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,8 +10,8 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CompactOptions, CompactRange, CreateImageLayersError, DurationRecorder, - ImageLayerCreationMode, RecordedDuration, Timeline, + CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, + RecordedDuration, Timeline, }; use anyhow::{anyhow, bail, Context}; @@ -64,6 +64,9 @@ const COMPACTION_DELTA_THRESHOLD: usize = 5; /// A scheduled compaction task. pub(crate) struct ScheduledCompactionTask { + /// It's unfortunate that we need to store a compact options struct here because the only outer + /// API we can call here is `compact_with_options` which does a few setup calls before starting the + /// actual compaction job... 
We should refactor this to store `GcCompactionJob` in the future. pub options: CompactOptions, /// The channel to send the compaction result. If this is a subcompaction, the last compaction job holds the sender. pub result_tx: Option>, @@ -71,16 +74,57 @@ pub(crate) struct ScheduledCompactionTask { pub gc_block: Option, } +/// A job description for the gc-compaction job. This structure describes the rectangle range that the job will +/// process. The exact layers that need to be compacted/rewritten will be generated when `compact_with_gc` gets +/// called. +#[derive(Debug, Clone)] +pub(crate) struct GcCompactJob { + pub dry_run: bool, + /// The key range to be compacted. The compaction algorithm will only regenerate key-value pairs within this range + /// [left inclusive, right exclusive), and other pairs will be rewritten into new files if necessary. + pub compact_key_range: Range, + /// The LSN range to be compacted. The compaction algorithm will use this range to determine the layers to be + /// selected for the compaction, and it does not guarantee the generated layers will have exactly the same LSN range + /// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`]. + /// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here. + pub compact_lsn_range: Range, +} + +impl GcCompactJob { + pub fn from_compact_options(options: CompactOptions) -> Self { + GcCompactJob { + dry_run: options.flags.contains(CompactFlags::DryRun), + compact_key_range: options + .compact_key_range + .map(|x| x.into()) + .unwrap_or(Key::MIN..Key::MAX), + compact_lsn_range: options + .compact_lsn_range + .map(|x| x.into()) + .unwrap_or(Lsn::INVALID..Lsn::MAX), + } + } +} + +/// A job description for the gc-compaction job. This structure is generated when `compact_with_gc` is called +/// and contains the exact layers we want to compact. pub struct GcCompactionJobDescription { /// All layers to read in the compaction job selected_layers: Vec, - /// GC cutoff of the job + /// GC cutoff of the job. This is the lowest LSN that will be accessed by the read/GC path and we need to + /// keep all deltas <= this LSN or generate an image == this LSN. gc_cutoff: Lsn, - /// LSNs to retain for the job + /// LSNs to retain for the job. Read path will use this LSN so we need to keep deltas <= this LSN or + /// generate an image == this LSN. retain_lsns_below_horizon: Vec, - /// Maximum layer LSN processed in this compaction + /// Maximum layer LSN processed in this compaction, that is max(end_lsn of layers). Exclusive. All data + /// \>= this LSN will be kept and will not be rewritten. max_layer_lsn: Lsn, - /// Only compact layers overlapping with this range + /// Minimum layer LSN processed in this compaction, that is min(start_lsn of layers). Inclusive. + /// All access below (strict lower than `<`) this LSN will be routed through the normal read path instead of + /// k-merge within gc-compaction. + min_layer_lsn: Lsn, + /// Only compact layers overlapping with this range. compaction_key_range: Range, /// When partial compaction is enabled, these layers need to be rewritten to ensure no overlap. /// This field is here solely for debugging. The field will not be read once the compaction @@ -299,7 +343,7 @@ impl Timeline { ))); } - if options.compact_range.is_some() { + if options.compact_key_range.is_some() || options.compact_lsn_range.is_some() { // maybe useful in the future? 
could implement this at some point return Err(CompactionError::Other(anyhow!( "compaction range is not supported for legacy compaction for now" @@ -1754,25 +1798,26 @@ impl Timeline { Ok(()) } - /// Split a gc-compaction job into multiple compaction jobs. Optimally, this function should return a vector of - /// `GcCompactionJobDesc`. But we want to keep it simple on the tenant scheduling side without exposing too much - /// ad-hoc information about gc compaction itself. + /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job. + /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN + /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts + /// like a full compaction of the specified keyspace. pub(crate) async fn gc_compaction_split_jobs( self: &Arc, - options: CompactOptions, - ) -> anyhow::Result> { - if !options.sub_compaction { - return Ok(vec![options]); - } - let compact_range = options.compact_range.clone().unwrap_or(CompactRange { - start: Key::MIN, - end: Key::MAX, - }); - let compact_below_lsn = if let Some(compact_below_lsn) = options.compact_below_lsn { - compact_below_lsn + job: GcCompactJob, + sub_compaction_max_job_size_mb: Option, + ) -> anyhow::Result> { + let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { + job.compact_lsn_range.end } else { *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff }; + + // Split compaction job to about 4GB each + const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024; + let sub_compaction_max_job_size_mb = + sub_compaction_max_job_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB); + let mut compact_jobs = Vec::new(); // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning // by estimating the amount of files read for a compaction job. We should also partition on LSN. @@ -1808,8 +1853,8 @@ impl Timeline { let Some((start, end)) = truncate_to( &range.start, &range.end, - &compact_range.start, - &compact_range.end, + &job.compact_key_range.start, + &job.compact_key_range.end, ) else { continue; }; @@ -1819,8 +1864,6 @@ impl Timeline { let guard = self.layers.read().await; let layer_map = guard.layer_map()?; let mut current_start = None; - // Split compaction job to about 2GB each - const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024; // 4GB, TODO: should be configuration in the future let ranges_num = split_key_ranges.len(); for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() { if current_start.is_none() { @@ -1833,8 +1876,7 @@ impl Timeline { } let res = layer_map.range_search(start..end, compact_below_lsn); let total_size = res.found.keys().map(|x| x.layer.file_size()).sum::(); - if total_size > GC_COMPACT_MAX_SIZE_MB * 1024 * 1024 || ranges_num == idx + 1 { - let mut compact_options = options.clone(); + if total_size > sub_compaction_max_job_size_mb * 1024 * 1024 || ranges_num == idx + 1 { // Try to extend the compaction range so that we include at least one full layer file. 
let extended_end = res .found @@ -1852,10 +1894,11 @@ impl Timeline { "splitting compaction job: {}..{}, estimated_size={}", start, end, total_size ); - compact_options.compact_range = Some(CompactRange { start, end }); - compact_options.compact_below_lsn = Some(compact_below_lsn); - compact_options.sub_compaction = false; - compact_jobs.push(compact_options); + compact_jobs.push(GcCompactJob { + dry_run: job.dry_run, + compact_key_range: start..end, + compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn, + }); current_start = Some(end); } } @@ -1877,7 +1920,7 @@ impl Timeline { /// Key::MIN..Key..MAX to the function indicates a full compaction, though technically, `Key::MAX` is not /// part of the range. /// - /// If `options.compact_below_lsn` is provided, the compaction will only compact layers below or intersect with + /// If `options.compact_lsn_range.end` is provided, the compaction will only compact layers below or intersect with /// the LSN. Otherwise, it will use the gc cutoff by default. pub(crate) async fn compact_with_gc( self: &Arc, @@ -1885,9 +1928,13 @@ impl Timeline { options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { - if options.sub_compaction { + let sub_compaction = options.sub_compaction; + let job = GcCompactJob::from_compact_options(options.clone()); + if sub_compaction { info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); - let jobs = self.gc_compaction_split_jobs(options).await?; + let jobs = self + .gc_compaction_split_jobs(job, options.sub_compaction_max_job_size_mb) + .await?; let jobs_len = jobs.len(); for (idx, job) in jobs.into_iter().enumerate() { info!( @@ -1902,19 +1949,15 @@ impl Timeline { } return Ok(()); } - self.compact_with_gc_inner(cancel, options, ctx).await + self.compact_with_gc_inner(cancel, job, ctx).await } async fn compact_with_gc_inner( self: &Arc, cancel: &CancellationToken, - options: CompactOptions, + job: GcCompactJob, ctx: &RequestContext, ) -> anyhow::Result<()> { - assert!( - !options.sub_compaction, - "sub-compaction should be handled by the outer function" - ); // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // Note that we already acquired the compaction lock when the outer `compact` function gets called. @@ -1934,19 +1977,11 @@ impl Timeline { ) .await?; - let flags = options.flags; - let compaction_key_range = options - .compact_range - .map(|range| range.start..range.end) - .unwrap_or_else(|| Key::MIN..Key::MAX); + let dry_run = job.dry_run; + let compact_key_range = job.compact_key_range; + let compact_lsn_range = job.compact_lsn_range; - let dry_run = flags.contains(CompactFlags::DryRun); - - if compaction_key_range == (Key::MIN..Key::MAX) { - info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compaction_key_range={}..{}", compaction_key_range.start, compaction_key_range.end); - } else { - info!("running enhanced gc bottom-most compaction, dry_run={dry_run}"); - } + info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}", compact_key_range.start, compact_key_range.end, compact_lsn_range.start, compact_lsn_range.end); scopeguard::defer! { info!("done enhanced gc bottom-most compaction"); @@ -1970,11 +2005,15 @@ impl Timeline { // to get the truth data. 
let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn(); // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for - // each of the retain_lsn. Therefore, if the user-provided `compact_below_lsn` is larger than the real gc cutoff, we will use + // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use // the real cutoff. - let mut gc_cutoff = options.compact_below_lsn.unwrap_or(real_gc_cutoff); + let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX { + real_gc_cutoff + } else { + compact_lsn_range.end + }; if gc_cutoff > real_gc_cutoff { - warn!("provided compact_below_lsn={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); + warn!("provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); gc_cutoff = real_gc_cutoff; } gc_cutoff @@ -1991,7 +2030,7 @@ impl Timeline { } let mut selected_layers: Vec = Vec::new(); drop(gc_info); - // Pick all the layers intersect or below the gc_cutoff, get the largest LSN in the selected layers. + // Firstly, pick all the layers intersect or below the gc_cutoff, get the largest LSN in the selected layers. let Some(max_layer_lsn) = layers .iter_historic_layers() .filter(|desc| desc.get_lsn_range().start <= gc_cutoff) @@ -2001,27 +2040,45 @@ impl Timeline { info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff); return Ok(()); }; + // Next, if the user specifies compact_lsn_range.start, we need to filter some layers out. All the layers (strictly) below + // the min_layer_lsn computed as below will be filtered out and the data will be accessed using the normal read path, as if + // it is a branch. + let Some(min_layer_lsn) = layers + .iter_historic_layers() + .filter(|desc| { + if compact_lsn_range.start == Lsn::INVALID { + true // select all layers below if start == Lsn(0) + } else { + desc.get_lsn_range().end > compact_lsn_range.start // strictly larger than compact_above_lsn + } + }) + .map(|desc| desc.get_lsn_range().start) + .min() + else { + info!("no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", compact_lsn_range.end); + return Ok(()); + }; // Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key // layers to compact. 
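// (`rewrite_layers` collects delta layers that only partially overlap the compaction
// key range: they are read like the other selected layers, but must be rewritten so
// that no delta layer overlaps the compacted key range afterwards.)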
let mut rewrite_layers = Vec::new(); for desc in layers.iter_historic_layers() { if desc.get_lsn_range().end <= max_layer_lsn - && overlaps_with(&desc.get_key_range(), &compaction_key_range) + && desc.get_lsn_range().start >= min_layer_lsn + && overlaps_with(&desc.get_key_range(), &compact_key_range) { // If the layer overlaps with the compaction key range, we need to read it to obtain all keys within the range, // even if it might contain extra keys selected_layers.push(guard.get_from_desc(&desc)); // If the layer is not fully contained within the key range, we need to rewrite it if it's a delta layer (it's fine // to overlap image layers) - if desc.is_delta() - && !fully_contains(&compaction_key_range, &desc.get_key_range()) + if desc.is_delta() && !fully_contains(&compact_key_range, &desc.get_key_range()) { rewrite_layers.push(desc); } } } if selected_layers.is_empty() { - info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compaction_key_range.start, compaction_key_range.end); + info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compact_key_range.start, compact_key_range.end); return Ok(()); } retain_lsns_below_horizon.sort(); @@ -2029,13 +2086,20 @@ impl Timeline { selected_layers, gc_cutoff, retain_lsns_below_horizon, + min_layer_lsn, max_layer_lsn, - compaction_key_range, + compaction_key_range: compact_key_range, rewrite_layers, } }; - let lowest_retain_lsn = if self.ancestor_timeline.is_some() { - Lsn(self.ancestor_lsn.0 + 1) + let (has_data_below, lowest_retain_lsn) = if compact_lsn_range.start != Lsn::INVALID { + // If we only compact above some LSN, we should get the history from the current branch below the specified LSN. + // We use job_desc.min_layer_lsn as if it's the lowest branch point. + (true, job_desc.min_layer_lsn) + } else if self.ancestor_timeline.is_some() { + // In theory, we can also use min_layer_lsn here, but using ancestor LSN makes sure the delta layers cover the + // LSN ranges all the way to the ancestor timeline. 
+ (true, self.ancestor_lsn) } else { let res = job_desc .retain_lsns_below_horizon @@ -2053,17 +2117,19 @@ impl Timeline { .unwrap_or(job_desc.gc_cutoff) ); } - res + (false, res) }; info!( - "picked {} layers for compaction ({} layers need rewriting) with max_layer_lsn={} gc_cutoff={} lowest_retain_lsn={}, key_range={}..{}", + "picked {} layers for compaction ({} layers need rewriting) with max_layer_lsn={} min_layer_lsn={} gc_cutoff={} lowest_retain_lsn={}, key_range={}..{}, has_data_below={}", job_desc.selected_layers.len(), job_desc.rewrite_layers.len(), job_desc.max_layer_lsn, + job_desc.min_layer_lsn, job_desc.gc_cutoff, lowest_retain_lsn, job_desc.compaction_key_range.start, - job_desc.compaction_key_range.end + job_desc.compaction_key_range.end, + has_data_below, ); for layer in &job_desc.selected_layers { @@ -2107,10 +2173,22 @@ impl Timeline { let mut delta_layers = Vec::new(); let mut image_layers = Vec::new(); let mut downloaded_layers = Vec::new(); + let mut total_downloaded_size = 0; + let mut total_layer_size = 0; for layer in &job_desc.selected_layers { + if layer.needs_download().await?.is_some() { + total_downloaded_size += layer.layer_desc().file_size; + } + total_layer_size += layer.layer_desc().file_size; let resident_layer = layer.download_and_keep_resident().await?; downloaded_layers.push(resident_layer); } + info!( + "finish downloading layers, downloaded={}, total={}, ratio={:.2}", + total_downloaded_size, + total_layer_size, + total_downloaded_size as f64 / total_layer_size as f64 + ); for resident_layer in &downloaded_layers { if resident_layer.layer_desc().is_delta() { let layer = resident_layer.get_as_delta(ctx).await?; @@ -2133,7 +2211,7 @@ impl Timeline { // Only create image layers when there is no ancestor branches. TODO: create covering image layer // when some condition meet. - let mut image_layer_writer = if self.ancestor_timeline.is_none() { + let mut image_layer_writer = if !has_data_below { Some( SplitImageLayerWriter::new( self.conf, @@ -2166,7 +2244,11 @@ impl Timeline { } let mut delta_layer_rewriters = HashMap::, RewritingLayers>::new(); - /// Returns None if there is no ancestor branch. Throw an error when the key is not found. + /// When compacting not at a bottom range (=`[0,X)`) of the root branch, we "have data below" (`has_data_below=true`). + /// The two cases are compaction in ancestor branches and when `compact_lsn_range.start` is set. + /// In those cases, we need to pull up data from below the LSN range we're compaction. + /// + /// This function unifies the cases so that later code doesn't have to think about it. /// /// Currently, we always get the ancestor image for each key in the child branch no matter whether the image /// is needed for reconstruction. This should be fixed in the future. @@ -2174,17 +2256,19 @@ impl Timeline { /// Furthermore, we should do vectored get instead of a single get, or better, use k-merge for ancestor /// images. async fn get_ancestor_image( - tline: &Arc, + this_tline: &Arc, key: Key, ctx: &RequestContext, + has_data_below: bool, + history_lsn_point: Lsn, ) -> anyhow::Result> { - if tline.ancestor_timeline.is_none() { + if !has_data_below { return Ok(None); }; // This function is implemented as a get of the current timeline at ancestor LSN, therefore reusing // as much existing code as possible. 
- let img = tline.get(key, tline.ancestor_lsn, ctx).await?; - Ok(Some((key, tline.ancestor_lsn, img))) + let img = this_tline.get(key, history_lsn_point, ctx).await?; + Ok(Some((key, history_lsn_point, img))) } // Actually, we can decide not to write to the image layer at all at this point because @@ -2268,7 +2352,8 @@ impl Timeline { job_desc.gc_cutoff, &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, - get_ancestor_image(self, *last_key, ctx).await?, + get_ancestor_image(self, *last_key, ctx, has_data_below, lowest_retain_lsn) + .await?, ) .await?; retention @@ -2297,7 +2382,7 @@ impl Timeline { job_desc.gc_cutoff, &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, - get_ancestor_image(self, last_key, ctx).await?, + get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn).await?, ) .await?; retention diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 810a9723e0..88873c63c2 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -153,6 +153,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): if i % 10 == 0: log.info(f"Running churn round {i}/{churn_rounds} ...") + if (i - 1) % 10 == 0: # Run gc-compaction every 10 rounds to ensure the test doesn't take too long time. ps_http.timeline_compact( tenant_id, @@ -161,10 +162,11 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): body={ "scheduled": True, "sub_compaction": True, - "compact_range": { + "compact_key_range": { "start": "000000000000000000000000000000000000", "end": "030000000000000000000000000000000000", }, + "sub_compaction_max_job_size_mb": 16, }, ) From 2f3f98a3190a787b8420b8e969a7c5f6930de0b7 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Thu, 12 Dec 2024 21:25:39 +0100 Subject: [PATCH 12/49] use OIDC role instead of AWS access keys for managing test runner (#10117) in periodic pagebench workflow ## Problem for background see https://github.com/neondatabase/cloud/issues/21545 ## Summary of changes use OIDC role to manage runners instead of AWS access key which needs to be periodically rotated ## logs seems to work in https://github.com/neondatabase/neon/actions/runs/12298575888/job/34322306127#step:6:1 --- .github/actionlint.yml | 1 + .github/workflows/periodic_pagebench.yml | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 9d8389faa5..7a97e2ae55 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -24,3 +24,4 @@ config-variables: - PGREGRESS_PG16_PROJECT_ID - PGREGRESS_PG17_PROJECT_ID - SLACK_ON_CALL_QA_STAGING_STREAM + - DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index a04ceb4a24..9f5a16feca 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -41,8 +41,6 @@ jobs: env: API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }} RUN_ID: ${{ github.run_id }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }} AWS_DEFAULT_REGION : "eu-central-1" AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74" steps: @@ -53,6 +51,13 @@ jobs: - name: Show my own (github runner) external IP address - usefull for IP allowlisting run: curl https://ifconfig.me + - name: Assume AWS OIDC role that allows to 
manage (start/stop/describe... EC2 machine)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+          role-duration-seconds: 3600
+
     - name: Start EC2 instance and wait for the instance to boot up
       run: |
         aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
@@ -150,6 +155,14 @@ jobs:
           -H "Authorization: Bearer $API_KEY" \
           -d ''
 
+    - name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC2 machine)
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+        role-duration-seconds: 3600
+
     - name: Stop EC2 instance and wait for the instance to be stopped
       if: always() && steps.poll_step.outputs.too_many_runs != 'true'
       run: |

From ac04bad45751bd60f19e6ac51935f7de01e6b87d Mon Sep 17 00:00:00 2001
From: Alexander Bayandin
Date: Thu, 12 Dec 2024 22:55:38 +0000
Subject: [PATCH 13/49] CI: don't run debug builds with LFC (#10123)

## Problem

I've noticed that debug builds with LFC fail more frequently and, for
some reason, their failures do block merging (but they should not).

## Summary of changes
- Do not run Debug builds with LFC
---
 .github/workflows/build_and_test.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 4bfb5077c6..1bae23f7ef 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -254,16 +254,14 @@ jobs:
       build-tag: ${{ needs.tag.outputs.build-tag }}
       build-type: ${{ matrix.build-type }}
       # Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
-      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled. Failure on the
-      # debug build with LFC enabled doesn't block merging.
+      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
       test-cfg: |
         ${{ matrix.build-type == 'release'
            && '[{"pg_version":"v14", "lfc_state": "with-lfc"},
                 {"pg_version":"v15", "lfc_state": "with-lfc"},
                 {"pg_version":"v16", "lfc_state": "with-lfc"},
                 {"pg_version":"v17", "lfc_state": "with-lfc"},
                 {"pg_version":"v17", "lfc_state": "without-lfc"}]'
-           || '[{"pg_version":"v17", "lfc_state": "without-lfc"},
-                {"pg_version":"v17", "lfc_state": "with-lfc" }]' }}
+           || '[{"pg_version":"v17", "lfc_state": "without-lfc" }]' }}
       secrets: inherit
 
 # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking

From 59ef70192591c0fc2a5c2a69eea0d47f7bd8e033 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?JC=20Gr=C3=BCnhage?=
Date: Fri, 13 Dec 2024 00:38:20 +0100
Subject: [PATCH 14/49] CI(deploy): fix git tag/release creation (#10119)

## Problem

When moving the comment on proxy-releases from the yaml doc into a
javascript code block, I missed converting the comment marker from `#`
to `//`.

## Summary of changes

Correctly convert comment marker.
--- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1bae23f7ef..a3943cba91 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1143,7 +1143,7 @@ jobs: console.log(`Tag ${tag} created successfully.`); } - # TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok + // TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok if (context.ref !== 'refs/heads/release') { console.log(`GitHub release skipped for ${context.ref}.`); return; From 2451969d5c533ab4ca085cdce12fdd79d55cca8b Mon Sep 17 00:00:00 2001 From: Rahul Patil Date: Fri, 13 Dec 2024 13:22:15 +0100 Subject: [PATCH 15/49] fix(ci): Allow github-action-script to post reports (#10136) Allow github-action-script to post reports. Failed CI: https://github.com/neondatabase/neon/actions/runs/12304655364/job/34342554049#step:13:514 --- .github/workflows/build_and_test.yml | 15 +++++++++++++++ .github/workflows/periodic_pagebench.yml | 8 +++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a3943cba91..b3556debe3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -303,6 +303,11 @@ jobs: benchmarks: if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: [ self-hosted, small ] container: image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm @@ -343,6 +348,11 @@ jobs: report-benchmarks-failures: needs: [ benchmarks, create-test-report ] if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure' + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: ubuntu-22.04 steps: @@ -1024,6 +1034,11 @@ jobs: trigger-custom-extensions-build-and-wait: needs: [ check-permissions, tag ] runs-on: ubuntu-22.04 + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write steps: - name: Set PR's status to pending and request a remote CI test run: | diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 9f5a16feca..049990f17b 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -21,15 +21,17 @@ defaults: run: shell: bash -euo pipefail {0} -permissions: - id-token: write # aws-actions/configure-aws-credentials - concurrency: group: ${{ github.workflow }} cancel-in-progress: false jobs: trigger_bench_on_ec2_machine_in_eu_central_1: + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: [ self-hosted, small ] container: image: neondatabase/build-tools:pinned-bookworm From 7dc382601cb9f2c508f11acdac897a8a0bbbdce3 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:59:04 +0100 Subject: [PATCH 16/49] Fix pg_regress tests on a cloud staging instance (#10134) 
## Problem pg_regress tests start failing due to unique ids added to Neon error messages ## Summary of changes Patches updated --- .github/workflows/cloud-regress.yml | 1 + compute/patches/cloud_regress_pg16.patch | 173 ++++++++++++---------- compute/patches/cloud_regress_pg17.patch | 179 ++++++++++++----------- 3 files changed, 190 insertions(+), 163 deletions(-) diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 2fc26baa21..7b9e434ec3 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -99,6 +99,7 @@ jobs: BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}} - name: Delete branch + if: always() uses: ./.github/actions/neon-branch-delete with: api_key: ${{ secrets.NEON_STAGING_API_KEY }} diff --git a/compute/patches/cloud_regress_pg16.patch b/compute/patches/cloud_regress_pg16.patch index a4b93d0260..3f0bb84ae7 100644 --- a/compute/patches/cloud_regress_pg16.patch +++ b/compute/patches/cloud_regress_pg16.patch @@ -981,7 +981,7 @@ index fc42d418bf..e38f517574 100644 CREATE SCHEMA addr_nsp; SET search_path TO 'addr_nsp'; diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out -index 8475231735..1afae5395f 100644 +index 8475231735..0653946337 100644 --- a/src/test/regress/expected/password.out +++ b/src/test/regress/expected/password.out @@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok @@ -1006,65 +1006,63 @@ index 8475231735..1afae5395f 100644 -----------------+--------------------------------------------------- - regress_passwd1 | md5783277baca28003b33453252be4dbb34 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3 -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: - regress_passwd4 | + regress_passwd4 | SCRAM-SHA-256$4096:$: (4 rows) -- Rename a role -@@ -54,24 +54,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -54,24 +54,16 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; -- passwords. SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +--- already encrypted with MD5, use as it is +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. 
+-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. +--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- This looks like a valid SCRAM-SHA-256 secret, but it is not - -- so it should be hashed with SCRAM-SHA-256. - CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- These may look like valid MD5 secrets, but they are not, so they - -- should be hashed with SCRAM-SHA-256. - -- trailing garbage at the end - CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- invalid length - CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Changing the SCRAM iteration count SET scram_iterations = 1024; CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount'; -@@ -81,63 +87,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -81,11 +73,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: regress_passwd4 | SCRAM-SHA-256$4096:$: - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023 -- regress_passwd6 | SCRAM-SHA-256$4096:$: -- regress_passwd7 | SCRAM-SHA-256$4096:$: -- regress_passwd8 | SCRAM-SHA-256$4096:$: - regress_passwd9 | SCRAM-SHA-256$1024:$: --(9 rows) -+(5 rows) - ++ regress_passwd5 | SCRAM-SHA-256$4096:$: + regress_passwd6 | SCRAM-SHA-256$4096:$: + regress_passwd7 | SCRAM-SHA-256$4096:$: + regress_passwd8 | SCRAM-SHA-256$4096:$: +@@ -95,23 +87,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ -- An empty password is not allowed, in any form CREATE ROLE regress_passwd_empty PASSWORD ''; NOTICE: empty string is not a valid password, clearing password @@ -1082,56 +1080,37 @@ index 
8475231735..1afae5395f 100644 -(1 row) +(0 rows) - -- Test with invalid stored and server keys. - -- - -- The first is valid, to act as a control. The others have too long - -- stored/server keys. They will be re-hashed. - CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Check that the invalid secrets were re-hashed. A re-hashed secret -- should not contain the original salt. 
SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed - FROM pg_authid - WHERE rolname LIKE 'regress_passwd_sha_len%' +@@ -120,7 +109,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw ORDER BY rolname; -- rolname | is_rolpassword_rehashed ---------------------------+------------------------- + rolname | is_rolpassword_rehashed + -------------------------+------------------------- - regress_passwd_sha_len0 | f -- regress_passwd_sha_len1 | t -- regress_passwd_sha_len2 | t --(3 rows) -+ rolname | is_rolpassword_rehashed -+---------+------------------------- -+(0 rows) - - DROP ROLE regress_passwd1; - DROP ROLE regress_passwd2; - DROP ROLE regress_passwd3; - DROP ROLE regress_passwd4; - DROP ROLE regress_passwd5; -+ERROR: role "regress_passwd5" does not exist - DROP ROLE regress_passwd6; -+ERROR: role "regress_passwd6" does not exist - DROP ROLE regress_passwd7; -+ERROR: role "regress_passwd7" does not exist ++ regress_passwd_sha_len0 | t + regress_passwd_sha_len1 | t + regress_passwd_sha_len2 | t + (3 rows) +@@ -135,6 +124,7 @@ DROP ROLE regress_passwd7; DROP ROLE regress_passwd8; -+ERROR: role "regress_passwd8" does not exist DROP ROLE regress_passwd9; DROP ROLE regress_passwd_empty; +ERROR: role "regress_passwd_empty" does not exist DROP ROLE regress_passwd_sha_len0; -+ERROR: role "regress_passwd_sha_len0" does not exist DROP ROLE regress_passwd_sha_len1; -+ERROR: role "regress_passwd_sha_len1" does not exist DROP ROLE regress_passwd_sha_len2; -+ERROR: role "regress_passwd_sha_len2" does not exist - -- all entries should have been removed - SELECT rolname, rolpassword - FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 5b9dba7b32..cc408dad42 100644 --- a/src/test/regress/expected/privileges.out @@ -3194,7 +3173,7 @@ index 1a6c61f49d..1c31ac6a53 100644 -- Test generic object addressing/identification functions CREATE SCHEMA addr_nsp; diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql -index 53e86b0b6c..f07cf1ec54 100644 +index 53e86b0b6c..0303fdfe96 100644 --- a/src/test/regress/sql/password.sql +++ b/src/test/regress/sql/password.sql @@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok @@ -3213,23 +3192,59 @@ index 53e86b0b6c..f07cf1ec54 100644 -- check list of created entries -- -@@ -42,14 +42,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -42,26 +42,18 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +--- already encrypted with MD5, use as it is +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +ALTER ROLE 
regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. +--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Changing the SCRAM iteration count + SET scram_iterations = 1024; +@@ -78,13 +70,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a'; + ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4='; + SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty'; + +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Check that the invalid secrets were re-hashed. A re-hashed secret + -- should not contain the original salt. 
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 249df17a58..b258e7f26a 100644 --- a/src/test/regress/sql/privileges.sql diff --git a/compute/patches/cloud_regress_pg17.patch b/compute/patches/cloud_regress_pg17.patch index cbe84ef54b..e57447a2c6 100644 --- a/compute/patches/cloud_regress_pg17.patch +++ b/compute/patches/cloud_regress_pg17.patch @@ -1014,10 +1014,10 @@ index fc42d418bf..e38f517574 100644 CREATE SCHEMA addr_nsp; SET search_path TO 'addr_nsp'; diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out -index 924d6e001d..5966531db6 100644 +index 924d6e001d..7fdda73439 100644 --- a/src/test/regress/expected/password.out +++ b/src/test/regress/expected/password.out -@@ -12,13 +12,13 @@ SET password_encryption = 'md5'; -- ok +@@ -12,13 +12,11 @@ SET password_encryption = 'md5'; -- ok SET password_encryption = 'scram-sha-256'; -- ok -- consistency of password entries SET password_encryption = 'md5'; @@ -1026,9 +1026,7 @@ index 924d6e001d..5966531db6 100644 -CREATE ROLE regress_passwd2; -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2'; +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; SET password_encryption = 'scram-sha-256'; -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; -CREATE ROLE regress_passwd4 PASSWORD NULL; @@ -1037,71 +1035,69 @@ index 924d6e001d..5966531db6 100644 -- check list of created entries -- -- The scram secret will look something like: -@@ -32,10 +32,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -32,10 +30,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5783277baca28003b33453252be4dbb34 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3 -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: - regress_passwd4 | + regress_passwd4 | SCRAM-SHA-256$4096:$: (4 rows) -- Rename a role -@@ -56,24 +56,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -56,24 +54,17 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; -- passwords. 
SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- This looks like a valid SCRAM-SHA-256 secret, but it is not - -- so it should be hashed with SCRAM-SHA-256. - CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- These may look like valid MD5 secrets, but they are not, so they - -- should be hashed with SCRAM-SHA-256. - -- trailing garbage at the end - CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- invalid length - CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. 
+--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Changing the SCRAM iteration count SET scram_iterations = 1024; CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount'; -@@ -83,63 +89,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -83,11 +74,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: regress_passwd4 | SCRAM-SHA-256$4096:$: - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023 -- regress_passwd6 | SCRAM-SHA-256$4096:$: -- regress_passwd7 | SCRAM-SHA-256$4096:$: -- regress_passwd8 | SCRAM-SHA-256$4096:$: - regress_passwd9 | SCRAM-SHA-256$1024:$: --(9 rows) -+(5 rows) - ++ regress_passwd5 | SCRAM-SHA-256$4096:$: + regress_passwd6 | SCRAM-SHA-256$4096:$: + regress_passwd7 | SCRAM-SHA-256$4096:$: + regress_passwd8 | SCRAM-SHA-256$4096:$: +@@ -97,23 +88,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ -- An empty password is not allowed, in any form CREATE ROLE regress_passwd_empty PASSWORD ''; NOTICE: empty string is not a valid password, clearing password @@ -1119,56 +1115,37 @@ index 924d6e001d..5966531db6 100644 -(1 row) +(0 rows) - -- Test with invalid stored and server keys. - -- - -- The first is valid, to act as a control. The others have too long - -- stored/server keys. They will be re-hashed. - CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +--- Test with invalid stored and server keys. 
+--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Check that the invalid secrets were re-hashed. A re-hashed secret -- should not contain the original salt. SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed - FROM pg_authid - WHERE rolname LIKE 'regress_passwd_sha_len%' +@@ -122,7 +110,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw ORDER BY rolname; -- rolname | is_rolpassword_rehashed ---------------------------+------------------------- + rolname | is_rolpassword_rehashed + -------------------------+------------------------- - regress_passwd_sha_len0 | f -- regress_passwd_sha_len1 | t -- regress_passwd_sha_len2 | t --(3 rows) -+ rolname | is_rolpassword_rehashed -+---------+------------------------- -+(0 rows) - - DROP ROLE regress_passwd1; - DROP ROLE regress_passwd2; - DROP ROLE regress_passwd3; - DROP ROLE regress_passwd4; - DROP ROLE regress_passwd5; -+ERROR: role "regress_passwd5" does not exist - DROP ROLE regress_passwd6; -+ERROR: role "regress_passwd6" does not exist - DROP ROLE regress_passwd7; -+ERROR: role "regress_passwd7" does not exist ++ regress_passwd_sha_len0 | t + regress_passwd_sha_len1 | t + regress_passwd_sha_len2 | t + (3 rows) +@@ -137,6 +125,7 @@ DROP ROLE regress_passwd7; DROP ROLE regress_passwd8; -+ERROR: role "regress_passwd8" does not exist DROP ROLE regress_passwd9; DROP ROLE regress_passwd_empty; +ERROR: role "regress_passwd_empty" does not exist DROP ROLE regress_passwd_sha_len0; -+ERROR: role "regress_passwd_sha_len0" does not exist DROP ROLE regress_passwd_sha_len1; -+ERROR: role "regress_passwd_sha_len1" does not exist DROP ROLE regress_passwd_sha_len2; -+ERROR: role "regress_passwd_sha_len2" does not exist - -- all entries should have been removed - SELECT rolname, rolpassword - FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 1296da0d57..f43fffa44c 100644 --- a/src/test/regress/expected/privileges.out @@ -3249,10 +3226,10 @@ index 1a6c61f49d..1c31ac6a53 100644 -- Test generic object addressing/identification functions CREATE SCHEMA addr_nsp; diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql -index bb82aa4aa2..7424c91b10 100644 +index bb82aa4aa2..dd8a05e24d 100644 --- a/src/test/regress/sql/password.sql +++ b/src/test/regress/sql/password.sql -@@ -10,13 +10,13 @@ SET password_encryption = 'scram-sha-256'; -- ok +@@ -10,13 +10,11 @@ 
SET password_encryption = 'scram-sha-256'; -- ok -- consistency of password entries SET password_encryption = 'md5'; @@ -3261,9 +3238,7 @@ index bb82aa4aa2..7424c91b10 100644 -CREATE ROLE regress_passwd2; -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2'; +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; SET password_encryption = 'scram-sha-256'; -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; -CREATE ROLE regress_passwd4 PASSWORD NULL; @@ -3272,23 +3247,59 @@ index bb82aa4aa2..7424c91b10 100644 -- check list of created entries -- -@@ -44,14 +44,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -44,26 +42,19 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. +--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Changing the SCRAM iteration count + SET scram_iterations = 1024; +@@ -80,13 +71,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a'; + ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4='; + SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty'; + +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. 
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Check that the invalid secrets were re-hashed. A re-hashed secret + -- should not contain the original salt. diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 5880bc018d..27aa952b18 100644 --- a/src/test/regress/sql/privileges.sql From ce8eb089f3d002e5057f860454adeb0993431a19 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 13 Dec 2024 17:06:27 +0300 Subject: [PATCH 17/49] Extract public sk types to safekeeper_api (#10137) ## Problem We want to extract safekeeper http client to separate crate for use in storage controller and neon_local. However, many types used in the API are internal to safekeeper. ## Summary of changes Move them to safekeeper_api crate. No functional changes. ref https://github.com/neondatabase/neon/issues/9011 --- Cargo.lock | 3 + libs/safekeeper_api/Cargo.toml | 5 +- libs/safekeeper_api/src/lib.rs | 17 ++ libs/safekeeper_api/src/models.rs | 170 +++++++++++++++++- safekeeper/src/control_file_upgrade.rs | 3 +- safekeeper/src/debug_dump.rs | 2 +- safekeeper/src/handler.rs | 4 +- safekeeper/src/http/client.rs | 3 +- safekeeper/src/http/routes.rs | 72 ++------ safekeeper/src/json_ctrl.rs | 5 +- safekeeper/src/pull_timeline.rs | 7 +- safekeeper/src/receive_wal.rs | 21 +-- safekeeper/src/recovery.rs | 8 +- safekeeper/src/safekeeper.rs | 19 +- safekeeper/src/send_wal.rs | 89 +-------- safekeeper/src/state.rs | 7 +- safekeeper/src/timeline.rs | 55 ++---- safekeeper/src/timeline_manager.rs | 4 +- safekeeper/src/timelines_global_map.rs | 2 +- safekeeper/src/wal_backup.rs | 3 +- safekeeper/src/wal_backup_partial.rs | 2 +- safekeeper/src/wal_reader_stream.rs | 2 +- safekeeper/src/wal_service.rs | 3 +- .../tests/walproposer_sim/safekeeper.rs | 3 +- 24 files changed, 264 insertions(+), 245 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2d5e03613..c4f80f63c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5565,7 +5565,10 @@ name = "safekeeper_api" version = "0.1.0" dependencies = [ "const_format", + "postgres_ffi", + "pq_proto", "serde", + "tokio", "utils", ] diff --git a/libs/safekeeper_api/Cargo.toml b/libs/safekeeper_api/Cargo.toml index 14811232d3..4234ec6779 100644 --- a/libs/safekeeper_api/Cargo.toml +++ b/libs/safekeeper_api/Cargo.toml @@ -5,6 +5,9 @@ edition.workspace = true license.workspace = true [dependencies] -serde.workspace = true const_format.workspace = true +serde.workspace = true +postgres_ffi.workspace = true +pq_proto.workspace = true +tokio.workspace = true utils.workspace = true diff --git a/libs/safekeeper_api/src/lib.rs 
b/libs/safekeeper_api/src/lib.rs
index 63c2c51188..be6923aca9 100644
--- a/libs/safekeeper_api/src/lib.rs
+++ b/libs/safekeeper_api/src/lib.rs
@@ -1,10 +1,27 @@
 #![deny(unsafe_code)]
 #![deny(clippy::undocumented_unsafe_blocks)]
 use const_format::formatcp;
+use pq_proto::SystemId;
+use serde::{Deserialize, Serialize};
 
 /// Public API types
 pub mod models;
 
+/// Consensus logical timestamp. Note: it is a part of sk control file.
+pub type Term = u64;
+pub const INVALID_TERM: Term = 0;
+
+/// Information about Postgres. Safekeeper gets it once and then verifies all
+/// further connections from computes match. Note: it is a part of sk control
+/// file.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ServerInfo {
+    /// Postgres server version
+    pub pg_version: u32,
+    pub system_id: SystemId,
+    pub wal_seg_size: u32,
+}
+
 pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
 pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs
index 28666d197a..3e424a792c 100644
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -1,10 +1,23 @@
+//! Types used in safekeeper http API. Many of them are also reused internally.
+
+use postgres_ffi::TimestampTz;
 use serde::{Deserialize, Serialize};
+use std::net::SocketAddr;
+use tokio::time::Instant;
 use utils::{
-    id::{NodeId, TenantId, TimelineId},
+    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
     lsn::Lsn,
+    pageserver_feedback::PageserverFeedback,
 };
 
+use crate::{ServerInfo, Term};
+
+#[derive(Debug, Serialize)]
+pub struct SafekeeperStatus {
+    pub id: NodeId,
+}
+
 #[derive(Serialize, Deserialize)]
 pub struct TimelineCreateRequest {
     pub tenant_id: TenantId,
@@ -18,6 +31,161 @@ pub struct TimelineCreateRequest {
     pub local_start_lsn: Option<Lsn>,
 }
 
+/// Same as TermLsn, but serializes LSN using display serializer
+/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct TermSwitchApiEntry {
+    pub term: Term,
+    pub lsn: Lsn,
+}
+
+/// Augment AcceptorState with last_log_term for convenience
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AcceptorStateStatus {
+    pub term: Term,
+    pub epoch: Term, // aka last_log_term, old `epoch` name is left for compatibility
+    pub term_history: Vec<TermSwitchApiEntry>,
+}
+
+/// Things safekeeper should know about timeline state on peers.
+/// Used as both model and internally.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeerInfo {
+    pub sk_id: NodeId,
+    pub term: Term,
+    /// Term of the last entry.
+    pub last_log_term: Term,
+    /// LSN of the last record.
+    pub flush_lsn: Lsn,
+    pub commit_lsn: Lsn,
+    /// Since which LSN safekeeper has WAL.
+    pub local_start_lsn: Lsn,
+    /// When info was received. Serde annotations are not very useful but make
+    /// the code compile -- we don't rely on this field externally.
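+    /// Concretely: `#[serde(skip)]` below drops the field on serialization, and
+    /// `default = "Instant::now"` re-fills it on deserialization.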
+    #[serde(skip)]
+    #[serde(default = "Instant::now")]
+    pub ts: Instant,
+    pub pg_connstr: String,
+    pub http_connstr: String,
+}
+
+pub type FullTransactionId = u64;
+
+/// Hot standby feedback received from replica
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct HotStandbyFeedback {
+    pub ts: TimestampTz,
+    pub xmin: FullTransactionId,
+    pub catalog_xmin: FullTransactionId,
+}
+
+pub const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0;
+
+impl HotStandbyFeedback {
+    pub fn empty() -> HotStandbyFeedback {
+        HotStandbyFeedback {
+            ts: 0,
+            xmin: 0,
+            catalog_xmin: 0,
+        }
+    }
+}
+
+/// Standby status update
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct StandbyReply {
+    pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby.
+    pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby.
+    pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby.
+    pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01.
+    pub reply_requested: bool,
+}
+
+impl StandbyReply {
+    pub fn empty() -> Self {
+        StandbyReply {
+            write_lsn: Lsn::INVALID,
+            flush_lsn: Lsn::INVALID,
+            apply_lsn: Lsn::INVALID,
+            reply_ts: 0,
+            reply_requested: false,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct StandbyFeedback {
+    pub reply: StandbyReply,
+    pub hs_feedback: HotStandbyFeedback,
+}
+
+impl StandbyFeedback {
+    pub fn empty() -> Self {
+        StandbyFeedback {
+            reply: StandbyReply::empty(),
+            hs_feedback: HotStandbyFeedback::empty(),
+        }
+    }
+}
+
+/// Receiver is either pageserver or regular standby, which have different
+/// feedbacks.
+/// Used as both model and internally.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum ReplicationFeedback {
+    Pageserver(PageserverFeedback),
+    Standby(StandbyFeedback),
+}
+
+/// Uniquely identifies a WAL service connection. Logged in spans for
+/// observability.
+pub type ConnectionId = u32;
+
+/// Serialize is used only for json'ing in API response. Also used internally.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WalSenderState {
+    pub ttid: TenantTimelineId,
+    pub addr: SocketAddr,
+    pub conn_id: ConnectionId,
+    // postgres application_name
+    pub appname: Option<String>,
+    pub feedback: ReplicationFeedback,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WalReceiverState {
+    /// None means it is recovery initiated by us (this safekeeper).
+    pub conn_id: Option<ConnectionId>,
+    pub status: WalReceiverStatus,
+}
+
+/// Walreceiver status. Currently only whether it passed voting stage and
+/// started receiving the stream, but it is easy to add more if needed.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum WalReceiverStatus {
+    Voting,
+    Streaming,
+}
+
+/// Info about timeline on safekeeper ready for reporting.
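+/// This is the JSON body returned by the safekeeper HTTP API for per-timeline
+/// status requests (see `timeline_status_handler` in `http/routes.rs`).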
+#[derive(Debug, Serialize, Deserialize)]
+pub struct TimelineStatus {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub acceptor_state: AcceptorStateStatus,
+    pub pg_info: ServerInfo,
+    pub flush_lsn: Lsn,
+    pub timeline_start_lsn: Lsn,
+    pub local_start_lsn: Lsn,
+    pub commit_lsn: Lsn,
+    pub backup_lsn: Lsn,
+    pub peer_horizon_lsn: Lsn,
+    pub remote_consistent_lsn: Lsn,
+    pub peers: Vec<PeerInfo>,
+    pub walsenders: Vec<WalSenderState>,
+    pub walreceivers: Vec<WalReceiverState>,
+}
+
 fn lsn_invalid() -> Lsn {
     Lsn::INVALID
 }
diff --git a/safekeeper/src/control_file_upgrade.rs b/safekeeper/src/control_file_upgrade.rs
index a4b4670e42..dd152fd4cc 100644
--- a/safekeeper/src/control_file_upgrade.rs
+++ b/safekeeper/src/control_file_upgrade.rs
@@ -1,11 +1,12 @@
 //! Code to deal with safekeeper control file upgrades
 use crate::{
-    safekeeper::{AcceptorState, PgUuid, ServerInfo, Term, TermHistory, TermLsn},
+    safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn},
     state::{EvictionState, PersistedPeers, TimelinePersistentState},
     wal_backup_partial,
 };
 use anyhow::{bail, Result};
 use pq_proto::SystemId;
+use safekeeper_api::{ServerInfo, Term};
 use serde::{Deserialize, Serialize};
 use tracing::*;
 use utils::{
diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs
index 93011eddec..19362a0992 100644
--- a/safekeeper/src/debug_dump.rs
+++ b/safekeeper/src/debug_dump.rs
@@ -14,6 +14,7 @@ use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use postgres_ffi::XLogSegNo;
 use postgres_ffi::MAX_SEND_SIZE;
+use safekeeper_api::models::WalSenderState;
 use serde::Deserialize;
 use serde::Serialize;
 
@@ -25,7 +26,6 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 
 use crate::safekeeper::TermHistory;
-use crate::send_wal::WalSenderState;
 use crate::state::TimelineMemState;
 use crate::state::TimelinePersistentState;
 use crate::timeline::get_timeline_dir;
diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs
index 2ca6333ba8..bb639bfb32 100644
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -4,6 +4,8 @@
 use anyhow::Context;
 use pageserver_api::models::ShardParameters;
 use pageserver_api::shard::{ShardIdentity, ShardStripeSize};
+use safekeeper_api::models::ConnectionId;
+use safekeeper_api::Term;
 use std::future::Future;
 use std::str::{self, FromStr};
 use std::sync::Arc;
@@ -16,9 +18,7 @@ use crate::auth::check_permission;
 use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
 
 use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE};
-use crate::safekeeper::Term;
 use crate::timeline::TimelineError;
-use crate::wal_service::ConnectionId;
 use crate::{GlobalTimelines, SafeKeeperConf};
 use postgres_backend::PostgresBackend;
 use postgres_backend::QueryError;
diff --git a/safekeeper/src/http/client.rs b/safekeeper/src/http/client.rs
index a166fc1ab9..669a9c0ce9 100644
--- a/safekeeper/src/http/client.rs
+++ b/safekeeper/src/http/client.rs
@@ -8,6 +8,7 @@
 //!
 //! etc.
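+//!
+//! Rough usage sketch (hypothetical call names, for orientation only; the
+//! exact client methods live below in this file):
+//! ```ignore
+//! let client = Client::new(mgmt_api_endpoint, jwt);
+//! let status: TimelineStatus = client.timeline_status(tenant_id, timeline_id).await?;
+//! ```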
 use reqwest::{IntoUrl, Method, StatusCode};
+use safekeeper_api::models::TimelineStatus;
 use std::error::Error as _;
 use utils::{
     http::error::HttpErrorBody,
@@ -15,8 +16,6 @@ use utils::{
     logging::SecretString,
 };
 
-use super::routes::TimelineStatus;
-
 #[derive(Debug, Clone)]
 pub struct Client {
     mgmt_api_endpoint: String,
diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs
index 71c36f1d46..9bc1bf3409 100644
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -1,5 +1,9 @@
 use hyper::{Body, Request, Response, StatusCode};
-use serde::{Deserialize, Serialize};
+use safekeeper_api::models::AcceptorStateStatus;
+use safekeeper_api::models::SafekeeperStatus;
+use safekeeper_api::models::TermSwitchApiEntry;
+use safekeeper_api::models::TimelineStatus;
+use safekeeper_api::ServerInfo;
 use std::collections::HashMap;
 use std::fmt;
 use std::io::Write as _;
@@ -31,26 +35,17 @@ use utils::{
         request::{ensure_no_body, parse_request_param},
         RequestExt, RouterBuilder,
     },
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
+    id::{TenantId, TenantTimelineId, TimelineId},
     lsn::Lsn,
 };
 
 use crate::debug_dump::TimelineDigestRequest;
-use crate::receive_wal::WalReceiverState;
-use crate::safekeeper::Term;
-use crate::safekeeper::{ServerInfo, TermLsn};
-use crate::send_wal::WalSenderState;
-use crate::timeline::PeerInfo;
+use crate::safekeeper::TermLsn;
 use crate::timelines_global_map::TimelineDeleteForceResult;
 use crate::GlobalTimelines;
 use crate::SafeKeeperConf;
 use crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline};
 
-#[derive(Debug, Serialize)]
-struct SafekeeperStatus {
-    id: NodeId,
-}
-
 /// Healthcheck handler.
 async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
     check_permission(&request, None)?;
@@ -73,50 +68,6 @@ fn get_global_timelines(request: &Request<Body>) -> Arc<GlobalTimelines> {
         .clone()
 }
 
-/// Same as TermLsn, but serializes LSN using display serializer
-/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response.
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
-pub struct TermSwitchApiEntry {
-    pub term: Term,
-    pub lsn: Lsn,
-}
-
-impl From<TermSwitchApiEntry> for TermLsn {
-    fn from(api_val: TermSwitchApiEntry) -> Self {
-        TermLsn {
-            term: api_val.term,
-            lsn: api_val.lsn,
-        }
-    }
-}
-
-/// Augment AcceptorState with last_log_term for convenience
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AcceptorStateStatus {
-    pub term: Term,
-    pub epoch: Term, // aka last_log_term
-    pub term_history: Vec<TermSwitchApiEntry>,
-}
-
-/// Info about timeline on safekeeper ready for reporting.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct TimelineStatus {
-    pub tenant_id: TenantId,
-    pub timeline_id: TimelineId,
-    pub acceptor_state: AcceptorStateStatus,
-    pub pg_info: ServerInfo,
-    pub flush_lsn: Lsn,
-    pub timeline_start_lsn: Lsn,
-    pub local_start_lsn: Lsn,
-    pub commit_lsn: Lsn,
-    pub backup_lsn: Lsn,
-    pub peer_horizon_lsn: Lsn,
-    pub remote_consistent_lsn: Lsn,
-    pub peers: Vec<PeerInfo>,
-    pub walsenders: Vec<WalSenderState>,
-    pub walreceivers: Vec<WalReceiverState>,
-}
-
 fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Result<(), ApiError> {
     check_permission_with(request, |claims| {
         crate::auth::check_permission(claims, tenant_id)
@@ -187,6 +138,15 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
     json_response(StatusCode::OK, res)
 }
 
+impl From<TermSwitchApiEntry> for TermLsn {
+    fn from(api_val: TermSwitchApiEntry) -> Self {
+        TermLsn {
+            term: api_val.term,
+            lsn: api_val.lsn,
+        }
+    }
+}
+
 /// Report info about timeline.
 async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
     let ttid = TenantTimelineId::new(
diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs
index dc4ad3706e..256e350ceb 100644
--- a/safekeeper/src/json_ctrl.rs
+++ b/safekeeper/src/json_ctrl.rs
@@ -8,16 +8,17 @@

 use anyhow::Context;
 use postgres_backend::QueryError;
+use safekeeper_api::{ServerInfo, Term};
 use serde::{Deserialize, Serialize};
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::*;

 use crate::handler::SafekeeperPostgresHandler;
-use crate::safekeeper::{AcceptorProposerMessage, AppendResponse, ServerInfo};
+use crate::safekeeper::{AcceptorProposerMessage, AppendResponse};
 use crate::safekeeper::{
     AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, ProposerElected,
 };
-use crate::safekeeper::{Term, TermHistory, TermLsn};
+use crate::safekeeper::{TermHistory, TermLsn};
 use crate::state::TimelinePersistentState;
 use crate::timeline::WalResidentTimeline;
 use postgres_backend::PostgresBackend;
diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs
index f58a9dca1d..00777273cb 100644
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -4,6 +4,7 @@ use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use futures::{SinkExt, StreamExt, TryStreamExt};
 use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
+use safekeeper_api::{models::TimelineStatus, Term};
 use serde::{Deserialize, Serialize};
 use std::{
     cmp::min,
@@ -21,11 +22,7 @@ use tracing::{error, info, instrument};
 use crate::{
     control_file::CONTROL_FILE_NAME,
     debug_dump,
-    http::{
-        client::{self, Client},
-        routes::TimelineStatus,
-    },
-    safekeeper::Term,
+    http::client::{self, Client},
     state::{EvictionState, TimelinePersistentState},
     timeline::{Timeline, WalResidentTimeline},
     timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline},
diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs
index 2a49890d61..08371177cd 100644
--- a/safekeeper/src/receive_wal.rs
+++ b/safekeeper/src/receive_wal.rs
@@ -9,9 +9,7 @@ use crate::metrics::{
 };
 use crate::safekeeper::AcceptorProposerMessage;
 use crate::safekeeper::ProposerAcceptorMessage;
-use crate::safekeeper::ServerInfo;
 use crate::timeline::WalResidentTimeline;
-use crate::wal_service::ConnectionId;
 use crate::GlobalTimelines;
 use anyhow::{anyhow, Context};
 use bytes::BytesMut;
@@ -23,8 +21,8 @@ use postgres_backend::PostgresBackend;
 use postgres_backend::PostgresBackendReader;
 use postgres_backend::QueryError;
 use pq_proto::BeMessage;
-use serde::Deserialize;
-use serde::Serialize;
+use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus};
+use safekeeper_api::ServerInfo;
 use std::future;
 use std::net::SocketAddr;
 use std::sync::Arc;
@@ -171,21 +169,6 @@ impl WalReceiversShared {
     }
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct WalReceiverState {
-    /// None means it is recovery initiated by us (this safekeeper).
-    pub conn_id: Option<ConnectionId>,
-    pub status: WalReceiverStatus,
-}
-
-/// Walreceiver status. Currently only whether it passed voting stage and
-/// started receiving the stream, but it is easy to add more if needed.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum WalReceiverStatus {
-    Voting,
-    Streaming,
-}
-
 /// Scope guard to access slot in WalReceivers registry and unregister from
 /// it in Drop.
pub struct WalReceiverGuard { diff --git a/safekeeper/src/recovery.rs b/safekeeper/src/recovery.rs index 7b87166aa0..61647c16b0 100644 --- a/safekeeper/src/recovery.rs +++ b/safekeeper/src/recovery.rs @@ -7,6 +7,8 @@ use std::{fmt, pin::pin}; use anyhow::{bail, Context}; use futures::StreamExt; use postgres_protocol::message::backend::ReplicationMessage; +use safekeeper_api::models::{PeerInfo, TimelineStatus}; +use safekeeper_api::Term; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::time::timeout; use tokio::{ @@ -24,13 +26,11 @@ use crate::receive_wal::{WalAcceptor, REPLY_QUEUE_SIZE}; use crate::safekeeper::{AppendRequest, AppendRequestHeader}; use crate::timeline::WalResidentTimeline; use crate::{ - http::routes::TimelineStatus, receive_wal::MSG_QUEUE_SIZE, safekeeper::{ - AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, Term, TermHistory, - TermLsn, VoteRequest, + AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, + VoteRequest, }, - timeline::PeerInfo, SafeKeeperConf, }; diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 6eb69f0b7c..ccd7940c72 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -5,6 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use postgres_ffi::{TimeLineID, MAX_SEND_SIZE}; +use safekeeper_api::models::HotStandbyFeedback; +use safekeeper_api::Term; +use safekeeper_api::INVALID_TERM; use serde::{Deserialize, Serialize}; use std::cmp::max; use std::cmp::min; @@ -16,7 +19,6 @@ use tracing::*; use crate::control_file; use crate::metrics::MISC_OPERATION_SECONDS; -use crate::send_wal::HotStandbyFeedback; use crate::state::TimelineState; use crate::wal_storage; @@ -31,10 +33,6 @@ use utils::{ const SK_PROTOCOL_VERSION: u32 = 2; pub const UNKNOWN_SERVER_VERSION: u32 = 0; -/// Consensus logical timestamp. -pub type Term = u64; -pub const INVALID_TERM: Term = 0; - #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct TermLsn { pub term: Term, @@ -198,16 +196,6 @@ impl AcceptorState { } } -/// Information about Postgres. Safekeeper gets it once and then verifies -/// all further connections from computes match. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ServerInfo { - /// Postgres server version - pub pg_version: u32, - pub system_id: SystemId, - pub wal_seg_size: u32, -} - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct PersistedPeerInfo { /// LSN up to which safekeeper offloaded WAL to s3. 
@@ -1041,6 +1029,7 @@ where mod tests { use futures::future::BoxFuture; use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE}; + use safekeeper_api::ServerInfo; use super::*; use crate::state::{EvictionState, PersistedPeers, TimelinePersistentState}; diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 0887cf7264..8463221998 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -4,11 +4,10 @@ use crate::handler::SafekeeperPostgresHandler; use crate::metrics::RECEIVED_PS_FEEDBACKS; use crate::receive_wal::WalReceivers; -use crate::safekeeper::{Term, TermLsn}; +use crate::safekeeper::TermLsn; use crate::send_interpreted_wal::InterpretedWalSender; use crate::timeline::WalResidentTimeline; use crate::wal_reader_stream::WalReaderStreamBuilder; -use crate::wal_service::ConnectionId; use crate::wal_storage::WalReader; use anyhow::{bail, Context as AnyhowContext}; use bytes::Bytes; @@ -19,7 +18,11 @@ use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; use postgres_ffi::get_current_timestamp; use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; -use serde::{Deserialize, Serialize}; +use safekeeper_api::models::{ + ConnectionId, HotStandbyFeedback, ReplicationFeedback, StandbyFeedback, StandbyReply, + WalSenderState, INVALID_FULL_TRANSACTION_ID, +}; +use safekeeper_api::Term; use tokio::io::{AsyncRead, AsyncWrite}; use utils::failpoint_support; use utils::id::TenantTimelineId; @@ -28,7 +31,6 @@ use utils::postgres_client::PostgresClientProtocol; use std::cmp::{max, min}; use std::net::SocketAddr; -use std::str; use std::sync::Arc; use std::time::Duration; use tokio::sync::watch::Receiver; @@ -42,65 +44,6 @@ const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r'; // neon extension of replication protocol const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z'; -type FullTransactionId = u64; - -/// Hot standby feedback received from replica -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct HotStandbyFeedback { - pub ts: TimestampTz, - pub xmin: FullTransactionId, - pub catalog_xmin: FullTransactionId, -} - -const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0; - -impl HotStandbyFeedback { - pub fn empty() -> HotStandbyFeedback { - HotStandbyFeedback { - ts: 0, - xmin: 0, - catalog_xmin: 0, - } - } -} - -/// Standby status update -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct StandbyReply { - pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby. - pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby. - pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby. - pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01. - pub reply_requested: bool, -} - -impl StandbyReply { - fn empty() -> Self { - StandbyReply { - write_lsn: Lsn::INVALID, - flush_lsn: Lsn::INVALID, - apply_lsn: Lsn::INVALID, - reply_ts: 0, - reply_requested: false, - } - } -} - -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct StandbyFeedback { - pub reply: StandbyReply, - pub hs_feedback: HotStandbyFeedback, -} - -impl StandbyFeedback { - pub fn empty() -> Self { - StandbyFeedback { - reply: StandbyReply::empty(), - hs_feedback: HotStandbyFeedback::empty(), - } - } -} - /// WalSenders registry. Timeline holds it (wrapped in Arc). 
 pub struct WalSenders {
     mutex: Mutex<WalSendersShared>,
@@ -341,25 +284,6 @@ impl WalSendersShared {
     }
 }

-// Serialized is used only for pretty printing in json.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct WalSenderState {
-    ttid: TenantTimelineId,
-    addr: SocketAddr,
-    conn_id: ConnectionId,
-    // postgres application_name
-    appname: Option<String>,
-    feedback: ReplicationFeedback,
-}
-
-// Receiver is either pageserver or regular standby, which have different
-// feedbacks.
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
-enum ReplicationFeedback {
-    Pageserver(PageserverFeedback),
-    Standby(StandbyFeedback),
-}
-
 // id of the occupied slot in WalSenders to access it (and save in the
 // WalSenderGuard). We could give Arc directly to the slot, but there is not
 // much sense in that as values aggregation which is performed on each feedback
@@ -888,6 +812,7 @@ impl ReplyReader {

 #[cfg(test)]
 mod tests {
+    use safekeeper_api::models::FullTransactionId;
     use utils::id::{TenantId, TimelineId};

     use super::*;
diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs
index 941b7e67d0..c6ae6c1d2b 100644
--- a/safekeeper/src/state.rs
+++ b/safekeeper/src/state.rs
@@ -5,7 +5,7 @@ use std::{cmp::max, ops::Deref};

 use anyhow::{bail, Result};
 use postgres_ffi::WAL_SEGMENT_SIZE;
-use safekeeper_api::models::TimelineTermBumpResponse;
+use safekeeper_api::{models::TimelineTermBumpResponse, ServerInfo, Term};
 use serde::{Deserialize, Serialize};
 use utils::{
     id::{NodeId, TenantId, TenantTimelineId, TimelineId},
@@ -14,10 +14,7 @@ use utils::{

 use crate::{
     control_file,
-    safekeeper::{
-        AcceptorState, PersistedPeerInfo, PgUuid, ServerInfo, Term, TermHistory,
-        UNKNOWN_SERVER_VERSION,
-    },
+    safekeeper::{AcceptorState, PersistedPeerInfo, PgUuid, TermHistory, UNKNOWN_SERVER_VERSION},
     timeline::TimelineError,
     wal_backup_partial::{self},
 };
diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs
index 94d6ef1061..36860a0da2 100644
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -4,8 +4,8 @@ use anyhow::{anyhow, bail, Result};

 use camino::{Utf8Path, Utf8PathBuf};
 use remote_storage::RemotePath;
-use safekeeper_api::models::TimelineTermBumpResponse;
-use serde::{Deserialize, Serialize};
+use safekeeper_api::models::{PeerInfo, TimelineTermBumpResponse};
+use safekeeper_api::Term;
 use tokio::fs::{self};
 use tokio_util::sync::CancellationToken;
 use utils::id::TenantId;
@@ -31,9 +31,7 @@ use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
 use crate::control_file;
 use crate::rate_limit::RateLimiter;
 use crate::receive_wal::WalReceivers;
-use crate::safekeeper::{
-    AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, Term, TermLsn,
-};
+use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, TermLsn};
 use crate::send_wal::WalSenders;
 use crate::state::{EvictionState, TimelineMemState, TimelinePersistentState, TimelineState};
 use crate::timeline_guard::ResidenceGuard;
@@ -47,40 +45,17 @@ use crate::wal_storage::{Storage as wal_storage_iface, WalReader};
 use crate::SafeKeeperConf;
 use crate::{debug_dump, timeline_manager, wal_storage};

-/// Things safekeeper should know about timeline state on peers.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PeerInfo {
-    pub sk_id: NodeId,
-    pub term: Term,
-    /// Term of the last entry.
-    pub last_log_term: Term,
-    /// LSN of the last record.
-    pub flush_lsn: Lsn,
-    pub commit_lsn: Lsn,
-    /// Since which LSN safekeeper has WAL.
- pub local_start_lsn: Lsn, - /// When info was received. Serde annotations are not very useful but make - /// the code compile -- we don't rely on this field externally. - #[serde(skip)] - #[serde(default = "Instant::now")] - ts: Instant, - pub pg_connstr: String, - pub http_connstr: String, -} - -impl PeerInfo { - fn from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { - PeerInfo { - sk_id: NodeId(sk_info.safekeeper_id), - term: sk_info.term, - last_log_term: sk_info.last_log_term, - flush_lsn: Lsn(sk_info.flush_lsn), - commit_lsn: Lsn(sk_info.commit_lsn), - local_start_lsn: Lsn(sk_info.local_start_lsn), - pg_connstr: sk_info.safekeeper_connstr.clone(), - http_connstr: sk_info.http_connstr.clone(), - ts, - } +fn peer_info_from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { + PeerInfo { + sk_id: NodeId(sk_info.safekeeper_id), + term: sk_info.term, + last_log_term: sk_info.last_log_term, + flush_lsn: Lsn(sk_info.flush_lsn), + commit_lsn: Lsn(sk_info.commit_lsn), + local_start_lsn: Lsn(sk_info.local_start_lsn), + pg_connstr: sk_info.safekeeper_connstr.clone(), + http_connstr: sk_info.http_connstr.clone(), + ts, } } @@ -697,7 +672,7 @@ impl Timeline { { let mut shared_state = self.write_shared_state().await; shared_state.sk.record_safekeeper_info(&sk_info).await?; - let peer_info = PeerInfo::from_sk_info(&sk_info, Instant::now()); + let peer_info = peer_info_from_sk_info(&sk_info, Instant::now()); shared_state.peers_info.upsert(&peer_info); } Ok(()) diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index c02fb904cf..a33994dcab 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -14,6 +14,7 @@ use std::{ use futures::channel::oneshot; use postgres_ffi::XLogSegNo; +use safekeeper_api::{models::PeerInfo, Term}; use serde::{Deserialize, Serialize}; use tokio::{ task::{JoinError, JoinHandle}, @@ -32,10 +33,9 @@ use crate::{ rate_limit::{rand_duration, RateLimiter}, recovery::recovery_main, remove_wal::calc_horizon_lsn, - safekeeper::Term, send_wal::WalSenders, state::TimelineState, - timeline::{ManagerTimeline, PeerInfo, ReadGuardSharedState, StateSK, WalResidentTimeline}, + timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}, timeline_guard::{AccessService, GuardId, ResidenceGuard}, timelines_set::{TimelineSetGuard, TimelinesSet}, wal_backup::{self, WalBackupTaskHandle}, diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index e1241ceb9b..ad29c9f66c 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -4,7 +4,6 @@ use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; use crate::rate_limit::RateLimiter; -use crate::safekeeper::ServerInfo; use crate::state::TimelinePersistentState; use crate::timeline::{get_tenant_dir, get_timeline_dir, Timeline, TimelineError}; use crate::timelines_set::TimelinesSet; @@ -13,6 +12,7 @@ use crate::{control_file, wal_storage, SafeKeeperConf}; use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; +use safekeeper_api::ServerInfo; use serde::Serialize; use std::collections::HashMap; use std::str::FromStr; diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 34b5dbeaa1..8517fa0344 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -3,6 +3,7 @@ use anyhow::{Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; use futures::stream::FuturesOrdered; 
use futures::StreamExt; +use safekeeper_api::models::PeerInfo; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use utils::backoff; @@ -30,7 +31,7 @@ use tracing::*; use utils::{id::TenantTimelineId, lsn::Lsn}; use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS}; -use crate::timeline::{PeerInfo, WalResidentTimeline}; +use crate::timeline::WalResidentTimeline; use crate::timeline_manager::{Manager, StateSnapshot}; use crate::{SafeKeeperConf, WAL_BACKUP_RUNTIME}; diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index bddfca50e4..4e5b34a9bf 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -22,6 +22,7 @@ use camino::Utf8PathBuf; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; use remote_storage::RemotePath; +use safekeeper_api::Term; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; @@ -31,7 +32,6 @@ use utils::{id::NodeId, lsn::Lsn}; use crate::{ metrics::{MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS}, rate_limit::{rand_duration, RateLimiter}, - safekeeper::Term, timeline::WalResidentTimeline, timeline_manager::StateSnapshot, wal_backup::{self}, diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index f8c0c502cd..aea628c208 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -4,12 +4,12 @@ use async_stream::try_stream; use bytes::Bytes; use futures::Stream; use postgres_backend::CopyStreamHandlerEnd; +use safekeeper_api::Term; use std::time::Duration; use tokio::time::timeout; use utils::lsn::Lsn; use crate::{ - safekeeper::Term, send_wal::{EndWatch, WalSenderGuard}, timeline::WalResidentTimeline, }; diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs index 1ff83918a7..1ebcb060e7 100644 --- a/safekeeper/src/wal_service.rs +++ b/safekeeper/src/wal_service.rs @@ -4,6 +4,7 @@ //! use anyhow::{Context, Result}; use postgres_backend::QueryError; +use safekeeper_api::models::ConnectionId; use std::sync::Arc; use std::time::Duration; use tokio::net::TcpStream; @@ -114,8 +115,6 @@ async fn handle_socket( .await } -/// Unique WAL service connection ids are logged in spans for observability. 
-pub type ConnectionId = u32; pub type ConnectionCount = u32; pub fn issue_connection_id(count: &mut ConnectionCount) -> ConnectionId { diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 12aa025771..efcdd89e7d 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -15,12 +15,13 @@ use desim::{ }; use http::Uri; use safekeeper::{ - safekeeper::{ProposerAcceptorMessage, SafeKeeper, ServerInfo, UNKNOWN_SERVER_VERSION}, + safekeeper::{ProposerAcceptorMessage, SafeKeeper, UNKNOWN_SERVER_VERSION}, state::{TimelinePersistentState, TimelineState}, timeline::TimelineError, wal_storage::Storage, SafeKeeperConf, }; +use safekeeper_api::ServerInfo; use tracing::{debug, info_span, warn}; use utils::{ id::{NodeId, TenantId, TenantTimelineId, TimelineId}, From 2c910628288a29435f102471c6927d3cd24266ba Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 13 Dec 2024 14:52:54 +0000 Subject: [PATCH 18/49] test_prefetch: reduce timeout to default 5m from 10m (#10105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem `test_prefetch` is flaky (https://github.com/neondatabase/neon/issues/9961), but if it passes, the run time is less than 30 seconds — we don't need an extended timeout for it. ## Summary of changes - Remove extended test timeout for `test_prefetch` --- test_runner/regress/test_prefetch_buffer_resize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_runner/regress/test_prefetch_buffer_resize.py b/test_runner/regress/test_prefetch_buffer_resize.py index 7676b78b0e..99fe80e621 100644 --- a/test_runner/regress/test_prefetch_buffer_resize.py +++ b/test_runner/regress/test_prefetch_buffer_resize.py @@ -7,7 +7,6 @@ from fixtures.neon_fixtures import NeonEnvBuilder @pytest.mark.parametrize("shard_count", [None, 4]) -@pytest.mark.timeout(600) def test_prefetch(neon_env_builder: NeonEnvBuilder, shard_count: int | None): if shard_count is not None: neon_env_builder.num_pageservers = shard_count From fcff7528517b47f79a55334c22dc6dc89c113be1 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 13 Dec 2024 17:28:21 +0100 Subject: [PATCH 19/49] fix(test_timeline_archival_chaos): flakiness caused by orphan layers (#10083) The test was failing with the scary but generic message `Remote storage metadata corrupted`. The underlying scrubber error is `Orphan layer detected: ...`. The test kills pageserver at random points, hence it's expected that we leak layers if we're killed in the window after layer upload but before it's referenced from index part. Refer to generation numbers RFC for details. 
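For readers unfamiliar with the upload protocol, here is a minimal sketch of the
ordering that creates this window. The helper names are hypothetical, not the
actual pageserver code; only the ordering of the two uploads matters:

```python
# Illustrative sketch only: the remote-storage upload ordering that can
# leak an orphan layer when the pageserver is killed mid-way.
def upload_new_layer(remote_storage, layer, index_part):
    # Step 1: upload the layer file itself.
    remote_storage.upload(layer.local_path, layer.remote_path)
    # If the pageserver is killed here, the layer object already exists in
    # remote storage but nothing references it yet: the scrubber will later
    # report it as an orphan.
    # Step 2: only now is the layer recorded in index_part.json and uploaded.
    index_part.layers.append(layer.name)
    remote_storage.upload(index_part.to_bytes(), index_part.remote_path)
```

With generation numbers such an orphan is merely leaked garbage rather than a
correctness problem, which is why the fix below allow-lists the scrubber error
instead of changing pageserver behaviour.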
Refs:
- fixes https://github.com/neondatabase/neon/issues/9988
- root-cause analysis https://github.com/neondatabase/neon/issues/9988#issuecomment-2520673167
---
 test_runner/regress/test_timeline_archive.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py
index e808dd1396..addf702893 100644
--- a/test_runner/regress/test_timeline_archive.py
+++ b/test_runner/regress/test_timeline_archive.py
@@ -435,6 +435,14 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder):
         ]
     )

+    env.storage_scrubber.allowed_errors.extend(
+        [
+            # Unclean shutdowns of pageserver can legitimately result in orphan layers
+            # (https://github.com/neondatabase/neon/issues/9988#issuecomment-2520558211)
+            f".*Orphan layer detected: tenants/{tenant_id}/.*"
+        ]
+    )
+
     class TimelineState:
         def __init__(self):
             self.timeline_id = TimelineId.generate()

From eeabecd89f89a24fa8ee642c916efd97b4e8fa06 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Fri, 13 Dec 2024 19:40:26 +0200
Subject: [PATCH 20/49] Correctly update LFC used_pages in case of LFC resize
 (#10128)

## Problem

The LFC used_pages statistic is not updated in case of an LFC resize
(shrinking `neon.file_cache_size_limit`).

## Summary of changes

Update `lfc_ctl->used_pages` in `lfc_change_limit_hook`

Co-authored-by: Konstantin Knizhnik
---
 pgxn/neon/file_cache.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c
index 70b250d394..f49415be68 100644
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -365,6 +365,10 @@ lfc_change_limit_hook(int newval, void *extra)
 			neon_log(LOG, "Failed to punch hole in file: %m");
 #endif
 		/* We remove the old entry, and re-enter a hole to the hash table */
+		for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+		{
+			lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
+		}
 		hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash,
 									HASH_REMOVE, NULL);
 		memset(&holetag, 0, sizeof(holetag));

From 07d1db54b3f0b9c113aa28fcdb85bacf15979cd9 Mon Sep 17 00:00:00 2001
From: Tristan Partin
Date: Fri, 13 Dec 2024 12:10:42 -0600
Subject: [PATCH 21/49] Improve comments and log messages in the logical
 replication monitor (#9974)

Improved comments will help others when they read the code, and the log
messages will help others understand why the logical replication
monitor works the way it does.

Signed-off-by: Tristan Partin
---
 pgxn/neon/logical_replication_monitor.c | 53 ++++++++++++++-----------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/pgxn/neon/logical_replication_monitor.c b/pgxn/neon/logical_replication_monitor.c
index 5eee5a1679..b94faafdfa 100644
--- a/pgxn/neon/logical_replication_monitor.c
+++ b/pgxn/neon/logical_replication_monitor.c
@@ -131,8 +131,8 @@ get_snapshots_cutoff_lsn(void)
 	{
 		cutoff = snapshot_descriptors[logical_replication_max_snap_files - 1].lsn;
 		elog(LOG,
-			 "ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %zu snapshot files, limit is %d",
-			 LSN_FORMAT_ARGS(cutoff), snapshot_index, logical_replication_max_snap_files);
+			 "ls_monitor: number of snapshot files, %zu, is larger than limit of %d",
+			 snapshot_index, logical_replication_max_snap_files);
 	}

 	/* Is the size of the logical snapshots directory larger than specified?
@@ -162,8 +162,8 @@ get_snapshots_cutoff_lsn(void)
 		}

 		if (cutoff != original)
-			elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower than %X/%X, " SNAPDIR " is larger than %d KB",
-				 LSN_FORMAT_ARGS(cutoff), logical_replication_max_logicalsnapdir_size);
+			elog(LOG, "ls_monitor: " SNAPDIR " is larger than %d KB",
+				 logical_replication_max_logicalsnapdir_size);
 	}

 	pfree(snapshot_descriptors);
@@ -214,9 +214,13 @@ InitLogicalReplicationMonitor(void)
 }

 /*
- * Unused logical replication slots pins WAL and prevents deletion of snapshots.
+ * Unused logical replication slots pin WAL and prevent deletion of snapshots.
  * WAL bloat is guarded by max_slot_wal_keep_size; this bgw removes slots which
- * need too many .snap files.
+ * need too many .snap files. These files are stored as AUX files, which are a
+ * pageserver mechanism for storing non-relation data. AUX files are shipped
+ * in the basebackup which is requested by compute_ctl before Postgres starts.
+ * The longer it takes to retrieve the basebackup, the more likely it is the
+ * compute will be killed by the control plane due to a timeout.
  */
 void
 LogicalSlotsMonitorMain(Datum main_arg)
@@ -239,10 +243,7 @@ LogicalSlotsMonitorMain(Datum main_arg)
 			ProcessConfigFile(PGC_SIGHUP);
 		}

-		/*
-		 * If there are too many .snap files, just drop all logical slots to
-		 * prevent aux files bloat.
-		 */
+		/* Get the cutoff LSN */
 		cutoff_lsn = get_snapshots_cutoff_lsn();
 		if (cutoff_lsn > 0)
 		{
@@ -252,31 +253,37 @@ LogicalSlotsMonitorMain(Datum main_arg)
 				ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
 				XLogRecPtr	restart_lsn;

-				/* find the name */
 				LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
-				/* Consider only logical repliction slots */
+
+				/* Consider only active logical replication slots */
 				if (!s->in_use || !SlotIsLogical(s))
 				{
 					LWLockRelease(ReplicationSlotControlLock);
 					continue;
 				}

-				/* do we need to drop it? */
+				/*
+				 * Retrieve the restart LSN to determine if we need to drop the
+				 * slot
+				 */
 				SpinLockAcquire(&s->mutex);
 				restart_lsn = s->data.restart_lsn;
 				SpinLockRelease(&s->mutex);
+
+				strlcpy(slot_name, s->data.name.data, sizeof(slot_name));
+				LWLockRelease(ReplicationSlotControlLock);
+
 				if (restart_lsn >= cutoff_lsn)
 				{
-					LWLockRelease(ReplicationSlotControlLock);
+					elog(LOG, "ls_monitor: not dropping replication slot %s because restart LSN %X/%X is greater than cutoff LSN %X/%X",
+						 slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn));
 					continue;
 				}

-				strlcpy(slot_name, s->data.name.data, NAMEDATALEN);
-				elog(LOG, "ls_monitor: dropping slot %s with restart_lsn %X/%X below horizon %X/%X",
+				elog(LOG, "ls_monitor: dropping replication slot %s because restart LSN %X/%X is lower than cutoff LSN %X/%X",
 					 slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn));
-				LWLockRelease(ReplicationSlotControlLock);

-				/* now try to drop it, killing owner before if any */
+				/* now try to drop it, killing the owner first, if any */
 				for (;;)
 				{
 					pid_t		active_pid;
@@ -288,9 +295,9 @@ LogicalSlotsMonitorMain(Datum main_arg)
 					if (active_pid == 0)
 					{
 						/*
-						 * Slot is releasted, try to drop it. Though of course
+						 * Slot is released, try to drop it. Though of course,
 						 * it could have been reacquired, so drop can ERROR
-						 * out. Similarly it could have been dropped in the
+						 * out. Similarly, it could have been dropped in the
 						 * meanwhile.
						 *
						 * In principle we could remove pg_try/pg_catch, that
 					PG_TRY();
 					{
 						ReplicationSlotDrop(slot_name, true);
-						elog(LOG, "ls_monitor: slot %s dropped", slot_name);
+						elog(LOG, "ls_monitor: replication slot %s dropped", slot_name);
 					}
 					PG_CATCH();
 					{
 						/* log ERROR and reset elog stack */
 						EmitErrorReport();
 						FlushErrorState();
-						elog(LOG, "ls_monitor: failed to drop slot %s", slot_name);
+						elog(LOG, "ls_monitor: failed to drop replication slot %s", slot_name);
 					}
 					PG_END_TRY();
 					break;
@@ -315,7 +322,7 @@ LogicalSlotsMonitorMain(Datum main_arg)
 				else
 				{
 					/* kill the owner and wait for release */
-					elog(LOG, "ls_monitor: killing slot %s owner %d", slot_name, active_pid);
+					elog(LOG, "ls_monitor: killing replication slot %s owner %d", slot_name, active_pid);
 					(void) kill(active_pid, SIGTERM);
 					/* We shouldn't get stuck, but to be safe add timeout. */
 					ConditionVariableTimedSleep(&s->active_cv, 1000, WAIT_EVENT_REPLICATION_SLOT_DROP);

From 7ee5dca752c9e9b7e65752c2561798bdd91ea3f6 Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Fri, 13 Dec 2024 13:22:25 -0500
Subject: [PATCH 22/49] fix(pageserver): race between gc-compaction and
 repartition (#10127)

## Problem

Closes https://github.com/neondatabase/neon/issues/10124

gc-compaction split_gc_jobs holds the repartition lock for too long.

## Summary of changes

* Ensure split_gc_compaction_jobs drops the repartition lock once it
finishes cloning the structures.
* Update comments.

---------

Signed-off-by: Alex Chi Z
---
 pageserver/src/tenant/timeline.rs            | 5 ++++-
 pageserver/src/tenant/timeline/compaction.rs | 8 +++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index b5c7079226..75e268a1b9 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4064,8 +4064,11 @@ impl Timeline {
         // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
         // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()`
         // and hence before the compaction task starts.
+        // Note that there is a third "caller" that will take the `partitioning` lock. It is `gc_compaction_split_jobs` for
+        // gc-compaction where it uses the repartition data to determine the split jobs. In the future, it might use its own
+        // heuristics, but for now, we should allow concurrent access to it and let the caller retry compaction.
         return Err(CompactionError::Other(anyhow!(
-            "repartition() called concurrently, this should not happen"
+            "repartition() called concurrently, this is rare and a retry should be fine"
         )));
     };
     let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 701247194b..5e6290729c 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1821,10 +1821,12 @@ impl Timeline {
         let mut compact_jobs = Vec::new();
         // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning
         // by estimating the amount of files read for a compaction job. We should also partition on LSN.
-        let Ok(partition) = self.partitioning.try_lock() else {
-            bail!("failed to acquire partition lock");
+        let ((dense_ks, sparse_ks), _) = {
+            let Ok(partition) = self.partitioning.try_lock() else {
+                bail!("failed to acquire partition lock");
+            };
+            partition.clone()
         };
-        let ((dense_ks, sparse_ks), _) = &*partition;
         // Truncate the key range to be within user specified compaction range.
         fn truncate_to(
             source_start: &Key,

From d56fea680ec2f2a741a3383c63a55eadf86ad602 Mon Sep 17 00:00:00 2001
From: Alexander Bayandin
Date: Fri, 13 Dec 2024 19:56:32 +0000
Subject: [PATCH 23/49] CI: always require aws-oicd-role-arn input to be set
 (#10145)

## Problem

`benchmarking` job fails because `aws-oicd-role-arn` input is not set

## Summary of changes:
- Set `aws-oicd-role-arn` for `benchmarking` job
- Always require `aws-oicd-role-arn` to be set
- Rename `aws_oicd_role_arn` to `aws-oicd-role-arn` for consistency

---
 .../actions/allure-report-generate/action.yml | 14 +++---
 .../actions/allure-report-store/action.yml    | 14 +++---
 .github/actions/download/action.yml           | 12 ++---
 .../actions/run-python-test-set/action.yml    | 25 +++++-----
 .github/actions/save-coverage-data/action.yml |  4 +-
 .github/actions/upload/action.yml             |  4 +-
 .../workflows/_benchmarking_preparation.yml   |  2 +-
 .github/workflows/_build-and-test-locally.yml |  4 +-
 .github/workflows/benchmarking.yml            | 48 +++++++++----------
 .github/workflows/build_and_test.yml          | 13 +++--
 .github/workflows/cloud-regress.yml           |  5 +-
 .github/workflows/ingest_benchmark.yml        | 16 +++----
 .github/workflows/periodic_pagebench.yml      |  2 +-
 .github/workflows/pg-clients.yml              | 10 ++--
 14 files changed, 88 insertions(+), 85 deletions(-)

diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml
index d6219c31b4..d07e3e32e8 100644
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -7,10 +7,9 @@ inputs:
     type: boolean
     required: false
     default: false
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interact with S3'
+    required: true

 outputs:
   base-url:
@@ -84,12 +83,11 @@ runs:
         ALLURE_VERSION: 2.27.0
         ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777

-    - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
-      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
+      if: ${{ !cancelled() }}
       with:
         aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
         role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

 # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml
index 3c83656c89..8548a886cf 100644
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -8,10 +8,9 @@ inputs:
   unique-key:
     description: 'string to distinguish different results in the same run'
     required: true
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call
must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interact with S3'
+    required: true

 runs:
   using: "composite"
@@ -36,12 +35,11 @@ runs:
       env:
         REPORT_DIR: ${{ inputs.report-dir }}

-    - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
-      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
+      if: ${{ !cancelled() }}
       with:
         aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
         role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

     - name: Upload test results
diff --git a/.github/actions/download/action.yml b/.github/actions/download/action.yml
index d6b1fac9f7..14b2ef8eac 100644
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -15,19 +15,17 @@ inputs:
   prefix:
     description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
     required: false
-  aws_oicd_role_arn:
-    description: "the OIDC role arn for aws auth"
-    required: false
-    default: ""
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interact with S3'
+    required: true

 runs:
   using: "composite"
   steps:
-    - name: Configure AWS credentials
-      uses: aws-actions/configure-aws-credentials@v4
+    - uses: aws-actions/configure-aws-credentials@v4
       with:
         aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
         role-duration-seconds: 3600

     - name: Download artifact
diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml
index dd5c890f5b..9a0261d430 100644
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -48,10 +48,9 @@ inputs:
     description: 'benchmark durations JSON'
     required: false
     default: '{}'
-  aws_oicd_role_arn:
-    description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
-    required: false
-    default: ''
+  aws-oicd-role-arn:
+    description: 'OIDC role arn to interact with S3'
+    required: true

 runs:
   using: "composite"
   steps:
@@ -62,7 +61,7 @@ runs:
       with:
         name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
         path: /tmp/neon
-        aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }}
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

     - name: Download Neon binaries for the previous release
       if: inputs.build_type != 'remote'
@@ -71,7 +70,7 @@ runs:
         name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
         path: /tmp/neon-previous
         prefix: latest
-        aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }}
+        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

     - name: Download compatibility snapshot
       if: inputs.build_type != 'remote'
@@ -83,7 +82,7 @@ runs:
         # The lack of compatibility snapshot (for example, for the new Postgres version)
         # shouldn't fail the whole job. Only relevant test should fail.
skip-if-does-not-exist: true - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Checkout if: inputs.needs_postgres_source == 'true' @@ -221,19 +220,19 @@ runs: # The lack of compatibility snapshot shouldn't fail the job # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test - if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} - uses: aws-actions/configure-aws-credentials@v4 + - uses: aws-actions/configure-aws-credentials@v4 + if: ${{ !cancelled() }} with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + - name: Upload test results if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-store with: report-dir: /tmp/test_output/allure/results unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }} - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} diff --git a/.github/actions/save-coverage-data/action.yml b/.github/actions/save-coverage-data/action.yml index 9e3a7cba24..1bbea5400f 100644 --- a/.github/actions/save-coverage-data/action.yml +++ b/.github/actions/save-coverage-data/action.yml @@ -14,11 +14,11 @@ runs: name: coverage-data-artifact path: /tmp/coverage skip-if-does-not-exist: true # skip if there's no previous coverage to download - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Upload coverage data uses: ./.github/actions/upload with: name: coverage-data-artifact path: /tmp/coverage - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} diff --git a/.github/actions/upload/action.yml b/.github/actions/upload/action.yml index 6616d08899..ac5579ccea 100644 --- a/.github/actions/upload/action.yml +++ b/.github/actions/upload/action.yml @@ -14,7 +14,7 @@ inputs: prefix: description: "S3 prefix. 
Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" required: false - aws_oicd_role_arn: + aws-oicd-role-arn: description: "the OIDC role arn for aws auth" required: false default: "" @@ -61,7 +61,7 @@ runs: uses: aws-actions/configure-aws-credentials@v4 with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 - name: Upload artifact diff --git a/.github/workflows/_benchmarking_preparation.yml b/.github/workflows/_benchmarking_preparation.yml index 371d815fc8..fd328586b3 100644 --- a/.github/workflows/_benchmarking_preparation.yml +++ b/.github/workflows/_benchmarking_preparation.yml @@ -70,7 +70,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # we create a table that has one row for each database that we want to restore with the status whether the restore is done - name: Create benchmark_restore_status table if it does not exist diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 456399f3c3..4263bacce8 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -264,7 +264,7 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact path: /tmp/neon - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # XXX: keep this after the binaries.list is formed, so the coverage can properly work later - name: Merge and upload coverage data @@ -308,7 +308,7 @@ jobs: real_s3_region: eu-central-1 rerun_failed: true pg_version: ${{ matrix.pg_version }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} CHECK_ONDISK_DATA_COMPATIBILITY: nonempty diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 2d37be8837..bbdcf5ef49 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -105,7 +105,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -123,7 +123,7 @@ jobs: run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # Set --sparse-ordering option of pytest-order plugin # to ensure tests are running in order of appears in the file. 
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests @@ -153,7 +153,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -205,7 +205,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Run Logical Replication benchmarks uses: ./.github/actions/run-python-test-set @@ -216,7 +216,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -233,7 +233,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -245,7 +245,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -407,7 +407,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) @@ -455,7 +455,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -470,7 +470,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -485,7 +485,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -503,7 +503,7 @@ jobs: if: ${{ 
!cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -614,7 +614,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -629,7 +629,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -640,7 +640,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -711,7 +711,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -743,7 +743,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -757,7 +757,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -822,7 +822,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get Connstring Secret Name run: | @@ -861,7 +861,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_tpch pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -873,7 +873,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -931,7 +931,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + 
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -963,7 +963,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_user_examples pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -974,7 +974,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b3556debe3..55c4bf08b9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -336,6 +336,7 @@ jobs: extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }} pg_version: v16 + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -393,7 +394,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -455,14 +456,14 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact path: /tmp/neon - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get coverage artifact uses: ./.github/actions/download with: name: coverage-data-artifact path: /tmp/coverage - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Merge coverage data run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge @@ -1279,6 +1280,12 @@ jobs: echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT} echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT} + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + - name: Promote compatibility snapshot and Neon artifact env: BUCKET: neon-github-public-dev diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 7b9e434ec3..55f42ea533 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -79,7 +79,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create a new branch id: create-branch @@ -95,6 +95,7 @@ jobs: test_selection: cloud_regress pg_version: ${{matrix.pg-version}} extra_params: -m remote_cluster + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}} @@ -111,7 +112,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ 
vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index 6773032263..fc33c0a980 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -13,7 +13,7 @@ on: # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - cron: '0 9 * * *' # run once a day, timezone is utc workflow_dispatch: # adds ability to run this manually - + defaults: run: shell: bash -euxo pipefail {0} @@ -28,7 +28,7 @@ jobs: strategy: fail-fast: false # allow other variants to continue even if one fails matrix: - target_project: [new_empty_project, large_existing_project] + target_project: [new_empty_project, large_existing_project] permissions: contents: write statuses: write @@ -56,7 +56,7 @@ jobs: with: aws-region: eu-central-1 role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role + role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role - name: Download Neon artifact uses: ./.github/actions/download @@ -64,7 +64,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: ${{ matrix.target_project == 'new_empty_project' }} @@ -95,7 +95,7 @@ jobs: project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project + - name: Initialize Neon project if: ${{ matrix.target_project == 'large_existing_project' }} env: BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} @@ -123,7 +123,7 @@ jobs: ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - - name: Invoke pgcopydb + - name: Invoke pgcopydb uses: ./.github/actions/run-python-test-set with: build_type: remote @@ -132,7 +132,7 @@ jobs: extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb pg_version: v16 save_perf_report: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} TARGET_PROJECT_TYPE: ${{ matrix.target_project }} @@ -144,7 +144,7 @@ jobs: run: | export LD_LIBRARY_PATH=${PG_16_LIB_PATH} ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - + - name: Delete Neon Project if: ${{ always() && matrix.target_project == 'new_empty_project' }} uses: ./.github/actions/neon-project-delete diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 049990f17b..af877029e4 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -137,7 +137,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index 
5c999d3810..4947907eb0 100644
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -96,7 +96,7 @@ jobs:
           name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
           path: /tmp/neon/
           prefix: latest
-          aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

       - name: Create Neon Project
         id: create-neon-project
@@ -113,6 +113,7 @@ jobs:
           run_in_parallel: false
           extra_params: -m remote_cluster
           pg_version: ${{ env.DEFAULT_PG_VERSION }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
         env:
           BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -129,7 +130,7 @@ jobs:
         uses: ./.github/actions/allure-report-generate
         with:
           store-test-results-into-db: true
-          aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
         env:
           REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
@@ -163,7 +164,7 @@ jobs:
           name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
           path: /tmp/neon/
           prefix: latest
-          aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

       - name: Create Neon Project
         id: create-neon-project
@@ -180,6 +181,7 @@ jobs:
           run_in_parallel: false
           extra_params: -m remote_cluster
           pg_version: ${{ env.DEFAULT_PG_VERSION }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
         env:
           BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -196,7 +198,7 @@ jobs:
         uses: ./.github/actions/allure-report-generate
         with:
           store-test-results-into-db: true
-          aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
         env:
           REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

From 2521eba6741f000d71c1f6e0d2d0b279c8465f47 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Fri, 13 Dec 2024 22:46:41 +0200
Subject: [PATCH 24/49] Check for invalid down link while prefetching B-Tree
 leaf pages for index-only scan (#9867)

## Problem

See #9866

The index-only scan prefetch implementation doesn't take into account that the down link may be invalid.

## Summary of changes

Check that the downlink is a valid block number.

Corresponding Postgres PRs:
https://github.com/neondatabase/postgres/pull/534
https://github.com/neondatabase/postgres/pull/535
https://github.com/neondatabase/postgres/pull/536
https://github.com/neondatabase/postgres/pull/537

---------

Co-authored-by: Konstantin Knizhnik
---
 vendor/postgres-v14   | 2 +-
 vendor/postgres-v15   | 2 +-
 vendor/postgres-v16   | 2 +-
 vendor/postgres-v17   | 2 +-
 vendor/revisions.json | 8 ++++----
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/vendor/postgres-v14 b/vendor/postgres-v14
index 13ff324150..c2f65b3201 160000
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
@@ -1 +1 @@
-Subproject commit 13ff324150fceaac72920e01742addc053db9462
+Subproject commit c2f65b3201591e02ce45b66731392f98d3388e73
diff --git a/vendor/postgres-v15 b/vendor/postgres-v15
index 8736b10c1d..f262d631ad 160000
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
@@ -1 +1 @@
-Subproject commit 8736b10c1d93d11b9c0489872dd529c4c0f5338f
+Subproject commit f262d631ad477a1819e84a183e5a7ef561830085
diff --git a/vendor/postgres-v16 b/vendor/postgres-v16
index 81428621f7..97f9fde349 160000
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
@@ -1 +1 @@
-Subproject commit 81428621f7c04aed03671cf80a928e0a36d92505
+Subproject commit 97f9fde349c6de6d573f5ce96db07eca60ce6185
diff --git a/vendor/postgres-v17 
b/vendor/postgres-v17
index 01fa3c4866..010c0ea2eb 160000
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
@@ -1 +1 @@
-Subproject commit 01fa3c48664ca030cfb69bb4a350aa9df4691d88
+Subproject commit 010c0ea2eb06afe76485a33c43954cbcf3d99f86
diff --git a/vendor/revisions.json b/vendor/revisions.json
index 7329aa437f..afcb922a5e 100644
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
   "v17": [
     "17.2",
-    "01fa3c48664ca030cfb69bb4a350aa9df4691d88"
+    "010c0ea2eb06afe76485a33c43954cbcf3d99f86"
   ],
   "v16": [
     "16.6",
-    "81428621f7c04aed03671cf80a928e0a36d92505"
+    "97f9fde349c6de6d573f5ce96db07eca60ce6185"
   ],
   "v15": [
     "15.10",
-    "8736b10c1d93d11b9c0489872dd529c4c0f5338f"
+    "f262d631ad477a1819e84a183e5a7ef561830085"
   ],
   "v14": [
     "14.15",
-    "13ff324150fceaac72920e01742addc053db9462"
+    "c2f65b3201591e02ce45b66731392f98d3388e73"
   ]
 }

From cf161e1556229dae17b5b73e03c3b5781f7bf952 Mon Sep 17 00:00:00 2001
From: Mikhail Kot
Date: Sat, 14 Dec 2024 17:37:13 +0000
Subject: [PATCH 25/49] fix(adapter): password not set in role drop (#10130)

## Problem

When an entry was dropped and its password wasn't set, the new entry had uninitialized memory in the control plane adapter.

Resolves: https://github.com/neondatabase/cloud/issues/14914

## Summary of changes

Initialize the password in all cases and add tests. Minor formatting for less indentation.

---
 pgxn/neon/control_plane_connector.c        | 39 ++++++++++------------
 test_runner/regress/test_ddl_forwarding.py | 29 ++++++++++++----
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/pgxn/neon/control_plane_connector.c b/pgxn/neon/control_plane_connector.c
index b47b22cd20..59096a1bc8 100644
--- a/pgxn/neon/control_plane_connector.c
+++ b/pgxn/neon/control_plane_connector.c
@@ -428,6 +428,8 @@ MergeTable()
 		hash_seq_init(&status, old_table->role_table);
 		while ((entry = hash_seq_search(&status)) != NULL)
 		{
+			RoleEntry * old;
+			bool		found_old = false;
 			RoleEntry  *to_write = hash_search(
 											   CurrentDdlTable->role_table,
 											   entry->name,
 											   HASH_ENTER,
 											   NULL);

 			to_write->type = entry->type;
-			if (entry->password)
-				to_write->password = entry->password;
+			to_write->password = entry->password;
 			strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
-			if (entry->old_name[0] != '\0')
-			{
-				bool		found_old = false;
-				RoleEntry  *old = hash_search(
-											  CurrentDdlTable->role_table,
-											  entry->old_name,
-											  HASH_FIND,
-											  &found_old);
+			if (entry->old_name[0] == '\0')
+				continue;

-				if (found_old)
-				{
-					if (old->old_name[0] != '\0')
-						strlcpy(to_write->old_name, old->old_name, NAMEDATALEN);
-					else
-						strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
-					hash_search(CurrentDdlTable->role_table,
-								entry->old_name,
-								HASH_REMOVE,
-								NULL);
-				}
-			}
+			old = hash_search(
+							  CurrentDdlTable->role_table,
+							  entry->old_name,
+							  HASH_FIND,
+							  &found_old);
+			if (!found_old)
+				continue;
+			strlcpy(to_write->old_name, old->old_name, NAMEDATALEN);
+			hash_search(CurrentDdlTable->role_table,
+						entry->old_name,
+						HASH_REMOVE,
+						NULL);
 		}
 		hash_destroy(old_table->role_table);
 	}
diff --git a/test_runner/regress/test_ddl_forwarding.py b/test_runner/regress/test_ddl_forwarding.py
index de44bbcbc8..b10e38885e 100644
--- a/test_runner/regress/test_ddl_forwarding.py
+++ b/test_runner/regress/test_ddl_forwarding.py
@@ -60,14 +60,12 @@ def ddl_forward_handler(
     if request.json is None:
         log.info("Received invalid JSON")
         return Response(status=400)
-    json = request.json
+    json: dict[str, list[str]] = request.json
     # Handle roles first
-    if "roles" in json:
-        for operation in json["roles"]:
-            
handle_role(dbs, roles, operation) - if "dbs" in json: - for operation in json["dbs"]: - handle_db(dbs, roles, operation) + for operation in json.get("roles", []): + handle_role(dbs, roles, operation) + for operation in json.get("dbs", []): + handle_db(dbs, roles, operation) return Response(status=200) @@ -207,6 +205,23 @@ def test_ddl_forwarding(ddl: DdlForwardingContext): ddl.wait() assert ddl.roles == {} + cur.execute("CREATE ROLE bork WITH PASSWORD 'newyork'") + cur.execute("BEGIN") + cur.execute("SAVEPOINT point") + cur.execute("DROP ROLE bork") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {} + + cur.execute("CREATE ROLE bork WITH PASSWORD 'oldyork'") + cur.execute("BEGIN") + cur.execute("SAVEPOINT point") + cur.execute("ALTER ROLE bork PASSWORD NULL") + cur.execute("COMMIT") + cur.execute("DROP ROLE bork") + ddl.wait() + assert ddl.roles == {} + cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'") cur.execute("CREATE DATABASE stork WITH OWNER=bork") cur.execute("ALTER ROLE bork RENAME TO cork") From f3ecd5d76ad8b858b2bfaaabba5018046aca46ac Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 15 Dec 2024 10:45:12 +0100 Subject: [PATCH 26/49] pageserver: revert flush backpressure (#8550) (#10135) ## Problem In #8550, we made the flush loop wait for uploads after every layer. This was to avoid unbounded buildup of uploads, and to reduce compaction debt. However, the approach has several problems: * It prevents upload parallelism. * It prevents flush and upload pipelining. * It slows down ingestion even when there is no need to backpressure. * It does not directly backpressure WAL ingestion (only via `disk_consistent_lsn`), and will build up in-memory layers. * It does not directly backpressure based on compaction debt and read amplification. An alternative solution to these problems is proposed in #8390. In the meanwhile, we revert the change to reduce the impact on ingest throughput. This does reintroduce some risk of unbounded upload/compaction buildup. Until https://github.com/neondatabase/neon/issues/8390, this can be addressed in other ways: * Use `max_replication_apply_lag` (aka `remote_consistent_lsn`), which will more directly limit upload debt. * Shard the tenant, which will spread the flush/upload work across more Pageservers and move the bottleneck to Safekeeper. Touches #10095. ## Summary of changes Remove waiting on the upload queue in the flush loop. 
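An illustration of the direction this leaves us pointed in: rather than draining the upload queue after every flushed layer, backpressure can be applied only once a bound on in-flight uploads is hit. A minimal sketch, assuming a hypothetical `upload_layer` function and a plain semaphore in place of the pageserver's real upload queue:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Hedged sketch, not the pageserver's actual code: awaiting upload
// completion after every flushed layer serializes flush and upload,
// while a semaphore lets uploads pipeline until the bound is hit.
const MAX_INFLIGHT_UPLOADS: usize = 8; // hypothetical bound

async fn flush_loop(layers: Vec<Vec<u8>>) {
    let permits = Arc::new(Semaphore::new(MAX_INFLIGHT_UPLOADS));
    for layer in layers {
        // Backpressure point: blocks only when MAX_INFLIGHT_UPLOADS
        // uploads are still in flight, not after every single layer.
        let permit = permits.clone().acquire_owned().await.unwrap();
        tokio::spawn(async move {
            upload_layer(layer).await;
            drop(permit); // free the slot once the upload completes
        });
    }
}

async fn upload_layer(_layer: Vec<u8>) {
    // remote storage PUT elided in this sketch
}

#[tokio::main]
async fn main() {
    // Uploads are detached in this sketch; a real flush loop would join them.
    flush_loop(vec![vec![0u8; 1024]; 32]).await;
}
```
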
--- pageserver/src/metrics.rs | 25 +---------- pageserver/src/tenant/timeline.rs | 38 ++++------------- test_runner/fixtures/metrics.py | 1 - test_runner/regress/test_branching.py | 13 ++---- test_runner/regress/test_remote_storage.py | 48 ---------------------- 5 files changed, 13 insertions(+), 112 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b4e20cb8b9..bdbabf3f75 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -3,7 +3,7 @@ use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, + Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; @@ -445,15 +445,6 @@ pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -static FLUSH_WAIT_UPLOAD_TIME: Lazy = Lazy::new(|| { - register_gauge_vec!( - "pageserver_flush_wait_upload_seconds", - "Time spent waiting for preceding uploads during layer flush", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - static LAST_RECORD_LSN: Lazy = Lazy::new(|| { register_int_gauge_vec!( "pageserver_last_record_lsn", @@ -2586,7 +2577,6 @@ pub(crate) struct TimelineMetrics { shard_id: String, timeline_id: String, pub flush_time_histo: StorageTimeMetrics, - pub flush_wait_upload_time_gauge: Gauge, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, @@ -2632,9 +2622,6 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); - let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME - .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) - .unwrap(); let compact_time_histo = StorageTimeMetrics::new( StorageTimeOperation::Compact, &tenant_id, @@ -2780,7 +2767,6 @@ impl TimelineMetrics { shard_id, timeline_id, flush_time_histo, - flush_wait_upload_time_gauge, compact_time_histo, create_images_time_histo, logical_size_histo, @@ -2830,14 +2816,6 @@ impl TimelineMetrics { self.resident_physical_size_gauge.get() } - pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) { - self.flush_wait_upload_time_gauge.add(duration); - crate::metrics::FLUSH_WAIT_UPLOAD_TIME - .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id]) - .unwrap() - .add(duration); - } - pub(crate) fn shutdown(&self) { let was_shutdown = self .shutdown @@ -2855,7 +2833,6 @@ impl TimelineMetrics { let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 75e268a1b9..0416953c1f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -144,19 +144,15 @@ use 
self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; +use super::config::TenantConf; +use super::remote_timeline_client::index::IndexPart; +use super::remote_timeline_client::RemoteTimelineClient; +use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline}; +use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; +use super::upload_queue::NotInitialized; +use super::GcError; use super::{ - config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, - MaybeOffloaded, -}; -use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; -use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; -use super::{ - remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, - storage_layer::ReadableLayer, -}; -use super::{ - secondary::heatmap::{HeatMapLayer, HeatMapTimeline}, - GcError, + debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded, }; #[cfg(test)] @@ -3897,24 +3893,6 @@ impl Timeline { // release lock on 'layers' }; - // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files. - // This makes us refuse ingest until the new layers have been persisted to the remote - let start = Instant::now(); - self.remote_client - .wait_completion() - .await - .map_err(|e| match e { - WaitCompletionError::UploadQueueShutDownOrStopped - | WaitCompletionError::NotInitialized( - NotInitialized::ShuttingDown | NotInitialized::Stopped, - ) => FlushLayerError::Cancelled, - WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => { - FlushLayerError::Other(anyhow!(e).into()) - } - })?; - let duration = start.elapsed().as_secs_f64(); - self.metrics.flush_wait_upload_time_gauge_add(duration); - // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`, // a compaction can delete the file and then it won't be available for uploads any more. // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index c5295360c3..eb3d06b949 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -170,7 +170,6 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] 
= ( "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", "pageserver_valid_lsn_lease_count", - "pageserver_flush_wait_upload_seconds", counter("pageserver_tenant_throttling_count_accounted_start"), counter("pageserver_tenant_throttling_count_accounted_finish"), counter("pageserver_tenant_throttling_wait_usecs_sum"), diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 34e4e994cb..a4056404f0 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -19,6 +19,7 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException +from requests.exceptions import RetryError # Test branch creation @@ -176,11 +177,8 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline) - with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"): - env.endpoints.create_start( - initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 - ) - ps_http.configure_failpoints(("before-upload-index-pausable", "off")) + with pytest.raises(RuntimeError, match="is not active, state: Loading"): + env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant) finally: env.pageserver.stop(immediate=True) @@ -221,10 +219,7 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises( - PageserverApiException, - match="Cannot branch off the timeline that's not present in pageserver", - ): + with pytest.raises(RetryError, match="too many 503 error responses"): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 76a42ef4a2..52b6b254aa 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -784,54 +784,6 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() -def test_paused_upload_stalls_checkpoint( - neon_env_builder: NeonEnvBuilder, -): - """ - This test checks that checkpoints block on uploads to remote storage. - """ - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - - env = neon_env_builder.init_start( - initial_tenant_conf={ - # Set a small compaction threshold - "compaction_threshold": "3", - # Disable GC - "gc_period": "0s", - # disable PITR - "pitr_interval": "0s", - } - ) - - env.pageserver.allowed_errors.append( - f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" - ) - - tenant_id = env.initial_tenant - timeline_id = env.initial_timeline - - client = env.pageserver.http_client() - layers_at_creation = client.layer_map_info(tenant_id, timeline_id) - deltas_at_creation = len(layers_at_creation.delta_layers()) - assert ( - deltas_at_creation == 1 - ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" - - # Make new layer uploads get stuck. - # Note that timeline creation waits for the initial layers to reach remote storage. - # So at this point, the `layers_at_creation` are in remote storage. 
-    client.configure_failpoints(("before-upload-layer-pausable", "pause"))
-
-    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
-        # Build two tables with some data inside
-        endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)")
-        wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
-
-        with pytest.raises(ReadTimeout):
-            client.timeline_checkpoint(tenant_id, timeline_id, timeout=5)
-        client.configure_failpoints(("before-upload-layer-pausable", "off"))
-
-
 def wait_upload_queue_empty(
     client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
 ):

From 117c1b5ddec110677f06dbc769712a722b55f2d3 Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Mon, 16 Dec 2024 08:03:53 +0200
Subject: [PATCH 27/49] Do not perform prefetch for temp relations (#10146)

## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1734002916827019

With the recent prefetch fixes for pg17 and `effective_io_concurrency=100`, the pg_regress test stats.sql fails when temp_buffers is set to 100: the stream API will try to lock all 100 of these buffers for prefetch.

## Summary of changes

Disable this behaviour for temp relations.

Postgres PR: https://github.com/neondatabase/postgres/pull/548

Co-authored-by: Konstantin Knizhnik
---
 vendor/postgres-v17   | 2 +-
 vendor/revisions.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vendor/postgres-v17 b/vendor/postgres-v17
index 010c0ea2eb..65c4e46baf 160000
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
@@ -1 +1 @@
-Subproject commit 010c0ea2eb06afe76485a33c43954cbcf3d99f86
+Subproject commit 65c4e46baf56ec05412c7dd63d62faff0b33dcfb
diff --git a/vendor/revisions.json b/vendor/revisions.json
index afcb922a5e..c8db81c73f 100644
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,7 +1,7 @@
 {
   "v17": [
     "17.2",
-    "010c0ea2eb06afe76485a33c43954cbcf3d99f86"
+    "65c4e46baf56ec05412c7dd63d62faff0b33dcfb"
   ],
   "v16": [
     "16.6",

From ebcbc1a4822c566a865ca2dd9138e5af2838c794 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Mon, 16 Dec 2024 10:06:08 +0000
Subject: [PATCH 28/49] pageserver: tighten up code around SLRU dir key
 handling (#10082)

## Problem

Changes in #9786 were functionally complete but missed some edges that made testing less robust than it should have been:
- `is_key_disposable` didn't consider SLRU dir keys disposable
- Timeline `init_empty` was always creating SLRU dir keys on all shards

The result was that when we had a bug (https://github.com/neondatabase/neon/pull/10080), it wasn't apparent in tests, because one would only encounter the issue if running on a long-lived timeline with enough compaction to drop the initially created empty SLRU dir keys, _and_ some CLog truncation going on.

Closes: https://github.com/neondatabase/cloud/issues/21516

## Summary of changes

- Update is_key_global and init_empty to handle SLRU dir keys properly -- the only functional impact is that we avoid writing some spurious keys in shards >0, but this makes testing much more robust.
- Make `test_clog_truncate` explicitly use a sharded tenant

The net result is that if one reverts #10080, then tests fail (i.e. 
this PR is a reproducer for the issue) --- libs/pageserver_api/src/key.rs | 4 ++++ libs/pageserver_api/src/shard.rs | 6 ++++- pageserver/src/pgdatadir_mapping.rs | 29 +++++++++++++---------- test_runner/regress/test_clog_truncate.py | 24 ++++++++++++------- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index 373329c9b4..f0cd713c38 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -565,6 +565,10 @@ impl Key { && self.field5 == 0 && self.field6 == u32::MAX } + + pub fn is_slru_dir_key(&self) -> bool { + slru_dir_kind(self).is_some() + } } #[inline(always)] diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index cf0cd3a46b..4cc0a739e8 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -173,7 +173,11 @@ impl ShardIdentity { /// Return true if the key should be stored on all shards, not just one. pub fn is_key_global(&self, key: &Key) -> bool { - if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() { + if key.is_slru_block_key() + || key.is_slru_segment_size_key() + || key.is_aux_file_key() + || key.is_slru_dir_key() + { // Special keys that are only stored on shard 0 false } else if key.is_rel_block_key() { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 255bd01e25..3eaecd3a08 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1319,18 +1319,23 @@ impl<'a> DatadirModification<'a> { let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into(); let empty_dir = Value::Image(buf); - self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone()); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); - self.put( - slru_dir_to_key(SlruKind::MultiXactMembers), - empty_dir.clone(), - ); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); - self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0)); + + // Initialize SLRUs on shard 0 only: creating these on other shards would be + // harmless but they'd just be dropped on later compaction. 
+        if self.tline.tenant_shard_id.is_shard_zero() {
+            self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.put(
+                slru_dir_to_key(SlruKind::MultiXactMembers),
+                empty_dir.clone(),
+            );
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
+        }

         Ok(())
     }
diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py
index 10027ce689..2ae38e6d88 100644
--- a/test_runner/regress/test_clog_truncate.py
+++ b/test_runner/regress/test_clog_truncate.py
@@ -1,18 +1,19 @@
 from __future__ import annotations

 import os
-import time

 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv
-from fixtures.utils import query_scalar
+from fixtures.neon_fixtures import NeonEnvBuilder
+from fixtures.utils import query_scalar, wait_until


 #
 # Test compute node start after clog truncation
 #
-def test_clog_truncate(neon_simple_env: NeonEnv):
-    env = neon_simple_env
+def test_clog_truncate(neon_env_builder: NeonEnvBuilder):
+    # Use a multi-sharded tenant because WAL ingest logic is shard-dependent, and
+    # this test is one of the very few that exercises a CLogTruncate WAL record.
+    env = neon_env_builder.init_start(initial_tenant_shard_count=2)

     # set aggressive autovacuum to make sure that truncation will happen
     config = [
@@ -31,6 +32,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
     endpoint.safe_psql("CREATE EXTENSION neon_test_utils")

     # Consume many xids to advance clog
+    log.info("Consuming xids...")
     with endpoint.cursor() as cur:
         cur.execute("select test_consume_xids(1000*1000*10);")
         log.info("xids consumed")
@@ -47,11 +49,17 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
     pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000")
     log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")

-    while os.path.isfile(pg_xact_0000_path):
-        log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}")
-        time.sleep(5)
+    def assert_file_removed():
+        exists = os.path.isfile(pg_xact_0000_path)
+        if exists:
+            log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}")
+        assert not exists
+
+    log.info("Waiting for truncation...")
+    wait_until(assert_file_removed)

     # checkpoint to advance latest lsn
+    log.info("Checkpointing...")
     with endpoint.cursor() as cur:
         cur.execute("CHECKPOINT;")
         lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()")

From 24d658791410d81d9b854ba1c8036068d6467bc2 Mon Sep 17 00:00:00 2001
From: Conrad Ludgate
Date: Mon, 16 Dec 2024 11:15:25 +0000
Subject: [PATCH 29/49] chore(proxy): refactor self-signed config (#10154)

## Problem

While reviewing #10152 I found it tricky to actually determine whether the connection used `allow_self_signed_compute` or not.

I've tried to remove this setting in the past:
* https://github.com/neondatabase/neon/pull/7884
* https://github.com/neondatabase/neon/pull/7437
* https://github.com/neondatabase/cloud/pull/13702

But each time it turned out it was still used by e2e tests.

## Summary of changes

The `node_info.allow_self_signed_computes` is always initialised to false, and then sometimes inherits the proxy config value. This doesn't need to be in the node_info, so removing it and propagating it via `TcpMechanism` is simpler.
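The shape of the refactor, reduced to a hedged sketch with stand-in types (not the proxy's real definitions):

```rust
// Before: the flag lived on cached per-node state and was re-set after
// every cache lookup, so the effective value was hard to trace.
struct NodeInfoBefore {
    host: String,
    allow_self_signed_compute: bool, // mutated late, inherited from config
}

// After: cached node info stays immutable; the per-request connect
// mechanism carries the flag, fixed at the single call site that builds
// the mechanism from the proxy config.
struct NodeInfo {
    host: String,
}

struct TcpMechanism {
    allow_self_signed_compute: bool,
}

impl TcpMechanism {
    fn connect(&self, node: &NodeInfo) {
        println!(
            "connecting to {} (self-signed allowed: {})",
            node.host, self.allow_self_signed_compute
        );
    }
}

fn main() {
    let node = NodeInfo { host: "compute-host".into() };
    let mechanism = TcpMechanism { allow_self_signed_compute: false };
    mechanism.connect(&node);
}
```
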
--- proxy/src/auth/backend/console_redirect.rs | 1 - proxy/src/auth/backend/local.rs | 1 - proxy/src/console_redirect_proxy.rs | 2 +- proxy/src/control_plane/client/cplane_proxy_v1.rs | 1 - proxy/src/control_plane/client/mock.rs | 1 - proxy/src/control_plane/mod.rs | 13 +++---------- proxy/src/proxy/connect_compute.rs | 11 ++++++++--- proxy/src/proxy/mod.rs | 2 +- proxy/src/proxy/tests/mod.rs | 14 ++++++-------- proxy/src/redis/notifications.rs | 2 +- proxy/src/serverless/backend.rs | 2 -- 11 files changed, 20 insertions(+), 30 deletions(-) diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index 575d60be85..c3de77b352 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -187,7 +187,6 @@ async fn authenticate( NodeInfo { config, aux: db_info.aux, - allow_self_signed_compute: false, // caller may override }, db_info.allowed_ips, )) diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index d4273fb521..d10f0e82b2 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -37,7 +37,6 @@ impl LocalBackend { branch_id: BranchIdTag::get_interner().get_or_intern("local"), cold_start_info: ColdStartInfo::WarmCached, }, - allow_self_signed_compute: false, }, } } diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 65702e0e4c..02398fb777 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -213,9 +213,9 @@ pub(crate) async fn handle_client( params_compat: true, params: ¶ms, locks: &config.connect_compute_locks, + allow_self_signed_compute: config.allow_self_signed_compute, }, &user_info, - config.allow_self_signed_compute, config.wake_compute_retry_config, config.connect_to_compute_retry_config, ) diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index e33a37f643..00038a6ac6 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -250,7 +250,6 @@ impl NeonControlPlaneClient { let node = NodeInfo { config, aux: body.aux, - allow_self_signed_compute: false, }; Ok(node) diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index eaf692ab27..93edd65476 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -174,7 +174,6 @@ impl MockControlPlane { branch_id: (&BranchId::from("branch")).into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, - allow_self_signed_compute: false, }; Ok(node) diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 41972e4e44..c0718920b4 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -67,28 +67,21 @@ pub(crate) struct NodeInfo { /// Labels for proxy's metrics. 
pub(crate) aux: MetricsAuxInfo, - - /// Whether we should accept self-signed certificates (for testing) - pub(crate) allow_self_signed_compute: bool, } impl NodeInfo { pub(crate) async fn connect( &self, ctx: &RequestContext, + allow_self_signed_compute: bool, timeout: Duration, ) -> Result { self.config - .connect( - ctx, - self.allow_self_signed_compute, - self.aux.clone(), - timeout, - ) + .connect(ctx, allow_self_signed_compute, self.aux.clone(), timeout) .await } + pub(crate) fn reuse_settings(&mut self, other: Self) { - self.allow_self_signed_compute = other.allow_self_signed_compute; self.config.reuse_password(other.config); } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index a3027abd7c..6da4c90a53 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -73,6 +73,9 @@ pub(crate) struct TcpMechanism<'a> { /// connect_to_compute concurrency lock pub(crate) locks: &'static ApiLocks, + + /// Whether we should accept self-signed certificates (for testing) + pub(crate) allow_self_signed_compute: bool, } #[async_trait] @@ -90,7 +93,11 @@ impl ConnectMechanism for TcpMechanism<'_> { ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; - permit.release_result(node_info.connect(ctx, timeout).await) + permit.release_result( + node_info + .connect(ctx, self.allow_self_signed_compute, timeout) + .await, + ) } fn update_connect_config(&self, config: &mut compute::ConnCfg) { @@ -104,7 +111,6 @@ pub(crate) async fn connect_to_compute Result @@ -117,7 +123,6 @@ where wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?; node_info.set_keys(user_info.get_keys()); - node_info.allow_self_signed_compute = allow_self_signed_compute; mechanism.update_connect_config(&mut node_info.config); // try once diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index cc04bc5e5c..de0ec0f799 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -355,9 +355,9 @@ pub(crate) async fn handle_client( params_compat, params: ¶ms, locks: &config.connect_compute_locks, + allow_self_signed_compute: mode.allow_self_signed_compute(config), }, &user_info, - mode.allow_self_signed_compute(config), config.wake_compute_retry_config, config.connect_to_compute_retry_config, ) diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 911b349416..3899ba4267 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -553,7 +553,6 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn branch_id: (&BranchId::from("branch")).into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, - allow_self_signed_compute: false, }; let (_, node2) = cache.insert_unit("key".into(), Ok(node.clone())); node2.map(|()| node) @@ -588,7 +587,7 @@ async fn connect_to_compute_success() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -606,7 +605,7 @@ async fn connect_to_compute_retry() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -625,7 +624,7 @@ async fn connect_to_compute_non_retry_1() { max_retries: 5, backoff_factor: 2.0, }; - 
connect_to_compute(&ctx, &mechanism, &user_info, false, config, config)
+    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
         .await
         .unwrap_err();
     mechanism.verify();
@@ -644,7 +643,7 @@ async fn connect_to_compute_non_retry_2() {
         max_retries: 5,
         backoff_factor: 2.0,
     };
-    connect_to_compute(&ctx, &mechanism, &user_info, false, config, config)
+    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
         .await
         .unwrap();
     mechanism.verify();
@@ -674,7 +673,6 @@ async fn connect_to_compute_non_retry_3() {
         &ctx,
         &mechanism,
         &user_info,
-        false,
         wake_compute_retry_config,
         connect_to_compute_retry_config,
     )
@@ -696,7 +694,7 @@ async fn wake_retry() {
         max_retries: 5,
         backoff_factor: 2.0,
     };
-    connect_to_compute(&ctx, &mechanism, &user_info, false, config, config)
+    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
         .await
         .unwrap();
     mechanism.verify();
@@ -715,7 +713,7 @@ async fn wake_non_retry() {
         max_retries: 5,
         backoff_factor: 2.0,
     };
-    connect_to_compute(&ctx, &mechanism, &user_info, false, config, config)
+    connect_to_compute(&ctx, &mechanism, &user_info, config, config)
         .await
         .unwrap_err();
     mechanism.verify();
diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index f3aa97c032..d18dfd2465 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -6,6 +6,7 @@ use pq_proto::CancelKeyData;
 use redis::aio::PubSub;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
+use tracing::Instrument;
 use uuid::Uuid;

 use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
@@ -13,7 +14,6 @@ use crate::cache::project_info::ProjectInfoCache;
 use crate::cancellation::{CancelMap, CancellationHandler};
 use crate::intern::{ProjectIdInt, RoleNameInt};
 use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};
-use tracing::Instrument;

 const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
 pub(crate) const PROXY_CHANNEL_NAME: &str = "neondb-proxy-to-proxy-updates";
diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs
index 251aa47084..15d883bdb0 100644
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -195,7 +195,6 @@ impl PoolingBackend {
                     locks: &self.config.connect_compute_locks,
                 },
                 &backend,
-                false, // do not allow self signed compute for http flow
                 self.config.wake_compute_retry_config,
                 self.config.connect_to_compute_retry_config,
             )
@@ -237,7 +236,6 @@ impl PoolingBackend {
                     locks: &self.config.connect_compute_locks,
                 },
                 &backend,
-                false, // do not allow self signed compute for http flow
                 self.config.wake_compute_retry_config,
                 self.config.connect_to_compute_retry_config,
             )

From 1ed0e52bc837fd8eb356847e452444b5df1f4b0b Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Mon, 16 Dec 2024 15:07:24 +0300
Subject: [PATCH 30/49] Extract safekeeper http client to separate crate.
 (#10140)

## Problem

We want to use the safekeeper HTTP client in the storage controller and in neon_local.

## Summary of changes

Extract it to a separate crate. No functional changes.
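To sketch what the extraction enables (a hedged, self-contained illustration with stand-in types, not the real crate API): any workspace binary adds a path dependency on `safekeeper_client` and talks to a safekeeper's HTTP API without pulling in the safekeeper binary crate.

```rust
// Stand-in Client mirroring the shape of the call in pull_timeline.rs,
// `Client::new(url.clone(), sk_auth_token.clone())`; field names and the
// describe() helper are illustrative assumptions.
struct Client {
    base_url: String,
    auth_token: Option<String>,
}

impl Client {
    fn new(base_url: String, auth_token: Option<String>) -> Self {
        Client { base_url, auth_token }
    }

    fn describe(&self) -> String {
        format!(
            "safekeeper at {} (auth configured: {})",
            self.base_url,
            self.auth_token.is_some()
        )
    }
}

fn main() {
    // e.g. how storage_controller or neon_local might construct a client
    let client = Client::new("http://sk-1:7676".to_string(), None);
    println!("{}", client.describe());
}
```
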
--- Cargo.lock | 13 +++++++++++++ Cargo.toml | 2 ++ safekeeper/Cargo.toml | 1 + safekeeper/client/Cargo.toml | 13 +++++++++++++ safekeeper/client/src/lib.rs | 1 + .../{src/http/client.rs => client/src/mgmt_api.rs} | 4 ---- safekeeper/src/http/mod.rs | 1 - safekeeper/src/pull_timeline.rs | 5 +++-- 8 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 safekeeper/client/Cargo.toml create mode 100644 safekeeper/client/src/lib.rs rename safekeeper/{src/http/client.rs => client/src/mgmt_api.rs} (96%) diff --git a/Cargo.lock b/Cargo.lock index c4f80f63c9..d1f7746969 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5535,6 +5535,7 @@ dependencies = [ "remote_storage", "reqwest", "safekeeper_api", + "safekeeper_client", "scopeguard", "sd-notify", "serde", @@ -5572,6 +5573,18 @@ dependencies = [ "utils", ] +[[package]] +name = "safekeeper_client" +version = "0.1.0" +dependencies = [ + "reqwest", + "safekeeper_api", + "serde", + "thiserror", + "utils", + "workspace_hack", +] + [[package]] name = "same-file" version = "1.0.6" diff --git a/Cargo.toml b/Cargo.toml index 0654c25a3d..056cd5798f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "pageserver/pagebench", "proxy", "safekeeper", + "safekeeper/client", "storage_broker", "storage_controller", "storage_controller/client", @@ -233,6 +234,7 @@ postgres_initdb = { path = "./libs/postgres_initdb" } pq_proto = { version = "0.1", path = "./libs/pq_proto/" } remote_storage = { version = "0.1", path = "./libs/remote_storage/" } safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" } +safekeeper_client = { path = "./safekeeper/client" } desim = { version = "0.1", path = "./libs/desim" } storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy. storage_controller_client = { path = "./storage_controller/client" } diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index 0422c46ab1..086407603f 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -55,6 +55,7 @@ postgres_ffi.workspace = true pq_proto.workspace = true remote_storage.workspace = true safekeeper_api.workspace = true +safekeeper_client.workspace = true sha2.workspace = true sd-notify.workspace = true storage_broker.workspace = true diff --git a/safekeeper/client/Cargo.toml b/safekeeper/client/Cargo.toml new file mode 100644 index 0000000000..6c5a52de3a --- /dev/null +++ b/safekeeper/client/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "safekeeper_client" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +[dependencies] +safekeeper_api.workspace = true +thiserror.workspace = true +reqwest = { workspace = true, features = [ "stream" ] } +serde.workspace = true +utils.workspace = true +workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/safekeeper/client/src/lib.rs b/safekeeper/client/src/lib.rs new file mode 100644 index 0000000000..3963fd466c --- /dev/null +++ b/safekeeper/client/src/lib.rs @@ -0,0 +1 @@ +pub mod mgmt_api; diff --git a/safekeeper/src/http/client.rs b/safekeeper/client/src/mgmt_api.rs similarity index 96% rename from safekeeper/src/http/client.rs rename to safekeeper/client/src/mgmt_api.rs index 669a9c0ce9..f78745043a 100644 --- a/safekeeper/src/http/client.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -2,10 +2,6 @@ //! //! Partially copied from pageserver client; some parts might be better to be //! united. -//! -//! 
It would be also good to move it out to separate crate, but this needs
-//! duplication of internal-but-reported structs like WalSenderState, ServerInfo
-//! etc.

 use reqwest::{IntoUrl, Method, StatusCode};
 use safekeeper_api::models::TimelineStatus;
diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs
index 7229ccb739..d82a713f8a 100644
--- a/safekeeper/src/http/mod.rs
+++ b/safekeeper/src/http/mod.rs
@@ -1,4 +1,3 @@
-pub mod client;
 pub mod routes;

 pub use routes::make_router;
diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs
index 00777273cb..f2d8e4c85f 100644
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -5,6 +5,8 @@ use chrono::{DateTime, Utc};
 use futures::{SinkExt, StreamExt, TryStreamExt};
 use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
 use safekeeper_api::{models::TimelineStatus, Term};
+use safekeeper_client::mgmt_api;
+use safekeeper_client::mgmt_api::Client;
 use serde::{Deserialize, Serialize};
 use std::{
     cmp::min,
@@ -22,7 +24,6 @@ use tracing::{error, info, instrument};
 use crate::{
     control_file::CONTROL_FILE_NAME,
     debug_dump,
-    http::client::{self, Client},
     state::{EvictionState, TimelinePersistentState},
     timeline::{Timeline, WalResidentTimeline},
     timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline},
@@ -419,7 +420,7 @@ pub async fn handle_request(
     let http_hosts = request.http_hosts.clone();

     // Figure out statuses of potential donors.
-    let responses: Vec<Result<TimelineStatus, client::Error>> =
+    let responses: Vec<Result<TimelineStatus, mgmt_api::Error>> =
         futures::future::join_all(http_hosts.iter().map(|url| async {
             let cclient = Client::new(url.clone(), sk_auth_token.clone());
             let info = cclient

From c5e3314c6e3a3d693310626a4c9c8deec34931c3 Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Mon, 16 Dec 2024 17:53:04 +0300
Subject: [PATCH 31/49] Add test restarting compute at WAL page boundary
 (#10111)

## Problem

We used to have a similar test in test_logical_replication, but removed it because it wasn't needed to trigger the LR-related bug. Restarting at a WAL page boundary is still a useful test, so add it back separately.

## Summary of changes

Add the test.
---
 test_runner/regress/test_wal_acceptor.py | 56 ++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py
index 23d4f23cdb..0a8900b351 100644
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -1090,6 +1090,62 @@ def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
     endpoint.safe_psql("SELECT 'works'")


+# Test restarting compute at WAL page boundary.
+def test_restart_endpoint_wal_page_boundary(neon_env_builder: NeonEnvBuilder):
+    env = neon_env_builder.init_start()
+
+    ep = env.endpoints.create_start("main")
+    ep.safe_psql("create table t (i int)")
+
+    with ep.cursor() as cur:
+        # measure how much space logical message takes. Sometimes first attempt
+        # creates huge message and then it stabilizes, have no idea why.
+        for _ in range(3):
+            lsn_before = Lsn(query_scalar(cur, "select pg_current_wal_lsn()"))
+            log.info(f"current_lsn={lsn_before}")
+            # Non-transactional logical message doesn't write WAL, only XLogInsert's
+            # it, so use transactional. Which is a bit problematic as transactional
+            # necessitates commit record. 
Alternatively we can do smth like + # select neon_xlogflush(pg_current_wal_insert_lsn()); + # but isn't much better + that particular call complains on 'xlog flush + # request 0/282C018 is not satisfied' as pg_current_wal_insert_lsn skips + # page headers. + payload = "blahblah" + cur.execute(f"select pg_logical_emit_message(true, 'pref', '{payload}')") + lsn_after_by_curr_wal_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + lsn_diff = lsn_after_by_curr_wal_lsn - lsn_before + logical_message_base = lsn_after_by_curr_wal_lsn - lsn_before - len(payload) + log.info( + f"before {lsn_before}, after {lsn_after_by_curr_wal_lsn}, lsn diff is {lsn_diff}, base {logical_message_base}" + ) + + # and write logical message spanning exactly as we want + lsn_before = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + log.info(f"current_lsn={lsn_before}") + curr_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + offs = int(curr_lsn) % 8192 + till_page = 8192 - offs + target_lsn = curr_lsn + till_page + payload_len = ( + till_page - logical_message_base - 8 + ) # not sure why 8 is here, it is deduced from experiments + log.info( + f"current_lsn={curr_lsn}, offs {offs}, till_page {till_page}, target_lsn {target_lsn}" + ) + + cur.execute(f"select pg_logical_emit_message(true, 'pref', 'f{'a' * payload_len}')") + supposedly_contrecord_end = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + log.info(f"supposedly_page_boundary={supposedly_contrecord_end}") + # The calculations to hit the page boundary are very fuzzy, so just + # ignore test if we fail to reach it. + if not (int(supposedly_contrecord_end) % 8192 == 0): + pytest.skip(f"missed page boundary, bad luck: lsn is {supposedly_contrecord_end}") + + ep.stop(mode="immediate") + ep = env.endpoints.create_start("main") + ep.safe_psql("insert into t values (42)") # should be ok + + # Context manager which logs passed time on exit. 
class DurationLogger: def __init__(self, desc): From 6565fd4056e0c040f26cc593a03561fe775595ff Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 15:33:21 +0000 Subject: [PATCH 32/49] chore: fix clippy lints 2024-12-06 (#10138) --- libs/desim/src/time.rs | 2 +- .../wal_craft/src/xlog_utils_test.rs | 2 +- .../proxy/tokio-postgres2/src/to_statement.rs | 2 +- libs/remote_storage/src/azure_blob.rs | 4 +-- libs/remote_storage/src/lib.rs | 4 +-- libs/remote_storage/src/local_fs.rs | 4 +-- libs/remote_storage/src/s3_bucket.rs | 4 +-- libs/remote_storage/src/simulate_failures.rs | 4 +-- pageserver/compaction/src/compact_tiered.rs | 2 +- pageserver/compaction/src/identify_levels.rs | 5 ++-- pageserver/compaction/src/interface.rs | 2 +- pageserver/compaction/src/simulator.rs | 2 +- pageserver/src/basebackup.rs | 2 +- pageserver/src/pgdatadir_mapping.rs | 8 +++--- pageserver/src/tenant/blob_io.rs | 2 +- pageserver/src/tenant/block_io.rs | 2 +- pageserver/src/tenant/disk_btree.rs | 2 +- pageserver/src/tenant/ephemeral_file.rs | 6 ++--- pageserver/src/tenant/layer_map.rs | 4 +-- .../tenant/remote_timeline_client/download.rs | 4 +-- .../tenant/remote_timeline_client/upload.rs | 4 +-- pageserver/src/tenant/storage_layer.rs | 5 +--- .../src/tenant/storage_layer/delta_layer.rs | 4 +-- .../src/tenant/storage_layer/image_layer.rs | 2 +- .../inmemory_layer/vectored_dio_read.rs | 26 +++++++++---------- pageserver/src/tenant/timeline.rs | 2 +- pageserver/src/tenant/timeline/compaction.rs | 4 +-- safekeeper/src/receive_wal.rs | 2 +- safekeeper/src/safekeeper.rs | 5 +--- storage_controller/src/service.rs | 5 +--- 30 files changed, 58 insertions(+), 68 deletions(-) diff --git a/libs/desim/src/time.rs b/libs/desim/src/time.rs index 7bb71db95c..7ce605bda8 100644 --- a/libs/desim/src/time.rs +++ b/libs/desim/src/time.rs @@ -91,7 +91,7 @@ impl Timing { /// Return true if there is a ready event. fn is_event_ready(&self, queue: &mut BinaryHeap) -> bool { - queue.peek().map_or(false, |x| x.time <= self.now()) + queue.peek().is_some_and(|x| x.time <= self.now()) } /// Clear all pending events. 
diff --git a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs index 9eb3f0e95a..4a33dbe25b 100644 --- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs +++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs @@ -81,7 +81,7 @@ fn test_end_of_wal(test_name: &str) { continue; } let mut f = File::options().write(true).open(file.path()).unwrap(); - const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE]; + static ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE]; f.write_all( &ZEROS[0..min( WAL_SEGMENT_SIZE, diff --git a/libs/proxy/tokio-postgres2/src/to_statement.rs b/libs/proxy/tokio-postgres2/src/to_statement.rs index 427f77dd79..7e12992728 100644 --- a/libs/proxy/tokio-postgres2/src/to_statement.rs +++ b/libs/proxy/tokio-postgres2/src/to_statement.rs @@ -11,7 +11,7 @@ mod private { Query(&'a str), } - impl<'a> ToStatementType<'a> { + impl ToStatementType<'_> { pub async fn into_statement(self, client: &Client) -> Result { match self { ToStatementType::Statement(s) => Ok(s.clone()), diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 32c51bc2ad..19c8251ccd 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -544,9 +544,9 @@ impl RemoteStorage for AzureBlobStorage { .await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { let kind = RequestKind::Delete; diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 2a3468f986..7a864151ec 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -341,9 +341,9 @@ pub trait RemoteStorage: Send + Sync + 'static { /// If the operation fails because of timeout or cancellation, the root cause of the error will be /// set to `TimeoutOrCancel`. In such situation it is unknown which deletions, if any, went /// through. 
- async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()>; diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index 1a2d421c66..a8b00173ba 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -562,9 +562,9 @@ impl RemoteStorage for LocalFs { } } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { for path in paths { diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index 2891f92d07..d3f19f0b11 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -813,9 +813,9 @@ impl RemoteStorage for S3Bucket { .await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { let kind = RequestKind::Delete; diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs index 51833c1fe6..63c24beb51 100644 --- a/libs/remote_storage/src/simulate_failures.rs +++ b/libs/remote_storage/src/simulate_failures.rs @@ -181,9 +181,9 @@ impl RemoteStorage for UnreliableWrapper { self.delete_inner(path, true, cancel).await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?; diff --git a/pageserver/compaction/src/compact_tiered.rs b/pageserver/compaction/src/compact_tiered.rs index 20f88868f9..7779ffaf8b 100644 --- a/pageserver/compaction/src/compact_tiered.rs +++ b/pageserver/compaction/src/compact_tiered.rs @@ -272,7 +272,7 @@ struct CompactionJob { completed: bool, } -impl<'a, E> LevelCompactionState<'a, E> +impl LevelCompactionState<'_, E> where E: CompactionJobExecutor, { diff --git a/pageserver/compaction/src/identify_levels.rs b/pageserver/compaction/src/identify_levels.rs index 1853afffdd..e04bd15396 100644 --- a/pageserver/compaction/src/identify_levels.rs +++ b/pageserver/compaction/src/identify_levels.rs @@ -224,9 +224,8 @@ impl Level { } // recalculate depth if this was the last event at this point - let more_events_at_this_key = events_iter - .peek() - .map_or(false, |next_e| next_e.key == e.key); + let more_events_at_this_key = + events_iter.peek().is_some_and(|next_e| next_e.key == e.key); if !more_events_at_this_key { let mut active_depth = 0; for (_end_lsn, is_image, _idx) in active_set.iter().rev() { diff --git a/pageserver/compaction/src/interface.rs b/pageserver/compaction/src/interface.rs index 5bc9b5ca1d..8ed393a645 100644 --- a/pageserver/compaction/src/interface.rs +++ b/pageserver/compaction/src/interface.rs @@ -148,7 +148,7 @@ pub trait CompactionDeltaLayer: CompactionLay Self: 'a; /// Return all keys in this delta layer. 
- fn load_keys<'a>( + fn load_keys( &self, ctx: &E::RequestContext, ) -> impl Future>>> + Send; diff --git a/pageserver/compaction/src/simulator.rs b/pageserver/compaction/src/simulator.rs index 776c537d03..673b80c313 100644 --- a/pageserver/compaction/src/simulator.rs +++ b/pageserver/compaction/src/simulator.rs @@ -143,7 +143,7 @@ impl interface::CompactionLayer for Arc { impl interface::CompactionDeltaLayer for Arc { type DeltaEntry<'a> = MockRecord; - async fn load_keys<'a>(&self, _ctx: &MockRequestContext) -> anyhow::Result> { + async fn load_keys(&self, _ctx: &MockRequestContext) -> anyhow::Result> { Ok(self.records.clone()) } } diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index cae0ffb980..e1b5676f46 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -248,7 +248,7 @@ where } } -impl<'a, W> Basebackup<'a, W> +impl Basebackup<'_, W> where W: AsyncWrite + Send + Sync + Unpin, { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 3eaecd3a08..14c7e0d2f8 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1242,7 +1242,7 @@ pub struct DatadirModification<'a> { pending_metadata_bytes: usize, } -impl<'a> DatadirModification<'a> { +impl DatadirModification<'_> { // When a DatadirModification is committed, we do a monolithic serialization of all its contents. WAL records can // contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we // additionally specify a limit on how much payload a DatadirModification may contain before it should be committed. @@ -1263,7 +1263,7 @@ impl<'a> DatadirModification<'a> { pub(crate) fn has_dirty_data(&self) -> bool { self.pending_data_batch .as_ref() - .map_or(false, |b| b.has_data()) + .is_some_and(|b| b.has_data()) } /// Set the current lsn @@ -2230,7 +2230,7 @@ impl<'a> DatadirModification<'a> { assert!(!self .pending_data_batch .as_ref() - .map_or(false, |b| b.updates_key(&key))); + .is_some_and(|b| b.updates_key(&key))); } } @@ -2299,7 +2299,7 @@ pub enum Version<'a> { Modified(&'a DatadirModification<'a>), } -impl<'a> Version<'a> { +impl Version<'_> { async fn get( &self, timeline: &Timeline, diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index dd70f6bbff..7b55df52a5 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -35,7 +35,7 @@ pub struct CompressionInfo { pub compressed_size: Option, } -impl<'a> BlockCursor<'a> { +impl BlockCursor<'_> { /// Read a blob into a new buffer. 
pub async fn read_blob( &self, diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index 2bd7f2d619..990211f80a 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -89,7 +89,7 @@ pub(crate) enum BlockReaderRef<'a> { VirtualFile(&'a VirtualFile), } -impl<'a> BlockReaderRef<'a> { +impl BlockReaderRef<'_> { #[inline(always)] async fn read_blk( &self, diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index b302cbc975..c77342b144 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -532,7 +532,7 @@ pub struct DiskBtreeIterator<'a> { >, } -impl<'a> DiskBtreeIterator<'a> { +impl DiskBtreeIterator<'_> { pub async fn next(&mut self) -> Option, u64), DiskBtreeError>> { self.stream.next().await } diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index aaec8a4c31..ba79672bc7 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -174,11 +174,11 @@ impl EphemeralFile { } impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, dst: tokio_epoll_uring::Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(tokio_epoll_uring::Slice, usize)> { let submitted_offset = self.buffered_writer.bytes_submitted(); diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 7f15baed10..1b6924425c 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -392,8 +392,8 @@ impl LayerMap { image_layer: Option>, end_lsn: Lsn, ) -> Option { - assert!(delta_layer.as_ref().map_or(true, |l| l.is_delta())); - assert!(image_layer.as_ref().map_or(true, |l| !l.is_delta())); + assert!(delta_layer.as_ref().is_none_or(|l| l.is_delta())); + assert!(image_layer.as_ref().is_none_or(|l| !l.is_delta())); match (delta_layer, image_layer) { (None, None) => None, diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index d15f161fb6..b4d45dca75 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -145,8 +145,8 @@ pub async fn download_layer_file<'a>( /// /// If Err() is returned, there was some error. The file at `dst_path` has been unlinked. /// The unlinking has _not_ been made durable. -async fn download_object<'a>( - storage: &'a GenericRemoteStorage, +async fn download_object( + storage: &GenericRemoteStorage, src_path: &RemotePath, dst_path: &Utf8PathBuf, #[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate, diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 0cd5d05aa2..e434d24e5f 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -25,8 +25,8 @@ use utils::id::{TenantId, TimelineId}; use tracing::info; /// Serializes and uploads the given index part data to the remote storage. 
-pub(crate) async fn upload_index_part<'a>( - storage: &'a GenericRemoteStorage, +pub(crate) async fn upload_index_part( + storage: &GenericRemoteStorage, tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, generation: Generation, diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 9e3a25cbbc..b8206fca5a 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -345,10 +345,7 @@ impl LayerFringe { } pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range)> { - let read_desc = match self.planned_visits_by_lsn.pop() { - Some(desc) => desc, - None => return None, - }; + let read_desc = self.planned_visits_by_lsn.pop()?; let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id); diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index fec8a0a16c..ade1b794c6 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -1486,7 +1486,7 @@ pub struct ValueRef<'a> { layer: &'a DeltaLayerInner, } -impl<'a> ValueRef<'a> { +impl ValueRef<'_> { /// Loads the value from disk pub async fn load(&self, ctx: &RequestContext) -> Result { let buf = self.load_raw(ctx).await?; @@ -1543,7 +1543,7 @@ pub struct DeltaLayerIterator<'a> { is_end: bool, } -impl<'a> DeltaLayerIterator<'a> { +impl DeltaLayerIterator<'_> { pub(crate) fn layer_dbg_info(&self) -> String { self.delta_layer.layer_dbg_info() } diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 834d1931d0..0d3c9d5a44 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -1052,7 +1052,7 @@ pub struct ImageLayerIterator<'a> { is_end: bool, } -impl<'a> ImageLayerIterator<'a> { +impl ImageLayerIterator<'_> { pub(crate) fn layer_dbg_info(&self) -> String { self.image_layer.layer_dbg_info() } diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs index a4bb3a6bfc..1d86015fab 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs @@ -25,11 +25,11 @@ pub trait File: Send { /// [`std::io::ErrorKind::UnexpectedEof`] error if the file is shorter than `start+dst.len()`. /// /// No guarantees are made about the remaining bytes in `dst` in case of a short read. 
- async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, dst: Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)>; } @@ -479,11 +479,11 @@ mod tests { } impl File for InMemoryFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, mut dst: Slice, - _ctx: &'a RequestContext, + _ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let dst_slice: &mut [u8] = dst.as_mut_rust_slice_full_zeroed(); let nread = { @@ -609,12 +609,12 @@ mod tests { } } - impl<'x> File for RecorderFile<'x> { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + impl File for RecorderFile<'_> { + async fn read_exact_at_eof_ok( + &self, start: u64, dst: Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let (dst, nread) = self.file.read_exact_at_eof_ok(start, dst, ctx).await?; self.recorded.borrow_mut().push(RecordedRead { @@ -740,11 +740,11 @@ mod tests { } impl File for MockFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, mut dst: Slice, - _ctx: &'a RequestContext, + _ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let ExpectedRead { expect_pos, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 0416953c1f..87f5a03382 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5842,7 +5842,7 @@ enum OpenLayerAction { None, } -impl<'a> TimelineWriter<'a> { +impl TimelineWriter<'_> { async fn handle_open_layer_action( &mut self, at: Lsn, diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 5e6290729c..8b6cc8ed84 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1110,7 +1110,7 @@ impl Timeline { return Err(CompactionError::ShuttingDown); } - let same_key = prev_key.map_or(false, |prev_key| prev_key == key); + let same_key = prev_key == Some(key); // We need to check key boundaries once we reach next key or end of layer with the same key if !same_key || lsn == dup_end_lsn { let mut next_key_size = 0u64; @@ -2904,7 +2904,7 @@ impl CompactionLayer for ResidentDeltaLayer { impl CompactionDeltaLayer for ResidentDeltaLayer { type DeltaEntry<'a> = DeltaEntry<'a>; - async fn load_keys<'a>(&self, ctx: &RequestContext) -> anyhow::Result>> { + async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result>> { self.0.get_as_delta(ctx).await?.index_entries(ctx).await } } diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 08371177cd..3e9ce1da8e 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -318,7 +318,7 @@ struct NetworkReader<'a, IO> { global_timelines: Arc, } -impl<'a, IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'a, IO> { +impl NetworkReader<'_, IO> { async fn read_first_message( &mut self, ) -> Result<(WalResidentTimeline, ProposerAcceptorMessage), CopyStreamHandlerEnd> { diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index ccd7940c72..6ceaf325b0 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -125,10 +125,7 @@ impl TermHistory { ); last_common_idx = Some(i); } - let last_common_idx = match last_common_idx { - None => return None, // no common 
point - Some(lci) => lci, - }; + let last_common_idx = last_common_idx?; // Now find where it ends at both prop and sk and take min. End of // (common) term is the start of the next except it is the last one; // there it is flush_lsn in case of safekeeper or, in case of proposer diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 746177c089..a89e4741f6 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -6873,10 +6873,7 @@ impl Service { let mut plan = Vec::new(); for (node_id, attached) in nodes_by_load { - let available = locked - .nodes - .get(&node_id) - .map_or(false, |n| n.is_available()); + let available = locked.nodes.get(&node_id).is_some_and(|n| n.is_available()); if !available { continue; } From 3d30a7a9348efbb0d1faca6d44dccc1885c5b8c9 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 16 Dec 2024 16:54:47 +0100 Subject: [PATCH 33/49] pageserver: make `RemoteTimelineClient::schedule_index_upload` infallible (#10155) Remove an unnecessary `Result` and address a `FIXME`. --- .../src/tenant/remote_timeline_client.rs | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 20e0536a00..fee11bc742 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -749,7 +749,7 @@ impl RemoteTimelineClient { // ahead of what's _actually_ on the remote during index upload. upload_queue.dirty.metadata = metadata.clone(); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -770,7 +770,7 @@ impl RemoteTimelineClient { upload_queue.dirty.metadata.apply(update); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -809,7 +809,7 @@ impl RemoteTimelineClient { if let Some(archived_at_set) = need_upload_scheduled { let intended_archived_at = archived_at_set.then(|| Utc::now().naive_utc()); upload_queue.dirty.archived_at = intended_archived_at; - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } let need_wait = need_change(&upload_queue.clean.0.archived_at, state).is_some(); @@ -824,7 +824,7 @@ impl RemoteTimelineClient { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; upload_queue.dirty.import_pgdata = state; - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -843,17 +843,14 @@ impl RemoteTimelineClient { let upload_queue = guard.initialized_mut()?; if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 { - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } Ok(()) } /// Launch an index-file upload operation in the background (internal function) - fn schedule_index_upload( - self: &Arc, - upload_queue: &mut UploadQueueInitialized, - ) -> Result<(), NotInitialized> { + fn schedule_index_upload(self: &Arc, upload_queue: &mut UploadQueueInitialized) { let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn(); // fix up the duplicated field upload_queue.dirty.disk_consistent_lsn = disk_consistent_lsn; @@ -880,7 +877,6 @@ impl RemoteTimelineClient { // Launch the task immediately, if possible self.launch_queued_tasks(upload_queue); - Ok(()) } /// Reparent this timeline to a new parent. 
@@ -909,7 +905,7 @@ impl RemoteTimelineClient { upload_queue.dirty.metadata.reparent(new_parent); upload_queue.dirty.lineage.record_previous_ancestor(&prev); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } @@ -948,7 +944,7 @@ impl RemoteTimelineClient { assert!(prev.is_none(), "copied layer existed already {layer}"); } - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } @@ -1004,7 +1000,7 @@ impl RemoteTimelineClient { upload_queue.dirty.gc_blocking = current .map(|x| x.with_reason(reason)) .or_else(|| Some(index::GcBlocking::started_now_for(reason))); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } } @@ -1057,8 +1053,7 @@ impl RemoteTimelineClient { upload_queue.dirty.gc_blocking = current.as_ref().and_then(|x| x.without_reason(reason)); assert!(wanted(upload_queue.dirty.gc_blocking.as_ref())); - // FIXME: bogus ? - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } } @@ -1125,8 +1120,8 @@ impl RemoteTimelineClient { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; - let with_metadata = self - .schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned())?; + let with_metadata = + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned()); self.schedule_deletion_of_unlinked0(upload_queue, with_metadata); @@ -1153,7 +1148,7 @@ impl RemoteTimelineClient { let names = gc_layers.iter().map(|x| x.layer_desc().layer_name()); - self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?; + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); self.launch_queued_tasks(upload_queue); @@ -1166,7 +1161,7 @@ impl RemoteTimelineClient { self: &Arc, upload_queue: &mut UploadQueueInitialized, names: I, - ) -> Result, NotInitialized> + ) -> Vec<(LayerName, LayerFileMetadata)> where I: IntoIterator, { @@ -1208,10 +1203,10 @@ impl RemoteTimelineClient { // index_part update, because that needs to be uploaded before we can actually delete the // files. if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 { - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } - Ok(with_metadata) + with_metadata } /// Schedules deletion for layer files which have previously been unlinked from the @@ -1302,7 +1297,7 @@ impl RemoteTimelineClient { let names = compacted_from.iter().map(|x| x.layer_desc().layer_name()); - self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?; + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); self.launch_queued_tasks(upload_queue); Ok(()) From 2e4c9c570491b6747e65bdeba64dc55f0dec2ea3 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 16:11:39 +0000 Subject: [PATCH 34/49] chore(proxy): remove allow_self_signed from regular proxy (#10157) I noticed that the only place we use this flag is for testing console redirect proxy. Makes sense to me to make this assumption more explicit. 
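For reference, the removed helper was the only mode-dependent read of this flag; its dispatch can be pictured with a small self-contained sketch (illustrative types only, not the proxy's real ones):

```rust
struct ProxyConfig {
    allow_self_signed_compute: bool,
}

enum ClientMode {
    Tcp,
    Websockets,
}

impl ClientMode {
    // Shape of the removed helper: only the TCP path consulted the config
    // flag; websocket clients never allowed self-signed compute certs.
    fn allow_self_signed_compute(&self, config: &ProxyConfig) -> bool {
        match self {
            ClientMode::Tcp => config.allow_self_signed_compute,
            ClientMode::Websockets => false,
        }
    }
}

fn main() {
    let config = ProxyConfig {
        allow_self_signed_compute: false,
    };
    // In production the flag is false, so both arms agree and the
    // indirection buys nothing.
    assert!(!ClientMode::Tcp.allow_self_signed_compute(&config));
    assert!(!ClientMode::Websockets.allow_self_signed_compute(&config));
}
```

With only the console-redirect test path ever setting the flag, the regular proxy now passes a literal `false` at the call site, as the diff below shows.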
--- proxy/src/proxy/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index de0ec0f799..5db92d748a 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -191,13 +191,6 @@ impl ClientMode { } } - pub(crate) fn allow_self_signed_compute(&self, config: &ProxyConfig) -> bool { - match self { - ClientMode::Tcp => config.allow_self_signed_compute, - ClientMode::Websockets { .. } => false, - } - } - fn hostname<'a, S>(&'a self, s: &'a Stream) -> Option<&'a str> { match self { ClientMode::Tcp => s.sni_hostname(), @@ -355,7 +348,8 @@ pub(crate) async fn handle_client( params_compat, params: ¶ms, locks: &config.connect_compute_locks, - allow_self_signed_compute: mode.allow_self_signed_compute(config), + // only used for console redirect testing. + allow_self_signed_compute: false, }, &user_info, config.wake_compute_retry_config, From 59b7ff89881ac218f35f545ce38f556d08af36dd Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 16:37:15 +0000 Subject: [PATCH 35/49] chore(proxy): disallow unwrap and unimplemented (#10142) As the title says, I updated the lint rules to no longer allow unwrap or unimplemented. Three special cases: * Tests are allowed to use them * std::sync::Mutex lock().unwrap() is common because it's usually correct to continue panicking on poison * `tokio::spawn_blocking(...).await.unwrap()` is common because it will only error if the blocking fn panics, so continuing the panic is also correct I've introduced two extension traits to help with these last two, that are a bit more explicit so they don't need an expect message every time. --- proxy/src/auth/backend/jwt.rs | 1 + proxy/src/auth/backend/mod.rs | 2 + proxy/src/auth/credentials.rs | 1 + proxy/src/cache/endpoints.rs | 4 +- proxy/src/cache/project_info.rs | 1 + proxy/src/cancellation.rs | 4 +- proxy/src/config.rs | 13 ++---- proxy/src/context/parquet.rs | 14 ++++-- proxy/src/control_plane/client/mock.rs | 4 +- proxy/src/ext.rs | 41 +++++++++++++++++ proxy/src/http/health_server.rs | 7 +-- proxy/src/intern.rs | 3 +- proxy/src/lib.rs | 5 ++- proxy/src/logging.rs | 12 ++++- proxy/src/parse.rs | 15 ------- proxy/src/protocol2.rs | 1 + proxy/src/proxy/copy_bidirectional.rs | 1 + proxy/src/proxy/mod.rs | 2 +- proxy/src/proxy/tests/mod.rs | 1 + proxy/src/rate_limiter/leaky_bucket.rs | 2 +- .../src/rate_limiter/limit_algorithm/aimd.rs | 1 + proxy/src/rate_limiter/limiter.rs | 4 +- .../connection_with_credentials_provider.rs | 6 ++- proxy/src/sasl/messages.rs | 5 ++- proxy/src/scram/messages.rs | 1 + proxy/src/scram/mod.rs | 1 + proxy/src/scram/secret.rs | 1 + proxy/src/scram/threadpool.rs | 16 +++---- proxy/src/serverless/backend.rs | 6 ++- proxy/src/serverless/conn_pool.rs | 34 ++++++-------- proxy/src/serverless/conn_pool_lib.rs | 45 +++++++++---------- proxy/src/serverless/http_util.rs | 11 +++-- proxy/src/serverless/json.rs | 6 ++- proxy/src/serverless/local_conn_pool.rs | 11 ++--- proxy/src/serverless/mod.rs | 5 ++- proxy/src/serverless/sql_over_http.rs | 1 + proxy/src/serverless/websocket.rs | 1 + proxy/src/url.rs | 1 + proxy/src/usage_metrics.rs | 1 + 39 files changed, 178 insertions(+), 113 deletions(-) create mode 100644 proxy/src/ext.rs diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index a258090b15..df716f8455 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -776,6 +776,7 @@ impl From<&jose_jwk::Key> for KeyType { } #[cfg(test)] 
+#[expect(clippy::unwrap_used)] mod tests { use std::future::IntoFuture; use std::net::SocketAddr; diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index f38ecf715f..50cb94bfa0 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -463,6 +463,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { #[cfg(test)] mod tests { + #![allow(clippy::unimplemented, clippy::unwrap_used)] + use std::net::IpAddr; use std::sync::Arc; use std::time::Duration; diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index f6bce9f2d8..eff49a402a 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -250,6 +250,7 @@ fn project_name_valid(name: &str) -> bool { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use serde_json::json; use ComputeUserInfoParseError::*; diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 20db1fbb14..0136446d6d 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -12,6 +12,7 @@ use tracing::info; use crate::config::EndpointCacheConfig; use crate::context::RequestContext; +use crate::ext::LockExt; use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; use crate::rate_limiter::GlobalRateLimiter; @@ -96,7 +97,7 @@ impl EndpointsCache { // If the limiter allows, we can pretend like it's valid // (incase it is, due to redis channel lag). - if self.limiter.lock().unwrap().check() { + if self.limiter.lock_propagate_poison().check() { return true; } @@ -258,6 +259,7 @@ impl EndpointsCache { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index 84430dc812..cab0b8b905 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -365,6 +365,7 @@ impl Cache for ProjectInfoCacheImpl { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; use crate::scram::ServerSecret; diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index ed717507ee..dd3edd6abc 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -13,6 +13,7 @@ use uuid::Uuid; use crate::auth::{check_peer_addr_is_in_list, IpPattern}; use crate::error::ReportableError; +use crate::ext::LockExt; use crate::metrics::{CancellationRequest, CancellationSource, Metrics}; use crate::rate_limiter::LeakyBucketRateLimiter; use crate::redis::cancellation_publisher::{ @@ -114,7 +115,7 @@ impl CancellationHandler

<P> {
             IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
             IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
         };
-        if !self.limiter.lock().unwrap().check(subnet_key, 1) {
+        if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
             // log only the subnet part of the IP address to know which subnet is rate limited
             tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
             Metrics::get()
@@ -283,6 +284,7 @@ impl<P> Drop for Session<P>
{ } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 8bc8e3f96f..1f991d595e 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -221,15 +221,10 @@ impl CertResolver { ) -> anyhow::Result<()> { let priv_key = { let key_bytes = std::fs::read(key_path) - .context(format!("Failed to read TLS keys at '{key_path}'"))?; - let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec(); - - ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len()); - PrivateKeyDer::Pkcs8( - keys.pop() - .unwrap() - .context(format!("Failed to parse TLS keys at '{key_path}'"))?, - ) + .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?; + rustls_pemfile::private_key(&mut &key_bytes[..]) + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? }; let cert_chain_bytes = std::fs::read(cert_path) diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 3105d08526..5f65b17374 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -23,6 +23,7 @@ use utils::backoff; use super::{RequestContextInner, LOG_CHAN}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; +use crate::ext::TaskExt; #[derive(clap::Args, Clone, Debug)] pub struct ParquetUploadArgs { @@ -171,7 +172,9 @@ pub async fn worker( }; let (tx, mut rx) = mpsc::unbounded_channel(); - LOG_CHAN.set(tx.downgrade()).unwrap(); + LOG_CHAN + .set(tx.downgrade()) + .expect("only one worker should set the channel"); // setup row stream that will close on cancellation let cancellation_token2 = cancellation_token.clone(); @@ -207,7 +210,9 @@ pub async fn worker( config.parquet_upload_disconnect_events_remote_storage { let (tx_disconnect, mut rx_disconnect) = mpsc::unbounded_channel(); - LOG_CHAN_DISCONNECT.set(tx_disconnect.downgrade()).unwrap(); + LOG_CHAN_DISCONNECT + .set(tx_disconnect.downgrade()) + .expect("only one worker should set the channel"); // setup row stream that will close on cancellation tokio::spawn(async move { @@ -326,7 +331,7 @@ where Ok::<_, parquet::errors::ParquetError>((rows, w, rg_meta)) }) .await - .unwrap()?; + .propagate_task_panic()?; rows.clear(); Ok((rows, w, rg_meta)) @@ -352,7 +357,7 @@ async fn upload_parquet( Ok((buffer, metadata)) }) .await - .unwrap()?; + .propagate_task_panic()?; let data = buffer.split().freeze(); @@ -409,6 +414,7 @@ async fn upload_parquet( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::net::Ipv4Addr; use std::num::NonZeroUsize; diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 93edd65476..5f8bda0f35 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -102,7 +102,9 @@ impl MockControlPlane { Some(s) => { info!("got allowed_ips: {s}"); s.split(',') - .map(|s| IpPattern::from_str(s).unwrap()) + .map(|s| { + IpPattern::from_str(s).expect("mocked ip pattern should be correct") + }) .collect() } None => vec![], diff --git a/proxy/src/ext.rs b/proxy/src/ext.rs new file mode 100644 index 0000000000..8d00afbf51 --- /dev/null +++ b/proxy/src/ext.rs @@ -0,0 +1,41 @@ +use std::panic::resume_unwind; +use std::sync::{Mutex, MutexGuard}; + +use tokio::task::JoinError; + +pub(crate) trait LockExt { + fn lock_propagate_poison(&self) -> MutexGuard<'_, T>; +} + +impl LockExt for Mutex { + /// Lock the mutex and 
panic if the mutex was poisoned. + #[track_caller] + fn lock_propagate_poison(&self) -> MutexGuard<'_, T> { + match self.lock() { + Ok(guard) => guard, + // poison occurs when another thread panicked while holding the lock guard. + // since panicking is often unrecoverable, propagating the poison panic is reasonable. + Err(poison) => panic!("{poison}"), + } + } +} + +pub(crate) trait TaskExt { + fn propagate_task_panic(self) -> T; +} + +impl TaskExt for Result { + /// Unwrap the result and panic if the inner task panicked. + /// Also panics if the task was cancelled + #[track_caller] + fn propagate_task_panic(self) -> T { + match self { + Ok(t) => t, + // Using resume_unwind prevents the panic hook being called twice. + // Since we use this for structured concurrency, there is only + // 1 logical panic, so this is more correct. + Err(e) if e.is_panic() => resume_unwind(e.into_panic()), + Err(e) => panic!("unexpected task error: {e}"), + } + } +} diff --git a/proxy/src/http/health_server.rs b/proxy/src/http/health_server.rs index 978ad9f761..6ca091feb7 100644 --- a/proxy/src/http/health_server.rs +++ b/proxy/src/http/health_server.rs @@ -14,6 +14,7 @@ use utils::http::error::ApiError; use utils::http::json::json_response; use utils::http::{RouterBuilder, RouterService}; +use crate::ext::{LockExt, TaskExt}; use crate::jemalloc; async fn status_handler(_: Request) -> Result, ApiError> { @@ -76,7 +77,7 @@ async fn prometheus_metrics_handler( let body = tokio::task::spawn_blocking(move || { let _span = span.entered(); - let mut state = state.lock().unwrap(); + let mut state = state.lock_propagate_poison(); let PrometheusHandler { encoder, metrics } = &mut *state; metrics @@ -94,13 +95,13 @@ async fn prometheus_metrics_handler( body }) .await - .unwrap(); + .propagate_task_panic(); let response = Response::builder() .status(200) .header(CONTENT_TYPE, "text/plain; version=0.0.4") .body(Body::from(body)) - .unwrap(); + .expect("response headers should be valid"); Ok(response) } diff --git a/proxy/src/intern.rs b/proxy/src/intern.rs index f56d92a6b3..79c6020302 100644 --- a/proxy/src/intern.rs +++ b/proxy/src/intern.rs @@ -83,7 +83,7 @@ impl StringInterner { pub(crate) fn new() -> Self { StringInterner { inner: ThreadedRodeo::with_capacity_memory_limits_and_hasher( - Capacity::new(2500, NonZeroUsize::new(1 << 16).unwrap()), + Capacity::new(2500, NonZeroUsize::new(1 << 16).expect("value is nonzero")), // unbounded MemoryLimits::for_memory_usage(usize::MAX), BuildHasherDefault::::default(), @@ -207,6 +207,7 @@ impl From for ProjectIdInt { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::sync::OnceLock; diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs index ba69f9cf2d..a5a72f26d9 100644 --- a/proxy/src/lib.rs +++ b/proxy/src/lib.rs @@ -22,8 +22,8 @@ clippy::string_add, clippy::string_to_string, clippy::todo, - // TODO: consider clippy::unimplemented - // TODO: consider clippy::unwrap_used + clippy::unimplemented, + clippy::unwrap_used, )] // List of permanently allowed lints. 
#![allow( @@ -82,6 +82,7 @@ pub mod console_redirect_proxy; pub mod context; pub mod control_plane; pub mod error; +mod ext; pub mod http; pub mod intern; pub mod jemalloc; diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index 74d2b9a1d0..41f10f052f 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -18,8 +18,16 @@ pub async fn init() -> anyhow::Result { let env_filter = EnvFilter::builder() .with_default_directive(LevelFilter::INFO.into()) .from_env_lossy() - .add_directive("aws_config=info".parse().unwrap()) - .add_directive("azure_core::policies::transport=off".parse().unwrap()); + .add_directive( + "aws_config=info" + .parse() + .expect("this should be a valid filter directive"), + ) + .add_directive( + "azure_core::policies::transport=off" + .parse() + .expect("this should be a valid filter directive"), + ); let fmt_layer = tracing_subscriber::fmt::layer() .with_ansi(false) diff --git a/proxy/src/parse.rs b/proxy/src/parse.rs index 8c0f251066..095d6278cc 100644 --- a/proxy/src/parse.rs +++ b/proxy/src/parse.rs @@ -8,14 +8,6 @@ pub(crate) fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> { Some((cstr, other)) } -/// See . -pub(crate) fn split_at_const(bytes: &[u8]) -> Option<(&[u8; N], &[u8])> { - (bytes.len() >= N).then(|| { - let (head, tail) = bytes.split_at(N); - (head.try_into().unwrap(), tail) - }) -} - #[cfg(test)] mod tests { use super::*; @@ -33,11 +25,4 @@ mod tests { assert_eq!(cstr.to_bytes(), b"foo"); assert_eq!(rest, b"bar"); } - - #[test] - fn test_split_at_const() { - assert!(split_at_const::<0>(b"").is_some()); - assert!(split_at_const::<1>(b"").is_none()); - assert!(matches!(split_at_const::<1>(b"ok"), Some((b"o", b"k")))); - } } diff --git a/proxy/src/protocol2.rs b/proxy/src/protocol2.rs index 33a5eb5e1e..0dc97b7097 100644 --- a/proxy/src/protocol2.rs +++ b/proxy/src/protocol2.rs @@ -396,6 +396,7 @@ impl NetworkEndianIpv6 { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use tokio::io::AsyncReadExt; diff --git a/proxy/src/proxy/copy_bidirectional.rs b/proxy/src/proxy/copy_bidirectional.rs index 4e4af88634..3336a9556a 100644 --- a/proxy/src/proxy/copy_bidirectional.rs +++ b/proxy/src/proxy/copy_bidirectional.rs @@ -257,6 +257,7 @@ impl CopyBuffer { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use tokio::io::AsyncWriteExt; diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 5db92d748a..4e5ecda237 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -488,7 +488,7 @@ impl NeonOptions { pub(crate) fn neon_option(bytes: &str) -> Option<(&str, &str)> { static RE: OnceCell = OnceCell::new(); - let re = RE.get_or_init(|| Regex::new(r"^neon_(\w+):(.+)").unwrap()); + let re = RE.get_or_init(|| Regex::new(r"^neon_(\w+):(.+)").expect("regex should be correct")); let cap = re.captures(bytes)?; let (_, [k, v]) = cap.extract(); diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 3899ba4267..95c518fed9 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -1,4 +1,5 @@ //! A group of high-level tests for connection establishing logic and auth. 
+#![allow(clippy::unimplemented, clippy::unwrap_used)] mod mitm; diff --git a/proxy/src/rate_limiter/leaky_bucket.rs b/proxy/src/rate_limiter/leaky_bucket.rs index 45f9630dde..bff800f0a2 100644 --- a/proxy/src/rate_limiter/leaky_bucket.rs +++ b/proxy/src/rate_limiter/leaky_bucket.rs @@ -83,7 +83,7 @@ impl From for utils::leaky_bucket::LeakyBucketConfig { } #[cfg(test)] -#[allow(clippy::float_cmp)] +#[allow(clippy::float_cmp, clippy::unwrap_used)] mod tests { use std::time::Duration; diff --git a/proxy/src/rate_limiter/limit_algorithm/aimd.rs b/proxy/src/rate_limiter/limit_algorithm/aimd.rs index 3000cc4c2a..04e136b6d5 100644 --- a/proxy/src/rate_limiter/limit_algorithm/aimd.rs +++ b/proxy/src/rate_limiter/limit_algorithm/aimd.rs @@ -63,6 +63,7 @@ impl LimitAlgorithm for Aimd { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::time::Duration; diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs index a048721e77..6f6a8c9d47 100644 --- a/proxy/src/rate_limiter/limiter.rs +++ b/proxy/src/rate_limiter/limiter.rs @@ -12,6 +12,7 @@ use rand::{Rng, SeedableRng}; use tokio::time::{Duration, Instant}; use tracing::info; +use crate::ext::LockExt; use crate::intern::EndpointIdInt; pub struct GlobalRateLimiter { @@ -246,12 +247,13 @@ impl BucketRateLimiter { let n = self.map.shards().len(); // this lock is ok as the periodic cycle of do_gc makes this very unlikely to collide // (impossible, infact, unless we have 2048 threads) - let shard = self.rand.lock().unwrap().gen_range(0..n); + let shard = self.rand.lock_propagate_poison().gen_range(0..n); self.map.shards()[shard].write().clear(); } } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::hash::BuildHasherDefault; use std::time::Duration; diff --git a/proxy/src/redis/connection_with_credentials_provider.rs b/proxy/src/redis/connection_with_credentials_provider.rs index 82139ea1d5..0f6e765b02 100644 --- a/proxy/src/redis/connection_with_credentials_provider.rs +++ b/proxy/src/redis/connection_with_credentials_provider.rs @@ -69,7 +69,11 @@ impl ConnectionWithCredentialsProvider { pub fn new_with_static_credentials(params: T) -> Self { Self { - credentials: Credentials::Static(params.into_connection_info().unwrap()), + credentials: Credentials::Static( + params + .into_connection_info() + .expect("static configured redis credentials should be a valid format"), + ), con: None, refresh_token_task: None, mutex: tokio::sync::Mutex::new(()), diff --git a/proxy/src/sasl/messages.rs b/proxy/src/sasl/messages.rs index 1373dfba3d..4922ece615 100644 --- a/proxy/src/sasl/messages.rs +++ b/proxy/src/sasl/messages.rs @@ -2,7 +2,7 @@ use pq_proto::{BeAuthenticationSaslMessage, BeMessage}; -use crate::parse::{split_at_const, split_cstr}; +use crate::parse::split_cstr; /// SASL-specific payload of [`PasswordMessage`](pq_proto::FeMessage::PasswordMessage). 
#[derive(Debug)] @@ -19,7 +19,7 @@ impl<'a> FirstMessage<'a> { let (method_cstr, tail) = split_cstr(bytes)?; let method = method_cstr.to_str().ok()?; - let (len_bytes, bytes) = split_at_const(tail)?; + let (len_bytes, bytes) = tail.split_first_chunk()?; let len = u32::from_be_bytes(*len_bytes) as usize; if len != bytes.len() { return None; @@ -51,6 +51,7 @@ impl<'a> ServerMessage<&'a str> { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/messages.rs b/proxy/src/scram/messages.rs index 5ee3a51352..0e54e7ded9 100644 --- a/proxy/src/scram/messages.rs +++ b/proxy/src/scram/messages.rs @@ -185,6 +185,7 @@ impl fmt::Debug for OwnedServerFirstMessage { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/mod.rs b/proxy/src/scram/mod.rs index 718445f61d..b49a9f32ee 100644 --- a/proxy/src/scram/mod.rs +++ b/proxy/src/scram/mod.rs @@ -57,6 +57,7 @@ fn sha256<'a>(parts: impl IntoIterator) -> [u8; 32] { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::threadpool::ThreadPool; use super::{Exchange, ServerSecret}; diff --git a/proxy/src/scram/secret.rs b/proxy/src/scram/secret.rs index 8c6a08d432..eb21b26ab4 100644 --- a/proxy/src/scram/secret.rs +++ b/proxy/src/scram/secret.rs @@ -72,6 +72,7 @@ impl ServerSecret { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/threadpool.rs b/proxy/src/scram/threadpool.rs index ebc6dd2a3c..8f1684c75b 100644 --- a/proxy/src/scram/threadpool.rs +++ b/proxy/src/scram/threadpool.rs @@ -33,14 +33,11 @@ thread_local! { } impl ThreadPool { - pub fn new(n_workers: u8) -> Arc { + pub fn new(mut n_workers: u8) -> Arc { // rayon would be nice here, but yielding in rayon does not work well afaict. if n_workers == 0 { - return Arc::new(Self { - runtime: None, - metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)), - }); + n_workers = 1; } Arc::new_cyclic(|pool| { @@ -66,7 +63,7 @@ impl ThreadPool { }); }) .build() - .unwrap(); + .expect("password threadpool runtime should be configured correctly"); Self { runtime: Some(runtime), @@ -79,7 +76,7 @@ impl ThreadPool { JobHandle( self.runtime .as_ref() - .unwrap() + .expect("runtime is always set") .spawn(JobSpec { pbkdf2, endpoint }), ) } @@ -87,7 +84,10 @@ impl ThreadPool { impl Drop for ThreadPool { fn drop(&mut self) { - self.runtime.take().unwrap().shutdown_background(); + self.runtime + .take() + .expect("runtime is always set") + .shutdown_background(); } } diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 15d883bdb0..449d50b6e7 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -268,7 +268,11 @@ impl PoolingBackend { if !self.local_pool.initialized(&conn_info) { // only install and grant usage one at a time. 
- let _permit = local_backend.initialize.acquire().await.unwrap(); + let _permit = local_backend + .initialize + .acquire() + .await + .expect("semaphore should never be closed"); // check again for race if !self.local_pool.initialized(&conn_info) { diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index cac5a173cb..447103edce 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -186,8 +186,8 @@ impl ClientDataRemote { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { - use std::mem; use std::sync::atomic::AtomicBool; use super::*; @@ -269,39 +269,33 @@ mod tests { assert_eq!(0, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); assert_eq!(1, pool.get_global_connections_count()); } { - let mut closed_client = Client::new( + let closed_client = Client::new( create_inner_with(MockClient::new(true)), conn_info.clone(), ep_pool.clone(), ); - closed_client.do_drop().unwrap()(); - mem::forget(closed_client); // drop the client - // The closed client shouldn't be added to the pool. + drop(closed_client); assert_eq!(1, pool.get_global_connections_count()); } let is_closed: Arc = Arc::new(false.into()); { - let mut client = Client::new( + let client = Client::new( create_inner_with(MockClient(is_closed.clone())), conn_info.clone(), ep_pool.clone(), ); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client - + drop(client); // The client should be added to the pool. assert_eq!(2, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info, ep_pool); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info, ep_pool); + drop(client); // The client shouldn't be added to the pool. Because the ep-pool is full. assert_eq!(2, pool.get_global_connections_count()); @@ -319,15 +313,13 @@ mod tests { &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()), ); { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); assert_eq!(3, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); // The client shouldn't be added to the pool. Because the global pool is full. 
assert_eq!(3, pool.get_global_connections_count()); diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 2a46c8f9c5..44eac77e8f 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -187,19 +187,22 @@ impl EndpointConnPool { pub(crate) fn put(pool: &RwLock, conn_info: &ConnInfo, client: ClientInnerCommon) { let conn_id = client.get_conn_id(); - let pool_name = pool.read().get_name().to_string(); + let (max_conn, conn_count, pool_name) = { + let pool = pool.read(); + ( + pool.global_pool_size_max_conns, + pool.global_connections_count + .load(atomic::Ordering::Relaxed), + pool.get_name().to_string(), + ) + }; + if client.inner.is_closed() { info!(%conn_id, "{}: throwing away connection '{conn_info}' because connection is closed", pool_name); return; } - let global_max_conn = pool.read().global_pool_size_max_conns; - if pool - .read() - .global_connections_count - .load(atomic::Ordering::Relaxed) - >= global_max_conn - { + if conn_count >= max_conn { info!(%conn_id, "{}: throwing away connection '{conn_info}' because pool is full", pool_name); return; } @@ -633,35 +636,29 @@ impl Client { } pub(crate) fn metrics(&self) -> Arc { - let aux = &self.inner.as_ref().unwrap().aux; + let aux = &self + .inner + .as_ref() + .expect("client inner should not be removed") + .aux; USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, }) } +} - pub(crate) fn do_drop(&mut self) -> Option> { +impl Drop for Client { + fn drop(&mut self) { let conn_info = self.conn_info.clone(); let client = self .inner .take() .expect("client inner should not be removed"); if let Some(conn_pool) = std::mem::take(&mut self.pool).upgrade() { - let current_span = self.span.clone(); + let _current_span = self.span.enter(); // return connection to the pool - return Some(move || { - let _span = current_span.enter(); - EndpointConnPool::put(&conn_pool, &conn_info, client); - }); - } - None - } -} - -impl Drop for Client { - fn drop(&mut self) { - if let Some(drop) = self.do_drop() { - tokio::task::spawn_blocking(drop); + EndpointConnPool::put(&conn_pool, &conn_info, client); } } } diff --git a/proxy/src/serverless/http_util.rs b/proxy/src/serverless/http_util.rs index c0208d4f68..d5c948777c 100644 --- a/proxy/src/serverless/http_util.rs +++ b/proxy/src/serverless/http_util.rs @@ -81,11 +81,14 @@ impl HttpErrorBody { .header(http::header::CONTENT_TYPE, "application/json") // we do not have nested maps with non string keys so serialization shouldn't fail .body( - Full::new(Bytes::from(serde_json::to_string(self).unwrap())) - .map_err(|x| match x {}) - .boxed(), + Full::new(Bytes::from( + serde_json::to_string(self) + .expect("serialising HttpErrorBody should never fail"), + )) + .map_err(|x| match x {}) + .boxed(), ) - .unwrap() + .expect("content-type header should be valid") } } diff --git a/proxy/src/serverless/json.rs b/proxy/src/serverless/json.rs index 25b25c66d3..ab012bd020 100644 --- a/proxy/src/serverless/json.rs +++ b/proxy/src/serverless/json.rs @@ -204,7 +204,10 @@ fn pg_array_parse_inner( if c == '\\' { escaped = true; - (i, c) = pg_array_chr.next().unwrap(); + let Some(x) = pg_array_chr.next() else { + return Err(JsonConversionError::UnbalancedArray); + }; + (i, c) = x; } match c { @@ -253,6 +256,7 @@ fn pg_array_parse_inner( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use serde_json::json; diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 
b84cde9e25..c51a2bc9ba 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -179,7 +179,6 @@ pub(crate) fn poll_client( info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection"); }); let pool = Arc::downgrade(&global_pool); - let pool_clone = pool.clone(); let db_user = conn_info.db_and_user(); let idle = global_pool.get_idle_timeout(); @@ -273,11 +272,7 @@ pub(crate) fn poll_client( }), }; - Client::new( - inner, - conn_info, - Arc::downgrade(&pool_clone.upgrade().unwrap().global_pool), - ) + Client::new(inner, conn_info, Arc::downgrade(&global_pool.global_pool)) } impl ClientInnerCommon { @@ -321,7 +316,8 @@ fn resign_jwt(sk: &SigningKey, payload: &[u8], jti: u64) -> Result(buffer.format(jti)).unwrap(); + let jti = serde_json::from_str::<&RawValue>(buffer.format(jti)) + .expect("itoa formatted integer should be guaranteed valid json"); // update the jti in-place let payload = @@ -368,6 +364,7 @@ fn sign_jwt(sk: &SigningKey, payload: &[u8]) -> String { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use p256::ecdsa::SigningKey; use typed_json::json; diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 80b42f9e55..c2623e0eca 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -46,6 +46,7 @@ use utils::http::error::ApiError; use crate::cancellation::CancellationHandlerMain; use crate::config::{ProxyConfig, ProxyProtocolV2}; use crate::context::RequestContext; +use crate::ext::TaskExt; use crate::metrics::Metrics; use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; use crate::proxy::run_until_cancelled; @@ -84,7 +85,7 @@ pub async fn task_main( cancellation_token.cancelled().await; tokio::task::spawn_blocking(move || conn_pool.shutdown()) .await - .unwrap(); + .propagate_task_panic(); } }); @@ -104,7 +105,7 @@ pub async fn task_main( cancellation_token.cancelled().await; tokio::task::spawn_blocking(move || http_conn_pool.shutdown()) .await - .unwrap(); + .propagate_task_panic(); } }); diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 5e85f5ec40..3e42787a09 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -1110,6 +1110,7 @@ impl Discard<'_> { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index bdb83fe6be..812fedaf04 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -178,6 +178,7 @@ pub(crate) async fn serve_websocket( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::pin::pin; diff --git a/proxy/src/url.rs b/proxy/src/url.rs index 270cd7c24d..d73a84057a 100644 --- a/proxy/src/url.rs +++ b/proxy/src/url.rs @@ -50,6 +50,7 @@ impl std::fmt::Display for ApiUrl { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index 65e74466f2..487504d709 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -407,6 +407,7 @@ async fn upload_backup_events( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::fs; use std::io::BufReader; From 28ccda0a63f9661d961dbb1e5372f4a78d80346d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 16 Dec 2024 18:10:55 +0100 Subject: [PATCH 36/49] test_runner: ignore error in `test_timeline_archival_chaos` (#10161) 
Resolves #10159.

---
 test_runner/regress/test_timeline_archive.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py
index addf702893..87579f9e92 100644
--- a/test_runner/regress/test_timeline_archive.py
+++ b/test_runner/regress/test_timeline_archive.py
@@ -426,6 +426,7 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder):
         [
             ".*removing local file.*because it has unexpected length.*",
             ".*__temp.*",
+            ".*method=POST path=\\S+/timeline .*: Not activating a Stopping timeline.*",
             # FIXME: there are still anyhow::Error paths in timeline creation/deletion which
             # generate 500 results when called during shutdown (https://github.com/neondatabase/neon/issues/9768)
             ".*InternalServerError.*",

From aa7ab9b3aca4c399e6caa5fd8fb31e6f0a7723b0 Mon Sep 17 00:00:00 2001
From: Folke Behrens
Date: Mon, 16 Dec 2024 19:56:24 +0100
Subject: [PATCH 37/49] proxy: Allow dumping TLS session keys for debugging
 (#10163)

## Problem

To debug issues with TLS connections, there's no easy way to decrypt packets unless a client has special support for logging the keys.

## Summary of changes

Add TLS session key logging to the proxy via the `SSLKEYLOGFILE` env var, gated by a flag.

---
 proxy/src/bin/proxy.rs | 4 ++++
 proxy/src/config.rs    | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index 97c4037009..e90555e250 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -105,6 +105,9 @@ struct ProxyCliArgs {
     /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
     #[clap(short = 'c', long, alias = "ssl-cert")]
     tls_cert: Option<String>,
+    /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`.
+    #[clap(long, alias = "allow-ssl-keylogfile")]
+    allow_tls_keylogfile: bool,
     /// path to directory with TLS certificates for client postgres connections
     #[clap(long)]
     certs_dir: Option<String>,
@@ -555,6 +558,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
             key_path,
             cert_path,
             args.certs_dir.as_ref(),
+            args.allow_tls_keylogfile,
         )?),
         (None, None) => None,
         _ => bail!("either both or neither tls-key and tls-cert must be specified"),
diff --git a/proxy/src/config.rs b/proxy/src/config.rs
index 1f991d595e..debd77ac32 100644
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -95,6 +95,7 @@ pub fn configure_tls(
     key_path: &str,
     cert_path: &str,
     certs_dir: Option<&String>,
+    allow_tls_keylogfile: bool,
 ) -> anyhow::Result<TlsConfig> {
     let mut cert_resolver = CertResolver::new();
@@ -135,6 +136,11 @@ pub fn configure_tls(
     config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()];
+    if allow_tls_keylogfile {
+        // KeyLogFile will check for the SSLKEYLOGFILE environment variable.
+        config.key_log = Arc::new(rustls::KeyLogFile::new());
+    }
+
     Ok(TlsConfig {
         config: Arc::new(config),
         common_names,

From e226d7a3d1901c1a89f640c8d95b2efd057628b1 Mon Sep 17 00:00:00 2001
From: a-masterov <72613290+a-masterov@users.noreply.github.com>
Date: Tue, 17 Dec 2024 09:16:54 +0100
Subject: [PATCH 38/49] Fix docker compose with PG17 (#10165)

## Problem

It's impossible to run docker compose with compute v17 due to the `pg_anon` extension, which is not supported under PG17.
## Summary of changes The auto-loading of `pg_anon` is disabled by default --- .../compute_wrapper/var/db/postgres/specs/spec.json | 5 ----- docker-compose/docker_compose_test.sh | 12 ++++++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json index 8e582e74e1..0308cab451 100644 --- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json +++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json @@ -132,11 +132,6 @@ "name": "cron.database", "value": "postgres", "vartype": "string" - }, - { - "name": "session_preload_libraries", - "value": "anon", - "vartype": "string" } ] }, diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index c97dfaa901..063664d0c6 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -35,11 +35,11 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do echo "clean up containers if exists" cleanup PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version)) - # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option - if [ $pg_version -eq 17 ]; then + # The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions + if [ "${pg_version}" -ne 17 ]; then SPEC_PATH="compute_wrapper/var/db/postgres/specs" mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak - jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json + jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json" fi PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d @@ -106,8 +106,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do fi fi cleanup - # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option - if [ $pg_version -eq 17 ]; then - mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json + # Restore the original spec.json + if [ "$pg_version" -ne 17 ]; then + mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json" fi done From b0e43c2f88bf81473c931e3dcf50a22d62f3ebd4 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 17 Dec 2024 11:35:00 +0100 Subject: [PATCH 39/49] postgres_ffi: add `WalStreamDecoder::complete_record()` benchmark (#10158) Touches #10097. 
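As a side note for anyone extending these benches: Criterion derives its MB/s figures from the per-group throughput setting. A minimal self-contained benchmark following the same pattern as the new `waldecoder` bench (a generic example, not the actual file):

```rust
use criterion::{criterion_group, criterion_main, Criterion, Throughput};

fn bench_memcpy(c: &mut Criterion) {
    let mut g = c.benchmark_group("memcpy");
    for size in [64usize, 1024, 8192] {
        // Setting throughput per iteration lets Criterion report bytes/sec.
        g.throughput(Throughput::Bytes(size as u64));
        g.bench_function(format!("size={size}"), |b| {
            let src = vec![1u8; size];
            let mut dst = vec![0u8; size];
            b.iter(|| dst.copy_from_slice(&src));
        });
    }
    g.finish();
}

criterion_group!(benches, bench_memcpy);
criterion_main!(benches);
```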
--- Cargo.lock | 2 + libs/postgres_ffi/Cargo.toml | 6 +++ libs/postgres_ffi/benches/README.md | 26 +++++++++++++ libs/postgres_ffi/benches/waldecoder.rs | 49 +++++++++++++++++++++++++ libs/postgres_ffi/src/wal_generator.rs | 16 ++++++++ 5 files changed, 99 insertions(+) create mode 100644 libs/postgres_ffi/benches/README.md create mode 100644 libs/postgres_ffi/benches/waldecoder.rs diff --git a/Cargo.lock b/Cargo.lock index d1f7746969..5ec5253719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4401,11 +4401,13 @@ dependencies = [ "bindgen", "bytes", "crc32c", + "criterion", "env_logger", "log", "memoffset 0.9.0", "once_cell", "postgres", + "pprof", "regex", "serde", "thiserror", diff --git a/libs/postgres_ffi/Cargo.toml b/libs/postgres_ffi/Cargo.toml index e1f5443cbe..b7a376841d 100644 --- a/libs/postgres_ffi/Cargo.toml +++ b/libs/postgres_ffi/Cargo.toml @@ -9,9 +9,11 @@ regex.workspace = true bytes.workspace = true anyhow.workspace = true crc32c.workspace = true +criterion.workspace = true once_cell.workspace = true log.workspace = true memoffset.workspace = true +pprof.workspace = true thiserror.workspace = true serde.workspace = true utils.workspace = true @@ -24,3 +26,7 @@ postgres.workspace = true [build-dependencies] anyhow.workspace = true bindgen.workspace = true + +[[bench]] +name = "waldecoder" +harness = false diff --git a/libs/postgres_ffi/benches/README.md b/libs/postgres_ffi/benches/README.md new file mode 100644 index 0000000000..00a8980174 --- /dev/null +++ b/libs/postgres_ffi/benches/README.md @@ -0,0 +1,26 @@ +## Benchmarks + +To run benchmarks: + +```sh +# All benchmarks. +cargo bench --package postgres_ffi + +# Specific file. +cargo bench --package postgres_ffi --bench waldecoder + +# Specific benchmark. +cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 + +# List available benchmarks. +cargo bench --package postgres_ffi --benches -- --list + +# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds. +# Output in target/criterion/*/profile/flamegraph.svg. +cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 -- --profile-time 10 +``` + +Additional charts and statistics are available in `target/criterion/report/index.html`. + +Benchmarks are automatically compared against the previous run. To compare against other runs, see +`--baseline` and `--save-baseline`. \ No newline at end of file diff --git a/libs/postgres_ffi/benches/waldecoder.rs b/libs/postgres_ffi/benches/waldecoder.rs new file mode 100644 index 0000000000..c8cf0d322a --- /dev/null +++ b/libs/postgres_ffi/benches/waldecoder.rs @@ -0,0 +1,49 @@ +use std::ffi::CStr; + +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use postgres_ffi::v17::wal_generator::LogicalMessageGenerator; +use postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler; +use postgres_ffi::waldecoder::WalStreamDecoder; +use pprof::criterion::{Output, PProfProfiler}; +use utils::lsn::Lsn; + +const KB: usize = 1024; + +// Register benchmarks with Criterion. +criterion_group!( + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_complete_record, +); +criterion_main!(benches); + +/// Benchmarks WalStreamDecoder::complete_record() for a logical message of varying size. 
+fn bench_complete_record(c: &mut Criterion) { + let mut g = c.benchmark_group("complete_record"); + for size in [64, KB, 8 * KB, 128 * KB] { + // Kind of weird to change the group throughput per benchmark, but it's the only way + // to vary it per benchmark. It works. + g.throughput(criterion::Throughput::Bytes(size as u64)); + g.bench_function(format!("size={size}"), |b| run_bench(b, size).unwrap()); + } + + fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> { + const PREFIX: &CStr = c""; + let value_size = LogicalMessageGenerator::make_value_size(size, PREFIX); + let value = vec![1; value_size]; + + let mut decoder = WalStreamDecoder::new(Lsn(0), 170000); + let msg = LogicalMessageGenerator::new(PREFIX, &value) + .next() + .unwrap() + .encode(Lsn(0)); + assert_eq!(msg.len(), size); + + b.iter(|| { + let msg = msg.clone(); // Bytes::clone() is cheap + decoder.complete_record(msg).unwrap(); + }); + + Ok(()) + } +} diff --git a/libs/postgres_ffi/src/wal_generator.rs b/libs/postgres_ffi/src/wal_generator.rs index dc679eea33..69cc4b771f 100644 --- a/libs/postgres_ffi/src/wal_generator.rs +++ b/libs/postgres_ffi/src/wal_generator.rs @@ -231,6 +231,22 @@ impl LogicalMessageGenerator { }; [&header.encode(), prefix, message].concat().into() } + + /// Computes how large a value must be to get a record of the given size. Convenience method to + /// construct records of pre-determined size. Panics if the record size is too small. + pub fn make_value_size(record_size: usize, prefix: &CStr) -> usize { + let xlog_header_size = XLOG_SIZE_OF_XLOG_RECORD; + let lm_header_size = size_of::(); + let prefix_size = prefix.to_bytes_with_nul().len(); + let data_header_size = match record_size - xlog_header_size - 2 { + 0..=255 => 2, + 256..=258 => panic!("impossible record_size {record_size}"), + 259.. => 5, + }; + record_size + .checked_sub(xlog_header_size + lm_header_size + prefix_size + data_header_size) + .expect("record_size too small") + } } impl Iterator for LogicalMessageGenerator { From b5833ef2594b39b6616526b60cbc8c506e5870fc Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 17 Dec 2024 12:24:51 +0000 Subject: [PATCH 40/49] remote_storage: configurable connection pooling for ABS (#10169) ## Problem The ABS SDK's default behavior is to do no connection pooling, i.e. open and close a fresh connection for each request. Under high request rates, this can result in an accumulation of TCP connections in TIME_WAIT or CLOSE_WAIT state, and in extreme cases exhaustion of client ports. Related: https://github.com/neondatabase/cloud/issues/20971 ## Summary of changes - Add a configurable `conn_pool_size` parameter for Azure storage, defaulting to zero (current behavior) - Construct a custom reqwest client using this connection pool size. 
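Concretely, the new knob maps onto reqwest's per-host idle-connection limit. A condensed sketch of the helper this PR adds (the full diff follows below):

```rust
use std::sync::Arc;

fn reqwest_client(conn_pool_size: usize) -> Arc<reqwest::Client> {
    // pool_max_idle_per_host(0) keeps no idle connections, i.e. no pooling
    // (the conservative default); larger values let the client reuse up to
    // that many idle connections per host, which avoids the TIME_WAIT /
    // CLOSE_WAIT buildup seen under high request rates.
    let client = reqwest::ClientBuilder::new()
        .pool_max_idle_per_host(conn_pool_size)
        .build()
        .expect("failed to build `reqwest` client");
    Arc::new(client)
}
```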
---
 Cargo.lock                                   |  1 +
 libs/remote_storage/Cargo.toml               |  1 +
 libs/remote_storage/src/azure_blob.rs        | 20 ++++++++++++++++++--
 libs/remote_storage/src/config.rs            | 14 ++++++++++++++
 libs/remote_storage/tests/test_real_azure.rs |  1 +
 5 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5ec5253719..b9b89efa02 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5064,6 +5064,7 @@ dependencies = [
  "once_cell",
  "pin-project-lite",
  "rand 0.8.5",
+ "reqwest",
  "scopeguard",
  "serde",
  "serde_json",
diff --git a/libs/remote_storage/Cargo.toml b/libs/remote_storage/Cargo.toml
index 1816825bda..33fa6e89f5 100644
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -18,6 +18,7 @@ camino = { workspace = true, features = ["serde1"] }
 humantime-serde.workspace = true
 hyper = { workspace = true, features = ["client"] }
 futures.workspace = true
+reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs
index 19c8251ccd..c89f50ef2b 100644
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -8,6 +8,7 @@ use std::io;
 use std::num::NonZeroU32;
 use std::pin::Pin;
 use std::str::FromStr;
+use std::sync::Arc;
 use std::time::Duration;
 use std::time::SystemTime;
 
@@ -15,6 +16,8 @@ use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
 use anyhow::Context;
 use anyhow::Result;
 use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
+use azure_core::HttpClient;
+use azure_core::TransportOptions;
 use azure_core::{Continuable, RetryOptions};
 use azure_storage::StorageCredentials;
 use azure_storage_blobs::blob::CopyStatus;
@@ -80,8 +83,13 @@ impl AzureBlobStorage {
             StorageCredentials::token_credential(token_credential)
         };
 
-        // we have an outer retry
-        let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none());
+        let builder = ClientBuilder::new(account, credentials)
+            // we have an outer retry
+            .retry(RetryOptions::none())
+            // Customize transport to configure connection pooling
+            .transport(TransportOptions::new(Self::reqwest_client(
+                azure_config.conn_pool_size,
+            )));
 
         let client = builder.container_client(azure_config.container_name.to_owned());
 
@@ -106,6 +114,14 @@ impl AzureBlobStorage {
         })
     }
 
+    fn reqwest_client(conn_pool_size: usize) -> Arc<dyn HttpClient> {
+        let client = reqwest::ClientBuilder::new()
+            .pool_max_idle_per_host(conn_pool_size)
+            .build()
+            .expect("failed to build `reqwest` client");
+        Arc::new(client)
+    }
+
     pub fn relative_path_to_name(&self, path: &RemotePath) -> String {
         assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);
         let path_string = path.get_path().as_str();
diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs
index f6ef31077c..dd49d4d5e7 100644
--- a/libs/remote_storage/src/config.rs
+++ b/libs/remote_storage/src/config.rs
@@ -114,6 +114,16 @@ fn default_max_keys_per_list_response() -> Option<i32> {
     DEFAULT_MAX_KEYS_PER_LIST_RESPONSE
 }
 
+fn default_azure_conn_pool_size() -> usize {
+    // Conservative default: no connection pooling.
At time of writing this is the Azure + // SDK's default as well, due to historic reports of hard-to-reproduce issues + // (https://github.com/hyperium/hyper/issues/2312) + // + // However, using connection pooling is important to avoid exhausting client ports when + // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971) + 0 +} + impl Debug for S3Config { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("S3Config") @@ -146,6 +156,8 @@ pub struct AzureConfig { pub concurrency_limit: NonZeroUsize, #[serde(default = "default_max_keys_per_list_response")] pub max_keys_per_list_response: Option, + #[serde(default = "default_azure_conn_pool_size")] + pub conn_pool_size: usize, } fn default_remote_storage_azure_concurrency_limit() -> NonZeroUsize { @@ -302,6 +314,7 @@ timeout = '5s'"; container_region = 'westeurope' upload_storage_class = 'INTELLIGENT_TIERING' timeout = '7s' + conn_pool_size = 8 "; let config = parse(toml).unwrap(); @@ -316,6 +329,7 @@ timeout = '5s'"; prefix_in_container: None, concurrency_limit: default_remote_storage_azure_concurrency_limit(), max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, + conn_pool_size: 8, }), timeout: Duration::from_secs(7), small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index 92d579fec8..15004dbf83 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -218,6 +218,7 @@ async fn create_azure_client( prefix_in_container: Some(format!("test_{millis}_{random:08x}/")), concurrency_limit: NonZeroUsize::new(100).unwrap(), max_keys_per_list_response, + conn_pool_size: 8, }), timeout: RemoteStorageConfig::DEFAULT_TIMEOUT, small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT, From 2dfd3cab8cd7840afee57acffaa45aa803d04f20 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Tue, 17 Dec 2024 17:14:07 +0100 Subject: [PATCH 41/49] fix(compute): Report compute_backpressure_throttling_seconds as counter (#10125) ## Problem It was reported as `gauge`, but it's actually a `counter`. Also add `_total` suffix as that's the convention for counters. 
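Because counters are monotonic, dashboards can derive a throttling rate from this metric; for example, a query along the lines of `rate(compute_backpressure_throttling_seconds_total[5m])` gives the fraction of time the compute spent throttled over the last five minutes.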
The corresponding flux-fleet PR: https://github.com/neondatabase/flux-fleet/pull/386 --- compute/etc/neon_collector.jsonnet | 2 +- ...compute_backpressure_throttling_seconds_total.libsonnet} | 6 +++--- ...ql => compute_backpressure_throttling_seconds_total.sql} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename compute/etc/sql_exporter/{compute_backpressure_throttling_seconds.libsonnet => compute_backpressure_throttling_seconds_total.libsonnet} (61%) rename compute/etc/sql_exporter/{compute_backpressure_throttling_seconds.sql => compute_backpressure_throttling_seconds_total.sql} (100%) diff --git a/compute/etc/neon_collector.jsonnet b/compute/etc/neon_collector.jsonnet index aa6cc1cfc8..f8f4cab63b 100644 --- a/compute/etc/neon_collector.jsonnet +++ b/compute/etc/neon_collector.jsonnet @@ -3,7 +3,7 @@ metrics: [ import 'sql_exporter/checkpoints_req.libsonnet', import 'sql_exporter/checkpoints_timed.libsonnet', - import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet', + import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet', import 'sql_exporter/compute_current_lsn.libsonnet', import 'sql_exporter/compute_logical_snapshot_files.libsonnet', import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet', diff --git a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet similarity index 61% rename from compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet rename to compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet index 02c803cfa6..31725bd179 100644 --- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet +++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet @@ -1,10 +1,10 @@ { - metric_name: 'compute_backpressure_throttling_seconds', - type: 'gauge', + metric_name: 'compute_backpressure_throttling_seconds_total', + type: 'counter', help: 'Time compute has spent throttled', key_labels: null, values: [ 'throttled', ], - query: importstr 'sql_exporter/compute_backpressure_throttling_seconds.sql', + query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql', } diff --git a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql similarity index 100% rename from compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql rename to compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql From 007b13b79af0fa75151a8fba58a65d3c4933942f Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Tue, 17 Dec 2024 16:43:54 +0000 Subject: [PATCH 42/49] Don't build tests in compute image, use ninja (#10149) Don't build tests in h3 and rdkit: ~15 min speedup. Use Ninja as cmake generator where possible: ~10 min speedup. 
Clean apt cache for smaller images: around 250mb size loss for intermediate layers --- compute/compute-node.Dockerfile | 78 ++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 33d2a10285..1e11efeaf8 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -35,10 +35,12 @@ RUN case $DEBIAN_VERSION in \ ;; \ esac && \ apt update && \ - apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \ + apt install --no-install-recommends --no-install-suggests -y \ + ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \ libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \ - $VERSION_INSTALLS + $VERSION_INSTALLS \ + && apt clean && rm -rf /var/lib/apt/lists/* ######################################################################################### # @@ -113,10 +115,12 @@ ARG DEBIAN_VERSION ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN apt update && \ - apt install --no-install-recommends -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ + apt install --no-install-recommends --no-install-suggests -y \ + gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \ libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \ - protobuf-c-compiler xsltproc + protobuf-c-compiler xsltproc \ + && apt clean && rm -rf /var/lib/apt/lists/* # Postgis 3.5.0 requires SFCGAL 1.4+ @@ -143,9 +147,9 @@ RUN case "${DEBIAN_VERSION}" in \ wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \ echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \ mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ - cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \ - DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ - make clean && cp -R /sfcgal/* / + cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \ + DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \ + ninja clean && cp -R /sfcgal/* / ENV PATH="/usr/local/pgsql/bin:$PATH" @@ -213,9 +217,9 @@ RUN case "${PG_VERSION}" in \ echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \ mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ - cmake -DCMAKE_BUILD_TYPE=Release .. && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 
&& \ + ninja -j $(getconf _NPROCESSORS_ONLN) && \ + ninja -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \ find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\ cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \ @@ -235,7 +239,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch RUN apt update && \ - apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang + apt install --no-install-recommends --no-install-suggests -y \ + ninja-build python3-dev libncurses5 binutils clang \ + && apt clean && rm -rf /var/lib/apt/lists/* # plv8 3.2.3 supports v17 # last release v3.2.3 - Sep 7, 2024 @@ -301,9 +307,10 @@ RUN mkdir -p /h3/usr/ && \ echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ - cmake .. -DCMAKE_BUILD_TYPE=Release && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - DESTDIR=/h3 make install && \ + cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \ + && ninja -j $(getconf _NPROCESSORS_ONLN) && \ + DESTDIR=/h3 ninja install && \ cp -R /h3/usr / && \ rm -rf build @@ -650,14 +657,15 @@ FROM build-deps AS rdkit-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y \ libboost-iostreams1.74-dev \ libboost-regex1.74-dev \ libboost-serialization1.74-dev \ libboost-system1.74-dev \ libeigen3-dev \ - libboost-all-dev + libboost-all-dev \ + && apt clean && rm -rf /var/lib/apt/lists/* # rdkit Release_2024_09_1 supports v17 # last release Release_2024_09_1 - Sep 27, 2024 @@ -693,6 +701,8 @@ RUN case "${PG_VERSION}" in \ -D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \ -D RDK_BUILD_YAEHMOP_SUPPORT=OFF \ -D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \ + -D RDK_TEST_MULTITHREADED=OFF \ + -D RDK_BUILD_CPP_TESTS=OFF \ -D RDK_USE_URF=OFF \ -D RDK_BUILD_PGSQL=ON \ -D RDK_PGSQL_STATIC=ON \ @@ -704,9 +714,10 @@ RUN case "${PG_VERSION}" in \ -D RDK_INSTALL_COMIC_FONTS=OFF \ -D RDK_BUILD_FREETYPE_SUPPORT=OFF \ -D CMAKE_BUILD_TYPE=Release \ + -GNinja \ . 
&& \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ + ninja -j $(getconf _NPROCESSORS_ONLN) && \ + ninja -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control ######################################################################################### @@ -849,8 +860,9 @@ FROM build-deps AS rust-extensions-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y curl libclang-dev && \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ useradd -ms /bin/bash nonroot -b /home ENV HOME=/home/nonroot @@ -885,8 +897,9 @@ FROM build-deps AS rust-extensions-build-pgrx12 ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y curl libclang-dev && \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ useradd -ms /bin/bash nonroot -b /home ENV HOME=/home/nonroot @@ -914,18 +927,22 @@ FROM rust-extensions-build-pgrx12 AS pg-onnx-build # cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25). # Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise -RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \ +RUN apt update && apt install --no-install-recommends --no-install-suggests -y \ + python3 python3-pip python3-venv && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ python3 -m venv venv && \ . venv/bin/activate && \ python3 -m pip install cmake==3.30.5 && \ wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \ mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \ - ./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root + ./build.sh --config Release --parallel --cmake_generator Ninja \ + --skip_submodule_sync --skip_tests --allow_running_as_root FROM pg-onnx-build AS pgrag-pg-build -RUN apt-get install -y protobuf-compiler && \ +RUN apt update && apt install --no-install-recommends --no-install-suggests -y protobuf-compiler \ + && apt clean && rm -rf /var/lib/apt/lists/* && \ wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \ echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \ mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . 
&& \ @@ -1279,8 +1296,8 @@ COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_ FROM debian:$DEBIAN_FLAVOR AS pgbouncer RUN set -e \ - && apt-get update \ - && apt-get install --no-install-recommends -y \ + && apt update \ + && apt install --no-install-suggests --no-install-recommends -y \ build-essential \ git \ ca-certificates \ @@ -1288,7 +1305,8 @@ RUN set -e \ automake \ libevent-dev \ libtool \ - pkg-config + pkg-config \ + && apt clean && rm -rf /var/lib/apt/lists/* # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) ENV PGBOUNCER_TAG=pgbouncer_1_22_1 @@ -1519,7 +1537,7 @@ RUN apt update && \ procps \ ca-certificates \ $VERSION_INSTALLS && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ + apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 # s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2 From a55853f67fad71c06fd0b856ada7cf8fea4306bd Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 17 Dec 2024 17:51:58 +0100 Subject: [PATCH 43/49] utils: symbolize heap profiles (#10153) ## Problem Jemalloc heap profiles aren't symbolized. This is inconvenient, and doesn't work with Grafana Cloud Profiles. Resolves #9964. ## Summary of changes Symbolize the heap profiles in-process, and strip unnecessary cruft. This uses about 100 MB additional memory to cache the DWARF information, but I believe this is already the case with CPU profiles, which use the same library for symbolization. With cached DWARF information, the symbolization CPU overhead is negligible. Example profiles: * [pageserver.pb.gz](https://github.com/user-attachments/files/18141395/pageserver.pb.gz) * [safekeeper.pb.gz](https://github.com/user-attachments/files/18141396/safekeeper.pb.gz) --- Cargo.lock | 105 +++++++++++------- Cargo.toml | 1 + libs/utils/Cargo.toml | 5 +- libs/utils/src/http/endpoint.rs | 46 ++++++-- libs/utils/src/lib.rs | 2 + libs/utils/src/pprof.rs | 190 ++++++++++++++++++++++++++++++++ 6 files changed, 298 insertions(+), 51 deletions(-) create mode 100644 libs/utils/src/pprof.rs diff --git a/Cargo.lock b/Cargo.lock index b9b89efa02..d9ac167042 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" [[package]] name = "addr2line" -version = "0.21.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] @@ -23,6 +23,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "ahash" version = "0.8.11" @@ -871,17 +877,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.8.0", 
"object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -1127,7 +1133,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -2107,7 +2113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.7.1", ] [[package]] @@ -2308,9 +2314,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "git-version" @@ -3404,6 +3410,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "0.8.11" @@ -3638,9 +3653,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] @@ -5323,9 +5338,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -7219,6 +7234,7 @@ dependencies = [ "anyhow", "arc-swap", "async-compression", + "backtrace", "bincode", "byteorder", "bytes", @@ -7229,12 +7245,14 @@ dependencies = [ "criterion", "diatomic-waker", "fail", + "flate2", "futures", "git-version", "hex", "hex-literal", "humantime", "hyper 0.14.30", + "itertools 0.10.5", "jemalloc_pprof", "jsonwebtoken", "metrics", @@ -7591,7 +7609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7600,7 +7618,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7618,7 +7636,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7638,17 +7656,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - 
"windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -7659,9 +7678,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -7671,9 +7690,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -7683,9 +7702,15 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -7695,9 +7720,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -7707,9 +7732,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -7719,9 +7744,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -7731,9 +7756,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" diff --git a/Cargo.toml b/Cargo.toml index 056cd5798f..885f02ba81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ anyhow = { version = "1.0", features = ["backtrace"] } arc-swap = "1.6" async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } atomic-take = "1.1.0" +backtrace = "0.3.74" flate2 = "1.0.26" async-stream = "0.3" async-trait = "0.1" diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 66500fb141..02bf77760a 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -15,17 +15,20 @@ arc-swap.workspace = true sentry.workspace = true async-compression.workspace = true anyhow.workspace = true +backtrace.workspace = true bincode.workspace = true bytes.workspace = true camino.workspace = true chrono.workspace = true diatomic-waker.workspace = true +flate2.workspace = true git-version.workspace = true hex = { workspace = true, features = ["serde"] } humantime.workspace = true hyper0 = { workspace = true, features = ["full"] } +itertools.workspace = true fail.workspace = true -futures = { workspace = true} +futures = { workspace = true } jemalloc_pprof.workspace = true jsonwebtoken.workspace = true nix.workspace = true diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index d975b63677..9b37b69939 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -1,15 +1,22 @@ use crate::auth::{AuthError, Claims, SwappableJwtAuth}; use crate::http::error::{api_error_handler, route_error_handler, ApiError}; use crate::http::request::{get_query_param, parse_query_param}; +use crate::pprof; +use ::pprof::protos::Message as _; +use ::pprof::ProfilerGuardBuilder; use anyhow::{anyhow, Context}; +use bytes::{Bytes, BytesMut}; use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION}; use hyper::http::HeaderValue; use hyper::Method; use hyper::{header::CONTENT_TYPE, Body, Request, Response}; use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder}; use once_cell::sync::Lazy; +use regex::Regex; use routerify::ext::RequestExt; use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; +use tokio::sync::{mpsc, Mutex}; +use tokio_stream::wrappers::ReceiverStream; use tokio_util::io::ReaderStream; use tracing::{debug, info, info_span, warn, Instrument}; @@ -18,11 +25,6 @@ use std::io::Write as _; use std::str::FromStr; use std::time::Duration; -use bytes::{Bytes, BytesMut}; -use pprof::protos::Message as _; -use tokio::sync::{mpsc, Mutex}; -use tokio_stream::wrappers::ReceiverStream; - static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { register_int_counter!( "libmetrics_metric_handler_requests_total", @@ -365,7 +367,7 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A // Take the profile. let report = tokio::task::spawn_blocking(move || { - let guard = pprof::ProfilerGuardBuilder::default() + let guard = ProfilerGuardBuilder::default() .frequency(frequency_hz) .blocklist(&["libc", "libgcc", "pthread", "vdso"]) .build()?; @@ -457,10 +459,34 @@ pub async fn profile_heap_handler(req: Request) -> Result, } Format::Pprof => { - let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? 
- .map_err(ApiError::InternalServerError)?; + let data = tokio::task::spawn_blocking(move || { + let bytes = prof_ctl.dump_pprof()?; + // Symbolize the profile. + // TODO: consider moving this upstream to jemalloc_pprof and avoiding the + // serialization roundtrip. + static STRIP_FUNCTIONS: Lazy> = Lazy::new(|| { + // Functions to strip from profiles. If true, also remove child frames. + vec![ + (Regex::new("^__rust").unwrap(), false), + (Regex::new("^_start$").unwrap(), false), + (Regex::new("^irallocx_prof").unwrap(), true), + (Regex::new("^prof_alloc_prep").unwrap(), true), + (Regex::new("^std::rt::lang_start").unwrap(), false), + (Regex::new("^std::sys::backtrace::__rust").unwrap(), false), + ] + }); + let profile = pprof::decode(&bytes)?; + let profile = pprof::symbolize(profile)?; + let profile = pprof::strip_locations( + profile, + &["libc", "libgcc", "pthread", "vdso"], + &STRIP_FUNCTIONS, + ); + pprof::encode(&profile) + }) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "application/octet-stream") diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index bccd0e0488..2c56dd750f 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -96,6 +96,8 @@ pub mod circuit_breaker; pub mod try_rcu; +pub mod pprof; + // Re-export used in macro. Avoids adding git-version as dep in target crates. #[doc(hidden)] pub use git_version; diff --git a/libs/utils/src/pprof.rs b/libs/utils/src/pprof.rs new file mode 100644 index 0000000000..90910897bf --- /dev/null +++ b/libs/utils/src/pprof.rs @@ -0,0 +1,190 @@ +use flate2::write::{GzDecoder, GzEncoder}; +use flate2::Compression; +use itertools::Itertools as _; +use once_cell::sync::Lazy; +use pprof::protos::{Function, Line, Message as _, Profile}; +use regex::Regex; + +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use std::ffi::c_void; +use std::io::Write as _; + +/// Decodes a gzip-compressed Protobuf-encoded pprof profile. +pub fn decode(bytes: &[u8]) -> anyhow::Result { + let mut gz = GzDecoder::new(Vec::new()); + gz.write_all(bytes)?; + Ok(Profile::parse_from_bytes(&gz.finish()?)?) +} + +/// Encodes a pprof profile as gzip-compressed Protobuf. +pub fn encode(profile: &Profile) -> anyhow::Result> { + let mut gz = GzEncoder::new(Vec::new(), Compression::default()); + profile.write_to_writer(&mut gz)?; + Ok(gz.finish()?) +} + +/// Symbolizes a pprof profile using the current binary. +pub fn symbolize(mut profile: Profile) -> anyhow::Result { + if !profile.function.is_empty() { + return Ok(profile); // already symbolized + } + + // Collect function names. + let mut functions: HashMap = HashMap::new(); + let mut strings: HashMap = profile + .string_table + .into_iter() + .enumerate() + .map(|(i, s)| (s, i as i64)) + .collect(); + + // Helper to look up or register a string. + let mut string_id = |s: &str| -> i64 { + // Don't use .entry() to avoid unnecessary allocations. + if let Some(id) = strings.get(s) { + return *id; + } + let id = strings.len() as i64; + strings.insert(s.to_string(), id); + id + }; + + for loc in &mut profile.location { + if !loc.line.is_empty() { + continue; + } + + // Resolve the line and function for each location. + backtrace::resolve(loc.address as *mut c_void, |symbol| { + let Some(symname) = symbol.name() else { + return; + }; + let mut name = symname.to_string(); + + // Strip the Rust monomorphization suffix from the symbol name. 
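+            // e.g. `my_crate::foo::h1a2b3c4d5e6f7a8b` becomes `my_crate::foo`.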
+ static SUFFIX_REGEX: Lazy = + Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex")); + if let Some(m) = SUFFIX_REGEX.find(&name) { + name.truncate(m.start()); + } + + let function_id = match functions.get(&name) { + Some(function) => function.id, + None => { + let id = functions.len() as u64 + 1; + let system_name = String::from_utf8_lossy(symname.as_bytes()); + let filename = symbol + .filename() + .map(|path| path.to_string_lossy()) + .unwrap_or(Cow::Borrowed("")); + let function = Function { + id, + name: string_id(&name), + system_name: string_id(&system_name), + filename: string_id(&filename), + ..Default::default() + }; + functions.insert(name, function); + id + } + }; + loc.line.push(Line { + function_id, + line: symbol.lineno().unwrap_or(0) as i64, + ..Default::default() + }); + }); + } + + // Store the resolved functions, and mark the mapping as resolved. + profile.function = functions.into_values().sorted_by_key(|f| f.id).collect(); + profile.string_table = strings + .into_iter() + .sorted_by_key(|(_, i)| *i) + .map(|(s, _)| s) + .collect(); + + for mapping in &mut profile.mapping { + mapping.has_functions = true; + mapping.has_filenames = true; + } + + Ok(profile) +} + +/// Strips locations (stack frames) matching the given mappings (substring) or function names +/// (regex). The function bool specifies whether child frames should be stripped as well. +/// +/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all +/// string references. +pub fn strip_locations( + mut profile: Profile, + mappings: &[&str], + functions: &[(Regex, bool)], +) -> Profile { + // Strip mappings. + let mut strip_mappings: HashSet = HashSet::new(); + + profile.mapping.retain(|mapping| { + let Some(name) = profile.string_table.get(mapping.filename as usize) else { + return true; + }; + if mappings.iter().any(|substr| name.contains(substr)) { + strip_mappings.insert(mapping.id); + return false; + } + true + }); + + // Strip functions. + let mut strip_functions: HashMap = HashMap::new(); + + profile.function.retain(|function| { + let Some(name) = profile.string_table.get(function.name as usize) else { + return true; + }; + for (regex, strip_children) in functions { + if regex.is_match(name) { + strip_functions.insert(function.id, *strip_children); + return false; + } + } + true + }); + + // Strip locations. The bool specifies whether child frames should be stripped too. + let mut strip_locations: HashMap = HashMap::new(); + + profile.location.retain(|location| { + for line in &location.line { + if let Some(strip_children) = strip_functions.get(&line.function_id) { + strip_locations.insert(location.id, *strip_children); + return false; + } + } + if strip_mappings.contains(&location.mapping_id) { + strip_locations.insert(location.id, false); + return false; + } + true + }); + + // Strip sample locations. + for sample in &mut profile.sample { + // First, find the uppermost function with child removal and truncate the stack. + if let Some(truncate) = sample + .location_id + .iter() + .rposition(|id| strip_locations.get(id) == Some(&true)) + { + sample.location_id.drain(..=truncate); + } + // Next, strip any individual frames without child removal. 
+ sample + .location_id + .retain(|id| !strip_locations.contains_key(id)); + } + + profile +} From 7dddbb9570ca52d174b7d53ca2495fb272796a7f Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Tue, 17 Dec 2024 12:36:55 -0600 Subject: [PATCH 44/49] Add pg_repack extension (#10100) Our solutions engineers and some customers would like to have this extension available. Link: https://github.com/neondatabase/cloud/issues/18890 Signed-off-by: Tristan Partin --- compute/compute-node.Dockerfile | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 1e11efeaf8..9f1f3b7343 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1185,6 +1185,25 @@ RUN case "${PG_VERSION}" in \ make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control +######################################################################################### +# +# Layer "pg_repack" +# compile pg_repack extension +# +######################################################################################### + +FROM build-deps AS pg-repack-build +ARG PG_VERSION +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +ENV PATH="/usr/local/pgsql/bin/:$PATH" + +RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \ + echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \ + mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . && \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make -j $(getconf _NPROCESSORS_ONLN) install + ######################################################################################### # # Layer "neon-pg-ext-build" @@ -1230,6 +1249,7 @@ COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ From 93e958341fc47317a0b430b40ae4906da7294d2f Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Tue, 17 Dec 2024 21:26:54 +0200 Subject: [PATCH 45/49] [proxy]: Use TLS for cancellation queries (#10152) ## Problem pg_sni_router assumes that all the streams are upgradable to TLS. Cancellation requests were declined because of using NoTls config. ## Summary of changes Provide TLS client config for cancellation requests. Fixes [#21789](https://github.com/orgs/neondatabase/projects/65/views/1?pane=issue&itemId=90911361&issue=neondatabase%7Ccloud%7C21789) --- proxy/src/bin/pg_sni_router.rs | 2 +- proxy/src/cancellation.rs | 61 ++++++++++++++++++++++++++++++++-- proxy/src/compute.rs | 6 ++-- 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 623a0fd3b2..9538384b9e 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -229,7 +229,7 @@ async fn ssl_handshake( let (raw, read_buf) = stream.into_inner(); // TODO: Normally, client doesn't send any data before - // server says TLS handshake is ok and read_buf is empy. + // server says TLS handshake is ok and read_buf is empty. 
// However, you could imagine pipelining of postgres // SSLRequest + TLS ClientHello in one hunk similar to // pipelining in our node js driver. We should probably diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index dd3edd6abc..a58e3961da 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -3,8 +3,10 @@ use std::sync::Arc; use dashmap::DashMap; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; -use postgres_client::{CancelToken, NoTls}; +use once_cell::sync::OnceCell; +use postgres_client::{tls::MakeTlsConnect, CancelToken}; use pq_proto::CancelKeyData; +use rustls::crypto::ring; use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; @@ -20,6 +22,9 @@ use crate::redis::cancellation_publisher::{ CancellationPublisher, CancellationPublisherMut, RedisPublisherClient, }; +use crate::compute::{load_certs, AcceptEverythingVerifier}; +use crate::postgres_rustls::MakeRustlsConnect; + pub type CancelMap = Arc>>; pub type CancellationHandlerMain = CancellationHandler>>>; pub(crate) type CancellationHandlerMainInternal = Option>>; @@ -174,7 +179,10 @@ impl CancellationHandler

{ source: self.from, kind: crate::metrics::CancellationOutcome::Found, }); - info!("cancelling query per user's request using key {key}"); + info!( + "cancelling query per user's request using key {key}, hostname {}, address: {}", + cancel_closure.hostname, cancel_closure.socket_addr + ); cancel_closure.try_cancel_query().await } @@ -221,6 +229,8 @@ impl CancellationHandler>>> { } } +static TLS_ROOTS: OnceCell> = OnceCell::new(); + /// This should've been a [`std::future::Future`], but /// it's impossible to name a type of an unboxed future /// (we'd need something like `#![feature(type_alias_impl_trait)]`). @@ -229,6 +239,8 @@ pub struct CancelClosure { socket_addr: SocketAddr, cancel_token: CancelToken, ip_allowlist: Vec, + hostname: String, // for pg_sni router + allow_self_signed_compute: bool, } impl CancelClosure { @@ -236,17 +248,60 @@ impl CancelClosure { socket_addr: SocketAddr, cancel_token: CancelToken, ip_allowlist: Vec, + hostname: String, + allow_self_signed_compute: bool, ) -> Self { Self { socket_addr, cancel_token, ip_allowlist, + hostname, + allow_self_signed_compute, } } /// Cancels the query running on user's compute node. pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; - self.cancel_token.cancel_query_raw(socket, NoTls).await?; + + let client_config = if self.allow_self_signed_compute { + // Allow all certificates for creating the connection. Used only for tests + let verifier = Arc::new(AcceptEverythingVerifier); + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .dangerous() + .with_custom_certificate_verifier(verifier) + } else { + let root_store = TLS_ROOTS + .get_or_try_init(load_certs) + .map_err(|_e| { + CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + "TLS root store initialization failed".to_string(), + )) + })? 
+ .clone(); + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .with_root_certificates(root_store) + }; + + let client_config = client_config.with_no_client_auth(); + + let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); + let tls = >::make_tls_connect( + &mut mk_tls, + &self.hostname, + ) + .map_err(|e| { + CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + )) + })?; + + self.cancel_token.cancel_query_raw(socket, tls).await?; debug!("query was cancelled"); Ok(()) } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 4113b5bb80..42df5ff5e3 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -319,6 +319,8 @@ impl ConnCfg { secret_key, }, vec![], + host.to_string(), + allow_self_signed_compute, ); let connection = PostgresConnection { @@ -350,7 +352,7 @@ fn filtered_options(options: &str) -> Option { Some(options) } -fn load_certs() -> Result, Vec> { +pub(crate) fn load_certs() -> Result, Vec> { let der_certs = rustls_native_certs::load_native_certs(); if !der_certs.errors.is_empty() { @@ -364,7 +366,7 @@ fn load_certs() -> Result, Vec> = OnceCell::new(); #[derive(Debug)] -struct AcceptEverythingVerifier; +pub(crate) struct AcceptEverythingVerifier; impl ServerCertVerifier for AcceptEverythingVerifier { fn supported_verify_schemes(&self) -> Vec { use rustls::SignatureScheme; From fd230227f2fd370821610f91601c9b02dda23e0d Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 17 Dec 2024 20:04:09 +0000 Subject: [PATCH 46/49] storcon: include preferred AZ in compute notifications (#9953) ## Problem It is unreliable for the control plane to infer the AZ for computes from where the tenant is currently attached, because if a tenant happens to be in a degraded state or a release is ongoing while a compute starts, then the tenant's attached AZ can be a different one to where it will run long-term, and the control plane doesn't check back later to restart the compute. 
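With this change the notification body carries the AZ explicitly. An illustrative payload (IDs and AZ are placeholders, matching the shape asserted in the regression tests below):

```json
{
  "tenant_id": "<tenant id>",
  "stripe_size": null,
  "shards": [{"node_id": 1, "shard_number": 0}],
  "preferred_az": "us-east-2a"
}
```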
This can land in parallel with https://github.com/neondatabase/neon/pull/9947 ## Summary of changes - Thread through the preferred AZ into the compute hook code via the reconciler - Include the preferred AZ in the body of compute hook notifications --- storage_controller/src/compute_hook.rs | 138 +++++++++++++----- storage_controller/src/reconciler.rs | 15 +- storage_controller/src/service.rs | 25 +++- storage_controller/src/tenant_shard.rs | 1 + test_runner/fixtures/neon_fixtures.py | 5 +- test_runner/regress/test_sharding.py | 3 + .../regress/test_storage_controller.py | 5 + 7 files changed, 139 insertions(+), 53 deletions(-) diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index 2b2ece3f02..69db48f8d1 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::error::Error as _; use std::sync::Arc; use std::{collections::HashMap, time::Duration}; @@ -6,6 +7,7 @@ use control_plane::endpoint::{ComputeControlPlane, EndpointStatus}; use control_plane::local_env::LocalEnv; use futures::StreamExt; use hyper::StatusCode; +use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId}; use postgres_connection::parse_host_port; use serde::{Deserialize, Serialize}; @@ -28,6 +30,9 @@ struct UnshardedComputeHookTenant { // Which node is this tenant attached to node_id: NodeId, + // The tenant's preferred AZ, so that we may pass this on to the control plane + preferred_az: Option, + // Must hold this lock to send a notification. send_lock: Arc>>, } @@ -36,6 +41,9 @@ struct ShardedComputeHookTenant { shard_count: ShardCount, shards: Vec<(ShardNumber, NodeId)>, + // The tenant's preferred AZ, so that we may pass this on to the control plane + preferred_az: Option, + // Must hold this lock to send a notification. The contents represent // the last successfully sent notification, and are used to coalesce multiple // updates by only sending when there is a chance since our last successful send. @@ -64,17 +72,24 @@ enum ComputeHookTenant { impl ComputeHookTenant { /// Construct with at least one shard's information - fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self { + fn new( + tenant_shard_id: TenantShardId, + stripe_size: ShardStripeSize, + preferred_az: Option, + node_id: NodeId, + ) -> Self { if tenant_shard_id.shard_count.count() > 1 { Self::Sharded(ShardedComputeHookTenant { shards: vec![(tenant_shard_id.shard_number, node_id)], stripe_size, shard_count: tenant_shard_id.shard_count, + preferred_az, send_lock: Arc::default(), }) } else { Self::Unsharded(UnshardedComputeHookTenant { node_id, + preferred_az, send_lock: Arc::default(), }) } @@ -120,15 +135,20 @@ impl ComputeHookTenant { /// Set one shard's location. If stripe size or shard count have changed, Self is reset /// and drops existing content. 
- fn update( - &mut self, - tenant_shard_id: TenantShardId, - stripe_size: ShardStripeSize, - node_id: NodeId, - ) { + fn update(&mut self, shard_update: ShardUpdate) { + let tenant_shard_id = shard_update.tenant_shard_id; + let node_id = shard_update.node_id; + let stripe_size = shard_update.stripe_size; + let preferred_az = shard_update.preferred_az; + match self { Self::Unsharded(unsharded_tenant) if tenant_shard_id.shard_count.count() == 1 => { - unsharded_tenant.node_id = node_id + unsharded_tenant.node_id = node_id; + if unsharded_tenant.preferred_az.as_ref() + != preferred_az.as_ref().map(|az| az.as_ref()) + { + unsharded_tenant.preferred_az = preferred_az.map(|az| az.as_ref().clone()); + } } Self::Sharded(sharded_tenant) if sharded_tenant.stripe_size == stripe_size @@ -146,10 +166,21 @@ impl ComputeHookTenant { .push((tenant_shard_id.shard_number, node_id)); sharded_tenant.shards.sort_by_key(|s| s.0) } + + if sharded_tenant.preferred_az.as_ref() + != preferred_az.as_ref().map(|az| az.as_ref()) + { + sharded_tenant.preferred_az = preferred_az.map(|az| az.as_ref().clone()); + } } _ => { // Shard count changed: reset struct. - *self = Self::new(tenant_shard_id, stripe_size, node_id); + *self = Self::new( + tenant_shard_id, + stripe_size, + preferred_az.map(|az| az.into_owned()), + node_id, + ); } } } @@ -165,6 +196,7 @@ struct ComputeHookNotifyRequestShard { #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] struct ComputeHookNotifyRequest { tenant_id: TenantId, + preferred_az: Option, stripe_size: Option, shards: Vec, } @@ -238,6 +270,10 @@ impl ComputeHookTenant { node_id: unsharded_tenant.node_id, }], stripe_size: None, + preferred_az: unsharded_tenant + .preferred_az + .as_ref() + .map(|az| az.0.clone()), }), Self::Sharded(sharded_tenant) if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize => @@ -253,6 +289,7 @@ impl ComputeHookTenant { }) .collect(), stripe_size: Some(sharded_tenant.stripe_size), + preferred_az: sharded_tenant.preferred_az.as_ref().map(|az| az.0.clone()), }) } Self::Sharded(sharded_tenant) => { @@ -313,6 +350,17 @@ pub(super) struct ComputeHook { client: reqwest::Client, } +/// Callers may give us a list of these when asking us to send a bulk batch +/// of notifications in the background. This is a 'notification' in the sense of +/// other code notifying us of a shard's status, rather than being the final notification +/// that we send upwards to the control plane for the whole tenant. 
+pub(crate) struct ShardUpdate<'a> { + pub(crate) tenant_shard_id: TenantShardId, + pub(crate) node_id: NodeId, + pub(crate) stripe_size: ShardStripeSize, + pub(crate) preferred_az: Option>, +} + impl ComputeHook { pub(super) fn new(config: Config) -> Self { let authorization_header = config @@ -363,6 +411,7 @@ impl ComputeHook { tenant_id, shards, stripe_size, + preferred_az: _preferred_az, } = reconfigure_request; let compute_pageservers = shards @@ -503,24 +552,30 @@ impl ComputeHook { } /// Synchronous phase: update the per-tenant state for the next intended notification - fn notify_prepare( - &self, - tenant_shard_id: TenantShardId, - node_id: NodeId, - stripe_size: ShardStripeSize, - ) -> MaybeSendResult { + fn notify_prepare(&self, shard_update: ShardUpdate) -> MaybeSendResult { let mut state_locked = self.state.lock().unwrap(); use std::collections::hash_map::Entry; + let tenant_shard_id = shard_update.tenant_shard_id; + let tenant = match state_locked.entry(tenant_shard_id.tenant_id) { - Entry::Vacant(e) => e.insert(ComputeHookTenant::new( - tenant_shard_id, - stripe_size, - node_id, - )), + Entry::Vacant(e) => { + let ShardUpdate { + tenant_shard_id, + node_id, + stripe_size, + preferred_az, + } = shard_update; + e.insert(ComputeHookTenant::new( + tenant_shard_id, + stripe_size, + preferred_az.map(|az| az.into_owned()), + node_id, + )) + } Entry::Occupied(e) => { let tenant = e.into_mut(); - tenant.update(tenant_shard_id, stripe_size, node_id); + tenant.update(shard_update); tenant } }; @@ -608,13 +663,14 @@ impl ComputeHook { /// if something failed. pub(super) fn notify_background( self: &Arc, - notifications: Vec<(TenantShardId, NodeId, ShardStripeSize)>, + notifications: Vec, result_tx: tokio::sync::mpsc::Sender>, cancel: &CancellationToken, ) { let mut maybe_sends = Vec::new(); - for (tenant_shard_id, node_id, stripe_size) in notifications { - let maybe_send_result = self.notify_prepare(tenant_shard_id, node_id, stripe_size); + for shard_update in notifications { + let tenant_shard_id = shard_update.tenant_shard_id; + let maybe_send_result = self.notify_prepare(shard_update); maybe_sends.push((tenant_shard_id, maybe_send_result)) } @@ -678,15 +734,14 @@ impl ComputeHook { /// periods, but we don't retry forever. The **caller** is responsible for handling failures and /// ensuring that they eventually call again to ensure that the compute is eventually notified of /// the proper pageserver nodes for a tenant. - #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))] - pub(super) async fn notify( + #[tracing::instrument(skip_all, fields(tenant_id=%shard_update.tenant_shard_id.tenant_id, shard_id=%shard_update.tenant_shard_id.shard_slug(), node_id))] + pub(super) async fn notify<'a>( &self, - tenant_shard_id: TenantShardId, - node_id: NodeId, - stripe_size: ShardStripeSize, + shard_update: ShardUpdate<'a>, cancel: &CancellationToken, ) -> Result<(), NotifyError> { - let maybe_send_result = self.notify_prepare(tenant_shard_id, node_id, stripe_size); + let tenant_shard_id = shard_update.tenant_shard_id; + let maybe_send_result = self.notify_prepare(shard_update); self.notify_execute(maybe_send_result, tenant_shard_id, cancel) .await } @@ -739,6 +794,7 @@ pub(crate) mod tests { shard_number: ShardNumber(0), }, ShardStripeSize(12345), + None, NodeId(1), ); @@ -765,30 +821,32 @@ pub(crate) mod tests { // Writing the first shard of a multi-sharded situation (i.e. 
in a split) // resets the tenant state and puts it in an non-notifying state (need to // see all shards) - tenant_state.update( - TenantShardId { + tenant_state.update(ShardUpdate { + tenant_shard_id: TenantShardId { tenant_id, shard_count: ShardCount::new(2), shard_number: ShardNumber(1), }, - ShardStripeSize(32768), - NodeId(1), - ); + stripe_size: ShardStripeSize(32768), + preferred_az: None, + node_id: NodeId(1), + }); assert!(matches!( tenant_state.maybe_send(tenant_id, None), MaybeSendResult::Noop )); // Writing the second shard makes it ready to notify - tenant_state.update( - TenantShardId { + tenant_state.update(ShardUpdate { + tenant_shard_id: TenantShardId { tenant_id, shard_count: ShardCount::new(2), shard_number: ShardNumber(0), }, - ShardStripeSize(32768), - NodeId(1), - ); + stripe_size: ShardStripeSize(32768), + preferred_az: None, + node_id: NodeId(1), + }); let send_result = tenant_state.maybe_send(tenant_id, None); let MaybeSendResult::Transmit((request, mut guard)) = send_result else { diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 3ad386a95b..475f91eff4 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -1,13 +1,14 @@ use crate::pageserver_client::PageserverClient; use crate::persistence::Persistence; -use crate::service; -use pageserver_api::controller_api::PlacementPolicy; +use crate::{compute_hook, service}; +use pageserver_api::controller_api::{AvailabilityZone, PlacementPolicy}; use pageserver_api::models::{ LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, }; use pageserver_api::shard::{ShardIdentity, TenantShardId}; use pageserver_client::mgmt_api; use reqwest::StatusCode; +use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -45,6 +46,7 @@ pub(super) struct Reconciler { pub(crate) reconciler_config: ReconcilerConfig, pub(crate) config: TenantConfig, + pub(crate) preferred_az: Option, /// Observed state from the point of view of the reconciler. /// This gets updated as the reconciliation makes progress. @@ -834,9 +836,12 @@ impl Reconciler { let result = self .compute_hook .notify( - self.tenant_shard_id, - node.get_id(), - self.shard.stripe_size, + compute_hook::ShardUpdate { + tenant_shard_id: self.tenant_shard_id, + node_id: node.get_id(), + stripe_size: self.shard.stripe_size, + preferred_az: self.preferred_az.as_ref().map(Cow::Borrowed), + }, &self.cancel, ) .await; diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index a89e4741f6..42b50835f8 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -18,7 +18,7 @@ use crate::{ background_node_operations::{ Drain, Fill, Operation, OperationError, OperationHandler, MAX_RECONCILES_PER_OPERATION, }, - compute_hook::NotifyError, + compute_hook::{self, NotifyError}, drain_utils::{self, TenantShardDrain, TenantShardIterator}, id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard}, leadership::Leadership, @@ -656,11 +656,14 @@ impl Service { // emit a compute notification for this. In the case where our observed state does not // yet match our intent, we will eventually reconcile, and that will emit a compute notification. 
             if let Some(attached_at) = tenant_shard.stably_attached() {
-                compute_notifications.push((
-                    *tenant_shard_id,
-                    attached_at,
-                    tenant_shard.shard.stripe_size,
-                ));
+                compute_notifications.push(compute_hook::ShardUpdate {
+                    tenant_shard_id: *tenant_shard_id,
+                    node_id: attached_at,
+                    stripe_size: tenant_shard.shard.stripe_size,
+                    preferred_az: tenant_shard
+                        .preferred_az()
+                        .map(|az| Cow::Owned(az.clone())),
+                });
             }
         }
     }
@@ -4786,7 +4789,15 @@ impl Service {
         for (child_id, child_ps, stripe_size) in child_locations {
             if let Err(e) = self
                 .compute_hook
-                .notify(child_id, child_ps, stripe_size, &self.cancel)
+                .notify(
+                    compute_hook::ShardUpdate {
+                        tenant_shard_id: child_id,
+                        node_id: child_ps,
+                        stripe_size,
+                        preferred_az: preferred_az_id.as_ref().map(Cow::Borrowed),
+                    },
+                    &self.cancel,
+                )
                 .await
             {
                 tracing::warn!("Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})",
diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs
index f1b921646f..cba579e8a7 100644
--- a/storage_controller/src/tenant_shard.rs
+++ b/storage_controller/src/tenant_shard.rs
@@ -1198,6 +1198,7 @@ impl TenantShard {
             detach,
             reconciler_config,
             config: self.config.clone(),
+            preferred_az: self.preferred_az_id.clone(),
             observed: self.observed.clone(),
             original_observed: self.observed.clone(),
             compute_hook: compute_hook.clone(),
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 13ada1361e..2553a0c99a 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -134,6 +134,9 @@ DEFAULT_BRANCH_NAME: str = "main"

 BASE_PORT: int = 15000

+# By default we create pageservers with this phony AZ
+DEFAULT_AZ_ID: str = "us-east-2a"
+

 @pytest.fixture(scope="session")
 def neon_api_key() -> str:
@@ -1093,7 +1096,7 @@ class NeonEnv:
             "pg_auth_type": pg_auth_type,
             "http_auth_type": http_auth_type,
             # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override`
-            "availability_zone": "us-east-2a",
+            "availability_zone": DEFAULT_AZ_ID,
             # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
             # the pageserver taking a long time to start up due to syncfs flushing other tests' data
             "no_sync": True,
diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py
index 743ab0088b..4c381b563f 100644
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -11,6 +11,7 @@ from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineArchival
 from fixtures.compute_reconfigure import ComputeReconfigure
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
+    DEFAULT_AZ_ID,
     NeonEnv,
     NeonEnvBuilder,
     StorageControllerApiException,
@@ -793,6 +794,7 @@ def test_sharding_split_stripe_size(
         "tenant_id": str(env.initial_tenant),
         "stripe_size": None,
         "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}],
+        "preferred_az": DEFAULT_AZ_ID,
     }
     assert notifications[0] == expect

@@ -812,6 +814,7 @@ def test_sharding_split_stripe_size(
             {"node_id": int(env.pageservers[0].id), "shard_number": 0},
             {"node_id": int(env.pageservers[0].id), "shard_number": 1},
         ],
+        "preferred_az": DEFAULT_AZ_ID,
     }
     log.info(f"Got notification: {notifications[1]}")
     assert notifications[1] == expect_after
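The `preferred_az` key these tests now expect is serialized straight into the compute hook's notify body. A rough sketch of that payload using `serde`/`serde_json`, with struct and field names inferred from the JSON asserted above rather than taken from the controller source:

```rust
use serde::Serialize;

// Request shape inferred from the JSON bodies the tests assert on; the real
// type lives in the storage controller's compute_hook module and may differ.
#[derive(Serialize)]
struct NotifyShard {
    node_id: u64,
    shard_number: u8,
}

#[derive(Serialize)]
struct NotifyRequest {
    tenant_id: String,
    stripe_size: Option<u32>,
    shards: Vec<NotifyShard>,
    preferred_az: Option<String>,
}

fn main() {
    // Illustrative values only; the tenant id is a made-up hex string.
    let body = NotifyRequest {
        tenant_id: "3fa85f6457174562b3fc2c963f66afa6".to_string(),
        stripe_size: None,
        shards: vec![NotifyShard { node_id: 1, shard_number: 0 }],
        preferred_az: Some("us-east-2a".to_string()),
    };
    // Prints the same shape the Python fixtures compare against.
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```

An unsplit single-shard tenant thus serializes to `{"tenant_id": ..., "stripe_size": null, "shards": [...], "preferred_az": "us-east-2a"}`, matching the `expect` dicts above.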
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index ae9b596a1b..0be800d103 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -16,6 +16,7 @@ from fixtures.common_types import TenantId, TenantShardId, TimelineId
 from fixtures.compute_reconfigure import ComputeReconfigure
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
+    DEFAULT_AZ_ID,
     NeonEnv,
     NeonEnvBuilder,
     NeonPageserver,
@@ -599,6 +600,7 @@ def test_storage_controller_compute_hook(
         "tenant_id": str(env.initial_tenant),
         "stripe_size": None,
         "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}],
+        "preferred_az": DEFAULT_AZ_ID,
     }
     assert notifications[0] == expect

@@ -616,6 +618,7 @@ def test_storage_controller_compute_hook(
         "tenant_id": str(env.initial_tenant),
         "stripe_size": None,
         "shards": [{"node_id": int(env.pageservers[1].id), "shard_number": 0}],
+        "preferred_az": DEFAULT_AZ_ID,
     }

     def received_migration_notification():
@@ -643,6 +646,7 @@ def test_storage_controller_compute_hook(
             {"node_id": int(env.pageservers[1].id), "shard_number": 0},
             {"node_id": int(env.pageservers[1].id), "shard_number": 1},
         ],
+        "preferred_az": DEFAULT_AZ_ID,
     }

     def received_split_notification():
@@ -714,6 +718,7 @@ def test_storage_controller_stuck_compute_hook(
         "tenant_id": str(env.initial_tenant),
         "stripe_size": None,
         "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}],
+        "preferred_az": DEFAULT_AZ_ID,
     }
     assert notifications[0] == expect

From 2ee6bc5ec42590e958fc246092f8e98202fc9173 Mon Sep 17 00:00:00 2001
From: Conrad Ludgate
Date: Tue, 17 Dec 2024 20:06:18 +0000
Subject: [PATCH 47/49] chore(proxy): update vendored postgres libs to edition 2021 (#10139)

I ran `cargo fix --edition` in each project prior, and it found nothing
that needed fixing.
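Besides the lint-attribute cleanups, one hunk below drops `use std::convert::TryFrom;` from `frontend.rs`; that import becomes redundant on edition 2021, whose prelude exports `TryFrom`/`TryInto` automatically. A small self-contained illustration (not from the vendored code):

```rust
// Compiles on edition 2021 with no `use std::convert::TryFrom;`:
// the 2021 prelude brings TryFrom/TryInto into scope by default.
fn main() {
    assert!(u8::try_from(300u16).is_err()); // 300 does not fit in a u8
    let small = u8::try_from(42u16).unwrap();
    println!("converted: {small}");
}
```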
---
 libs/proxy/postgres-protocol2/Cargo.toml              | 2 +-
 libs/proxy/postgres-protocol2/src/lib.rs              | 3 +--
 libs/proxy/postgres-protocol2/src/message/frontend.rs | 1 -
 libs/proxy/postgres-types2/Cargo.toml                 | 2 +-
 libs/proxy/postgres-types2/src/lib.rs                 | 3 +--
 libs/proxy/tokio-postgres2/Cargo.toml                 | 2 +-
 libs/proxy/tokio-postgres2/src/lib.rs                 | 2 +-
 7 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/libs/proxy/postgres-protocol2/Cargo.toml b/libs/proxy/postgres-protocol2/Cargo.toml
index f71c1599c7..f66a292d5e 100644
--- a/libs/proxy/postgres-protocol2/Cargo.toml
+++ b/libs/proxy/postgres-protocol2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "postgres-protocol2"
 version = "0.1.0"
-edition = "2018"
+edition = "2021"
 license = "MIT/Apache-2.0"

 [dependencies]
diff --git a/libs/proxy/postgres-protocol2/src/lib.rs b/libs/proxy/postgres-protocol2/src/lib.rs
index 947f2f835d..6032440f9a 100644
--- a/libs/proxy/postgres-protocol2/src/lib.rs
+++ b/libs/proxy/postgres-protocol2/src/lib.rs
@@ -9,8 +9,7 @@
 //!
 //! This library assumes that the `client_encoding` backend parameter has been
 //! set to `UTF8`. It will most likely not behave properly if that is not the case.
-#![doc(html_root_url = "https://docs.rs/postgres-protocol/0.6")]
-#![warn(missing_docs, rust_2018_idioms, clippy::all)]
+#![warn(missing_docs, clippy::all)]

 use byteorder::{BigEndian, ByteOrder};
 use bytes::{BufMut, BytesMut};
diff --git a/libs/proxy/postgres-protocol2/src/message/frontend.rs b/libs/proxy/postgres-protocol2/src/message/frontend.rs
index bc6168f337..640f35ada3 100644
--- a/libs/proxy/postgres-protocol2/src/message/frontend.rs
+++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs
@@ -3,7 +3,6 @@
 use byteorder::{BigEndian, ByteOrder};
 use bytes::{Buf, BufMut, BytesMut};

-use std::convert::TryFrom;
 use std::error::Error;
 use std::io;
 use std::marker;
diff --git a/libs/proxy/postgres-types2/Cargo.toml b/libs/proxy/postgres-types2/Cargo.toml
index 58cfb5571f..57efd94cd3 100644
--- a/libs/proxy/postgres-types2/Cargo.toml
+++ b/libs/proxy/postgres-types2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "postgres-types2"
 version = "0.1.0"
-edition = "2018"
+edition = "2021"
 license = "MIT/Apache-2.0"

 [dependencies]
diff --git a/libs/proxy/postgres-types2/src/lib.rs b/libs/proxy/postgres-types2/src/lib.rs
index 18ba032151..d4f3afdfd4 100644
--- a/libs/proxy/postgres-types2/src/lib.rs
+++ b/libs/proxy/postgres-types2/src/lib.rs
@@ -2,8 +2,7 @@
 //!
 //! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it
 //! unless you want to define your own `ToSql` or `FromSql` definitions.
-#![doc(html_root_url = "https://docs.rs/postgres-types/0.2")]
-#![warn(clippy::all, rust_2018_idioms, missing_docs)]
+#![warn(clippy::all, missing_docs)]

 use fallible_iterator::FallibleIterator;
 use postgres_protocol2::types;
diff --git a/libs/proxy/tokio-postgres2/Cargo.toml b/libs/proxy/tokio-postgres2/Cargo.toml
index 7130c1b726..56e7c4da47 100644
--- a/libs/proxy/tokio-postgres2/Cargo.toml
+++ b/libs/proxy/tokio-postgres2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tokio-postgres2"
 version = "0.1.0"
-edition = "2018"
+edition = "2021"
 license = "MIT/Apache-2.0"

 [dependencies]
diff --git a/libs/proxy/tokio-postgres2/src/lib.rs b/libs/proxy/tokio-postgres2/src/lib.rs
index 901ed0c96c..9155dd8279 100644
--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -1,5 +1,5 @@
 //! An asynchronous, pipelined, PostgreSQL client.
-#![warn(rust_2018_idioms, clippy::all)]
+#![warn(clippy::all)]

 pub use crate::cancel_token::CancelToken;
 pub use crate::client::{Client, SocketConfig};

From c52514ab02b96e273f8a99362e2401e4a7bc3982 Mon Sep 17 00:00:00 2001
From: a-masterov <72613290+a-masterov@users.noreply.github.com>
Date: Tue, 17 Dec 2024 21:47:44 +0100
Subject: [PATCH 48/49] Fix allure report creation on periodic `pg_regress` testing (#10171)

## Problem
The allure report finishes with the error
`HttpError: Resource not accessible by integration`
while running the `pg_regress` test against a cloud staging project
due to a lack of permissions.

## Summary of changes
The permissions are added.
---
 .github/workflows/cloud-regress.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml
index 55f42ea533..09d6acd325 100644
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -21,6 +21,8 @@ concurrency:

 permissions:
   id-token: write # aws-actions/configure-aws-credentials
+  statuses: write
+  contents: write

 jobs:
   regress:

From 758680d4f874c9f290176bfc2124361472ba4e4f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 17 Dec 2024 22:06:42 +0000
Subject: [PATCH 49/49] Proxy release 2024-12-17