From 75d583c04a1c68a3bc859151e12ccc3bb6514862 Mon Sep 17 00:00:00 2001 From: Dmitry Rodionov Date: Wed, 21 Jun 2023 14:25:58 +0300 Subject: [PATCH 1/5] Tenant::load: fix uninit timeline marker processing (#4458) ## Problem During timeline creation we create a special mark file whose presence indicates that initialization didn't complete successfully. In case of a crash restart we can remove such a half-initialized timeline, and a following retry from the control plane side should perform another attempt. So in case of a possible crash restart during initial loading we have the following picture: ``` timelines | - ___uninit | - | - | ``` We call `std::fs::read_dir` to walk files in the `timelines` directory one by one. If we see an uninit file we proceed with deletion of both the timeline directory and the uninit file. If we see a timeline we check if an uninit file exists and do the same cleanup. But in fact it's possible for both branches to be taken for the same timeline. The result of readdir doesn't reflect subsequent directory state modifications. So you can still get a "valid" entry on the next iteration of the loop despite the fact that it was deleted in one of the previous iterations of the loop. 
To see this, you can apply the following patch (it disables uninit mark cleanup on successful timeline creation): ```diff diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 4beb2664..b3cdad8f 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -224,11 +224,6 @@ impl UninitializedTimeline<'_> { ) })?; } - uninit_mark.remove_uninit_mark().with_context(|| { - format!( - "Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}" - ) - })?; v.insert(Arc::clone(&new_timeline)); new_timeline.maybe_spawn_flush_loop(); ``` And perform the following steps: ```bash neon_local init neon_local start neon_local tenant create neon_local stop neon_local start ``` The error is: ```log INFO load{tenant_id=X}:blocking: Found an uninit mark file .neon/tenants/X/timelines/Y.___uninit, removing the timeline and its uninit mark 2023-06-09T18:43:41.664247Z ERROR load{tenant_id=X}: load failed, setting tenant state to Broken: failed to load metadata Caused by: 0: Failed to read metadata bytes from path .neon/tenants/X/timelines/Y/metadata 1: No such file or directory (os error 2) ``` So the uninit mark got deleted together with the timeline directory, but we still got a directory entry for it and tried to load it. The bug prevented the tenant from being successfully loaded. ## Summary of changes Ideally I think we shouldn't place uninit marks in the same directory as timeline directories but move them to a separate directory and gather them as an input to the actual listing, but that would be sort of an on-disk format change, so just check whether entries are still valid before operating on them. 
--- libs/utils/src/http/error.rs | 3 +- pageserver/src/http/routes.rs | 14 +- pageserver/src/page_service.rs | 6 +- pageserver/src/tenant.rs | 337 ++++++++++-------- pageserver/src/tenant/mgr.rs | 6 +- .../src/tenant/remote_timeline_client.rs | 2 +- .../walreceiver/connection_manager.rs | 2 +- safekeeper/src/timeline.rs | 2 +- 8 files changed, 208 insertions(+), 164 deletions(-) diff --git a/libs/utils/src/http/error.rs b/libs/utils/src/http/error.rs index f9c06453df..527e486fd0 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/utils/src/http/error.rs @@ -1,5 +1,6 @@ use hyper::{header, Body, Response, StatusCode}; use serde::{Deserialize, Serialize}; +use std::error::Error as StdError; use thiserror::Error; use tracing::error; @@ -15,7 +16,7 @@ pub enum ApiError { Unauthorized(String), #[error("NotFound: {0}")] - NotFound(anyhow::Error), + NotFound(Box), #[error("Conflict: {0}")] Conflict(String), diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index fc8da70cc0..5bec07b74a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -142,7 +142,7 @@ impl From for ApiError { impl From for ApiError { fn from(tse: TenantStateError) -> ApiError { match tse { - TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)), + TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()), _ => ApiError::InternalServerError(anyhow::Error::new(tse)), } } @@ -151,7 +151,7 @@ impl From for ApiError { impl From for ApiError { fn from(tse: GetTenantError) -> ApiError { match tse { - GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)), + GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()), e @ GetTenantError::NotActive(_) => { // Why is this not `ApiError::NotFound`? 
// Because we must be careful to never return 404 for a tenant if it does @@ -169,7 +169,7 @@ impl From for ApiError { fn from(e: SetNewTenantConfigError) -> ApiError { match e { SetNewTenantConfigError::GetTenant(tid) => { - ApiError::NotFound(anyhow!("tenant {}", tid)) + ApiError::NotFound(anyhow!("tenant {}", tid).into()) } e @ SetNewTenantConfigError::Persist(_) => { ApiError::InternalServerError(anyhow::Error::new(e)) @@ -182,7 +182,7 @@ impl From for ApiError { fn from(value: crate::tenant::DeleteTimelineError) -> Self { use crate::tenant::DeleteTimelineError::*; match value { - NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found")), + NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()), HasChildren(children) => ApiError::PreconditionFailed( format!("Cannot delete timeline which has child timelines: {children:?}") .into_boxed_str(), @@ -397,7 +397,7 @@ async fn timeline_detail_handler( let timeline = tenant .get_timeline(timeline_id, false) - .map_err(ApiError::NotFound)?; + .map_err(|e| ApiError::NotFound(e.into()))?; let timeline_info = build_timeline_info( &timeline, @@ -1061,7 +1061,7 @@ async fn timeline_download_remote_layers_handler_get( let info = timeline .get_download_all_remote_layers_task_info() .context("task never started since last pageserver process start") - .map_err(ApiError::NotFound)?; + .map_err(|e| ApiError::NotFound(e.into()))?; json_response(StatusCode::OK, info) } @@ -1072,7 +1072,7 @@ async fn active_timeline_of_active_tenant( let tenant = mgr::get_tenant(tenant_id, true).await?; tenant .get_timeline(timeline_id, true) - .map_err(ApiError::NotFound) + .map_err(|e| ApiError::NotFound(e.into())) } async fn always_panic_handler( diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 9e9285a009..31ad45790c 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -390,7 +390,9 @@ impl PageServerHandler { }; // Check that the timeline exists - 
let timeline = tenant.get_timeline(timeline_id, true)?; + let timeline = tenant + .get_timeline(timeline_id, true) + .map_err(|e| anyhow::anyhow!(e))?; // switch client to COPYBOTH pgb.write_message_noflush(&BeMessage::CopyBothResponse)?; @@ -1230,6 +1232,6 @@ async fn get_active_tenant_timeline( .map_err(GetActiveTimelineError::Tenant)?; let timeline = tenant .get_timeline(timeline_id, true) - .map_err(GetActiveTimelineError::Timeline)?; + .map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?; Ok(timeline) } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7fdd047c96..0e8d6b1287 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -421,6 +421,21 @@ remote: } } +#[derive(Debug, thiserror::Error, PartialEq, Eq)] +pub enum GetTimelineError { + #[error("Timeline {tenant_id}/{timeline_id} is not active, state: {state:?}")] + NotActive { + tenant_id: TenantId, + timeline_id: TimelineId, + state: TimelineState, + }, + #[error("Timeline {tenant_id}/{timeline_id} was not found")] + NotFound { + tenant_id: TenantId, + timeline_id: TimelineId, + }, +} + #[derive(Debug, thiserror::Error)] pub enum DeleteTimelineError { #[error("NotFound")] @@ -946,6 +961,117 @@ impl Tenant { tenant } + pub fn scan_and_sort_timelines_dir( + self: Arc, + ) -> anyhow::Result> { + let timelines_dir = self.conf.timelines_path(&self.tenant_id); + let mut timelines_to_load: HashMap = HashMap::new(); + + for entry in + std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")? 
+ { + let entry = entry.context("read timeline dir entry")?; + let timeline_dir = entry.path(); + + if crate::is_temporary(&timeline_dir) { + info!( + "Found temporary timeline directory, removing: {}", + timeline_dir.display() + ); + if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { + error!( + "Failed to remove temporary directory '{}': {:?}", + timeline_dir.display(), + e + ); + } + } else if is_uninit_mark(&timeline_dir) { + if !timeline_dir.exists() { + warn!( + "Timeline dir entry become invalid: {}", + timeline_dir.display() + ); + continue; + } + let timeline_uninit_mark_file = &timeline_dir; + info!( + "Found an uninit mark file {}, removing the timeline and its uninit mark", + timeline_uninit_mark_file.display() + ); + let timeline_id = timeline_uninit_mark_file + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline uninit mark name {}", + timeline_uninit_mark_file.display() + ) + })?; + let timeline_dir = self.conf.timeline_path(&timeline_id, &self.tenant_id); + if let Err(e) = + remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) + { + error!("Failed to clean up uninit marked timeline: {e:?}"); + } + } else { + if !timeline_dir.exists() { + warn!( + "Timeline dir entry become invalid: {}", + timeline_dir.display() + ); + continue; + } + let timeline_id = timeline_dir + .file_name() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline dir name {}", + timeline_dir.display() + ) + })?; + let timeline_uninit_mark_file = self + .conf + .timeline_uninit_mark_file_path(self.tenant_id, timeline_id); + if timeline_uninit_mark_file.exists() { + info!( + %timeline_id, + "Found an uninit mark file, removing the timeline and its uninit mark", + ); + if let Err(e) = + remove_timeline_and_uninit_mark(&timeline_dir, &timeline_uninit_mark_file) + 
{ + error!("Failed to clean up uninit marked timeline: {e:?}"); + } + continue; + } + + let file_name = entry.file_name(); + if let Ok(timeline_id) = + file_name.to_str().unwrap_or_default().parse::() + { + let metadata = load_metadata(self.conf, timeline_id, self.tenant_id) + .context("failed to load metadata")?; + timelines_to_load.insert(timeline_id, metadata); + } else { + // A file or directory that doesn't look like a timeline ID + warn!( + "unexpected file or directory in timelines directory: {}", + file_name.to_string_lossy() + ); + } + } + } + + // Sort the array of timeline IDs into tree-order, so that parent comes before + // all its children. + tree_sort_timelines(timelines_to_load) + } + /// /// Background task to load in-memory data structures for this tenant, from /// files on disk. Used at pageserver startup. @@ -962,110 +1088,16 @@ impl Tenant { utils::failpoint_sleep_millis_async!("before-loading-tenant"); - // TODO split this into two functions, scan and actual load - // Load in-memory state to reflect the local files on disk // // Scan the directory, peek into the metadata file of each timeline, and // collect a list of timelines and their ancestors. - let tenant_id = self.tenant_id; - let conf = self.conf; let span = info_span!("blocking"); + let cloned = Arc::clone(self); let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || { let _g = span.entered(); - let mut timelines_to_load: HashMap = HashMap::new(); - let timelines_dir = conf.timelines_path(&tenant_id); - - for entry in - std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")? 
- { - let entry = entry.context("read timeline dir entry")?; - let timeline_dir = entry.path(); - - if crate::is_temporary(&timeline_dir) { - info!( - "Found temporary timeline directory, removing: {}", - timeline_dir.display() - ); - if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { - error!( - "Failed to remove temporary directory '{}': {:?}", - timeline_dir.display(), - e - ); - } - } else if is_uninit_mark(&timeline_dir) { - let timeline_uninit_mark_file = &timeline_dir; - info!( - "Found an uninit mark file {}, removing the timeline and its uninit mark", - timeline_uninit_mark_file.display() - ); - let timeline_id = timeline_uninit_mark_file - .file_stem() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline uninit mark name {}", - timeline_uninit_mark_file.display() - ) - })?; - let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id); - if let Err(e) = - remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) - { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - } else { - let timeline_id = timeline_dir - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline dir name {}", - timeline_dir.display() - ) - })?; - let timeline_uninit_mark_file = - conf.timeline_uninit_mark_file_path(tenant_id, timeline_id); - if timeline_uninit_mark_file.exists() { - info!( - %timeline_id, - "Found an uninit mark file, removing the timeline and its uninit mark", - ); - if let Err(e) = remove_timeline_and_uninit_mark( - &timeline_dir, - &timeline_uninit_mark_file, - ) { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - continue; - } - - let file_name = entry.file_name(); - if let Ok(timeline_id) = - file_name.to_str().unwrap_or_default().parse::() - { - let metadata = load_metadata(conf, timeline_id, tenant_id) - 
.context("failed to load metadata")?; - timelines_to_load.insert(timeline_id, metadata); - } else { - // A file or directory that doesn't look like a timeline ID - warn!( - "unexpected file or directory in timelines directory: {}", - file_name.to_string_lossy() - ); - } - } - } - - // Sort the array of timeline IDs into tree-order, so that parent comes before - // all its children. - tree_sort_timelines(timelines_to_load) + cloned.scan_and_sort_timelines_dir() }) .await .context("load spawn_blocking") @@ -1213,19 +1245,21 @@ impl Tenant { &self, timeline_id: TimelineId, active_only: bool, - ) -> anyhow::Result> { + ) -> Result, GetTimelineError> { let timelines_accessor = self.timelines.lock().unwrap(); - let timeline = timelines_accessor.get(&timeline_id).with_context(|| { - format!("Timeline {}/{} was not found", self.tenant_id, timeline_id) - })?; + let timeline = timelines_accessor + .get(&timeline_id) + .ok_or(GetTimelineError::NotFound { + tenant_id: self.tenant_id, + timeline_id, + })?; if active_only && !timeline.is_active() { - anyhow::bail!( - "Timeline {}/{} is not active, state: {:?}", - self.tenant_id, + Err(GetTimelineError::NotActive { + tenant_id: self.tenant_id, timeline_id, - timeline.current_state() - ) + state: timeline.current_state(), + }) } else { Ok(Arc::clone(timeline)) } @@ -3375,9 +3409,8 @@ where #[cfg(test)] pub mod harness { use bytes::{Bytes, BytesMut}; - use once_cell::sync::Lazy; use once_cell::sync::OnceCell; - use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; + use std::sync::Arc; use std::{fs, path::PathBuf}; use utils::logging; use utils::lsn::Lsn; @@ -3410,8 +3443,6 @@ pub mod harness { buf.freeze() } - static LOCK: Lazy> = Lazy::new(|| RwLock::new(())); - impl From for TenantConfOpt { fn from(tenant_conf: TenantConf) -> Self { Self { @@ -3438,33 +3469,16 @@ pub mod harness { } } - pub struct TenantHarness<'a> { + pub struct TenantHarness { pub conf: &'static PageServerConf, pub tenant_conf: TenantConf, pub 
tenant_id: TenantId, - - pub lock_guard: ( - Option>, - Option>, - ), } static LOG_HANDLE: OnceCell<()> = OnceCell::new(); - impl<'a> TenantHarness<'a> { + impl TenantHarness { pub fn create(test_name: &'static str) -> anyhow::Result { - Self::create_internal(test_name, false) - } - pub fn create_exclusive(test_name: &'static str) -> anyhow::Result { - Self::create_internal(test_name, true) - } - fn create_internal(test_name: &'static str, exclusive: bool) -> anyhow::Result { - let lock_guard = if exclusive { - (None, Some(LOCK.write().unwrap())) - } else { - (Some(LOCK.read().unwrap()), None) - }; - LOG_HANDLE.get_or_init(|| { logging::init( logging::LogFormat::Test, @@ -3500,7 +3514,6 @@ pub mod harness { conf, tenant_conf, tenant_id, - lock_guard, }) } @@ -3525,26 +3538,12 @@ pub mod harness { self.tenant_id, None, )); - // populate tenant with locally available timelines - let mut timelines_to_load = HashMap::new(); - for timeline_dir_entry in fs::read_dir(self.conf.timelines_path(&self.tenant_id)) - .expect("should be able to read timelines dir") - { - let timeline_dir_entry = timeline_dir_entry?; - let timeline_id: TimelineId = timeline_dir_entry - .path() - .file_name() - .unwrap() - .to_string_lossy() - .parse()?; - - let timeline_metadata = load_metadata(self.conf, timeline_id, self.tenant_id)?; - timelines_to_load.insert(timeline_id, timeline_metadata); - } tenant .load(None, ctx) .instrument(info_span!("try_load", tenant_id=%self.tenant_id)) .await?; + + // TODO reuse Tenant::activate (needs broker) tenant.state.send_replace(TenantState::Active); for timeline in tenant.timelines.lock().unwrap().values() { timeline.set_state(TimelineState::Active); @@ -4070,9 +4069,13 @@ mod tests { std::fs::write(metadata_path, metadata_bytes)?; let err = harness.try_load(&ctx).await.err().expect("should fail"); - assert!(err - .to_string() - .starts_with("Failed to parse metadata bytes from path")); + // get all the stack with all .context, not tonly the last one + let 
message = format!("{err:#}"); + let expected = "Failed to parse metadata bytes from path"; + assert!( + message.contains(expected), + "message '{message}' expected to contain {expected}" + ); let mut found_error_message = false; let mut err_source = err.source(); @@ -4506,6 +4509,44 @@ mod tests { assert!(expect_initdb_optimization); assert!(initdb_optimization_count > 0); } + Ok(()) + } + + #[tokio::test] + async fn test_uninit_mark_crash() -> anyhow::Result<()> { + let name = "test_uninit_mark_crash"; + let harness = TenantHarness::create(name)?; + { + let (tenant, ctx) = harness.load().await; + let tline = + tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?; + // Keeps uninit mark in place + std::mem::forget(tline); + } + + let (tenant, _) = harness.load().await; + match tenant.get_timeline(TIMELINE_ID, false) { + Ok(_) => panic!("timeline should've been removed during load"), + Err(e) => { + assert_eq!( + e, + GetTimelineError::NotFound { + tenant_id: tenant.tenant_id, + timeline_id: TIMELINE_ID, + } + ) + } + } + + assert!(!harness + .conf + .timeline_path(&TIMELINE_ID, &tenant.tenant_id) + .exists()); + + assert!(!harness + .conf + .timeline_uninit_mark_file_path(tenant.tenant_id, TIMELINE_ID) + .exists()); Ok(()) } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 7e123c3fbd..09b825d2e9 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -675,7 +675,7 @@ pub async fn immediate_gc( .get(&tenant_id) .map(Arc::clone) .with_context(|| format!("tenant {tenant_id}")) - .map_err(ApiError::NotFound)?; + .map_err(|e| ApiError::NotFound(e.into()))?; let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon()); // Use tenant's pitr setting @@ -724,11 +724,11 @@ pub async fn immediate_compact( .get(&tenant_id) .map(Arc::clone) .with_context(|| format!("tenant {tenant_id}")) - .map_err(ApiError::NotFound)?; + .map_err(|e| ApiError::NotFound(e.into()))?; let timeline = 
tenant .get_timeline(timeline_id, true) - .map_err(ApiError::NotFound)?; + .map_err(|e| ApiError::NotFound(e.into()))?; // Run in task_mgr to avoid race with tenant_detach operation let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 8db2bc4eb2..7808b64d35 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1367,7 +1367,7 @@ mod tests { struct TestSetup { runtime: &'static tokio::runtime::Runtime, entered_runtime: EnterGuard<'static>, - harness: TenantHarness<'static>, + harness: TenantHarness, tenant: Arc, tenant_ctx: RequestContext, remote_fs_dir: PathBuf, diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index 83dfc5f598..fa23ae765d 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -1321,7 +1321,7 @@ mod tests { const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr"; - async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState { + async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState { let (tenant, ctx) = harness.load().await; let timeline = tenant .create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx) diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 52c3e8d4be..30036cc7f2 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -266,7 +266,7 @@ impl From for ApiError { fn from(te: TimelineError) -> ApiError { match te { TimelineError::NotFound(ttid) => { - ApiError::NotFound(anyhow!("timeline {} not found", ttid)) + ApiError::NotFound(anyhow!("timeline {} not found", ttid).into()) } _ => ApiError::InternalServerError(anyhow!("{}", te)), } From 
870740c9490adec5d58f2d1ea4c8b18a286ab0a7 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 21 Jun 2023 15:50:52 +0300 Subject: [PATCH 2/5] cargo update -p openssl (#4542) To unblock release https://github.com/neondatabase/neon/pull/4536#issuecomment-1600678054 Context: https://rustsec.org/advisories/RUSTSEC-2023-0044 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71a6699c50..4be74614c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2349,9 +2349,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.52" +version = "0.10.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" dependencies = [ "bitflags", "cfg-if", @@ -2381,9 +2381,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.87" +version = "0.9.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e" +checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" dependencies = [ "cc", "libc", From e4da76f0218e7ffb29cf5e343654b07feacb5148 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 21 Jun 2023 18:00:14 +0300 Subject: [PATCH 3/5] update_gc_info: fix typo in timeline_id tracing field (#4546) Commit ``` commit 472cc17b7aba4f78bc7a71a2c04d2e7cb8b696d8 Author: Dmitry Rodionov Date: Thu Jun 15 17:30:12 2023 +0300 propagate lock guard to background deletion task (#4495) ``` did a drive-by fix, but, the drive-by had a typo. 
``` gc_loop{tenant_id=2e2f2bff091b258ac22a4c4dd39bd25d}:update_gc_info{timline_id=837c688fd37c903639b9aa0a6dd3f1f1}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000024DA0D1-000000000443FB51}:panic{thread=background op worker location=pageserver/src/tenant/timeline.rs:4843:25}: missing extractors: ["TimelineId"] Stack backtrace: 0: utils::logging::tracing_panic_hook at /libs/utils/src/logging.rs:166:21 1: as core::ops::function::Fn>::call at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/alloc/src/boxed.rs:2002:9 2: std::panicking::rust_panic_with_hook at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/std/src/panicking.rs:692:13 3: std::panicking::begin_panic_handler::{{closure}} at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/std/src/panicking.rs:579:13 4: std::sys_common::backtrace::__rust_end_short_backtrace at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/std/src/sys_common/backtrace.rs:137:18 5: rust_begin_unwind at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/std/src/panicking.rs:575:5 6: core::panicking::panic_fmt at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/core/src/panicking.rs:64:14 7: pageserver::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id at /pageserver/src/tenant/timeline.rs:4843:25 8: ::download_remote_layer::{closure#0}::{closure#0} at /pageserver/src/tenant/timeline.rs:4368:9 9: ::download_remote_layer::{closure#0}::{closure#0}> as core::future::future::Future>::poll at /.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-0.1.37/src/instrument.rs:272:9 10: ::download_remote_layer::{closure#0} at /pageserver/src/tenant/timeline.rs:4363:5 11: ::get_reconstruct_data::{closure#0} at /pageserver/src/tenant/timeline.rs:2618:69 12: ::get::{closure#0} at /pageserver/src/tenant/timeline.rs:565:13 13: ::list_slru_segments::{closure#0} at /pageserver/src/pgdatadir_mapping.rs:427:42 14: 
::is_latest_commit_timestamp_ge_than::{closure#0} at /pageserver/src/pgdatadir_mapping.rs:390:13 15: ::find_lsn_for_timestamp::{closure#0} at /pageserver/src/pgdatadir_mapping.rs:338:17 16: ::update_gc_info::{closure#0}::{closure#0} at /pageserver/src/tenant/timeline.rs:3967:71 17: ::update_gc_info::{closure#0}::{closure#0}> as core::future::future::Future>::poll at /.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-0.1.37/src/instrument.rs:272:9 18: ::update_gc_info::{closure#0} at /pageserver/src/tenant/timeline.rs:3948:5 19: ::refresh_gc_info_internal::{closure#0} at /pageserver/src/tenant.rs:2687:21 20: ::gc_iteration_internal::{closure#0} at /pageserver/src/tenant.rs:2551:13 21: ::gc_iteration::{closure#0} at /pageserver/src/tenant.rs:1490:13 22: pageserver::tenant::tasks::gc_loop::{closure#0}::{closure#0} at /pageserver/src/tenant/tasks.rs:187:21 23: pageserver::tenant::tasks::gc_loop::{closure#0} at /pageserver/src/tenant/tasks.rs:208:5 ``` ## Problem ## Summary of changes ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --- pageserver/src/tenant/timeline.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index de786da322..122331ac19 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3953,7 +3953,7 @@ impl Timeline { /// for example. The caller should hold `Tenant::gc_cs` lock to ensure /// that. 
/// - #[instrument(skip_all, fields(timline_id=%self.timeline_id))] + #[instrument(skip_all, fields(timeline_id=%self.timeline_id))] pub(super) async fn update_gc_info( &self, retain_lsns: Vec, From d3aa8a48ea402a550e2f933ee19b486c1135f801 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 21 Jun 2023 16:20:35 +0100 Subject: [PATCH 4/5] Update client libs for test_runner/pg_clients to their latest versions (#4547) Resolves https://github.com/neondatabase/neon/security/dependabot/27 --- .../pg_clients/python/pg8000/requirements.txt | 2 +- .../pg_clients/rust/tokio-postgres/Cargo.lock | 8 ++-- .../pg_clients/rust/tokio-postgres/Dockerfile | 2 +- .../swift/PostgresNIOExample/Package.resolved | 8 ++-- .../swift/PostgresNIOExample/Package.swift | 2 +- .../postgresql-client/package-lock.json | 47 +++++-------------- .../typescript/postgresql-client/package.json | 2 +- .../typescript/serverless-driver/Dockerfile | 2 +- .../serverless-driver/package-lock.json | 10 ++-- .../typescript/serverless-driver/package.json | 2 +- 10 files changed, 31 insertions(+), 54 deletions(-) diff --git a/test_runner/pg_clients/python/pg8000/requirements.txt b/test_runner/pg_clients/python/pg8000/requirements.txt index 7bba8da06d..a8407c3cb0 100644 --- a/test_runner/pg_clients/python/pg8000/requirements.txt +++ b/test_runner/pg_clients/python/pg8000/requirements.txt @@ -1,2 +1,2 @@ -pg8000==1.29.4 +pg8000==1.29.8 scramp>=1.4.3 diff --git a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock index 30deb3ff20..bdbbe0ad69 100644 --- a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock +++ b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock @@ -396,9 +396,9 @@ checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "openssl" -version = "0.10.52" +version = "0.10.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" dependencies = [ "bitflags", "cfg-if", @@ -428,9 +428,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.87" +version = "0.9.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e" +checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" dependencies = [ "cc", "libc", diff --git a/test_runner/pg_clients/rust/tokio-postgres/Dockerfile b/test_runner/pg_clients/rust/tokio-postgres/Dockerfile index 43fc6f6c92..35ae25a470 100644 --- a/test_runner/pg_clients/rust/tokio-postgres/Dockerfile +++ b/test_runner/pg_clients/rust/tokio-postgres/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.69 +FROM rust:1.70 WORKDIR /source COPY . . diff --git a/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved b/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved index cc12acda4c..9f13106011 100644 --- a/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved +++ b/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved @@ -5,8 +5,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/vapor/postgres-nio.git", "state" : { - "revision" : "dbf9c2eb596df39cba8ff3f74d74b2e6a31bd937", - "version" : "1.14.1" + "revision" : "061a0836d7c1887e04a975d1d2eaa2ef5fd7dfab", + "version" : "1.16.0" } }, { @@ -59,8 +59,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-nio.git", "state" : { - "revision" : "d1690f85419fdac8d54e350fb6d2ab9fd95afd75", - "version" : "2.51.1" + "revision" : "6213ba7a06febe8fef60563a4a7d26a4085783cf", + "version" : "2.54.0" } }, { diff --git a/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift 
b/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift index ac32b982e2..a80590daa2 100644 --- a/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift +++ b/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift @@ -4,7 +4,7 @@ import PackageDescription let package = Package( name: "PostgresNIOExample", dependencies: [ - .package(url: "https://github.com/vapor/postgres-nio.git", from: "1.14.1") + .package(url: "https://github.com/vapor/postgres-nio.git", from: "1.16.0") ], targets: [ .executableTarget( diff --git a/test_runner/pg_clients/typescript/postgresql-client/package-lock.json b/test_runner/pg_clients/typescript/postgresql-client/package-lock.json index e4dfd1dd9d..4cedf56acd 100644 --- a/test_runner/pg_clients/typescript/postgresql-client/package-lock.json +++ b/test_runner/pg_clients/typescript/postgresql-client/package-lock.json @@ -5,23 +5,7 @@ "packages": { "": { "dependencies": { - "postgresql-client": "2.5.5" - } - }, - "node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } + "postgresql-client": "2.5.9" } }, "node_modules/doublylinked": { @@ -41,11 +25,6 @@ "putil-promisify": "^1.8.6" } }, - "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" - }, "node_modules/obuf": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz", @@ -63,30 +42,28 @@ } }, "node_modules/postgresql-client": { - "version": "2.5.5", - "resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.5.5.tgz", - "integrity": 
"sha512-2Mu3i+6NQ9cnkoZNd0XeSZo9WoUpuWf4ZSiCCoDWSj82T93py2/SKXZ1aUaP8mVaU0oKpyyGe0IwLYZ1VHShnA==", + "version": "2.5.9", + "resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.5.9.tgz", + "integrity": "sha512-s+kgTN6TfWLzehEyxw4Im4odnxVRCbZ0DEJzWS6SLowPAmB2m1/DOiOvZC0+ZVoi5AfbGE6SBqFxKguSyVAXZg==", "dependencies": { - "debug": "^4.3.4", "doublylinked": "^2.5.2", "lightning-pool": "^4.2.1", "postgres-bytea": "^3.0.0", - "power-tasks": "^1.6.4", + "power-tasks": "^1.7.0", "putil-merge": "^3.10.3", "putil-promisify": "^1.10.0", "putil-varhelpers": "^1.6.5" }, "engines": { - "node": ">=14.0", + "node": ">=16.0", "npm": ">=7.0.0" } }, "node_modules/power-tasks": { - "version": "1.6.4", - "resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-1.6.4.tgz", - "integrity": "sha512-LX8GGgEIP1N7jsZqlqZ275e6f1Ehq97APCEGj8uVO0NoEoB+77QUX12BFv3LmlNKfq4fIuNSPiHhyHFjqn2gfA==", + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-1.7.0.tgz", + "integrity": "sha512-rndZXCDxhuIDjPUJJvQwBDHaYagCkjvbPF/NA+omh/Ef4rAI9KtnvdA0k98dyiGpn1zXOpc6c2c0JWzg/xAhJg==", "dependencies": { - "debug": "^4.3.4", "doublylinked": "^2.5.2", "strict-typed-events": "^2.3.1" }, @@ -132,9 +109,9 @@ } }, "node_modules/ts-gems": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.3.0.tgz", - "integrity": "sha512-bUvrwrzlct7vfaNvtgMhynDf6lAki/kTtrNsIGhX6l7GJGK3s6b8Ro7dazOLXabV0m2jyShBzDQ8X1+h/C2Cug==" + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.4.0.tgz", + "integrity": "sha512-SdugYAXoWvbqrxLodIObzxhEKacDxh5LfAJIiIkiH7q5thvuuCzdmkdTVQYf7uEDrEpPhfx4tokDMamdO3be9A==" } } } diff --git a/test_runner/pg_clients/typescript/postgresql-client/package.json b/test_runner/pg_clients/typescript/postgresql-client/package.json index 9eaa13437a..12703ce89f 100644 --- a/test_runner/pg_clients/typescript/postgresql-client/package.json +++ 
b/test_runner/pg_clients/typescript/postgresql-client/package.json @@ -1,6 +1,6 @@ { "type": "module", "dependencies": { - "postgresql-client": "2.5.5" + "postgresql-client": "2.5.9" } } diff --git a/test_runner/pg_clients/typescript/serverless-driver/Dockerfile b/test_runner/pg_clients/typescript/serverless-driver/Dockerfile index a5ad832a5c..07e98c586b 100644 --- a/test_runner/pg_clients/typescript/serverless-driver/Dockerfile +++ b/test_runner/pg_clients/typescript/serverless-driver/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18 +FROM node:20 WORKDIR /source COPY . . diff --git a/test_runner/pg_clients/typescript/serverless-driver/package-lock.json b/test_runner/pg_clients/typescript/serverless-driver/package-lock.json index 0fb84cf5b7..72cc452817 100644 --- a/test_runner/pg_clients/typescript/serverless-driver/package-lock.json +++ b/test_runner/pg_clients/typescript/serverless-driver/package-lock.json @@ -5,16 +5,16 @@ "packages": { "": { "dependencies": { - "@neondatabase/serverless": "0.4.3", + "@neondatabase/serverless": "0.4.18", "ws": "8.13.0" } }, "node_modules/@neondatabase/serverless": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-0.4.3.tgz", - "integrity": "sha512-U8tpuF5f0R5WRsciR7iaJ5S2h54DWa6Z6CEW+J4KgwyvRN3q3qDz0MibdfFXU0WqnRoi/9RSf/2XN4TfeaOCbQ==", + "version": "0.4.18", + "resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-0.4.18.tgz", + "integrity": "sha512-2TZnIyRGC/+0fjZ8TKCzaSTPUD94PM7NBGuantGZbUrbWyqBwGnUoRtdZAQ95qBKVHqORLVfymlv2NE+HQMFeA==", "dependencies": { - "@types/pg": "^8.6.6" + "@types/pg": "8.6.6" } }, "node_modules/@types/node": { diff --git a/test_runner/pg_clients/typescript/serverless-driver/package.json b/test_runner/pg_clients/typescript/serverless-driver/package.json index 71ba181afc..840c7a5c4c 100644 --- a/test_runner/pg_clients/typescript/serverless-driver/package.json +++ b/test_runner/pg_clients/typescript/serverless-driver/package.json @@ 
-1,7 +1,7 @@ { "type": "module", "dependencies": { - "@neondatabase/serverless": "0.4.3", + "@neondatabase/serverless": "0.4.18", "ws": "8.13.0" } } From 2f618f46be510178632bb44afa5f0c1dfc96b7a4 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Thu, 22 Jun 2023 17:06:16 +0300 Subject: [PATCH 5/5] Use BUILD_TAG in compute_ctl binary. (#4541) Pass BUILD_TAG to compute_ctl binary. We need it to access versioned extension storage. --- .github/workflows/build_and_test.yml | 2 ++ Dockerfile.compute-node | 4 ++++ Dockerfile.compute-tools | 3 +++ compute_tools/src/bin/compute_ctl.rs | 6 ++++++ 4 files changed, 15 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5f82ab7aca..94fbb02cf6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -659,6 +659,7 @@ jobs: --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} + --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}} --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} @@ -716,6 +717,7 @@ jobs: --context . 
--build-arg GIT_VERSION=${{ github.sha }} --build-arg PG_VERSION=${{ matrix.version }} + --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}} --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index ae330d8a20..fc575536bc 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -2,6 +2,7 @@ ARG PG_VERSION ARG REPOSITORY=neondatabase ARG IMAGE=rust ARG TAG=pinned +ARG BUILD_TAG ######################################################################################### # @@ -634,6 +635,9 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \ # ######################################################################################### FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools +ARG BUILD_TAG +ENV BUILD_TAG=$BUILD_TAG + USER nonroot # Copy entire project to get Cargo.* files with proper dependencies for the whole project COPY --chown=nonroot . . diff --git a/Dockerfile.compute-tools b/Dockerfile.compute-tools index e86fb40ca4..3066e3f7ca 100644 --- a/Dockerfile.compute-tools +++ b/Dockerfile.compute-tools @@ -3,6 +3,7 @@ ARG REPOSITORY=neondatabase ARG IMAGE=rust ARG TAG=pinned +ARG BUILD_TAG FROM $REPOSITORY/$IMAGE:$TAG AS rust-build WORKDIR /home/nonroot @@ -16,6 +17,8 @@ ENV CACHEPOT_S3_KEY_PREFIX=cachepot ARG CACHEPOT_BUCKET=neon-github-dev #ARG AWS_ACCESS_KEY_ID #ARG AWS_SECRET_ACCESS_KEY +ARG BUILD_TAG +ENV BUILD_TAG=$BUILD_TAG COPY . . 
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index c6cfde1d1a..90b39e9dd9 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -54,9 +54,15 @@ use compute_tools::monitor::launch_monitor; use compute_tools::params::*; use compute_tools::spec::*; +const BUILD_TAG_DEFAULT: &str = "local"; + fn main() -> Result<()> { init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; + let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT); + + info!("build_tag: {build_tag}"); + let matches = cli().get_matches(); let http_port = *matches