From 341563261abe240c1a9a5fbe82919d70ef89928c Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Fri, 9 Jun 2023 00:39:06 -0400 Subject: [PATCH] Store compute spec id inside basebackup --- compute_tools/src/compute.rs | 31 +++++++++++++++++++++++++++---- pageserver/src/basebackup.rs | 10 ++++++++++ pageserver/src/page_service.rs | 22 ++++++++++++++++++++++ pageserver/src/tenant/timeline.rs | 5 +++++ 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 894a256cd6..bcc47519d1 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -466,9 +466,8 @@ impl ComputeNode { }); // Get current spec_id - // TODO use pageserver instead of local storage - let path = Path::new("/home/bojan/tmp/spec_id.txt"); - let current_spec_id = std::fs::read_to_string(path).ok(); + let path = Path::new(&self.pgdata).join("neon_compute_spec_id.txt"); + let current_spec_id = std::fs::read_to_string(&path).ok(); // Respec if needed if current_spec_id == Some(spec_id.clone()) { @@ -477,7 +476,7 @@ impl ComputeNode { info!("respeccing {:?} {:?}", current_spec_id, spec_id.clone()); self.apply_config(&compute_state)?; - std::fs::write(path, spec_id)?; + self.cache_spec_id(&compute_state, spec_id)?; } } @@ -500,6 +499,30 @@ impl ComputeNode { Ok(pg) } + fn cache_spec_id(&self, compute_state: &ComputeState, spec_id: String) -> anyhow::Result<()>{ + let spec = &compute_state.pspec.as_ref().expect("spec must be set"); + let cmd = format!( + "set_compute_spec_id {} {} {}", + spec.tenant_id, + spec.timeline_id, + spec_id, + ); + let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?; + + // Use the storage auth token from the config file, if given. + // Note: this overrides any password set in the connection string. + if let Some(storage_auth_token) = &spec.storage_auth_token { + info!("Got storage auth token from spec file"); + config.password(storage_auth_token); + } else { + info!("Storage auth token not set"); + } + let mut client = config.connect(NoTls)?; + client.simple_query(&cmd)?; + + Ok(()) + } + // Look for core dumps and collect backtraces. // // EKS worker nodes have following core dump settings: diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index c666fc785c..f53696cee6 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -417,6 +417,16 @@ where // Also send zenith.signal file with extra bootstrap data. // async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> { + // Add neon_compute_spec_id.txt + if let Some(spec_id) = &self.timeline.compute_spec_id.lock().await.clone() { + self.ar + .append( + &new_tar_header("neon_compute_spec_id.txt", spec_id.len() as u64)?, + spec_id.as_bytes(), + ) + .await?; + } + // add zenith.signal file let mut zenith_signal = String::new(); if self.prev_record_lsn == Lsn(0) { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 9e9285a009..21c36ae725 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -916,6 +916,28 @@ where .await?; pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?; } + else if query_string.starts_with("set_compute_spec_id ") { + let (_, params_raw) = query_string.split_at("set_compute_spec_id ".len()); + let params = params_raw.split_whitespace().collect::>(); + + if params.len() != 3 { + return Err(QueryError::Other(anyhow::anyhow!( + "invalid param number for set_compute_spec_id command" + ))); + } + + let tenant_id = TenantId::from_str(params[0]) + .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?; + let timeline_id = TimelineId::from_str(params[1]) + .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?; + let spec_id = params[2].to_string(); + + self.check_permission(Some(tenant_id))?; + let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; + *timeline.compute_spec_id.lock().await = Some(spec_id); + + pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?; + } // return pair of prev_lsn and last_lsn else if query_string.starts_with("get_last_record_rlsn ") { let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len()); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 71f83bf127..7de950fa41 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -215,6 +215,10 @@ pub struct Timeline { // though let's keep them both for better error visibility. pub initdb_lsn: Lsn, + // Compute nodes can set this field after successful application + // of a new spec, in order to avoid reapplying it on next restart. + pub compute_spec_id: tokio::sync::Mutex>, + /// When did we last calculate the partitioning? partitioning: Mutex<(KeyPartitioning, Lsn)>, @@ -1456,6 +1460,7 @@ impl Timeline { latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()), initdb_lsn: metadata.initdb_lsn(), + compute_spec_id: tokio::sync::Mutex::new(None), current_logical_size: if disk_consistent_lsn.is_valid() { // we're creating timeline data with some layer files existing locally,