Store compute spec id inside basebackup

This commit is contained in:
Bojan Serafimov
2023-06-09 00:39:06 -04:00
parent b836013721
commit 341563261a
4 changed files with 64 additions and 4 deletions

View File

@@ -466,9 +466,8 @@ impl ComputeNode {
});
// Get current spec_id
// TODO use pageserver instead of local storage
let path = Path::new("/home/bojan/tmp/spec_id.txt");
let current_spec_id = std::fs::read_to_string(path).ok();
let path = Path::new(&self.pgdata).join("neon_compute_spec_id.txt");
let current_spec_id = std::fs::read_to_string(&path).ok();
// Respec if needed
if current_spec_id == Some(spec_id.clone()) {
@@ -477,7 +476,7 @@ impl ComputeNode {
info!("respeccing {:?} {:?}", current_spec_id, spec_id.clone());
self.apply_config(&compute_state)?;
std::fs::write(path, spec_id)?;
self.cache_spec_id(&compute_state, spec_id)?;
}
}
@@ -500,6 +499,30 @@ impl ComputeNode {
Ok(pg)
}
fn cache_spec_id(&self, compute_state: &ComputeState, spec_id: String) -> anyhow::Result<()>{
let spec = &compute_state.pspec.as_ref().expect("spec must be set");
let cmd = format!(
"set_compute_spec_id {} {} {}",
spec.tenant_id,
spec.timeline_id,
spec_id,
);
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
if let Some(storage_auth_token) = &spec.storage_auth_token {
info!("Got storage auth token from spec file");
config.password(storage_auth_token);
} else {
info!("Storage auth token not set");
}
let mut client = config.connect(NoTls)?;
client.simple_query(&cmd)?;
Ok(())
}
// Look for core dumps and collect backtraces.
//
// EKS worker nodes have following core dump settings:

View File

@@ -417,6 +417,16 @@ where
// Also send zenith.signal file with extra bootstrap data.
//
async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
// Add neon_compute_spec_id.txt
if let Some(spec_id) = &self.timeline.compute_spec_id.lock().await.clone() {
self.ar
.append(
&new_tar_header("neon_compute_spec_id.txt", spec_id.len() as u64)?,
spec_id.as_bytes(),
)
.await?;
}
// add zenith.signal file
let mut zenith_signal = String::new();
if self.prev_record_lsn == Lsn(0) {

View File

@@ -916,6 +916,28 @@ where
.await?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
else if query_string.starts_with("set_compute_spec_id ") {
let (_, params_raw) = query_string.split_at("set_compute_spec_id ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
if params.len() != 3 {
return Err(QueryError::Other(anyhow::anyhow!(
"invalid param number for set_compute_spec_id command"
)));
}
let tenant_id = TenantId::from_str(params[0])
.with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
let timeline_id = TimelineId::from_str(params[1])
.with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;
let spec_id = params[2].to_string();
self.check_permission(Some(tenant_id))?;
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
*timeline.compute_spec_id.lock().await = Some(spec_id);
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
// return pair of prev_lsn and last_lsn
else if query_string.starts_with("get_last_record_rlsn ") {
let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len());

View File

@@ -215,6 +215,10 @@ pub struct Timeline {
// though let's keep them both for better error visibility.
pub initdb_lsn: Lsn,
// Compute nodes can set this field after successful application
// of a new spec, in order to avoid reapplying it on next restart.
pub compute_spec_id: tokio::sync::Mutex<Option<String>>,
/// When did we last calculate the partitioning?
partitioning: Mutex<(KeyPartitioning, Lsn)>,
@@ -1456,6 +1460,7 @@ impl Timeline {
latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
initdb_lsn: metadata.initdb_lsn(),
compute_spec_id: tokio::sync::Mutex::new(None),
current_logical_size: if disk_consistent_lsn.is_valid() {
// we're creating timeline data with some layer files existing locally,