Compare commits

...

12 Commits

Author SHA1 Message Date
Bojan Serafimov  aa9baddd3d  Fix unrelated test                                       2023-06-09 10:27:54 -04:00
Bojan Serafimov  4756dcd0cc  fmt                                                      2023-06-09 09:57:06 -04:00
Bojan Serafimov  ab23e28768  revert test changes                                      2023-06-09 09:50:05 -04:00
Bojan Serafimov  341563261a  Store compute spec id inside basebackup                  2023-06-09 00:39:06 -04:00
Bojan Serafimov  b836013721  Cleanup pg_stat_statements                               2023-06-08 22:52:52 -04:00
Bojan Serafimov  44ad006eb3  Merge branch 'main' into startup-no-config               2023-06-08 18:12:15 -04:00
Bojan Serafimov  aff94b54c8  more roles, dbs                                          2023-06-08 12:47:59 -04:00
Bojan Serafimov  1adb38bb82  Merge branch 'new-startup-test' into startup-no-config   2023-06-08 12:41:14 -04:00
Bojan Serafimov  1baecdc27a  comments                                                 2023-06-08 12:33:19 -04:00
Bojan Serafimov  eceda63379  Do two iterations                                        2023-06-08 12:30:47 -04:00
Bojan Serafimov  881bfc4da8  WIP                                                      2023-06-08 10:21:07 -04:00
Bojan Serafimov  eda4f86588  Add startup test                                         2023-06-06 16:45:16 -04:00
5 changed files with 94 additions and 1 deletion

View File

@@ -442,8 +442,42 @@ impl ComputeNode {
         let pg = self.start_postgres(spec.storage_auth_token.clone())?;
 
         // Maybe apply the spec
         if spec.spec.mode == ComputeMode::Primary {
-            self.apply_config(&compute_state)?;
+            let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
+
+            // Get spec_id or make it up by hashing
+            //
+            // TODO Make spec_id required so there would be no need to hash.
+            let spec_id = spec.operation_uuid.clone().unwrap_or_else(|| {
+                use std::collections::hash_map::DefaultHasher;
+                use std::hash::{Hash, Hasher};
+
+                // HACK Exclude postgresql.conf because it doesn't need
+                // to be applied like the other fields in the spec
+                let mut spec_no_conf = spec.clone();
+                spec_no_conf.cluster.postgresql_conf = None;
+                let json = serde_json::to_vec(&spec_no_conf).unwrap();
+
+                let mut hasher = DefaultHasher::new();
+                json.hash(&mut hasher);
+                let hash = hasher.finish();
+                format!("{:x}", hash)
+            });
+
+            // Get current spec_id
+            let path = Path::new(&self.pgdata).join("neon_compute_spec_id.txt");
+            let current_spec_id = std::fs::read_to_string(path).ok();
+
+            // Respec if needed
+            if current_spec_id == Some(spec_id.clone()) {
+                info!("no need to respec");
+            } else {
+                info!("respeccing {:?} {:?}", current_spec_id, &spec_id);
+                self.apply_config(&compute_state)?;
+                self.cache_spec_id(&compute_state, spec_id)?;
+            }
         }
 
         let startup_end_time = Utc::now();
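
The fallback id above is a 64-bit std hash of the serialized spec (minus postgresql.conf), rendered as hex. A minimal standalone sketch of the same derivation, with a serde_json::Value as a hypothetical stand-in for the real spec type:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Hypothetical stand-in: any serde-serializable spec hashes the same way.
fn derive_spec_id(spec: &serde_json::Value) -> String {
    let json = serde_json::to_vec(spec).expect("spec serializes");
    let mut hasher = DefaultHasher::new();
    json.hash(&mut hasher);
    // DefaultHasher's algorithm is not guaranteed stable across Rust
    // releases, so a binary upgrade may change the id; the only cost
    // is one redundant respec on the next start.
    format!("{:x}", hasher.finish())
}

fn main() {
    let spec = serde_json::json!({"cluster": {"roles": [], "databases": []}});
    // An unchanged spec hashes to the same id, so it is not reapplied.
    println!("spec_id = {}", derive_spec_id(&spec));
}
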
@@ -465,6 +499,28 @@ impl ComputeNode {
         Ok(pg)
     }
 
+    fn cache_spec_id(&self, compute_state: &ComputeState, spec_id: String) -> anyhow::Result<()> {
+        let spec = &compute_state.pspec.as_ref().expect("spec must be set");
+        let cmd = format!(
+            "set_compute_spec_id {} {} {}",
+            spec.tenant_id, spec.timeline_id, spec_id,
+        );
+
+        let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
+
+        // Use the storage auth token from the config file, if given.
+        // Note: this overrides any password set in the connection string.
+        if let Some(storage_auth_token) = &spec.storage_auth_token {
+            info!("Got storage auth token from spec file");
+            config.password(storage_auth_token);
+        } else {
+            info!("Storage auth token not set");
+        }
+
+        let mut client = config.connect(NoTls)?;
+        client.simple_query(&cmd)?;
+        Ok(())
+    }
+
     // Look for core dumps and collect backtraces.
     //
     // EKS worker nodes have following core dump settings:
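
cache_spec_id reuses the pageserver's libpq-compatible command channel, the same one that serves basebackup and get_last_record_rlsn below, so the command can also be issued from any libpq client. A rough sketch with the postgres crate; the address, user, and port here are assumptions, and the angle-bracket ids are placeholders:

use std::str::FromStr;

use postgres::{Config, NoTls};

fn main() -> Result<(), postgres::Error> {
    // Hypothetical address; the real one comes from spec.pageserver_connstr.
    let config = Config::from_str("host=127.0.0.1 port=6400 user=pageserver")?;
    let mut client = config.connect(NoTls)?;
    // cache_spec_id formats in the compute's actual tenant and timeline ids.
    client.simple_query("set_compute_spec_id <tenant_id> <timeline_id> <spec_id>")?;
    Ok(())
}
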

View File

@@ -417,6 +417,16 @@ where
     // Also send zenith.signal file with extra bootstrap data.
     //
     async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
+        // Add neon_compute_spec_id.txt
+        if let Some(spec_id) = &self.timeline.compute_spec_id.lock().await.clone() {
+            self.ar
+                .append(
+                    &new_tar_header("neon_compute_spec_id.txt", spec_id.len() as u64)?,
+                    spec_id.as_bytes(),
+                )
+                .await?;
+        }
+
         // add zenith.signal file
         let mut zenith_signal = String::new();
         if self.prev_record_lsn == Lsn(0) {
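
The basebackup is a tar stream; self.ar is the repo's async archive builder and new_tar_header its helper for fixed-size entries. As a rough synchronous analogue (a sketch using the tar crate, not the repo's API), appending the one-line spec-id file amounts to:

use tar::{Builder, Header};

fn append_spec_id(ar: &mut Builder<Vec<u8>>, spec_id: &str) -> std::io::Result<()> {
    let mut header = Header::new_gnu();
    header.set_size(spec_id.len() as u64);
    header.set_mode(0o644);
    // append_data fills in the entry path and recomputes the checksum.
    ar.append_data(&mut header, "neon_compute_spec_id.txt", spec_id.as_bytes())
}

fn main() -> std::io::Result<()> {
    let mut ar = Builder::new(Vec::new());
    append_spec_id(&mut ar, "deadbeef")?;
    let bytes = ar.into_inner()?; // finishes the archive
    println!("tar is {} bytes", bytes.len());
    Ok(())
}
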

View File

@@ -915,6 +915,27 @@ where
             self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false, ctx)
                 .await?;
             pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        } else if query_string.starts_with("set_compute_spec_id ") {
+            let (_, params_raw) = query_string.split_at("set_compute_spec_id ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+            if params.len() != 3 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for set_compute_spec_id command"
+                )));
+            }
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;
+            let spec_id = params[2].to_string();
+
+            self.check_permission(Some(tenant_id))?;
+
+            let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
+            *timeline.compute_spec_id.lock().await = Some(spec_id);
+
+            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        }
         // return pair of prev_lsn and last_lsn
         else if query_string.starts_with("get_last_record_rlsn ") {
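
The handler accepts exactly three whitespace-separated parameters after the command name. A minimal sketch of that parse in isolation, with plain &str standing in for the repo's TenantId and TimelineId types:

// Sketch only: the real handler parses typed ids and checks permissions.
fn parse_set_compute_spec_id(query: &str) -> Result<(&str, &str, &str), String> {
    let rest = query
        .strip_prefix("set_compute_spec_id ")
        .ok_or("not a set_compute_spec_id command")?;
    match rest.split_whitespace().collect::<Vec<_>>()[..] {
        [tenant, timeline, spec_id] => Ok((tenant, timeline, spec_id)),
        _ => Err("invalid param number for set_compute_spec_id command".into()),
    }
}

fn main() {
    let parsed = parse_set_compute_spec_id("set_compute_spec_id t1 tl1 abc123");
    assert_eq!(parsed, Ok(("t1", "tl1", "abc123")));
}
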

View File

@@ -215,6 +215,10 @@ pub struct Timeline {
     // though let's keep them both for better error visibility.
     pub initdb_lsn: Lsn,
 
+    // Compute nodes can set this field after successful application
+    // of a new spec, in order to avoid reapplying it on next restart.
+    pub compute_spec_id: tokio::sync::Mutex<Option<String>>,
+
     /// When did we last calculate the partitioning?
     partitioning: Mutex<(KeyPartitioning, Lsn)>,

@@ -1456,6 +1460,7 @@ impl Timeline {
             latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
             initdb_lsn: metadata.initdb_lsn(),
+            compute_spec_id: tokio::sync::Mutex::new(None),
             current_logical_size: if disk_consistent_lsn.is_valid() {
                 // we're creating timeline data with some layer files existing locally,
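
Unlike the unqualified Mutex on partitioning, compute_spec_id is a tokio::sync::Mutex: add_pgcontrol_file above takes it with .lock().await, and an async-aware guard may be held across await points, which a blocking guard that is not Send would forbid inside spawned tasks. A tiny illustrative sketch (assuming the tokio crate with its runtime and macros features):

#[tokio::main]
async fn main() {
    let spec_id = tokio::sync::Mutex::new(None::<String>);
    *spec_id.lock().await = Some("abc123".into());

    let guard = spec_id.lock().await;
    tokio::task::yield_now().await; // guard legitimately held across an await
    println!("{:?}", *guard);
}
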

View File

@@ -2912,6 +2912,7 @@ SKIP_FILES = frozenset(
         "pg_internal.init",
         "pg.log",
         "zenith.signal",
+        "neon_compute_spec_id.txt",
         "pg_hba.conf",
         "postgresql.conf",
         "postmaster.opts",