offload_lfc_interval_seconds in ComputeSpec (#12447)

- Add ComputeSpec flag `offload_lfc_interval_seconds` controlling
  whether, and how often, the LFC state should be offloaded to endpoint
  storage. The default value (None) means "don't offload".
- Add glue code around it for `neon_local` and the integration tests.
- Add an `autoprewarm` mode to `test_lfc_prewarm` that exercises the
  `offload_lfc_interval_seconds` and `autoprewarm` flags in conjunction
  (see the usage sketch after this list).
- Rename `compute_ctl_lfc_prewarm_requests_total` and
  `compute_ctl_lfc_offload_requests_total` to
  `compute_ctl_lfc_prewarms_total` and `compute_ctl_lfc_offloads_total`
  to reflect that we count prewarms and offloads, not the `compute_ctl`
  requests for them. Don't increment the metrics if a prewarm/offload
  is already ongoing.
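
For illustration, here is how the two flags combine in an integration test;
a minimal sketch using the fixtures added in this commit (`create_start`,
`start`, `offload_lfc_wait`, `prewarm_lfc_wait`), with a hypothetical test
name and the same 2-second interval the new autoprewarm test mode uses:

    # Minimal sketch; assumes a NeonEnv fixture as in the test suite.
    def test_autoprewarm_example(neon_simple_env):
        env = neon_simple_env
        # Offload LFC state to endpoint storage every 2 seconds and
        # download it again automatically on each endpoint startup.
        endpoint = env.endpoints.create_start(
            branch_name="main",
            autoprewarm=True,
            offload_lfc_interval_seconds=2,
        )
        client = endpoint.http_client()
        client.offload_lfc_wait()  # block until the periodic worker has offloaded once

        endpoint.stop()
        endpoint.start(autoprewarm=True, offload_lfc_interval_seconds=2)
        client.prewarm_lfc_wait()  # autoprewarm kicked off on startup; wait for completion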

Related: https://github.com/neondatabase/cloud/issues/19011
Resolves: https://github.com/neondatabase/cloud/issues/30770
Author: Mikhail
Date: 2025-07-04 19:49:57 +01:00
Committed by: GitHub
Parent: 3a44774227
Commit: 7ed4530618
13 changed files with 296 additions and 178 deletions

--- File 1 of 13 ---

@@ -29,7 +29,8 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::spawn;
+use tokio::task::JoinHandle;
+use tokio::{spawn, time};
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -107,6 +108,8 @@ pub struct ComputeNodeParams {
     pub installed_extensions_collection_interval: Arc<AtomicU64>,
 }

+type TaskHandle = Mutex<Option<JoinHandle<()>>>;
+
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
     pub params: ComputeNodeParams,
@@ -129,7 +132,8 @@ pub struct ComputeNode {
     pub compute_ctl_config: ComputeCtlConfig,
     /// Handle to the extension stats collection task
-    extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
+    extension_stats_task: TaskHandle,
+    lfc_offload_task: TaskHandle,
 }

 // store some metrics about download size that might impact startup time
@@ -368,7 +372,7 @@ fn maybe_cgexec(cmd: &str) -> Command {
 struct PostgresHandle {
     postgres: std::process::Child,
-    log_collector: tokio::task::JoinHandle<Result<()>>,
+    log_collector: JoinHandle<Result<()>>,
 }

 impl PostgresHandle {
@@ -382,7 +386,7 @@ struct StartVmMonitorResult {
     #[cfg(target_os = "linux")]
     token: tokio_util::sync::CancellationToken,
     #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
+    vm_monitor: Option<JoinHandle<Result<()>>>,
 }

 impl ComputeNode {
@@ -433,6 +437,7 @@ impl ComputeNode {
             ext_download_progress: RwLock::new(HashMap::new()),
             compute_ctl_config: config.compute_ctl_config,
             extension_stats_task: Mutex::new(None),
+            lfc_offload_task: Mutex::new(None),
         })
     }
@@ -520,8 +525,8 @@
             None
         };

-        // Terminate the extension stats collection task
         this.terminate_extension_stats_task();
+        this.terminate_lfc_offload_task();

         // Terminate the vm_monitor so it releases the file watcher on
         // /sys/fs/cgroup/neon-postgres.
@@ -851,12 +856,15 @@
         // Log metrics so that we can search for slow operations in logs
         info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

-        // Spawn the extension stats background task
         self.spawn_extension_stats_task();

         if pspec.spec.autoprewarm {
+            info!("autoprewarming on startup as requested");
             self.prewarm_lfc(None);
         }
+        if let Some(seconds) = pspec.spec.offload_lfc_interval_seconds {
+            self.spawn_lfc_offload_task(Duration::from_secs(seconds.into()));
+        };
         Ok(())
     }
@@ -2357,10 +2365,7 @@ LIMIT 100",
     }

     pub fn spawn_extension_stats_task(&self) {
-        // Cancel any existing task
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
-        }
+        self.terminate_extension_stats_task();

         let conf = self.tokio_conn_conf.clone();
         let atomic_interval = self.params.installed_extensions_collection_interval.clone();
@@ -2396,8 +2401,30 @@
     }

     fn terminate_extension_stats_task(&self) {
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
+        if let Some(h) = self.extension_stats_task.lock().unwrap().take() {
+            h.abort()
         }
     }
+
+    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
+        self.terminate_lfc_offload_task();
+        let secs = interval.as_secs();
+        info!("spawning lfc offload worker with {secs}s interval");
+        let this = self.clone();
+
+        let handle = spawn(async move {
+            let mut interval = time::interval(interval);
+            interval.tick().await; // returns immediately
+            loop {
+                interval.tick().await;
+                this.offload_lfc_async().await;
+            }
+        });
+        *self.lfc_offload_task.lock().unwrap() = Some(handle);
+    }
+
+    fn terminate_lfc_offload_task(&self) {
+        if let Some(h) = self.lfc_offload_task.lock().unwrap().take() {
+            h.abort()
+        }
+    }
 }

--- File 2 of 13 ---

@@ -5,6 +5,7 @@ use compute_api::responses::LfcOffloadState;
 use compute_api::responses::LfcPrewarmState;
 use http::StatusCode;
 use reqwest::Client;
+use std::mem::replace;
 use std::sync::Arc;
 use tokio::{io::AsyncReadExt, spawn};
 use tracing::{error, info};
@@ -88,17 +89,15 @@
         self.state.lock().unwrap().lfc_offload_state.clone()
     }

-    /// Returns false if there is a prewarm request ongoing, true otherwise
+    /// If there is a prewarm request ongoing, return false, true otherwise
     pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
-        crate::metrics::LFC_PREWARM_REQUESTS.inc();
         {
             let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
-            if let LfcPrewarmState::Prewarming =
-                std::mem::replace(state, LfcPrewarmState::Prewarming)
-            {
+            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
                 return false;
             }
         }
+        crate::metrics::LFC_PREWARMS.inc();

         let cloned = self.clone();
         spawn(async move {
@@ -152,30 +151,39 @@
         .map(|_| ())
     }

-    /// Returns false if there is an offload request ongoing, true otherwise
+    /// If offload request is ongoing, return false, true otherwise
     pub fn offload_lfc(self: &Arc<Self>) -> bool {
-        crate::metrics::LFC_OFFLOAD_REQUESTS.inc();
         {
             let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if let LfcOffloadState::Offloading =
-                std::mem::replace(state, LfcOffloadState::Offloading)
-            {
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
                 return false;
             }
         }
         let cloned = self.clone();
-        spawn(async move {
-            let Err(err) = cloned.offload_lfc_impl().await else {
-                cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
-                return;
-            };
-            error!(%err);
-            cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
-                error: err.to_string(),
-            };
-        });
+        spawn(async move { cloned.offload_lfc_with_state_update().await });
         true
     }
+
+    pub async fn offload_lfc_async(self: &Arc<Self>) {
+        {
+            let state = &mut self.state.lock().unwrap().lfc_offload_state;
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+                return;
+            }
+        }
+        self.offload_lfc_with_state_update().await
+    }
+
+    async fn offload_lfc_with_state_update(&self) {
+        crate::metrics::LFC_OFFLOADS.inc();
+        let Err(err) = self.offload_lfc_impl().await else {
+            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
+            return;
+        };
+        error!(%err);
+        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
+            error: err.to_string(),
+        };
+    }

     async fn offload_lfc_impl(&self) -> Result<()> {

--- File 3 of 13 ---

@@ -97,20 +97,18 @@ pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::
     .expect("failed to define a metric")
 });

-/// Needed as neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
-/// On the other hand, LFC_PREWARMED_PAGES is excessive as we can GET /lfc/prewarm
-pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
     register_int_counter!(
-        "compute_ctl_lfc_prewarm_requests_total",
-        "Total number of LFC prewarm requests made by compute_ctl",
+        "compute_ctl_lfc_prewarms_total",
+        "Total number of LFC prewarms requested by compute_ctl or autoprewarm option",
     )
     .expect("failed to define a metric")
 });

-pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
     register_int_counter!(
-        "compute_ctl_lfc_offload_requests_total",
-        "Total number of LFC offload requests made by compute_ctl",
+        "compute_ctl_lfc_offloads_total",
+        "Total number of LFC offloads requested by compute_ctl or lfc_offload_period_seconds option",
     )
     .expect("failed to define a metric")
 });
@@ -124,7 +122,7 @@ pub fn collect() -> Vec<MetricFamily> {
     metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
     metrics.extend(PG_CURR_DOWNTIME_MS.collect());
     metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
-    metrics.extend(LFC_PREWARM_REQUESTS.collect());
-    metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
+    metrics.extend(LFC_PREWARMS.collect());
+    metrics.extend(LFC_OFFLOADS.collect());
     metrics
 }

--- File 4 of 13 ---

@@ -31,6 +31,7 @@ mod pg_helpers_tests {
 wal_level = logical
 hot_standby = on
 autoprewarm = off
+offload_lfc_interval_seconds = 20
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on

--- File 5 of 13 ---

@@ -675,6 +675,16 @@ struct EndpointStartCmdArgs {
     #[arg(default_value = "90s")]
     start_timeout: Duration,

+    #[clap(
+        long,
+        help = "Download LFC cache from endpoint storage on endpoint startup",
+        default_value = "false"
+    )]
+    autoprewarm: bool,
+
+    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
+    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
+
     #[clap(
         long,
         help = "Run in development mode, skipping VM-specific operations like process termination",
@@ -1585,22 +1595,24 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
             let endpoint_storage_token = env.generate_auth_token(&claims)?;
             let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();

-            println!("Starting existing endpoint {endpoint_id}...");
-            endpoint
-                .start(
-                    &auth_token,
+            let args = control_plane::endpoint::EndpointStartArgs {
+                auth_token,
                 endpoint_storage_token,
                 endpoint_storage_addr,
                 safekeepers_generation,
                 safekeepers,
                 pageservers,
-                    remote_ext_base_url.as_ref(),
-                    stripe_size.0 as usize,
-                    args.create_test_user,
-                    args.start_timeout,
-                    args.dev,
-                )
-                .await?;
+                remote_ext_base_url: remote_ext_base_url.clone(),
+                shard_stripe_size: stripe_size.0 as usize,
+                create_test_user: args.create_test_user,
+                start_timeout: args.start_timeout,
+                autoprewarm: args.autoprewarm,
+                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
+                dev: args.dev,
+            };
+
+            println!("Starting existing endpoint {endpoint_id}...");
+            endpoint.start(args).await?;
         }
         EndpointCmd::Reconfigure(args) => {
             let endpoint_id = &args.endpoint_id;

--- File 6 of 13 ---

@@ -373,6 +373,22 @@ impl std::fmt::Display for EndpointTerminateMode {
     }
 }

+pub struct EndpointStartArgs {
+    pub auth_token: Option<String>,
+    pub endpoint_storage_token: String,
+    pub endpoint_storage_addr: String,
+    pub safekeepers_generation: Option<SafekeeperGeneration>,
+    pub safekeepers: Vec<NodeId>,
+    pub pageservers: Vec<(PageserverProtocol, Host, u16)>,
+    pub remote_ext_base_url: Option<String>,
+    pub shard_stripe_size: usize,
+    pub create_test_user: bool,
+    pub start_timeout: Duration,
+    pub autoprewarm: bool,
+    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
+    pub dev: bool,
+}
+
 impl Endpoint {
     fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
         if !entry.file_type()?.is_dir() {
@@ -677,21 +693,7 @@
         })
     }

-    #[allow(clippy::too_many_arguments)]
-    pub async fn start(
-        &self,
-        auth_token: &Option<String>,
-        endpoint_storage_token: String,
-        endpoint_storage_addr: String,
-        safekeepers_generation: Option<SafekeeperGeneration>,
-        safekeepers: Vec<NodeId>,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
-        remote_ext_base_url: Option<&String>,
-        shard_stripe_size: usize,
-        create_test_user: bool,
-        start_timeout: Duration,
-        dev: bool,
-    ) -> Result<()> {
+    pub async fn start(&self, args: EndpointStartArgs) -> Result<()> {
         if self.status() == EndpointStatus::Running {
             anyhow::bail!("The endpoint is already running");
         }
@@ -704,10 +706,10 @@
             std::fs::remove_dir_all(self.pgdata())?;
         }

-        let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
+        let pageserver_connstring = Self::build_pageserver_connstr(&args.pageservers);
         assert!(!pageserver_connstring.is_empty());

-        let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
+        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;

         // check for file remote_extensions_spec.json
         // if it is present, read it and pass to compute_ctl
@@ -735,7 +737,7 @@
             cluster_id: None, // project ID: not used
             name: None, // project name: not used
             state: None,
-            roles: if create_test_user {
+            roles: if args.create_test_user {
                 vec![Role {
                     name: PgIdent::from_str("test").unwrap(),
                     encrypted_password: None,
@@ -744,7 +746,7 @@
             } else {
                 Vec::new()
             },
-            databases: if create_test_user {
+            databases: if args.create_test_user {
                 vec![Database {
                     name: PgIdent::from_str("neondb").unwrap(),
                     owner: PgIdent::from_str("test").unwrap(),
@@ -766,20 +768,21 @@
             endpoint_id: Some(self.endpoint_id.clone()),
             mode: self.mode,
             pageserver_connstring: Some(pageserver_connstring),
-            safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
+            safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
             safekeeper_connstrings,
-            storage_auth_token: auth_token.clone(),
+            storage_auth_token: args.auth_token.clone(),
             remote_extensions,
             pgbouncer_settings: None,
-            shard_stripe_size: Some(shard_stripe_size),
+            shard_stripe_size: Some(args.shard_stripe_size),
             local_proxy_config: None,
             reconfigure_concurrency: self.reconfigure_concurrency,
             drop_subscriptions_before_start: self.drop_subscriptions_before_start,
             audit_log_level: ComputeAudit::Disabled,
             logs_export_host: None::<String>,
-            endpoint_storage_addr: Some(endpoint_storage_addr),
-            endpoint_storage_token: Some(endpoint_storage_token),
-            autoprewarm: false,
+            endpoint_storage_addr: Some(args.endpoint_storage_addr),
+            endpoint_storage_token: Some(args.endpoint_storage_token),
+            autoprewarm: args.autoprewarm,
+            offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
             suspend_timeout_seconds: -1, // Only used in neon_local.
         };
@@ -791,7 +794,7 @@
         debug!("spec.cluster {:?}", spec.cluster);

         // fill missing fields again
-        if create_test_user {
+        if args.create_test_user {
             spec.cluster.roles.push(Role {
                 name: PgIdent::from_str("test").unwrap(),
                 encrypted_password: None,
@@ -826,7 +829,7 @@
         // Launch compute_ctl
         let conn_str = self.connstr("cloud_admin", "postgres");
         println!("Starting postgres node at '{conn_str}'");
-        if create_test_user {
+        if args.create_test_user {
             let conn_str = self.connstr("test", "neondb");
             println!("Also at '{conn_str}'");
         }
@@ -858,11 +861,11 @@
             .stderr(logfile.try_clone()?)
             .stdout(logfile);

-        if let Some(remote_ext_base_url) = remote_ext_base_url {
-            cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
+        if let Some(remote_ext_base_url) = args.remote_ext_base_url {
+            cmd.args(["--remote-ext-base-url", &remote_ext_base_url]);
         }

-        if dev {
+        if args.dev {
             cmd.arg("--dev");
         }
@@ -894,10 +897,11 @@
                 Ok(state) => {
                     match state.status {
                         ComputeStatus::Init => {
-                            if Instant::now().duration_since(start_at) > start_timeout {
+                            let timeout = args.start_timeout;
+                            if Instant::now().duration_since(start_at) > timeout {
                                 bail!(
                                     "compute startup timed out {:?}; still in Init state",
-                                    start_timeout
+                                    timeout
                                 );
                             }
                             // keep retrying
@@ -925,9 +929,10 @@
                     }
                 }
                 Err(e) => {
-                    if Instant::now().duration_since(start_at) > start_timeout {
+                    if Instant::now().duration_since(start_at) > args.start_timeout {
                         return Err(e).context(format!(
-                            "timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
+                            "timed out {:?} waiting to connect to compute_ctl HTTP",
+                            args.start_timeout
                         ));
                     }
                 }

--- File 7 of 13 ---

@@ -58,7 +58,7 @@ pub enum LfcPrewarmState {
     },
 }

-#[derive(Serialize, Default, Debug, Clone)]
+#[derive(Serialize, Default, Debug, Clone, PartialEq)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcOffloadState {
     #[default]

--- File 8 of 13 ---

@@ -181,10 +181,14 @@ pub struct ComputeSpec {
     /// JWT for authorizing requests to endpoint storage service
     pub endpoint_storage_token: Option<String>,

-    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
     #[serde(default)]
+    /// Download LFC state from endpoint storage and pass it to Postgres on compute startup
     pub autoprewarm: bool,
+
+    #[serde(default)]
+    /// Upload LFC state to endpoint storage periodically. Default value (None) means "don't upload"
+    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,

     /// Suspend timeout in seconds.
     ///
     /// We use this value to derive other values, such as the installed extensions metric.

--- File 9 of 13 ---

@@ -90,6 +90,11 @@
         "value": "off",
         "vartype": "bool"
     },
+    {
+        "name": "offload_lfc_interval_seconds",
+        "value": "20",
+        "vartype": "integer"
+    },
     {
         "name": "neon.safekeepers",
         "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",

--- File 10 of 13 ---

@@ -57,6 +57,8 @@ class EndpointHttpClient(requests.Session):
         self.auth = BearerAuth(jwt)
         self.mount("http://", HTTPAdapter())
+        self.prewarm_url = f"http://localhost:{external_port}/lfc/prewarm"
+        self.offload_url = f"http://localhost:{external_port}/lfc/offload"

     def dbs_and_roles(self):
         res = self.get(f"http://localhost:{self.external_port}/dbs_and_roles", auth=self.auth)
@@ -64,33 +66,39 @@
         return res.json()

     def prewarm_lfc_status(self) -> dict[str, str]:
-        res = self.get(f"http://localhost:{self.external_port}/lfc/prewarm")
+        res = self.get(self.prewarm_url)
         res.raise_for_status()
         json: dict[str, str] = res.json()
         return json

     def prewarm_lfc(self, from_endpoint_id: str | None = None):
-        url: str = f"http://localhost:{self.external_port}/lfc/prewarm"
         params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
-        self.post(url, params=params).raise_for_status()
+        self.post(self.prewarm_url, params=params).raise_for_status()
+        self.prewarm_lfc_wait()

+    def prewarm_lfc_wait(self):
         def prewarmed():
             json = self.prewarm_lfc_status()
             status, err = json["status"], json.get("error")
-            assert status == "completed", f"{status}, error {err}"
+            assert status == "completed", f"{status}, {err=}"

         wait_until(prewarmed, timeout=60)

-    def offload_lfc(self):
-        url = f"http://localhost:{self.external_port}/lfc/offload"
-        self.post(url).raise_for_status()
-
-        def offloaded():
-            res = self.get(url)
-            res.raise_for_status()
-            json = res.json()
-            status, err = json["status"], json.get("error")
-            assert status == "completed", f"{status}, error {err}"
+    def offload_lfc_status(self) -> dict[str, str]:
+        res = self.get(self.offload_url)
+        res.raise_for_status()
+        json: dict[str, str] = res.json()
+        return json
+
+    def offload_lfc(self):
+        self.post(self.offload_url).raise_for_status()
+        self.offload_lfc_wait()
+
+    def offload_lfc_wait(self):
+        def offloaded():
+            json = self.offload_lfc_status()
+            status, err = json["status"], json.get("error")
+            assert status == "completed", f"{status}, {err=}"

         wait_until(offloaded)

--- File 11 of 13 ---

@@ -568,6 +568,8 @@ class NeonLocalCli(AbstractNeonCli):
         timeout: str | None = None,
         env: dict[str, str] | None = None,
         dev: bool = False,
+        autoprewarm: bool = False,
+        offload_lfc_interval_seconds: int | None = None,
     ) -> subprocess.CompletedProcess[str]:
         args = [
             "endpoint",
@@ -593,6 +595,10 @@
             args.extend(["--create-test-user"])
         if timeout is not None:
             args.extend(["--start-timeout", str(timeout)])
+        if autoprewarm:
+            args.extend(["--autoprewarm"])
+        if offload_lfc_interval_seconds is not None:
+            args.extend(["--offload-lfc-interval-seconds", str(offload_lfc_interval_seconds)])
         if dev:
             args.extend(["--dev"])

--- File 12 of 13 ---

@@ -4362,6 +4362,8 @@ class Endpoint(PgProtocol, LogUtils):
         basebackup_request_tries: int | None = None,
         timeout: str | None = None,
         env: dict[str, str] | None = None,
+        autoprewarm: bool = False,
+        offload_lfc_interval_seconds: int | None = None,
     ) -> Self:
         """
         Start the Postgres instance.
@@ -4386,6 +4388,8 @@
             basebackup_request_tries=basebackup_request_tries,
             timeout=timeout,
             env=env,
+            autoprewarm=autoprewarm,
+            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
         )
         self._running.release(1)
         self.log_config_value("shared_buffers")
@@ -4601,6 +4605,8 @@
         pageserver_id: int | None = None,
         allow_multiple: bool = False,
         basebackup_request_tries: int | None = None,
+        autoprewarm: bool = False,
+        offload_lfc_interval_seconds: int | None = None,
     ) -> Self:
         """
         Create an endpoint, apply config, and start Postgres.
@@ -4621,6 +4627,8 @@
             pageserver_id=pageserver_id,
             allow_multiple=allow_multiple,
             basebackup_request_tries=basebackup_request_tries,
+            autoprewarm=autoprewarm,
+            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
         )
         return self
@@ -4705,6 +4713,8 @@
         remote_ext_base_url: str | None = None,
         pageserver_id: int | None = None,
         basebackup_request_tries: int | None = None,
+        autoprewarm: bool = False,
+        offload_lfc_interval_seconds: int | None = None,
     ) -> Endpoint:
         ep = Endpoint(
             self.env,
@@ -4726,6 +4736,8 @@
             remote_ext_base_url=remote_ext_base_url,
             pageserver_id=pageserver_id,
             basebackup_request_tries=basebackup_request_tries,
+            autoprewarm=autoprewarm,
+            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
         )

     def create(

--- File 13 of 13 ---

@@ -1,34 +1,38 @@
 import random
 import threading
-import time
-from enum import Enum
+from enum import StrEnum
+from time import sleep
+from typing import Any

 import pytest
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv
-from fixtures.utils import USE_LFC
+from fixtures.utils import USE_LFC, wait_until
 from prometheus_client.parser import text_string_to_metric_families as prom_parse_impl
+from psycopg2.extensions import cursor as Cursor


-class LfcQueryMethod(Enum):
-    COMPUTE_CTL = False
-    POSTGRES = True
+class PrewarmMethod(StrEnum):
+    POSTGRES = "postgres"
+    COMPUTE_CTL = "compute-ctl"
+    AUTOPREWARM = "autoprewarm"


-PREWARM_LABEL = "compute_ctl_lfc_prewarm_requests_total"
-OFFLOAD_LABEL = "compute_ctl_lfc_offload_requests_total"
-QUERY_OPTIONS = LfcQueryMethod.POSTGRES, LfcQueryMethod.COMPUTE_CTL
+PREWARM_LABEL = "compute_ctl_lfc_prewarms_total"
+OFFLOAD_LABEL = "compute_ctl_lfc_offloads_total"
+METHOD_VALUES = [e for e in PrewarmMethod]
+METHOD_IDS = [e.value for e in PrewarmMethod]


-def check_pinned_entries(cur):
+def check_pinned_entries(cur: Cursor):
     # some LFC buffer can be temporary locked by autovacuum or background writer
     for _ in range(10):
         cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_chunks_pinned'")
         n_pinned = cur.fetchall()[0][0]
         if n_pinned == 0:
             break
-        time.sleep(1)
+        sleep(1)
     assert n_pinned == 0
@@ -41,21 +45,68 @@ def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
     }


+def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor) -> Any:
+    if method == PrewarmMethod.AUTOPREWARM:
+        client.offload_lfc_wait()
+    elif method == PrewarmMethod.COMPUTE_CTL:
+        status = client.prewarm_lfc_status()
+        assert status["status"] == "not_prewarmed"
+        assert "error" not in status
+        client.offload_lfc()
+        assert client.prewarm_lfc_status()["status"] == "not_prewarmed"
+        assert prom_parse(client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0}
+    elif method == PrewarmMethod.POSTGRES:
+        cur.execute("select get_local_cache_state()")
+        return cur.fetchall()[0][0]
+    else:
+        raise AssertionError(f"{method} not in PrewarmMethod")
+
+
+def prewarm_endpoint(
+    method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor, lfc_state: str | None
+):
+    if method == PrewarmMethod.AUTOPREWARM:
+        client.prewarm_lfc_wait()
+    elif method == PrewarmMethod.COMPUTE_CTL:
+        client.prewarm_lfc()
+    elif method == PrewarmMethod.POSTGRES:
+        cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
+
+
+def check_prewarmed(
+    method: PrewarmMethod, client: EndpointHttpClient, desired_status: dict[str, str | int]
+):
+    if method == PrewarmMethod.AUTOPREWARM:
+        assert client.prewarm_lfc_status() == desired_status
+        assert prom_parse(client)[PREWARM_LABEL] == 1
+    elif method == PrewarmMethod.COMPUTE_CTL:
+        assert client.prewarm_lfc_status() == desired_status
+        assert prom_parse(client) == {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1}
+
+
 @pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
-@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
-def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
+@pytest.mark.parametrize("method", METHOD_VALUES, ids=METHOD_IDS)
+def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
     env = neon_simple_env
     n_records = 1000000

-    endpoint = env.endpoints.create_start(
-        branch_name="main",
-        config_lines=[
+    cfg = [
         "autovacuum = off",
         "shared_buffers=1MB",
         "neon.max_file_cache_size=1GB",
         "neon.file_cache_size_limit=1GB",
         "neon.file_cache_prewarm_limit=1000",
-        ],
-    )
+    ]
+
+    offload_secs = 2
+    if method == PrewarmMethod.AUTOPREWARM:
+        endpoint = env.endpoints.create_start(
+            branch_name="main",
+            config_lines=cfg,
+            autoprewarm=True,
+            offload_lfc_interval_seconds=offload_secs,
+        )
+    else:
+        endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)

     pg_conn = endpoint.connect()
     pg_cur = pg_conn.cursor()
@@ -69,19 +120,13 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
     lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
     log.info(f"Inserted {n_records} rows")

-    http_client = endpoint.http_client()
-    if query is LfcQueryMethod.COMPUTE_CTL:
-        status = http_client.prewarm_lfc_status()
-        assert status["status"] == "not_prewarmed"
-        assert "error" not in status
-        http_client.offload_lfc()
-        assert http_client.prewarm_lfc_status()["status"] == "not_prewarmed"
-        assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0}
-    else:
-        pg_cur.execute("select get_local_cache_state()")
-        lfc_state = pg_cur.fetchall()[0][0]
+    client = endpoint.http_client()
+    lfc_state = offload_lfc(method, client, pg_cur)

     endpoint.stop()
-    endpoint.start()
+    if method == PrewarmMethod.AUTOPREWARM:
+        endpoint.start(autoprewarm=True, offload_lfc_interval_seconds=offload_secs)
+    else:
+        endpoint.start()

     pg_conn = endpoint.connect()
@@ -89,11 +134,7 @@
     lfc_conn = endpoint.connect(dbname="lfc")
     lfc_cur = lfc_conn.cursor()

-    if query is LfcQueryMethod.COMPUTE_CTL:
-        http_client.prewarm_lfc()
-    else:
-        pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
+    prewarm_endpoint(method, client, pg_cur, lfc_state)

     pg_cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
     lfc_used_pages = pg_cur.fetchall()[0][0]
@@ -111,33 +152,32 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMet
         and prewarm_info[1] > 0
         and prewarm_info[0] == prewarm_info[1] + prewarm_info[2]
     )

     lfc_cur.execute("select sum(pk) from t")
     assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2

     check_pinned_entries(pg_cur)
     desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
-    if query is LfcQueryMethod.COMPUTE_CTL:
-        assert http_client.prewarm_lfc_status() == desired
-        assert prom_parse(http_client) == {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1}
+    check_prewarmed(method, client, desired)
+
+
+# autoprewarm isn't needed as we prewarm manually
+WORKLOAD_VALUES = METHOD_VALUES[:-1]
+WORKLOAD_IDS = METHOD_IDS[:-1]


 @pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
-@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
-def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMethod):
+@pytest.mark.parametrize("method", WORKLOAD_VALUES, ids=WORKLOAD_IDS)
+def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMethod):
     env = neon_simple_env
     n_records = 10000
     n_threads = 4

-    endpoint = env.endpoints.create_start(
-        branch_name="main",
-        config_lines=[
+    cfg = [
         "shared_buffers=1MB",
         "neon.max_file_cache_size=1GB",
         "neon.file_cache_size_limit=1GB",
         "neon.file_cache_prewarm_limit=1000000",
-        ],
-    )
+    ]
+    endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)

     pg_conn = endpoint.connect()
     pg_cur = pg_conn.cursor()
@@ -154,12 +194,7 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMet
     log.info(f"Inserted {n_records} rows")

     http_client = endpoint.http_client()
-    if query is LfcQueryMethod.COMPUTE_CTL:
-        http_client.offload_lfc()
-    else:
-        pg_cur.execute("select get_local_cache_state()")
-        lfc_state = pg_cur.fetchall()[0][0]
+    lfc_state = offload_lfc(method, http_client, pg_cur)

     running = True
     n_prewarms = 0
@@ -170,8 +205,8 @@
         while running:
             src = random.randint(1, n_records)
             dst = random.randint(1, n_records)
-            lfc_cur.execute("update accounts set balance=balance-100 where id=%s", (src,))
-            lfc_cur.execute("update accounts set balance=balance+100 where id=%s", (dst,))
+            lfc_cur.execute(f"update accounts set balance=balance-100 where id={src}")
+            lfc_cur.execute(f"update accounts set balance=balance+100 where id={dst}")
             n_transfers += 1
         log.info(f"Number of transfers: {n_transfers}")
@@ -183,13 +218,7 @@
             pg_cur.execute("select pg_reload_conf()")
             pg_cur.execute("alter system set neon.file_cache_size_limit='1GB'")
             pg_cur.execute("select pg_reload_conf()")
-
-            if query is LfcQueryMethod.COMPUTE_CTL:
-                # Same thing as prewarm_lfc(), testing other method
-                http_client.prewarm_lfc(endpoint.endpoint_id)
-            else:
-                pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
+            prewarm_endpoint(method, http_client, pg_cur, lfc_state)
             nonlocal n_prewarms
             n_prewarms += 1
             log.info(f"Number of prewarms: {n_prewarms}")
@@ -203,7 +232,10 @@
     prewarm_thread = threading.Thread(target=prewarm)
     prewarm_thread.start()

-    time.sleep(20)
+    def prewarmed():
+        assert n_prewarms > 5
+
+    wait_until(prewarmed)
     running = False

     for t in workload_threads:
@@ -215,5 +247,5 @@
     assert total_balance == 0
     check_pinned_entries(pg_cur)
-    if query is LfcQueryMethod.COMPUTE_CTL:
+    if method != PrewarmMethod.POSTGRES:
         assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: n_prewarms}