Override neon.max_cluster_size for the time of compute_ctl (#5998)

Temporarily reset neon.max_cluster_size to avoid
the possibility of hitting the limit, while we are applying config:
creating new extensions, roles, etc...
This commit is contained in:
Anastasia Lubennikova
2023-12-03 15:21:44 +00:00
committed by GitHub
parent e43cde7aba
commit e3512340c1
4 changed files with 106 additions and 14 deletions

View File

@@ -728,7 +728,12 @@ impl ComputeNode {
// Write new config
let pgdata_path = Path::new(&self.pgdata);
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec, None)?;
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
config::write_postgres_conf(&postgresql_conf_path, &spec, None)?;
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc...
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
@@ -749,6 +754,10 @@ impl ComputeNode {
// 'Close' connection
drop(client);
// reset max_cluster_size in config back to original value and reload config
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
let unknown_op = "unknown".to_string();
let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
info!(
@@ -809,7 +818,17 @@ impl ComputeNode {
let config_time = Utc::now();
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
let pgdata_path = Path::new(&self.pgdata);
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
}
let startup_end_time = Utc::now();

View File

@@ -93,5 +93,25 @@ pub fn write_postgres_conf(
writeln!(file, "neon.extension_server_port={}", port)?;
}
// This is essential to keep this line at the end of the file,
// because it is intended to override any settings above.
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
Ok(())
}
/// create file compute_ctl_temp_override.conf in pgdata_dir
/// add provided options to this file
pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{}", options)?;
Ok(())
}
/// remove file compute_ctl_temp_override.conf in pgdata_dir
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
std::fs::remove_file(path)?;
Ok(())
}

View File

@@ -118,19 +118,6 @@ pub fn get_spec_from_control_plane(
spec
}
/// It takes cluster specification and does the following:
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
/// - Update `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
// File `postgresql.conf` is no longer included into `basebackup`, so just
// always write all config into it creating new file.
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec, None)?;
update_pg_hba(pgdata_path)?;
Ok(())
}
/// Check `pg_hba.conf` and update if needed to allow external connections.
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
// XXX: consider making it a part of spec.json

View File

@@ -146,6 +146,72 @@ def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, tim
time.sleep(polling_interval)
def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota_on_startup")
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
endpoint_main = env.endpoints.create(
"test_timeline_size_quota_on_startup",
# Set small limit for the test
config_lines=["neon.max_cluster_size=30MB"],
)
endpoint_main.start()
log.info("postgres is running on 'test_timeline_size_quota_on_startup' branch")
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
# Insert many rows. This query must fail because of space limit
try:
for _i in range(5000):
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100) g
"""
)
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
raise AssertionError()
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
# Restart endpoint that reached the limit to ensure that it doesn't fail on startup
# i.e. the size limit is not enforced during startup.
endpoint_main.stop()
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
# which is needed for neon.pg_cluster_size() to work
endpoint_main.respec(skip_pg_catalog_updates=False)
endpoint_main.start()
# ensure that the limit is enforced after startup
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
# This query must fail because of space limit
try:
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
"""
)
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
raise AssertionError()
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()