diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 0dfacb615c..28770acdcd 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -728,7 +728,12 @@ impl ComputeNode { // Write new config let pgdata_path = Path::new(&self.pgdata); - config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec, None)?; + let postgresql_conf_path = pgdata_path.join("postgresql.conf"); + config::write_postgres_conf(&postgresql_conf_path, &spec, None)?; + // temporarily override neon.max_cluster_size to -1 in config + // to avoid the possibility of hitting the limit while we are reconfiguring: + // creating new extensions, roles, etc. + config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?; self.pg_reload_conf()?; let mut client = Client::connect(self.connstr.as_str(), NoTls)?; @@ -749,6 +754,10 @@ impl ComputeNode { // 'Close' connection drop(client); + // reset max_cluster_size in config back to original value and reload config + config::compute_ctl_temp_override_remove(pgdata_path)?; + self.pg_reload_conf()?; + let unknown_op = "unknown".to_string(); let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op); info!( @@ -809,7 +818,17 @@ impl ComputeNode { let config_time = Utc::now(); if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates { + let pgdata_path = Path::new(&self.pgdata); + // temporarily override neon.max_cluster_size to -1 in config + // to avoid the possibility of hitting the limit while we are applying config: + // creating new extensions, roles, etc.
+ config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?; + self.pg_reload_conf()?; + self.apply_config(&compute_state)?; + + config::compute_ctl_temp_override_remove(pgdata_path)?; + self.pg_reload_conf()?; } let startup_end_time = Utc::now(); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index bc48a2110d..a7ef8cea92 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -93,5 +93,25 @@ pub fn write_postgres_conf( writeln!(file, "neon.extension_server_port={}", port)?; } + // It is essential to keep this line at the end of the file, + // because it is intended to override any settings above. + writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?; + + Ok(()) +} + +/// Create file `compute_ctl_temp_override.conf` in the pgdata directory +/// and write the provided options to it. +pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> { + let path = pgdata_path.join("compute_ctl_temp_override.conf"); + let mut file = File::create(path)?; + write!(file, "{}", options)?; + Ok(()) +} + +/// Remove file `compute_ctl_temp_override.conf` from the pgdata directory. +pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> { + let path = pgdata_path.join("compute_ctl_temp_override.conf"); + std::fs::remove_file(path)?; Ok(()) } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 8c44c6d519..f98333d8bf 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -118,19 +118,6 @@ pub fn get_spec_from_control_plane( spec } -/// It takes cluster specification and does the following: -/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file. -/// - Update `pg_hba.conf` to allow external connections.
-pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> { - // File `postgresql.conf` is no longer included into `basebackup`, so just - // always write all config into it creating new file. - config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec, None)?; - - update_pg_hba(pgdata_path)?; - - Ok(()) -} - /// Check `pg_hba.conf` and update if needed to allow external connections. pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index eb98348823..24cbe34457 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -146,6 +146,72 @@ def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, tim time.sleep(polling_interval) +def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + client = env.pageserver.http_client() + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota_on_startup") + + wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) + + endpoint_main = env.endpoints.create( + "test_timeline_size_quota_on_startup", + # Set small limit for the test + config_lines=["neon.max_cluster_size=30MB"], + ) + endpoint_main.start() + + log.info("postgres is running on 'test_timeline_size_quota_on_startup' branch") + + with closing(endpoint_main.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE foo (t text)") + + # Insert many rows. 
This query must fail because of space limit + try: + for _i in range(5000): + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100) g + """ + ) + + # If we get here, the timeline size limit failed + log.error("Query unexpectedly succeeded") + raise AssertionError() + + except psycopg2.errors.DiskFull as err: + log.info(f"Query expectedly failed with: {err}") + + # Restart endpoint that reached the limit to ensure that it doesn't fail on startup + # i.e. the size limit is not enforced during startup. + endpoint_main.stop() + # don't skip pg_catalog updates - it runs CREATE EXTENSION neon + # which is needed for neon.pg_cluster_size() to work + endpoint_main.respec(skip_pg_catalog_updates=False) + endpoint_main.start() + + # ensure that the limit is enforced after startup + with closing(endpoint_main.connect()) as conn: + with conn.cursor() as cur: + # This query must fail because of space limit + try: + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + # If we get here, the timeline size limit failed + log.error("Query unexpectedly succeeded") + raise AssertionError() + + except psycopg2.errors.DiskFull as err: + log.info(f"Query expectedly failed with: {err}") + + def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() client = env.pageserver.http_client()