From b1477b4448e120ea3fc6443043664f07bc4b6f82 Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Fri, 23 Jun 2023 15:38:27 -0700 Subject: [PATCH] Create neon_superuser role, grant it to roles created from control plane (#4425) ## Problem Currently, if a user creates a role, it won't by default have any grants applied to it. If the compute restarts, the grants get applied. This gives a very strange UX of being able to drop roles/not have any access to anything at first, and then once something triggers a config application, suddenly grants are applied. This removes these grants. --- compute_tools/src/compute.rs | 88 ++++++++++++++++++++++- compute_tools/src/pg_helpers.rs | 2 +- compute_tools/src/spec.rs | 45 +++--------- test_runner/regress/test_compatibility.py | 54 +++++++++++++- test_runner/regress/test_hot_standby.py | 9 ++- test_runner/regress/test_tenant_size.py | 16 +++-- 6 files changed, 164 insertions(+), 50 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 94cebf93de..8415d5dad5 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -133,6 +133,84 @@ impl TryFrom for ParsedSpec { } } +/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser +/// that we give to customers +fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> { + let roles = spec + .cluster + .roles + .iter() + .map(|r| format!("'{}'", escape_literal(&r.name))) + .collect::>(); + + let dbs = spec + .cluster + .databases + .iter() + .map(|db| format!("'{}'", escape_literal(&db.name))) + .collect::>(); + + let roles_decl = if roles.is_empty() { + String::from("roles text[] := NULL;") + } else { + format!( + r#" + roles text[] := ARRAY(SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname IN ({}));"#, + roles.join(", ") + ) + }; + + let database_decl = if dbs.is_empty() { + String::from("dbs text[] := NULL;") + } else { + format!( + r#" + dbs text[] := ARRAY(SELECT datname + FROM pg_catalog.pg_database + WHERE datname IN ({}));"#, + dbs.join(", ") + ) + }; + + // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases + // (see https://www.postgresql.org/docs/current/ddl-priv.html) + let query = format!( + r#" + DO $$ + DECLARE + r text; + {} + {} + BEGIN + IF NOT EXISTS ( + SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser') + THEN + CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN IN ROLE pg_read_all_data, pg_write_all_data; + IF array_length(roles, 1) IS NOT NULL THEN + EXECUTE format('GRANT neon_superuser TO %s', + array_to_string(roles, ', ')); + FOREACH r IN ARRAY roles LOOP + EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', r); + END LOOP; + END IF; + IF array_length(dbs, 1) IS NOT NULL THEN + EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser', + array_to_string(dbs, ', ')); + END IF; + END IF; + END + $$;"#, + roles_decl, database_decl, + ); + info!("Neon superuser created:\n{}", &query); + client + .simple_query(&query) + .map_err(|e| anyhow::anyhow!(e).context(query))?; + Ok(()) +} + impl ComputeNode { pub fn set_status(&self, status: ComputeStatus) { let mut state = self.state.lock().unwrap(); @@ -347,6 +425,8 @@ impl ComputeNode { .map_err(|_| anyhow::anyhow!("invalid connstr"))?; let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?; + // Disable forwarding so that users don't get a cloud_admin role + client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?; client.simple_query("GRANT zenith_admin TO cloud_admin")?; drop(client); @@ -357,14 +437,16 @@ impl ComputeNode { Ok(client) => client, }; - // Proceed with post-startup configuration. Note, that order of operations is important. // Disable DDL forwarding because control plane already knows about these roles/databases. client.simple_query("SET neon.forward_ddl = false")?; + + // Proceed with post-startup configuration. Note, that order of operations is important. let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; + create_neon_superuser(spec, &mut client)?; handle_roles(spec, &mut client)?; handle_databases(spec, &mut client)?; handle_role_deletions(spec, self.connstr.as_str(), &mut client)?; - handle_grants(spec, self.connstr.as_str(), &mut client)?; + handle_grants(spec, self.connstr.as_str())?; handle_extensions(spec, &mut client)?; // 'Close' connection @@ -402,7 +484,7 @@ impl ComputeNode { handle_roles(&spec, &mut client)?; handle_databases(&spec, &mut client)?; handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; - handle_grants(&spec, self.connstr.as_str(), &mut client)?; + handle_grants(&spec, self.connstr.as_str())?; handle_extensions(&spec, &mut client)?; } diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index d5c845e9ea..b76ed1fd85 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -17,7 +17,7 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds /// Escape a string for including it in a SQL literal -fn escape_literal(s: &str) -> String { +pub fn escape_literal(s: &str) -> String { s.replace('\'', "''").replace('\\', "\\\\") } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index a2a19ae0da..520696da00 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -269,17 +269,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> { xact.execute(query.as_str(), &[])?; } RoleAction::Create => { - let mut query: String = format!("CREATE ROLE {} ", name.pg_quote()); + let mut query: String = format!( + "CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser", + name.pg_quote() + ); info!("role create query: '{}'", &query); query.push_str(&role.to_pg_options()); xact.execute(query.as_str(), &[])?; - - let grant_query = format!( - "GRANT pg_read_all_data, pg_write_all_data TO {}", - name.pg_quote() - ); - xact.execute(grant_query.as_str(), &[])?; - info!("role grant query: '{}'", &grant_query); } } @@ -476,6 +472,11 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { query.push_str(&db.to_pg_options()); let _guard = info_span!("executing", query).entered(); client.execute(query.as_str(), &[])?; + let grant_query: String = format!( + "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", + name.pg_quote() + ); + client.execute(grant_query.as_str(), &[])?; } }; @@ -495,35 +496,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { /// Grant CREATE ON DATABASE to the database owner and do some other alters and grants /// to allow users creating trusted extensions and re-creating `public` schema, for example. #[instrument(skip_all)] -pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { +pub fn handle_grants(spec: &ComputeSpec, connstr: &str) -> Result<()> { info!("cluster spec grants:"); - // We now have a separate `web_access` role to connect to the database - // via the web interface and proxy link auth. And also we grant a - // read / write all data privilege to every role. So also grant - // create to everyone. - // XXX: later we should stop messing with Postgres ACL in such horrible - // ways. - let roles = spec - .cluster - .roles - .iter() - .map(|r| r.name.pg_quote()) - .collect::>(); - - for db in &spec.cluster.databases { - let dbname = &db.name; - - let query: String = format!( - "GRANT CREATE ON DATABASE {} TO {}", - dbname.pg_quote(), - roles.join(", ") - ); - info!("grant query {}", &query); - - client.execute(query.as_str(), &[])?; - } - // Do some per-database access adjustments. We'd better do this at db creation time, // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants // atomically. diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 61f86dc3ce..51e7b01eba 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -2,6 +2,7 @@ import copy import os import shutil import subprocess +import tempfile from pathlib import Path from typing import Any, Optional @@ -448,7 +449,7 @@ def dump_differs(first: Path, second: Path, output: Path) -> bool: """ with output.open("w") as stdout: - rv = subprocess.run( + res = subprocess.run( [ "diff", "--unified", # Make diff output more readable @@ -460,4 +461,53 @@ def dump_differs(first: Path, second: Path, output: Path) -> bool: stdout=stdout, ) - return rv.returncode != 0 + differs = res.returncode != 0 + + # TODO: Remove after https://github.com/neondatabase/neon/pull/4425 is merged, and a couple of releases are made + if differs: + with tempfile.NamedTemporaryFile(mode="w") as tmp: + tmp.write(PR4425_ALLOWED_DIFF) + tmp.flush() + + allowed = subprocess.run( + [ + "diff", + "--unified", # Make diff output more readable + r"--ignore-matching-lines=^---", # Ignore diff headers + r"--ignore-matching-lines=^\+\+\+", # Ignore diff headers + "--ignore-matching-lines=^@@", # Ignore diff blocks location + "--ignore-matching-lines=^ *$", # Ignore lines with only spaces + "--ignore-matching-lines=^ --.*", # Ignore the " --" lines for compatibility with PG14 + "--ignore-blank-lines", + str(output), + str(tmp.name), + ], + ) + + differs = allowed.returncode != 0 + + return differs + + +PR4425_ALLOWED_DIFF = """ +--- /tmp/test_output/test_backward_compatibility[release-pg15]/compatibility_snapshot/dump.sql 2023-06-08 18:12:45.000000000 +0000 ++++ /tmp/test_output/test_backward_compatibility[release-pg15]/dump.sql 2023-06-13 07:25:35.211733653 +0000 +@@ -13,12 +13,20 @@ + + CREATE ROLE cloud_admin; + ALTER ROLE cloud_admin WITH SUPERUSER INHERIT CREATEROLE CREATEDB LOGIN REPLICATION BYPASSRLS; ++CREATE ROLE neon_superuser; ++ALTER ROLE neon_superuser WITH NOSUPERUSER INHERIT CREATEROLE CREATEDB NOLOGIN NOREPLICATION NOBYPASSRLS; + + -- + -- User Configurations + -- + + ++-- ++-- Role memberships ++-- ++ ++GRANT pg_read_all_data TO neon_superuser GRANTED BY cloud_admin; ++GRANT pg_write_all_data TO neon_superuser GRANTED BY cloud_admin; +""" diff --git a/test_runner/regress/test_hot_standby.py b/test_runner/regress/test_hot_standby.py index 12e034cea2..6b003ce356 100644 --- a/test_runner/regress/test_hot_standby.py +++ b/test_runner/regress/test_hot_standby.py @@ -1,3 +1,5 @@ +import time + import pytest from fixtures.neon_fixtures import NeonEnv @@ -10,9 +12,10 @@ def test_hot_standby(neon_simple_env: NeonEnv): branch_name="main", endpoint_id="primary", ) as primary: + time.sleep(1) with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") as secondary: primary_lsn = None - cought_up = False + caught_up = False queries = [ "SHOW neon.timeline_id", "SHOW neon.tenant_id", @@ -56,7 +59,7 @@ def test_hot_standby(neon_simple_env: NeonEnv): res = s_cur.fetchone() assert res is not None - while not cought_up: + while not caught_up: with s_con.cursor() as secondary_cursor: secondary_cursor.execute("SELECT pg_last_wal_replay_lsn()") res = secondary_cursor.fetchone() @@ -66,7 +69,7 @@ def test_hot_standby(neon_simple_env: NeonEnv): # due to e.g. autovacuum, but that shouldn't impact the content # of the tables, so we check whether we've replayed up to at # least after the commit of the `test` table. - cought_up = secondary_lsn >= primary_lsn + caught_up = secondary_lsn >= primary_lsn # Explicit commit to flush any transient transaction-level state. s_con.commit() diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index a0f9f854ed..0ebe606066 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -44,12 +44,16 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path): # we've disabled the autovacuum and checkpoint # so background processes should not change the size. # If this test will flake we should probably loosen the check - assert size == initial_size, "starting idle compute should not change the tenant size" + assert ( + size == initial_size + ), f"starting idle compute should not change the tenant size (Currently {size}, expected {initial_size})" # the size should be the same, until we increase the size over the # gc_horizon size, inputs = http_client.tenant_size_and_modelinputs(tenant_id) - assert size == initial_size, "tenant_size should not be affected by shutdown of compute" + assert ( + size == initial_size + ), f"tenant_size should not be affected by shutdown of compute (Currently {size}, expected {initial_size})" expected_inputs = { "segments": [ @@ -333,13 +337,13 @@ def test_single_branch_get_tenant_size_grows( # inserts is larger than gc_horizon. for example 0x20000 here hid the fact # that there next_gc_cutoff could be smaller than initdb_lsn, which will # obviously lead to issues when calculating the size. - gc_horizon = 0x38000 + gc_horizon = 0x3BA00 # it's a bit of a hack, but different versions of postgres have different # amount of WAL generated for the same amount of data. so we need to # adjust the gc_horizon accordingly. if pg_version == PgVersion.V14: - gc_horizon = 0x40000 + gc_horizon = 0x4A000 neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}" @@ -360,11 +364,11 @@ def test_single_branch_get_tenant_size_grows( if current_lsn - initdb_lsn >= gc_horizon: assert ( size >= prev_size - ), "tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size" + ), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})" else: assert ( size > prev_size - ), "tenant_size should grow, because we continue to add WAL to initial snapshot size" + ), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})" def get_current_consistent_size( env: NeonEnv,