fix(compute_ctl): Resolve issues with dropping roles having dangling permissions (#10299)

## Problem

In Postgres, one cannot drop a role if it has any dependent objects in
the DB. In `compute_ctl`, we automatically reassign all dependent
objects in every DB to the corresponding DB owner. Yet, it seems that it
doesn't help with some implicit permissions. The issue is reproduced by
installing a `postgis` extension because it creates some views and
tables in the public schema.

## Summary of changes

Added a repro test without using a `postgis`: i) create a role via
`compute_ctl` (with `neon_superuser` grant); ii) create a test role, a
table in schema public, and grant permissions via the role in
`neon_superuser`.

To fix the issue, I added a new `compute_ctl` code that removes such
dangling permissions before dropping the role. It's done in the least
invasive way, i.e., only touches the schema public, because i) that's
the problem we had with PostGIS; ii) it creates a smaller chance of
messing anything up and getting a stuck operation again, just for a
different reason.

Properly, any API-based catalog operations should fail gracefully and
provide an actionable error and status code to the control plane,
allowing the latter to unwind the operation and propagate an error
message and hint to the user. In this sense, it's aligned with another
feature request https://github.com/neondatabase/cloud/issues/21611

Resolve neondatabase/cloud#13582
This commit is contained in:
Alexey Kondratov
2025-01-09 17:39:53 +01:00
committed by GitHub
parent bebc46e713
commit f37eeb56ad
3 changed files with 156 additions and 1 deletions

View File

@@ -75,7 +75,7 @@ pub struct MutableApplyContext {
pub dbs: HashMap<String, Database>,
}
/// Appply the operations that belong to the given spec apply phase.
/// Apply the operations that belong to the given spec apply phase.
///
/// Commands within a single phase are executed in order of Iterator yield.
/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
@@ -498,7 +498,19 @@ async fn get_operations<'a>(
),
comment: None,
},
// Revoke some potentially blocking privileges (Neon-specific currently)
Operation {
query: format!(
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
role_name = quoted,
),
comment: None,
},
// This now will only drop privileges of the role
// TODO: this is obviously not 100% true because of the above case,
// there could be still some privileges that are not revoked. Maybe this
// only drops privileges that were granted *by this* role, not *to this* role,
// but this has to be checked.
Operation {
query: format!("DROP OWNED BY {}", quoted),
comment: None,

View File

@@ -0,0 +1,28 @@
SET SESSION ROLE neon_superuser;
DO $$
DECLARE
schema TEXT;
revoke_query TEXT;
BEGIN
FOR schema IN
SELECT schema_name
FROM information_schema.schemata
-- So far, we only had issues with 'public' schema. Probably, because we do some additional grants,
-- e.g., make DB owner the owner of 'public' schema automatically (when created via API).
-- See https://github.com/neondatabase/cloud/issues/13582 for the context.
-- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema,
-- ii) it's easy to add more schemas to the list if needed.
WHERE schema_name IN ('public')
LOOP
revoke_query := format(
'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
schema
);
EXECUTE revoke_query;
END LOOP;
END;
$$;
RESET ROLE;

View File

@@ -250,3 +250,118 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv):
assert curr_db is not None
assert len(curr_db) == 1
assert curr_db[0] == "publisher_db"
def test_compute_drop_role(neon_simple_env: NeonEnv):
"""
Test that compute_ctl can drop a role even if it has some depending objects
like permissions in one of the databases.
Reproduction test for https://github.com/neondatabase/cloud/issues/13582
"""
env = neon_simple_env
TEST_DB_NAME = "db_with_permissions"
endpoint = env.endpoints.create_start("main")
endpoint.respec_deep(
**{
"skip_pg_catalog_updates": False,
"cluster": {
"roles": [
{
# We need to create role via compute_ctl, because in this case it will receive
# additional grants equivalent to our real environment, so we can repro some
# issues.
"name": "neon",
# Some autocomplete-suggested hash, no specific meaning.
"encrypted_password": "SCRAM-SHA-256$4096:hBT22QjqpydQWqEulorfXA==$miBogcoj68JWYdsNB5PW1X6PjSLBEcNuctuhtGkb4PY=:hxk2gxkwxGo6P7GCtfpMlhA9zwHvPMsCz+NQf2HfvWk=",
"options": [],
},
],
"databases": [
{
"name": TEST_DB_NAME,
"owner": "neon",
},
],
},
}
)
endpoint.reconfigure()
with endpoint.cursor(dbname=TEST_DB_NAME) as cursor:
# Create table and view as `cloud_admin`. This is the case when, for example,
# PostGIS extensions creates tables in `public` schema.
cursor.execute("create table test_table (id int)")
cursor.execute("create view test_view as select * from test_table")
with endpoint.cursor(dbname=TEST_DB_NAME, user="neon") as cursor:
cursor.execute("create role readonly")
# We (`compute_ctl`) make 'neon' the owner of schema 'public' in the owned database.
# Postgres has all sorts of permissions and grants that we may not handle well,
# but this is the shortest repro grant for the issue
# https://github.com/neondatabase/cloud/issues/13582
cursor.execute("grant select on all tables in schema public to readonly")
# Check that role was created
with endpoint.cursor() as cursor:
cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'")
role = cursor.fetchone()
assert role is not None
# Confirm that we actually have some permissions for 'readonly' role
# that may block our ability to drop the role.
with endpoint.cursor(dbname=TEST_DB_NAME) as cursor:
cursor.execute(
"select grantor from information_schema.role_table_grants where grantee = 'readonly'"
)
res = cursor.fetchall()
assert len(res) == 2, f"Expected 2 table grants, got {len(res)}"
for row in res:
assert row[0] == "neon_superuser"
# Drop role via compute_ctl
endpoint.respec_deep(
**{
"skip_pg_catalog_updates": False,
"delta_operations": [
{
"action": "delete_role",
"name": "readonly",
},
],
}
)
endpoint.reconfigure()
# Check that role is dropped
with endpoint.cursor() as cursor:
cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'")
role = cursor.fetchone()
assert role is None
#
# Drop schema 'public' and check that we can still drop the role
#
with endpoint.cursor(dbname=TEST_DB_NAME) as cursor:
cursor.execute("create role readonly2")
cursor.execute("grant select on all tables in schema public to readonly2")
cursor.execute("drop schema public cascade")
endpoint.respec_deep(
**{
"skip_pg_catalog_updates": False,
"delta_operations": [
{
"action": "delete_role",
"name": "readonly2",
},
],
}
)
endpoint.reconfigure()
with endpoint.cursor() as cursor:
cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly2'")
role = cursor.fetchone()
assert role is None