Compare commits


10 Commits

Author SHA1 Message Date
Stas Kelvich
b3a6c4cf8a Update docs/rfcs/017-user-management.md
Co-authored-by: Anna Stepanyan <stepa6ka@gmail.com>
2022-07-15 13:30:23 +03:00
Stas Kelvich
fb8935305a User management RFC 2022-07-15 13:19:02 +03:00
Heikki Linnakangas
c68336a246 Strip debug symbols from test binaries, to make the artifact smaller.
Uploading large artifacts is slow in GitHub Actions. To speed that up,
make the artifact smaller.

The code coverage tool doesn't require debug symbols, so remove them.

We've discussed doing the same for *all* binaries, but it's nice to
have debug symbols for debugging purposes, and so that you get
more complete stack traces. The discussion is ongoing, but let's at
least do this for the test binaries now.
2022-07-14 23:08:57 +03:00
Heikki Linnakangas
0886aced86 Update dependencies.
- Updated dependencies with "cargo update"
- Updated workspace_hack with "cargo hakari generate"

There's no particular reason to do this now, just a periodic refresh.
2022-07-14 22:13:51 +03:00
Heikki Linnakangas
a342957aee Use ok_or_else() instead of ok_or(), to silence clippy warnings.
"cargo clippy" started to complain about these, after running "cargo
update". Not sure why it didn't complain before, but seems reasonable to
fix these. (The "cargo update" is not included in this commit)
2022-07-14 22:13:51 +03:00
Heikki Linnakangas
79f5685d00 Enable basic optimizations even in 'dev' builds.
Change the build options to enable basic optimizations even in debug
mode, and always build dependencies with more optimizations. That
makes the debug-mode binaries somewhat faster, without messing up
stack traces and line-by-line debugging too much.
2022-07-14 20:46:35 +03:00
Egor Suvorov
c004a6d62f Do not cancel in-progress checks on the main branch
See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#concurrency

* Previously there was a single concurrency group for each branch.
  As the `main` branch got pushed to frequently, very few commits got
  tested to completion. That resulted in a "broken" `main` branch, as
  there were no fully successful workflow runs.
  Now the `main` branch gets a separate concurrency group for each commit.
* As the GitHub Actions expression syntax does not have a conditional operator,
  it is emulated via the logical and/or operators. Although undocumented, they
  return one of their operands instead of plain true/false.
* Replace 3-space indentation with 2-space indentation while we are here
  to be consistent with the rest of the file.
2022-07-14 17:20:00 +03:00
Egor Suvorov
1b6a80a38f Fix flaky test_concurrent_computes
* Wait for all computes (except one) to complete before proceeding with
  the single compute.
* It previously waited for too few seconds. As the test is randomized, it did
  not fail every time, only in specific unlucky cases,
  e.g. when there were no successful queries by concurrent computes
  and the single node had big timeouts and spent a long time making the
  transaction.
  See https://github.com/neondatabase/neon/runs/7234456482?check_suite_focus=true
  (around line 980).
* Wait for exactly one extra transaction by the single compute.
2022-07-14 16:23:39 +03:00
Alexey Kondratov
12bac9c12b Wait for compute image before deploy in GitHub Action
We need both storage **and** compute images for deploy, because the control
plane picks the compute version based on the storage version. If it notices
fresh storage, it may bump the compute version, and if the compute image
failed to build, that may break things badly.
2022-07-14 11:27:16 +02:00
Kirill Bulatov
9a7427c203 Fill build-args for Docker builds via GH Actions context 2022-07-14 10:28:15 +03:00
11 changed files with 547 additions and 421 deletions

.cargo/config.toml (new file, 13 lines)

@@ -0,0 +1,13 @@
# The binaries are really slow, if you compile them in 'dev' mode with the defaults.
# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
# optimizations enabled by "opt-level=1" don't affect debuggability too much.
#
# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
#
[profile.dev.package."*"]
# Set the default for dependencies in Development mode.
opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1


@@ -11,8 +11,9 @@ defaults:
     shell: bash -ex {0}

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+  # Allow only one workflow per any non-`main` branch.
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
+  cancel-in-progress: true

 env:
   RUST_BACKTRACE: 1
@@ -170,7 +171,10 @@ jobs:
           for bin in $test_exe_paths; do
             SRC=$bin
             DST=/tmp/neon/test_bin/$(basename $bin)
-            cp "$SRC" "$DST"
+            # We don't need debug symbols for code coverage, so strip them out to make
+            # the artifact smaller.
+            strip "$SRC" -o "$DST"
             echo "$DST" >> /tmp/coverage/binaries.list
           done
         fi
@@ -441,14 +445,14 @@ jobs:
           fi
         id: legacy-build-tag

-      - name: Build compute-tools Docker image
+      - name: Build neon Docker image
        uses: docker/build-push-action@v2
        with:
          context: .
          build-args: |
-            GIT_VERSION="${GITHUB_SHA}"
-            AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
-            AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
          pull: true
          push: true
          tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
@@ -508,8 +512,9 @@ jobs:
        with:
          context: .
          build-args: |
-            AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
-            AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
          push: false
          file: Dockerfile.compute-tools
          tags: neondatabase/compute-tools:local
@@ -519,8 +524,9 @@ jobs:
        with:
          context: .
          build-args: |
-            AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
-            AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
          push: true
          file: Dockerfile.compute-tools
          tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
@@ -558,7 +564,11 @@ jobs:
  deploy:
    runs-on: [ self-hosted, Linux, k8s-runner ]
-    needs: [ docker-image, calculate-deploy-targets ]
+    # We need both storage **and** compute images for deploy, because the control
+    # plane picks the compute version based on the storage version. If it notices
+    # fresh storage, it may bump the compute version, and if the compute image
+    # failed to build, that may break things badly.
+    needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
    if: |
      (github.ref_name == 'main' || github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'
@@ -601,7 +611,9 @@ jobs:
  deploy-proxy:
    runs-on: [ self-hosted, Linux, k8s-runner ]
-    needs: [ docker-image, calculate-deploy-targets ]
+    # The compute image isn't strictly required for the proxy deploy, but let's
+    # still wait for it, to run all deploy jobs consistently.
+    needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
    if: |
      (github.ref_name == 'main' || github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'


@@ -11,8 +11,9 @@ defaults:
     shell: bash -ex {0}

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+  # Allow only one workflow per any non-`main` branch.
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
+  cancel-in-progress: true

 env:
   RUST_BACKTRACE: 1


@@ -13,8 +13,9 @@ on:
   workflow_dispatch:

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
+  # Allow only one workflow per any non-`main` branch.
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
+  cancel-in-progress: true

 jobs:
   test-postgres-client-libs:

Cargo.lock (generated, 776 lines changed): file diff suppressed because it is too large.


@@ -62,6 +62,13 @@ brew install protobuf etcd openssl
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 ```
+3. Install PostgreSQL Client
+```
+# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
+brew install libpq
+brew link --force libpq
+```
 #### Building on Linux and OSX
 1. Build neon and patched postgres

docs/rfcs/017-user-management.md (new file)

@@ -0,0 +1,80 @@
# Postgres user and database management

We've accumulated a bunch of problems with our approach to role and database management, namely:

1. we don't allow role and database creation from Postgres, and users are complaining about that
2. fine-grained role management is not possible from either Postgres or the console
3. web_access and @user are different roles, which creates object access problems in some cases

Right now, we store users and databases both in the console and in Postgres, and there are two main reasons for that:

* we want to be able to authenticate users in the proxy against the console without Postgres involvement. Otherwise, malicious brute-force attempts would wake up Postgres (expensive) and might exhaust the Postgres connection pool (denial of service).
* it is handy to be able to render the console UI without waking up compute (e.g., to show the database list)

Storing the same information in two systems is a form of replication, and in the current scheme the console is the primary and the Postgres catalog is a replica.

This RFC proposes to address problems 1 and 2 by making Postgres the source of truth for roles/databases and only caching this info in the console. So, using the replication analogy, the Postgres catalog becomes the primary and the console becomes a replica. Problem 3 is a bit different and could be addressed by ditching the web_access user and using, e.g., JWT auth for the @username user, so that we do not introduce a new user (JWT is needed since we don't know the user's password).

This RFC doesn't talk about giving root access to the database, which is blocked by a secure runtime setup.
## Overview

* Add a `/tenant/$tenant/branch/$branch/refresh_catalog` endpoint to the console management API, which calls `/get_catalog` and updates the cached roles/databases info.
* Whenever a user edits the list of databases or roles, Postgres signals `compute_ctl` to call `/<...>/refresh_catalog` in the console.
* Add a password strength check in our extension.
## Postgres behavior

The default user role (@username) should have the `CREATE ROLE`, `CREATE DB`, and `BYPASSRLS` privileges. We expose the Postgres port to the open internet, so we need to check password strength. We can use the `passwordcheck` extension or do the same from our own extension.
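For illustration only, a check along these lines could run before a password is accepted and hashed into a SCRAM verifier; the thresholds are hypothetical, and the real policy would live in the extension's check hook:

```
/// Hypothetical strength policy, sketched in Rust for brevity. In practice this
/// logic would sit in a Postgres extension hook (like passwordcheck's), running
/// on the plaintext password before it is hashed into a SCRAM verifier.
fn password_is_strong(pw: &str) -> bool {
    let long_enough = pw.chars().count() >= 12; // illustrative minimum length
    let has_digit = pw.chars().any(|c| c.is_ascii_digit());
    let has_letter = pw.chars().any(|c| c.is_alphabetic());
    let has_other = pw.chars().any(|c| !c.is_alphanumeric());
    long_enough && has_digit && has_letter && has_other
}
```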
Whenever a user edits the list of databases or roles, Postgres sends SIGHUP to `compute_ctl`; for that to work, `compute_ctl` should write its PID to a `compute_ctl.pid` file.
## Compute_ctl behavior

Upon receiving `SIGHUP`, `compute_ctl` should call `/tenant/$tenant/branch/$branch/refresh_catalog` to inform the console about changes in the database. The console will then circle back and load the data from `/get_catalog` on compute (see the next section on why this approach is preferred over a direct PUT/PATCH to the console). If `/refresh_catalog` fails, we should retry it N times.
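As a minimal sketch of that loop (not the actual implementation; it assumes a tokio-based `compute_ctl`, a `reqwest` HTTP client, and illustrative values for the console URL, retry count, and pid-file path):

```
use std::time::Duration;

use anyhow::Result;
use tokio::signal::unix::{signal, SignalKind};

const REFRESH_RETRIES: u32 = 5; // the RFC's "N", picked arbitrarily here

/// Notify the console that the catalog changed, retrying with backoff on failure.
async fn notify_console(client: &reqwest::Client, refresh_url: &str) -> Result<()> {
    for attempt in 1..=REFRESH_RETRIES {
        match client.post(refresh_url).send().await {
            Ok(resp) if resp.status().is_success() => return Ok(()),
            Ok(resp) => tracing::warn!("refresh_catalog attempt {attempt}: HTTP {}", resp.status()),
            Err(e) => tracing::warn!("refresh_catalog attempt {attempt}: {e}"),
        }
        tokio::time::sleep(Duration::from_secs(1 << attempt)).await;
    }
    anyhow::bail!("console did not acknowledge catalog refresh")
}

#[tokio::main]
async fn main() -> Result<()> {
    // Write our PID so Postgres knows whom to SIGHUP.
    std::fs::write("compute_ctl.pid", std::process::id().to_string())?;

    let client = reqwest::Client::new();
    // Hypothetical URL; the real one is parameterized by tenant and branch.
    let refresh_url = "http://console.local/tenant/t1/branch/main/refresh_catalog";

    // On every SIGHUP from Postgres, ask the console to re-pull the catalog.
    let mut hangups = signal(SignalKind::hangup())?;
    while hangups.recv().await.is_some() {
        notify_console(&client, refresh_url).await?;
    }
    Ok(())
}
```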
`compute_ctl` also listens for HTTP requests on `/get_catalog` and returns the list of databases and roles:
```
/get_catalog: -> {
databases: [{
name: "db1",
owner: "jack"
}],
roles: [{
name: "jack",
rolepassword: "SCRAM-SHA-256..."
}]
}
```
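For illustration, these hypothetical types (serde assumed) could model that payload on both the compute and console sides:

```
use serde::{Deserialize, Serialize};

// Hypothetical Rust types mirroring the /get_catalog sketch above.
#[derive(Serialize, Deserialize)]
struct Database {
    name: String,
    owner: String,
}

#[derive(Serialize, Deserialize)]
struct Role {
    name: String,
    // An already-hashed SCRAM verifier, never a plaintext password.
    rolepassword: String,
}

#[derive(Serialize, Deserialize)]
struct Catalog {
    databases: Vec<Database>,
    roles: Vec<Role>,
}
```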
## Console behavior

Whenever the console receives `/refresh_catalog` on the management API, it goes to compute and asks for `/get_catalog`. I suggest this flow, instead of having the console endpoint accept a list of databases/roles directly, for the following reasons:

* we will need a console-originated call to compute's `/get_catalog` after historical branch creation anyway
* if an intruder gains access to some other tenant's `/tenant/$tenant/.../refresh_catalog`, they won't be able to change the roles list and will just force an unnecessary reload

`/refresh_catalog` returns HTTP 200 OK on success.

We should have a button in the admin UI to manually force `/refresh_catalog` in case of data desync.
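A rough sketch of the pull-based console side, reusing the `Catalog` type from above (`ConsoleDb`, `compute_addr`, and the route wiring are hypothetical; `reqwest` with its `json` feature is assumed):

```
async fn handle_refresh_catalog(
    tenant: &str,
    branch: &str,
    db: &ConsoleDb, // hypothetical handle to the console's cache store
) -> anyhow::Result<()> {
    // Pull the authoritative catalog from compute instead of trusting the caller:
    // even if an intruder hits this endpoint, they can only force a reload.
    let url = format!("http://{}/get_catalog", compute_addr(tenant, branch));
    let catalog: Catalog = reqwest::get(&url).await?.json().await?;
    // Overwrite the cached copy; the Postgres catalog stays the source of truth.
    db.replace_cached_catalog(tenant, branch, &catalog).await
}
```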
## Scalability

On my laptop, I can create 4200 roles per second; at that rate (4200 × 86,400 seconds), that is about 363 million roles per day. So `/get_catalog` can become expensive, and our roles database can snowball. While we can keep `/get_catalog` small by fetching only the latest changes (e.g., maintain an audit table and have the console drain it), it is still not nice that a single tenant can blow up a multi-tenant console database. I would instead propose to limit the number of databases and roles to some big number like 1000, and bump this limit if somebody asks for it with a legitimate use case.

## QA

- Why implement `/get_catalog` instead of sending an SQL query from the console to the compute?
- So far, we do not allow remote superuser access to Postgres, and exposing only endpoints with fixed queries beneath them reduces the attack surface.


@@ -122,9 +122,7 @@ where
     download_index_parts(conf, storage, sync_ids)
         .await
         .remove(&tenant_id)
-        .ok_or(anyhow::anyhow!(
-            "Missing tenant index parts. This is a bug."
-        ))
+        .ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))
 }

 /// Retrieves index data from the remote storage for a given timeline.


@@ -83,7 +83,9 @@ impl ElectionLeader {
     ) -> Result<bool> {
         let resp = self.client.leader(election_name).await?;
-        let kv = resp.kv().ok_or(anyhow!("failed to get leader response"))?;
+        let kv = resp
+            .kv()
+            .ok_or_else(|| anyhow!("failed to get leader response"))?;

         let leader = kv.value_str()?;
         Ok(leader == candidate_name)


@@ -302,6 +302,8 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder):

 class BackgroundCompute(object):
+    MAX_QUERY_GAP_SECONDS = 2
+
     def __init__(self, index: int, env: NeonEnv, branch: str):
         self.index = index
         self.env = env
@@ -339,7 +341,7 @@ class BackgroundCompute(object):
             # With less sleep, there is a very big chance of not committing
             # anything or only 1 xact during test run.
-            await asyncio.sleep(2 * random.random())
+            await asyncio.sleep(random.uniform(0, self.MAX_QUERY_GAP_SECONDS))
         self.running = False
@@ -356,20 +358,34 @@ async def run_concurrent_computes(env: NeonEnv,
     background_tasks = [asyncio.create_task(compute.run()) for compute in computes]

     await asyncio.sleep(run_seconds)
     log.info("stopping all tasks but one")
     for compute in computes[1:]:
         compute.stopped = True
+    await asyncio.gather(*background_tasks[1:])
+    log.info("stopped all tasks but one")

-    # work for some time with only one compute -- it should be able to make some xacts
-    await asyncio.sleep(8)
+    TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3
+    initial_queries_by_0 = len(computes[0].successful_queries)
+    log.info(f'Waiting for another query by computes[0], '
+             f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s')
+    for _ in range(10 * TIMEOUT_SECONDS):
+        current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0
+        if current_queries_by_0 >= 1:
+            log.info(f'Found {current_queries_by_0} successful queries '
+                     f'by computes[0], completing the test')
+            break
+        await asyncio.sleep(0.1)
+    else:
+        assert False, "Timed out while waiting for another query by computes[0]"

     computes[0].stopped = True
-    await asyncio.gather(*background_tasks)
+    await asyncio.gather(background_tasks[0])

     result = await exec_compute_query(env, branch, 'SELECT * FROM query_log')
     # we should have inserted something while single compute was running
     assert len(result) >= 4
-    log.info(f'Executed {len(result)} queries')
+    log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them '
+             f'by computes[0] after we started stopping the others')

     for row in result:
         log.info(f'{row[0]} {row[1]} {row[2]}')


@@ -33,7 +33,9 @@ itoa = { version = "0.4", features = ["i128", "std"] }
 libc = { version = "0.2", features = ["extra_traits", "std"] }
 log = { version = "0.4", default-features = false, features = ["serde", "std"] }
 memchr = { version = "2", features = ["std", "use_std"] }
-num-integer = { version = "0.1", default-features = false, features = ["i128"] }
+nom = { version = "7", features = ["alloc", "std"] }
+num-bigint = { version = "0.4", features = ["std"] }
+num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-traits = { version = "0.2", features = ["i128", "std"] }
 prost = { version = "0.10", features = ["prost-derive", "std"] }
 rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
@@ -41,10 +43,11 @@ regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
 regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
 scopeguard = { version = "1", features = ["use_std"] }
 serde = { version = "1", features = ["alloc", "derive", "serde_derive", "std"] }
-tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros"] }
+time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "quickcheck", "quickcheck-dep", "std", "time-macros"] }
+tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros", "winapi"] }
 tokio-util = { version = "0.7", features = ["codec", "io"] }
 tracing = { version = "0.1", features = ["attributes", "log", "std", "tracing-attributes"] }
-tracing-core = { version = "0.1", features = ["lazy_static", "std"] }
+tracing-core = { version = "0.1", features = ["lazy_static", "std", "valuable"] }

 [build-dependencies]
 ahash = { version = "0.7", features = ["std"] }
@@ -57,6 +60,7 @@ indexmap = { version = "1", default-features = false, features = ["std"] }
 libc = { version = "0.2", features = ["extra_traits", "std"] }
 log = { version = "0.4", default-features = false, features = ["serde", "std"] }
 memchr = { version = "2", features = ["std", "use_std"] }
+nom = { version = "7", features = ["alloc", "std"] }
 prost = { version = "0.10", features = ["prost-derive", "std"] }
 regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
 regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }