Merge branch 'main' into communicator-rewrite

2026-05-30 03:20:36 +00:00 · 2025-07-17 10:59:37 +02:00
parent 62af2a14e2 f2828bbe19
commit edcdd6ca9c
129 changed files with 5887 additions and 1063 deletions
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -503,6 +503,7 @@ class NeonLocalCli(AbstractNeonCli):
        pageserver_id: int | None = None,
        allow_multiple=False,
        update_catalog: bool = False,
+        privileged_role_name: str | None = None,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
@@ -534,6 +535,8 @@ class NeonLocalCli(AbstractNeonCli):
            args.extend(["--allow-multiple"])
        if update_catalog:
            args.extend(["--update-catalog"])
+        if privileged_role_name is not None:
+            args.extend(["--privileged-role-name", privileged_role_name])

        res = self.raw_cli(args)
        res.check_returncode()
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -728,7 +728,7 @@ class NeonEnvBuilder:
        # NB: neon_local rewrites postgresql.conf on each start based on neon_local config. No need to patch it.
        # However, in this new NeonEnv, the pageservers and safekeepers listen on different ports, and the storage
        # controller will currently reject re-attach requests from them because the NodeMetadata isn't identical.
-        # So, from_repo_dir patches up the the storcon database.
+        # So, from_repo_dir patches up the storcon database.
        patch_script_path = self.repo_dir / "storage_controller_db.startup.sql"
        assert not patch_script_path.exists()
        patch_script = ""
@@ -4324,6 +4324,7 @@ class Endpoint(PgProtocol, LogUtils):
        pageserver_id: int | None = None,
        allow_multiple: bool = False,
        update_catalog: bool = False,
+        privileged_role_name: str | None = None,
    ) -> Self:
        """
        Create a new Postgres endpoint.
@@ -4351,6 +4352,7 @@ class Endpoint(PgProtocol, LogUtils):
            pageserver_id=pageserver_id,
            allow_multiple=allow_multiple,
            update_catalog=update_catalog,
+            privileged_role_name=privileged_role_name,
        )
        path = Path("endpoints") / self.endpoint_id / "pgdata"
        self.pgdata_dir = self.env.repo_dir / path
@@ -4811,6 +4813,7 @@ class EndpointFactory:
        config_lines: list[str] | None = None,
        pageserver_id: int | None = None,
        update_catalog: bool = False,
+        privileged_role_name: str | None = None,
    ) -> Endpoint:
        ep = Endpoint(
            self.env,
@@ -4834,6 +4837,7 @@ class EndpointFactory:
            config_lines=config_lines,
            pageserver_id=pageserver_id,
            update_catalog=update_catalog,
+            privileged_role_name=privileged_role_name,
        )

    def stop_all(self, fail_on_error=True) -> Self:
--- a/test_runner/performance/test_lfc_prewarm.py
+++ b/test_runner/performance/test_lfc_prewarm.py
@@ -60,7 +60,7 @@ def test_compare_prewarmed_pgbench_perf(neon_compare: NeonCompare):


@pytest.mark.remote_cluster
-@pytest.mark.timeout(30 * 60)
+@pytest.mark.timeout(2 * 60 * 60)
 def test_compare_prewarmed_pgbench_perf_benchmark(
    pg_bin: PgBin,
    neon_api: NeonAPI,
@@ -91,8 +91,9 @@ def benchmark_impl(
    test_duration_min = 5
    pgbench_duration = f"-T{test_duration_min * 60}"
    # prewarm API is not publicly exposed. In order to test performance of a
-    # fully prewarmed endpoint, wait after it restarts
-    prewarmed_sleep_secs = 30
+    # fully prewarmed endpoint, wait after it restarts.
+    # The number here is empirical, based on manual runs on staging
+    prewarmed_sleep_secs = 180

    branch_id = project["branch"]["id"]
    project_id = project["project"]["id"]
--- a/test_runner/performance/test_sharding_autosplit.py
+++ b/test_runner/performance/test_sharding_autosplit.py
@@ -73,6 +73,11 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
            ".*Local notification hook failed.*",
            ".*Marking shard.*for notification retry.*",
            ".*Failed to notify compute.*",
+            # As an optimization, the storage controller kicks the downloads on the secondary
+            # after the shard split. However, secondaries are created async, so it's possible
+            # that the intent state was modified, but the actual secondary hasn't been created,
+            # which results in an error.
+            ".*Error calling secondary download after shard split.*",
        ]
    )

--- a/test_runner/regress/test_broken_timeline.py
+++ b/test_runner/regress/test_broken_timeline.py
@@ -24,10 +24,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
        [
            ".*get_values_reconstruct_data for layer .*",
            ".*could not find data for key.*",
-            ".*is not active. Current state: Broken.*",
            ".*will not become active. Current state: Broken.*",
-            ".*failed to load metadata.*",
-            ".*load failed.*load local timeline.*",
            ".*: layer load failed, assuming permanent failure:.*",
            ".*failed to get checkpoint bytes.*",
            ".*failed to get control bytes.*",
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -687,7 +687,7 @@ def test_sharding_compaction(
    for _i in range(0, 10):
        # Each of these does some writes then a checkpoint: because we set image_creation_threshold to 1,
        # these should result in image layers each time we write some data into a shard, and also shards
-        # recieving less data hitting their "empty image layer" path (wherre they should skip writing the layer,
+        # receiving less data hitting their "empty image layer" path (where they should skip writing the layer,
        # rather than asserting)
        workload.churn_rows(64)

--- a/test_runner/regress/test_compute_metrics.py
+++ b/test_runner/regress/test_compute_metrics.py
@@ -217,7 +217,7 @@ if SQL_EXPORTER is None:
            self, logs_dir: Path, config_file: Path, collector_file: Path, port: int
        ) -> None:
            # NOTE: Keep the version the same as in
-            # compute/compute-node.Dockerfile and build-tools.Dockerfile.
+            # compute/compute-node.Dockerfile and build-tools/Dockerfile.
            #
            # The "host" network mode allows sql_exporter to talk to the
            # endpoint which is running on the host.
--- a/test_runner/regress/test_neon_superuser.py
+++ b/test_runner/regress/test_neon_superuser.py
@@ -103,3 +103,90 @@ def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
        query = "DROP SUBSCRIPTION sub CASCADE"
        log.info(f"Dropping subscription: {query}")
        cur.execute(query)
+
+
+def test_privileged_role_override(neon_simple_env: NeonEnv, pg_version: PgVersion):
+    """
+    Test that we can override the privileged role for an endpoint and when we do it,
+    everything is correctly bootstrapped inside Postgres and we don't have neon_superuser
+    role in the database.
+    """
+    PRIVILEGED_ROLE_NAME = "my_superuser"
+
+    env = neon_simple_env
+    env.create_branch("test_privileged_role_override")
+    ep = env.endpoints.create(
+        "test_privileged_role_override",
+        privileged_role_name=PRIVILEGED_ROLE_NAME,
+        update_catalog=True,
+    )
+
+    ep.start()
+
+    ep.wait_for_migrations()
+
+    member_roles = [
+        "pg_read_all_data",
+        "pg_write_all_data",
+        "pg_monitor",
+        "pg_signal_backend",
+    ]
+
+    non_member_roles = [
+        "pg_execute_server_program",
+        "pg_read_server_files",
+        "pg_write_server_files",
+    ]
+
+    role_attributes = {
+        "rolsuper": False,
+        "rolinherit": True,
+        "rolcreaterole": True,
+        "rolcreatedb": True,
+        "rolcanlogin": False,
+        "rolreplication": True,
+        "rolconnlimit": -1,
+        "rolbypassrls": True,
+    }
+
+    if pg_version >= PgVersion.V15:
+        non_member_roles.append("pg_checkpoint")
+
+    if pg_version >= PgVersion.V16:
+        member_roles.append("pg_create_subscription")
+        non_member_roles.append("pg_use_reserved_connections")
+
+    with ep.cursor() as cur:
+        cur.execute(f"SELECT rolname FROM pg_roles WHERE rolname = '{PRIVILEGED_ROLE_NAME}'")
+        assert cur.fetchall()[0][0] == PRIVILEGED_ROLE_NAME
+
+        cur.execute("SELECT rolname FROM pg_roles WHERE rolname = 'neon_superuser'")
+        assert len(cur.fetchall()) == 0
+
+        cur.execute("SHOW neon.privileged_role_name")
+        assert cur.fetchall()[0][0] == PRIVILEGED_ROLE_NAME
+
+        # check PRIVILEGED_ROLE_NAME role is created
+        cur.execute(f"select * from pg_roles where rolname = '{PRIVILEGED_ROLE_NAME}'")
+        assert cur.fetchone() is not None
+
+        # check PRIVILEGED_ROLE_NAME role has the correct member roles
+        for role in member_roles:
+            cur.execute(f"SELECT pg_has_role('{PRIVILEGED_ROLE_NAME}', '{role}', 'member')")
+            assert cur.fetchone() == (True,), (
+                f"Role {role} should be a member of {PRIVILEGED_ROLE_NAME}"
+            )
+
+        for role in non_member_roles:
+            cur.execute(f"SELECT pg_has_role('{PRIVILEGED_ROLE_NAME}', '{role}', 'member')")
+            assert cur.fetchone() == (False,), (
+                f"Role {role} should not be a member of {PRIVILEGED_ROLE_NAME}"
+            )
+
+        # check PRIVILEGED_ROLE_NAME role has the correct role attributes
+        for attr, val in role_attributes.items():
+            cur.execute(f"SELECT {attr} FROM pg_roles WHERE rolname = '{PRIVILEGED_ROLE_NAME}'")
+            curr_val = cur.fetchone()
+            assert curr_val == (val,), (
+                f"Role attribute {attr} should be {val} instead of {curr_val}"
+            )
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -76,7 +76,6 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 3

    env = neon_env_builder.init_start()
-    """Tests tenants with and without wal acceptors"""
    tenant_1, _ = env.create_tenant()
    tenant_2, _ = env.create_tenant()

--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -2788,7 +2788,8 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):

    # Wait for the error message to appear in the compute log
    def error_logged():
-        return endpoint.log_contains("WAL storage utilization exceeds configured limit") is not None
+        if endpoint.log_contains("WAL storage utilization exceeds configured limit") is None:
+            raise Exception("Expected error message not found in compute log yet")

    wait_until(error_logged)
    log.info("Found expected error message in compute log, resuming.")
@@ -2822,3 +2823,87 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
            cur.execute("select count(*) from t")
            # 2000 rows from first insert + 1000 from last insert
            assert cur.fetchone() == (3000,)
+
+
+def test_global_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
+    """
+    Similar to `test_timeline_disk_usage_limit`, but test that the global disk usage circuit breaker
+    also works as expected. The test scenario:
+    1. Create a timeline and endpoint.
+    2. Mock high disk usage via failpoint
+    3. Write data to the timeline so that disk usage exceeds the limit.
+    4. Verify that the writes hang and the expected error message appears in the compute log.
+    5. Mock low disk usage via failpoint
+    6. Verify that the hanging writes unblock and we can continue to write as normal.
+    """
+    neon_env_builder.num_safekeepers = 1
+    remote_storage_kind = s3_storage()
+    neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
+
+    env = neon_env_builder.init_start()
+
+    env.create_branch("test_global_disk_usage_limit")
+    endpoint = env.endpoints.create_start("test_global_disk_usage_limit")
+
+    with closing(endpoint.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute("create table t2(key int, value text)")
+
+    for sk in env.safekeepers:
+        sk.stop().start(
+            extra_opts=["--global-disk-check-interval=1s", "--max-global-disk-usage-ratio=0.8"]
+        )
+
+    # Set the failpoint to have the disk usage check return u64::MAX, which definitely exceeds the practical
+    # limits in the test environment.
+    for sk in env.safekeepers:
+        sk.http_client().configure_failpoints(
+            [("sk-global-disk-usage", "return(18446744073709551615)")]
+        )
+
+    # Wait until the global disk usage limit watcher trips the circuit breaker.
+    def error_logged_in_sk():
+        for sk in env.safekeepers:
+            if sk.log_contains("Global disk usage exceeded limit") is None:
+                raise Exception("Expected error message not found in safekeeper log yet")
+
+    wait_until(error_logged_in_sk)
+
+    def run_hanging_insert_global():
+        with closing(endpoint.connect()) as bg_conn:
+            with bg_conn.cursor() as bg_cur:
+                # This should generate more than 1KiB of WAL
+                bg_cur.execute("insert into t2 select generate_series(1,2000), 'payload'")
+
+    bg_thread_global = threading.Thread(target=run_hanging_insert_global)
+    bg_thread_global.start()
+
+    def error_logged_in_compute():
+        if endpoint.log_contains("Global disk usage exceeded limit") is None:
+            raise Exception("Expected error message not found in compute log yet")
+
+    wait_until(error_logged_in_compute)
+    log.info("Found the expected error message in compute log, resuming.")
+
+    time.sleep(2)
+    assert bg_thread_global.is_alive(), "Global hanging insert unblocked prematurely!"
+
+    # Make the disk usage check always return 0 through the failpoint to simulate the disk pressure easing.
+    # The SKs should resume accepting WAL writes without restarting.
+    for sk in env.safekeepers:
+        sk.http_client().configure_failpoints([("sk-global-disk-usage", "return(0)")])
+
+    bg_thread_global.join(timeout=120)
+    assert not bg_thread_global.is_alive(), "Hanging global insert did not complete after restart"
+    log.info("Global hanging insert unblocked.")
+
+    # Verify that we can continue to write as normal and we don't have obvious data corruption
+    # following the recovery.
+    with closing(endpoint.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute("insert into t2 select generate_series(2001,3000), 'payload'")
+
+    with closing(endpoint.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute("select count(*) from t2")
+            assert cur.fetchone() == (3000,)
--- a/test_runner/regress/test_wal_restore.py
+++ b/test_runner/regress/test_wal_restore.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import sys
 import tarfile
 import tempfile
+from pathlib import Path
 from typing import TYPE_CHECKING

 import pytest
@@ -198,3 +199,115 @@ def test_wal_restore_http(neon_env_builder: NeonEnvBuilder, broken_tenant: bool)
    # the table is back now!
    restored = env.endpoints.create_start("main")
    assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
+
+
+# BEGIN_HADRON
+# TODO: re-enable once CM python is integreated.
+# def clear_directory(directory):
+#     for item in os.listdir(directory):
+#         item_path = os.path.join(directory, item)
+#         if os.path.isdir(item_path):
+#             log.info(f"removing SK directory: {item_path}")
+#             shutil.rmtree(item_path)
+#         else:
+#             log.info(f"removing SK file: {item_path}")
+#             os.remove(item_path)
+
+
+# def test_sk_pull_timelines(
+#     neon_env_builder: NeonEnvBuilder,
+# ):
+#     DBNAME = "regression"
+#     superuser_name = "databricks_superuser"
+#     neon_env_builder.num_safekeepers = 3
+#     neon_env_builder.num_pageservers = 4
+#     neon_env_builder.safekeeper_extra_opts = ["--enable-pull-timeline-on-startup"]
+#     neon_env_builder.enable_safekeeper_remote_storage(s3_storage())
+
+#     env = neon_env_builder.init_start(initial_tenant_shard_count=4)
+
+#     env.compute_manager.start(base_port=env.compute_manager_port)
+
+#     test_creator = "test_creator"
+#     test_metastore_id = uuid4()
+#     test_account_id = uuid4()
+#     test_workspace_id = 1
+#     test_workspace_url = "http://test_workspace_url"
+#     test_metadata_version = 1
+#     test_metadata = {
+#         "state": "INSTANCE_PROVISIONING",
+#         "admin_rolename": "admin",
+#         "admin_password_scram": "abc123456",
+#     }
+
+#     test_instance_name_1 = "test_instance_1"
+#     test_instance_read_write_compute_pool_1 = {
+#         "instance_name": test_instance_name_1,
+#         "compute_pool_name": "compute_pool_1",
+#         "creator": test_creator,
+#         "capacity": 2.0,
+#         "node_count": 1,
+#         "metadata_version": 0,
+#         "metadata": {
+#             "state": "INSTANCE_PROVISIONING",
+#         },
+#     }
+
+#     test_instance_1_readable_secondaries_enabled = False
+
+#     # Test creation
+#     create_instance_with_retries(
+#         env,
+#         test_instance_name_1,
+#         test_creator,
+#         test_metastore_id,
+#         test_account_id,
+#         test_workspace_id,
+#         test_workspace_url,
+#         test_instance_read_write_compute_pool_1,
+#         test_metadata_version,
+#         test_metadata,
+#         test_instance_1_readable_secondaries_enabled,
+#     )
+#     instance = env.compute_manager.get_instance_by_name(test_instance_name_1, test_workspace_id)
+#     log.info(f"haoyu Instance created: {instance}")
+#     assert instance["instance_name"] == test_instance_name_1
+#     test_instance_id = instance["instance_id"]
+#     instance_detail = env.compute_manager.describe_instance(test_instance_id)
+#     log.info(f"haoyu Instance detail: {instance_detail}")
+
+#     env.initial_tenant = instance_detail[0]["tenant_id"]
+#     env.initial_timeline = instance_detail[0]["timeline_id"]
+
+#     # Connect to postgres and create a database called "regression".
+#     endpoint = env.endpoints.create_start("main")
+#     endpoint.safe_psql(f"CREATE ROLE {superuser_name}")
+#     endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
+
+#     endpoint.safe_psql("CREATE TABLE usertable ( YCSB_KEY INT, FIELD0 TEXT);")
+#     # Write some data. ~20 MB.
+#     num_rows = 0
+#     for _i in range(0, 20000):
+#         endpoint.safe_psql(
+#             "INSERT INTO usertable SELECT random(), repeat('a', 1000);", log_query=False
+#         )
+#         num_rows += 1
+
+#     log.info(f"SKs {env.storage_controller.hcc_sk_node_list()}")
+
+#     env.safekeepers[0].stop(immediate=True)
+#     clear_directory(env.safekeepers[0].data_dir)
+#     env.safekeepers[0].start()
+
+#     # PG can still write data. ~20 MB.
+#     for _i in range(0, 20000):
+#         endpoint.safe_psql(
+#             "INSERT INTO usertable SELECT random(), repeat('a', 1000);", log_query=False
+#         )
+#         num_rows += 1
+
+#     tuples = endpoint.safe_psql("SELECT COUNT(*) FROM usertable;")
+#     assert tuples[0][0] == num_rows
+#     endpoint.stop_and_destroy()
+
+# END_HADRON