Merge branch 'main' into communicator-rewrite

This commit is contained in:
Erik Grinaker
2025-07-17 10:59:37 +02:00
129 changed files with 5887 additions and 1063 deletions

View File

@@ -503,6 +503,7 @@ class NeonLocalCli(AbstractNeonCli):
pageserver_id: int | None = None,
allow_multiple=False,
update_catalog: bool = False,
privileged_role_name: str | None = None,
) -> subprocess.CompletedProcess[str]:
args = [
"endpoint",
@@ -534,6 +535,8 @@ class NeonLocalCli(AbstractNeonCli):
args.extend(["--allow-multiple"])
if update_catalog:
args.extend(["--update-catalog"])
if privileged_role_name is not None:
args.extend(["--privileged-role-name", privileged_role_name])
res = self.raw_cli(args)
res.check_returncode()

View File

@@ -728,7 +728,7 @@ class NeonEnvBuilder:
# NB: neon_local rewrites postgresql.conf on each start based on neon_local config. No need to patch it.
# However, in this new NeonEnv, the pageservers and safekeepers listen on different ports, and the storage
# controller will currently reject re-attach requests from them because the NodeMetadata isn't identical.
# So, from_repo_dir patches up the the storcon database.
# So, from_repo_dir patches up the storcon database.
patch_script_path = self.repo_dir / "storage_controller_db.startup.sql"
assert not patch_script_path.exists()
patch_script = ""
@@ -4324,6 +4324,7 @@ class Endpoint(PgProtocol, LogUtils):
pageserver_id: int | None = None,
allow_multiple: bool = False,
update_catalog: bool = False,
privileged_role_name: str | None = None,
) -> Self:
"""
Create a new Postgres endpoint.
@@ -4351,6 +4352,7 @@ class Endpoint(PgProtocol, LogUtils):
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
update_catalog=update_catalog,
privileged_role_name=privileged_role_name,
)
path = Path("endpoints") / self.endpoint_id / "pgdata"
self.pgdata_dir = self.env.repo_dir / path
@@ -4811,6 +4813,7 @@ class EndpointFactory:
config_lines: list[str] | None = None,
pageserver_id: int | None = None,
update_catalog: bool = False,
privileged_role_name: str | None = None,
) -> Endpoint:
ep = Endpoint(
self.env,
@@ -4834,6 +4837,7 @@ class EndpointFactory:
config_lines=config_lines,
pageserver_id=pageserver_id,
update_catalog=update_catalog,
privileged_role_name=privileged_role_name,
)
def stop_all(self, fail_on_error=True) -> Self:

View File

@@ -60,7 +60,7 @@ def test_compare_prewarmed_pgbench_perf(neon_compare: NeonCompare):
@pytest.mark.remote_cluster
@pytest.mark.timeout(30 * 60)
@pytest.mark.timeout(2 * 60 * 60)
def test_compare_prewarmed_pgbench_perf_benchmark(
pg_bin: PgBin,
neon_api: NeonAPI,
@@ -91,8 +91,9 @@ def benchmark_impl(
test_duration_min = 5
pgbench_duration = f"-T{test_duration_min * 60}"
# prewarm API is not publicly exposed. In order to test performance of a
# fully prewarmed endpoint, wait after it restarts
prewarmed_sleep_secs = 30
# fully prewarmed endpoint, wait after it restarts.
# The number here is empirical, based on manual runs on staging
prewarmed_sleep_secs = 180
branch_id = project["branch"]["id"]
project_id = project["project"]["id"]

View File

@@ -73,6 +73,11 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
".*Local notification hook failed.*",
".*Marking shard.*for notification retry.*",
".*Failed to notify compute.*",
# As an optimization, the storage controller kicks the downloads on the secondary
# after the shard split. However, secondaries are created async, so it's possible
# that the intent state was modified, but the actual secondary hasn't been created,
# which results in an error.
".*Error calling secondary download after shard split.*",
]
)

View File

@@ -24,10 +24,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
[
".*get_values_reconstruct_data for layer .*",
".*could not find data for key.*",
".*is not active. Current state: Broken.*",
".*will not become active. Current state: Broken.*",
".*failed to load metadata.*",
".*load failed.*load local timeline.*",
".*: layer load failed, assuming permanent failure:.*",
".*failed to get checkpoint bytes.*",
".*failed to get control bytes.*",

View File

@@ -687,7 +687,7 @@ def test_sharding_compaction(
for _i in range(0, 10):
# Each of these does some writes then a checkpoint: because we set image_creation_threshold to 1,
# these should result in image layers each time we write some data into a shard, and also shards
# recieving less data hitting their "empty image layer" path (wherre they should skip writing the layer,
# receiving less data hitting their "empty image layer" path (where they should skip writing the layer,
# rather than asserting)
workload.churn_rows(64)

View File

@@ -217,7 +217,7 @@ if SQL_EXPORTER is None:
self, logs_dir: Path, config_file: Path, collector_file: Path, port: int
) -> None:
# NOTE: Keep the version the same as in
# compute/compute-node.Dockerfile and build-tools.Dockerfile.
# compute/compute-node.Dockerfile and build-tools/Dockerfile.
#
# The "host" network mode allows sql_exporter to talk to the
# endpoint which is running on the host.

View File

@@ -103,3 +103,90 @@ def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
query = "DROP SUBSCRIPTION sub CASCADE"
log.info(f"Dropping subscription: {query}")
cur.execute(query)
def test_privileged_role_override(neon_simple_env: NeonEnv, pg_version: PgVersion):
"""
Test that we can override the privileged role for an endpoint and when we do it,
everything is correctly bootstrapped inside Postgres and we don't have neon_superuser
role in the database.
"""
PRIVILEGED_ROLE_NAME = "my_superuser"
env = neon_simple_env
env.create_branch("test_privileged_role_override")
ep = env.endpoints.create(
"test_privileged_role_override",
privileged_role_name=PRIVILEGED_ROLE_NAME,
update_catalog=True,
)
ep.start()
ep.wait_for_migrations()
member_roles = [
"pg_read_all_data",
"pg_write_all_data",
"pg_monitor",
"pg_signal_backend",
]
non_member_roles = [
"pg_execute_server_program",
"pg_read_server_files",
"pg_write_server_files",
]
role_attributes = {
"rolsuper": False,
"rolinherit": True,
"rolcreaterole": True,
"rolcreatedb": True,
"rolcanlogin": False,
"rolreplication": True,
"rolconnlimit": -1,
"rolbypassrls": True,
}
if pg_version >= PgVersion.V15:
non_member_roles.append("pg_checkpoint")
if pg_version >= PgVersion.V16:
member_roles.append("pg_create_subscription")
non_member_roles.append("pg_use_reserved_connections")
with ep.cursor() as cur:
cur.execute(f"SELECT rolname FROM pg_roles WHERE rolname = '{PRIVILEGED_ROLE_NAME}'")
assert cur.fetchall()[0][0] == PRIVILEGED_ROLE_NAME
cur.execute("SELECT rolname FROM pg_roles WHERE rolname = 'neon_superuser'")
assert len(cur.fetchall()) == 0
cur.execute("SHOW neon.privileged_role_name")
assert cur.fetchall()[0][0] == PRIVILEGED_ROLE_NAME
# check PRIVILEGED_ROLE_NAME role is created
cur.execute(f"select * from pg_roles where rolname = '{PRIVILEGED_ROLE_NAME}'")
assert cur.fetchone() is not None
# check PRIVILEGED_ROLE_NAME role has the correct member roles
for role in member_roles:
cur.execute(f"SELECT pg_has_role('{PRIVILEGED_ROLE_NAME}', '{role}', 'member')")
assert cur.fetchone() == (True,), (
f"Role {role} should be a member of {PRIVILEGED_ROLE_NAME}"
)
for role in non_member_roles:
cur.execute(f"SELECT pg_has_role('{PRIVILEGED_ROLE_NAME}', '{role}', 'member')")
assert cur.fetchone() == (False,), (
f"Role {role} should not be a member of {PRIVILEGED_ROLE_NAME}"
)
# check PRIVILEGED_ROLE_NAME role has the correct role attributes
for attr, val in role_attributes.items():
cur.execute(f"SELECT {attr} FROM pg_roles WHERE rolname = '{PRIVILEGED_ROLE_NAME}'")
curr_val = cur.fetchone()
assert curr_val == (val,), (
f"Role attribute {attr} should be {val} instead of {curr_val}"
)

View File

@@ -76,7 +76,6 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
"""Tests tenants with and without wal acceptors"""
tenant_1, _ = env.create_tenant()
tenant_2, _ = env.create_tenant()

View File

@@ -2788,7 +2788,8 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
# Wait for the error message to appear in the compute log
def error_logged():
return endpoint.log_contains("WAL storage utilization exceeds configured limit") is not None
if endpoint.log_contains("WAL storage utilization exceeds configured limit") is None:
raise Exception("Expected error message not found in compute log yet")
wait_until(error_logged)
log.info("Found expected error message in compute log, resuming.")
@@ -2822,3 +2823,87 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
cur.execute("select count(*) from t")
# 2000 rows from first insert + 1000 from last insert
assert cur.fetchone() == (3000,)
def test_global_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
"""
Similar to `test_timeline_disk_usage_limit`, but test that the global disk usage circuit breaker
also works as expected. The test scenario:
1. Create a timeline and endpoint.
2. Mock high disk usage via failpoint
3. Write data to the timeline so that disk usage exceeds the limit.
4. Verify that the writes hang and the expected error message appears in the compute log.
5. Mock low disk usage via failpoint
6. Verify that the hanging writes unblock and we can continue to write as normal.
"""
neon_env_builder.num_safekeepers = 1
remote_storage_kind = s3_storage()
neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start()
env.create_branch("test_global_disk_usage_limit")
endpoint = env.endpoints.create_start("test_global_disk_usage_limit")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("create table t2(key int, value text)")
for sk in env.safekeepers:
sk.stop().start(
extra_opts=["--global-disk-check-interval=1s", "--max-global-disk-usage-ratio=0.8"]
)
# Set the failpoint to have the disk usage check return u64::MAX, which definitely exceeds the practical
# limits in the test environment.
for sk in env.safekeepers:
sk.http_client().configure_failpoints(
[("sk-global-disk-usage", "return(18446744073709551615)")]
)
# Wait until the global disk usage limit watcher trips the circuit breaker.
def error_logged_in_sk():
for sk in env.safekeepers:
if sk.log_contains("Global disk usage exceeded limit") is None:
raise Exception("Expected error message not found in safekeeper log yet")
wait_until(error_logged_in_sk)
def run_hanging_insert_global():
with closing(endpoint.connect()) as bg_conn:
with bg_conn.cursor() as bg_cur:
# This should generate more than 1KiB of WAL
bg_cur.execute("insert into t2 select generate_series(1,2000), 'payload'")
bg_thread_global = threading.Thread(target=run_hanging_insert_global)
bg_thread_global.start()
def error_logged_in_compute():
if endpoint.log_contains("Global disk usage exceeded limit") is None:
raise Exception("Expected error message not found in compute log yet")
wait_until(error_logged_in_compute)
log.info("Found the expected error message in compute log, resuming.")
time.sleep(2)
assert bg_thread_global.is_alive(), "Global hanging insert unblocked prematurely!"
# Make the disk usage check always return 0 through the failpoint to simulate the disk pressure easing.
# The SKs should resume accepting WAL writes without restarting.
for sk in env.safekeepers:
sk.http_client().configure_failpoints([("sk-global-disk-usage", "return(0)")])
bg_thread_global.join(timeout=120)
assert not bg_thread_global.is_alive(), "Hanging global insert did not complete after restart"
log.info("Global hanging insert unblocked.")
# Verify that we can continue to write as normal and we don't have obvious data corruption
# following the recovery.
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("insert into t2 select generate_series(2001,3000), 'payload'")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("select count(*) from t2")
assert cur.fetchone() == (3000,)

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import sys
import tarfile
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
@@ -198,3 +199,115 @@ def test_wal_restore_http(neon_env_builder: NeonEnvBuilder, broken_tenant: bool)
# the table is back now!
restored = env.endpoints.create_start("main")
assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
# BEGIN_HADRON
# TODO: re-enable once CM python is integreated.
# def clear_directory(directory):
# for item in os.listdir(directory):
# item_path = os.path.join(directory, item)
# if os.path.isdir(item_path):
# log.info(f"removing SK directory: {item_path}")
# shutil.rmtree(item_path)
# else:
# log.info(f"removing SK file: {item_path}")
# os.remove(item_path)
# def test_sk_pull_timelines(
# neon_env_builder: NeonEnvBuilder,
# ):
# DBNAME = "regression"
# superuser_name = "databricks_superuser"
# neon_env_builder.num_safekeepers = 3
# neon_env_builder.num_pageservers = 4
# neon_env_builder.safekeeper_extra_opts = ["--enable-pull-timeline-on-startup"]
# neon_env_builder.enable_safekeeper_remote_storage(s3_storage())
# env = neon_env_builder.init_start(initial_tenant_shard_count=4)
# env.compute_manager.start(base_port=env.compute_manager_port)
# test_creator = "test_creator"
# test_metastore_id = uuid4()
# test_account_id = uuid4()
# test_workspace_id = 1
# test_workspace_url = "http://test_workspace_url"
# test_metadata_version = 1
# test_metadata = {
# "state": "INSTANCE_PROVISIONING",
# "admin_rolename": "admin",
# "admin_password_scram": "abc123456",
# }
# test_instance_name_1 = "test_instance_1"
# test_instance_read_write_compute_pool_1 = {
# "instance_name": test_instance_name_1,
# "compute_pool_name": "compute_pool_1",
# "creator": test_creator,
# "capacity": 2.0,
# "node_count": 1,
# "metadata_version": 0,
# "metadata": {
# "state": "INSTANCE_PROVISIONING",
# },
# }
# test_instance_1_readable_secondaries_enabled = False
# # Test creation
# create_instance_with_retries(
# env,
# test_instance_name_1,
# test_creator,
# test_metastore_id,
# test_account_id,
# test_workspace_id,
# test_workspace_url,
# test_instance_read_write_compute_pool_1,
# test_metadata_version,
# test_metadata,
# test_instance_1_readable_secondaries_enabled,
# )
# instance = env.compute_manager.get_instance_by_name(test_instance_name_1, test_workspace_id)
# log.info(f"haoyu Instance created: {instance}")
# assert instance["instance_name"] == test_instance_name_1
# test_instance_id = instance["instance_id"]
# instance_detail = env.compute_manager.describe_instance(test_instance_id)
# log.info(f"haoyu Instance detail: {instance_detail}")
# env.initial_tenant = instance_detail[0]["tenant_id"]
# env.initial_timeline = instance_detail[0]["timeline_id"]
# # Connect to postgres and create a database called "regression".
# endpoint = env.endpoints.create_start("main")
# endpoint.safe_psql(f"CREATE ROLE {superuser_name}")
# endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
# endpoint.safe_psql("CREATE TABLE usertable ( YCSB_KEY INT, FIELD0 TEXT);")
# # Write some data. ~20 MB.
# num_rows = 0
# for _i in range(0, 20000):
# endpoint.safe_psql(
# "INSERT INTO usertable SELECT random(), repeat('a', 1000);", log_query=False
# )
# num_rows += 1
# log.info(f"SKs {env.storage_controller.hcc_sk_node_list()}")
# env.safekeepers[0].stop(immediate=True)
# clear_directory(env.safekeepers[0].data_dir)
# env.safekeepers[0].start()
# # PG can still write data. ~20 MB.
# for _i in range(0, 20000):
# endpoint.safe_psql(
# "INSERT INTO usertable SELECT random(), repeat('a', 1000);", log_query=False
# )
# num_rows += 1
# tuples = endpoint.safe_psql("SELECT COUNT(*) FROM usertable;")
# assert tuples[0][0] == num_rows
# endpoint.stop_and_destroy()
# END_HADRON