mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 13:40:37 +00:00
Merge branch 'main' into devin/1745492468-add-dev-flag-pr11517
This commit is contained in:
@@ -24,7 +24,7 @@ The value to place in the `aud` claim.
|
||||
|
||||
@final
|
||||
class ComputeClaimsScope(StrEnum):
|
||||
ADMIN = "admin"
|
||||
ADMIN = "compute_ctl:admin"
|
||||
|
||||
|
||||
@final
|
||||
@@ -69,15 +69,17 @@ class EndpointHttpClient(requests.Session):
|
||||
json: dict[str, str] = res.json()
|
||||
return json
|
||||
|
||||
def prewarm_lfc(self):
|
||||
self.post(f"http://localhost:{self.external_port}/lfc/prewarm").raise_for_status()
|
||||
def prewarm_lfc(self, from_endpoint_id: str | None = None):
|
||||
url: str = f"http://localhost:{self.external_port}/lfc/prewarm"
|
||||
params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
|
||||
self.post(url, params=params).raise_for_status()
|
||||
|
||||
def prewarmed():
|
||||
json = self.prewarm_lfc_status()
|
||||
status, err = json["status"], json.get("error")
|
||||
assert status == "completed", f"{status}, error {err}"
|
||||
|
||||
wait_until(prewarmed)
|
||||
wait_until(prewarmed, timeout=60)
|
||||
|
||||
def offload_lfc(self):
|
||||
url = f"http://localhost:{self.external_port}/lfc/offload"
|
||||
|
||||
@@ -129,6 +129,18 @@ class NeonAPI:
|
||||
|
||||
return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def get_project_limits(self, project_id: str) -> dict[str, Any]:
    """Fetch the limits of project `project_id`.

    Issues ``GET /projects/{project_id}/limits`` with JSON accept/content-type
    headers and returns the decoded JSON body of the response.
    """
    json_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    limits_path = f"/projects/{project_id}/limits"
    resp = self.__request("GET", limits_path, headers=json_headers)
    return cast("dict[str, Any]", resp.json())
|
||||
|
||||
def delete_project(
|
||||
self,
|
||||
project_id: str,
|
||||
|
||||
@@ -497,6 +497,7 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
tenant_id: TenantId,
|
||||
pg_version: PgVersion,
|
||||
endpoint_id: str | None = None,
|
||||
grpc: bool | None = None,
|
||||
hot_standby: bool = False,
|
||||
lsn: Lsn | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
@@ -521,6 +522,8 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
args.extend(["--external-http-port", str(external_http_port)])
|
||||
if internal_http_port is not None:
|
||||
args.extend(["--internal-http-port", str(internal_http_port)])
|
||||
if grpc:
|
||||
args.append("--grpc")
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
if hot_standby:
|
||||
@@ -564,6 +567,7 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
basebackup_request_tries: int | None = None,
|
||||
timeout: str | None = None,
|
||||
env: dict[str, str] | None = None,
|
||||
dev: bool = False,
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
args = [
|
||||
"endpoint",
|
||||
@@ -589,6 +593,8 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
args.extend(["--create-test-user"])
|
||||
if timeout is not None:
|
||||
args.extend(["--start-timeout", str(timeout)])
|
||||
if dev:
|
||||
args.extend(["--dev"])
|
||||
|
||||
res = self.raw_cli(args, extra_env_vars)
|
||||
res.check_returncode()
|
||||
@@ -617,7 +623,7 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
destroy=False,
|
||||
check_return_code=True,
|
||||
mode: str | None = None,
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
) -> tuple[Lsn | None, subprocess.CompletedProcess[str]]:
|
||||
args = [
|
||||
"endpoint",
|
||||
"stop",
|
||||
@@ -629,7 +635,11 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
|
||||
return self.raw_cli(args, check_return_code=check_return_code)
|
||||
proc = self.raw_cli(args, check_return_code=check_return_code)
|
||||
log.debug(f"endpoint stop stdout: {proc.stdout}")
|
||||
lsn_str = proc.stdout.split()[-1]
|
||||
lsn: Lsn | None = None if lsn_str == "null" else Lsn(lsn_str)
|
||||
return lsn, proc
|
||||
|
||||
def mappings_map_branch(
|
||||
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
|
||||
|
||||
@@ -453,6 +453,7 @@ class NeonEnvBuilder:
|
||||
pageserver_get_vectored_concurrent_io: str | None = None,
|
||||
pageserver_tracing_config: PageserverTracingConfig | None = None,
|
||||
pageserver_import_config: PageserverImportConfig | None = None,
|
||||
storcon_kick_secondary_downloads: bool | None = None,
|
||||
):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
@@ -489,7 +490,9 @@ class NeonEnvBuilder:
|
||||
self.config_init_force: str | None = None
|
||||
self.top_output_dir = top_output_dir
|
||||
self.control_plane_hooks_api: str | None = None
|
||||
self.storage_controller_config: dict[Any, Any] | None = None
|
||||
self.storage_controller_config: dict[Any, Any] | None = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
}
|
||||
|
||||
# Flag to enable https listener in pageserver, generate local ssl certs,
|
||||
# and force storage controller to use https for pageserver api.
|
||||
@@ -512,6 +515,8 @@ class NeonEnvBuilder:
|
||||
self.pageserver_tracing_config = pageserver_tracing_config
|
||||
self.pageserver_import_config = pageserver_import_config
|
||||
|
||||
self.storcon_kick_secondary_downloads = storcon_kick_secondary_downloads
|
||||
|
||||
self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
|
||||
pageserver_default_tenant_config_compaction_algorithm
|
||||
)
|
||||
@@ -1219,6 +1224,14 @@ class NeonEnv:
|
||||
else:
|
||||
cfg["storage_controller"] = {"use_local_compute_notifications": False}
|
||||
|
||||
if config.storcon_kick_secondary_downloads is not None:
|
||||
# Configure whether storage controller should actively kick off secondary downloads
|
||||
if "storage_controller" not in cfg:
|
||||
cfg["storage_controller"] = {}
|
||||
cfg["storage_controller"]["kick_secondary_downloads"] = (
|
||||
config.storcon_kick_secondary_downloads
|
||||
)
|
||||
|
||||
# Create config for pageserver
|
||||
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||
@@ -1228,6 +1241,7 @@ class NeonEnv:
|
||||
):
|
||||
pageserver_port = PageserverPort(
|
||||
pg=self.port_distributor.get_port(),
|
||||
grpc=self.port_distributor.get_port(),
|
||||
http=self.port_distributor.get_port(),
|
||||
https=self.port_distributor.get_port() if config.use_https_pageserver_api else None,
|
||||
)
|
||||
@@ -1243,13 +1257,14 @@ class NeonEnv:
|
||||
ps_cfg: dict[str, Any] = {
|
||||
"id": ps_id,
|
||||
"listen_pg_addr": f"localhost:{pageserver_port.pg}",
|
||||
"listen_grpc_addr": f"localhost:{pageserver_port.grpc}",
|
||||
"listen_http_addr": f"localhost:{pageserver_port.http}",
|
||||
"listen_https_addr": f"localhost:{pageserver_port.https}"
|
||||
if config.use_https_pageserver_api
|
||||
else None,
|
||||
"pg_auth_type": pg_auth_type,
|
||||
"http_auth_type": http_auth_type,
|
||||
"grpc_auth_type": grpc_auth_type,
|
||||
"http_auth_type": http_auth_type,
|
||||
"availability_zone": availability_zone,
|
||||
# Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
|
||||
# the pageserver taking a long time to start up due to syncfs flushing other tests' data
|
||||
@@ -1762,6 +1777,7 @@ def neon_env_builder(
|
||||
@dataclass
class PageserverPort:
    # Port numbers for one pageserver, one field per listener kind.
    pg: int
    grpc: int
    http: int
    # Optional: stays None when no HTTPS port is assigned.
    https: int | None = None
|
||||
|
||||
@@ -2054,6 +2070,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
headers=self.headers(TokenScope.ADMIN),
|
||||
)
|
||||
|
||||
def tombstone_delete(self, node_id):
    """Send a DELETE for `node_id` to the debug tombstone endpoint.

    The request is made with ADMIN-scoped headers; the response is not returned.
    """
    log.info(f"tombstone_delete({node_id})")
    url = f"{self.api}/debug/v1/tombstone/{node_id}"
    admin_headers = self.headers(TokenScope.ADMIN)
    self.request("DELETE", url, headers=admin_headers)
|
||||
|
||||
def node_drain(self, node_id):
|
||||
log.info(f"node_drain({node_id})")
|
||||
self.request(
|
||||
@@ -2110,6 +2134,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
)
|
||||
return response.json()
|
||||
|
||||
def tombstone_list(self):
    """GET the debug tombstone endpoint with ADMIN-scoped headers and return its JSON body."""
    url = f"{self.api}/debug/v1/tombstone"
    admin_headers = self.headers(TokenScope.ADMIN)
    return self.request("GET", url, headers=admin_headers).json()
|
||||
|
||||
def tenant_shard_dump(self):
|
||||
"""
|
||||
Debug listing API: dumps the internal map of tenant shards
|
||||
@@ -2207,6 +2239,17 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
shards: list[dict[str, Any]] = body["shards"]
|
||||
return shards
|
||||
|
||||
def timeline_locate(self, tenant_id: TenantId, timeline_id: TimelineId):
    """
    Query the debug "locate" endpoint for a timeline (ADMIN-scoped request).

    :return: dict {"generation": int, "sk_set": [int], "new_sk_set": [int]}
    """
    url = f"{self.api}/debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate"
    admin_headers = self.headers(TokenScope.ADMIN)
    response = self.request("GET", url, headers=admin_headers)
    return response.json()
|
||||
|
||||
def tenant_describe(self, tenant_id: TenantId):
|
||||
"""
|
||||
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int, preferred_az_id: str}
|
||||
@@ -2333,6 +2376,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
delay_max = max_interval
|
||||
while n > 0:
|
||||
n = self.reconcile_all()
|
||||
|
||||
if n == 0:
|
||||
break
|
||||
elif time.time() - start_at > timeout_secs:
|
||||
@@ -4030,6 +4074,16 @@ def static_proxy(
|
||||
"CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
|
||||
)
|
||||
|
||||
vanilla_pg.stop()
|
||||
vanilla_pg.edit_hba(
|
||||
[
|
||||
"local all all trust",
|
||||
"host all all 127.0.0.1/32 scram-sha-256",
|
||||
"host all all ::1/128 scram-sha-256",
|
||||
]
|
||||
)
|
||||
vanilla_pg.start()
|
||||
|
||||
proxy_port = port_distributor.get_port()
|
||||
mgmt_port = port_distributor.get_port()
|
||||
http_port = port_distributor.get_port()
|
||||
@@ -4155,6 +4209,8 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
self._running = threading.Semaphore(0)
|
||||
self.__jwt: str | None = None
|
||||
|
||||
self.terminate_flush_lsn: Lsn | None = None
|
||||
|
||||
def http_client(self, retries: Retry | None = None) -> EndpointHttpClient:
|
||||
assert self.__jwt is not None
|
||||
return EndpointHttpClient(
|
||||
@@ -4167,6 +4223,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
self,
|
||||
branch_name: str,
|
||||
endpoint_id: str | None = None,
|
||||
grpc: bool | None = None,
|
||||
hot_standby: bool = False,
|
||||
lsn: Lsn | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
@@ -4191,6 +4248,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
endpoint_id=self.endpoint_id,
|
||||
tenant_id=self.tenant_id,
|
||||
lsn=lsn,
|
||||
grpc=grpc,
|
||||
hot_standby=hot_standby,
|
||||
pg_port=self.pg_port,
|
||||
external_http_port=self.external_http_port,
|
||||
@@ -4457,9 +4515,10 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
running = self._running.acquire(blocking=False)
|
||||
if running:
|
||||
assert self.endpoint_id is not None
|
||||
self.env.neon_cli.endpoint_stop(
|
||||
lsn, _ = self.env.neon_cli.endpoint_stop(
|
||||
self.endpoint_id, check_return_code=self.check_stop_result, mode=mode
|
||||
)
|
||||
self.terminate_flush_lsn = lsn
|
||||
|
||||
if sks_wait_walreceiver_gone is not None:
|
||||
for sk in sks_wait_walreceiver_gone[0]:
|
||||
@@ -4477,9 +4536,10 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
running = self._running.acquire(blocking=False)
|
||||
if running:
|
||||
assert self.endpoint_id is not None
|
||||
self.env.neon_cli.endpoint_stop(
|
||||
lsn, _ = self.env.neon_cli.endpoint_stop(
|
||||
self.endpoint_id, True, check_return_code=self.check_stop_result, mode=mode
|
||||
)
|
||||
self.terminate_flush_lsn = lsn
|
||||
self.endpoint_id = None
|
||||
|
||||
return self
|
||||
@@ -4488,6 +4548,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
self,
|
||||
branch_name: str,
|
||||
endpoint_id: str | None = None,
|
||||
grpc: bool | None = None,
|
||||
hot_standby: bool = False,
|
||||
lsn: Lsn | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
@@ -4505,6 +4566,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
branch_name=branch_name,
|
||||
endpoint_id=endpoint_id,
|
||||
config_lines=config_lines,
|
||||
grpc=grpc,
|
||||
hot_standby=hot_standby,
|
||||
lsn=lsn,
|
||||
pageserver_id=pageserver_id,
|
||||
@@ -4592,6 +4654,7 @@ class EndpointFactory:
|
||||
endpoint_id: str | None = None,
|
||||
tenant_id: TenantId | None = None,
|
||||
lsn: Lsn | None = None,
|
||||
grpc: bool | None = None,
|
||||
hot_standby: bool = False,
|
||||
config_lines: list[str] | None = None,
|
||||
remote_ext_base_url: str | None = None,
|
||||
@@ -4611,6 +4674,7 @@ class EndpointFactory:
|
||||
return ep.create_start(
|
||||
branch_name=branch_name,
|
||||
endpoint_id=endpoint_id,
|
||||
grpc=grpc,
|
||||
hot_standby=hot_standby,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
@@ -4625,6 +4689,7 @@ class EndpointFactory:
|
||||
endpoint_id: str | None = None,
|
||||
tenant_id: TenantId | None = None,
|
||||
lsn: Lsn | None = None,
|
||||
grpc: bool | None = None,
|
||||
hot_standby: bool = False,
|
||||
config_lines: list[str] | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
@@ -4647,6 +4712,7 @@ class EndpointFactory:
|
||||
branch_name=branch_name,
|
||||
endpoint_id=endpoint_id,
|
||||
lsn=lsn,
|
||||
grpc=grpc,
|
||||
hot_standby=hot_standby,
|
||||
config_lines=config_lines,
|
||||
pageserver_id=pageserver_id,
|
||||
@@ -4671,6 +4737,7 @@ class EndpointFactory:
|
||||
self,
|
||||
origin: Endpoint,
|
||||
endpoint_id: str | None = None,
|
||||
grpc: bool | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
) -> Endpoint:
|
||||
branch_name = origin.branch_name
|
||||
@@ -4682,6 +4749,7 @@ class EndpointFactory:
|
||||
endpoint_id=endpoint_id,
|
||||
tenant_id=origin.tenant_id,
|
||||
lsn=None,
|
||||
grpc=grpc,
|
||||
hot_standby=True,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
@@ -4690,6 +4758,7 @@ class EndpointFactory:
|
||||
self,
|
||||
origin: Endpoint,
|
||||
endpoint_id: str | None = None,
|
||||
grpc: bool | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
) -> Endpoint:
|
||||
branch_name = origin.branch_name
|
||||
@@ -4701,6 +4770,7 @@ class EndpointFactory:
|
||||
endpoint_id=endpoint_id,
|
||||
tenant_id=origin.tenant_id,
|
||||
lsn=None,
|
||||
grpc=grpc,
|
||||
hot_standby=True,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
@@ -4852,6 +4922,9 @@ class Safekeeper(LogUtils):
|
||||
log.info(f"finished pulling timeline from {src_ids} to {self.id}")
|
||||
return res
|
||||
|
||||
def safekeeper_id(self) -> SafekeeperId:
    """Build a `SafekeeperId` from this safekeeper's id, "localhost" and its `pg_tenant_only` port."""
    host = "localhost"
    pg_port = self.port.pg_tenant_only
    return SafekeeperId(self.id, host, pg_port)
|
||||
|
||||
@property
def data_dir(self) -> Path:
    """Path ``<repo_dir>/safekeepers/sk<id>`` for this safekeeper."""
    return self.env.repo_dir.joinpath("safekeepers", f"sk{self.id}")
|
||||
|
||||
@@ -1219,3 +1219,31 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
)
|
||||
self.verbose_error(res)
|
||||
return res.json()
|
||||
|
||||
def force_override_feature_flag(self, flag: str, value: str | None = None):
|
||||
if value is None:
|
||||
res = self.delete(
|
||||
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
|
||||
)
|
||||
else:
|
||||
res = self.put(
|
||||
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
|
||||
params={"value": value},
|
||||
)
|
||||
self.verbose_error(res)
|
||||
|
||||
def evaluate_feature_flag_boolean(self, tenant_id: TenantId, flag: str) -> Any:
    """Evaluate `flag` for `tenant_id` in boolean mode.

    GETs the tenant's feature-flag URL with ``as=boolean``, passes the
    response through `verbose_error`, and returns the decoded JSON body.
    """
    endpoint = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
    query = {"as": "boolean"}
    res = self.get(endpoint, params=query)
    self.verbose_error(res)
    return res.json()
|
||||
|
||||
def evaluate_feature_flag_multivariate(self, tenant_id: TenantId, flag: str) -> Any:
    """Evaluate `flag` for `tenant_id` in multivariate mode.

    GETs the tenant's feature-flag URL with ``as=multivariate``, passes the
    response through `verbose_error`, and returns the decoded JSON body.
    """
    endpoint = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
    query = {"as": "multivariate"}
    res = self.get(endpoint, params=query)
    self.verbose_error(res)
    return res.json()
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
-- Grow workflows.action_blocks by 100000 synthetic rows (approximately 11 MB).
-- Takes about 1 second.
INSERT INTO workflows.action_blocks (
    id,
    uuid,
    created_at,
    status,
    function_signature,
    reference_id,
    blocking,
    run_synchronously
)
SELECT
    n,                                                  -- id taken straight from the series
    uuid_generate_v4(),
    now() - (random() * interval '100 days'),           -- some moment in the last 100 days
    'CONDITIONS_NOT_MET',
    'function_signature_' || n,                         -- distinct per row
    CASE WHEN random() > 0.5 THEN 'reference_' || n END, -- NULL for roughly half of the rows
    true,
    random() > 0.5                                      -- coin-flip boolean
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,11 @@
|
||||
-- Grow workflows.action_kwargs by 100000 synthetic rows (approximately 10 MB).
-- Takes about 5 minutes.
INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
SELECT
    now(),                          -- created_at: statement timestamp for every row
    'key_' || n,                    -- key: distinct per row
    uuid_generate_v4(),             -- uuid: fresh per row
    CASE WHEN n % 2 = 0 THEN n END,  -- value_id: only on even n, otherwise NULL
    CASE WHEN n % 2 <> 0 THEN n END, -- state_value_id: only on odd n, otherwise NULL
    1                               -- action_block_id: fixed to 1 for simplicity
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,56 @@
|
||||
-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
-- takes about 4 minutes
-- The SELECT list is positional: each expression below lines up with the column
-- at the same position in the INSERT column list.
INSERT INTO authentication.device_fingerprint_event (
    uuid,
    created_at,
    identity_uuid,
    fingerprint_request_id,
    fingerprint_id,
    confidence_score,
    ip_address,
    url,
    client_referrer,
    last_seen_at,
    raw_fingerprint_response,
    session_uuid,
    fingerprint_response,
    browser_version,
    browser_name,
    device,
    operating_system,
    operating_system_version,
    user_agent,
    ip_address_location_city,
    ip_address_location_region,
    ip_address_location_country_code,
    ip_address_location_latitude,
    ip_address_location_longitude,
    is_incognito
)
SELECT
    gen_random_uuid(), -- uuid: random UUID per row
    now() - (random() * interval '10 days'), -- created_at: random timestamp within the last 10 days
    gen_random_uuid(), -- identity_uuid: random UUID per row
    md5(gs::text), -- fingerprint_request_id: md5 hash of the series number
    md5((gs + 10000)::text), -- fingerprint_id: md5 hash of the series number shifted by 10000
    round(CAST(random() AS numeric), 2), -- confidence_score: random value between 0 and 1, two decimals (random() must be cast to numeric for round)
    '192.168.' || (random() * 255)::int || '.' || (random() * 255)::int, -- ip_address: random address in 192.168.x.y
    'https://example.com/' || (gs % 1000), -- url: suffix cycles through 0..999 (deterministic, not random)
    CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END, -- client_referrer: NULL for about half of the rows, otherwise suffix cycles through 0..99
    now() - (random() * interval '5 days'), -- last_seen_at: random timestamp within the last 5 days
    NULL, -- raw_fingerprint_response: kept NULL for simplicity
    CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END, -- session_uuid: set for about 30% of the rows, NULL for the rest
    NULL, -- fingerprint_response: kept NULL for simplicity
    CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END, -- browser_version: one of two values at random
    CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END, -- browser_name: one of two values at random
    CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END, -- device: one of two values at random
    'Windows', -- operating_system: static value
    '10.0', -- operating_system_version: static value
    'Mozilla/5.0', -- user_agent: static value
    'City ' || (gs % 1000)::text, -- ip_address_location_city: cycles through 1000 names (deterministic)
    'Region ' || (gs % 100)::text, -- ip_address_location_region: cycles through 100 names (deterministic)
    'US', -- ip_address_location_country_code: static value
    random() * 180 - 90, -- ip_address_location_latitude: random value between -90 and 90
    random() * 360 - 180, -- ip_address_location_longitude: random value between -180 and 180
    random() < 0.1 -- is_incognito: true for about 10% of the rows
FROM generate_series(1, 100000) AS gs;
|
||||
10
test_runner/performance/large_synthetic_oltp/grow_edges.sql
Normal file
10
test_runner/performance/large_synthetic_oltp/grow_edges.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
-- add 100000 rows or approximately 11 MB to the edges table
-- takes about 1 minute
--
-- The random integers are built with floor(): casting random() * N straight to an
-- integer type rounds to nearest, which would produce 0..N (N+1 after the +1 shift)
-- and give the two end values only half the weight of the others.
INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
SELECT
    now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
    floor(random() * 100)::int + 1,           -- Random `workflow_id` between 1 and 100 (inclusive)
    uuid_generate_v4(),                       -- Generate a new UUID for each row
    floor(random() * 100000)::bigint + 1,     -- Random `from_vertex_id` between 1 and 100,000 (inclusive)
    floor(random() * 100000)::bigint + 1      -- Random `to_vertex_id` between 1 and 100,000 (inclusive)
FROM generate_series(1, 100000) AS gs;        -- One output row per series element
|
||||
@@ -0,0 +1,21 @@
|
||||
-- Grow booking_inventory.hotel_rate_mapping by 100000 synthetic rows (approximately 10 MB).
-- Takes about 1 second.
INSERT INTO booking_inventory.hotel_rate_mapping (
    uuid,
    created_at,
    updated_at,
    hotel_rate_id,
    remote_id,
    source
)
SELECT
    uuid_generate_v4(),      -- uuid: fresh per row
    now(),                   -- created_at
    now(),                   -- updated_at
    'rate_' || n,            -- hotel_rate_id: distinct per row
    'remote_' || n,          -- remote_id: distinct per row
    CASE n % 3               -- source: rotates over three values
        WHEN 0 THEN 'source_1'
        WHEN 1 THEN 'source_2'
        ELSE 'source_3'
    END
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,31 @@
|
||||
-- Grow ocr.ocr_pipeline_results_version by 100000 synthetic rows (approximately 20 MB).
-- Takes about 1 second.
INSERT INTO ocr.ocr_pipeline_results_version (
    id,
    transaction_id,
    operation_type,
    created_at,
    updated_at,
    s3_filename,
    completed_at,
    result,
    end_transaction_id,
    pipeline_type,
    is_async,
    callback,
    callback_kwargs,
    input,
    error,
    file_type,
    s3_bucket_name,
    pipeline_kwargs
)
SELECT
    n,                                          -- id
    n,                                          -- transaction_id: same as id for simplicity
    (n % 5)::smallint + 1,                      -- operation_type: cycles through 1..5
    now() - (random() * 30) * interval '1 day', -- created_at: random moment in the last 30 days
    now() - (random() * 30) * interval '1 day', -- updated_at: random moment in the last 30 days
    's3_file_' || n || '.txt',                  -- s3_filename: synthetic file name
    now() - (random() * 30) * interval '1 day', -- completed_at: random moment in the last 30 days
    '{}'::jsonb,                                -- result: empty JSON object
    NULL,                                       -- end_transaction_id
    CASE                                        -- pipeline_type: rotates over three values
        WHEN n % 3 = 0 THEN 'OCR'
        WHEN n % 3 = 1 THEN 'PDF'
        ELSE 'Image'
    END,
    n % 2 = 0,                                  -- is_async: true on even n
    'http://callback/' || n,                    -- callback: synthetic URL
    '{}'::jsonb,                                -- callback_kwargs: empty JSON object
    'Input text ' || n,                         -- input: synthetic text
    NULL,                                       -- error
    'pdf',                                      -- file_type: constant
    'bucket_' || n % 10,                        -- s3_bucket_name: ten synthetic buckets
    '{}'::jsonb                                 -- pipeline_kwargs: empty JSON object
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Grow booking_inventory.priceline_raw_response by 100000 synthetic rows (approx. 20 MB).
-- Takes about 20 seconds.
INSERT INTO booking_inventory.priceline_raw_response (
    uuid,
    created_at,
    updated_at,
    url,
    base_url,
    path,
    method,
    params,
    request,
    response
)
SELECT
    gen_random_uuid(),                                    -- uuid: random per row
    now() - (random() * interval '30 days'),              -- created_at: random moment in the past 30 days
    now() - (random() * interval '30 days'),              -- updated_at: random moment in the past 30 days
    'https://example.com/resource/' || n,                 -- url: distinct per row
    'https://example.com',                                -- base_url: same for all rows
    '/resource/' || n,                                    -- path: distinct per row
    CASE n % 2 WHEN 0 THEN 'GET' ELSE 'POST' END,         -- method: GET on even n, POST on odd n
    'id=' || n,                                           -- params: simple per-row pattern
    '{}'::jsonb,                                          -- request: empty JSON object
    jsonb_build_object('status', 'success', 'data', n)    -- response: small JSON object carrying n
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,26 @@
|
||||
-- Grow heron.relabeled_transactions by 100000 synthetic rows (approx. 1 MB).
-- Takes about 1 second.
INSERT INTO heron.relabeled_transactions (
    id,
    created_at,
    universal_transaction_id,
    raw_result,
    category,
    category_confidence,
    merchant,
    batch_id
)
SELECT
    n,                                              -- id
    now() - (n % 1000) * interval '1 second',       -- created_at: up to 999 seconds in the past
    'txn_' || n,                                    -- universal_transaction_id: distinct per row
    '{}'::jsonb,                                    -- raw_result: empty JSON object
    CASE n % 5                                      -- category: four labels, NULL for remainder 4
        WHEN 0 THEN 'grocery'
        WHEN 1 THEN 'electronics'
        WHEN 2 THEN 'clothing'
        WHEN 3 THEN 'utilities'
    END,
    round(random()::numeric, 2),                    -- category_confidence: random value, two decimals
    CASE WHEN n % 2 = 0 THEN 'Merchant_' || n % 20 END, -- merchant: only on even n, otherwise NULL
    n % 100 + 1                                     -- batch_id: cycles through 1..100
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,9 @@
|
||||
-- Grow workflows.state_values by 100000 synthetic rows (approx. 10 MB).
-- Takes about 14 seconds.
INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
SELECT
    'key_' || n::text,   -- key: 'key_1', 'key_2', ...
    (n - 1) / 1000 + 1,  -- workflow_id: integer division puts 1000 consecutive rows on each of the ids 1..100
    'STATIC',            -- state_type: constant
    n::bigint            -- value_id: same as the series value
FROM generate_series(1, 100000) AS n;
|
||||
30
test_runner/performance/large_synthetic_oltp/grow_values.sql
Normal file
30
test_runner/performance/large_synthetic_oltp/grow_values.sql
Normal file
@@ -0,0 +1,30 @@
|
||||
-- add 100000 rows or approx. 24 MB to the values table
-- takes about 126 seconds (measured with the earlier form of this statement)
--
-- Each row fills exactly one of int_value / string_value / bool_value /
-- numeric_value / jsonb_value, chosen by `selector` (1..5).
INSERT INTO workflows.values (
    id,
    type,
    int_value,
    string_value,
    child_type,
    bool_value,
    uuid,
    numeric_value,
    workflow_id,
    jsonb_value,
    parent_value_id
)
SELECT
    gs AS id,
    'TYPE_A' AS type,
    CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
    CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
    'CHILD_TYPE_A' AS child_type, -- Always non-null
    CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
    uuid_generate_v4() AS uuid, -- Always non-null
    CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
    (array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id, -- Use only existing workflow IDs
    CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
    (gs % 100) + 1 AS parent_value_id -- Always non-null
FROM (
    -- Draw the selector per generated row. Joining against a separate one-row
    -- subquery draws it only once, which gives all 100000 rows the same selector.
    SELECT
        gs,
        floor(random() * 5 + 1)::int AS selector
    FROM generate_series(1, 100000) AS gs
    OFFSET 0 -- keep this as its own query level so all CASE arms of a row see one selector value
) AS src;
|
||||
@@ -0,0 +1,26 @@
|
||||
-- Grow workflows.vertices by 100000 synthetic rows (approx. 18 MB).
-- Takes about 90 seconds.
-- The id column is intentionally left out of the column list so that it gets its
-- default value and stays unique.
INSERT INTO workflows.vertices (
    uuid,
    created_at,
    condition_block_id,
    operator,
    has_been_visited,
    reference_id,
    workflow_id,
    meta_data,
    action_block_id
)
SELECT
    uuid_generate_v4(),                 -- uuid
    now(),                              -- created_at
    CASE WHEN n % 2 = 0 THEN n % 10 END, -- condition_block_id: only on even n, otherwise NULL
    'operator_' || (n % 10),            -- operator: cycles through operator_0 .. operator_9
    false,                              -- has_been_visited
    'ref_' || n,                        -- reference_id: distinct per row
    (n % 1000) + 1,                     -- workflow_id: cycles through 1..1000
    '{}'::jsonb,                        -- meta_data: empty JSON object
    CASE WHEN n % 2 = 1 THEN n END       -- action_block_id: only on odd n, complementary to condition_block_id
FROM generate_series(1, 100000) AS n;
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2000 rows or 200 kb in the accounting_coding_body_tracking_category_selection table
-- takes about 1 second
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0005 means 0.0005 %).
UPDATE accounting.accounting_coding_body_tracking_category_selection
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM accounting.accounting_coding_body_tracking_category_selection
    TABLESAMPLE SYSTEM (0.0005)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 9000 rows or 1 MB in the action_blocks table
-- takes about 1 second
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.001 means 0.001 %).
-- The update flips the run_synchronously flag of every sampled row.
UPDATE workflows.action_blocks
SET run_synchronously = NOT run_synchronously
WHERE ctid in (
    SELECT ctid
    FROM workflows.action_blocks
    TABLESAMPLE SYSTEM (0.001)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 5000 rows or 1 MB in the action_kwargs table
-- takes about 1 second
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0002 means 0.0002 %).
UPDATE workflows.action_kwargs
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM workflows.action_kwargs
    TABLESAMPLE SYSTEM (0.0002)
);
|
||||
@@ -0,0 +1,10 @@
|
||||
-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
-- takes about 1 second
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0005 means 0.0005 %).
UPDATE approvals_v2.denormalized_approval_workflow
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM approvals_v2.denormalized_approval_workflow
    TABLESAMPLE SYSTEM (0.0005)
);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
-- takes about 5 seconds
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.001 means 0.001 %).
-- The update flips the is_incognito flag of every sampled row.
UPDATE authentication.device_fingerprint_event
SET is_incognito = NOT is_incognito
WHERE ctid in (
    SELECT ctid
    FROM authentication.device_fingerprint_event
    TABLESAMPLE SYSTEM (0.001)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 4000 rows or 600 kb in the edges table
-- takes about 1 second
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0005 means 0.0005 %).
UPDATE workflows.edges
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM workflows.edges
    TABLESAMPLE SYSTEM (0.0005)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
-- takes about 1 minute
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0005 means 0.0005 %).
UPDATE heron.heron_transaction_enriched_log
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM heron.heron_transaction_enriched_log
    TABLESAMPLE SYSTEM (0.0005)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
-- takes about 2 minutes
-- Target rows are chosen by ctid from a block-level sample of the same table;
-- the TABLESAMPLE SYSTEM argument is a percentage (0.0002 means 0.0002 %).
UPDATE heron.heron_transaction_enrichment_requests
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM heron.heron_transaction_enrichment_requests
    TABLESAMPLE SYSTEM (0.0002)
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 6000 rows or 600 kb in the hotel_rate_mapping table
|
||||
-- takes about 1 second
|
||||
UPDATE booking_inventory.hotel_rate_mapping
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM booking_inventory.hotel_rate_mapping
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
|
||||
-- takes about 5 seconds
|
||||
UPDATE webhook.incoming_webhooks
|
||||
SET is_body_encrypted = NOT is_body_encrypted
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM webhook.incoming_webhooks
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 1000 rows or 200 kb in the manual_transaction table
|
||||
-- takes about 2 seconds
|
||||
UPDATE banking.manual_transaction
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM banking.manual_transaction
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 1000 rows or 100 kb in the ml_receipt_matching_log table
|
||||
-- takes about 1 second
|
||||
UPDATE receipt.ml_receipt_matching_log
|
||||
SET is_shadow_mode = NOT is_shadow_mode
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM receipt.ml_receipt_matching_log
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2000 rows or 400 kb in the ocr_pipeline_results_version table
|
||||
-- takes about 1 second
|
||||
UPDATE ocr.ocr_pipeline_results_version
|
||||
SET is_async = NOT is_async
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM ocr.ocr_pipeline_results_version
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
|
||||
-- takes about 11 seconds
|
||||
UPDATE ocr.ocr_pipeline_step_results
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM ocr.ocr_pipeline_step_results
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
|
||||
-- takes about 40 seconds
|
||||
UPDATE ocr.ocr_pipeline_step_results_version
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM ocr.ocr_pipeline_step_results_version
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
|
||||
-- takes about 1 second
|
||||
UPDATE booking_inventory.priceline_raw_response
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM booking_inventory.priceline_raw_response
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
|
||||
-- takes about 30 seconds
|
||||
UPDATE accounting.quickbooks_transactions
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM accounting.quickbooks_transactions
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,15 @@
|
||||
-- update approximately 6000 rows or 600 kb in the raw_finicity_transaction table
|
||||
-- takes about 1 second
|
||||
UPDATE banking.raw_finicity_transaction
|
||||
SET raw_data =
|
||||
jsonb_set(
|
||||
raw_data,
|
||||
'{updated}',
|
||||
to_jsonb(now()),
|
||||
true
|
||||
)
|
||||
WHERE ctid IN (
|
||||
SELECT ctid
|
||||
FROM banking.raw_finicity_transaction
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
|
||||
-- takes about 1 second
|
||||
UPDATE heron.relabeled_transactions
|
||||
SET created_at = now()
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM heron.relabeled_transactions
|
||||
TABLESAMPLE SYSTEM (0.0005)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 8000 rows or 1 MB in the state_values table
|
||||
-- takes about 2 minutes
|
||||
UPDATE workflows.state_values
|
||||
SET state_type = now()::text
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM workflows.state_values
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
|
||||
-- takes about 5 minutes
|
||||
UPDATE stripe.stripe_authorization_event_log
|
||||
SET approved = NOT approved
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM stripe.stripe_authorization_event_log
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2000 rows or 301 MB in the transaction table
|
||||
-- takes about 90 seconds
|
||||
UPDATE transaction.transaction
|
||||
SET is_last = NOT is_last
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM transaction.transaction
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 2500 rows or 1 MB in the values table
|
||||
-- takes about 3 minutes
|
||||
UPDATE workflows.values
|
||||
SET bool_value = NOT bool_value
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM workflows.values
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
) AND bool_value IS NOT NULL;
|
||||
@@ -0,0 +1,9 @@
|
||||
-- update approximately 10000 rows or 2 MB in the vertices table
|
||||
-- takes about 1 minute
|
||||
UPDATE workflows.vertices
|
||||
SET has_been_visited = NOT has_been_visited
|
||||
WHERE ctid in (
|
||||
SELECT ctid
|
||||
FROM workflows.vertices
|
||||
TABLESAMPLE SYSTEM (0.0002)
|
||||
);
|
||||
@@ -146,8 +146,6 @@ def run_benchmark(env: NeonEnv, pg_bin: PgBin, record, duration_secs: int):
|
||||
ps_http.base_url,
|
||||
"--page-service-connstring",
|
||||
env.pageserver.connstr(password=None),
|
||||
"--gzip-probability",
|
||||
"1",
|
||||
"--runtime",
|
||||
f"{duration_secs}s",
|
||||
# don't specify the targets explicitly, let pagebench auto-discover them
|
||||
|
||||
@@ -31,7 +31,9 @@ def get_custom_scripts(
|
||||
return rv
|
||||
|
||||
|
||||
def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
|
||||
def run_test_pgbench(
|
||||
env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
|
||||
):
|
||||
password = env.pg.default_options.get("password", None)
|
||||
options = env.pg.default_options.get("options", "")
|
||||
# drop password from the connection string by passing password=None and set password separately
|
||||
@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
|
||||
"-n", # no explicit vacuum before the test - we want to rely on auto-vacuum
|
||||
"-M",
|
||||
"prepared",
|
||||
"--client=500",
|
||||
"--jobs=100",
|
||||
f"--client={clients}",
|
||||
f"--jobs={jobs}",
|
||||
f"-T{duration}",
|
||||
"-P60", # progress every minute
|
||||
"--progress-timestamp",
|
||||
@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
|
||||
run_test_pgbench(remote_compare, custom_scripts, duration)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
    """
    Run pgbench with the scripts returned by get_custom_scripts() against the
    remote cluster, using 35 clients and 35 jobs instead of the defaults of
    run_test_pgbench.
    """
    # run_test_pgbench expects the scripts as one space-separated string.
    scripts = " ".join(get_custom_scripts())
    run_test_pgbench(remote_compare, scripts, duration, clients=35, jobs=35)
|
||||
|
||||
|
||||
@pytest.mark.remote_cluster
|
||||
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
|
||||
# run analyze, vacuum, re-index after the test and measure and report its duration
|
||||
|
||||
@@ -45,6 +45,8 @@ class NeonEndpoint:
|
||||
if self.branch.connect_env:
|
||||
self.connect_env = self.branch.connect_env.copy()
|
||||
self.connect_env["PGHOST"] = self.host
|
||||
if self.type == "read_only":
|
||||
self.project.read_only_endpoints_total += 1
|
||||
|
||||
def delete(self):
|
||||
self.project.delete_endpoint(self.id)
|
||||
@@ -228,8 +230,13 @@ class NeonProject:
|
||||
self.benchmarks: dict[str, subprocess.Popen[Any]] = {}
|
||||
self.restore_num: int = 0
|
||||
self.restart_pgbench_on_console_errors: bool = False
|
||||
self.limits: dict[str, Any] = self.get_limits()["limits"]
|
||||
self.read_only_endpoints_total: int = 0
|
||||
|
||||
def delete(self):
|
||||
def get_limits(self) -> dict[str, Any]:
|
||||
return self.neon_api.get_project_limits(self.id)
|
||||
|
||||
def delete(self) -> None:
|
||||
self.neon_api.delete_project(self.id)
|
||||
|
||||
def create_branch(self, parent_id: str | None = None) -> NeonBranch | None:
|
||||
@@ -282,6 +289,7 @@ class NeonProject:
|
||||
self.neon_api.delete_endpoint(self.id, endpoint_id)
|
||||
self.endpoints[endpoint_id].branch.endpoints.pop(endpoint_id)
|
||||
self.endpoints.pop(endpoint_id)
|
||||
self.read_only_endpoints_total -= 1
|
||||
self.wait()
|
||||
|
||||
def start_benchmark(self, target: str, clients: int = 10) -> subprocess.Popen[Any]:
|
||||
@@ -369,49 +377,64 @@ def setup_class(
|
||||
print(f"::warning::Retried on 524 error {neon_api.retries524} times")
|
||||
if neon_api.retries4xx > 0:
|
||||
print(f"::warning::Retried on 4xx error {neon_api.retries4xx} times")
|
||||
log.info("Removing the project")
|
||||
log.info("Removing the project %s", project.id)
|
||||
project.delete()
|
||||
|
||||
|
||||
def do_action(project: NeonProject, action: str) -> None:
|
||||
def do_action(project: NeonProject, action: str) -> bool:
|
||||
"""
|
||||
Runs the action
|
||||
"""
|
||||
log.info("Action: %s", action)
|
||||
if action == "new_branch":
|
||||
log.info("Trying to create a new branch")
|
||||
if 0 <= project.limits["max_branches"] <= len(project.branches):
|
||||
log.info(
|
||||
"Maximum branch limit exceeded (%s of %s)",
|
||||
len(project.branches),
|
||||
project.limits["max_branches"],
|
||||
)
|
||||
return False
|
||||
parent = project.branches[
|
||||
random.choice(list(set(project.branches.keys()) - project.reset_branches))
|
||||
]
|
||||
log.info("Parent: %s", parent)
|
||||
child = parent.create_child_branch()
|
||||
if child is None:
|
||||
return
|
||||
return False
|
||||
log.info("Created branch %s", child)
|
||||
child.start_benchmark()
|
||||
elif action == "delete_branch":
|
||||
if project.leaf_branches:
|
||||
target = random.choice(list(project.leaf_branches.values()))
|
||||
target: NeonBranch = random.choice(list(project.leaf_branches.values()))
|
||||
log.info("Trying to delete branch %s", target)
|
||||
target.delete()
|
||||
else:
|
||||
log.info("Leaf branches not found, skipping")
|
||||
return False
|
||||
elif action == "new_ro_endpoint":
|
||||
if 0 <= project.limits["max_read_only_endpoints"] <= project.read_only_endpoints_total:
|
||||
log.info(
|
||||
"Maximum read only endpoint limit exceeded (%s of %s)",
|
||||
project.read_only_endpoints_total,
|
||||
project.limits["max_read_only_endpoints"],
|
||||
)
|
||||
return False
|
||||
ep = random.choice(
|
||||
[br for br in project.branches.values() if br.id not in project.reset_branches]
|
||||
).create_ro_endpoint()
|
||||
log.info("Created the RO endpoint with id %s branch: %s", ep.id, ep.branch.id)
|
||||
ep.start_benchmark()
|
||||
elif action == "delete_ro_endpoint":
|
||||
if project.read_only_endpoints_total == 0:
|
||||
log.info("no read_only endpoints present, skipping")
|
||||
return False
|
||||
ro_endpoints: list[NeonEndpoint] = [
|
||||
endpoint for endpoint in project.endpoints.values() if endpoint.type == "read_only"
|
||||
]
|
||||
if ro_endpoints:
|
||||
target_ep: NeonEndpoint = random.choice(ro_endpoints)
|
||||
target_ep.delete()
|
||||
log.info("endpoint %s deleted", target_ep.id)
|
||||
else:
|
||||
log.info("no read_only endpoints present, skipping")
|
||||
target_ep: NeonEndpoint = random.choice(ro_endpoints)
|
||||
target_ep.delete()
|
||||
log.info("endpoint %s deleted", target_ep.id)
|
||||
elif action == "restore_random_time":
|
||||
if project.leaf_branches:
|
||||
br: NeonBranch = random.choice(list(project.leaf_branches.values()))
|
||||
@@ -419,8 +442,10 @@ def do_action(project: NeonProject, action: str) -> None:
|
||||
br.restore_random_time()
|
||||
else:
|
||||
log.info("No leaf branches found")
|
||||
return False
|
||||
else:
|
||||
raise ValueError(f"The action {action} is unknown")
|
||||
return True
|
||||
|
||||
|
||||
@pytest.mark.timeout(7200)
|
||||
@@ -457,8 +482,9 @@ def test_api_random(
|
||||
pg_bin.run(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=project.main_branch.connect_env)
|
||||
for _ in range(num_operations):
|
||||
log.info("Starting action #%s", _ + 1)
|
||||
do_action(
|
||||
while not do_action(
|
||||
project, random.choices([a[0] for a in ACTIONS], weights=[w[1] for w in ACTIONS])[0]
|
||||
)
|
||||
):
|
||||
log.info("Retrying...")
|
||||
project.check_all_benchmarks()
|
||||
assert True
|
||||
|
||||
@@ -184,7 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"timeline_offloading": False,
|
||||
"rel_size_v2_enabled": True,
|
||||
"relsize_snapshot_cache_capacity": 10000,
|
||||
"gc_compaction_enabled": True,
|
||||
"gc_compaction_enabled": False,
|
||||
"gc_compaction_verification": False,
|
||||
"gc_compaction_initial_threshold_kb": 1024000,
|
||||
"gc_compaction_ratio_percent": 200,
|
||||
|
||||
@@ -26,6 +26,10 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
ps = env.pageserver
|
||||
ps_http = ps.http_client()
|
||||
|
||||
storcon_managed_timelines = (env.storage_controller_config or {}).get(
|
||||
"timelines_onto_safekeepers", False
|
||||
)
|
||||
|
||||
# 1. Check that we always hit the cache after compute restart.
|
||||
for i in range(3):
|
||||
ep.start()
|
||||
@@ -33,15 +37,26 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
def check_metrics(i=i):
|
||||
metrics = ps_http.get_metrics()
|
||||
# Never miss.
|
||||
# The first time compute_ctl sends `get_basebackup` with lsn=None, we do not cache such requests.
|
||||
# All other requests should be a hit
|
||||
assert (
|
||||
metrics.query_one(
|
||||
"pageserver_basebackup_cache_read_total", {"result": "miss"}
|
||||
).value
|
||||
== 0
|
||||
)
|
||||
if storcon_managed_timelines:
|
||||
# We do not cache the initial basebackup yet,
|
||||
# so the first compute startup should be a miss.
|
||||
assert (
|
||||
metrics.query_one(
|
||||
"pageserver_basebackup_cache_read_total", {"result": "miss"}
|
||||
).value
|
||||
== 1
|
||||
)
|
||||
else:
|
||||
# If the timeline is not initialized on safekeepers,
|
||||
# the compute_ctl sends `get_basebackup` with lsn=None for the first startup.
|
||||
# We do not use cache for such requests, so it's neither a hit nor a miss.
|
||||
assert (
|
||||
metrics.query_one(
|
||||
"pageserver_basebackup_cache_read_total", {"result": "miss"}
|
||||
).value
|
||||
== 0
|
||||
)
|
||||
|
||||
# All but the first requests are hits.
|
||||
assert (
|
||||
metrics.query_one("pageserver_basebackup_cache_read_total", {"result": "hit"}).value
|
||||
@@ -54,6 +69,11 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
).value
|
||||
== i + 1
|
||||
)
|
||||
# There should be only one basebackup file in the cache.
|
||||
assert metrics.query_one("pageserver_basebackup_cache_entries_total").value == 1
|
||||
# The size of one basebackup for new DB is ~20KB.
|
||||
size_bytes = metrics.query_one("pageserver_basebackup_cache_size_bytes").value
|
||||
assert 10 * 1024 <= size_bytes <= 100 * 1024
|
||||
|
||||
wait_until(check_metrics)
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from fixtures.common_types import Lsn, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import wait_until_tenant_active
|
||||
from fixtures.safekeeper.http import MembershipConfiguration, TimelineCreateRequest
|
||||
from fixtures.utils import query_scalar
|
||||
from performance.test_perf_pgbench import get_scales_matrix
|
||||
from requests import RequestException
|
||||
@@ -164,6 +165,19 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
|
||||
ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
|
||||
env.pageserver.tenant_create(env.initial_tenant)
|
||||
|
||||
sk = env.safekeepers[0]
|
||||
assert sk
|
||||
sk.http_client().timeline_create(
|
||||
TimelineCreateRequest(
|
||||
env.initial_tenant,
|
||||
env.initial_timeline,
|
||||
MembershipConfiguration(generation=1, members=[sk.safekeeper_id()], new_members=None),
|
||||
int(env.pg_version) * 10000,
|
||||
Lsn(0),
|
||||
None,
|
||||
)
|
||||
)
|
||||
|
||||
initial_branch = "initial_branch"
|
||||
|
||||
def start_creating_timeline():
|
||||
|
||||
@@ -18,6 +18,8 @@ from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgBin,
|
||||
Safekeeper,
|
||||
StorageControllerApiException,
|
||||
flush_ep_to_pageserver,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
@@ -26,6 +28,7 @@ from fixtures.pageserver.utils import (
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
|
||||
from fixtures.safekeeper.http import MembershipConfiguration
|
||||
from fixtures.workload import Workload
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -125,6 +128,12 @@ check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
|
||||
reason="CHECK_ONDISK_DATA_COMPATIBILITY env is not set",
|
||||
)
|
||||
|
||||
skip_old_debug_versions = pytest.mark.skipif(
|
||||
os.getenv("BUILD_TYPE", "debug") == "debug"
|
||||
and os.getenv("DEFAULT_PG_VERSION") in [PgVersion.V14, PgVersion.V15, PgVersion.V16],
|
||||
reason="compatibility snaphots not available for old versions of debug builds",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(before="test_forward_compatibility")
|
||||
@@ -195,6 +204,7 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_
|
||||
|
||||
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@skip_old_debug_versions
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
def test_backward_compatibility(
|
||||
@@ -222,6 +232,7 @@ def test_backward_compatibility(
|
||||
|
||||
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@skip_old_debug_versions
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
def test_forward_compatibility(
|
||||
@@ -291,7 +302,20 @@ def test_forward_compatibility(
|
||||
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
|
||||
ep = env.endpoints.create("main")
|
||||
ep_env = {"LD_LIBRARY_PATH": str(env.pg_distrib_dir / f"v{env.pg_version}/lib")}
|
||||
ep.start(env=ep_env)
|
||||
|
||||
# If the compatibility snapshot was created with --timelines-onto-safekeepers=false,
|
||||
# we should not pass safekeeper_generation to the endpoint because the compute
|
||||
# will not be able to start.
|
||||
# Zero generation is INVALID_GENERATION.
|
||||
generation = 0
|
||||
try:
|
||||
res = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
|
||||
generation = res["generation"]
|
||||
except StorageControllerApiException as e:
|
||||
if e.status_code != 404 or not re.search(r"Timeline .* not found", str(e)):
|
||||
raise e
|
||||
|
||||
ep.start(env=ep_env, safekeeper_generation=generation)
|
||||
|
||||
connstr = ep.connstr()
|
||||
|
||||
@@ -341,7 +365,7 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
|
||||
)
|
||||
|
||||
# Timeline exists again: restart the endpoint
|
||||
ep.start(env=ep_env)
|
||||
ep.start(env=ep_env, safekeeper_generation=generation)
|
||||
|
||||
pg_bin.run_capture(
|
||||
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
|
||||
@@ -542,6 +566,24 @@ def test_historic_storage_formats(
|
||||
# All our artifacts should contain at least one timeline
|
||||
assert len(timelines) > 0
|
||||
|
||||
# Import tenant does not create the timeline on safekeepers,
|
||||
# because it is a debug handler and the timeline may have already been
|
||||
# created on some set of safekeepers.
|
||||
# Create the timeline on safekeepers manually.
|
||||
# TODO(diko): when we have the script/storcon handler to migrate
|
||||
# the timeline to storcon, we can replace this code with it.
|
||||
mconf = MembershipConfiguration(
|
||||
generation=1,
|
||||
members=Safekeeper.sks_to_safekeeper_ids([env.safekeepers[0]]),
|
||||
new_members=None,
|
||||
)
|
||||
members_sks = Safekeeper.mconf_sks(env, mconf)
|
||||
|
||||
for timeline in timelines:
|
||||
Safekeeper.create_timeline(
|
||||
dataset.tenant_id, timeline["timeline_id"], env.pageserver, mconf, members_sks
|
||||
)
|
||||
|
||||
# TODO: ensure that the snapshots we're importing contain a sensible variety of content, at the very
|
||||
# least they should include a mixture of deltas and image layers. Preferably they should also
|
||||
# contain some "exotic" stuff like aux files from logical replication.
|
||||
@@ -573,6 +615,7 @@ def test_historic_storage_formats(
|
||||
|
||||
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@skip_old_debug_versions
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.parametrize(
|
||||
**fixtures.utils.allpairs_versions(),
|
||||
|
||||
@@ -418,7 +418,7 @@ def test_sql_exporter_metrics_e2e(
|
||||
pg_user = conn_options["user"]
|
||||
pg_dbname = conn_options["dbname"]
|
||||
pg_application_name = f"sql_exporter{stem_suffix}"
|
||||
connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}"
|
||||
connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}&pgaudit.log=none"
|
||||
|
||||
def escape_go_filepath_match_characters(s: str) -> str:
|
||||
"""
|
||||
|
||||
@@ -9,6 +9,8 @@ from fixtures.utils import wait_until
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_compute_reconfigure(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
@@ -85,3 +87,57 @@ def test_compute_reconfigure(neon_simple_env: NeonEnv):
|
||||
samples = metrics.query_all("compute_ctl_up", {"build_tag": build_tag})
|
||||
assert len(samples) == 1
|
||||
assert samples[0].value == 1
|
||||
|
||||
|
||||
def test_compute_safekeeper_connstrings_duplicate(neon_simple_env: NeonEnv):
    """
    Test that we catch duplicate entries in neon.safekeepers.

    Starts an endpoint on "main", reads the current neon.safekeepers value,
    writes it back twice (comma-joined) through the endpoint spec and then
    asks the endpoint to reconfigure. Any exception raised by the
    reconfiguration is logged rather than propagated.
    """
    env = neon_simple_env

    endpoint = env.endpoints.create_start("main")

    # grab the current value of neon.safekeepers
    sk_list = []
    with endpoint.cursor() as cursor:
        cursor.execute("SHOW neon.safekeepers;")
        row = cursor.fetchone()
        assert row is not None

        log.info(f' initial neon.safekeepers: "{row}"')

        # build a safekeepers list with a duplicate
        sk_list.append(row[0])
        sk_list.append(row[0])

    safekeepers = ",".join(sk_list)
    log.info(f'reconfigure neon.safekeepers: "{safekeepers}"')

    # introduce duplicate entry in neon.safekeepers, on purpose
    endpoint.respec_deep(
        **{
            "spec": {
                "skip_pg_catalog_updates": True,
                "cluster": {
                    "settings": [
                        {
                            "name": "neon.safekeepers",
                            "vartype": "string",
                            "value": safekeepers,
                        }
                    ]
                },
            },
        }
    )

    try:
        endpoint.reconfigure()

        # Check that in logs we see that it was actually reconfigured,
        # not restarted or something else.
        endpoint.log_contains("INFO request{method=POST uri=/configure")

    except Exception as e:
        # we expect a failure here
        # NOTE(review): the test also passes when no exception is raised;
        # confirm whether a failure should be asserted explicitly.
        #
        # The message is already fully formatted by the f-string; applying
        # "% e" on top of it raised TypeError inside this handler.
        log.info(f"RAISED: {e}")
|
||||
|
||||
51
test_runner/regress/test_feature_flag.py
Normal file
51
test_runner/regress/test_feature_flag.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fixtures.utils import run_only_on_default_postgres
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
@run_only_on_default_postgres("Pageserver-only test only needs to run on one version")
def test_feature_flag(neon_env_builder: NeonEnvBuilder):
    """
    Force-override a feature flag on the pageserver and check how the boolean
    and multivariate evaluation endpoints report it for the initial tenant.

    Three states are exercised in order: override "true", override "false",
    and override removed (None).
    """
    env = neon_env_builder.init_start()
    flag = "test-feature-flag"

    def override(value):
        # A fresh HTTP client is obtained for every request.
        env.pageserver.http_client().force_override_feature_flag(flag, value)

    def boolean_result():
        client = env.pageserver.http_client()
        return client.evaluate_feature_flag_boolean(env.initial_tenant, flag)["result"]

    def multivariate_result():
        client = env.pageserver.http_client()
        return client.evaluate_feature_flag_multivariate(env.initial_tenant, flag)["result"]

    # Override "true": boolean evaluation is truthy, multivariate echoes the value.
    override("true")
    assert boolean_result()["Ok"]
    assert multivariate_result()["Ok"] == "true"

    # Override "false": boolean evaluation reports an error, multivariate
    # still returns the raw value.
    override("false")
    assert boolean_result()["Err"] == "No condition group is matched"
    assert multivariate_result()["Ok"] == "false"

    # Override removed: both evaluations report an error.
    override(None)
    assert "Err" in boolean_result()
    assert "Err" in multivariate_result()
|
||||
@@ -87,6 +87,9 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
|
||||
# Set up pageserver for import
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.tenant_create(tenant)
|
||||
|
||||
@@ -59,7 +59,7 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("create extension neon version '1.6'")
|
||||
pg_cur.execute("create extension neon")
|
||||
pg_cur.execute("create database lfc")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
@@ -84,11 +84,8 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
|
||||
# wait until compute_ctl completes downgrade of extension to default version
|
||||
time.sleep(1)
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("alter extension neon update to '1.6'")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
@@ -144,7 +141,7 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMet
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("create extension neon version '1.6'")
|
||||
pg_cur.execute("create extension neon")
|
||||
pg_cur.execute("CREATE DATABASE lfc")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
@@ -188,7 +185,8 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMet
|
||||
pg_cur.execute("select pg_reload_conf()")
|
||||
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
http_client.prewarm_lfc()
|
||||
# Same thing as prewarm_lfc(), testing other method
|
||||
http_client.prewarm_lfc(endpoint.endpoint_id)
|
||||
else:
|
||||
pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
|
||||
# IMPORTANT:
|
||||
# If the version has changed, the test should be updated.
|
||||
# Ensure that the default version is also updated in the neon.control file
|
||||
assert cur.fetchone() == ("1.5",)
|
||||
assert cur.fetchone() == ("1.6",)
|
||||
cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
|
||||
res = cur.fetchall()
|
||||
log.info(res)
|
||||
@@ -53,10 +53,10 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
|
||||
# IMPORTANT:
|
||||
# If the version has changed, the test should be updated.
|
||||
# Ensure that the default version is also updated in the neon.control file
|
||||
assert cur.fetchone() == ("1.5",)
|
||||
assert cur.fetchone() == ("1.6",)
|
||||
cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
|
||||
all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
|
||||
current_version = "1.5"
|
||||
all_versions = ["1.6", "1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
|
||||
current_version = "1.6"
|
||||
for idx, begin_version in enumerate(all_versions):
|
||||
for target_version in all_versions[idx + 1 :]:
|
||||
if current_version != begin_version:
|
||||
|
||||
@@ -64,6 +64,11 @@ def test_normal_work(
|
||||
"""
|
||||
|
||||
neon_env_builder.num_safekeepers = num_safekeepers
|
||||
|
||||
if safekeeper_proto_version == 2:
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
# running.
|
||||
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
def test_pageserver_restarts_under_workload(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
env = neon_simple_env
|
||||
env.create_branch("test_pageserver_restarts")
|
||||
endpoint = env.endpoints.create_start("test_pageserver_restarts")
|
||||
@@ -28,7 +28,11 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr])
|
||||
|
||||
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
|
||||
thread = threading.Thread(
|
||||
target=run_pgbench,
|
||||
args=(endpoint.connstr(options="-cstatement_timeout=360s"),),
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
|
||||
for _ in range(n_restarts):
|
||||
|
||||
@@ -173,7 +173,11 @@ def test_pg_regress(
|
||||
(runpath / "testtablespace").mkdir(parents=True)
|
||||
|
||||
# Compute all the file locations that pg_regress will need.
|
||||
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
|
||||
#
|
||||
# XXX: We assume that the `build` directory is a sibling of the
|
||||
# pg_distrib_dir. That is the default when you check out the
|
||||
# repository; `build` and `pg_install` are created side by side.
|
||||
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
|
||||
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
|
||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||
schedule = src_path / "parallel_schedule"
|
||||
@@ -250,7 +254,11 @@ def test_isolation(
|
||||
(runpath / "testtablespace").mkdir(parents=True)
|
||||
|
||||
# Compute all the file locations that pg_isolation_regress will need.
|
||||
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
|
||||
#
|
||||
# XXX: We assume that the `build` directory is a sibling of the
|
||||
# pg_distrib_dir. That is the default when you check out the
|
||||
# repository; `build` and `pg_install` are created side by side.
|
||||
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/isolation"
|
||||
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
|
||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||
schedule = src_path / "isolation_schedule"
|
||||
@@ -306,13 +314,7 @@ def test_sql_regress(
|
||||
)
|
||||
|
||||
# Connect to postgres and create a database called "regression".
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
config_lines=[
|
||||
# Enable the test mode, so that we don't need to patch the test cases.
|
||||
"neon.regress_test_mode = true",
|
||||
],
|
||||
)
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
@@ -320,8 +322,11 @@ def test_sql_regress(
|
||||
(runpath / "testtablespace").mkdir(parents=True)
|
||||
|
||||
# Compute all the file locations that pg_regress will need.
|
||||
# This test runs neon specific tests
|
||||
build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
|
||||
#
|
||||
# XXX: We assume that the `build` directory is a sibling of the
|
||||
# pg_distrib_dir. That is the default when you check out the
|
||||
# repository; `build` and `pg_install` are created side by side.
|
||||
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
|
||||
src_path = base_dir / "test_runner/sql_regress"
|
||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||
schedule = src_path / "parallel_schedule"
|
||||
|
||||
@@ -19,11 +19,15 @@ TABLE_NAME = "neon_control_plane.endpoints"
|
||||
async def test_proxy_psql_allowed_ips(static_proxy: NeonProxy, vanilla_pg: VanillaPostgres):
|
||||
# Shouldn't be able to connect to this project
|
||||
vanilla_pg.safe_psql(
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('private-project', '8.8.8.8')"
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('private-project', '8.8.8.8')",
|
||||
user="proxy",
|
||||
password="password",
|
||||
)
|
||||
# Should be able to connect to this project
|
||||
vanilla_pg.safe_psql(
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('generic-project', '::1,127.0.0.1')"
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('generic-project', '::1,127.0.0.1')",
|
||||
user="proxy",
|
||||
password="password",
|
||||
)
|
||||
|
||||
def check_cannot_connect(**kwargs):
|
||||
@@ -60,7 +64,9 @@ async def test_proxy_http_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil
|
||||
|
||||
# Shouldn't be able to connect to this project
|
||||
vanilla_pg.safe_psql(
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('proxy', '8.8.8.8')"
|
||||
f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('proxy', '8.8.8.8')",
|
||||
user="proxy",
|
||||
password="password",
|
||||
)
|
||||
|
||||
def query(status: int, query: str, *args):
|
||||
@@ -75,6 +81,8 @@ async def test_proxy_http_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil
|
||||
query(400, "select 1;") # ip address is not allowed
|
||||
# Should be able to connect to this project
|
||||
vanilla_pg.safe_psql(
|
||||
f"UPDATE {TABLE_NAME} SET allowed_ips = '8.8.8.8,127.0.0.1' WHERE endpoint_id = 'proxy'"
|
||||
f"UPDATE {TABLE_NAME} SET allowed_ips = '8.8.8.8,127.0.0.1' WHERE endpoint_id = 'proxy'",
|
||||
user="proxy",
|
||||
password="password",
|
||||
)
|
||||
query(200, "select 1;") # should work now
|
||||
|
||||
@@ -4,13 +4,25 @@ File with secondary->primary promotion testing.
|
||||
So far, it only contains a test that we don't break and that the data is persisted.
|
||||
"""
|
||||
|
||||
from typing import cast
|
||||
|
||||
import psycopg2
|
||||
from fixtures.common_types import Lsn
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv, wait_replica_caughtup
|
||||
from fixtures.pg_version import PgVersion
|
||||
from pytest import raises
|
||||
|
||||
|
||||
def stop_and_check_lsn(ep: Endpoint, expected_lsn: Lsn | None):
    """
    Stop `ep` in "immediate-terminate" mode and check the flush LSN it reports.

    `expected_lsn` is a lower bound for `ep.terminate_flush_lsn`.  Pass None when the
    endpoint is expected to report no LSN at all.

    Raises AssertionError if the reported LSN does not match the expectation.
    """
    ep.stop(mode="immediate-terminate")
    lsn = ep.terminate_flush_lsn
    if expected_lsn is None:
        assert lsn == expected_lsn, f"{expected_lsn=} != {lsn=}"
        return

    # Check for a missing LSN explicitly: ordering None against an LSN would raise a
    # TypeError instead of a readable assertion failure.
    assert lsn is not None, f"no terminate flush LSN reported, {expected_lsn=}"
    # The message states the relation that actually holds when the check fails.
    assert lsn >= expected_lsn, f"{lsn=} < {expected_lsn=}"
|
||||
|
||||
|
||||
def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
"""
|
||||
Test that a replica safely promotes, and can commit data updates which
|
||||
@@ -37,7 +49,9 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
pg_current_wal_flush_lsn()
|
||||
"""
|
||||
)
|
||||
log.info(f"Primary: Current LSN after workload is {primary_cur.fetchone()}")
|
||||
lsn_triple = cast("tuple[str, str, str]", primary_cur.fetchone())
|
||||
log.info(f"Primary: Current LSN after workload is {lsn_triple}")
|
||||
expected_primary_lsn: Lsn = Lsn(lsn_triple[2])
|
||||
primary_cur.execute("show neon.safekeepers")
|
||||
safekeepers = primary_cur.fetchall()[0][0]
|
||||
|
||||
@@ -57,7 +71,7 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
secondary_cur.execute("select count(*) from t")
|
||||
assert secondary_cur.fetchone() == (100,)
|
||||
|
||||
primary.stop_and_destroy(mode="immediate")
|
||||
stop_and_check_lsn(primary, expected_primary_lsn)
|
||||
|
||||
# Reconnect to the secondary to make sure we get a read-write connection
|
||||
promo_conn = secondary.connect()
|
||||
@@ -109,9 +123,10 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
|
||||
# wait_for_last_flush_lsn(env, secondary, env.initial_tenant, env.initial_timeline)
|
||||
|
||||
secondary.stop_and_destroy()
|
||||
# secondaries don't sync safekeepers on finish so LSN will be None
|
||||
stop_and_check_lsn(secondary, None)
|
||||
|
||||
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
|
||||
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary2")
|
||||
|
||||
with primary.connect() as new_primary:
|
||||
new_primary_cur = new_primary.cursor()
|
||||
@@ -122,7 +137,9 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
pg_current_wal_flush_lsn()
|
||||
"""
|
||||
)
|
||||
log.info(f"New primary: Boot LSN is {new_primary_cur.fetchone()}")
|
||||
lsn_triple = cast("tuple[str, str, str]", new_primary_cur.fetchone())
|
||||
expected_primary_lsn = Lsn(lsn_triple[2])
|
||||
log.info(f"New primary: Boot LSN is {lsn_triple}")
|
||||
|
||||
new_primary_cur.execute("select count(*) from t")
|
||||
assert new_primary_cur.fetchone() == (200,)
|
||||
@@ -130,4 +147,4 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
new_primary_cur.execute("select count(*) from t")
|
||||
assert new_primary_cur.fetchone() == (300,)
|
||||
|
||||
primary.stop(mode="immediate")
|
||||
stop_and_check_lsn(primary, expected_primary_lsn)
|
||||
|
||||
@@ -74,7 +74,7 @@ def test_tenant_s3_restore(
|
||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
last_flush_lsns.append(last_flush_lsn)
|
||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
|
||||
wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn, timeout=60)
|
||||
log.info(f"{timeline} timeline {timeline_id} {last_flush_lsn=}")
|
||||
parent = timeline
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ def test_safekeeper_delete_timeline(neon_env_builder: NeonEnvBuilder, auth_enabl
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Timeline .* was not found in global map.*",
|
||||
".*Timeline .* has been deleted.*",
|
||||
".*Timeline .* was cancelled and cannot be used anymore.*",
|
||||
]
|
||||
)
|
||||
@@ -198,6 +199,7 @@ def test_safekeeper_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder)
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Timeline.*was cancelled.*",
|
||||
".*Timeline.*has been deleted.*",
|
||||
".*Timeline.*was not found.*",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1337,7 +1337,7 @@ def test_sharding_split_failures(
|
||||
# Create bystander tenants with various shard counts. They should not be affected by the aborted
|
||||
# splits. Regression test for https://github.com/neondatabase/cloud/issues/28589.
|
||||
bystanders = {} # id → shard_count
|
||||
for bystander_shard_count in [1, 2, 4, 8]:
|
||||
for bystander_shard_count in [1, 2, 4]:
|
||||
id, _ = env.create_tenant(shard_count=bystander_shard_count)
|
||||
bystanders[id] = bystander_shard_count
|
||||
|
||||
@@ -1358,6 +1358,8 @@ def test_sharding_split_failures(
|
||||
".*Reconcile error.*Cancelled.*",
|
||||
# While parent shard's client is stopped during split, flush loop updating LSNs will emit this warning
|
||||
".*Failed to schedule metadata upload after updating disk_consistent_lsn.*",
|
||||
# We didn't identify a secondary to remove.
|
||||
".*Keeping extra secondaries.*",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1388,51 +1390,36 @@ def test_sharding_split_failures(
|
||||
with pytest.raises(failure.expect_exception()):
|
||||
env.storage_controller.tenant_shard_split(tenant_id, shard_count=4)
|
||||
|
||||
def assert_shard_count(shard_count: int, exclude_ps_id: int | None = None) -> None:
|
||||
secondary_count = 0
|
||||
attached_count = 0
|
||||
log.info(f"Iterating over {len(env.pageservers)} pageservers to check shard count")
|
||||
for ps in env.pageservers:
|
||||
if exclude_ps_id is not None and ps.id == exclude_ps_id:
|
||||
continue
|
||||
|
||||
locations = ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||
for loc in locations:
|
||||
tenant_shard_id = TenantShardId.parse(loc[0])
|
||||
if tenant_shard_id.tenant_id != tenant_id:
|
||||
continue # skip bystanders
|
||||
log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
|
||||
assert tenant_shard_id.shard_count == shard_count
|
||||
if loc[1]["mode"] == "Secondary":
|
||||
secondary_count += 1
|
||||
else:
|
||||
attached_count += 1
|
||||
assert secondary_count == shard_count
|
||||
assert attached_count == shard_count
|
||||
|
||||
# We expect that the overall operation will fail, but some split requests
|
||||
# will have succeeded: the net result should be to return to a clean state, including
|
||||
# detaching any child shards.
|
||||
def assert_rolled_back(exclude_ps_id=None) -> None:
|
||||
secondary_count = 0
|
||||
attached_count = 0
|
||||
for ps in env.pageservers:
|
||||
if exclude_ps_id is not None and ps.id == exclude_ps_id:
|
||||
continue
|
||||
|
||||
locations = ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||
for loc in locations:
|
||||
tenant_shard_id = TenantShardId.parse(loc[0])
|
||||
if tenant_shard_id.tenant_id != tenant_id:
|
||||
continue # skip bystanders
|
||||
log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
|
||||
assert tenant_shard_id.shard_count == initial_shard_count
|
||||
if loc[1]["mode"] == "Secondary":
|
||||
secondary_count += 1
|
||||
else:
|
||||
attached_count += 1
|
||||
|
||||
assert secondary_count == initial_shard_count
|
||||
assert attached_count == initial_shard_count
|
||||
assert_shard_count(initial_shard_count, exclude_ps_id)
|
||||
|
||||
def assert_split_done(exclude_ps_id: int | None = None) -> None:
|
||||
secondary_count = 0
|
||||
attached_count = 0
|
||||
for ps in env.pageservers:
|
||||
if exclude_ps_id is not None and ps.id == exclude_ps_id:
|
||||
continue
|
||||
|
||||
locations = ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||
for loc in locations:
|
||||
tenant_shard_id = TenantShardId.parse(loc[0])
|
||||
if tenant_shard_id.tenant_id != tenant_id:
|
||||
continue # skip bystanders
|
||||
log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
|
||||
assert tenant_shard_id.shard_count == split_shard_count
|
||||
if loc[1]["mode"] == "Secondary":
|
||||
secondary_count += 1
|
||||
else:
|
||||
attached_count += 1
|
||||
assert attached_count == split_shard_count
|
||||
assert secondary_count == split_shard_count
|
||||
assert_shard_count(split_shard_count, exclude_ps_id)
|
||||
|
||||
def finish_split():
|
||||
# Having failed+rolled back, we should be able to split again
|
||||
@@ -1468,6 +1455,7 @@ def test_sharding_split_failures(
|
||||
|
||||
# The split should appear to be rolled back from the point of view of all pageservers
|
||||
# apart from the one that is offline
|
||||
env.storage_controller.reconcile_until_idle(timeout_secs=60, max_interval=2)
|
||||
wait_until(lambda: assert_rolled_back(exclude_ps_id=failure.pageserver_id))
|
||||
|
||||
finish_split()
|
||||
@@ -1482,6 +1470,7 @@ def test_sharding_split_failures(
|
||||
log.info("Clearing failure...")
|
||||
failure.clear(env)
|
||||
|
||||
env.storage_controller.reconcile_until_idle(timeout_secs=60, max_interval=2)
|
||||
wait_until(assert_rolled_back)
|
||||
|
||||
# Having rolled back, the tenant should be working
|
||||
@@ -1836,3 +1825,90 @@ def test_sharding_gc(
|
||||
shard_gc_cutoff_lsn = Lsn(shard_index["metadata_bytes"]["latest_gc_cutoff_lsn"])
|
||||
log.info(f"Shard {shard_number} cutoff LSN: {shard_gc_cutoff_lsn}")
|
||||
assert shard_gc_cutoff_lsn == shard_0_gc_cutoff_lsn
|
||||
|
||||
|
||||
def test_split_ps_delete_old_shard_after_commit(neon_env_builder: NeonEnvBuilder):
    """
    An aborted shard split must not cost the pageservers any on-demand downloads.

    The pageserver is expected to keep the old (parent) shards around until the split is
    committed.  We fail a 4 -> 16 split right before it completes, wait for the abort to
    finish, and compare the on-demand download counters with their pre-split values.
    """
    DBNAME = "regression"
    init_shard_count = 4
    stripe_size = 32

    # One pageserver per initial shard.
    neon_env_builder.num_pageservers = init_shard_count
    env = neon_env_builder.init_start(
        initial_tenant_shard_count=init_shard_count,
        initial_tenant_shard_stripe_size=stripe_size,
    )

    tolerated_errors = [
        # All split failures log a warning when they enqueue the abort operation
        ".*Enqueuing background abort.*",
        # Tolerate any error logs that mention a failpoint
        ".*failpoint.*",
    ]
    env.storage_controller.allowed_errors.extend(tolerated_errors)

    endpoint = env.endpoints.create("main")
    endpoint.respec(skip_pg_catalog_updates=False)
    endpoint.start()

    # Populate the tenant with some data before attempting the split.
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
    endpoint.safe_psql("CREATE TABLE usertable ( YCSB_KEY INT, FIELD0 TEXT);")
    insert_row = "INSERT INTO usertable SELECT random(), repeat('a', 1000);"
    for _ in range(1000):
        endpoint.safe_psql(insert_row, log_query=False)

    def collect_downloaded_bytes() -> list[float | None]:
        # One on-demand download counter reading per pageserver, in pageserver order.
        return [
            page_server.http_client().get_metric_value(
                "pageserver_remote_ondemand_downloaded_bytes_total"
            )
            for page_server in env.pageservers
        ]

    # Baseline taken before the split is attempted.
    downloaded_bytes_before = collect_downloaded_bytes()

    # Make the split fail just before completion.
    env.storage_controller.configure_failpoints(("shard-split-pre-complete", "return(1)"))
    with pytest.raises(StorageControllerApiException):
        env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=16)

    def check_split_is_aborted():
        # The storage controller must be back to the original, settled shard set.
        tenants = env.storage_controller.tenant_list()
        assert len(tenants) == 1
        shards = tenants[0]["shards"]
        assert len(shards) == init_shard_count
        for shard in shards:
            assert not shard["is_splitting"]
            assert not shard["is_reconciling"]

        # On disk, only directories of the original shard count may remain.
        valid_shards = 0
        for ps in env.pageservers:
            for tenant_dir in os.listdir(ps.workdir / "tenants"):
                try:
                    tenant_shard_id = TenantShardId.parse(tenant_dir)
                except ValueError:
                    log.info(f"{tenant_dir} is not valid tenant shard id")
                    continue
                valid_shards += 1
                assert tenant_shard_id.shard_count == init_shard_count
        assert valid_shards >= init_shard_count

    wait_until(check_split_is_aborted)

    # Read the table back after the abort, before comparing the counters.
    endpoint.safe_psql("SELECT count(*) from usertable;", log_query=False)

    # Nothing may have been downloaded as a consequence of the aborted split.
    downloaded_bytes_after = collect_downloaded_bytes()
    assert downloaded_bytes_before == downloaded_bytes_after

    endpoint.stop_and_destroy()
|
||||
|
||||
@@ -88,6 +88,12 @@ def test_storage_controller_smoke(
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
env = neon_env_builder.init_configs()
|
||||
|
||||
# These bubble up from safekeepers
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
# Start services by hand so that we can skip a pageserver (this will start + register later)
|
||||
env.broker.start()
|
||||
env.storage_controller.start()
|
||||
@@ -2956,7 +2962,7 @@ def test_storage_controller_leadership_transfer_during_split(
|
||||
env.storage_controller.allowed_errors.extend(
|
||||
[".*Unexpected child shard count.*", ".*Enqueuing background abort.*"]
|
||||
)
|
||||
pause_failpoint = "shard-split-pre-complete"
|
||||
pause_failpoint = "shard-split-pre-complete-pause"
|
||||
env.storage_controller.configure_failpoints((pause_failpoint, "pause"))
|
||||
|
||||
split_fut = executor.submit(
|
||||
@@ -3003,7 +3009,7 @@ def test_storage_controller_leadership_transfer_during_split(
|
||||
env.storage_controller.request(
|
||||
"PUT",
|
||||
f"http://127.0.0.1:{storage_controller_1_port}/debug/v1/failpoints",
|
||||
json=[{"name": "shard-split-pre-complete", "actions": "off"}],
|
||||
json=[{"name": pause_failpoint, "actions": "off"}],
|
||||
headers=env.storage_controller.headers(TokenScope.ADMIN),
|
||||
)
|
||||
|
||||
@@ -3093,6 +3099,58 @@ def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvB
|
||||
wait_until(reconfigure_node_again)
|
||||
|
||||
|
||||
def test_ps_unavailable_after_delete(neon_env_builder: NeonEnvBuilder):
    """
    Check node registration around pageserver deletion and tombstones.

    After a pageserver is deleted from the storage controller, restarting it must not
    bring the node count back up while its tombstone exists.  Once the tombstone is
    deleted, the node count is expected to return to the original value.
    """
    neon_env_builder.num_pageservers = 3
    env = neon_env_builder.init_start()
    storcon = env.storage_controller

    def assert_nodes_count(n: int):
        assert len(storcon.node_list()) == n

    # Sanity check: all pageservers are registered before the deletion.
    assert_nodes_count(3)

    ps = env.pageservers[0]
    storcon.node_delete(ps.id)

    # The deleted node is gone from the node list.
    assert_nodes_count(2)

    # Start the pageserver in a worker thread so the test can keep inspecting the
    # storage controller in the meantime.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        log.info("Restarting tombstoned pageserver...")
        ps.stop()
        ps_start_fut = executor.submit(ps.start, await_active=False)

        # Restarting the deleted pageserver must not change the node count.
        assert_nodes_count(2)

        remaining = storcon.tombstone_list()
        assert len(remaining) == 1 and remaining[0]["id"] == ps.id

        storcon.tombstone_delete(ps.id)

        remaining = storcon.tombstone_list()
        assert len(remaining) == 0

        # Wait for the pageserver start operation to complete.
        # A failure with an ordinary exception may be caused by the storage controller
        # refusing to register the node, so the pageserver is restarted in that case.
        # A TimeoutError means the start call is still hanging after 20 seconds; that is
        # an unexpected failure mode and is left to propagate.
        try:
            ps_start_fut.result(timeout=20)
        except TimeoutError:
            raise
        except Exception:
            log.info("Restarting deleted pageserver...")
            ps.restart()

        # With the tombstone gone, the node count should return to three.
        wait_until(lambda: assert_nodes_count(3))
|
||||
|
||||
|
||||
def test_storage_controller_timeline_crud_race(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
The storage controller is meant to handle the case where a timeline CRUD operation races
|
||||
@@ -3403,7 +3461,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
assert target.get_safekeeper(fake_id) is None
|
||||
|
||||
assert len(target.get_safekeepers()) == 0
|
||||
start_sks = target.get_safekeepers()
|
||||
|
||||
sk_0 = env.safekeepers[0]
|
||||
|
||||
@@ -3425,7 +3483,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
inserted = target.get_safekeeper(fake_id)
|
||||
assert inserted is not None
|
||||
assert target.get_safekeepers() == [inserted]
|
||||
assert target.get_safekeepers() == start_sks + [inserted]
|
||||
assert eq_safekeeper_records(body, inserted)
|
||||
|
||||
# error out if pk is changed (unexpected)
|
||||
@@ -3437,7 +3495,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
assert exc.value.status_code == 400
|
||||
|
||||
inserted_again = target.get_safekeeper(fake_id)
|
||||
assert target.get_safekeepers() == [inserted_again]
|
||||
assert target.get_safekeepers() == start_sks + [inserted_again]
|
||||
assert inserted_again is not None
|
||||
assert eq_safekeeper_records(inserted, inserted_again)
|
||||
|
||||
@@ -3446,7 +3504,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
body["version"] += 1
|
||||
target.on_safekeeper_deploy(fake_id, body)
|
||||
inserted_now = target.get_safekeeper(fake_id)
|
||||
assert target.get_safekeepers() == [inserted_now]
|
||||
assert target.get_safekeepers() == start_sks + [inserted_now]
|
||||
assert inserted_now is not None
|
||||
|
||||
assert eq_safekeeper_records(body, inserted_now)
|
||||
@@ -3455,7 +3513,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
body["https_port"] = 123
|
||||
target.on_safekeeper_deploy(fake_id, body)
|
||||
inserted_now = target.get_safekeeper(fake_id)
|
||||
assert target.get_safekeepers() == [inserted_now]
|
||||
assert target.get_safekeepers() == start_sks + [inserted_now]
|
||||
assert inserted_now is not None
|
||||
assert eq_safekeeper_records(body, inserted_now)
|
||||
env.storage_controller.consistency_check()
|
||||
@@ -3464,7 +3522,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
|
||||
body["https_port"] = None
|
||||
target.on_safekeeper_deploy(fake_id, body)
|
||||
inserted_now = target.get_safekeeper(fake_id)
|
||||
assert target.get_safekeepers() == [inserted_now]
|
||||
assert target.get_safekeepers() == start_sks + [inserted_now]
|
||||
assert inserted_now is not None
|
||||
assert eq_safekeeper_records(body, inserted_now)
|
||||
env.storage_controller.consistency_check()
|
||||
@@ -3583,6 +3641,11 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
tenant_id = TenantId.generate()
|
||||
timeline_id = TimelineId.generate()
|
||||
env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})
|
||||
@@ -4373,6 +4436,53 @@ def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder,
|
||||
assert initial_ps.http_client().tenant_list_locations()["tenant_shards"] == []
|
||||
|
||||
|
||||
def test_attached_0_graceful_migration(neon_env_builder: NeonEnvBuilder):
    """
    Migrate a tenant shard with the Attached(0) placement policy using a prewarm migration.

    The shard is moved to another pageserver in the same AZ.  The test then waits until
    the source pageserver lists no locations and the destination lists exactly one, in
    "AttachedSingle" mode.
    """
    neon_env_builder.num_pageservers = 4
    neon_env_builder.num_azs = 2
    neon_env_builder.storcon_kick_secondary_downloads = False

    env = neon_env_builder.init_start()
    storcon = env.storage_controller
    tenant_id = env.initial_tenant

    # Attached(0) is the default; set it explicitly so that no secondary locations
    # are requested.
    storcon.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
    storcon.reconcile_until_idle()

    desc = storcon.tenant_describe(tenant_id)["shards"][0]
    src_ps_id = desc["node_attached"]
    src_az = desc["preferred_az_id"]
    src_ps = env.get_pageserver(src_ps_id)

    # There must be no secondary locations with Attached(0) placement policy
    assert len(desc["node_secondary"]) == 0

    # Pick the first other pageserver that lives in the same AZ as the source.
    same_az_candidates = [
        ps for ps in env.pageservers if ps.id != src_ps_id and ps.az_id == src_az
    ]
    dst_ps = same_az_candidates[0]

    storcon.tenant_shard_migrate(
        TenantShardId(tenant_id, 0, 0),
        dst_ps.id,
        config=StorageControllerMigrationConfig(prewarm=True),
    )

    def tenant_shard_migrated():
        # The source must have dropped its location ...
        src_locations = src_ps.http_client().tenant_list_locations()["tenant_shards"]
        assert len(src_locations) == 0
        log.info(f"Tenant shard migrated from {src_ps.id}")

        # ... and the destination must hold the only, attached one.
        dst_locations = dst_ps.http_client().tenant_list_locations()["tenant_shards"]
        assert len(dst_locations) == 1
        assert dst_locations[0][1]["mode"] == "AttachedSingle"
        log.info(f"Tenant shard migrated to {dst_ps.id}")

    # In the end the tenant shard must exist only on the destination node.
    # The long timeout is needed because [`DEFAULT_HEATMAP_PERIOD`] and
    # [`DEFAULT_DOWNLOAD_INTERVAL`] are set to 60 seconds by default.
    #
    # TODO: we should consider making these configurable, so the test can run faster.
    wait_until(tenant_shard_migrated, timeout=180, interval=5, status_interval=10)
    log.info("Tenant shard migrated successfully")
|
||||
|
||||
|
||||
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
|
||||
def test_storage_controller_migrate_with_pageserver_restart(
|
||||
neon_env_builder: NeonEnvBuilder, make_httpserver
|
||||
|
||||
@@ -341,6 +341,11 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
tenant_id = TenantId.generate()
|
||||
timeline_id = TimelineId.generate()
|
||||
env.create_tenant(
|
||||
|
||||
@@ -430,6 +430,7 @@ def test_tenant_delete_stale_shards(neon_env_builder: NeonEnvBuilder, pg_bin: Pg
|
||||
workload.init()
|
||||
workload.write_rows(256)
|
||||
workload.validate()
|
||||
workload.stop()
|
||||
|
||||
assert_prefix_not_empty(
|
||||
neon_env_builder.pageserver_remote_storage,
|
||||
|
||||
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineId
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineArchivalState, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import (
|
||||
PAGESERVER_GLOBAL_METRICS,
|
||||
@@ -299,6 +299,65 @@ def test_pageserver_metrics_removed_after_detach(neon_env_builder: NeonEnvBuilde
|
||||
assert post_detach_samples == set()
|
||||
|
||||
|
||||
def test_pageserver_metrics_removed_after_offload(neon_env_builder: NeonEnvBuilder):
    """
    Check that offloading a timeline leaves no pageserver metric samples labelled with it.

    Two timelines of one tenant are written to.  Each is then archived and offloaded, and
    the metric samples carrying its tenant_id/timeline_id labels must be gone afterwards.
    """
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
    neon_env_builder.num_safekeepers = 3

    env = neon_env_builder.init_start()
    tenant_1, _ = env.create_tenant()

    timeline_1 = env.create_timeline("test_metrics_removed_after_offload_1", tenant_id=tenant_1)
    timeline_2 = env.create_timeline("test_metrics_removed_after_offload_2", tenant_id=tenant_1)

    endpoint_tenant1 = env.endpoints.create_start(
        "test_metrics_removed_after_offload_1", tenant_id=tenant_1
    )
    endpoint_tenant2 = env.endpoints.create_start(
        "test_metrics_removed_after_offload_2", tenant_id=tenant_1
    )

    # Write some data through each endpoint, then stop it.
    for endpoint in (endpoint_tenant1, endpoint_tenant2):
        with closing(endpoint.connect()) as conn, conn.cursor() as cur:
            cur.execute("CREATE TABLE t(key int primary key, value text)")
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute("SELECT sum(key) FROM t")
            assert cur.fetchone() == (5000050000,)
        endpoint.stop()

    def get_ps_metric_samples_for_timeline(
        tenant_id: TenantId, timeline_id: TimelineId
    ) -> list[Sample]:
        # Collect every sample, across all metric names, whose labels match the timeline.
        ps_metrics = env.pageserver.http_client().get_metrics()
        labels = {"tenant_id": str(tenant_id), "timeline_id": str(timeline_id)}
        samples = []
        for metric_name in ps_metrics.metrics:
            samples.extend(ps_metrics.query_all(name=metric_name, filter=labels))
        return samples

    for timeline in (timeline_1, timeline_2):
        # There has to be something to remove, otherwise the final check proves nothing.
        pre_offload_samples = {
            sample.name for sample in get_ps_metric_samples_for_timeline(tenant_1, timeline)
        }
        assert len(pre_offload_samples) > 0, f"expected at least one sample for {timeline}"

        # Archive the timeline first, then offload it.
        env.pageserver.http_client().timeline_archival_config(
            tenant_1,
            timeline,
            state=TimelineArchivalState.ARCHIVED,
        )
        env.pageserver.http_client().timeline_offload(tenant_1, timeline)

        post_offload_samples = {
            sample.name for sample in get_ps_metric_samples_for_timeline(tenant_1, timeline)
        }
        assert post_offload_samples == set()
|
||||
|
||||
|
||||
def test_pageserver_with_empty_tenants(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
|
||||
@@ -21,7 +21,10 @@ from fixtures.neon_fixtures import (
|
||||
last_flush_lsn_upload,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
|
||||
from fixtures.pageserver.http import (
|
||||
HistoricLayerInfo,
|
||||
PageserverApiException,
|
||||
)
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_timeline_detail_404
|
||||
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
|
||||
from fixtures.utils import assert_pageserver_backups_equal, skip_in_debug_build, wait_until
|
||||
@@ -413,6 +416,7 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
|
||||
"read_only": True,
|
||||
},
|
||||
)
|
||||
|
||||
sk = env.safekeepers[0]
|
||||
assert sk
|
||||
with pytest.raises(requests.exceptions.HTTPError, match="Not Found"):
|
||||
@@ -504,8 +508,15 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
|
||||
assert len(lineage.get("original_ancestor", [])) == 0
|
||||
assert len(lineage.get("reparenting_history", [])) == 0
|
||||
|
||||
for name, _, _, rows, starts in expected_result:
|
||||
with env.endpoints.create_start(name, tenant_id=env.initial_tenant) as ep:
|
||||
for branch_name, queried_timeline, _, rows, starts in expected_result:
|
||||
details = client.timeline_detail(env.initial_tenant, queried_timeline)
|
||||
log.info(f"reading data from branch {branch_name}")
|
||||
# specifying the lsn makes the endpoint read-only and not connect to safekeepers
|
||||
with env.endpoints.create(
|
||||
branch_name,
|
||||
lsn=Lsn(details["last_record_lsn"]),
|
||||
) as ep:
|
||||
ep.start(safekeeper_generation=1)
|
||||
assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows
|
||||
assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1
|
||||
|
||||
@@ -1088,6 +1099,9 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||
|
||||
@@ -1209,6 +1223,9 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||
|
||||
|
||||
@@ -24,6 +24,10 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
|
||||
initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"},
|
||||
initial_tenant_shard_count=2 if sharded else None,
|
||||
)
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
if sharded:
|
||||
http = env.storage_controller.pageserver_api()
|
||||
|
||||
@@ -229,7 +229,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Test timeline_list endpoint.
|
||||
http_cli = env.safekeepers[0].http_client()
|
||||
assert len(http_cli.timeline_list()) == 3
|
||||
assert len(http_cli.timeline_list()) == 4
|
||||
|
||||
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
@@ -433,6 +433,7 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Timeline .* was not found in global map.*",
|
||||
".*Timeline .* has been deleted.*",
|
||||
".*Timeline .* was cancelled and cannot be used anymore.*",
|
||||
]
|
||||
)
|
||||
@@ -739,8 +740,8 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.create_branch("test_timeline_status")
|
||||
endpoint = env.endpoints.create_start("test_timeline_status")
|
||||
timeline_id = env.initial_timeline
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
wa = env.safekeepers[0]
|
||||
|
||||
@@ -1291,6 +1292,12 @@ def test_lagging_sk(neon_env_builder: NeonEnvBuilder):
|
||||
# it works without compute at all.
|
||||
def test_peer_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
|
||||
# timelines should be created the old way
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
@@ -1532,6 +1539,11 @@ def test_safekeeper_without_pageserver(
|
||||
|
||||
|
||||
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
# timelines should be created the old way manually until we have migration support
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
def execute_payload(endpoint: Endpoint):
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -1661,6 +1673,15 @@ def test_pull_timeline(neon_env_builder: NeonEnvBuilder, live_sk_change: bool):
|
||||
res = env.safekeepers[3].pull_timeline(
|
||||
[env.safekeepers[0], env.safekeepers[2]], tenant_id, timeline_id
|
||||
)
|
||||
sk_id_1 = env.safekeepers[0].safekeeper_id()
|
||||
sk_id_3 = env.safekeepers[2].safekeeper_id()
|
||||
sk_id_4 = env.safekeepers[3].safekeeper_id()
|
||||
new_conf = MembershipConfiguration(
|
||||
generation=2, members=[sk_id_1, sk_id_3, sk_id_4], new_members=None
|
||||
)
|
||||
for i in [0, 2, 3]:
|
||||
env.safekeepers[i].http_client().membership_switch(tenant_id, timeline_id, new_conf)
|
||||
|
||||
log.info("Finished pulling timeline")
|
||||
log.info(res)
|
||||
|
||||
@@ -1705,13 +1726,15 @@ def test_pull_timeline_gc(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
|
||||
env = neon_env_builder.init_start()
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
(src_sk, dst_sk) = (env.safekeepers[0], env.safekeepers[2])
|
||||
|
||||
dst_sk.stop()
|
||||
|
||||
[tenant_id, timeline_id] = env.create_tenant()
|
||||
|
||||
log.info("use only first 2 safekeepers, 3rd will be seeded")
|
||||
endpoint = env.endpoints.create("main")
|
||||
endpoint = env.endpoints.create("main", tenant_id=tenant_id)
|
||||
endpoint.active_safekeepers = [1, 2]
|
||||
endpoint.start()
|
||||
endpoint.safe_psql("create table t(key int, value text)")
|
||||
@@ -1723,6 +1746,7 @@ def test_pull_timeline_gc(neon_env_builder: NeonEnvBuilder):
|
||||
src_http = src_sk.http_client()
|
||||
# run pull_timeline which will halt before downloading files
|
||||
src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
|
||||
dst_sk.start()
|
||||
pt_handle = PropagatingThread(
|
||||
target=dst_sk.pull_timeline, args=([src_sk], tenant_id, timeline_id)
|
||||
)
|
||||
@@ -1782,23 +1806,27 @@ def test_pull_timeline_term_change(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
|
||||
env = neon_env_builder.init_start()
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
(src_sk, dst_sk) = (env.safekeepers[0], env.safekeepers[2])
|
||||
dst_sk.stop()
|
||||
|
||||
src_http = src_sk.http_client()
|
||||
src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
|
||||
|
||||
timeline_id = env.create_branch("pull_timeline_term_changes")
|
||||
|
||||
# run pull_timeline which will halt before downloading files
|
||||
log.info("use only first 2 safekeepers, 3rd will be seeded")
|
||||
ep = env.endpoints.create("main")
|
||||
ep = env.endpoints.create("pull_timeline_term_changes")
|
||||
ep.active_safekeepers = [1, 2]
|
||||
ep.start()
|
||||
ep.safe_psql("create table t(key int, value text)")
|
||||
ep.safe_psql("insert into t select generate_series(1, 1000), 'pear'")
|
||||
|
||||
src_http = src_sk.http_client()
|
||||
# run pull_timeline which will halt before downloading files
|
||||
src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
|
||||
pt_handle = PropagatingThread(
|
||||
target=dst_sk.pull_timeline, args=([src_sk], tenant_id, timeline_id)
|
||||
)
|
||||
dst_sk.start()
|
||||
pt_handle.start()
|
||||
src_sk.wait_until_paused("sk-snapshot-after-list-pausable")
|
||||
|
||||
@@ -1807,7 +1835,7 @@ def test_pull_timeline_term_change(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# restart compute to bump term
|
||||
ep.stop()
|
||||
ep = env.endpoints.create("main")
|
||||
ep = env.endpoints.create("pull_timeline_term_changes")
|
||||
ep.active_safekeepers = [1, 2]
|
||||
ep.start()
|
||||
ep.safe_psql("insert into t select generate_series(1, 100), 'pear'")
|
||||
@@ -1929,12 +1957,18 @@ def test_pull_timeline_while_evicted(neon_env_builder: NeonEnvBuilder):
|
||||
@run_only_on_default_postgres("tests only safekeeper API")
|
||||
def test_membership_api(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
# timelines should be created the old way
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# These are expected after timeline deletion on safekeepers.
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Timeline .* was not found in global map.*",
|
||||
".*Timeline .* has been deleted.*",
|
||||
".*Timeline .* was cancelled and cannot be used anymore.*",
|
||||
]
|
||||
)
|
||||
@@ -2008,6 +2042,12 @@ def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder):
|
||||
created manually, later storcon will do that.
|
||||
"""
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
|
||||
# timelines should be created the old way manually
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
@@ -2063,7 +2103,7 @@ def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.create_branch("test_idle_reconnections")
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
def collect_stats() -> dict[str, float]:
|
||||
# we need to collect safekeeper_pg_queries_received_total metric from all safekeepers
|
||||
@@ -2094,7 +2134,7 @@ def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
collect_stats()
|
||||
|
||||
endpoint = env.endpoints.create_start("test_idle_reconnections")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
# just write something to the timeline
|
||||
endpoint.safe_psql("create table t(i int)")
|
||||
collect_stats()
|
||||
|
||||
@@ -590,6 +590,13 @@ async def run_wal_truncation(env: NeonEnv, safekeeper_proto_version: int):
|
||||
@pytest.mark.parametrize("safekeeper_proto_version", [2, 3])
|
||||
def test_wal_truncation(neon_env_builder: NeonEnvBuilder, safekeeper_proto_version: int):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
if safekeeper_proto_version == 2:
|
||||
# On the legacy protocol, we don't support generations, which are part of
|
||||
# `timelines_onto_safekeepers`
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
asyncio.run(run_wal_truncation(env, safekeeper_proto_version))
|
||||
@@ -713,6 +720,11 @@ async def run_quorum_sanity(env: NeonEnv):
|
||||
# we don't.
|
||||
def test_quorum_sanity(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 4
|
||||
|
||||
# The test almost always fails in the new mode.
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
asyncio.run(run_quorum_sanity(env))
|
||||
|
||||
@@ -16,6 +16,13 @@ if TYPE_CHECKING:
|
||||
# Checks that pageserver's walreceiver state is printed in the logs during WAL wait timeout.
|
||||
# Ensures that walreceiver does not run without any data inserted and only starts after the insertion.
|
||||
def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
|
||||
# we assert below that the walreceiver is not active before data writes.
|
||||
# with manually created timelines, it is active.
|
||||
# FIXME: remove this test once we remove timelines_onto_safekeepers
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
# Trigger WAL wait timeout faster
|
||||
neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
90
test_runner/sql_regress/expected/neon-event-triggers.out
Normal file
90
test_runner/sql_regress/expected/neon-event-triggers.out
Normal file
@@ -0,0 +1,90 @@
|
||||
create or replace function admin_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'admin event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
create role neon_superuser;
|
||||
create role neon_admin login inherit createrole createdb in role neon_superuser;
|
||||
grant create on schema public to neon_admin;
|
||||
create database neondb with owner neon_admin;
|
||||
grant all privileges on database neondb to neon_superuser;
|
||||
create role neon_user;
|
||||
grant create on schema public to neon_user;
|
||||
create event trigger on_ddl1 on ddl_command_end
|
||||
execute procedure admin_proc();
|
||||
set role neon_user;
|
||||
-- check that non-privileged user can not change neon.event_triggers
|
||||
set neon.event_triggers to false;
|
||||
ERROR: permission denied to set neon.event_triggers
|
||||
DETAIL: Only "neon_superuser" is allowed to set the GUC
|
||||
-- Non-privileged neon user should not be able to create event trigers
|
||||
create event trigger on_ddl2 on ddl_command_end
|
||||
execute procedure admin_proc();
|
||||
ERROR: permission denied to create event trigger "on_ddl2"
|
||||
HINT: Must be superuser to create an event trigger.
|
||||
set role neon_admin;
|
||||
-- neon_superuser should be able to create event trigers
|
||||
create or replace function neon_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neon event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
NOTICE: admin event trigger is executed for neon_admin
|
||||
create event trigger on_ddl2 on ddl_command_end
|
||||
execute procedure neon_proc();
|
||||
\c neondb neon_admin
|
||||
create or replace function neondb_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neondb event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
create or replace function neondb_secdef_proc()
|
||||
returns event_trigger
|
||||
language plpgsql
|
||||
SECURITY DEFINER
|
||||
as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neondb secdef event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
-- neon_admin (neon_superuser member) should be able to create event triggers
|
||||
create event trigger on_ddl3 on ddl_command_end
|
||||
execute procedure neondb_proc();
|
||||
create event trigger on_ddl4 on ddl_command_end
|
||||
execute procedure neondb_secdef_proc();
|
||||
-- Check that event trigger is fired for neon_admin
|
||||
create table t1(x integer);
|
||||
NOTICE: neondb event trigger is executed for neon_admin
|
||||
NOTICE: neondb secdef event trigger is executed for neon_admin
|
||||
-- Check that event trigger can be skipped
|
||||
set neon.event_triggers to false;
|
||||
create table t2(x integer);
|
||||
WARNING: Skipping Event Trigger: neon.event_triggers is false
|
||||
WARNING: Skipping Event Trigger: neon.event_triggers is false
|
||||
\c regression cloud_admin
|
||||
-- Check that event triggers are not fired for superuser
|
||||
create table t3(x integer);
|
||||
NOTICE: admin event trigger is executed for cloud_admin
|
||||
WARNING: Skipping Event Trigger
|
||||
DETAIL: Event Trigger function "neon_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
|
||||
\c neondb cloud_admin
|
||||
-- Check that user-defined event triggers are not fired for superuser
|
||||
create table t4(x integer);
|
||||
WARNING: Skipping Event Trigger
|
||||
DETAIL: Event Trigger function "neondb_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
|
||||
WARNING: Skipping Event Trigger
|
||||
DETAIL: Event Trigger function "neondb_secdef_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
|
||||
\c neondb neon_admin
|
||||
-- Check that neon_admin can drop event triggers
|
||||
drop event trigger on_ddl3;
|
||||
drop event trigger on_ddl4;
|
||||
@@ -9,3 +9,4 @@ test: neon-rel-truncate
|
||||
test: neon-clog
|
||||
test: neon-test-utils
|
||||
test: neon-vacuum-full
|
||||
test: neon-event-triggers
|
||||
|
||||
96
test_runner/sql_regress/sql/neon-event-triggers.sql
Normal file
96
test_runner/sql_regress/sql/neon-event-triggers.sql
Normal file
@@ -0,0 +1,96 @@
|
||||
create or replace function admin_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'admin event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
|
||||
create role neon_superuser;
|
||||
create role neon_admin login inherit createrole createdb in role neon_superuser;
|
||||
grant create on schema public to neon_admin;
|
||||
create database neondb with owner neon_admin;
|
||||
grant all privileges on database neondb to neon_superuser;
|
||||
|
||||
create role neon_user;
|
||||
grant create on schema public to neon_user;
|
||||
|
||||
create event trigger on_ddl1 on ddl_command_end
|
||||
execute procedure admin_proc();
|
||||
|
||||
set role neon_user;
|
||||
|
||||
-- check that non-privileged user can not change neon.event_triggers
|
||||
set neon.event_triggers to false;
|
||||
|
||||
-- Non-privileged neon user should not be able to create event trigers
|
||||
create event trigger on_ddl2 on ddl_command_end
|
||||
execute procedure admin_proc();
|
||||
|
||||
set role neon_admin;
|
||||
|
||||
-- neon_superuser should be able to create event trigers
|
||||
create or replace function neon_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neon event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
|
||||
create event trigger on_ddl2 on ddl_command_end
|
||||
execute procedure neon_proc();
|
||||
|
||||
\c neondb neon_admin
|
||||
|
||||
create or replace function neondb_proc()
|
||||
returns event_trigger
|
||||
language plpgsql as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neondb event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
|
||||
create or replace function neondb_secdef_proc()
|
||||
returns event_trigger
|
||||
language plpgsql
|
||||
SECURITY DEFINER
|
||||
as
|
||||
$$
|
||||
begin
|
||||
raise notice 'neondb secdef event trigger is executed for %', current_user;
|
||||
end;
|
||||
$$;
|
||||
|
||||
-- neon_admin (neon_superuser member) should be able to create event triggers
|
||||
create event trigger on_ddl3 on ddl_command_end
|
||||
execute procedure neondb_proc();
|
||||
|
||||
create event trigger on_ddl4 on ddl_command_end
|
||||
execute procedure neondb_secdef_proc();
|
||||
|
||||
-- Check that event trigger is fired for neon_admin
|
||||
create table t1(x integer);
|
||||
|
||||
-- Check that event trigger can be skipped
|
||||
set neon.event_triggers to false;
|
||||
create table t2(x integer);
|
||||
|
||||
\c regression cloud_admin
|
||||
|
||||
-- Check that event triggers are not fired for superuser
|
||||
create table t3(x integer);
|
||||
|
||||
\c neondb cloud_admin
|
||||
|
||||
-- Check that user-defined event triggers are not fired for superuser
|
||||
create table t4(x integer);
|
||||
|
||||
\c neondb neon_admin
|
||||
|
||||
-- Check that neon_admin can drop event triggers
|
||||
drop event trigger on_ddl3;
|
||||
drop event trigger on_ddl4;
|
||||
Reference in New Issue
Block a user