Merge branch 'main' into devin/1745492468-add-dev-flag-pr11517

This commit is contained in:
John Spray
2025-06-26 07:32:08 -07:00
committed by GitHub
425 changed files with 12213 additions and 5436 deletions

View File

@@ -24,7 +24,7 @@ The value to place in the `aud` claim.
@final
class ComputeClaimsScope(StrEnum):
ADMIN = "admin"
ADMIN = "compute_ctl:admin"
@final
@@ -69,15 +69,17 @@ class EndpointHttpClient(requests.Session):
json: dict[str, str] = res.json()
return json
def prewarm_lfc(self):
self.post(f"http://localhost:{self.external_port}/lfc/prewarm").raise_for_status()
def prewarm_lfc(self, from_endpoint_id: str | None = None):
url: str = f"http://localhost:{self.external_port}/lfc/prewarm"
params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
self.post(url, params=params).raise_for_status()
def prewarmed():
json = self.prewarm_lfc_status()
status, err = json["status"], json.get("error")
assert status == "completed", f"{status}, error {err}"
wait_until(prewarmed)
wait_until(prewarmed, timeout=60)
def offload_lfc(self):
url = f"http://localhost:{self.external_port}/lfc/offload"

View File

@@ -129,6 +129,18 @@ class NeonAPI:
return cast("dict[str, Any]", resp.json())
def get_project_limits(self, project_id: str) -> dict[str, Any]:
resp = self.__request(
"GET",
f"/projects/{project_id}/limits",
headers={
"Accept": "application/json",
"Content-Type": "application/json",
},
)
return cast("dict[str, Any]", resp.json())
def delete_project(
self,
project_id: str,

View File

@@ -497,6 +497,7 @@ class NeonLocalCli(AbstractNeonCli):
tenant_id: TenantId,
pg_version: PgVersion,
endpoint_id: str | None = None,
grpc: bool | None = None,
hot_standby: bool = False,
lsn: Lsn | None = None,
pageserver_id: int | None = None,
@@ -521,6 +522,8 @@ class NeonLocalCli(AbstractNeonCli):
args.extend(["--external-http-port", str(external_http_port)])
if internal_http_port is not None:
args.extend(["--internal-http-port", str(internal_http_port)])
if grpc:
args.append("--grpc")
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:
@@ -564,6 +567,7 @@ class NeonLocalCli(AbstractNeonCli):
basebackup_request_tries: int | None = None,
timeout: str | None = None,
env: dict[str, str] | None = None,
dev: bool = False,
) -> subprocess.CompletedProcess[str]:
args = [
"endpoint",
@@ -589,6 +593,8 @@ class NeonLocalCli(AbstractNeonCli):
args.extend(["--create-test-user"])
if timeout is not None:
args.extend(["--start-timeout", str(timeout)])
if dev:
args.extend(["--dev"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()
@@ -617,7 +623,7 @@ class NeonLocalCli(AbstractNeonCli):
destroy=False,
check_return_code=True,
mode: str | None = None,
) -> subprocess.CompletedProcess[str]:
) -> tuple[Lsn | None, subprocess.CompletedProcess[str]]:
args = [
"endpoint",
"stop",
@@ -629,7 +635,11 @@ class NeonLocalCli(AbstractNeonCli):
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
proc = self.raw_cli(args, check_return_code=check_return_code)
log.debug(f"endpoint stop stdout: {proc.stdout}")
lsn_str = proc.stdout.split()[-1]
lsn: Lsn | None = None if lsn_str == "null" else Lsn(lsn_str)
return lsn, proc
def mappings_map_branch(
self, name: str, tenant_id: TenantId, timeline_id: TimelineId

View File

@@ -453,6 +453,7 @@ class NeonEnvBuilder:
pageserver_get_vectored_concurrent_io: str | None = None,
pageserver_tracing_config: PageserverTracingConfig | None = None,
pageserver_import_config: PageserverImportConfig | None = None,
storcon_kick_secondary_downloads: bool | None = None,
):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
@@ -489,7 +490,9 @@ class NeonEnvBuilder:
self.config_init_force: str | None = None
self.top_output_dir = top_output_dir
self.control_plane_hooks_api: str | None = None
self.storage_controller_config: dict[Any, Any] | None = None
self.storage_controller_config: dict[Any, Any] | None = {
"timelines_onto_safekeepers": True,
}
# Flag to enable https listener in pageserver, generate local ssl certs,
# and force storage controller to use https for pageserver api.
@@ -512,6 +515,8 @@ class NeonEnvBuilder:
self.pageserver_tracing_config = pageserver_tracing_config
self.pageserver_import_config = pageserver_import_config
self.storcon_kick_secondary_downloads = storcon_kick_secondary_downloads
self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
pageserver_default_tenant_config_compaction_algorithm
)
@@ -1219,6 +1224,14 @@ class NeonEnv:
else:
cfg["storage_controller"] = {"use_local_compute_notifications": False}
if config.storcon_kick_secondary_downloads is not None:
# Configure whether storage controller should actively kick off secondary downloads
if "storage_controller" not in cfg:
cfg["storage_controller"] = {}
cfg["storage_controller"]["kick_secondary_downloads"] = (
config.storcon_kick_secondary_downloads
)
# Create config for pageserver
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -1228,6 +1241,7 @@ class NeonEnv:
):
pageserver_port = PageserverPort(
pg=self.port_distributor.get_port(),
grpc=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
https=self.port_distributor.get_port() if config.use_https_pageserver_api else None,
)
@@ -1243,13 +1257,14 @@ class NeonEnv:
ps_cfg: dict[str, Any] = {
"id": ps_id,
"listen_pg_addr": f"localhost:{pageserver_port.pg}",
"listen_grpc_addr": f"localhost:{pageserver_port.grpc}",
"listen_http_addr": f"localhost:{pageserver_port.http}",
"listen_https_addr": f"localhost:{pageserver_port.https}"
if config.use_https_pageserver_api
else None,
"pg_auth_type": pg_auth_type,
"http_auth_type": http_auth_type,
"grpc_auth_type": grpc_auth_type,
"http_auth_type": http_auth_type,
"availability_zone": availability_zone,
# Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
# the pageserver taking a long time to start up due to syncfs flushing other tests' data
@@ -1762,6 +1777,7 @@ def neon_env_builder(
@dataclass
class PageserverPort:
pg: int
grpc: int
http: int
https: int | None = None
@@ -2054,6 +2070,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
headers=self.headers(TokenScope.ADMIN),
)
def tombstone_delete(self, node_id):
log.info(f"tombstone_delete({node_id})")
self.request(
"DELETE",
f"{self.api}/debug/v1/tombstone/{node_id}",
headers=self.headers(TokenScope.ADMIN),
)
def node_drain(self, node_id):
log.info(f"node_drain({node_id})")
self.request(
@@ -2110,6 +2134,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
)
return response.json()
def tombstone_list(self):
response = self.request(
"GET",
f"{self.api}/debug/v1/tombstone",
headers=self.headers(TokenScope.ADMIN),
)
return response.json()
def tenant_shard_dump(self):
"""
Debug listing API: dumps the internal map of tenant shards
@@ -2207,6 +2239,17 @@ class NeonStorageController(MetricsGetter, LogUtils):
shards: list[dict[str, Any]] = body["shards"]
return shards
def timeline_locate(self, tenant_id: TenantId, timeline_id: TimelineId):
"""
:return: dict {"generation": int, "sk_set": [int], "new_sk_set": [int]}
"""
response = self.request(
"GET",
f"{self.api}/debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate",
headers=self.headers(TokenScope.ADMIN),
)
return response.json()
def tenant_describe(self, tenant_id: TenantId):
"""
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int, preferred_az_id: str}
@@ -2333,6 +2376,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
delay_max = max_interval
while n > 0:
n = self.reconcile_all()
if n == 0:
break
elif time.time() - start_at > timeout_secs:
@@ -4030,6 +4074,16 @@ def static_proxy(
"CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
)
vanilla_pg.stop()
vanilla_pg.edit_hba(
[
"local all all trust",
"host all all 127.0.0.1/32 scram-sha-256",
"host all all ::1/128 scram-sha-256",
]
)
vanilla_pg.start()
proxy_port = port_distributor.get_port()
mgmt_port = port_distributor.get_port()
http_port = port_distributor.get_port()
@@ -4155,6 +4209,8 @@ class Endpoint(PgProtocol, LogUtils):
self._running = threading.Semaphore(0)
self.__jwt: str | None = None
self.terminate_flush_lsn: Lsn | None = None
def http_client(self, retries: Retry | None = None) -> EndpointHttpClient:
assert self.__jwt is not None
return EndpointHttpClient(
@@ -4167,6 +4223,7 @@ class Endpoint(PgProtocol, LogUtils):
self,
branch_name: str,
endpoint_id: str | None = None,
grpc: bool | None = None,
hot_standby: bool = False,
lsn: Lsn | None = None,
config_lines: list[str] | None = None,
@@ -4191,6 +4248,7 @@ class Endpoint(PgProtocol, LogUtils):
endpoint_id=self.endpoint_id,
tenant_id=self.tenant_id,
lsn=lsn,
grpc=grpc,
hot_standby=hot_standby,
pg_port=self.pg_port,
external_http_port=self.external_http_port,
@@ -4457,9 +4515,10 @@ class Endpoint(PgProtocol, LogUtils):
running = self._running.acquire(blocking=False)
if running:
assert self.endpoint_id is not None
self.env.neon_cli.endpoint_stop(
lsn, _ = self.env.neon_cli.endpoint_stop(
self.endpoint_id, check_return_code=self.check_stop_result, mode=mode
)
self.terminate_flush_lsn = lsn
if sks_wait_walreceiver_gone is not None:
for sk in sks_wait_walreceiver_gone[0]:
@@ -4477,9 +4536,10 @@ class Endpoint(PgProtocol, LogUtils):
running = self._running.acquire(blocking=False)
if running:
assert self.endpoint_id is not None
self.env.neon_cli.endpoint_stop(
lsn, _ = self.env.neon_cli.endpoint_stop(
self.endpoint_id, True, check_return_code=self.check_stop_result, mode=mode
)
self.terminate_flush_lsn = lsn
self.endpoint_id = None
return self
@@ -4488,6 +4548,7 @@ class Endpoint(PgProtocol, LogUtils):
self,
branch_name: str,
endpoint_id: str | None = None,
grpc: bool | None = None,
hot_standby: bool = False,
lsn: Lsn | None = None,
config_lines: list[str] | None = None,
@@ -4505,6 +4566,7 @@ class Endpoint(PgProtocol, LogUtils):
branch_name=branch_name,
endpoint_id=endpoint_id,
config_lines=config_lines,
grpc=grpc,
hot_standby=hot_standby,
lsn=lsn,
pageserver_id=pageserver_id,
@@ -4592,6 +4654,7 @@ class EndpointFactory:
endpoint_id: str | None = None,
tenant_id: TenantId | None = None,
lsn: Lsn | None = None,
grpc: bool | None = None,
hot_standby: bool = False,
config_lines: list[str] | None = None,
remote_ext_base_url: str | None = None,
@@ -4611,6 +4674,7 @@ class EndpointFactory:
return ep.create_start(
branch_name=branch_name,
endpoint_id=endpoint_id,
grpc=grpc,
hot_standby=hot_standby,
config_lines=config_lines,
lsn=lsn,
@@ -4625,6 +4689,7 @@ class EndpointFactory:
endpoint_id: str | None = None,
tenant_id: TenantId | None = None,
lsn: Lsn | None = None,
grpc: bool | None = None,
hot_standby: bool = False,
config_lines: list[str] | None = None,
pageserver_id: int | None = None,
@@ -4647,6 +4712,7 @@ class EndpointFactory:
branch_name=branch_name,
endpoint_id=endpoint_id,
lsn=lsn,
grpc=grpc,
hot_standby=hot_standby,
config_lines=config_lines,
pageserver_id=pageserver_id,
@@ -4671,6 +4737,7 @@ class EndpointFactory:
self,
origin: Endpoint,
endpoint_id: str | None = None,
grpc: bool | None = None,
config_lines: list[str] | None = None,
) -> Endpoint:
branch_name = origin.branch_name
@@ -4682,6 +4749,7 @@ class EndpointFactory:
endpoint_id=endpoint_id,
tenant_id=origin.tenant_id,
lsn=None,
grpc=grpc,
hot_standby=True,
config_lines=config_lines,
)
@@ -4690,6 +4758,7 @@ class EndpointFactory:
self,
origin: Endpoint,
endpoint_id: str | None = None,
grpc: bool | None = None,
config_lines: list[str] | None = None,
) -> Endpoint:
branch_name = origin.branch_name
@@ -4701,6 +4770,7 @@ class EndpointFactory:
endpoint_id=endpoint_id,
tenant_id=origin.tenant_id,
lsn=None,
grpc=grpc,
hot_standby=True,
config_lines=config_lines,
)
@@ -4852,6 +4922,9 @@ class Safekeeper(LogUtils):
log.info(f"finished pulling timeline from {src_ids} to {self.id}")
return res
def safekeeper_id(self) -> SafekeeperId:
return SafekeeperId(self.id, "localhost", self.port.pg_tenant_only)
@property
def data_dir(self) -> Path:
return self.env.repo_dir / "safekeepers" / f"sk{self.id}"

View File

@@ -1219,3 +1219,31 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
)
self.verbose_error(res)
return res.json()
def force_override_feature_flag(self, flag: str, value: str | None = None):
if value is None:
res = self.delete(
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
)
else:
res = self.put(
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
params={"value": value},
)
self.verbose_error(res)
def evaluate_feature_flag_boolean(self, tenant_id: TenantId, flag: str) -> Any:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}",
params={"as": "boolean"},
)
self.verbose_error(res)
return res.json()
def evaluate_feature_flag_multivariate(self, tenant_id: TenantId, flag: str) -> Any:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}",
params={"as": "multivariate"},
)
self.verbose_error(res)
return res.json()