Implement a second HTTP server within compute_ctl (#10574)

The compute_ctl HTTP server has the following purposes:

- Allow management via the control plane
- Provide an endpoint for scraping metrics
- Provide APIs for compute internal clients
  - Neon Postgres extension for installing remote extensions
  - local_proxy for installing extensions and adding grants

The first two purposes require the HTTP server to be available outside
the compute.

The Neon threat model assumes a bad actor within our internal network, so
we need to reduce the attack surface. Exposing unnecessary
unauthenticated HTTP endpoints to the internal network increases that
attack surface. For the endpoints described in the third bullet
point, we can instead run an extra HTTP server that is bound only to the
loopback interface, since all consumers of those endpoints are within the
compute.
This commit is contained in:
Tristan Partin
2025-02-11 12:02:22 -06:00
committed by GitHub
parent f7b2293317
commit da9c101939
16 changed files with 310 additions and 167 deletions

View File

@@ -9,21 +9,23 @@ from requests.adapters import HTTPAdapter
class EndpointHttpClient(requests.Session):
def __init__(
self,
port: int,
external_port: int,
internal_port: int,
):
super().__init__()
self.port = port
self.external_port: int = external_port
self.internal_port: int = internal_port
self.mount("http://", HTTPAdapter())
def dbs_and_roles(self):
res = self.get(f"http://localhost:{self.port}/dbs_and_roles")
res = self.get(f"http://localhost:{self.external_port}/dbs_and_roles")
res.raise_for_status()
return res.json()
def database_schema(self, database: str):
res = self.get(
f"http://localhost:{self.port}/database_schema?database={urllib.parse.quote(database, safe='')}"
f"http://localhost:{self.external_port}/database_schema?database={urllib.parse.quote(database, safe='')}"
)
res.raise_for_status()
return res.text
@@ -34,20 +36,20 @@ class EndpointHttpClient(requests.Session):
"version": version,
"database": database,
}
res = self.post(f"http://localhost:{self.port}/extensions", json=body)
res = self.post(f"http://localhost:{self.internal_port}/extensions", json=body)
res.raise_for_status()
return res.json()
def set_role_grants(self, database: str, role: str, schema: str, privileges: list[str]):
res = self.post(
f"http://localhost:{self.port}/grants",
f"http://localhost:{self.internal_port}/grants",
json={"database": database, "schema": schema, "role": role, "privileges": privileges},
)
res.raise_for_status()
return res.json()
def metrics(self) -> str:
res = self.get(f"http://localhost:{self.port}/metrics")
res = self.get(f"http://localhost:{self.external_port}/metrics")
res.raise_for_status()
return res.text
@@ -62,5 +64,5 @@ class EndpointHttpClient(requests.Session):
}
)
res = self.post(f"http://localhost:{self.port}/failpoints", json=body)
res = self.post(f"http://localhost:{self.internal_port}/failpoints", json=body)
res.raise_for_status()

View File

@@ -478,7 +478,8 @@ class NeonLocalCli(AbstractNeonCli):
self,
branch_name: str,
pg_port: int,
http_port: int,
external_http_port: int,
internal_http_port: int,
tenant_id: TenantId,
pg_version: PgVersion,
endpoint_id: str | None = None,
@@ -501,8 +502,10 @@ class NeonLocalCli(AbstractNeonCli):
args.extend(["--lsn", str(lsn)])
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if external_http_port is not None:
args.extend(["--external-http-port", str(external_http_port)])
if internal_http_port is not None:
args.extend(["--internal-http-port", str(internal_http_port)])
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:

View File

@@ -3807,7 +3807,8 @@ class Endpoint(PgProtocol, LogUtils):
env: NeonEnv,
tenant_id: TenantId,
pg_port: int,
http_port: int,
external_http_port: int,
internal_http_port: int,
check_stop_result: bool = True,
):
super().__init__(host="localhost", port=pg_port, user="cloud_admin", dbname="postgres")
@@ -3817,7 +3818,8 @@ class Endpoint(PgProtocol, LogUtils):
self.pgdata_dir: Path | None = None # Path to computenode PGDATA
self.tenant_id = tenant_id
self.pg_port = pg_port
self.http_port = http_port
self.external_http_port = external_http_port
self.internal_http_port = internal_http_port
self.check_stop_result = check_stop_result
# passed to endpoint create and endpoint reconfigure
self.active_safekeepers: list[int] = list(map(lambda sk: sk.id, env.safekeepers))
@@ -3834,7 +3836,8 @@ class Endpoint(PgProtocol, LogUtils):
self, auth_token: str | None = None, retries: Retry | None = None
) -> EndpointHttpClient:
return EndpointHttpClient(
port=self.http_port,
external_port=self.external_http_port,
internal_port=self.internal_http_port,
)
def create(
@@ -3866,7 +3869,8 @@ class Endpoint(PgProtocol, LogUtils):
lsn=lsn,
hot_standby=hot_standby,
pg_port=self.pg_port,
http_port=self.http_port,
external_http_port=self.external_http_port,
internal_http_port=self.internal_http_port,
pg_version=self.env.pg_version,
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
@@ -4258,7 +4262,8 @@ class EndpointFactory:
self.env,
tenant_id=tenant_id or self.env.initial_tenant,
pg_port=self.env.port_distributor.get_port(),
http_port=self.env.port_distributor.get_port(),
external_http_port=self.env.port_distributor.get_port(),
internal_http_port=self.env.port_distributor.get_port(),
)
self.num_instances += 1
self.endpoints.append(ep)
@@ -4288,7 +4293,8 @@ class EndpointFactory:
self.env,
tenant_id=tenant_id or self.env.initial_tenant,
pg_port=self.env.port_distributor.get_port(),
http_port=self.env.port_distributor.get_port(),
external_http_port=self.env.port_distributor.get_port(),
internal_http_port=self.env.port_distributor.get_port(),
)
endpoint_id = endpoint_id or self.env.generate_endpoint_id()

View File

@@ -79,7 +79,9 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark:
assert sum == 1000000
# Get metrics
metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
metrics = requests.get(
f"http://localhost:{endpoint.external_http_port}/metrics.json"
).json()
durations = {
"wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
"sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",

View File

@@ -56,7 +56,9 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
endpoint.safe_psql("select 1;")
# Get metrics
metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
metrics = requests.get(
f"http://localhost:{endpoint.external_http_port}/metrics.json"
).json()
durations = {
"wait_for_spec_ms": f"{i}_wait_for_spec",
"sync_safekeepers_ms": f"{i}_sync_safekeepers",

View File

@@ -17,11 +17,13 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
main_branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
external_http_port = port_distributor.get_port()
internal_http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
main_branch_name,
pg_port,
http_port,
external_http_port,
internal_http_port,
endpoint_id="ep-basic-main",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
@@ -35,11 +37,13 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
new_branch_name=branch_name,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
external_http_port = port_distributor.get_port()
internal_http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
external_http_port,
internal_http_port,
endpoint_id=f"ep-{branch_name}",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
@@ -59,23 +63,27 @@ def test_neon_two_primary_endpoints_fail(
branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
external_http_port = port_distributor.get_port()
internal_http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
external_http_port,
internal_http_port,
endpoint_id="ep1",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
external_http_port = port_distributor.get_port()
internal_http_port = port_distributor.get_port()
# ep1 is not running so create will succeed
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
external_http_port,
internal_http_port,
endpoint_id="ep2",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,

View File

@@ -268,7 +268,8 @@ def endpoint_create_start(
env,
tenant_id=env.initial_tenant,
pg_port=env.port_distributor.get_port(),
http_port=env.port_distributor.get_port(),
external_http_port=env.port_distributor.get_port(),
internal_http_port=env.port_distributor.get_port(),
# In these tests compute has high probability of terminating on its own
# before our stop() due to lost consensus leadership.
check_stop_result=False,