attachment_service: JWT auth enforcement (#6897)

## Problem
Attachment service does not do auth based on JWT scopes.

## Summary of changes
Do JWT based permission checking for requests coming into the attachment
service.

Requests into the attachment service must use different tokens based on
the endpoint:
* `/control` and `/debug` require `admin` scope
* `/upcall` requires `generations_api` scope
* `/v1/...` requires `pageserverapi` scope

Requests into the pageserver from the attachment service must use
`pageserverapi` scope.
This commit is contained in:
Vlad Lazar
2024-02-26 18:17:06 +00:00
committed by GitHub
parent 0881d4f9e3
commit 5accf6e24a
12 changed files with 268 additions and 73 deletions

View File

@@ -17,6 +17,7 @@ import uuid
from contextlib import closing, contextmanager
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from fcntl import LOCK_EX, LOCK_UN, flock
from functools import cached_property
from itertools import chain, product
@@ -388,7 +389,8 @@ class PgProtocol:
class AuthKeys:
priv: str
def generate_token(self, *, scope: str, **token_data: str) -> str:
def generate_token(self, *, scope: TokenScope, **token_data: Any) -> str:
token_data = {key: str(val) for key, val in token_data.items()}
token = jwt.encode({"scope": scope, **token_data}, self.priv, algorithm="EdDSA")
# cast(Any, self.priv)
@@ -401,14 +403,23 @@ class AuthKeys:
return token
def generate_pageserver_token(self) -> str:
return self.generate_token(scope="pageserverapi")
return self.generate_token(scope=TokenScope.PAGE_SERVER_API)
def generate_safekeeper_token(self) -> str:
return self.generate_token(scope="safekeeperdata")
return self.generate_token(scope=TokenScope.SAFEKEEPER_DATA)
# generate token giving access to only one tenant
def generate_tenant_token(self, tenant_id: TenantId) -> str:
return self.generate_token(scope="tenant", tenant_id=str(tenant_id))
return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))
# TODO: Replace with `StrEnum` when we upgrade to python 3.11
class TokenScope(str, Enum):
ADMIN = "admin"
PAGE_SERVER_API = "pageserverapi"
GENERATIONS_API = "generations_api"
SAFEKEEPER_DATA = "safekeeperdata"
TENANT = "tenant"
class NeonEnvBuilder:
@@ -1922,6 +1933,13 @@ class Pagectl(AbstractNeonCli):
return IndexPartDump.from_json(parsed)
class AttachmentServiceApiException(Exception):
def __init__(self, message, status_code: int):
super().__init__(message)
self.message = message
self.status_code = status_code
class NeonAttachmentService(MetricsGetter):
def __init__(self, env: NeonEnv, auth_enabled: bool):
self.env = env
@@ -1940,39 +1958,60 @@ class NeonAttachmentService(MetricsGetter):
self.running = False
return self
@staticmethod
def raise_api_exception(res: requests.Response):
try:
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()["msg"]
except: # noqa: E722
msg = ""
raise AttachmentServiceApiException(msg, res.status_code) from e
def pageserver_api(self) -> PageserverHttpClient:
"""
The attachment service implements a subset of the pageserver REST API, for mapping
per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those
functions via the HttpClient, as an implicit check that these APIs remain compatible.
"""
return PageserverHttpClient(self.env.attachment_service_port, lambda: True)
auth_token = None
if self.auth_enabled:
auth_token = self.env.auth_keys.generate_token(scope=TokenScope.PAGE_SERVER_API)
return PageserverHttpClient(self.env.attachment_service_port, lambda: True, auth_token)
def request(self, method, *args, **kwargs) -> requests.Response:
kwargs["headers"] = self.headers()
return requests.request(method, *args, **kwargs)
resp = requests.request(method, *args, **kwargs)
NeonAttachmentService.raise_api_exception(resp)
def headers(self) -> Dict[str, str]:
return resp
def headers(self, scope: Optional[TokenScope]) -> Dict[str, str]:
headers = {}
if self.auth_enabled:
jwt_token = self.env.auth_keys.generate_pageserver_token()
if self.auth_enabled and scope is not None:
jwt_token = self.env.auth_keys.generate_token(scope=scope)
headers["Authorization"] = f"Bearer {jwt_token}"
return headers
def get_metrics(self) -> Metrics:
res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
res.raise_for_status()
return parse_metrics(res.text)
def ready(self) -> bool:
resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
if resp.status_code == 503:
status = None
try:
resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
status = resp.status_code
except AttachmentServiceApiException as e:
status = e.status_code
if status == 503:
return False
elif resp.status_code == 200:
elif status == 200:
return True
else:
raise RuntimeError(f"Unexpected status {resp.status_code} from readiness endpoint")
raise RuntimeError(f"Unexpected status {status} from readiness endpoint")
def attach_hook_issue(
self, tenant_shard_id: Union[TenantId, TenantShardId], pageserver_id: int
@@ -1981,21 +2020,19 @@ class NeonAttachmentService(MetricsGetter):
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
headers=self.headers(),
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
gen = response.json()["gen"]
assert isinstance(gen, int)
return gen
def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]):
response = self.request(
self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": None},
headers=self.headers(),
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
def inspect(self, tenant_shard_id: Union[TenantId, TenantShardId]) -> Optional[tuple[int, int]]:
"""
@@ -2005,9 +2042,8 @@ class NeonAttachmentService(MetricsGetter):
"POST",
f"{self.env.attachment_service_api}/debug/v1/inspect",
json={"tenant_shard_id": str(tenant_shard_id)},
headers=self.headers(),
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
json = response.json()
log.info(f"Response: {json}")
if json["attachment"]:
@@ -2027,14 +2063,15 @@ class NeonAttachmentService(MetricsGetter):
"POST",
f"{self.env.attachment_service_api}/control/v1/node",
json=body,
headers=self.headers(),
).raise_for_status()
headers=self.headers(TokenScope.ADMIN),
)
def node_list(self):
response = self.request(
"GET", f"{self.env.attachment_service_api}/control/v1/node", headers=self.headers()
"GET",
f"{self.env.attachment_service_api}/control/v1/node",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
return response.json()
def node_configure(self, node_id, body: dict[str, Any]):
@@ -2044,8 +2081,8 @@ class NeonAttachmentService(MetricsGetter):
"PUT",
f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config",
json=body,
headers=self.headers(),
).raise_for_status()
headers=self.headers(TokenScope.ADMIN),
)
def tenant_create(
self,
@@ -2070,8 +2107,12 @@ class NeonAttachmentService(MetricsGetter):
for k, v in tenant_config.items():
body[k] = v
response = self.request("POST", f"{self.env.attachment_service_api}/v1/tenant", json=body)
response.raise_for_status()
response = self.request(
"POST",
f"{self.env.attachment_service_api}/v1/tenant",
json=body,
headers=self.headers(TokenScope.PAGE_SERVER_API),
)
log.info(f"tenant_create success: {response.json()}")
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
@@ -2079,9 +2120,10 @@ class NeonAttachmentService(MetricsGetter):
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int}
"""
response = self.request(
"GET", f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate"
"GET",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
body = response.json()
shards: list[dict[str, Any]] = body["shards"]
return shards
@@ -2091,20 +2133,20 @@ class NeonAttachmentService(MetricsGetter):
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split",
json={"new_shard_count": shard_count},
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
body = response.json()
log.info(f"tenant_shard_split success: {body}")
shards: list[TenantShardId] = body["new_shards"]
return shards
def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
response = self.request(
self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
log.info(f"Migrated tenant {tenant_shard_id} to pageserver {dest_ps_id}")
assert self.env.get_tenant_pageserver(tenant_shard_id).id == dest_ps_id
@@ -2112,11 +2154,11 @@ class NeonAttachmentService(MetricsGetter):
"""
Throw an exception if the service finds any inconsistencies in its state
"""
response = self.request(
self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/consistency_check",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
log.info("Attachment service passed consistency check")
def __enter__(self) -> "NeonAttachmentService":
@@ -2894,7 +2936,6 @@ class NeonProxy(PgProtocol):
def get_metrics(self) -> str:
request_result = requests.get(f"http://{self.host}:{self.http_port}/metrics")
request_result.raise_for_status()
return request_result.text
@staticmethod

View File

@@ -1,13 +1,16 @@
import time
from collections import defaultdict
from datetime import datetime, timezone
from typing import List
from typing import Any, Dict, List
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
AttachmentServiceApiException,
NeonEnv,
NeonEnvBuilder,
PgBin,
TokenScope,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
@@ -457,37 +460,40 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
# Initial tenant (1 shard) and the one we just created (2 shards) should be visible
response = env.attachment_service.request(
"GET", f"{env.attachment_service_api}/debug/v1/tenant"
"GET",
f"{env.attachment_service_api}/debug/v1/tenant",
headers=env.attachment_service.headers(TokenScope.ADMIN),
)
response.raise_for_status()
assert len(response.json()) == 3
# Scheduler should report the expected nodes and shard counts
response = env.attachment_service.request(
"GET", f"{env.attachment_service_api}/debug/v1/scheduler"
)
response.raise_for_status()
# Two nodes, in a dict of node_id->node
assert len(response.json()["nodes"]) == 2
assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3
assert all(v["may_schedule"] for v in response.json()["nodes"].values())
response = env.attachment_service.request(
"POST", f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop"
"POST",
f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop",
headers=env.attachment_service.headers(TokenScope.ADMIN),
)
response.raise_for_status()
assert len(env.attachment_service.node_list()) == 1
response = env.attachment_service.request(
"POST", f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop"
"POST",
f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop",
headers=env.attachment_service.headers(TokenScope.ADMIN),
)
response.raise_for_status()
# Tenant drop should be reflected in dump output
response = env.attachment_service.request(
"GET", f"{env.attachment_service_api}/debug/v1/tenant"
"GET",
f"{env.attachment_service_api}/debug/v1/tenant",
headers=env.attachment_service.headers(TokenScope.ADMIN),
)
response.raise_for_status()
assert len(response.json()) == 1
# Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
@@ -603,3 +609,64 @@ def test_sharding_service_s3_time_travel_recovery(
endpoint.safe_psql("SELECT * FROM created_foo;")
env.attachment_service.consistency_check()
def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
svc = env.attachment_service
api = env.attachment_service_api
tenant_id = TenantId.generate()
body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}
# No token
with pytest.raises(
AttachmentServiceApiException,
match="Unauthorized: missing authorization header",
):
svc.request("POST", f"{env.attachment_service_api}/v1/tenant", json=body)
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
match="Forbidden: JWT authentication error",
):
svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN))
# Token with correct scope
svc.request(
"POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.PAGE_SERVER_API)
)
# No token
with pytest.raises(
AttachmentServiceApiException,
match="Unauthorized: missing authorization header",
):
svc.request("GET", f"{api}/debug/v1/tenant")
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
match="Forbidden: JWT authentication error",
):
svc.request(
"GET", f"{api}/debug/v1/tenant", headers=svc.headers(TokenScope.GENERATIONS_API)
)
# No token
with pytest.raises(
AttachmentServiceApiException,
match="Unauthorized: missing authorization header",
):
svc.request("POST", f"{api}/upcall/v1/re-attach")
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
match="Forbidden: JWT authentication error",
):
svc.request(
"POST", f"{api}/upcall/v1/re-attach", headers=svc.headers(TokenScope.PAGE_SERVER_API)
)