Object storage proxy (#11357)

A service for storing and retrieving LFC prewarm data.
It can also be used to proxy S3 access for Postgres extensions such as
pg_mooncake.

Requests must include a JWT bearer token.
The token is validated using a PEM file (which should be passed in infra/).

Note: the app does not tolerate extra trailing slashes; see the comments on
the `delete_prefix` test in app.rs.

Resolves: https://github.com/neondatabase/cloud/issues/26342
Unrelated changes: gate the `rename_noreplace` feature and disable it in
`remote_storage` so that `object_storage` can be built with musl.
This commit is contained in:
Mikhail Kot
2025-04-08 15:54:53 +01:00
committed by GitHub
parent a7142f3bc6
commit 6138d61592
23 changed files with 1424 additions and 38 deletions

View File

@@ -417,6 +417,19 @@ class NeonLocalCli(AbstractNeonCli):
cmd.append(f"--instance-id={instance_id}")
return self.raw_cli(cmd)
def object_storage_start(self, timeout_in_seconds: int | None = None):
cmd = ["object-storage", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(cmd)
def object_storage_stop(self, immediate: bool):
    """
    Run `object-storage stop` through `raw_cli` and return its result.

    When `immediate` is true, `-m immediate` is appended to the command line.
    """
    cmd = ["object-storage", "stop"]
    if immediate:
        cmd.extend(["-m", "immediate"])
    return self.raw_cli(cmd)
def pageserver_start(
self,
id: int,

View File

@@ -1023,6 +1023,8 @@ class NeonEnvBuilder:
self.env.broker.assert_no_errors()
self.env.object_storage.assert_no_errors()
try:
self.overlay_cleanup_teardown()
except Exception as e:
@@ -1118,6 +1120,8 @@ class NeonEnv:
pagectl_env_vars["RUST_LOG"] = self.rust_log_override
self.pagectl = Pagectl(extra_env=pagectl_env_vars, binpath=self.neon_binpath)
self.object_storage = ObjectStorage(self)
# The URL for the pageserver to use as its control_plane_api config
if config.storage_controller_port_override is not None:
log.info(
@@ -1173,6 +1177,7 @@ class NeonEnv:
},
"safekeepers": [],
"pageservers": [],
"object_storage": {"port": self.port_distributor.get_port()},
"generate_local_ssl_certs": self.generate_local_ssl_certs,
}
@@ -1408,6 +1413,8 @@ class NeonEnv:
self.storage_controller.on_safekeeper_deploy(sk_id, body)
self.storage_controller.safekeeper_scheduling_policy(sk_id, "Active")
self.object_storage.start(timeout_in_seconds=timeout_in_seconds)
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
"""
After this method returns, there should be no child processes running.
@@ -1425,6 +1432,8 @@ class NeonEnv:
except Exception as e:
raise_later = e
self.object_storage.stop(immediate=immediate)
# Stop storage controller before pageservers: we don't want it to spuriously
# detect a pageserver "failure" during test teardown
self.storage_controller.stop(immediate=immediate)
@@ -2635,6 +2644,26 @@ class NeonStorageController(MetricsGetter, LogUtils):
self.stop(immediate=True)
class ObjectStorage(LogUtils):
    """
    Handle for the object storage service belonging to a `NeonEnv`.

    The log file and the JSON config both live under `<repo_dir>/object_storage`;
    starting and stopping are delegated to `env.neon_cli`.
    """

    def __init__(self, env: NeonEnv):
        storage_dir = env.repo_dir / "object_storage"
        log_path = storage_dir / "object_storage.log"
        super().__init__(logfile=log_path)
        self.conf_path = storage_dir / "object_storage.json"
        self.env = env

    def base_url(self):
        """Return the `listen` value read from the service's JSON config file."""
        config = json.loads(self.conf_path.read_text())
        return config["listen"]

    def start(self, timeout_in_seconds: int | None = None):
        """Start the service via `neon_cli.object_storage_start`."""
        cli = self.env.neon_cli
        cli.object_storage_start(timeout_in_seconds)

    def stop(self, immediate: bool = False):
        """Stop the service via `neon_cli.object_storage_stop`."""
        cli = self.env.neon_cli
        cli.object_storage_stop(immediate)

    def assert_no_errors(self):
        """Run the module-level `assert_no_errors` check on this service's log file."""
        # Empty third argument, same as the original call.
        extra: list = []
        assert_no_errors(self.logfile, "object_storage", extra)
class NeonProxiedStorageController(NeonStorageController):
def __init__(self, env: NeonEnv, proxy_port: int, auth_enabled: bool, use_https: bool):
super().__init__(env, proxy_port, auth_enabled, use_https)

View File

@@ -134,10 +134,11 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
"""
env = neon_env_builder.init_start()
# Stop default ps/sk
# Stop default services
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.storage_controller_stop(False)
env.neon_cli.object_storage_stop(False)
env.neon_cli.storage_broker_stop()
# Keep NeonEnv state up to date, it usually owns starting/stopping services
@@ -179,11 +180,13 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
# Using the single-pageserver shortcut property throws when there are multiple pageservers
with pytest.raises(AssertionError):
_drop = env.pageserver
_ = env.pageserver
env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 1)
env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2)
env.neon_cli.object_storage_stop(False)
# Stop this to get out of the way of the following `start`
env.neon_cli.storage_controller_stop(False)
env.neon_cli.storage_broker_stop()

View File

@@ -0,0 +1,56 @@
from time import time
import pytest
from aiohttp import ClientSession
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from jwcrypto import jwk, jwt
@pytest.mark.asyncio
async def test_object_storage_insert_retrieve_delete(neon_simple_env: NeonEnv):
    """
    Inserts, retrieves, and deletes test file using a JWT token

    The token is signed with the key read from `auth_private_key.pem` in the
    repo dir and carries the tenant, timeline and endpoint ids of a freshly
    started endpoint on the `main` branch.
    """
    env = neon_simple_env
    ep = env.endpoints.create_start(branch_name="main")
    tenant_id = str(ep.tenant_id)
    timeline_id = str(ep.show_timeline_id())
    endpoint_id = ep.endpoint_id

    key_path = env.repo_dir / "auth_private_key.pem"
    signing_key = jwk.JWK.from_pem(key_path.read_bytes())
    claims = {
        "tenant_id": tenant_id,
        "timeline_id": timeline_id,
        "endpoint_id": endpoint_id,
        "exp": round(time()) + 99,
    }
    log.info(f"key path {key_path}\nclaims {claims}")

    # Keep the JWT object and its serialized form under separate names.
    signed_jwt = jwt.JWT(header={"alg": "EdDSA"}, claims=claims)
    signed_jwt.make_signed_token(signing_key)
    token = signed_jwt.serialize()

    base_url = env.object_storage.base_url()
    # Distinct from `signing_key`: this is the URL of the stored object.
    url = f"http://{base_url}/{tenant_id}/{timeline_id}/{endpoint_id}/key"
    headers = {"Authorization": f"Bearer {token}"}
    log.info(f"cache key url {url}")
    log.info(f"token {token}")

    async with ClientSession(headers=headers) as session:
        async with session.get(url) as res:
            assert res.status == 404, f"Non-existing file is present: {res}"

        data = b"cheburash"
        async with session.put(url, data=data) as res:
            assert res.status == 200, f"Error writing file: {res}"

        async with session.get(url) as res:
            # Check the status first so a failed read is reported as such
            # rather than as a mismatch against an error body.
            assert res.status == 200, f"Error reading file: {res}"
            read_data = await res.read()
            assert data == read_data

        async with session.delete(url) as res:
            assert res.status == 200, f"Error removing file {res}"

        async with session.get(url) as res:
            assert res.status == 404, f"File was not deleted: {res}"

View File

@@ -95,6 +95,7 @@ def test_storage_controller_smoke(
env.pageservers[1].start()
for sk in env.safekeepers:
sk.start()
env.object_storage.start()
# The pageservers we started should have registered with the sharding service on startup
nodes = env.storage_controller.node_list()
@@ -346,6 +347,7 @@ def prepare_onboarding_env(
env = neon_env_builder.init_configs()
env.broker.start()
env.storage_controller.start()
env.object_storage.start()
# This is the pageserver where we'll initially create the tenant. Run it in emergency
# mode so that it doesn't talk to storage controller, and do not register it.