pageserver: add a /reset API for tenants (#6014)

## Problem

Traditionally we would detach/attach directly with curl if we wanted to
"reboot" a single tenant. That's kind of inconvenient these days,
because one needs to know a generation number to issue an attach
request.

Closes: https://github.com/neondatabase/neon/issues/6011

## Summary of changes

- Introduce a new `/reset` API, which remembers the LocationConf from
the current attachment so that callers do not have to work out the
correct configuration/generation to use.
- As an additional support tool, allow an optional `drop_cache` query
parameter, for situations where we are concerned that some on-disk state
might be bad and want to clear that as well as the in-memory state.

One might wonder why I didn't call this "reattach" -- it's because
there's already a PS->CP API of that name and it could get confusing.
This commit is contained in:
John Spray
2023-12-05 15:38:27 +00:00
committed by GitHub
parent be885370f6
commit da5e03b0d8
4 changed files with 132 additions and 9 deletions

View File

@@ -260,6 +260,14 @@ class PageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params)
self.verbose_error(res)
def tenant_reset(self, tenant_id: TenantId, drop_cache: bool):
params = {}
if drop_cache:
params["drop_cache"] = "true"
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
self.verbose_error(res)
def tenant_delete(self, tenant_id: TenantId):
res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
self.verbose_error(res)

View File

@@ -1,4 +1,5 @@
import asyncio
import enum
import random
import time
from threading import Thread
@@ -51,11 +52,20 @@ def do_gc_target(
log.info("gc http thread returning")
class ReattachMode(str, enum.Enum):
REATTACH_EXPLICIT = "explicit"
REATTACH_RESET = "reset"
REATTACH_RESET_DROP = "reset"
# Basic detach and re-attach test
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
@pytest.mark.parametrize(
"mode",
[ReattachMode.REATTACH_EXPLICIT, ReattachMode.REATTACH_RESET, ReattachMode.REATTACH_RESET_DROP],
)
def test_tenant_reattach(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind, mode: str
):
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
@@ -100,8 +110,15 @@ def test_tenant_reattach(
ps_metrics.query_one("pageserver_last_record_lsn", filter=tenant_metric_filter).value
)
pageserver_http.tenant_detach(tenant_id)
pageserver_http.tenant_attach(tenant_id)
if mode == ReattachMode.REATTACH_EXPLICIT:
# Explicitly detach then attach the tenant as two separate API calls
pageserver_http.tenant_detach(tenant_id)
pageserver_http.tenant_attach(tenant_id)
elif mode in (ReattachMode.REATTACH_RESET, ReattachMode.REATTACH_RESET_DROP):
# Use the reset API to detach/attach in one shot
pageserver_http.tenant_reset(tenant_id, mode == ReattachMode.REATTACH_RESET_DROP)
else:
raise NotImplementedError(mode)
time.sleep(1) # for metrics propagation