Implement a sharded time travel recovery endpoint (#6821)

The sharding service didn't have support for S3 disaster recovery.

This PR adds a new endpoint to the attachment service, which is slightly
different from the endpoint on the pageserver, in that it takes the
shard count history of the tenant as json parameters: we need to do
time travel recovery for both the shard count at the target time and the
shard count at the current moment in time, as well as the past shard
counts that either still reference.

Fixes #6604, part of https://github.com/neondatabase/cloud/issues/8233

---------

Co-authored-by: John Spray <john@neon.tech>
This commit is contained in:
Arpad Müller
2024-02-21 16:35:37 +01:00
committed by GitHub
parent 41464325c7
commit 4de2f0f3e0
13 changed files with 304 additions and 17 deletions

View File

@@ -512,7 +512,7 @@ class NeonEnvBuilder:
def init_start(
self,
initial_tenant_conf: Optional[Dict[str, str]] = None,
initial_tenant_conf: Optional[Dict[str, Any]] = None,
default_remote_storage_if_missing: bool = True,
initial_tenant_shard_count: Optional[int] = None,
initial_tenant_shard_stripe_size: Optional[int] = None,
@@ -1497,7 +1497,7 @@ class NeonCli(AbstractNeonCli):
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, str]] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
set_default: bool = False,

View File

@@ -395,12 +395,20 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
tenant_id: Union[TenantId, TenantShardId],
timestamp: datetime,
done_if_after: datetime,
shard_counts: Optional[List[int]] = None,
):
"""
Issues a request to perform time travel operations on the remote storage
"""
if shard_counts is None:
shard_counts = []
body: Dict[str, Any] = {
"shard_counts": shard_counts,
}
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/time_travel_remote_storage?travel_to={timestamp.isoformat()}Z&done_if_after={done_if_after.isoformat()}Z"
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/time_travel_remote_storage?travel_to={timestamp.isoformat()}Z&done_if_after={done_if_after.isoformat()}Z",
json=body,
)
self.verbose_error(res)

View File

@@ -482,8 +482,8 @@ def tenant_delete_wait_completed(
MANY_SMALL_LAYERS_TENANT_CONFIG = {
"gc_period": "0s",
"compaction_period": "0s",
"checkpoint_distance": f"{1024**2}",
"image_creation_threshold": "100",
"checkpoint_distance": 1024**2,
"image_creation_threshold": 100,
}

View File

@@ -1,13 +1,30 @@
import time
from collections import defaultdict
from datetime import datetime, timezone
from typing import List
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import tenant_delete_wait_completed, timeline_delete_wait_completed
from fixtures.pageserver.utils import (
MANY_SMALL_LAYERS_TENANT_CONFIG,
enable_remote_storage_versioning,
list_prefix,
remote_storage_delete_key,
tenant_delete_wait_completed,
timeline_delete_wait_completed,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import RemoteStorageKind, s3_storage
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until
from fixtures.utils import run_pg_bench_small, wait_until
from mypy_boto3_s3.type_defs import (
ObjectTypeDef,
)
from pytest_httpserver import HTTPServer
from werkzeug.wrappers.request import Request
from werkzeug.wrappers.response import Response
@@ -457,3 +474,113 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
# Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
# meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind.
env.attachment_service.consistency_check()
def test_sharding_service_s3_time_travel_recovery(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
"""
Test for S3 time travel
"""
remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
# Mock S3 doesn't have versioning enabled by default, enable it
# (also do it before there is any writes to the bucket)
if remote_storage_kind == RemoteStorageKind.MOCK_S3:
remote_storage = neon_env_builder.pageserver_remote_storage
assert remote_storage, "remote storage not configured"
enable_remote_storage_versioning(remote_storage)
neon_env_builder.num_pageservers = 1
env = neon_env_builder.init_start()
virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
tenant_id = TenantId.generate()
env.attachment_service.tenant_create(
tenant_id,
shard_count=2,
shard_stripe_size=8192,
tenant_config=MANY_SMALL_LAYERS_TENANT_CONFIG,
)
# Check that the consistency check passes
env.attachment_service.consistency_check()
branch_name = "main"
timeline_id = env.neon_cli.create_timeline(
branch_name,
tenant_id=tenant_id,
)
# Write some nontrivial amount of data into the endpoint and wait until it is uploaded
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
endpoint.safe_psql("CREATE TABLE created_foo(id integer);")
# last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
# Give the data time to be uploaded
time.sleep(4)
# Detach the tenant
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)
time.sleep(4)
ts_before_disaster = datetime.now(tz=timezone.utc).replace(tzinfo=None)
time.sleep(4)
# Simulate a "disaster": delete some random files from remote storage for one of the shards
assert env.pageserver_remote_storage
shard_id_for_list = "0002"
objects: List[ObjectTypeDef] = list_prefix(
env.pageserver_remote_storage,
f"tenants/{tenant_id}-{shard_id_for_list}/timelines/{timeline_id}/",
).get("Contents", [])
assert len(objects) > 1
log.info(f"Found {len(objects)} objects in remote storage")
should_delete = False
for obj in objects:
obj_key = obj["Key"]
should_delete = not should_delete
if not should_delete:
log.info(f"Keeping key on remote storage: {obj_key}")
continue
log.info(f"Deleting key from remote storage: {obj_key}")
remote_storage_delete_key(env.pageserver_remote_storage, obj_key)
pass
time.sleep(4)
ts_after_disaster = datetime.now(tz=timezone.utc).replace(tzinfo=None)
time.sleep(4)
# Do time travel recovery
virtual_ps_http.tenant_time_travel_remote_storage(
tenant_id, ts_before_disaster, ts_after_disaster, shard_counts=[2]
)
time.sleep(4)
# Attach the tenant again
virtual_ps_http.tenant_location_conf(
tenant_id,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": 100,
},
)
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
endpoint.safe_psql("SELECT * FROM created_foo;")
env.attachment_service.consistency_check()