mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-30 03:20:36 +00:00
Add existing_initdb_timeline_id param to timeline creation (#5912)
This PR adds an `existing_initdb_timeline_id` option to timeline creation APIs, taking an optional timeline ID. Follow-up of #5390. If the `existing_initdb_timeline_id` option is specified via the HTTP API, the pageserver downloads the existing initdb archive from the given timeline ID and extracts it, instead of running initdb itself. --------- Co-authored-by: Christian Schwarz <christian@neon.tech>
This commit is contained in:
@@ -362,12 +362,16 @@ class PageserverHttpClient(requests.Session):
|
||||
new_timeline_id: TimelineId,
|
||||
ancestor_timeline_id: Optional[TimelineId] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
existing_initdb_timeline_id: Optional[TimelineId] = None,
|
||||
**kwargs,
|
||||
) -> Dict[Any, Any]:
|
||||
body: Dict[str, Any] = {
|
||||
"new_timeline_id": str(new_timeline_id),
|
||||
"ancestor_start_lsn": str(ancestor_start_lsn) if ancestor_start_lsn else None,
|
||||
"ancestor_timeline_id": str(ancestor_timeline_id) if ancestor_timeline_id else None,
|
||||
"existing_initdb_timeline_id": str(existing_initdb_timeline_id)
|
||||
if existing_initdb_timeline_id
|
||||
else None,
|
||||
}
|
||||
if pg_version != PgVersion.NOT_SET:
|
||||
body["pg_version"] = int(pg_version)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef
|
||||
from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef, ObjectTypeDef
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
|
||||
@@ -235,10 +235,14 @@ if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
|
||||
def assert_prefix_empty(
|
||||
neon_env_builder: "NeonEnvBuilder",
|
||||
prefix: Optional[str] = None,
|
||||
allowed_postfix: Optional[str] = None,
|
||||
):
|
||||
response = list_prefix(neon_env_builder, prefix)
|
||||
keys = response["KeyCount"]
|
||||
objects = response.get("Contents", [])
|
||||
objects: List[ObjectTypeDef] = response.get("Contents", [])
|
||||
common_prefixes = response.get("CommonPrefixes", [])
|
||||
|
||||
remote_storage = neon_env_builder.pageserver_remote_storage
|
||||
@@ -261,7 +265,18 @@ def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str
|
||||
f"contradicting ListObjectsV2 response with KeyCount={keys} and Contents={objects}, CommonPrefixes={common_prefixes}"
|
||||
)
|
||||
|
||||
assert keys == 0, f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
|
||||
filtered_count = 0
|
||||
if allowed_postfix is None:
|
||||
filtered_count = len(objects)
|
||||
else:
|
||||
for _obj in objects:
|
||||
key: str = str(response.get("Key", []))
|
||||
if not (allowed_postfix.endswith(key)):
|
||||
filtered_count += 1
|
||||
|
||||
assert (
|
||||
filtered_count == 0
|
||||
), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
|
||||
|
||||
|
||||
def assert_prefix_not_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
|
||||
|
||||
@@ -603,7 +603,12 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
remote_timeline_path = env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id)
|
||||
|
||||
assert not list(remote_timeline_path.iterdir())
|
||||
filtered = [
|
||||
path
|
||||
for path in remote_timeline_path.iterdir()
|
||||
if not (path.name.endswith("initdb.tar.zst"))
|
||||
]
|
||||
assert len(filtered) == 0
|
||||
|
||||
# timeline deletion should kill ongoing uploads, so, the metric will be gone
|
||||
assert get_queued_count(file_kind="index", op_kind="upload") is None
|
||||
|
||||
@@ -197,6 +197,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
|
||||
# So by ignoring these instead of waiting for empty upload queue
|
||||
# we execute more distinct code paths.
|
||||
'.*stopping left-over name="remote upload".*',
|
||||
".*Failed to load index_part from remote storage, failed creation?.*",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -285,6 +286,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
|
||||
str(tenant_id),
|
||||
)
|
||||
),
|
||||
allowed_postfix="initdb.tar.zst",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -290,10 +290,13 @@ def test_pageserver_with_empty_tenants(
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
".*Failed to load index_part from remote storage, failed creation?.*",
|
||||
".*load failed.*list timelines directory.*",
|
||||
]
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*load failed.*list timelines directory.*")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
|
||||
@@ -230,6 +230,9 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
|
||||
env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*")
|
||||
# In one of the branches we poll for tenant to become active. Polls can generate this log message:
|
||||
env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Failed to load index_part from remote storage, failed creation?.*"
|
||||
)
|
||||
|
||||
ps_http.configure_failpoints((failpoint, "return"))
|
||||
|
||||
@@ -308,8 +311,9 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
|
||||
)
|
||||
|
||||
timeline_dir = env.pageserver.timeline_dir(env.initial_tenant, timeline_id)
|
||||
# Check local is empty
|
||||
assert not timeline_dir.exists()
|
||||
if failpoint != "timeline-delete-after-index-delete":
|
||||
# Check local is empty
|
||||
assert (not timeline_dir.exists()) or len(os.listdir(timeline_dir)) == 0
|
||||
# Check no delete mark present
|
||||
assert not (timeline_dir.parent / f"{timeline_id}.___deleted").exists()
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import sys
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
@@ -125,3 +126,43 @@ def test_wal_restore_initdb(
|
||||
)
|
||||
log.info(f"original lsn: {original_lsn}, restored lsn: {restored_lsn}")
|
||||
assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
|
||||
|
||||
|
||||
def test_wal_restore_http(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
):
|
||||
env = neon_env_builder.init_start()
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
endpoint.safe_psql("create table t as select generate_series(1,300000)")
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
ps_client = env.pageserver.http_client()
|
||||
|
||||
# shut down the endpoint and delete the timeline from the pageserver
|
||||
endpoint.stop()
|
||||
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
test_output_dir / "initdb.tar.zst"
|
||||
|
||||
(env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / "initdb.tar.zst")
|
||||
|
||||
ps_client.timeline_delete(tenant_id, timeline_id)
|
||||
time.sleep(2)
|
||||
|
||||
# verify that it is indeed deleted
|
||||
# TODO
|
||||
|
||||
# issue the restoration command
|
||||
ps_client.timeline_create(
|
||||
tenant_id=tenant_id,
|
||||
new_timeline_id=timeline_id,
|
||||
existing_initdb_timeline_id=timeline_id,
|
||||
pg_version=env.pg_version,
|
||||
)
|
||||
|
||||
# the table is back now!
|
||||
restored = env.endpoints.create_start("main")
|
||||
assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
|
||||
|
||||
Reference in New Issue
Block a user