The code in this change was extracted from PR #2595, i.e., Heikki’s draft
PR for on-demand download.
High-Level Changes
- storage_sync module rewrite
- Changes to Tenant Loading
- Changes to Tenant States
- Changes to Timeline States
- Crash-safe & Resumable Tenant Attach
There are several follow-up work items planned.
Refer to the Epic issue on GitHub:
https://github.com/neondatabase/neon/issues/2029
Metadata:
closes https://github.com/neondatabase/neon/pull/2785
unsquashed history of this patch: archive/pr-2785-storage-sync2/pre-squash
Co-authored-by: Dmitry Rodionov <dmitry@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>
===============================================================================
storage_sync module rewrite
===========================
The storage_sync module has been rewritten. The new module is named
storage_sync2, mostly to produce a more readable git diff.
The updated block comment in storage_sync2.rs describes the changes quite well,
so we will not reproduce it here. TL;DR:
- The global sync queue and RemoteIndex are replaced with a per-timeline
  `RemoteTimelineClient` struct that holds a queue of upload operations, to
  ensure proper ordering, plus the necessary metadata.
- Before deleting local layer files, we wait for ongoing upload operations to
  finish (wait_completion()).
- Download operations are not queued; they are executed immediately.
  (A sketch of the queueing idea follows this list.)
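To illustrate, here is a minimal, hypothetical Rust sketch of the queueing
idea. The names `UploadOp`, `schedule`, and `wait_completion` mirror the
description above, but all bodies and signatures are invented and do not
reproduce the actual `RemoteTimelineClient` API:

    use std::collections::VecDeque;
    use std::sync::Mutex;

    // Hypothetical operation type; the real code distinguishes more cases.
    #[derive(Debug)]
    enum UploadOp {
        UploadLayer(String),
        UploadIndexPart,
        DeleteLayer(String),
    }

    // One client per timeline. Operations complete strictly in FIFO order,
    // which is what guarantees, e.g., that an index upload referencing a
    // layer never overtakes the upload of that layer.
    struct RemoteTimelineClient {
        queue: Mutex<VecDeque<UploadOp>>,
    }

    impl RemoteTimelineClient {
        fn schedule(&self, op: UploadOp) {
            self.queue.lock().unwrap().push_back(op);
        }

        // The real pageserver waits for a background task to drain the
        // queue; this sketch drains it synchronously instead.
        fn wait_completion(&self) {
            while let Some(op) = self.queue.lock().unwrap().pop_front() {
                println!("executing {:?}", op); // stand-in for the remote storage call
            }
        }
    }

    fn main() {
        let client = RemoteTimelineClient {
            queue: Mutex::new(VecDeque::new()),
        };
        client.schedule(UploadOp::UploadLayer("layer-A".into()));
        client.schedule(UploadOp::UploadIndexPart);
        // Before deleting local layer files, wait for uploads to finish:
        client.wait_completion();
        client.schedule(UploadOp::DeleteLayer("layer-A".into()));
        client.wait_completion();
    }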
Changes to Tenant Loading
=========================
The initial-sync code was rewritten as well; this is the other major change,
and it serves as a foundation for on-demand downloads. The routines for
attaching and loading moved directly into the Tenant struct, and they are now
asynchronous and spawned into the background, as sketched below.
Since this patch doesn’t introduce on-demand download of layers, we still
fully synchronize with the remote during pageserver startup. See details in
`Timeline::reconcile_with_remote` and `Timeline::download_missing`.
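Conceptually, the loading path now looks roughly like the following sketch,
assuming a tokio runtime. `Tenant`, `load`, and `spawn_load` are stand-ins
that mirror the description, not the real signatures:

    use std::sync::Arc;

    // Hypothetical stand-in for the real Tenant struct; only the shape of
    // the control flow matters here.
    struct Tenant {
        id: String,
    }

    impl Tenant {
        // The actual work: scan the tenant directory, load timeline
        // metadata, reconcile with remote storage, download what's missing.
        async fn load(&self) -> Result<(), String> {
            println!("loading tenant {}", self.id);
            Ok(())
        }

        // Loading is spawned into the background so that pageserver startup
        // does not block on it; the tenant only becomes Active once the
        // task finishes.
        fn spawn_load(self: Arc<Self>) -> tokio::task::JoinHandle<Result<(), String>> {
            tokio::spawn(async move { self.load().await })
        }
    }

    #[tokio::main]
    async fn main() {
        let tenant = Arc::new(Tenant { id: "example".into() });
        let handle = Arc::clone(&tenant).spawn_load();
        handle.await.unwrap().unwrap();
    }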
Changes to Tenant States
========================
The “Active” state has lost its “background_jobs_running: bool” member. That
variable indicated whether the GC & compaction background loops were spawned.
With this patch, they are always spawned. Unit tests (#[test]) use
TenantConf::{gc_period,compaction_period} to disable their effect (15db566).
This patch introduces a new tenant state, “Attaching”. A tenant that is being
attached starts in this state and transitions to “Active” once the download
finishes.
The `GET /tenant` endpoint returns `TenantInfo::has_in_progress_downloads`. We
now derive the value for that field from the tenant state, to remain
backwards-compatible with cloud.git. We will remove that field when we switch
to on-demand downloads. The states and the derived field are sketched below.
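As a hedged illustration, they could look roughly like this in Rust; the
variant set follows the description above, but the actual definition in the
patch may differ:

    // Sketch of the tenant states discussed above; the real enum likely
    // has more variants and may carry additional data.
    #[derive(Clone, Copy, PartialEq, Debug)]
    enum TenantState {
        // Being attached: timeline data is still being downloaded.
        Attaching,
        // Fully loaded; the GC & compaction loops are always spawned here.
        Active,
        // Loading or attaching failed permanently.
        Broken,
    }

    // No longer stored anywhere: derived from the state, purely for
    // backwards compatibility with cloud.git.
    fn has_in_progress_downloads(state: TenantState) -> bool {
        state == TenantState::Attaching
    }

    fn main() {
        assert!(has_in_progress_downloads(TenantState::Attaching));
        assert!(!has_in_progress_downloads(TenantState::Active));
        let _ = TenantState::Broken;
    }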
Changes to Timeline States
==========================
The TimelineInfo::awaits_download field is now equivalent to the tenant being
in Attaching state. Previously, download progress was tracked per timeline;
with this change, it is only tracked per tenant. Once on-demand downloads
arrive, the field will be completely obsolete. Its deprecation is tracked in
issue #2930.
Crash-safe & Resumable Tenant Attach
====================================
Previously, the attach operation was not persistent: when tenant attach was
interrupted by a crash, the pageserver would not continue attaching after
restart. In fact, the half-finished tenant directory on disk would simply be
skipped by tenant_mgr because it lacked the metadata file (which is written
last). This patch introduces an “attaching” marker file that is present inside
the tenant directory while the tenant is attaching. During pageserver startup,
tenant_mgr resumes the attach if that file is present. If not, it assumes that
the local tenant state is consistent and tries to load the tenant; if that
fails, the tenant transitions into the Broken state. The protocol is sketched
below.
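A minimal sketch of the protocol, with invented names
(`attaching_marker_path`, `download_timelines`, `load_tenant`) standing in
for the real routines:

    use std::path::{Path, PathBuf};

    // Invented helper; the real marker file's name/location may differ.
    fn attaching_marker_path(tenant_dir: &Path) -> PathBuf {
        tenant_dir.join("attaching")
    }

    // Sketch of the attach sequence: the marker is created first and
    // removed last, so a crash at any intermediate point leaves it in
    // place, and the attach will be resumed on the next startup.
    fn attach(tenant_dir: &Path) -> std::io::Result<()> {
        std::fs::write(attaching_marker_path(tenant_dir), b"")?;
        download_timelines(tenant_dir)?;
        std::fs::remove_file(attaching_marker_path(tenant_dir))
    }

    // Sketch of the startup decision in tenant_mgr:
    fn startup(tenant_dir: &Path) -> std::io::Result<()> {
        if attaching_marker_path(tenant_dir).exists() {
            // A previous attach was interrupted: resume it.
            attach(tenant_dir)
        } else {
            // Local state is assumed consistent; try to load the tenant.
            // If this fails, the tenant transitions into Broken.
            load_tenant(tenant_dir)
        }
    }

    // Stubs standing in for the real routines.
    fn download_timelines(_tenant_dir: &Path) -> std::io::Result<()> {
        Ok(())
    }
    fn load_tenant(_tenant_dir: &Path) -> std::io::Result<()> {
        Ok(())
    }

    fn main() -> std::io::Result<()> {
        let dir = std::env::temp_dir().join("tenant-example");
        std::fs::create_dir_all(&dir)?;
        attach(&dir)?;
        startup(&dir)
    }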
Test file (Python, 188 lines, 7.6 KiB):
import concurrent.futures
import os
from typing import List, Tuple

import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.types import TenantId, TimelineId


# Test restarting page server, while safekeeper and compute node keep
# running.
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

    env.pageserver.allowed_errors.extend(
        [
            ".*Failed to load delta layer.*",
            ".*could not find data for key.*",
            ".*is not active. Current state: Broken.*",
            ".*will not become active. Current state: Broken.*",
            ".*failed to load metadata.*",
            ".*could not load tenant.*load local timeline.*",
        ]
    )

    tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []

    for n in range(4):
        tenant_id, timeline_id = env.neon_cli.create_tenant()

        pg = env.postgres.create_start("main", tenant_id=tenant_id)
        with pg.cursor() as cur:
            cur.execute("CREATE TABLE t(key int primary key, value text)")
            cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")
        pg.stop()
        tenant_timelines.append((tenant_id, timeline_id, pg))

    # Stop the pageserver
    env.pageserver.stop()

    # Leave the first timeline alone, but corrupt the others in different ways
    (tenant0, timeline0, pg0) = tenant_timelines[0]
    log.info(f"Timeline {tenant0}/{timeline0} is left intact")

    (tenant1, timeline1, pg1) = tenant_timelines[1]
    metadata_path = f"{env.repo_dir}/tenants/{tenant1}/timelines/{timeline1}/metadata"
    f = open(metadata_path, "w")
    f.write("overwritten with garbage!")
    f.close()
    log.info(f"Timeline {tenant1}/{timeline1} got its metadata spoiled")

    (tenant2, timeline2, pg2) = tenant_timelines[2]
    timeline_path = f"{env.repo_dir}/tenants/{tenant2}/timelines/{timeline2}/"
    for filename in os.listdir(timeline_path):
        if filename.startswith("00000"):
            # Looks like a layer file. Remove it
            os.remove(f"{timeline_path}/{filename}")
    log.info(
        f"Timeline {tenant2}/{timeline2} got its layer files removed (no remote storage enabled)"
    )

    (tenant3, timeline3, pg3) = tenant_timelines[3]
    timeline_path = f"{env.repo_dir}/tenants/{tenant3}/timelines/{timeline3}/"
    for filename in os.listdir(timeline_path):
        if filename.startswith("00000"):
            # Looks like a layer file. Corrupt it
            f = open(f"{timeline_path}/{filename}", "w")
            f.write("overwritten with garbage!")
            f.close()
    log.info(f"Timeline {tenant3}/{timeline3} got its layer files spoiled")

    env.pageserver.start()

    # Tenant 0 should still work
    pg0.start()
    assert pg0.safe_psql("SELECT COUNT(*) FROM t")[0][0] == 100

    # But all others are broken

    # First timeline would not get loaded into pageserver due to corrupt metadata file
    with pytest.raises(
        Exception, match=f"Tenant {tenant1} will not become active. Current state: Broken"
    ) as err:
        pg1.start()
    log.info(
        f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
    )

    # Second timeline has no ancestors, only the metadata file and no layer files.
    # That is checked explicitly in the pageserver, and causes the tenant to be marked
    # as broken.
    with pytest.raises(
        Exception, match=f"Tenant {tenant2} will not become active. Current state: Broken"
    ) as err:
        pg2.start()
    log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")

    # Third timeline will also fail during basebackup, because the layer file is corrupt.
    # (We don't check layer file contents on startup, when loading the timeline)
    with pytest.raises(Exception, match="Failed to load delta layer") as err:
        pg3.start()
    log.info(
        f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
    )


def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
    env = neon_simple_env

    tenant_id, _ = env.neon_cli.create_tenant()

    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        futures = [
            executor.submit(
                env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
            )
            for i in range(4)
        ]
        for future in futures:
            future.result()


def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
    env = neon_simple_env
    pageserver_http = env.pageserver.http_client()

    env.pageserver.allowed_errors.extend(
        [
            ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
            ".*Timeline got dropped without initializing, cleaning its files.*",
        ]
    )

    tenant_id, _ = env.neon_cli.create_tenant()

    timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
    old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
    initial_timeline_dirs = [d for d in timelines_dir.iterdir()]

    # Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
    pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "return"))
    with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
        _ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id)

    # Restart the page server
    env.neon_cli.pageserver_stop(immediate=True)
    env.neon_cli.pageserver_start()

    # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
    new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
    assert (
        new_tenant_timelines == old_tenant_timelines
    ), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"

    timeline_dirs = [d for d in timelines_dir.iterdir()]
    assert (
        timeline_dirs == initial_timeline_dirs
    ), "pageserver should clean its temp timeline files on timeline creation failure"


def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv):
    env = neon_simple_env
    pageserver_http = env.pageserver.http_client()

    tenant_id, _ = env.neon_cli.create_tenant()

    timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
    old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
    initial_timeline_dirs = [d for d in timelines_dir.iterdir()]

    # Introduce failpoint when creating a new timeline uninit mark, before any other files were created
    pageserver_http.configure_failpoints(("after-timeline-uninit-mark-creation", "return"))
    with pytest.raises(Exception, match="after-timeline-uninit-mark-creation"):
        _ = env.neon_cli.create_timeline("test_timeline_create_break_after_uninit_mark", tenant_id)

    # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
    # "New" timeline is not present in the list, allowing pageserver to retry the same request
    new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
    assert (
        new_tenant_timelines == old_tenant_timelines
    ), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"

    timeline_dirs = [d for d in timelines_dir.iterdir()]
    assert (
        timeline_dirs == initial_timeline_dirs
    ), "pageserver should clean its temp timeline files on timeline creation failure"