mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-26 01:20:38 +00:00
The code in this change was extracted from #2595 (Heikki’s on-demand download draft PR). High-Level Changes - New RemoteLayer Type - On-Demand Download As An Effect Of Page Reconstruction - Breaking Semantics For Physical Size Metrics There are several follow-up work items planned. Refer to the Epic issue on GitHub: https://github.com/neondatabase/neon/issues/2029 closes https://github.com/neondatabase/neon/pull/3013 Co-authored-by: Kirill Bulatov <kirill@neon.tech> Co-authored-by: Christian Schwarz <christian@neon.tech> New RemoteLayer Type ==================== Instead of downloading all layers during tenant attach, we create RemoteLayer instances for each of them and add them to the layer map. On-Demand Download As An Effect Of Page Reconstruction ====================================================== At the heart of pageserver is Timeline::get_reconstruct_data(). It traverses the layer map until it has collected all the data it needs to produce the page image. Most code in the code base uses it, though many layers of indirection. Before this patch, the function would use synchronous filesystem IO to load data from disk-resident layer files if the data was not cached. That is not possible with RemoteLayer, because the layer file has not been downloaded yet. So, we do the download when get_reconstruct_data gets there, i.e., “on demand”. The mechanics of how the download is done are rather involved, because of the infamous async-sync-async sandwich problem that plagues the async Rust world. We use the new PageReconstructResult type to work around this. Its introduction is the cause for a good amount of code churn in this patch. Refer to the block comment on `with_ondemand_download()` for details. Breaking Semantics For Physical Size Metrics ============================================ We rename prometheus metric pageserver_{current,resident}_physical_size to reflect what this metric actually represents with on-demand download. This intentionally BREAKS existing grafana dashboard and the cost model data pipeline. Breaking is desirable because the meaning of this metrics has changed with on-demand download. See https://docs.google.com/document/d/12AFpvKY-7FZdR5a4CaD6Ir_rI3QokdCLSPJ6upHxJBo/edit# for how we will handle this breakage. Likewise, we rename the new billing_metrics’s PhysicalSize => ResidentSize. This is not yet used anywhere, so, this is not a breaking change. There is still a field called TimelineInfo::current_physical_size. It is now the sum of the layer sizes in layer map, regardless of whether local or remote. To compute that sum, we added a new trait method PersistentLayer::file_size(). When updating the Python tests, we got rid of current_physical_size_non_incremental. An earlier commit removed it from the OpenAPI spec already, so this is not a breaking change. test_timeline_size.py has grown additional assertions on the resident_physical_size metric.
189 lines
7.7 KiB
Python
189 lines
7.7 KiB
Python
import concurrent.futures
|
|
import os
|
|
from typing import List, Tuple
|
|
|
|
import pytest
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
|
|
from fixtures.types import TenantId, TimelineId
|
|
|
|
|
|
# Test restarting page server, while safekeeper and compute node keep
|
|
# running.
|
|
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
|
env = neon_env_builder.init_start()
|
|
|
|
env.pageserver.allowed_errors.extend(
|
|
[
|
|
".*Failed to reconstruct the page.*",
|
|
".*could not find data for key.*",
|
|
".*is not active. Current state: Broken.*",
|
|
".*will not become active. Current state: Broken.*",
|
|
".*failed to load metadata.*",
|
|
".*could not load tenant.*load local timeline.*",
|
|
]
|
|
)
|
|
|
|
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
|
|
|
|
for n in range(4):
|
|
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
|
|
|
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
|
with pg.cursor() as cur:
|
|
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
|
cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")
|
|
pg.stop()
|
|
tenant_timelines.append((tenant_id, timeline_id, pg))
|
|
|
|
# Stop the pageserver
|
|
env.pageserver.stop()
|
|
|
|
# Leave the first timeline alone, but corrupt the others in different ways
|
|
(tenant0, timeline0, pg0) = tenant_timelines[0]
|
|
log.info(f"Timeline {tenant0}/{timeline0} is left intact")
|
|
|
|
(tenant1, timeline1, pg1) = tenant_timelines[1]
|
|
metadata_path = f"{env.repo_dir}/tenants/{tenant1}/timelines/{timeline1}/metadata"
|
|
f = open(metadata_path, "w")
|
|
f.write("overwritten with garbage!")
|
|
f.close()
|
|
log.info(f"Timeline {tenant1}/{timeline1} got its metadata spoiled")
|
|
|
|
(tenant2, timeline2, pg2) = tenant_timelines[2]
|
|
timeline_path = f"{env.repo_dir}/tenants/{tenant2}/timelines/{timeline2}/"
|
|
for filename in os.listdir(timeline_path):
|
|
if filename.startswith("00000"):
|
|
# Looks like a layer file. Remove it
|
|
os.remove(f"{timeline_path}/{filename}")
|
|
log.info(
|
|
f"Timeline {tenant2}/{timeline2} got its layer files removed (no remote storage enabled)"
|
|
)
|
|
|
|
(tenant3, timeline3, pg3) = tenant_timelines[3]
|
|
timeline_path = f"{env.repo_dir}/tenants/{tenant3}/timelines/{timeline3}/"
|
|
for filename in os.listdir(timeline_path):
|
|
if filename.startswith("00000"):
|
|
# Looks like a layer file. Corrupt it
|
|
f = open(f"{timeline_path}/{filename}", "w")
|
|
f.write("overwritten with garbage!")
|
|
f.close()
|
|
log.info(f"Timeline {tenant3}/{timeline3} got its layer files spoiled")
|
|
|
|
env.pageserver.start()
|
|
|
|
# Tenant 0 should still work
|
|
pg0.start()
|
|
assert pg0.safe_psql("SELECT COUNT(*) FROM t")[0][0] == 100
|
|
|
|
# But all others are broken
|
|
|
|
# First timeline would not get loaded into pageserver due to corrupt metadata file
|
|
with pytest.raises(
|
|
Exception, match=f"Tenant {tenant1} will not become active. Current state: Broken"
|
|
) as err:
|
|
pg1.start()
|
|
log.info(
|
|
f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
|
|
)
|
|
|
|
# Second timeline has no ancestors, only the metadata file and no layer files locally,
|
|
# and we don't have the remote storage enabled. It is loaded into memory, but getting
|
|
# the basebackup from it will fail.
|
|
with pytest.raises(
|
|
Exception, match=f"Tenant {tenant2} will not become active. Current state: Broken"
|
|
) as err:
|
|
pg2.start()
|
|
log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")
|
|
|
|
# Third timeline will also fail during basebackup, because the layer file is corrupt.
|
|
# It will fail when we try to read (and reconstruct) a page from it, ergo the error message.
|
|
# (We don't check layer file contents on startup, when loading the timeline)
|
|
with pytest.raises(Exception, match="Failed to reconstruct the page") as err:
|
|
pg3.start()
|
|
log.info(
|
|
f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
|
|
)
|
|
|
|
|
|
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
|
|
env = neon_simple_env
|
|
|
|
tenant_id, _ = env.neon_cli.create_tenant()
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
|
|
futures = [
|
|
executor.submit(
|
|
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
|
|
)
|
|
for i in range(4)
|
|
]
|
|
for future in futures:
|
|
future.result()
|
|
|
|
|
|
def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
|
|
env = neon_simple_env
|
|
pageserver_http = env.pageserver.http_client()
|
|
|
|
env.pageserver.allowed_errors.extend(
|
|
[
|
|
".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
|
|
".*Timeline got dropped without initializing, cleaning its files.*",
|
|
]
|
|
)
|
|
|
|
tenant_id, _ = env.neon_cli.create_tenant()
|
|
|
|
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
|
|
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
|
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
|
|
|
|
# Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
|
|
pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "return"))
|
|
with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
|
|
_ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id)
|
|
|
|
# Restart the page server
|
|
env.neon_cli.pageserver_stop(immediate=True)
|
|
env.neon_cli.pageserver_start()
|
|
|
|
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
|
|
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
|
assert (
|
|
new_tenant_timelines == old_tenant_timelines
|
|
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
|
|
|
|
timeline_dirs = [d for d in timelines_dir.iterdir()]
|
|
assert (
|
|
timeline_dirs == initial_timeline_dirs
|
|
), "pageserver should clean its temp timeline files on timeline creation failure"
|
|
|
|
|
|
def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv):
|
|
env = neon_simple_env
|
|
pageserver_http = env.pageserver.http_client()
|
|
|
|
tenant_id, _ = env.neon_cli.create_tenant()
|
|
|
|
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
|
|
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
|
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
|
|
|
|
# Introduce failpoint when creating a new timeline uninit mark, before any other files were created
|
|
pageserver_http.configure_failpoints(("after-timeline-uninit-mark-creation", "return"))
|
|
with pytest.raises(Exception, match="after-timeline-uninit-mark-creation"):
|
|
_ = env.neon_cli.create_timeline("test_timeline_create_break_after_uninit_mark", tenant_id)
|
|
|
|
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
|
|
# "New" timeline is not present in the list, allowing pageserver to retry the same request
|
|
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
|
assert (
|
|
new_tenant_timelines == old_tenant_timelines
|
|
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
|
|
|
|
timeline_dirs = [d for d in timelines_dir.iterdir()]
|
|
assert (
|
|
timeline_dirs == initial_timeline_dirs
|
|
), "pageserver should clean its temp timeline files on timeline creation failure"
|