mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
Problem ------- Tests that directly call the Pageserver Management API to set tenant config are flaky if the Pageserver is managed by Storcon because Storcon is the source of truth and may (theoretically) reconcile a tenant at any time. Solution -------- Switch all users of `set_tenant_config`/`patch_tenant_config_client_side` to use the `env.storage_controller.pageserver_api()` Future Work ----------- Prevent regressions from creeping in. And generally clean up up tenant configuration. Maybe we can avoid the Pageserver having a default tenant config at all and put the default into Storcon instead? * => https://github.com/neondatabase/neon/issues/9621 Refs ---- fixes https://github.com/neondatabase/neon/issues/9522
250 lines
10 KiB
Python
250 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
import time
|
|
|
|
from fixtures.common_types import Lsn
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import NeonEnvBuilder, flush_ep_to_pageserver
|
|
from fixtures.pageserver.common_types import (
|
|
DeltaLayerName,
|
|
ImageLayerName,
|
|
is_future_layer,
|
|
)
|
|
from fixtures.pageserver.utils import (
|
|
wait_for_last_record_lsn,
|
|
wait_for_upload_queue_empty,
|
|
wait_until_tenant_active,
|
|
)
|
|
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
|
|
from fixtures.utils import query_scalar, wait_until
|
|
|
|
|
|
def test_issue_5878(neon_env_builder: NeonEnvBuilder):
|
|
"""
|
|
Regression test for issue https://github.com/neondatabase/neon/issues/5878 .
|
|
|
|
Create a situation where IndexPart contains an image layer from a future
|
|
(i.e., image layer > IndexPart::disk_consistent_lsn).
|
|
Detach.
|
|
Attach.
|
|
Wait for tenant to finish load_layer_map (by waiting for it to become active).
|
|
Wait for any remote timeline client ops to finish that the attach started.
|
|
Integrity-check the index part.
|
|
|
|
Before fixing the issue, load_layer_map would schedule removal of the future
|
|
image layer. A compaction run could later re-create the image layer with
|
|
the same file name, scheduling a PUT.
|
|
Due to lack of an upload queue barrier, the PUT and DELETE could be re-ordered.
|
|
The result was IndexPart referencing a non-existent object.
|
|
"""
|
|
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
|
|
|
env = neon_env_builder.init_configs()
|
|
env.start()
|
|
|
|
ps_http = env.pageserver.http_client()
|
|
|
|
l0_l1_threshold = 3
|
|
image_creation_threshold = 1
|
|
|
|
tenant_config = {
|
|
"gc_period": "0s", # disable GC (shouldn't matter for this test but still)
|
|
"compaction_period": "0s", # we want to control when compaction runs
|
|
"checkpoint_timeout": "24h", # something we won't reach
|
|
"checkpoint_distance": f"{50 * (1024**2)}", # something we won't reach, we checkpoint manually
|
|
"image_creation_threshold": "100", # we want to control when image is created
|
|
"image_layer_creation_check_threshold": "0",
|
|
"compaction_threshold": f"{l0_l1_threshold}",
|
|
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
|
|
}
|
|
|
|
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
|
|
|
|
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
|
|
|
def get_index_part():
|
|
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
|
ip_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
|
|
return env.pagectl.dump_index_part(ip_path)
|
|
|
|
def get_future_layers():
|
|
ip = get_index_part()
|
|
future_layers = [
|
|
layer_file_name
|
|
for layer_file_name in ip.layer_metadata.keys()
|
|
if is_future_layer(layer_file_name, ip.disk_consistent_lsn)
|
|
]
|
|
return future_layers
|
|
|
|
assert len(get_future_layers()) == 0
|
|
|
|
current = get_index_part()
|
|
assert len(set(current.layer_metadata.keys())) == 1
|
|
layer_file_name = list(current.layer_metadata.keys())[0]
|
|
assert isinstance(layer_file_name, DeltaLayerName)
|
|
assert layer_file_name.is_l0(), f"{layer_file_name}"
|
|
|
|
log.info("force image layer creation in the future by writing some data into in-memory layer")
|
|
|
|
# Create a number of layers in the tenant
|
|
with endpoint.cursor() as cur:
|
|
cur.execute("CREATE TABLE foo (t text)")
|
|
iters = l0_l1_threshold * image_creation_threshold
|
|
for i in range(0, iters):
|
|
cur.execute(
|
|
f"""
|
|
INSERT INTO foo
|
|
SELECT '{i}' || g
|
|
FROM generate_series(1, 10000) g
|
|
"""
|
|
)
|
|
last_record_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
|
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
|
|
# 0..iters-1: create a stack of delta layers
|
|
# iters: leave a non-empty in-memory layer which we'll use for image layer generation
|
|
if i < iters - 1:
|
|
ps_http.timeline_checkpoint(tenant_id, timeline_id, force_repartition=True)
|
|
assert (
|
|
len(
|
|
[
|
|
layer
|
|
for layer in ps_http.layer_map_info(
|
|
tenant_id, timeline_id
|
|
).historic_layers
|
|
if layer.kind == "Image"
|
|
]
|
|
)
|
|
== 0
|
|
)
|
|
last_record_lsn = flush_ep_to_pageserver(env, endpoint, tenant_id, timeline_id)
|
|
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
|
|
ip = get_index_part()
|
|
assert len(ip.layer_metadata.keys())
|
|
assert (
|
|
ip.disk_consistent_lsn < last_record_lsn
|
|
), "sanity check for what above loop is supposed to do"
|
|
|
|
# create the image layer from the future
|
|
env.storage_controller.pageserver_api().patch_tenant_config_client_side(
|
|
tenant_id, {"image_creation_threshold": image_creation_threshold}, None
|
|
)
|
|
assert ps_http.tenant_config(tenant_id).effective_config["image_creation_threshold"] == 1
|
|
ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
|
|
assert (
|
|
len(
|
|
[
|
|
layer
|
|
for layer in ps_http.layer_map_info(tenant_id, timeline_id).historic_layers
|
|
if layer.kind == "Image"
|
|
]
|
|
)
|
|
== 1
|
|
)
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
future_layers = get_future_layers()
|
|
assert len(future_layers) == 1
|
|
future_layer = future_layers[0]
|
|
assert isinstance(future_layer, ImageLayerName)
|
|
assert future_layer.lsn == last_record_lsn
|
|
log.info(
|
|
f"got layer from the future: lsn={future_layer.lsn} disk_consistent_lsn={ip.disk_consistent_lsn} last_record_lsn={last_record_lsn}"
|
|
)
|
|
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str()
|
|
)
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
pre_stat = future_layer_path.stat()
|
|
time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites
|
|
|
|
def get_generation_number():
|
|
attachment = env.storage_controller.inspect(tenant_id)
|
|
assert attachment is not None
|
|
return attachment[0]
|
|
|
|
# force removal of layers from the future
|
|
tenant_conf = ps_http.tenant_config(tenant_id)
|
|
generation_before_detach = get_generation_number()
|
|
env.pageserver.tenant_detach(tenant_id)
|
|
failpoint_name = "before-delete-layer-pausable"
|
|
|
|
ps_http.configure_failpoints((failpoint_name, "pause"))
|
|
env.pageserver.tenant_attach(tenant_id, tenant_conf.tenant_specific_overrides)
|
|
generation_after_reattach = get_generation_number()
|
|
wait_until_tenant_active(ps_http, tenant_id)
|
|
|
|
# Ensure the IndexPart upload that unlinks the layer file finishes, i.e., doesn't clog the queue.
|
|
def future_layer_is_gone_from_index_part():
|
|
future_layers = set(get_future_layers())
|
|
assert future_layer not in future_layers
|
|
|
|
wait_until(10, 0.5, future_layer_is_gone_from_index_part)
|
|
|
|
# NB: the layer file is unlinked index part now, but, because we made the delete
|
|
# operation stuck, the layer file itself is still in the remote_storage
|
|
wait_until(
|
|
10,
|
|
0.5,
|
|
lambda: env.pageserver.assert_log_contains(
|
|
f".*{tenant_id}.*at failpoint.*{failpoint_name}"
|
|
),
|
|
)
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str(), generation=generation_before_detach
|
|
)
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
assert future_layer_path.exists()
|
|
|
|
# wait for re-ingestion of the WAL from safekeepers into the in-memory layer
|
|
# (this happens in parallel to the above)
|
|
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
|
|
|
|
# re-do image layer generation
|
|
# This will produce the same image layer and queue an upload.
|
|
# However, we still have the deletion for the layer queued, stuck on the failpoint.
|
|
# An incorrect implementation would let the PUT execute before the DELETE.
|
|
# The later code in this test asserts that this doesn't happen.
|
|
ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
|
|
|
|
# Let things sit for some time; a good implementation makes no progress because
|
|
# we can't execute the PUT before the DELETE. A bad implementation would do that.
|
|
max_race_opportunity_window = 4
|
|
start = time.monotonic()
|
|
while True:
|
|
post_stat = future_layer_path.stat()
|
|
assert (
|
|
pre_stat.st_mtime == post_stat.st_mtime
|
|
), "observed PUT overtake the stucked DELETE => bug isn't fixed yet"
|
|
if time.monotonic() - start > max_race_opportunity_window:
|
|
log.info(
|
|
"a correct implementation would never let the later PUT overtake the earlier DELETE"
|
|
)
|
|
break
|
|
time.sleep(1)
|
|
|
|
# Window has passed, unstuck the delete, let upload queue drain.
|
|
log.info("unstuck the DELETE")
|
|
ps_http.configure_failpoints(("before-delete-layer-pausable", "off"))
|
|
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
|
|
# Examine the resulting S3 state.
|
|
log.info("integrity-check the remote storage")
|
|
ip = get_index_part()
|
|
for layer_file_name, layer_metadata in ip.layer_metadata.items():
|
|
log.info(f"Layer metadata {layer_file_name.to_str()}: {layer_metadata}")
|
|
layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, layer_file_name.to_str(), layer_metadata.generation
|
|
)
|
|
assert layer_path.exists(), f"{layer_file_name.to_str()}"
|
|
|
|
log.info("assert that the overwritten layer won")
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str(), generation=generation_after_reattach
|
|
)
|
|
final_stat = future_layer_path.stat()
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
assert final_stat.st_mtime != pre_stat.st_mtime
|