mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
## Problem https://neon-github-public-dev.s3.amazonaws.com/reports/pr-6254/7388706419/index.html#suites/5a4b8734277a9878cb429b80c314f470/e54c4f6f6ed22672 ## Summary of changes Permit the log message: because the test helper's detach function increments the generation number, a detach/attach cycle can cause the error if the test runner node is slow enough for the opportunistic deletion queue flush on detach not to complete by the time we call attach.
247 lines
10 KiB
Python
247 lines
10 KiB
Python
import time
|
|
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
|
from fixtures.pageserver.types import (
|
|
DeltaLayerFileName,
|
|
ImageLayerFileName,
|
|
is_future_layer,
|
|
)
|
|
from fixtures.pageserver.utils import (
|
|
wait_for_last_record_lsn,
|
|
wait_for_upload_queue_empty,
|
|
wait_until_tenant_active,
|
|
)
|
|
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
|
|
from fixtures.types import Lsn
|
|
from fixtures.utils import query_scalar, wait_until
|
|
|
|
|
|
def test_issue_5878(neon_env_builder: NeonEnvBuilder):
|
|
"""
|
|
Regression test for issue https://github.com/neondatabase/neon/issues/5878 .
|
|
|
|
Create a situation where IndexPart contains an image layer from a future
|
|
(i.e., image layer > IndexPart::disk_consistent_lsn).
|
|
Detach.
|
|
Attach.
|
|
Wait for tenant to finish load_layer_map (by waiting for it to become active).
|
|
Wait for any remote timeline client ops to finish that the attach started.
|
|
Integrity-check the index part.
|
|
|
|
Before fixing the issue, load_layer_map would schedule removal of the future
|
|
image layer. A compaction run could later re-create the image layer with
|
|
the same file name, scheduling a PUT.
|
|
Due to lack of an upload queue barrier, the PUT and DELETE could be re-ordered.
|
|
The result was IndexPart referencing a non-existent object.
|
|
"""
|
|
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
|
|
|
env = neon_env_builder.init_start()
|
|
env.pageserver.allowed_errors.extend(
|
|
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
|
|
)
|
|
|
|
ps_http = env.pageserver.http_client()
|
|
|
|
l0_l1_threshold = 3
|
|
image_creation_threshold = 1
|
|
|
|
tenant_config = {
|
|
"gc_period": "0s", # disable GC (shouldn't matter for this test but still)
|
|
"compaction_period": "0s", # we want to control when compaction runs
|
|
"checkpoint_timeout": "24h", # something we won't reach
|
|
"checkpoint_distance": f"{50 * (1024**2)}", # something we won't reach, we checkpoint manually
|
|
"image_creation_threshold": "100", # we want to control when image is created
|
|
"compaction_threshold": f"{l0_l1_threshold}",
|
|
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
|
|
}
|
|
|
|
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
|
|
|
|
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
|
|
|
def get_index_part():
|
|
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
|
ip_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
|
|
return env.pagectl.dump_index_part(ip_path)
|
|
|
|
def get_future_layers():
|
|
ip = get_index_part()
|
|
future_layers = [
|
|
layer_file_name
|
|
for layer_file_name in ip.layer_metadata.keys()
|
|
if is_future_layer(layer_file_name, ip.disk_consistent_lsn)
|
|
]
|
|
return future_layers
|
|
|
|
assert len(get_future_layers()) == 0
|
|
|
|
current = get_index_part()
|
|
assert len(set(current.layer_metadata.keys())) == 1
|
|
layer_file_name = list(current.layer_metadata.keys())[0]
|
|
assert isinstance(layer_file_name, DeltaLayerFileName)
|
|
assert layer_file_name.is_l0(), f"{layer_file_name}"
|
|
|
|
log.info("force image layer creation in the future by writing some data into in-memory layer")
|
|
|
|
# Create a number of layers in the tenant
|
|
with endpoint.cursor() as cur:
|
|
cur.execute("CREATE TABLE foo (t text)")
|
|
iters = l0_l1_threshold * image_creation_threshold
|
|
for i in range(0, iters):
|
|
cur.execute(
|
|
f"""
|
|
INSERT INTO foo
|
|
SELECT '{i}' || g
|
|
FROM generate_series(1, 10000) g
|
|
"""
|
|
)
|
|
last_record_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
|
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
|
|
# 0..iters-1: create a stack of delta layers
|
|
# iters: leave a non-empty in-memory layer which we'll use for image layer generation
|
|
if i < iters - 1:
|
|
ps_http.timeline_checkpoint(tenant_id, timeline_id, force_repartition=True)
|
|
assert (
|
|
len(
|
|
[
|
|
layer
|
|
for layer in ps_http.layer_map_info(
|
|
tenant_id, timeline_id
|
|
).historic_layers
|
|
if layer.kind == "Image"
|
|
]
|
|
)
|
|
== 0
|
|
)
|
|
|
|
endpoint.stop()
|
|
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
|
|
ip = get_index_part()
|
|
assert len(ip.layer_metadata.keys())
|
|
assert (
|
|
ip.disk_consistent_lsn < last_record_lsn
|
|
), "sanity check for what above loop is supposed to do"
|
|
|
|
# create the image layer from the future
|
|
ps_http.patch_tenant_config_client_side(
|
|
tenant_id, {"image_creation_threshold": image_creation_threshold}, None
|
|
)
|
|
assert ps_http.tenant_config(tenant_id).effective_config["image_creation_threshold"] == 1
|
|
ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
|
|
assert (
|
|
len(
|
|
[
|
|
layer
|
|
for layer in ps_http.layer_map_info(tenant_id, timeline_id).historic_layers
|
|
if layer.kind == "Image"
|
|
]
|
|
)
|
|
== 1
|
|
)
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
future_layers = get_future_layers()
|
|
assert len(future_layers) == 1
|
|
future_layer = future_layers[0]
|
|
assert isinstance(future_layer, ImageLayerFileName)
|
|
assert future_layer.lsn == last_record_lsn
|
|
log.info(
|
|
f"got layer from the future: lsn={future_layer.lsn} disk_consistent_lsn={ip.disk_consistent_lsn} last_record_lsn={last_record_lsn}"
|
|
)
|
|
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str()
|
|
)
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
pre_stat = future_layer_path.stat()
|
|
time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites
|
|
|
|
def get_generation_number():
|
|
attachment = env.attachment_service.inspect(tenant_id)
|
|
assert attachment is not None
|
|
return attachment[0]
|
|
|
|
# force removal of layers from the future
|
|
tenant_conf = ps_http.tenant_config(tenant_id)
|
|
generation_before_detach = get_generation_number()
|
|
env.pageserver.tenant_detach(tenant_id)
|
|
failpoint_name = "before-delete-layer-pausable"
|
|
|
|
ps_http.configure_failpoints((failpoint_name, "pause"))
|
|
env.pageserver.tenant_attach(tenant_id, tenant_conf.tenant_specific_overrides)
|
|
generation_after_reattach = get_generation_number()
|
|
wait_until_tenant_active(ps_http, tenant_id)
|
|
|
|
# Ensure the IndexPart upload that unlinks the layer file finishes, i.e., doesn't clog the queue.
|
|
def future_layer_is_gone_from_index_part():
|
|
future_layers = set(get_future_layers())
|
|
assert future_layer not in future_layers
|
|
|
|
wait_until(10, 0.5, future_layer_is_gone_from_index_part)
|
|
|
|
# NB: the layer file is unlinked index part now, but, because we made the delete
|
|
# operation stuck, the layer file itself is still in the remote_storage
|
|
def delete_at_pause_point():
|
|
assert env.pageserver.log_contains(f".*{tenant_id}.*at failpoint.*{failpoint_name}")
|
|
|
|
wait_until(10, 0.5, delete_at_pause_point)
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str(), generation=generation_before_detach
|
|
)
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
assert future_layer_path.exists()
|
|
|
|
# wait for re-ingestion of the WAL from safekeepers into the in-memory layer
|
|
# (this happens in parallel to the above)
|
|
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_record_lsn)
|
|
|
|
# re-do image layer generation
|
|
# This will produce the same image layer and queue an upload.
|
|
# However, we still have the deletion for the layer queued, stuck on the failpoint.
|
|
# An incorrect implementation would let the PUT execute before the DELETE.
|
|
# The later code in this test asserts that this doesn't happen.
|
|
ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
|
|
|
|
# Let things sit for some time; a good implementation makes no progress because
|
|
# we can't execute the PUT before the DELETE. A bad implementation would do that.
|
|
max_race_opportunity_window = 4
|
|
start = time.monotonic()
|
|
while True:
|
|
post_stat = future_layer_path.stat()
|
|
assert (
|
|
pre_stat.st_mtime == post_stat.st_mtime
|
|
), "observed PUT overtake the stucked DELETE => bug isn't fixed yet"
|
|
if time.monotonic() - start > max_race_opportunity_window:
|
|
log.info(
|
|
"a correct implementation would never let the later PUT overtake the earlier DELETE"
|
|
)
|
|
break
|
|
time.sleep(1)
|
|
|
|
# Window has passed, unstuck the delete, let upload queue drain.
|
|
log.info("unstuck the DELETE")
|
|
ps_http.configure_failpoints(("before-delete-layer-pausable", "off"))
|
|
|
|
wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id)
|
|
|
|
# Examine the resulting S3 state.
|
|
log.info("integrity-check the remote storage")
|
|
ip = get_index_part()
|
|
for layer_file_name, layer_metadata in ip.layer_metadata.items():
|
|
log.info(f"Layer metadata {layer_file_name.to_str()}: {layer_metadata}")
|
|
layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, layer_file_name.to_str(), layer_metadata.generation
|
|
)
|
|
assert layer_path.exists(), f"{layer_file_name.to_str()}"
|
|
|
|
log.info("assert that the overwritten layer won")
|
|
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
|
|
tenant_id, timeline_id, future_layer.to_str(), generation=generation_after_reattach
|
|
)
|
|
final_stat = future_layer_path.stat()
|
|
log.info(f"future layer path: {future_layer_path}")
|
|
assert final_stat.st_mtime != pre_stat.st_mtime
|