tests: add optional cursor to log_contains + fix truthiness issues in callers (#6960)
Extracted from https://github.com/neondatabase/neon/pull/6953
Part of https://github.com/neondatabase/neon/issues/5899

Core Change
-----------

In #6953, we need the ability to scan the log _after_ a specific line and ignore anything before that line.

This PR changes `log_contains` to return a tuple of `(matching line, cursor)`. Hand that cursor to a subsequent `log_contains` call to search the log for the next occurrence of the pattern.

Other Changes
-------------

- Inspect all call sites of `log_contains` and adjust them to handle the new tuple return type.
- The above inspection revealed that many callers weren't using `assert log_contains(...) is not None` but some weaker check that breaks if `log_contains` ever returns a non-None but falsy value. Fix that.
- The above changes revealed that `test_remote_storage_upload_queue_retries` was using `wait_until` incorrectly; after fixing the usage, I had to raise the `wait_until` timeout. So, maybe this will fix its flakiness.
Committed by GitHub · parent ee93700a0f · commit e9e77ee744
@@ -2180,6 +2180,11 @@ class NeonAttachmentService(MetricsGetter):
         self.stop(immediate=True)


+@dataclass
+class LogCursor:
+    _line_no: int
+
+
 class NeonPageserver(PgProtocol):
     """
     An object representing a running pageserver.
@@ -2343,7 +2348,18 @@ class NeonPageserver(PgProtocol):
         value = self.http_client().get_metric_value(metric)
         assert value == 0, f"Nonzero {metric} == {value}"

-    def log_contains(self, pattern: str) -> Optional[str]:
+    def assert_log_contains(
+        self, pattern: str, offset: None | LogCursor = None
+    ) -> Tuple[str, LogCursor]:
+        """Convenient for use inside wait_until()"""
+
+        res = self.log_contains(pattern, offset=offset)
+        assert res is not None
+        return res
+
+    def log_contains(
+        self, pattern: str, offset: None | LogCursor = None
+    ) -> Optional[Tuple[str, LogCursor]]:
         """Check that the pageserver log contains a line that matches the given regex"""
         logfile = self.workdir / "pageserver.log"
         if not logfile.exists():
@@ -2357,12 +2373,17 @@ class NeonPageserver(PgProtocol):
         # no guarantee it is already present in the log file. This hasn't
         # been a problem in practice, our python tests are not fast enough
         # to hit that race condition.
+        skip_until_line_no = 0 if offset is None else offset._line_no
+        cur_line_no = 0
         with logfile.open("r") as f:
             for line in f:
+                if cur_line_no < skip_until_line_no:
+                    cur_line_no += 1
+                    continue
                 if contains_re.search(line):
                     # found it!
-                    return line
-
+                    cur_line_no += 1
+                    return (line, LogCursor(cur_line_no))
         return None

     def tenant_attach(
@@ -20,7 +20,7 @@ def assert_tenant_state(
     tenant: TenantId,
     expected_state: str,
     message: Optional[str] = None,
-):
+) -> None:
     tenant_status = pageserver_http.tenant_status(tenant)
     log.info(f"tenant_status: {tenant_status}")
     assert tenant_status["state"]["slug"] == expected_state, message or tenant_status
@@ -292,7 +292,7 @@ def timeline_delete_wait_completed(
     iterations: int = 20,
     interval: Optional[float] = None,
     **delete_args,
-):
+) -> None:
     pageserver_http.timeline_delete(tenant_id=tenant_id, timeline_id=timeline_id, **delete_args)
     wait_timeline_detail_404(pageserver_http, tenant_id, timeline_id, iterations, interval)

@@ -302,7 +302,7 @@ def assert_prefix_empty(
     remote_storage: Optional[RemoteStorage],
     prefix: Optional[str] = None,
     allowed_postfix: Optional[str] = None,
-):
+) -> None:
     assert remote_storage is not None
     response = list_prefix(remote_storage, prefix)
     keys = response["KeyCount"]
@@ -369,7 +369,12 @@ def start_in_background(
     return spawned_process


-def wait_until(number_of_iterations: int, interval: float, func: Fn):
+WaitUntilRet = TypeVar("WaitUntilRet")
+
+
+def wait_until(
+    number_of_iterations: int, interval: float, func: Callable[[], WaitUntilRet]
+) -> WaitUntilRet:
     """
     Wait until 'func' returns successfully, without exception. Returns the
     last return value from the function.
@@ -387,6 +392,18 @@ def wait_until(number_of_iterations: int, interval: float, func: Fn):
     raise Exception("timed out while waiting for %s" % func) from last_exception


+def assert_eq(a, b) -> None:
+    assert a == b
+
+
+def assert_gt(a, b) -> None:
+    assert a > b
+
+
+def assert_ge(a, b) -> None:
+    assert a >= b
+
+
 def run_pg_bench_small(pg_bin: "PgBin", connstr: str):
     """
     Fast way to populate data.
@@ -63,10 +63,11 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
         ]
     )

-    def log_contains_bad_request():
-        env.pageserver.log_contains(".*Error processing HTTP request: Bad request")
-
-    wait_until(50, 0.1, log_contains_bad_request)
+    wait_until(
+        50,
+        0.1,
+        lambda: env.pageserver.assert_log_contains(".*Error processing HTTP request: Bad request"),
+    )


 def test_null_body(negative_env: NegativeTests):
@@ -200,7 +200,7 @@ class EvictionEnv:
         tenant_ps.http_client().timeline_wait_logical_size(tenant_id, timeline_id)

         def statvfs_called():
-            assert pageserver.log_contains(".*running mocked statvfs.*")
+            pageserver.assert_log_contains(".*running mocked statvfs.*")

         # we most likely have already completed multiple runs
         wait_until(10, 1, statvfs_called)
@@ -533,7 +533,7 @@ def test_pageserver_falls_back_to_global_lru(eviction_env: EvictionEnv, order: E
     assert actual_change >= target, "eviction must always evict more than target"

     time.sleep(1)  # give log time to flush
-    assert env.neon_env.pageserver.log_contains(GLOBAL_LRU_LOG_LINE)
+    env.neon_env.pageserver.assert_log_contains(GLOBAL_LRU_LOG_LINE)
     env.neon_env.pageserver.allowed_errors.append(".*" + GLOBAL_LRU_LOG_LINE)


@@ -767,7 +767,7 @@ def test_statvfs_error_handling(eviction_env: EvictionEnv):
         eviction_order=EvictionOrder.ABSOLUTE_ORDER,
     )

-    assert env.neon_env.pageserver.log_contains(".*statvfs failed.*EIO")
+    env.neon_env.pageserver.assert_log_contains(".*statvfs failed.*EIO")
     env.neon_env.pageserver.allowed_errors.append(".*statvfs failed.*EIO")


@@ -801,10 +801,9 @@ def test_statvfs_pressure_usage(eviction_env: EvictionEnv):
         eviction_order=EvictionOrder.ABSOLUTE_ORDER,
     )

-    def relieved_log_message():
-        assert env.neon_env.pageserver.log_contains(".*disk usage pressure relieved")
-
-    wait_until(10, 1, relieved_log_message)
+    wait_until(
+        10, 1, lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved")
+    )

     def less_than_max_usage_pct():
         post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
@@ -845,10 +844,9 @@ def test_statvfs_pressure_min_avail_bytes(eviction_env: EvictionEnv):
         eviction_order=EvictionOrder.ABSOLUTE_ORDER,
     )

-    def relieved_log_message():
-        assert env.neon_env.pageserver.log_contains(".*disk usage pressure relieved")
-
-    wait_until(10, 1, relieved_log_message)
+    wait_until(
+        10, 1, lambda: env.neon_env.pageserver.assert_log_contains(".*disk usage pressure relieved")
+    )

     def more_than_min_avail_bytes_freed():
         post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
@@ -36,7 +36,7 @@ def test_duplicate_layers(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
     pg_bin.run_capture(["pgbench", "-i", "-s1", connstr])

     time.sleep(10)  # let compaction to be performed
-    assert env.pageserver.log_contains("compact-level0-phase1-return-same")
+    env.pageserver.assert_log_contains("compact-level0-phase1-return-same")


 def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
@@ -184,10 +184,13 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):

     # NB: the layer file is unlinked index part now, but, because we made the delete
     # operation stuck, the layer file itself is still in the remote_storage
-    def delete_at_pause_point():
-        assert env.pageserver.log_contains(f".*{tenant_id}.*at failpoint.*{failpoint_name}")
-
-    wait_until(10, 0.5, delete_at_pause_point)
+    wait_until(
+        10,
+        0.5,
+        lambda: env.pageserver.assert_log_contains(
+            f".*{tenant_id}.*at failpoint.*{failpoint_name}"
+        ),
+    )
     future_layer_path = env.pageserver_remote_storage.remote_layer_path(
         tenant_id, timeline_id, future_layer.to_str(), generation=generation_before_detach
     )
@@ -34,7 +34,7 @@ def test_logging_event_count(neon_env_builder: NeonEnvBuilder, level: str):
     def assert_logged():
         if not log_expected:
             return
-        assert env.pageserver.log_contains(f".*{msg_id}.*")
+        env.pageserver.assert_log_contains(f".*{msg_id}.*")

     wait_until(10, 0.5, assert_logged)

@@ -432,7 +432,7 @@ def test_deletion_queue_recovery(

     main_pageserver.start()

-    def assert_deletions_submitted(n: int):
+    def assert_deletions_submitted(n: int) -> None:
         assert ps_http.get_metric_value("pageserver_deletion_queue_submitted_total") == n

     # After restart, issue a flush to kick the deletion frontend to do recovery.
@@ -28,7 +28,14 @@ from fixtures.remote_storage import (
     available_remote_storages,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
-from fixtures.utils import print_gc_result, query_scalar, wait_until
+from fixtures.utils import (
+    assert_eq,
+    assert_ge,
+    assert_gt,
+    print_gc_result,
+    query_scalar,
+    wait_until,
+)
 from requests import ReadTimeout


@@ -120,10 +127,10 @@ def test_remote_storage_backup_and_restore(
         log.info(f"upload of checkpoint {checkpoint_number} is done")

     # Check that we had to retry the uploads
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadLayer.*, will retry.*"
     )
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadMetadata.*, will retry.*"
     )

@@ -292,9 +299,9 @@ def test_remote_storage_upload_queue_retries(
     print_gc_result(gc_result)
     assert gc_result["layers_removed"] > 0

-    wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
-    wait_until(2, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
-    wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)
+    wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0))
+    wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0))
+    wait_until(2, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0))

     # let all future operations queue up
     configure_storage_sync_failpoints("return")
@@ -322,17 +329,17 @@ def test_remote_storage_upload_queue_retries(
     churn_while_failpoints_active_thread.start()

     # wait for churn thread's data to get stuck in the upload queue
-    wait_until(10, 0.1, lambda: get_queued_count(file_kind="layer", op_kind="upload") > 0)
-    wait_until(10, 0.1, lambda: get_queued_count(file_kind="index", op_kind="upload") >= 2)
-    wait_until(10, 0.1, lambda: get_queued_count(file_kind="layer", op_kind="delete") > 0)
+    wait_until(10, 0.5, lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="upload"), 0))
+    wait_until(10, 0.5, lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 2))
+    wait_until(10, 0.5, lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="delete"), 0))

     # unblock churn operations
     configure_storage_sync_failpoints("off")

     # ... and wait for them to finish. Exponential back-off in upload queue, so, gracious timeouts.
-    wait_until(30, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
-    wait_until(30, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
-    wait_until(30, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)
+    wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="upload"), 0))
+    wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="index", op_kind="upload"), 0))
+    wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0))

     # The churn thread doesn't make progress once it blocks on the first wait_completion() call,
     # so, give it some time to wrap up.
@@ -884,26 +891,23 @@ def wait_upload_queue_empty(
     wait_until(
         2,
         1,
-        lambda: get_queued_count(
-            client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"
-        )
-        == 0,
+        lambda: assert_eq(
+            get_queued_count(client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"), 0
+        ),
     )
     wait_until(
         2,
         1,
-        lambda: get_queued_count(
-            client, tenant_id, timeline_id, file_kind="index", op_kind="upload"
-        )
-        == 0,
+        lambda: assert_eq(
+            get_queued_count(client, tenant_id, timeline_id, file_kind="index", op_kind="upload"), 0
+        ),
     )
     wait_until(
         2,
         1,
-        lambda: get_queued_count(
-            client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"
-        )
-        == 0,
+        lambda: assert_eq(
+            get_queued_count(client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"), 0
+        ),
     )

@@ -116,7 +116,7 @@ def test_sharding_service_smoke(
     # Marking a pageserver offline should migrate tenants away from it.
     env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})

-    def node_evacuated(node_id: int):
+    def node_evacuated(node_id: int) -> None:
         counts = get_node_shard_counts(env, tenant_ids)
         assert counts[node_id] == 0

@@ -405,7 +405,7 @@ def test_sharding_service_compute_hook(

     env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})

-    def node_evacuated(node_id: int):
+    def node_evacuated(node_id: int) -> None:
         counts = get_node_shard_counts(env, [env.initial_tenant])
         assert counts[node_id] == 0

@@ -505,10 +505,10 @@ def test_tenant_delete_concurrent(
         return ps_http.tenant_delete(tenant_id)

     def hit_remove_failpoint():
-        assert env.pageserver.log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}")
+        env.pageserver.assert_log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}")

     def hit_run_failpoint():
-        assert env.pageserver.log_contains(f"at failpoint {BEFORE_RUN_FAILPOINT}")
+        env.pageserver.assert_log_contains(f"at failpoint {BEFORE_RUN_FAILPOINT}")

     with concurrent.futures.ThreadPoolExecutor() as executor:
         background_200_req = executor.submit(delete_tenant)
@@ -612,12 +612,12 @@ def test_tenant_delete_races_timeline_creation(
     Thread(target=timeline_create).start()

     def hit_initdb_upload_failpoint():
-        assert env.pageserver.log_contains(f"at failpoint {BEFORE_INITDB_UPLOAD_FAILPOINT}")
+        env.pageserver.assert_log_contains(f"at failpoint {BEFORE_INITDB_UPLOAD_FAILPOINT}")

     wait_until(100, 0.1, hit_initdb_upload_failpoint)

     def creation_connection_timed_out():
-        assert env.pageserver.log_contains(
+        env.pageserver.assert_log_contains(
             "POST.*/timeline.* request was dropped before completing"
         )

@@ -636,7 +636,7 @@ def test_tenant_delete_races_timeline_creation(
     Thread(target=tenant_delete).start()

     def deletion_arrived():
-        assert env.pageserver.log_contains(
+        env.pageserver.assert_log_contains(
             f"cfg failpoint: {DELETE_BEFORE_CLEANUP_FAILPOINT} pause"
         )

@@ -663,7 +663,7 @@ def test_tenant_delete_races_timeline_creation(
     )

     # Ensure that creation cancelled and deletion didn't end up in broken state or encountered the leftover temp file
-    assert env.pageserver.log_contains(CANCELLED_ERROR)
+    env.pageserver.assert_log_contains(CANCELLED_ERROR)
     assert not env.pageserver.log_contains(
         ".*ERROR.*delete_tenant.*Timelines directory is not empty after all timelines deletion"
     )
@@ -92,10 +92,10 @@ def test_tenant_reattach(neon_env_builder: NeonEnvBuilder, mode: str):
     wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)

     # Check that we had to retry the uploads
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadLayer.*, will retry.*"
     )
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadMetadata.*, will retry.*"
    )

@@ -495,7 +495,7 @@ def test_emergency_relocate_with_branches_slow_replay(
         assert cur.fetchall() == [("before pause",), ("after pause",)]

     # Sanity check that the failpoint was reached
-    assert env.pageserver.log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
+    env.pageserver.assert_log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
     assert time.time() - before_attach_time > 5

     # Clean up
@@ -632,7 +632,7 @@ def test_emergency_relocate_with_branches_createdb(
         assert query_scalar(cur, "SELECT count(*) FROM test_migrate_one") == 200

     # Sanity check that the failpoint was reached
-    assert env.pageserver.log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
+    env.pageserver.assert_log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
     assert time.time() - before_attach_time > 5

     # Clean up
@@ -147,10 +147,10 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder):
         log.info(f"upload of checkpoint {checkpoint_number} is done")

     # Check that we had to retry the uploads
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadLayer.*, will retry.*"
     )
-    assert env.pageserver.log_contains(
+    env.pageserver.assert_log_contains(
         ".*failed to perform remote task UploadMetadata.*, will retry.*"
     )

@@ -179,6 +179,6 @@ def test_threshold_based_eviction(
     assert len(post.remote_layers) > 0, "some layers should be evicted once it's stabilized"
     assert len(post.local_layers) > 0, "the imitate accesses should keep some layers resident"

-    assert env.pageserver.log_contains(
-        metrics_refused_log_line
+    assert (
+        env.pageserver.log_contains(metrics_refused_log_line) is not None
     ), "ensure the metrics collection worker ran"
@@ -89,6 +89,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
     assert timeline_path.exists()

     # retry deletes when compaction or gc is running in pageserver
+    # TODO: review whether this wait_until is actually necessary, we do an await() internally
     wait_until(
         number_of_iterations=3,
         interval=0.2,
@@ -531,7 +532,7 @@ def test_concurrent_timeline_delete_stuck_on(
     try:

         def first_call_hit_failpoint():
-            assert env.pageserver.log_contains(
+            env.pageserver.assert_log_contains(
                 f".*{child_timeline_id}.*at failpoint {stuck_failpoint}"
             )

@@ -602,7 +603,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
     at_failpoint_log_message = f".*{child_timeline_id}.*at failpoint {failpoint_name}.*"

     def hit_failpoint():
-        assert env.pageserver.log_contains(at_failpoint_log_message)
+        env.pageserver.assert_log_contains(at_failpoint_log_message)

     wait_until(50, 0.1, hit_failpoint)

@@ -612,7 +613,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
     env.pageserver.allowed_errors.append(hangup_log_message)

     def got_hangup_log_message():
-        assert env.pageserver.log_contains(hangup_log_message)
+        env.pageserver.assert_log_contains(hangup_log_message)

     wait_until(50, 0.1, got_hangup_log_message)

@@ -624,7 +625,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):

     def first_request_finished():
         message = f".*DELETE.*{child_timeline_id}.*Cancelled request finished"
-        assert env.pageserver.log_contains(message)
+        env.pageserver.assert_log_contains(message)

     wait_until(50, 0.1, first_request_finished)

@@ -759,7 +760,7 @@ def test_delete_orphaned_objects(

     for orphan in orphans:
         assert not orphan.exists()
-        assert env.pageserver.log_contains(
+        env.pageserver.assert_log_contains(
             f"deleting a file not referenced from index_part.json name={orphan.stem}"
         )
