mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-26 01:20:38 +00:00
ingest: rate-limited warning if WAL commit timestamps lag by more than wait_lsn_timeout (#8839)
refs https://github.com/neondatabase/cloud/issues/13750
The logging in this commit will make it easier to detect lagging ingest. We're trusting compute timestamps; ideally we'd use SK timestamps instead, but trusting the compute timestamp is OK for now.
This commit is contained in:
committed by
GitHub
parent
cfa45ff5ee
commit
c2f8fdccd7
@@ -173,6 +173,11 @@ def test_backward_compatibility(
|
||||
try:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
|
||||
# check_neon_works does recovery from WAL => the compatibility snapshot's WAL is old => will log this warning
|
||||
ingest_lag_log_line = (
|
||||
".*ingesting record with timestamp lagging more than wait_lsn_timeout.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
neon_env_builder.start()
|
||||
|
||||
check_neon_works(
|
||||
@@ -181,6 +186,9 @@ def test_backward_compatibility(
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
|
||||
env.pageserver.assert_log_contains(ingest_lag_log_line)
|
||||
|
||||
except Exception:
|
||||
if breaking_changes_allowed:
|
||||
pytest.xfail(
|
||||
|
||||
@@ -62,6 +62,12 @@ def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuil
|
||||
elements_to_insert = 1_000_000
|
||||
expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"
|
||||
env.pageserver.allowed_errors.append(f".*{expected_timeout_error}.*")
|
||||
# we configure wait_lsn_timeout to a shorter value than the lagging_wal_timeout / walreceiver_connect_timeout
|
||||
# => after we run into a timeout and reconnect to a different SK, more time than wait_lsn_timeout has passed
|
||||
# ==> we log this error
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*ingesting record with timestamp lagging more than wait_lsn_timeout.*"
|
||||
)
|
||||
|
||||
insert_test_elements(env, tenant_id, start=0, count=elements_to_insert)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user