mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
Fix test_pageserver_http_get_wal_receiver_success flaky test. (#1786)
Fixes #1768. ## Context Previously, to test `get_wal_receiver` API, we make run some DB transactions then call the API to check the latest message's LSN from the WAL receiver. However, this test won't work because it's not guaranteed that the WAL receiver will get the latest WAL from the postgres/safekeeper at the time of making the API call. This PR resolves the above issue by adding a "poll and wait" code that waits to retrieve the latest data from the WAL receiver. This PR also fixes a bug that tries to compare two hex LSNs, should convert to number before the comparison. See: https://github.com/neondatabase/neon/issues/1768#issuecomment-1133752122.
This commit is contained in:
@@ -1,11 +1,14 @@
|
||||
from typing import Optional
|
||||
from uuid import uuid4, UUID
|
||||
import pytest
|
||||
from fixtures.utils import lsn_from_hex
|
||||
from fixtures.zenith_fixtures import (
|
||||
DEFAULT_BRANCH_NAME,
|
||||
ZenithEnv,
|
||||
ZenithEnvBuilder,
|
||||
ZenithPageserverHttpClient,
|
||||
ZenithPageserverApiException,
|
||||
wait_until,
|
||||
)
|
||||
|
||||
|
||||
@@ -73,18 +76,35 @@ def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
|
||||
tenant_id, timeline_id = env.zenith_cli.create_tenant()
|
||||
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
|
||||
|
||||
res = client.wal_receiver_get(tenant_id, timeline_id)
|
||||
assert list(res.keys()) == [
|
||||
"thread_id",
|
||||
"wal_producer_connstr",
|
||||
"last_received_msg_lsn",
|
||||
"last_received_msg_ts",
|
||||
]
|
||||
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
|
||||
res = client.wal_receiver_get(tenant_id, timeline_id)
|
||||
|
||||
# make a DB modification then expect getting a new WAL receiver's data
|
||||
# a successful `wal_receiver_get` response must contain the below fields
|
||||
assert list(res.keys()) == [
|
||||
"thread_id",
|
||||
"wal_producer_connstr",
|
||||
"last_received_msg_lsn",
|
||||
"last_received_msg_ts",
|
||||
]
|
||||
|
||||
assert res["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
|
||||
|
||||
last_msg_lsn = lsn_from_hex(res["last_received_msg_lsn"])
|
||||
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
|
||||
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
|
||||
compared to the previous message's LSN {prev_msg_lsn}"
|
||||
|
||||
return last_msg_lsn
|
||||
|
||||
# Wait to make sure that we get a latest WAL receiver data.
|
||||
# We need to wait here because it's possible that we don't have access to
|
||||
# the latest WAL during the time the `wal_receiver_get` API is called.
|
||||
# See: https://github.com/neondatabase/neon/issues/1768.
|
||||
lsn = wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(None))
|
||||
|
||||
# Make a DB modification then expect getting a new WAL receiver's data.
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
res2 = client.wal_receiver_get(tenant_id, timeline_id)
|
||||
assert res2["last_received_msg_lsn"] > res["last_received_msg_lsn"]
|
||||
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
|
||||
|
||||
|
||||
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
|
||||
|
||||
Reference in New Issue
Block a user