mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-26 07:39:58 +00:00
Fix test_lfc_prewarm: reduce number of prewarms, sleep before LFC offloading (#12515)
Fixes:
- Sleep before LFC offloading in `test_lfc_prewarm[autoprewarm]` to ensure the offloaded LFC is the one exported after all writes finish.
- Reduce the number of prewarms and increase the timeout in `test_lfc_prewarm_under_workload`, as debug builds were failing due to timeouts.

Additional changes:
- Remove `check_pinned_entries`: https://github.com/neondatabase/neon/pull/12447#discussion_r2185946210
- Fix LFC error metrics description: https://github.com/neondatabase/neon/pull/12486#discussion_r2190763107
This commit is contained in:
@@ -108,7 +108,7 @@ pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
|
|||||||
pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
|
pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
|
||||||
register_int_counter!(
|
register_int_counter!(
|
||||||
"compute_ctl_lfc_prewarm_errors_total",
|
"compute_ctl_lfc_prewarm_errors_total",
|
||||||
"Total number of LFC prewarms errors requested by compute_ctl or autoprewarm option",
|
"Total number of LFC prewarm errors",
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
@@ -124,7 +124,7 @@ pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
|
|||||||
pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
|
pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
|
||||||
register_int_counter!(
|
register_int_counter!(
|
||||||
"compute_ctl_lfc_offload_errors_total",
|
"compute_ctl_lfc_offload_errors_total",
|
||||||
"Total number of LFC offload errors requested by compute_ctl or lfc_offload_period_seconds option",
|
"Total number of LFC offload errors",
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import random
|
import random
|
||||||
import threading
|
import threading
|
||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
|
from time import sleep
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -24,18 +25,7 @@ OFFLOAD_LABEL = "compute_ctl_lfc_offloads_total"
|
|||||||
OFFLOAD_ERR_LABEL = "compute_ctl_lfc_offload_errors_total"
|
OFFLOAD_ERR_LABEL = "compute_ctl_lfc_offload_errors_total"
|
||||||
METHOD_VALUES = [e for e in PrewarmMethod]
|
METHOD_VALUES = [e for e in PrewarmMethod]
|
||||||
METHOD_IDS = [e.value for e in PrewarmMethod]
|
METHOD_IDS = [e.value for e in PrewarmMethod]
|
||||||
|
AUTOOFFLOAD_INTERVAL_SECS = 2
|
||||||
|
|
||||||
def check_pinned_entries(cur: Cursor):
|
|
||||||
"""
|
|
||||||
Wait till none of LFC buffers are pinned
|
|
||||||
"""
|
|
||||||
|
|
||||||
def none_pinned():
|
|
||||||
cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_chunks_pinned'")
|
|
||||||
assert cur.fetchall()[0][0] == 0
|
|
||||||
|
|
||||||
wait_until(none_pinned)
|
|
||||||
|
|
||||||
|
|
||||||
def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
|
def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
|
||||||
@@ -49,9 +39,18 @@ def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
|
|||||||
|
|
||||||
|
|
||||||
def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor) -> Any:
|
def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor) -> Any:
|
||||||
|
if method == PrewarmMethod.POSTGRES:
|
||||||
|
cur.execute("select get_local_cache_state()")
|
||||||
|
return cur.fetchall()[0][0]
|
||||||
|
|
||||||
if method == PrewarmMethod.AUTOPREWARM:
|
if method == PrewarmMethod.AUTOPREWARM:
|
||||||
|
# With autoprewarm, we need to be sure LFC was offloaded after all writes
|
||||||
|
# finish, so we sleep. Otherwise we'll have less prewarmed pages than we want
|
||||||
|
sleep(AUTOOFFLOAD_INTERVAL_SECS)
|
||||||
client.offload_lfc_wait()
|
client.offload_lfc_wait()
|
||||||
elif method == PrewarmMethod.COMPUTE_CTL:
|
return
|
||||||
|
|
||||||
|
if method == PrewarmMethod.COMPUTE_CTL:
|
||||||
status = client.prewarm_lfc_status()
|
status = client.prewarm_lfc_status()
|
||||||
assert status["status"] == "not_prewarmed"
|
assert status["status"] == "not_prewarmed"
|
||||||
assert "error" not in status
|
assert "error" not in status
|
||||||
@@ -60,11 +59,9 @@ def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor)
|
|||||||
parsed = prom_parse(client)
|
parsed = prom_parse(client)
|
||||||
desired = {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0, OFFLOAD_ERR_LABEL: 0, PREWARM_ERR_LABEL: 0}
|
desired = {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0, OFFLOAD_ERR_LABEL: 0, PREWARM_ERR_LABEL: 0}
|
||||||
assert parsed == desired, f"{parsed=} != {desired=}"
|
assert parsed == desired, f"{parsed=} != {desired=}"
|
||||||
elif method == PrewarmMethod.POSTGRES:
|
return
|
||||||
cur.execute("select get_local_cache_state()")
|
|
||||||
return cur.fetchall()[0][0]
|
raise AssertionError(f"{method} not in PrewarmMethod")
|
||||||
else:
|
|
||||||
raise AssertionError(f"{method} not in PrewarmMethod")
|
|
||||||
|
|
||||||
|
|
||||||
def prewarm_endpoint(
|
def prewarm_endpoint(
|
||||||
@@ -106,14 +103,13 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
|
|||||||
"neon.file_cache_size_limit=1GB",
|
"neon.file_cache_size_limit=1GB",
|
||||||
"neon.file_cache_prewarm_limit=1000",
|
"neon.file_cache_prewarm_limit=1000",
|
||||||
]
|
]
|
||||||
offload_secs = 2
|
|
||||||
|
|
||||||
if method == PrewarmMethod.AUTOPREWARM:
|
if method == PrewarmMethod.AUTOPREWARM:
|
||||||
endpoint = env.endpoints.create_start(
|
endpoint = env.endpoints.create_start(
|
||||||
branch_name="main",
|
branch_name="main",
|
||||||
config_lines=cfg,
|
config_lines=cfg,
|
||||||
autoprewarm=True,
|
autoprewarm=True,
|
||||||
offload_lfc_interval_seconds=offload_secs,
|
offload_lfc_interval_seconds=AUTOOFFLOAD_INTERVAL_SECS,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
|
endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
|
||||||
@@ -135,7 +131,7 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
|
|||||||
|
|
||||||
endpoint.stop()
|
endpoint.stop()
|
||||||
if method == PrewarmMethod.AUTOPREWARM:
|
if method == PrewarmMethod.AUTOPREWARM:
|
||||||
endpoint.start(autoprewarm=True, offload_lfc_interval_seconds=offload_secs)
|
endpoint.start(autoprewarm=True, offload_lfc_interval_seconds=AUTOOFFLOAD_INTERVAL_SECS)
|
||||||
else:
|
else:
|
||||||
endpoint.start()
|
endpoint.start()
|
||||||
|
|
||||||
@@ -162,7 +158,6 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
|
|||||||
lfc_cur.execute("select sum(pk) from t")
|
lfc_cur.execute("select sum(pk) from t")
|
||||||
assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
||||||
|
|
||||||
check_pinned_entries(pg_cur)
|
|
||||||
desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
|
desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
|
||||||
check_prewarmed(method, client, desired)
|
check_prewarmed(method, client, desired)
|
||||||
|
|
||||||
@@ -243,9 +238,9 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
|
|||||||
prewarm_thread.start()
|
prewarm_thread.start()
|
||||||
|
|
||||||
def prewarmed():
|
def prewarmed():
|
||||||
assert n_prewarms > 5
|
assert n_prewarms > 3
|
||||||
|
|
||||||
wait_until(prewarmed)
|
wait_until(prewarmed, timeout=40) # debug builds don't finish in 20s
|
||||||
|
|
||||||
running = False
|
running = False
|
||||||
for t in workload_threads:
|
for t in workload_threads:
|
||||||
@@ -256,7 +251,6 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
|
|||||||
total_balance = lfc_cur.fetchall()[0][0]
|
total_balance = lfc_cur.fetchall()[0][0]
|
||||||
assert total_balance == 0
|
assert total_balance == 0
|
||||||
|
|
||||||
check_pinned_entries(pg_cur)
|
|
||||||
if method == PrewarmMethod.POSTGRES:
|
if method == PrewarmMethod.POSTGRES:
|
||||||
return
|
return
|
||||||
desired = {
|
desired = {
|
||||||
|
|||||||
Reference in New Issue
Block a user