Merge remote-tracking branch 'origin/main' into vlad/hadron-jwt

This commit is contained in:
Vlad Lazar
2025-07-31 11:29:07 +01:00
139 changed files with 3324 additions and 1217 deletions

View File

@@ -78,20 +78,26 @@ class EndpointHttpClient(requests.Session):
json: dict[str, str] = res.json()
return json
def prewarm_lfc(self, from_endpoint_id: str | None = None):
def prewarm_lfc(self, from_endpoint_id: str | None = None) -> dict[str, str]:
"""
Prewarm LFC cache from given endpoint and wait till it finishes or errors
"""
params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
self.post(self.prewarm_url, params=params).raise_for_status()
self.prewarm_lfc_wait()
return self.prewarm_lfc_wait()
def prewarm_lfc_wait(self):
def cancel_prewarm_lfc(self):
"""
Cancel LFC prewarm if any is ongoing
"""
self.delete(self.prewarm_url).raise_for_status()
def prewarm_lfc_wait(self) -> dict[str, str]:
"""
Wait till LFC prewarm returns with error or success.
If prewarm was not requested before calling this function, it will error
"""
statuses = "failed", "completed", "skipped"
statuses = "failed", "completed", "skipped", "cancelled"
def prewarmed():
json = self.prewarm_lfc_status()
@@ -101,6 +107,7 @@ class EndpointHttpClient(requests.Session):
wait_until(prewarmed, timeout=60)
res = self.prewarm_lfc_status()
assert res["status"] != "failed", res
return res
def offload_lfc_status(self) -> dict[str, str]:
res = self.get(self.offload_url)
@@ -108,29 +115,31 @@ class EndpointHttpClient(requests.Session):
json: dict[str, str] = res.json()
return json
def offload_lfc(self):
def offload_lfc(self) -> dict[str, str]:
"""
Offload LFC cache to endpoint storage and wait till offload finishes or errors
"""
self.post(self.offload_url).raise_for_status()
self.offload_lfc_wait()
return self.offload_lfc_wait()
def offload_lfc_wait(self):
def offload_lfc_wait(self) -> dict[str, str]:
"""
Wait till LFC offload returns with error or success.
If offload was not requested before calling this function, it will error
"""
statuses = "failed", "completed", "skipped"
def offloaded():
json = self.offload_lfc_status()
status, err = json["status"], json.get("error")
assert status in ["failed", "completed"], f"{status}, {err=}"
assert status in statuses, f"{status}, {err=}"
wait_until(offloaded, timeout=60)
res = self.offload_lfc_status()
assert res["status"] != "failed", res
return res
def promote(self, promote_spec: dict[str, Any], disconnect: bool = False):
def promote(self, promote_spec: dict[str, Any], disconnect: bool = False) -> dict[str, str]:
url = f"http://localhost:{self.external_port}/promote"
if disconnect:
try: # send first request to start promote and disconnect

View File

@@ -266,7 +266,6 @@ class PgProtocol:
# pooler does not support statement_timeout
# Check if the hostname contains the string 'pooler'
hostname = result.get("host", "")
log.info(f"Hostname: {hostname}")
options = result.get("options", "")
if "statement_timeout" not in options and "pooler" not in hostname:
options = f"-cstatement_timeout=120s {options}"
@@ -2370,6 +2369,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
timeline_id: TimelineId,
new_sk_set: list[int],
):
log.info(f"migrate_safekeepers({tenant_id}, {timeline_id}, {new_sk_set})")
response = self.request(
"POST",
f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate",
@@ -5336,16 +5336,32 @@ class EndpointFactory:
)
def stop_all(self, fail_on_error=True) -> Self:
exception = None
for ep in self.endpoints:
"""
Stop all the endpoints in parallel.
"""
# Note: raising an exception from a task in a task group cancels
# all the other tasks. We don't want that, hence the 'stop_one'
# function catches exceptions and puts them on the 'exceptions'
# list for later processing.
exceptions = []
async def stop_one(ep):
try:
ep.stop()
await asyncio.to_thread(ep.stop)
except Exception as e:
log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
exception = e
exceptions.append(e)
if fail_on_error and exception is not None:
raise exception
async def async_stop_all():
async with asyncio.TaskGroup() as tg:
for ep in self.endpoints:
tg.create_task(stop_one(ep))
asyncio.run(async_stop_all())
if fail_on_error and exceptions:
raise ExceptionGroup("stopping an endpoint failed", exceptions)
return self

View File

@@ -2,13 +2,15 @@ from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from fixtures.utils import wait_until
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder
def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("grpc", [True, False])
def test_basebackup_cache(neon_env_builder: NeonEnvBuilder, grpc: bool):
"""
Simple test for basebackup cache.
1. Check that we always hit the cache after compute restart.
@@ -22,7 +24,7 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
"""
env = neon_env_builder.init_start()
ep = env.endpoints.create("main")
ep = env.endpoints.create("main", grpc=grpc)
ps = env.pageserver
ps_http = ps.http_client()

View File

@@ -863,6 +863,89 @@ def test_pageserver_compaction_circuit_breaker(neon_env_builder: NeonEnvBuilder)
assert not env.pageserver.log_contains(".*Circuit breaker failure ended.*")
def test_ps_corruption_detection_feedback(neon_env_builder: NeonEnvBuilder):
"""
Test that when the pageserver detects corruption during image layer creation,
it sends corruption feedback to the safekeeper which gets recorded in its
safekeeper_ps_corruption_detected metric.
"""
# Configure tenant with aggressive compaction settings to easily trigger compaction
TENANT_CONF = {
# Small checkpoint distance to create many layers
"checkpoint_distance": 1024 * 128,
# Compact small layers
"compaction_target_size": 1024 * 128,
# Create image layers eagerly
"image_creation_threshold": 1,
"image_layer_creation_check_threshold": 0,
# Force frequent compaction
"compaction_period": "1s",
}
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
# We are simulating compaction failures so we should allow these error messages.
env.pageserver.allowed_errors.append(".*Compaction failed.*")
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
pageserver_http = env.pageserver.http_client()
workload = Workload(
env, tenant_id, timeline_id, endpoint_opts={"config_lines": ["neon.lakebase_mode=true"]}
)
workload.init()
# Enable the failpoint that will cause image layer creation to fail due to a (simulated) detected
# corruption.
pageserver_http.configure_failpoints(("create-image-layer-fail-simulated-corruption", "return"))
# Write some data to trigger compaction and image layer creation
log.info("Writing data to trigger compaction...")
workload.write_rows(1024 * 64, upload=False)
workload.write_rows(1024 * 64, upload=False)
# Returns True if the corruption signal from PS is propagated to the SK according to the "safekeeper_ps_corruption_detected" metric.
# Raises an exception otherwise.
def check_corruption_signal_propagated_to_sk():
# Get metrics from all safekeepers
for sk in env.safekeepers:
sk_metrics = sk.http_client().get_metrics()
# Look for our corruption detected metric with the right tenant and timeline
corruption_metrics = sk_metrics.query_all("safekeeper_ps_corruption_detected")
for metric in corruption_metrics:
# Check if there's a metric for our tenant and timeline that has value 1
if (
metric.labels.get("tenant_id") == str(tenant_id)
and metric.labels.get("timeline_id") == str(timeline_id)
and metric.value == 1
):
log.info(f"Corruption detected by safekeeper {sk.id}: {metric}")
return True
raise Exception("Corruption detection feedback not found in any safekeeper metrics")
# Returns True if the corruption signal from PS is propagated to the PG according to the "ps_corruption_detected" metric
# in "neon_perf_counters".
# Raises an exception otherwise.
def check_corruption_signal_propagated_to_pg():
endpoint = workload.endpoint()
results = endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon")
results = endpoint.safe_psql(
"SELECT value FROM neon_perf_counters WHERE metric = 'ps_corruption_detected'"
)
log.info("Query corruption detection metric, results: %s", results)
if results[0][0] == 1:
log.info("Corruption detection signal is raised on Postgres")
return True
raise Exception("Corruption detection signal is not raise on Postgres")
# Confirm that the corruption signal propagates to both the safekeeper and Postgres
wait_until(check_corruption_signal_propagated_to_sk, timeout=10, interval=0.1)
wait_until(check_corruption_signal_propagated_to_pg, timeout=10, interval=0.1)
# Cleanup the failpoint
pageserver_http.configure_failpoints(("create-image-layer-fail-simulated-corruption", "off"))
@pytest.mark.parametrize("enabled", [True, False])
def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool):
tenant_conf = {

View File

@@ -1,6 +1,6 @@
import random
import threading
from enum import StrEnum
from threading import Thread
from time import sleep
from typing import Any
@@ -47,19 +47,23 @@ def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor)
# With autoprewarm, we need to be sure LFC was offloaded after all writes
# finish, so we sleep. Otherwise we'll have less prewarmed pages than we want
sleep(AUTOOFFLOAD_INTERVAL_SECS)
client.offload_lfc_wait()
return
offload_res = client.offload_lfc_wait()
log.info(offload_res)
return offload_res
if method == PrewarmMethod.COMPUTE_CTL:
status = client.prewarm_lfc_status()
assert status["status"] == "not_prewarmed"
assert "error" not in status
client.offload_lfc()
offload_res = client.offload_lfc()
log.info(offload_res)
assert client.prewarm_lfc_status()["status"] == "not_prewarmed"
parsed = prom_parse(client)
desired = {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0, OFFLOAD_ERR_LABEL: 0, PREWARM_ERR_LABEL: 0}
assert parsed == desired, f"{parsed=} != {desired=}"
return
return offload_res
raise AssertionError(f"{method} not in PrewarmMethod")
@@ -68,21 +72,30 @@ def prewarm_endpoint(
method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor, lfc_state: str | None
):
if method == PrewarmMethod.AUTOPREWARM:
client.prewarm_lfc_wait()
prewarm_res = client.prewarm_lfc_wait()
log.info(prewarm_res)
elif method == PrewarmMethod.COMPUTE_CTL:
client.prewarm_lfc()
prewarm_res = client.prewarm_lfc()
log.info(prewarm_res)
return prewarm_res
elif method == PrewarmMethod.POSTGRES:
cur.execute("select neon.prewarm_local_cache(%s)", (lfc_state,))
def check_prewarmed(
def check_prewarmed_contains(
method: PrewarmMethod, client: EndpointHttpClient, desired_status: dict[str, str | int]
):
if method == PrewarmMethod.AUTOPREWARM:
assert client.prewarm_lfc_status() == desired_status
prewarm_status = client.prewarm_lfc_status()
for k in desired_status:
assert desired_status[k] == prewarm_status[k]
assert prom_parse(client)[PREWARM_LABEL] == 1
elif method == PrewarmMethod.COMPUTE_CTL:
assert client.prewarm_lfc_status() == desired_status
prewarm_status = client.prewarm_lfc_status()
for k in desired_status:
assert desired_status[k] == prewarm_status[k]
desired = {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1, PREWARM_ERR_LABEL: 0, OFFLOAD_ERR_LABEL: 0}
assert prom_parse(client) == desired
@@ -149,9 +162,6 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
log.info(f"Used LFC size: {lfc_used_pages}")
pg_cur.execute("select * from neon.get_prewarm_info()")
total, prewarmed, skipped, _ = pg_cur.fetchall()[0]
log.info(f"Prewarm info: {total=} {prewarmed=} {skipped=}")
progress = (prewarmed + skipped) * 100 // total
log.info(f"Prewarm progress: {progress}%")
assert lfc_used_pages > 10000
assert total > 0
assert prewarmed > 0
@@ -161,7 +171,54 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
check_prewarmed(method, client, desired)
check_prewarmed_contains(method, client, desired)
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
def test_lfc_prewarm_cancel(neon_simple_env: NeonEnv):
"""
Test we can cancel LFC prewarm and prewarm successfully after
"""
env = neon_simple_env
n_records = 1000000
cfg = [
"autovacuum = off",
"shared_buffers=1MB",
"neon.max_file_cache_size=1GB",
"neon.file_cache_size_limit=1GB",
"neon.file_cache_prewarm_limit=1000",
]
endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
pg_conn = endpoint.connect()
pg_cur = pg_conn.cursor()
pg_cur.execute("create schema neon; create extension neon with schema neon")
pg_cur.execute("create database lfc")
lfc_conn = endpoint.connect(dbname="lfc")
lfc_cur = lfc_conn.cursor()
log.info(f"Inserting {n_records} rows")
lfc_cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
log.info(f"Inserted {n_records} rows")
client = endpoint.http_client()
method = PrewarmMethod.COMPUTE_CTL
offload_lfc(method, client, pg_cur)
endpoint.stop()
endpoint.start()
thread = Thread(target=lambda: prewarm_endpoint(method, client, pg_cur, None))
thread.start()
# wait 2 seconds to ensure we cancel prewarm SQL query
sleep(2)
client.cancel_prewarm_lfc()
thread.join()
assert client.prewarm_lfc_status()["status"] == "cancelled"
prewarm_endpoint(method, client, pg_cur, None)
assert client.prewarm_lfc_status()["status"] == "completed"
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
@@ -178,9 +235,8 @@ def test_lfc_prewarm_empty(neon_simple_env: NeonEnv):
cur = conn.cursor()
cur.execute("create schema neon; create extension neon with schema neon")
method = PrewarmMethod.COMPUTE_CTL
offload_lfc(method, client, cur)
prewarm_endpoint(method, client, cur, None)
assert client.prewarm_lfc_status()["status"] == "skipped"
assert offload_lfc(method, client, cur)["status"] == "skipped"
assert prewarm_endpoint(method, client, cur, None)["status"] == "skipped"
# autoprewarm isn't needed as we prewarm manually
@@ -251,11 +307,11 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
workload_threads = []
for _ in range(n_threads):
t = threading.Thread(target=workload)
t = Thread(target=workload)
workload_threads.append(t)
t.start()
prewarm_thread = threading.Thread(target=prewarm)
prewarm_thread = Thread(target=prewarm)
prewarm_thread.start()
def prewarmed():

View File

@@ -129,7 +129,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
Test static endpoint is protected from GC by acquiring and renewing lsn leases.
"""
LSN_LEASE_LENGTH = 8
LSN_LEASE_LENGTH = (
14 # This value needs to be large enough for compute_ctl to send two lease requests.
)
neon_env_builder.num_pageservers = 2
# GC is manual triggered.
env = neon_env_builder.init_start(
@@ -230,6 +233,15 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
log.info(f"`SELECT` query succeed after GC, {ctx=}")
return offset
# It's not reliable to let the compute renew the lease in this test case as we have a very tight
# lease timeout. Therefore, the test case itself will renew the lease.
#
# This is a workaround to make the test case more deterministic.
def renew_lease(env: NeonEnv, lease_lsn: Lsn):
env.storage_controller.pageserver_api().timeline_lsn_lease(
env.initial_tenant, env.initial_timeline, lease_lsn
)
# Insert some records on main branch
with env.endpoints.create_start("main", config_lines=["shared_buffers=1MB"]) as ep_main:
with ep_main.cursor() as cur:
@@ -242,6 +254,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
XLOG_BLCKSZ = 8192
lsn = Lsn((int(lsn) // XLOG_BLCKSZ) * XLOG_BLCKSZ)
# We need to mock the way cplane works: it gets a lease for a branch before starting the compute.
renew_lease(env, lsn)
with env.endpoints.create_start(
branch_name="main",
endpoint_id="static",
@@ -251,9 +266,6 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
cur.execute("SELECT count(*) FROM t0")
assert cur.fetchone() == (ROW_COUNT,)
# Wait for static compute to renew lease at least once.
time.sleep(LSN_LEASE_LENGTH / 2)
generate_updates_on_main(env, ep_main, 3, end=100)
offset = trigger_gc_and_select(
@@ -263,10 +275,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
# Trigger Pageserver restarts
for ps in env.pageservers:
ps.stop()
# Static compute should have at least one lease request failure due to connection.
time.sleep(LSN_LEASE_LENGTH / 2)
ps.start()
renew_lease(env, lsn)
trigger_gc_and_select(
env,
ep_static,
@@ -282,6 +294,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
)
env.storage_controller.reconcile_until_idle()
# Wait for static compute to renew lease on the new pageserver.
time.sleep(LSN_LEASE_LENGTH + 3)
trigger_gc_and_select(
env,
ep_static,
@@ -292,7 +307,6 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
# Do some update so we can increment gc_cutoff
generate_updates_on_main(env, ep_main, i, end=100)
# Wait for the existing lease to expire.
time.sleep(LSN_LEASE_LENGTH + 1)
# Now trigger GC again, layers should be removed.

View File

@@ -286,3 +286,177 @@ def test_sk_generation_aware_tombstones(neon_env_builder: NeonEnvBuilder):
assert re.match(r".*Timeline .* deleted.*", exc.value.response.text)
# The timeline should remain deleted.
expect_deleted(second_sk)
def test_safekeeper_migration_stale_timeline(neon_env_builder: NeonEnvBuilder):
"""
Test that safekeeper migration handles stale timeline correctly by migrating to
a safekeeper with a stale timeline.
1. Check that we are waiting for the stale timeline to catch up with the commit lsn.
The migration might fail if there is no compute to advance the WAL.
2. Check that we rely on last_log_term (and not the current term) when waiting for the
sync_position on step 7.
3. Check that migration succeeds if the compute is running.
"""
neon_env_builder.num_safekeepers = 2
neon_env_builder.storage_controller_config = {
"timelines_onto_safekeepers": True,
"timeline_safekeeper_count": 1,
}
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
env.storage_controller.allowed_errors.append(".*not enough successful .* to reach quorum.*")
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
active_sk = env.get_safekeeper(mconf["sk_set"][0])
other_sk = [sk for sk in env.safekeepers if sk.id != active_sk.id][0]
ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
ep.start(safekeeper_generation=1, safekeepers=[active_sk.id])
ep.safe_psql("CREATE TABLE t(a int)")
ep.safe_psql("INSERT INTO t VALUES (0)")
# Pull the timeline to other_sk, so other_sk now has a "stale" timeline on it.
other_sk.pull_timeline([active_sk], env.initial_tenant, env.initial_timeline)
# Advance the WAL on active_sk.
ep.safe_psql("INSERT INTO t VALUES (1)")
# The test is more tricky if we have the same last_log_term but different term/flush_lsn.
# Stop the active_sk during the endpoint shutdown because otherwise compute_ctl runs
# sync_safekeepers and advances last_log_term on active_sk.
active_sk.stop()
ep.stop(mode="immediate")
active_sk.start()
active_sk_status = active_sk.http_client().timeline_status(
env.initial_tenant, env.initial_timeline
)
other_sk_status = other_sk.http_client().timeline_status(
env.initial_tenant, env.initial_timeline
)
# other_sk should have the same last_log_term, but a stale flush_lsn.
assert active_sk_status.last_log_term == other_sk_status.last_log_term
assert active_sk_status.flush_lsn > other_sk_status.flush_lsn
commit_lsn = active_sk_status.flush_lsn
# Bump the term on other_sk to make it higher than active_sk.
# This is to make sure we don't use current term instead of last_log_term in the algorithm.
other_sk.http_client().term_bump(
env.initial_tenant, env.initial_timeline, active_sk_status.term + 100
)
# TODO(diko): now it fails because the timeline on other_sk is stale and there is no compute
# to catch up it with active_sk. It might be fixed in https://databricks.atlassian.net/browse/LKB-946
# if we delete stale timelines before starting the migration.
# But the rest of the test is still valid: we should not lose committed WAL after the migration.
with pytest.raises(
StorageControllerApiException, match="not enough successful .* to reach quorum"
):
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, [other_sk.id]
)
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
assert mconf["new_sk_set"] == [other_sk.id]
assert mconf["sk_set"] == [active_sk.id]
assert mconf["generation"] == 2
# Start the endpoint, so it advances the WAL on other_sk.
ep.start(safekeeper_generation=2, safekeepers=[active_sk.id, other_sk.id])
# Now the migration should succeed.
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, [other_sk.id]
)
# Check that we didn't lose committed WAL.
assert (
other_sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline).flush_lsn
>= commit_lsn
)
assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
def test_pull_from_most_advanced_sk(neon_env_builder: NeonEnvBuilder):
"""
Test that we pull the timeline from the most advanced safekeeper during the
migration and do not lose committed WAL.
"""
neon_env_builder.num_safekeepers = 4
neon_env_builder.storage_controller_config = {
"timelines_onto_safekeepers": True,
"timeline_safekeeper_count": 3,
}
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
sk_set = mconf["sk_set"]
assert len(sk_set) == 3
other_sk = [sk.id for sk in env.safekeepers if sk.id not in sk_set][0]
ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
ep.start(safekeeper_generation=1, safekeepers=sk_set)
ep.safe_psql("CREATE TABLE t(a int)")
ep.safe_psql("INSERT INTO t VALUES (0)")
# Stop one sk, so we have a lagging WAL on it.
env.get_safekeeper(sk_set[0]).stop()
# Advance the WAL on the other sks.
ep.safe_psql("INSERT INTO t VALUES (1)")
# Stop other sks to make sure compute_ctl doesn't advance the last_log_term on them during shutdown.
for sk_id in sk_set[1:]:
env.get_safekeeper(sk_id).stop()
ep.stop(mode="immediate")
for sk_id in sk_set:
env.get_safekeeper(sk_id).start()
# Bump the term on the lagging sk to make sure we don't use it to choose the most advanced sk.
env.get_safekeeper(sk_set[0]).http_client().term_bump(
env.initial_tenant, env.initial_timeline, 100
)
def get_commit_lsn(sk_set: list[int]):
flush_lsns = []
last_log_terms = []
for sk_id in sk_set:
sk = env.get_safekeeper(sk_id)
status = sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline)
flush_lsns.append(status.flush_lsn)
last_log_terms.append(status.last_log_term)
# In this test we assume that all sks have the same last_log_term.
assert len(set(last_log_terms)) == 1
flush_lsns.sort(reverse=True)
commit_lsn = flush_lsns[len(sk_set) // 2]
log.info(f"sk_set: {sk_set}, flush_lsns: {flush_lsns}, commit_lsn: {commit_lsn}")
return commit_lsn
commit_lsn_before_migration = get_commit_lsn(sk_set)
# Make two migrations, so the lagging sk stays in the sk_set, but other sks are replaced.
new_sk_set1 = [sk_set[0], sk_set[1], other_sk] # remove sk_set[2], add other_sk
new_sk_set2 = [sk_set[0], other_sk, sk_set[2]] # remove sk_set[1], add sk_set[2] back
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, new_sk_set1
)
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, new_sk_set2
)
commit_lsn_after_migration = get_commit_lsn(new_sk_set2)
# We should not lose committed WAL.
# If we have choosen the lagging sk to pull the timeline from, this might fail.
assert commit_lsn_before_migration <= commit_lsn_after_migration
ep.start(safekeeper_generation=5, safekeepers=new_sk_set2)
assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import os
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING
@@ -768,6 +769,14 @@ def test_lsn_lease_storcon(neon_env_builder: NeonEnvBuilder):
"compaction_period": "0s",
}
env = neon_env_builder.init_start(initial_tenant_conf=conf)
# ShardSplit is slow in debug builds, so ignore the warning
if os.getenv("BUILD_TYPE", "debug") == "debug":
env.storage_controller.allowed_errors.extend(
[
".*Exclusive lock by ShardSplit was held.*",
]
)
with env.endpoints.create_start(
"main",
) as ep:

View File

@@ -2757,18 +2757,37 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
remote_storage_kind = s3_storage()
neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
# Set a very small disk usage limit (1KB)
neon_env_builder.safekeeper_extra_opts = ["--max-timeline-disk-usage-bytes=1024"]
env = neon_env_builder.init_start()
# Create a timeline and endpoint
env.create_branch("test_timeline_disk_usage_limit")
endpoint = env.endpoints.create_start("test_timeline_disk_usage_limit")
endpoint = env.endpoints.create_start(
"test_timeline_disk_usage_limit",
config_lines=[
"neon.lakebase_mode=true",
],
)
# Install the neon extension in the test database. We need it to query perf counter metrics.
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE EXTENSION IF NOT EXISTS neon")
# Sanity-check safekeeper connection status in neon_perf_counters in the happy case.
cur.execute(
"SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
)
assert cur.fetchone() == (1,), "Expected 1 active safekeeper"
cur.execute(
"SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
)
assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"
# Get the safekeeper
sk = env.safekeepers[0]
# Restart the safekeeper with a very small disk usage limit (1KB)
sk.stop().start(["--max-timeline-disk-usage-bytes=1024"])
# Inject a failpoint to stop WAL backup
with sk.http_client() as http_cli:
http_cli.configure_failpoints([("backup-lsn-range-pausable", "pause")])
@@ -2794,6 +2813,18 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
wait_until(error_logged)
log.info("Found expected error message in compute log, resuming.")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# Confirm that neon_perf_counters also indicates that there are no active safekeepers
cur.execute(
"SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
)
assert cur.fetchone() == (0,), "Expected 0 active safekeepers"
cur.execute(
"SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
)
assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"
# Sanity check that the hanging insert is indeed still hanging. Otherwise means the circuit breaker we
# implemented didn't work as expected.
time.sleep(2)