mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-25 09:00:37 +00:00
Merge remote-tracking branch 'origin/main' into vlad/hadron-jwt
This commit is contained in:
@@ -78,20 +78,26 @@ class EndpointHttpClient(requests.Session):
|
||||
json: dict[str, str] = res.json()
|
||||
return json
|
||||
|
||||
def prewarm_lfc(self, from_endpoint_id: str | None = None):
|
||||
def prewarm_lfc(self, from_endpoint_id: str | None = None) -> dict[str, str]:
|
||||
"""
|
||||
Prewarm LFC cache from given endpoint and wait till it finishes or errors
|
||||
"""
|
||||
params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
|
||||
self.post(self.prewarm_url, params=params).raise_for_status()
|
||||
self.prewarm_lfc_wait()
|
||||
return self.prewarm_lfc_wait()
|
||||
|
||||
def prewarm_lfc_wait(self):
|
||||
def cancel_prewarm_lfc(self):
|
||||
"""
|
||||
Cancel LFC prewarm if any is ongoing
|
||||
"""
|
||||
self.delete(self.prewarm_url).raise_for_status()
|
||||
|
||||
def prewarm_lfc_wait(self) -> dict[str, str]:
|
||||
"""
|
||||
Wait till LFC prewarm returns with error or success.
|
||||
If prewarm was not requested before calling this function, it will error
|
||||
"""
|
||||
statuses = "failed", "completed", "skipped"
|
||||
statuses = "failed", "completed", "skipped", "cancelled"
|
||||
|
||||
def prewarmed():
|
||||
json = self.prewarm_lfc_status()
|
||||
@@ -101,6 +107,7 @@ class EndpointHttpClient(requests.Session):
|
||||
wait_until(prewarmed, timeout=60)
|
||||
res = self.prewarm_lfc_status()
|
||||
assert res["status"] != "failed", res
|
||||
return res
|
||||
|
||||
def offload_lfc_status(self) -> dict[str, str]:
|
||||
res = self.get(self.offload_url)
|
||||
@@ -108,29 +115,31 @@ class EndpointHttpClient(requests.Session):
|
||||
json: dict[str, str] = res.json()
|
||||
return json
|
||||
|
||||
def offload_lfc(self):
|
||||
def offload_lfc(self) -> dict[str, str]:
|
||||
"""
|
||||
Offload LFC cache to endpoint storage and wait till offload finishes or errors
|
||||
"""
|
||||
self.post(self.offload_url).raise_for_status()
|
||||
self.offload_lfc_wait()
|
||||
return self.offload_lfc_wait()
|
||||
|
||||
def offload_lfc_wait(self):
|
||||
def offload_lfc_wait(self) -> dict[str, str]:
|
||||
"""
|
||||
Wait till LFC offload returns with error or success.
|
||||
If offload was not requested before calling this function, it will error
|
||||
"""
|
||||
statuses = "failed", "completed", "skipped"
|
||||
|
||||
def offloaded():
|
||||
json = self.offload_lfc_status()
|
||||
status, err = json["status"], json.get("error")
|
||||
assert status in ["failed", "completed"], f"{status}, {err=}"
|
||||
assert status in statuses, f"{status}, {err=}"
|
||||
|
||||
wait_until(offloaded, timeout=60)
|
||||
res = self.offload_lfc_status()
|
||||
assert res["status"] != "failed", res
|
||||
return res
|
||||
|
||||
def promote(self, promote_spec: dict[str, Any], disconnect: bool = False):
|
||||
def promote(self, promote_spec: dict[str, Any], disconnect: bool = False) -> dict[str, str]:
|
||||
url = f"http://localhost:{self.external_port}/promote"
|
||||
if disconnect:
|
||||
try: # send first request to start promote and disconnect
|
||||
|
||||
@@ -266,7 +266,6 @@ class PgProtocol:
|
||||
# pooler does not support statement_timeout
|
||||
# Check if the hostname contains the string 'pooler'
|
||||
hostname = result.get("host", "")
|
||||
log.info(f"Hostname: {hostname}")
|
||||
options = result.get("options", "")
|
||||
if "statement_timeout" not in options and "pooler" not in hostname:
|
||||
options = f"-cstatement_timeout=120s {options}"
|
||||
@@ -2370,6 +2369,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
timeline_id: TimelineId,
|
||||
new_sk_set: list[int],
|
||||
):
|
||||
log.info(f"migrate_safekeepers({tenant_id}, {timeline_id}, {new_sk_set})")
|
||||
response = self.request(
|
||||
"POST",
|
||||
f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate",
|
||||
@@ -5336,16 +5336,32 @@ class EndpointFactory:
|
||||
)
|
||||
|
||||
def stop_all(self, fail_on_error=True) -> Self:
|
||||
exception = None
|
||||
for ep in self.endpoints:
|
||||
"""
|
||||
Stop all the endpoints in parallel.
|
||||
"""
|
||||
|
||||
# Note: raising an exception from a task in a task group cancels
|
||||
# all the other tasks. We don't want that, hence the 'stop_one'
|
||||
# function catches exceptions and puts them on the 'exceptions'
|
||||
# list for later processing.
|
||||
exceptions = []
|
||||
|
||||
async def stop_one(ep):
|
||||
try:
|
||||
ep.stop()
|
||||
await asyncio.to_thread(ep.stop)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to stop endpoint {ep.endpoint_id}: {e}")
|
||||
exception = e
|
||||
exceptions.append(e)
|
||||
|
||||
if fail_on_error and exception is not None:
|
||||
raise exception
|
||||
async def async_stop_all():
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
for ep in self.endpoints:
|
||||
tg.create_task(stop_one(ep))
|
||||
|
||||
asyncio.run(async_stop_all())
|
||||
|
||||
if fail_on_error and exceptions:
|
||||
raise ExceptionGroup("stopping an endpoint failed", exceptions)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
@@ -2,13 +2,15 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
@pytest.mark.parametrize("grpc", [True, False])
|
||||
def test_basebackup_cache(neon_env_builder: NeonEnvBuilder, grpc: bool):
|
||||
"""
|
||||
Simple test for basebackup cache.
|
||||
1. Check that we always hit the cache after compute restart.
|
||||
@@ -22,7 +24,7 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
ep = env.endpoints.create("main")
|
||||
ep = env.endpoints.create("main", grpc=grpc)
|
||||
ps = env.pageserver
|
||||
ps_http = ps.http_client()
|
||||
|
||||
|
||||
@@ -863,6 +863,89 @@ def test_pageserver_compaction_circuit_breaker(neon_env_builder: NeonEnvBuilder)
|
||||
assert not env.pageserver.log_contains(".*Circuit breaker failure ended.*")
|
||||
|
||||
|
||||
def test_ps_corruption_detection_feedback(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that when the pageserver detects corruption during image layer creation,
|
||||
it sends corruption feedback to the safekeeper which gets recorded in its
|
||||
safekeeper_ps_corruption_detected metric.
|
||||
"""
|
||||
# Configure tenant with aggressive compaction settings to easily trigger compaction
|
||||
TENANT_CONF = {
|
||||
# Small checkpoint distance to create many layers
|
||||
"checkpoint_distance": 1024 * 128,
|
||||
# Compact small layers
|
||||
"compaction_target_size": 1024 * 128,
|
||||
# Create image layers eagerly
|
||||
"image_creation_threshold": 1,
|
||||
"image_layer_creation_check_threshold": 0,
|
||||
# Force frequent compaction
|
||||
"compaction_period": "1s",
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
|
||||
# We are simulating compaction failures so we should allow these error messages.
|
||||
env.pageserver.allowed_errors.append(".*Compaction failed.*")
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
workload = Workload(
|
||||
env, tenant_id, timeline_id, endpoint_opts={"config_lines": ["neon.lakebase_mode=true"]}
|
||||
)
|
||||
workload.init()
|
||||
|
||||
# Enable the failpoint that will cause image layer creation to fail due to a (simulated) detected
|
||||
# corruption.
|
||||
pageserver_http.configure_failpoints(("create-image-layer-fail-simulated-corruption", "return"))
|
||||
|
||||
# Write some data to trigger compaction and image layer creation
|
||||
log.info("Writing data to trigger compaction...")
|
||||
workload.write_rows(1024 * 64, upload=False)
|
||||
workload.write_rows(1024 * 64, upload=False)
|
||||
|
||||
# Returns True if the corruption signal from PS is propagated to the SK according to the "safekeeper_ps_corruption_detected" metric.
|
||||
# Raises an exception otherwise.
|
||||
def check_corruption_signal_propagated_to_sk():
|
||||
# Get metrics from all safekeepers
|
||||
for sk in env.safekeepers:
|
||||
sk_metrics = sk.http_client().get_metrics()
|
||||
# Look for our corruption detected metric with the right tenant and timeline
|
||||
corruption_metrics = sk_metrics.query_all("safekeeper_ps_corruption_detected")
|
||||
|
||||
for metric in corruption_metrics:
|
||||
# Check if there's a metric for our tenant and timeline that has value 1
|
||||
if (
|
||||
metric.labels.get("tenant_id") == str(tenant_id)
|
||||
and metric.labels.get("timeline_id") == str(timeline_id)
|
||||
and metric.value == 1
|
||||
):
|
||||
log.info(f"Corruption detected by safekeeper {sk.id}: {metric}")
|
||||
return True
|
||||
raise Exception("Corruption detection feedback not found in any safekeeper metrics")
|
||||
|
||||
# Returns True if the corruption signal from PS is propagated to the PG according to the "ps_corruption_detected" metric
|
||||
# in "neon_perf_counters".
|
||||
# Raises an exception otherwise.
|
||||
def check_corruption_signal_propagated_to_pg():
|
||||
endpoint = workload.endpoint()
|
||||
results = endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon")
|
||||
results = endpoint.safe_psql(
|
||||
"SELECT value FROM neon_perf_counters WHERE metric = 'ps_corruption_detected'"
|
||||
)
|
||||
log.info("Query corruption detection metric, results: %s", results)
|
||||
if results[0][0] == 1:
|
||||
log.info("Corruption detection signal is raised on Postgres")
|
||||
return True
|
||||
raise Exception("Corruption detection signal is not raise on Postgres")
|
||||
|
||||
# Confirm that the corruption signal propagates to both the safekeeper and Postgres
|
||||
wait_until(check_corruption_signal_propagated_to_sk, timeout=10, interval=0.1)
|
||||
wait_until(check_corruption_signal_propagated_to_pg, timeout=10, interval=0.1)
|
||||
|
||||
# Cleanup the failpoint
|
||||
pageserver_http.configure_failpoints(("create-image-layer-fail-simulated-corruption", "off"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", [True, False])
|
||||
def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool):
|
||||
tenant_conf = {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import random
|
||||
import threading
|
||||
from enum import StrEnum
|
||||
from threading import Thread
|
||||
from time import sleep
|
||||
from typing import Any
|
||||
|
||||
@@ -47,19 +47,23 @@ def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor)
|
||||
# With autoprewarm, we need to be sure LFC was offloaded after all writes
|
||||
# finish, so we sleep. Otherwise we'll have less prewarmed pages than we want
|
||||
sleep(AUTOOFFLOAD_INTERVAL_SECS)
|
||||
client.offload_lfc_wait()
|
||||
return
|
||||
offload_res = client.offload_lfc_wait()
|
||||
log.info(offload_res)
|
||||
return offload_res
|
||||
|
||||
if method == PrewarmMethod.COMPUTE_CTL:
|
||||
status = client.prewarm_lfc_status()
|
||||
assert status["status"] == "not_prewarmed"
|
||||
assert "error" not in status
|
||||
client.offload_lfc()
|
||||
offload_res = client.offload_lfc()
|
||||
log.info(offload_res)
|
||||
assert client.prewarm_lfc_status()["status"] == "not_prewarmed"
|
||||
|
||||
parsed = prom_parse(client)
|
||||
desired = {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0, OFFLOAD_ERR_LABEL: 0, PREWARM_ERR_LABEL: 0}
|
||||
assert parsed == desired, f"{parsed=} != {desired=}"
|
||||
return
|
||||
|
||||
return offload_res
|
||||
|
||||
raise AssertionError(f"{method} not in PrewarmMethod")
|
||||
|
||||
@@ -68,21 +72,30 @@ def prewarm_endpoint(
|
||||
method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor, lfc_state: str | None
|
||||
):
|
||||
if method == PrewarmMethod.AUTOPREWARM:
|
||||
client.prewarm_lfc_wait()
|
||||
prewarm_res = client.prewarm_lfc_wait()
|
||||
log.info(prewarm_res)
|
||||
elif method == PrewarmMethod.COMPUTE_CTL:
|
||||
client.prewarm_lfc()
|
||||
prewarm_res = client.prewarm_lfc()
|
||||
log.info(prewarm_res)
|
||||
return prewarm_res
|
||||
elif method == PrewarmMethod.POSTGRES:
|
||||
cur.execute("select neon.prewarm_local_cache(%s)", (lfc_state,))
|
||||
|
||||
|
||||
def check_prewarmed(
|
||||
def check_prewarmed_contains(
|
||||
method: PrewarmMethod, client: EndpointHttpClient, desired_status: dict[str, str | int]
|
||||
):
|
||||
if method == PrewarmMethod.AUTOPREWARM:
|
||||
assert client.prewarm_lfc_status() == desired_status
|
||||
prewarm_status = client.prewarm_lfc_status()
|
||||
for k in desired_status:
|
||||
assert desired_status[k] == prewarm_status[k]
|
||||
|
||||
assert prom_parse(client)[PREWARM_LABEL] == 1
|
||||
elif method == PrewarmMethod.COMPUTE_CTL:
|
||||
assert client.prewarm_lfc_status() == desired_status
|
||||
prewarm_status = client.prewarm_lfc_status()
|
||||
for k in desired_status:
|
||||
assert desired_status[k] == prewarm_status[k]
|
||||
|
||||
desired = {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1, PREWARM_ERR_LABEL: 0, OFFLOAD_ERR_LABEL: 0}
|
||||
assert prom_parse(client) == desired
|
||||
|
||||
@@ -149,9 +162,6 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
|
||||
log.info(f"Used LFC size: {lfc_used_pages}")
|
||||
pg_cur.execute("select * from neon.get_prewarm_info()")
|
||||
total, prewarmed, skipped, _ = pg_cur.fetchall()[0]
|
||||
log.info(f"Prewarm info: {total=} {prewarmed=} {skipped=}")
|
||||
progress = (prewarmed + skipped) * 100 // total
|
||||
log.info(f"Prewarm progress: {progress}%")
|
||||
assert lfc_used_pages > 10000
|
||||
assert total > 0
|
||||
assert prewarmed > 0
|
||||
@@ -161,7 +171,54 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
|
||||
assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
||||
|
||||
desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
|
||||
check_prewarmed(method, client, desired)
|
||||
check_prewarmed_contains(method, client, desired)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
def test_lfc_prewarm_cancel(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test we can cancel LFC prewarm and prewarm successfully after
|
||||
"""
|
||||
env = neon_simple_env
|
||||
n_records = 1000000
|
||||
cfg = [
|
||||
"autovacuum = off",
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=1GB",
|
||||
"neon.file_cache_size_limit=1GB",
|
||||
"neon.file_cache_prewarm_limit=1000",
|
||||
]
|
||||
endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("create schema neon; create extension neon with schema neon")
|
||||
pg_cur.execute("create database lfc")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
log.info(f"Inserting {n_records} rows")
|
||||
lfc_cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
|
||||
lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
|
||||
log.info(f"Inserted {n_records} rows")
|
||||
|
||||
client = endpoint.http_client()
|
||||
method = PrewarmMethod.COMPUTE_CTL
|
||||
offload_lfc(method, client, pg_cur)
|
||||
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
|
||||
thread = Thread(target=lambda: prewarm_endpoint(method, client, pg_cur, None))
|
||||
thread.start()
|
||||
# wait 2 seconds to ensure we cancel prewarm SQL query
|
||||
sleep(2)
|
||||
client.cancel_prewarm_lfc()
|
||||
thread.join()
|
||||
assert client.prewarm_lfc_status()["status"] == "cancelled"
|
||||
|
||||
prewarm_endpoint(method, client, pg_cur, None)
|
||||
assert client.prewarm_lfc_status()["status"] == "completed"
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
@@ -178,9 +235,8 @@ def test_lfc_prewarm_empty(neon_simple_env: NeonEnv):
|
||||
cur = conn.cursor()
|
||||
cur.execute("create schema neon; create extension neon with schema neon")
|
||||
method = PrewarmMethod.COMPUTE_CTL
|
||||
offload_lfc(method, client, cur)
|
||||
prewarm_endpoint(method, client, cur, None)
|
||||
assert client.prewarm_lfc_status()["status"] == "skipped"
|
||||
assert offload_lfc(method, client, cur)["status"] == "skipped"
|
||||
assert prewarm_endpoint(method, client, cur, None)["status"] == "skipped"
|
||||
|
||||
|
||||
# autoprewarm isn't needed as we prewarm manually
|
||||
@@ -251,11 +307,11 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
|
||||
|
||||
workload_threads = []
|
||||
for _ in range(n_threads):
|
||||
t = threading.Thread(target=workload)
|
||||
t = Thread(target=workload)
|
||||
workload_threads.append(t)
|
||||
t.start()
|
||||
|
||||
prewarm_thread = threading.Thread(target=prewarm)
|
||||
prewarm_thread = Thread(target=prewarm)
|
||||
prewarm_thread.start()
|
||||
|
||||
def prewarmed():
|
||||
|
||||
@@ -129,7 +129,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
Test static endpoint is protected from GC by acquiring and renewing lsn leases.
|
||||
"""
|
||||
|
||||
LSN_LEASE_LENGTH = 8
|
||||
LSN_LEASE_LENGTH = (
|
||||
14 # This value needs to be large enough for compute_ctl to send two lease requests.
|
||||
)
|
||||
|
||||
neon_env_builder.num_pageservers = 2
|
||||
# GC is manual triggered.
|
||||
env = neon_env_builder.init_start(
|
||||
@@ -230,6 +233,15 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f"`SELECT` query succeed after GC, {ctx=}")
|
||||
return offset
|
||||
|
||||
# It's not reliable to let the compute renew the lease in this test case as we have a very tight
|
||||
# lease timeout. Therefore, the test case itself will renew the lease.
|
||||
#
|
||||
# This is a workaround to make the test case more deterministic.
|
||||
def renew_lease(env: NeonEnv, lease_lsn: Lsn):
|
||||
env.storage_controller.pageserver_api().timeline_lsn_lease(
|
||||
env.initial_tenant, env.initial_timeline, lease_lsn
|
||||
)
|
||||
|
||||
# Insert some records on main branch
|
||||
with env.endpoints.create_start("main", config_lines=["shared_buffers=1MB"]) as ep_main:
|
||||
with ep_main.cursor() as cur:
|
||||
@@ -242,6 +254,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
XLOG_BLCKSZ = 8192
|
||||
lsn = Lsn((int(lsn) // XLOG_BLCKSZ) * XLOG_BLCKSZ)
|
||||
|
||||
# We need to mock the way cplane works: it gets a lease for a branch before starting the compute.
|
||||
renew_lease(env, lsn)
|
||||
|
||||
with env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
endpoint_id="static",
|
||||
@@ -251,9 +266,6 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("SELECT count(*) FROM t0")
|
||||
assert cur.fetchone() == (ROW_COUNT,)
|
||||
|
||||
# Wait for static compute to renew lease at least once.
|
||||
time.sleep(LSN_LEASE_LENGTH / 2)
|
||||
|
||||
generate_updates_on_main(env, ep_main, 3, end=100)
|
||||
|
||||
offset = trigger_gc_and_select(
|
||||
@@ -263,10 +275,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
# Trigger Pageserver restarts
|
||||
for ps in env.pageservers:
|
||||
ps.stop()
|
||||
# Static compute should have at least one lease request failure due to connection.
|
||||
time.sleep(LSN_LEASE_LENGTH / 2)
|
||||
ps.start()
|
||||
|
||||
renew_lease(env, lsn)
|
||||
|
||||
trigger_gc_and_select(
|
||||
env,
|
||||
ep_static,
|
||||
@@ -282,6 +294,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
env.storage_controller.reconcile_until_idle()
|
||||
|
||||
# Wait for static compute to renew lease on the new pageserver.
|
||||
time.sleep(LSN_LEASE_LENGTH + 3)
|
||||
|
||||
trigger_gc_and_select(
|
||||
env,
|
||||
ep_static,
|
||||
@@ -292,7 +307,6 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Do some update so we can increment gc_cutoff
|
||||
generate_updates_on_main(env, ep_main, i, end=100)
|
||||
|
||||
# Wait for the existing lease to expire.
|
||||
time.sleep(LSN_LEASE_LENGTH + 1)
|
||||
# Now trigger GC again, layers should be removed.
|
||||
|
||||
@@ -286,3 +286,177 @@ def test_sk_generation_aware_tombstones(neon_env_builder: NeonEnvBuilder):
|
||||
assert re.match(r".*Timeline .* deleted.*", exc.value.response.text)
|
||||
# The timeline should remain deleted.
|
||||
expect_deleted(second_sk)
|
||||
|
||||
|
||||
def test_safekeeper_migration_stale_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that safekeeper migration handles stale timeline correctly by migrating to
|
||||
a safekeeper with a stale timeline.
|
||||
1. Check that we are waiting for the stale timeline to catch up with the commit lsn.
|
||||
The migration might fail if there is no compute to advance the WAL.
|
||||
2. Check that we rely on last_log_term (and not the current term) when waiting for the
|
||||
sync_position on step 7.
|
||||
3. Check that migration succeeds if the compute is running.
|
||||
"""
|
||||
neon_env_builder.num_safekeepers = 2
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
"timeline_safekeeper_count": 1,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
|
||||
env.storage_controller.allowed_errors.append(".*not enough successful .* to reach quorum.*")
|
||||
|
||||
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
|
||||
|
||||
active_sk = env.get_safekeeper(mconf["sk_set"][0])
|
||||
other_sk = [sk for sk in env.safekeepers if sk.id != active_sk.id][0]
|
||||
|
||||
ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
|
||||
ep.start(safekeeper_generation=1, safekeepers=[active_sk.id])
|
||||
ep.safe_psql("CREATE TABLE t(a int)")
|
||||
ep.safe_psql("INSERT INTO t VALUES (0)")
|
||||
|
||||
# Pull the timeline to other_sk, so other_sk now has a "stale" timeline on it.
|
||||
other_sk.pull_timeline([active_sk], env.initial_tenant, env.initial_timeline)
|
||||
|
||||
# Advance the WAL on active_sk.
|
||||
ep.safe_psql("INSERT INTO t VALUES (1)")
|
||||
|
||||
# The test is more tricky if we have the same last_log_term but different term/flush_lsn.
|
||||
# Stop the active_sk during the endpoint shutdown because otherwise compute_ctl runs
|
||||
# sync_safekeepers and advances last_log_term on active_sk.
|
||||
active_sk.stop()
|
||||
ep.stop(mode="immediate")
|
||||
active_sk.start()
|
||||
|
||||
active_sk_status = active_sk.http_client().timeline_status(
|
||||
env.initial_tenant, env.initial_timeline
|
||||
)
|
||||
other_sk_status = other_sk.http_client().timeline_status(
|
||||
env.initial_tenant, env.initial_timeline
|
||||
)
|
||||
|
||||
# other_sk should have the same last_log_term, but a stale flush_lsn.
|
||||
assert active_sk_status.last_log_term == other_sk_status.last_log_term
|
||||
assert active_sk_status.flush_lsn > other_sk_status.flush_lsn
|
||||
|
||||
commit_lsn = active_sk_status.flush_lsn
|
||||
|
||||
# Bump the term on other_sk to make it higher than active_sk.
|
||||
# This is to make sure we don't use current term instead of last_log_term in the algorithm.
|
||||
other_sk.http_client().term_bump(
|
||||
env.initial_tenant, env.initial_timeline, active_sk_status.term + 100
|
||||
)
|
||||
|
||||
# TODO(diko): now it fails because the timeline on other_sk is stale and there is no compute
|
||||
# to catch up it with active_sk. It might be fixed in https://databricks.atlassian.net/browse/LKB-946
|
||||
# if we delete stale timelines before starting the migration.
|
||||
# But the rest of the test is still valid: we should not lose committed WAL after the migration.
|
||||
with pytest.raises(
|
||||
StorageControllerApiException, match="not enough successful .* to reach quorum"
|
||||
):
|
||||
env.storage_controller.migrate_safekeepers(
|
||||
env.initial_tenant, env.initial_timeline, [other_sk.id]
|
||||
)
|
||||
|
||||
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
|
||||
assert mconf["new_sk_set"] == [other_sk.id]
|
||||
assert mconf["sk_set"] == [active_sk.id]
|
||||
assert mconf["generation"] == 2
|
||||
|
||||
# Start the endpoint, so it advances the WAL on other_sk.
|
||||
ep.start(safekeeper_generation=2, safekeepers=[active_sk.id, other_sk.id])
|
||||
# Now the migration should succeed.
|
||||
env.storage_controller.migrate_safekeepers(
|
||||
env.initial_tenant, env.initial_timeline, [other_sk.id]
|
||||
)
|
||||
|
||||
# Check that we didn't lose committed WAL.
|
||||
assert (
|
||||
other_sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline).flush_lsn
|
||||
>= commit_lsn
|
||||
)
|
||||
assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
|
||||
|
||||
|
||||
def test_pull_from_most_advanced_sk(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that we pull the timeline from the most advanced safekeeper during the
|
||||
migration and do not lose committed WAL.
|
||||
"""
|
||||
neon_env_builder.num_safekeepers = 4
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
"timeline_safekeeper_count": 3,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
|
||||
|
||||
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
|
||||
|
||||
sk_set = mconf["sk_set"]
|
||||
assert len(sk_set) == 3
|
||||
|
||||
other_sk = [sk.id for sk in env.safekeepers if sk.id not in sk_set][0]
|
||||
|
||||
ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
|
||||
ep.start(safekeeper_generation=1, safekeepers=sk_set)
|
||||
ep.safe_psql("CREATE TABLE t(a int)")
|
||||
ep.safe_psql("INSERT INTO t VALUES (0)")
|
||||
|
||||
# Stop one sk, so we have a lagging WAL on it.
|
||||
env.get_safekeeper(sk_set[0]).stop()
|
||||
# Advance the WAL on the other sks.
|
||||
ep.safe_psql("INSERT INTO t VALUES (1)")
|
||||
|
||||
# Stop other sks to make sure compute_ctl doesn't advance the last_log_term on them during shutdown.
|
||||
for sk_id in sk_set[1:]:
|
||||
env.get_safekeeper(sk_id).stop()
|
||||
ep.stop(mode="immediate")
|
||||
for sk_id in sk_set:
|
||||
env.get_safekeeper(sk_id).start()
|
||||
|
||||
# Bump the term on the lagging sk to make sure we don't use it to choose the most advanced sk.
|
||||
env.get_safekeeper(sk_set[0]).http_client().term_bump(
|
||||
env.initial_tenant, env.initial_timeline, 100
|
||||
)
|
||||
|
||||
def get_commit_lsn(sk_set: list[int]):
|
||||
flush_lsns = []
|
||||
last_log_terms = []
|
||||
for sk_id in sk_set:
|
||||
sk = env.get_safekeeper(sk_id)
|
||||
status = sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline)
|
||||
flush_lsns.append(status.flush_lsn)
|
||||
last_log_terms.append(status.last_log_term)
|
||||
|
||||
# In this test we assume that all sks have the same last_log_term.
|
||||
assert len(set(last_log_terms)) == 1
|
||||
|
||||
flush_lsns.sort(reverse=True)
|
||||
commit_lsn = flush_lsns[len(sk_set) // 2]
|
||||
|
||||
log.info(f"sk_set: {sk_set}, flush_lsns: {flush_lsns}, commit_lsn: {commit_lsn}")
|
||||
return commit_lsn
|
||||
|
||||
commit_lsn_before_migration = get_commit_lsn(sk_set)
|
||||
|
||||
# Make two migrations, so the lagging sk stays in the sk_set, but other sks are replaced.
|
||||
new_sk_set1 = [sk_set[0], sk_set[1], other_sk] # remove sk_set[2], add other_sk
|
||||
new_sk_set2 = [sk_set[0], other_sk, sk_set[2]] # remove sk_set[1], add sk_set[2] back
|
||||
env.storage_controller.migrate_safekeepers(
|
||||
env.initial_tenant, env.initial_timeline, new_sk_set1
|
||||
)
|
||||
env.storage_controller.migrate_safekeepers(
|
||||
env.initial_tenant, env.initial_timeline, new_sk_set2
|
||||
)
|
||||
|
||||
commit_lsn_after_migration = get_commit_lsn(new_sk_set2)
|
||||
|
||||
# We should not lose committed WAL.
|
||||
# If we have choosen the lagging sk to pull the timeline from, this might fail.
|
||||
assert commit_lsn_before_migration <= commit_lsn_after_migration
|
||||
|
||||
ep.start(safekeeper_generation=5, safekeepers=new_sk_set2)
|
||||
assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -768,6 +769,14 @@ def test_lsn_lease_storcon(neon_env_builder: NeonEnvBuilder):
|
||||
"compaction_period": "0s",
|
||||
}
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=conf)
|
||||
# ShardSplit is slow in debug builds, so ignore the warning
|
||||
if os.getenv("BUILD_TYPE", "debug") == "debug":
|
||||
env.storage_controller.allowed_errors.extend(
|
||||
[
|
||||
".*Exclusive lock by ShardSplit was held.*",
|
||||
]
|
||||
)
|
||||
|
||||
with env.endpoints.create_start(
|
||||
"main",
|
||||
) as ep:
|
||||
|
||||
@@ -2757,18 +2757,37 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
|
||||
remote_storage_kind = s3_storage()
|
||||
neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
|
||||
|
||||
# Set a very small disk usage limit (1KB)
|
||||
neon_env_builder.safekeeper_extra_opts = ["--max-timeline-disk-usage-bytes=1024"]
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# Create a timeline and endpoint
|
||||
env.create_branch("test_timeline_disk_usage_limit")
|
||||
endpoint = env.endpoints.create_start("test_timeline_disk_usage_limit")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_timeline_disk_usage_limit",
|
||||
config_lines=[
|
||||
"neon.lakebase_mode=true",
|
||||
],
|
||||
)
|
||||
|
||||
# Install the neon extension in the test database. We need it to query perf counter metrics.
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE EXTENSION IF NOT EXISTS neon")
|
||||
# Sanity-check safekeeper connection status in neon_perf_counters in the happy case.
|
||||
cur.execute(
|
||||
"SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
|
||||
)
|
||||
assert cur.fetchone() == (1,), "Expected 1 active safekeeper"
|
||||
cur.execute(
|
||||
"SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
|
||||
)
|
||||
assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"
|
||||
|
||||
# Get the safekeeper
|
||||
sk = env.safekeepers[0]
|
||||
|
||||
# Restart the safekeeper with a very small disk usage limit (1KB)
|
||||
sk.stop().start(["--max-timeline-disk-usage-bytes=1024"])
|
||||
|
||||
# Inject a failpoint to stop WAL backup
|
||||
with sk.http_client() as http_cli:
|
||||
http_cli.configure_failpoints([("backup-lsn-range-pausable", "pause")])
|
||||
@@ -2794,6 +2813,18 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
|
||||
wait_until(error_logged)
|
||||
log.info("Found expected error message in compute log, resuming.")
|
||||
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# Confirm that neon_perf_counters also indicates that there are no active safekeepers
|
||||
cur.execute(
|
||||
"SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
|
||||
)
|
||||
assert cur.fetchone() == (0,), "Expected 0 active safekeepers"
|
||||
cur.execute(
|
||||
"SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
|
||||
)
|
||||
assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"
|
||||
|
||||
# Sanity check that the hanging insert is indeed still hanging. Otherwise means the circuit breaker we
|
||||
# implemented didn't work as expected.
|
||||
time.sleep(2)
|
||||
|
||||
Reference in New Issue
Block a user