mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-31 12:00:42 +00:00
storcon: skip draining shard if it's secondary is lagging too much (#8644)
## Problem Migrations of tenant shards with cold secondaries are holding up drains in during production deployments. ## Summary of changes If a secondary locations is lagging by more than 256MiB (configurable, but that's the default), then skip cutting it over to the secondary as part of the node drain.
This commit is contained in:
@@ -2,7 +2,6 @@ import concurrent.futures
|
||||
import random
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
from fixtures.common_types import TenantId, TenantShardId, TimelineId
|
||||
@@ -24,51 +23,14 @@ def get_consistent_node_shard_counts(env: NeonEnv, total_shards) -> defaultdict[
|
||||
This function takes into account the intersection of the intent and the observed state.
|
||||
If they do not match, it asserts out.
|
||||
"""
|
||||
tenants = env.storage_controller.tenant_list()
|
||||
|
||||
intent = dict()
|
||||
observed = dict()
|
||||
|
||||
tenant_placement: defaultdict[str, Dict[str, Any]] = defaultdict(
|
||||
lambda: {
|
||||
"observed": {"attached": None, "secondary": []},
|
||||
"intent": {"attached": None, "secondary": []},
|
||||
}
|
||||
)
|
||||
|
||||
for t in tenants:
|
||||
for node_id, loc_state in t["observed"]["locations"].items():
|
||||
if (
|
||||
loc_state is not None
|
||||
and "conf" in loc_state
|
||||
and loc_state["conf"] is not None
|
||||
and loc_state["conf"]["mode"]
|
||||
in set(["AttachedSingle", "AttachedMulti", "AttachedStale"])
|
||||
):
|
||||
observed[t["tenant_shard_id"]] = int(node_id)
|
||||
tenant_placement[t["tenant_shard_id"]]["observed"]["attached"] = int(node_id)
|
||||
|
||||
if (
|
||||
loc_state is not None
|
||||
and "conf" in loc_state
|
||||
and loc_state["conf"] is not None
|
||||
and loc_state["conf"]["mode"] == "Secondary"
|
||||
):
|
||||
tenant_placement[t["tenant_shard_id"]]["observed"]["secondary"].append(int(node_id))
|
||||
|
||||
if "attached" in t["intent"]:
|
||||
intent[t["tenant_shard_id"]] = t["intent"]["attached"]
|
||||
tenant_placement[t["tenant_shard_id"]]["intent"]["attached"] = t["intent"]["attached"]
|
||||
|
||||
if "secondary" in t["intent"]:
|
||||
tenant_placement[t["tenant_shard_id"]]["intent"]["secondary"] += t["intent"][
|
||||
"secondary"
|
||||
]
|
||||
|
||||
tenant_placement = env.storage_controller.get_tenants_placement()
|
||||
log.info(f"{tenant_placement=}")
|
||||
|
||||
matching = {
|
||||
tid: intent[tid] for tid in observed if tid in intent and intent[tid] == observed[tid]
|
||||
tid: tenant_placement[tid]["intent"]["attached"]
|
||||
for tid in tenant_placement
|
||||
if tenant_placement[tid]["intent"]["attached"]
|
||||
== tenant_placement[tid]["observed"]["attached"]
|
||||
}
|
||||
assert len(matching) == total_shards
|
||||
|
||||
|
||||
Reference in New Issue
Block a user