mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 21:20:37 +00:00
Add FullAccessTimeline guard in safekeepers (#7887)
This is a preparation for https://github.com/neondatabase/neon/issues/6337. The idea is to add FullAccessTimeline, which will act as a guard for tasks requiring access to WAL files. Eviction will be blocked on these tasks, and WAL won't be deleted from disk while there is at least one active FullAccessTimeline. To get a FullAccessTimeline, tasks call `tli.full_access_guard().await?`. After eviction is implemented, this function will be responsible for downloading missing WAL files and waiting until the download finishes. This commit also contains other small refactorings: - Separate `get_tenant_dir` and `get_timeline_dir` functions for building a local path. This is useful for looking at usages and finding tasks requiring access to the local filesystem. - `timeline_manager` is now responsible for spawning all background tasks - The WAL removal task is now spawned instantly after the horizon is updated
This commit is contained in:
committed by
GitHub
parent
5a394fde56
commit
16b2e74037
@@ -72,6 +72,18 @@ class Lsn:
|
||||
def segment_lsn(self, seg_sz: int = DEFAULT_WAL_SEG_SIZE) -> "Lsn":
    """Return the Lsn rounded down to the nearest multiple of `seg_sz`."""
    offset_in_segment = self.lsn_int % seg_sz
    return Lsn(self.lsn_int - offset_in_segment)
|
||||
|
||||
def segno(self, seg_sz: int = DEFAULT_WAL_SEG_SIZE) -> int:
    """Return the index of the `seg_sz`-sized segment that contains this LSN."""
    segment_index, _offset = divmod(self.lsn_int, seg_sz)
    return segment_index
|
||||
|
||||
def segment_name(self, seg_sz: int = DEFAULT_WAL_SEG_SIZE) -> str:
    """Return the WAL segment file name (timeline 1) that contains this LSN.

    The name is three 8-digit upper-case hex fields: the timeline id
    (fixed to 00000001 here), the "xlog id" and the segment index within
    that xlog id. One xlog id spans 4 GiB of WAL, so the number of
    segments per xlog id depends on `seg_sz`.

    For a segment size with 256 segments per xlog id (16 MiB) this is
    00000001XXXXXXXX000000YY, where XXXXXXXXYY is segno in hex.
    """
    segno = self.segno(seg_sz)
    # Derive the split point from seg_sz instead of assuming 256 segments
    # per xlog id, so non-default segment sizes get a correct name too.
    segments_per_xlogid = 0x100000000 // seg_sz
    high_bits, low_bits = divmod(segno, segments_per_xlogid)
    return f"00000001{high_bits:08X}{low_bits:08X}"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Key:
|
||||
|
||||
@@ -973,6 +973,9 @@ class NeonEnvBuilder:
|
||||
for pageserver in self.env.pageservers:
|
||||
pageserver.assert_no_errors()
|
||||
|
||||
for safekeeper in self.env.safekeepers:
|
||||
safekeeper.assert_no_errors()
|
||||
|
||||
self.env.storage_controller.assert_no_errors()
|
||||
|
||||
try:
|
||||
@@ -3813,6 +3816,9 @@ class Safekeeper(LogUtils):
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
def assert_no_errors(self):
    """Assert that the log has no "manager task finished prematurely" entry."""
    premature_exit = self.log_contains("manager task finished prematurely")
    assert not premature_exit
|
||||
|
||||
def append_logical_message(
|
||||
self, tenant_id: TenantId, timeline_id: TimelineId, request: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
@@ -3898,6 +3904,15 @@ class Safekeeper(LogUtils):
|
||||
"""
|
||||
cli = self.http_client()
|
||||
|
||||
target_segment_file = lsn.segment_name()
|
||||
|
||||
def are_segments_removed():
    # Polling predicate: raises AssertionError while any listed segment
    # still sorts before the target segment file name.
    segments = self.list_segments(tenant_id, timeline_id)
    log.info(
        f"waiting for all segments before {target_segment_file} to be removed from sk {self.id}, current segments: {segments}"
    )
    assert not any(not (target_segment_file <= name) for name in segments)
|
||||
|
||||
def are_lsns_advanced():
|
||||
stat = cli.timeline_status(tenant_id, timeline_id)
|
||||
log.info(
|
||||
@@ -3909,6 +3924,7 @@ class Safekeeper(LogUtils):
|
||||
# pageserver to this safekeeper
|
||||
wait_until(30, 1, are_lsns_advanced)
|
||||
cli.checkpoint(tenant_id, timeline_id)
|
||||
wait_until(30, 1, are_segments_removed)
|
||||
|
||||
def wait_until_paused(self, failpoint: str):
|
||||
msg = f"at failpoint {failpoint}"
|
||||
|
||||
Reference in New Issue
Block a user