mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 14:02:55 +00:00
storcon: reproduce shard split issue (#11290)
## Problem

Issue https://github.com/neondatabase/neon/issues/11254 describes a case where a restart during a shard split can result in a bad end state in the database.

## Summary of changes

- Add a reproducer for the issue.
- Tighten an existing safety check around updated row counts in `complete_shard_split`.
This commit is contained in:
@@ -1725,6 +1725,8 @@ class LogUtils:
|
||||
log.warning(f"Skipping log check: {logfile} does not exist")
|
||||
return None
|
||||
|
||||
log.info(f"Checking log {logfile} for pattern '{pattern}'")
|
||||
|
||||
contains_re = re.compile(pattern)
|
||||
|
||||
# XXX: Our rust logging machinery buffers the messages, so if you
|
||||
@@ -2618,10 +2620,13 @@ class NeonProxiedStorageController(NeonStorageController):
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
def instance_log_path(self, instance_id: int) -> Path:
|
||||
return self.env.repo_dir / f"storage_controller_{instance_id}" / "storage_controller.log"
|
||||
|
||||
def assert_no_errors(self):
|
||||
for instance_id in self.instances.keys():
|
||||
assert_no_errors(
|
||||
self.env.repo_dir / f"storage_controller_{instance_id}" / "storage_controller.log",
|
||||
self.instance_log_path(instance_id),
|
||||
"storage_controller",
|
||||
self.allowed_errors,
|
||||
)
|
||||
@@ -2629,7 +2634,14 @@ class NeonProxiedStorageController(NeonStorageController):
|
||||
def log_contains(
|
||||
self, pattern: str, offset: None | LogCursor = None
|
||||
) -> tuple[str, LogCursor] | None:
|
||||
raise NotImplementedError()
|
||||
for instance_id in self.instances.keys():
|
||||
log_path = self.instance_log_path(instance_id)
|
||||
checker = LogUtils(log_path)
|
||||
found = checker.log_contains(pattern, offset)
|
||||
if found is not None:
|
||||
return found
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user