mirror of https://github.com/neondatabase/neon.git
refactor(test): duplication with fullbackup, tar content hashing (#7828)
"taking a fullbackup" is an ugly multi-liner copypasted in multiple places, most recently with timeline ancestor detach tests. move it under `PgBin` which is not a great place, but better than yet another utility function. Additionally: - cleanup `psql_env` repetition (PgBin already configures that) - move the backup tar comparison as a yet another free utility function - use backup tar comparison in `test_import.py` where a size check was done previously - cleanup extra timeline creation from test Cc: #7715
@@ -4,10 +4,13 @@ import json
 import os
 import re
 import subprocess
+import tarfile
 import threading
 import time
+from hashlib import sha256
 from pathlib import Path
 from typing import (
+    IO,
     TYPE_CHECKING,
     Any,
     Callable,
@@ -15,8 +18,10 @@ from typing import (
     Iterable,
     List,
     Optional,
+    Set,
+    Tuple,
     TypeVar,
     Union,
 )
 from urllib.parse import urlencode
 
@@ -499,3 +504,48 @@ class AuxFileStore(str, enum.Enum):
 
     def __str__(self) -> str:
         return f"'aux-{self.value}'"
+
+
+def assert_pageserver_backups_equal(left: Path, right: Path, skip_files: Set[str]):
+    """
+    This is essentially:
+
+    lines=$(comm -3 \
+        <(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs sha256sum | sort -k2) \
+        <(mkdir right && cd right && tar xf "$right" && find . -type f -print0 | xargs sha256sum | sort -k2) \
+        | wc -l)
+    [ "$lines" = "0" ]
+
+    But in a more mac friendly fashion.
+    """
+    started_at = time.time()
+
+    def hash_extracted(reader: Union[IO[bytes], None]) -> bytes:
+        assert reader is not None
+        digest = sha256(usedforsecurity=False)
+        while True:
+            buf = reader.read(64 * 1024)
+            if not buf:
+                break
+            digest.update(buf)
+        return digest.digest()
+
+    def build_hash_list(p: Path) -> List[Tuple[str, bytes]]:
+        with tarfile.open(p) as f:
+            matching_files = (info for info in f if info.isreg() and info.name not in skip_files)
+            ret = list(
+                map(lambda info: (info.name, hash_extracted(f.extractfile(info))), matching_files)
+            )
+            ret.sort(key=lambda t: t[0])
+            return ret
+
+    left_list, right_list = map(build_hash_list, [left, right])
+
+    try:
+        assert len(left_list) == len(right_list)
+
+        for left_tuple, right_tuple in zip(left_list, right_list):
+            assert left_tuple == right_tuple
+    finally:
+        elapsed = time.time() - started_at
+        log.info(f"assert_pageserver_backups_equal completed in {elapsed}s")
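The new helper replaces ad-hoc size checks when comparing two fullbackup tars. A minimal usage sketch follows; the file names and the skip set here are illustrative assumptions, not taken from the PR:

    # Fail if any regular file inside the two tars differs by content,
    # ignoring files that are expected to vary between the backups.
    assert_pageserver_backups_equal(
        test_output_dir / "backup_before.tar",
        test_output_dir / "backup_after.tar",
        skip_files={"zenith.signal"},
    )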