mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 05:22:56 +00:00
Major changes and new concepts: Simplify Repository to a value-store ------------------------------------ Move the responsibility of tracking relation metadata, like which relations exist and what their sizes are, from Repository to a new module, pgdatadir_mapping.rs. The interface to Repository is now a set of simple key-value PUT/GET operations. It's still not any old key-value store though. A Repository is still responsible for handling branching, and every GET operation comes with an LSN. Key --- The key to the Repository key-value store is a Key struct, which consists of a few integer fields. It's wide enough to store a full RelFileNode, fork and block number, and to distinguish those from metadata keys. See pgdatadir_mapping.rs for how relation blocks and metadata keys are mapped to the Key struct. Store arbitrary key-ranges in the layer files --------------------------------------------- The concept of a "segment" is gone. Each layer file can store an arbitrary range of Keys. TODO: - Deleting keys, to reclaim space. This isn't visible to Postgres: dropping or truncating a relation works as you would expect if you look at it from the compute node. If you drop a relation, for example, the relation is removed from the metadata entry, so that it appears to be gone. However, the layered repository implementation never reclaims the storage. - Tracking "logical database size", for disk space quotas. That ought to be reimplemented now in pgdatadir_mapping.rs, or perhaps in walingest.rs. - LSM compaction. The logic for checkpointing and creating image layers is very dumb. AFAIK the *read* code could deal with a full-fledged LSM tree now consisting of the delta and image layers. But there's no code to take a bunch of delta layers and compact them, and the heuristics for when to create image layers are pretty dumb. - The code to track the layers is inefficient. All layers are just stored in a vector, and whenever we need to find a layer, we do a linear search in it.
79 lines
2.3 KiB
Python
79 lines
2.3 KiB
Python
import os
|
|
import subprocess
|
|
|
|
from typing import Any, List
|
|
from fixtures.log_helper import log
|
|
|
|
|
|
def get_self_dir() -> str:
    """Return the absolute path of the directory containing this module.

    The location is derived from ``__file__``, made absolute first so the
    result does not depend on how the module was imported.
    """
    this_file = os.path.abspath(__file__)
    return os.path.dirname(this_file)
|
|
|
|
|
|
def mkdir_if_needed(path: str) -> None:
    """Make sure a directory exists at ``path``, creating it when absent.

    Only the final path component is created; missing parent directories
    are not created and make the underlying ``os.mkdir`` call fail.

    If something already exists at ``path``, that is tolerated, but the
    closing assertion still requires it to be a directory.
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        # Something is already there; whether it is really a directory is
        # verified just below.
        pass

    assert os.path.isdir(path)
|
|
|
|
|
|
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
    """Run a process and capture its output in files.

    Output goes to files named "cmd_NNN.stdout" and "cmd_NNN.stderr" inside
    ``capture_dir``, where "cmd" is the basename of the program (``cmd[0]``)
    and NNN is the next value of the module's global counter.

    If those files already exist, they are overwritten.

    Args:
        capture_dir: Directory in which the output files are created.
        cmd: Program and its arguments, as a list.
        **kwargs: Passed through to ``subprocess.run``. ``stdout`` and
            ``stderr`` are set by this function and must not be supplied.

    Returns:
        The base path of the captured-output files, i.e. the file names
        without the ".stdout" / ".stderr" suffix.

    Note:
        The process's exit status is not inspected here; whether a failing
        command raises depends on what the caller passes in ``kwargs``.
    """
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses, while still rejecting a bare string, whose cmd[0] would be
    # just its first character.
    assert isinstance(cmd, list), "cmd must be a list of program arguments"

    base = '{}_{}'.format(os.path.basename(cmd[0]), global_counter())
    basepath = os.path.join(capture_dir, base)
    stdout_filename = basepath + '.stdout'
    stderr_filename = basepath + '.stderr'

    with open(stdout_filename, 'w') as stdout_f, open(stderr_filename, 'w') as stderr_f:
        log.info('(capturing output to "{}.stdout")'.format(base))
        subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)

    return basepath
|
|
|
|
|
|
# Last value handed out by global_counter(); 0 means none has been issued yet.
_global_counter = 0


def global_counter() -> int:
    """Return the next value of a very simple module-wide counter.

    Each call increments the counter by one and returns the new value, so
    repeated runs of the same command can be given distinct output file
    names.
    """
    global _global_counter
    _global_counter = _global_counter + 1
    return _global_counter
|
|
|
|
|
|
def lsn_to_hex(num: int) -> str:
    """Format an integer LSN as "HIGH/LOW" in uppercase hexadecimal.

    HIGH is everything above the low 32 bits of ``num`` and LOW is the low
    32 bits; neither part is zero-padded.
    """
    upper = num >> 32
    lower = num & 0xffffffff
    return f"{upper:X}/{lower:X}"
|
|
|
|
|
|
def lsn_from_hex(lsn_hex: str) -> int:
    """Parse an LSN written as "HIGH/LOW" hexadecimal into an integer.

    The text must contain exactly one "/" separating two hexadecimal
    numbers; the first is shifted left by 32 bits and the second is added
    to it. Malformed input raises ``ValueError``.
    """
    high_text, low_text = lsn_hex.split('/')
    high = int(high_text, 16)
    low = int(low_text, 16)
    return (high << 32) + low
|
|
|
|
|
|
def print_gc_result(row):
    """Log a two-line summary of a GC result row.

    ``row`` must be a mapping providing the keys ``elapsed``,
    ``layers_total``, ``layers_needed_by_cutoff``,
    ``layers_needed_by_branches``, ``layers_not_updated`` and
    ``layers_removed``; a missing key raises ``KeyError`` when the
    corresponding line is formatted.
    """
    duration_line = "GC duration {elapsed} ms".format_map(row)
    log.info(duration_line)

    layers_template = " total: {layers_total}, needed_by_cutoff {layers_needed_by_cutoff}, needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}"
    log.info(layers_template.format_map(row))
|