Evict WAL files from disk (#8022)

Fixes https://github.com/neondatabase/neon/issues/6337

Add safekeeper support to switch between `Present` and
`Offloaded(flush_lsn)` states. Offloading is disabled by default,
but can be controlled using new command-line arguments:

```
      --enable-offload
          Enable automatic switching to offloaded state
      --delete-offloaded-wal
          Delete local WAL files after offloading. When disabled, they will be left on disk
      --control-file-save-interval <CONTROL_FILE_SAVE_INTERVAL>
          Pending updates to control file will be automatically saved after this interval [default: 300s]
```
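
For illustration, these flags might be declared with clap's derive API roughly as follows. This is a sketch only: the struct and field names, and the `humantime`-based duration parsing, are assumptions, not the actual safekeeper argument definitions.

```rust
use std::time::Duration;

use clap::Parser;

/// Hypothetical argument struct; only the flag names, help strings, and the
/// 300s default are taken from the real help output above.
#[derive(Parser)]
struct Args {
    /// Enable automatic switching to offloaded state.
    #[arg(long)]
    enable_offload: bool,

    /// Delete local WAL files after offloading. When disabled, they will be
    /// left on disk.
    #[arg(long)]
    delete_offloaded_wal: bool,

    /// Pending updates to control file will be automatically saved after
    /// this interval.
    #[arg(long, value_parser = humantime::parse_duration, default_value = "300s")]
    control_file_save_interval: Duration,
}
```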

The manager watches state updates and detects when there is no activity on
the timeline and the partial backup in remote storage is up to date. When
all conditions are met, the state can be switched to offloaded.
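
As a sketch of that decision (illustrative only; the struct and function below are invented for this example, not the manager's real API):

```rust
/// Invented input struct: a snapshot of what the manager knows about a timeline.
struct EvictionCheck {
    /// Guards currently handed out to tasks that need on-disk WAL.
    active_guards: usize,
    /// LSN up to which WAL has been flushed locally.
    flush_lsn: u64,
    /// LSN up to which the partial backup in remote storage is complete.
    partial_backup_lsn: u64,
}

/// A timeline can be offloaded only when nothing is using resident WAL and
/// remote storage already holds everything up to the flush LSN.
fn can_switch_to_offloaded(c: &EvictionCheck) -> bool {
    c.active_guards == 0 && c.partial_backup_lsn >= c.flush_lsn
}
```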

In `timeline.rs` there is a `StateSK` enum to support switching between
states. When offloaded, the code can access only the control file structure
and cannot use `SafeKeeper` to accept new WAL.
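
A simplified sketch of that enum (the real definition in `timeline.rs` carries more context; the payload types here are placeholders):

```rust
struct SafeKeeper; // placeholder for the real WAL-accepting state machine
struct ControlFile; // placeholder for the persisted control file state

enum StateSK {
    /// WAL is resident on disk and the timeline can accept new WAL.
    Present(SafeKeeper),
    /// Only the control file remains; flush_lsn is remembered so status
    /// queries still work without reading WAL from disk.
    Offloaded { control_file: ControlFile, flush_lsn: u64 },
}

impl StateSK {
    /// Accepting new WAL is possible only in the `Present` state.
    fn safekeeper_mut(&mut self) -> Option<&mut SafeKeeper> {
        match self {
            StateSK::Present(sk) => Some(sk),
            StateSK::Offloaded { .. } => None,
        }
    }
}
```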

`FullAccessTimeline` is now renamed to `WalResidentTimeline`. This
struct contains a guard to notify the manager about active tasks requiring
on-disk WAL access. All guards are issued by the manager, and all requests
are sent over a channel using `ManagerCtl`. When the manager receives a
request to issue a guard, it unevicts the timeline if it is currently evicted.
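
The guard protocol could be sketched like this (the names mirror the description above, but the exact types and signatures are assumptions):

```rust
use tokio::sync::{mpsc, oneshot};

/// A request to the manager: "issue me a guard for resident-WAL access".
struct GuardRequest {
    reply: oneshot::Sender<ResidenceGuard>,
}

/// Held by tasks that touch on-disk WAL; dropping it tells the manager the
/// task is done, so the timeline becomes evictable again.
struct ResidenceGuard {
    done: mpsc::UnboundedSender<()>,
}

impl Drop for ResidenceGuard {
    fn drop(&mut self) {
        let _ = self.done.send(());
    }
}

/// Task side: ask the manager (via the `ManagerCtl`-style channel) for a
/// guard; the manager unevicts the timeline first if necessary.
async fn acquire_guard(ctl: &mpsc::Sender<GuardRequest>) -> ResidenceGuard {
    let (reply, rx) = oneshot::channel();
    ctl.send(GuardRequest { reply }).await.expect("manager task is gone");
    rx.await.expect("manager dropped the guard request")
}
```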

Fixed a bug in partial WAL backup: it previously used `term` instead of
`last_log_term`.

After this commit is merged, the next step is to roll the change out, as
described in issue #6338.
Author: Arthur Petukhovsky
Date: 2024-06-26 18:58:56 +01:00
Committed by: GitHub
Parent: dd3adc3693
Commit: 76fc3d4aa1
25 changed files with 1673 additions and 480 deletions


@@ -3916,6 +3916,8 @@ class Safekeeper(LogUtils):

```python
    def assert_no_errors(self):
        assert not self.log_contains("manager task finished prematurely")
        assert not self.log_contains("error while acquiring WalResidentTimeline guard")
        assert not self.log_contains("timeout while acquiring WalResidentTimeline guard")

    def append_logical_message(
        self, tenant_id: TenantId, timeline_id: TimelineId, request: Dict[str, Any]
```


@@ -1,4 +1,5 @@

```python
import filecmp
import logging
import os
import random
import shutil
```

@@ -2178,3 +2179,102 @@ def test_broker_discovery(neon_env_builder: NeonEnvBuilder):

```python
    do_something()
    do_something()


# Test creates 5 endpoints and tries to wake them up randomly. All timeouts are
# configured to be very short, so that we expect that:
# - pageserver will update remote_consistent_lsn very often
# - safekeepers will upload partial WAL segments very often
# - safekeeper will try to evict and unevict timelines
#
# Test checks that there are no critical errors while doing this. Also it checks
# that every safekeeper has at least one successful eviction.
@pytest.mark.parametrize("delete_offloaded_wal", [False, True])
@pytest.mark.parametrize("restart_chance", [0.0, 0.2])
def test_s3_eviction(
    neon_env_builder: NeonEnvBuilder, delete_offloaded_wal: bool, restart_chance: float
):
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS)
    env = neon_env_builder.init_start(
        initial_tenant_conf={
            "checkpoint_timeout": "100ms",
        }
    )

    extra_opts = [
        "--enable-offload",
        "--partial-backup-timeout",
        "50ms",
        "--control-file-save-interval",
        "1s",
    ]
    if delete_offloaded_wal:
        extra_opts.append("--delete-offloaded-wal")

    for sk in env.safekeepers:
        sk.stop().start(extra_opts=extra_opts)

    n_timelines = 5

    branch_names = [f"branch{tlin}" for tlin in range(n_timelines)]
    timelines = []
    ps_client = env.pageservers[0].http_client()

    # start postgres on each timeline
    endpoints: list[Endpoint] = []
    for branch_name in branch_names:
        timeline_id = env.neon_cli.create_branch(branch_name)
        timelines.append(timeline_id)

        endpoints.append(env.endpoints.create_start(branch_name))
        endpoints[-1].safe_psql("CREATE TABLE t(i int)")
        endpoints[-1].safe_psql("INSERT INTO t VALUES (0)")

        lsn = endpoints[-1].safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]
        log.info(f"{branch_name}: LSN={lsn}")

        endpoints[-1].stop()

        # update remote_consistent_lsn on pageserver
        ps_client.timeline_checkpoint(env.initial_tenant, timelines[-1], wait_until_uploaded=True)

    check_values = [0] * n_timelines

    n_iters = 20
    for _ in range(n_iters):
        if log.isEnabledFor(logging.DEBUG):
            for j in range(n_timelines):
                detail = ps_client.timeline_detail(env.initial_tenant, timelines[j])
                log.debug(
                    f'{branch_names[j]}: RCL={detail["remote_consistent_lsn"]}, LRL={detail["last_record_lsn"]}'
                )

        i = random.randint(0, n_timelines - 1)
        log.info(f"Starting endpoint {i}")
        endpoints[i].start()

        check_values[i] += 1
        res = endpoints[i].safe_psql("UPDATE t SET i = i + 1 RETURNING i")
        assert res[0][0] == check_values[i]

        lsn = endpoints[i].safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]
        log.info(f"{branch_names[i]}: LSN={lsn}")

        endpoints[i].stop()

        # update remote_consistent_lsn on pageserver
        ps_client.timeline_checkpoint(env.initial_tenant, timelines[i], wait_until_uploaded=True)

        # restarting random safekeepers
        for sk in env.safekeepers:
            if random.random() < restart_chance:
                sk.stop().start(extra_opts=extra_opts)

        time.sleep(0.5)

    # require at least one successful eviction in at least one safekeeper
    # TODO: require eviction in each safekeeper after https://github.com/neondatabase/neon/issues/8148 is fixed
    assert any(
        sk.log_contains("successfully evicted timeline")
        and sk.log_contains("successfully restored evicted timeline")
        for sk in env.safekeepers
    )
```


@@ -200,9 +200,8 @@ async def run_restarts_under_load(
# assert that at least one transaction has completed in every worker
stats.check_progress()
# testing #6530, temporary here
# TODO: remove afer partial backup is enabled by default
victim.start(extra_opts=["--partial-backup-enabled", "--partial-backup-timeout=2s"])
# testing #6530
victim.start(extra_opts=["--partial-backup-timeout=2s"])
log.info("Iterations are finished, exiting coroutines...")
stats.running = False