mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 21:12:55 +00:00
pageserver: include generation number in local layer paths (#7609)
## Problem In https://github.com/neondatabase/neon/pull/7531, we would like to be able to rewrite layers safely. One option is to make `Layer` able to rewrite files in place safely (e.g. by blocking evictions/deletions for an old Layer while a new one is created), but that's relatively fragile. It's more robust in general if we simply never overwrite the same local file: we can do that by putting the generation number in the filename. ## Summary of changes - Add `local_layer_path` (counterpart to `remote_layer_path`) and convert all locations that manually constructed a local layer path by joining LayerFileName to timeline path - In the layer upload path, construct remote paths with `remote_layer_path` rather than trying to build them out of a local path. - During startup, carry the full path to layer files through `init::reconcile`, and pass it into `Layer::for_resident` - Add a test to make sure we handle upgrades properly. - Comment out the generation part of `local_layer_path`, since we need to maintain forward compatibility for one release. A tiny followup PR will enable it afterwards. We could make this a bit simpler if we bulk renamed existing layers on startup instead of carrying literal paths through init, but that is operationally risky on existing servers with millions of layer files. We can always do a renaming change in future if it becomes annoying, but for the moment it's kind of nice to have a structure that enables us to change local path names again in future quite easily. We should rename `LayerFileName` to `LayerName` or somesuch, to make it more obvious that it's not a literal filename: this was already a bit confusing where that type is used in remote paths. That will be a followup, to avoid polluting this PR's diff.
This commit is contained in:
@@ -7,6 +7,7 @@ from fixtures.neon_fixtures import (
|
||||
flush_ep_to_pageserver,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.pageserver.types import parse_layer_file_name
|
||||
from fixtures.pageserver.utils import wait_for_upload
|
||||
from fixtures.remote_storage import RemoteStorageKind
|
||||
|
||||
@@ -57,9 +58,9 @@ def test_basic_eviction(
|
||||
for sk in env.safekeepers:
|
||||
sk.stop()
|
||||
|
||||
timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
|
||||
initial_local_layers = sorted(
|
||||
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
initial_local_layers = dict(
|
||||
(parse_layer_file_name(path.name), path)
|
||||
for path in env.pageserver.list_layers(tenant_id, timeline_id)
|
||||
)
|
||||
assert (
|
||||
len(initial_local_layers) > 1
|
||||
@@ -73,6 +74,7 @@ def test_basic_eviction(
|
||||
assert len(initial_local_layers) == len(
|
||||
initial_layer_map_info.historic_layers
|
||||
), "Should have the same layers in memory and on disk"
|
||||
|
||||
for returned_layer in initial_layer_map_info.historic_layers:
|
||||
assert (
|
||||
returned_layer.kind == "Delta"
|
||||
@@ -81,27 +83,29 @@ def test_basic_eviction(
|
||||
not returned_layer.remote
|
||||
), f"All created layers should be present locally, but got {returned_layer}"
|
||||
|
||||
local_layers = list(
|
||||
filter(lambda layer: layer.name == returned_layer.layer_file_name, initial_local_layers)
|
||||
returned_layer_name = parse_layer_file_name(returned_layer.layer_file_name)
|
||||
assert (
|
||||
returned_layer_name in initial_local_layers
|
||||
), f"Did not find returned layer {returned_layer_name} in local layers {list(initial_local_layers.keys())}"
|
||||
|
||||
local_layer_path = (
|
||||
env.pageserver.timeline_dir(tenant_id, timeline_id)
|
||||
/ initial_local_layers[returned_layer_name]
|
||||
)
|
||||
assert (
|
||||
len(local_layers) == 1
|
||||
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
|
||||
local_layer = local_layers[0]
|
||||
assert (
|
||||
returned_layer.layer_file_size == local_layer.stat().st_size
|
||||
), f"Returned layer {returned_layer} has a different file size than local layer {local_layer}"
|
||||
returned_layer.layer_file_size == local_layer_path.stat().st_size
|
||||
), f"Returned layer {returned_layer} has a different file size than local layer {local_layer_path}"
|
||||
|
||||
# Detach all layers, ensure they are not in the local FS, but are still dumped as part of the layer map
|
||||
for local_layer in initial_local_layers:
|
||||
for local_layer_name, local_layer_path in initial_local_layers.items():
|
||||
client.evict_layer(
|
||||
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
|
||||
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer_path.name
|
||||
)
|
||||
assert not any(
|
||||
new_local_layer.name == local_layer.name for new_local_layer in timeline_path.glob("*")
|
||||
), f"Did not expect to find {local_layer} layer after evicting"
|
||||
assert not env.pageserver.layer_exists(
|
||||
tenant_id, timeline_id, local_layer_name
|
||||
), f"Did not expect to find {local_layer_name} layer after evicting"
|
||||
|
||||
empty_layers = list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
empty_layers = env.pageserver.list_layers(tenant_id, timeline_id)
|
||||
assert not empty_layers, f"After evicting all layers, timeline {tenant_id}/{timeline_id} should have no layers locally, but got: {empty_layers}"
|
||||
|
||||
evicted_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id)
|
||||
@@ -118,15 +122,15 @@ def test_basic_eviction(
|
||||
assert (
|
||||
returned_layer.remote
|
||||
), f"All layers should be evicted and not present locally, but got {returned_layer}"
|
||||
assert any(
|
||||
local_layer.name == returned_layer.layer_file_name
|
||||
for local_layer in initial_local_layers
|
||||
returned_layer_name = parse_layer_file_name(returned_layer.layer_file_name)
|
||||
assert (
|
||||
returned_layer_name in initial_local_layers
|
||||
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
|
||||
|
||||
# redownload all evicted layers and ensure the initial state is restored
|
||||
for local_layer in initial_local_layers:
|
||||
for local_layer_name, _local_layer_path in initial_local_layers.items():
|
||||
client.download_layer(
|
||||
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
|
||||
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer_name.to_str()
|
||||
)
|
||||
client.timeline_download_remote_layers(
|
||||
tenant_id,
|
||||
@@ -137,8 +141,9 @@ def test_basic_eviction(
|
||||
at_least_one_download=False,
|
||||
)
|
||||
|
||||
redownloaded_layers = sorted(
|
||||
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
redownloaded_layers = dict(
|
||||
(parse_layer_file_name(path.name), path)
|
||||
for path in env.pageserver.list_layers(tenant_id, timeline_id)
|
||||
)
|
||||
assert (
|
||||
redownloaded_layers == initial_local_layers
|
||||
|
||||
Reference in New Issue
Block a user