mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 17:02:56 +00:00
Closes https://github.com/neondatabase/neon/issues/3439 Adds a set of commands to manipulate the layer map: * dump the layer map contents * evict the layer from the layer map (remove the local file, put the remote layer instead in the layer map) * download the layer (operation reversing the eviction) The commands will change later, when the statistics are added on top, so the swagger schema is not adjusted. The commands might have issues with a large number of layers: no pagination is done for the dump command; the eviction and download commands look for the layer to evict/download by iterating all layers sequentially and comparing the layer names. For now, that seems to be tolerable (a "big" number of layers is ~2_000) and further experiments are needed. --------- Co-authored-by: Christian Schwarz <christian@neon.tech>
141 lines
5.9 KiB
Python
141 lines
5.9 KiB
Python
import pytest

from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    RemoteStorageKind,
    wait_for_last_record_lsn,
    wait_for_upload,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar


# Crates a few layers, ensures that we can evict them (removing locally but keeping track of them anyway)
|
|
# and then download them back.
|
|
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
|
|
def test_basic_eviction(
|
|
neon_env_builder: NeonEnvBuilder,
|
|
remote_storage_kind: RemoteStorageKind,
|
|
):
|
|
neon_env_builder.enable_remote_storage(
|
|
remote_storage_kind=remote_storage_kind,
|
|
test_name="test_download_remote_layers_api",
|
|
)
|
|
|
|
env = neon_env_builder.init_start()
|
|
client = env.pageserver.http_client()
|
|
pg = env.postgres.create_start("main")
|
|
|
|
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
|
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
|
|
|
# Create a number of layers in the tenant
|
|
with pg.cursor() as cur:
|
|
cur.execute("CREATE TABLE foo (t text)")
|
|
cur.execute(
|
|
"""
|
|
INSERT INTO foo
|
|
SELECT 'long string to consume some space' || g
|
|
FROM generate_series(1, 5000000) g
|
|
"""
|
|
)
|
|
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
|
|
|
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
|
|
client.timeline_checkpoint(tenant_id, timeline_id)
|
|
wait_for_upload(client, tenant_id, timeline_id, current_lsn)
|
|
|
|
timeline_path = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
|
|
initial_local_layers = sorted(
|
|
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
|
)
|
|
assert (
|
|
len(initial_local_layers) > 1
|
|
), f"Should create multiple layers for timeline, but got {initial_local_layers}"
|
|
|
|
# Compare layer map dump with the local layers, ensure everything's present locally and matches
|
|
initial_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id)
|
|
assert (
|
|
not initial_layer_map_info.in_memory_layers
|
|
), "Should have no in memory layers after flushing"
|
|
assert len(initial_local_layers) == len(
|
|
initial_layer_map_info.historic_layers
|
|
), "Should have the same layers in memory and on disk"
|
|
for returned_layer in initial_layer_map_info.historic_layers:
|
|
assert (
|
|
returned_layer.kind == "Delta"
|
|
), f"Did not create and expect image layers, but got {returned_layer}"
|
|
assert (
|
|
not returned_layer.remote
|
|
), f"All created layers should be present locally, but got {returned_layer}"
|
|
|
|
local_layers = list(
|
|
filter(lambda layer: layer.name == returned_layer.layer_file_name, initial_local_layers)
|
|
)
|
|
assert (
|
|
len(local_layers) == 1
|
|
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
|
|
local_layer = local_layers[0]
|
|
assert (
|
|
returned_layer.layer_file_size == local_layer.stat().st_size
|
|
), f"Returned layer {returned_layer} has a different file size than local layer {local_layer}"
|
|
|
|
# Detach all layers, ensre they are not in the local FS, but are still dumped as part of the layer map
|
|
for local_layer in initial_local_layers:
|
|
client.evict_layer(
|
|
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
|
|
)
|
|
assert not any(
|
|
new_local_layer.name == local_layer.name for new_local_layer in timeline_path.glob("*")
|
|
), f"Did not expect to find {local_layer} layer after evicting"
|
|
|
|
empty_layers = list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
|
assert (
|
|
not empty_layers
|
|
), f"After evicting all layers, timeline {tenant_id}/{timeline_id} should have no layers locally, but got: {empty_layers}"
|
|
|
|
evicted_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id)
|
|
assert (
|
|
not evicted_layer_map_info.in_memory_layers
|
|
), "Should have no in memory layers after flushing and evicting"
|
|
assert len(initial_local_layers) == len(
|
|
evicted_layer_map_info.historic_layers
|
|
), "Should have the same layers in memory and on disk initially"
|
|
for returned_layer in evicted_layer_map_info.historic_layers:
|
|
assert (
|
|
returned_layer.kind == "Delta"
|
|
), f"Did not create and expect image layers, but got {returned_layer}"
|
|
assert (
|
|
returned_layer.remote
|
|
), f"All layers should be evicted and not present locally, but got {returned_layer}"
|
|
assert any(
|
|
local_layer.name == returned_layer.layer_file_name
|
|
for local_layer in initial_local_layers
|
|
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
|
|
|
|
# redownload all evicted layers and ensure the initial state is restored
|
|
for local_layer in initial_local_layers:
|
|
client.download_layer(
|
|
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
|
|
)
|
|
client.timeline_download_remote_layers(
|
|
tenant_id,
|
|
timeline_id,
|
|
# allow some concurrency to unveil potential concurrency bugs
|
|
max_concurrent_downloads=10,
|
|
errors_ok=False,
|
|
at_least_one_download=False,
|
|
)
|
|
|
|
redownloaded_layers = sorted(
|
|
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
|
)
|
|
assert (
|
|
redownloaded_layers == initial_local_layers
|
|
), "Should have the same layers locally after redownloading the evicted layers"
|
|
redownloaded_layer_map_info = client.layer_map_info(
|
|
tenant_id=tenant_id, timeline_id=timeline_id
|
|
)
|
|
assert (
|
|
redownloaded_layer_map_info == initial_layer_map_info
|
|
), "Should have the same layer map after redownloading the evicted layers"
|