feat(pageserver): generate basebackup from aux file v2 storage (#7517)

This pull request adds the new basebackup read path + aux file write
path. In the regression test, all logical replication tests are run with
matrix aux_file_v2=false/true.

Also fixed the vectored get code path to correctly return a missing-key
error when it is called from the unified sequential get code path.
---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-05-07 12:30:18 -04:00
committed by GitHub
parent 308227fa51
commit 017c34b773
12 changed files with 391 additions and 104 deletions

View File

@@ -82,6 +82,7 @@ from fixtures.utils import (
subprocess_capture,
wait_until,
)
from fixtures.utils import AuxFileStore as AuxFileStore # reexport
"""
This file contains pytest fixtures. A fixture is a test resource that can be
@@ -465,6 +466,7 @@ class NeonEnvBuilder:
initial_tenant: Optional[TenantId] = None,
initial_timeline: Optional[TimelineId] = None,
pageserver_virtual_file_io_engine: Optional[str] = None,
pageserver_aux_file_policy: Optional[AuxFileStore] = None,
):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
@@ -520,6 +522,8 @@ class NeonEnvBuilder:
self.pageserver_validate_vectored_get = bool(validate)
log.debug(f'Overriding pageserver validate_vectored_get config to "{validate}"')
self.pageserver_aux_file_policy = pageserver_aux_file_policy
assert test_name.startswith(
"test_"
), "Unexpectedly instantiated from outside a test function"
@@ -565,6 +569,7 @@ class NeonEnvBuilder:
timeline_id=env.initial_timeline,
shard_count=initial_tenant_shard_count,
shard_stripe_size=initial_tenant_shard_stripe_size,
aux_file_v2=self.pageserver_aux_file_policy,
)
assert env.initial_tenant == initial_tenant
assert env.initial_timeline == initial_timeline
@@ -1047,6 +1052,7 @@ class NeonEnv:
)
self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
self.pageserver_aux_file_policy = config.pageserver_aux_file_policy
# Create a config file corresponding to the options
cfg: Dict[str, Any] = {
@@ -1283,6 +1289,7 @@ def _shared_simple_env(
pg_distrib_dir: Path,
pg_version: PgVersion,
pageserver_virtual_file_io_engine: str,
pageserver_aux_file_policy: Optional[AuxFileStore],
) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
@@ -1313,6 +1320,7 @@ def _shared_simple_env(
test_name=request.node.name,
test_output_dir=test_output_dir,
pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
pageserver_aux_file_policy=pageserver_aux_file_policy,
) as builder:
env = builder.init_start()
@@ -1352,6 +1360,7 @@ def neon_env_builder(
test_overlay_dir: Path,
top_output_dir: Path,
pageserver_virtual_file_io_engine: str,
pageserver_aux_file_policy: Optional[AuxFileStore] = None,
) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Neon environment for test.
@@ -1385,6 +1394,7 @@ def neon_env_builder(
test_name=request.node.name,
test_output_dir=test_output_dir,
test_overlay_dir=test_overlay_dir,
pageserver_aux_file_policy=pageserver_aux_file_policy,
) as builder:
yield builder
@@ -1544,6 +1554,7 @@ class NeonCli(AbstractNeonCli):
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_v2: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
@@ -1567,6 +1578,16 @@ class NeonCli(AbstractNeonCli):
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_v2 is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
if aux_file_v2 is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
if aux_file_v2 is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross_validation"])
if set_default:
args.append("--set-default")

View File

@@ -5,6 +5,7 @@ import pytest
from _pytest.python import Metafunc
from fixtures.pg_version import PgVersion
from fixtures.utils import AuxFileStore
"""
Dynamically parametrize tests by different parameters
@@ -31,6 +32,11 @@ def pageserver_virtual_file_io_engine() -> Optional[str]:
return os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE")
@pytest.fixture(scope="function", autouse=True)
def pageserver_aux_file_policy() -> Optional[AuxFileStore]:
    """
    Default value for the aux file policy used by a test.

    Returns None, i.e. no explicit policy is selected. A test can supply a
    different value by parametrizing an argument of the same name.
    """
    return None
def pytest_generate_tests(metafunc: Metafunc):
if (bt := os.getenv("BUILD_TYPE")) is None:
build_types = ["debug", "release"]

View File

@@ -1,4 +1,5 @@
import contextlib
import enum
import json
import os
import re
@@ -484,3 +485,16 @@ def assert_no_errors(log_file, service, allowed_errors):
log.info(f"not allowed {service} error: {error.strip()}")
assert not errors, f"Log errors on {service}: {errors[0]}"
@enum.unique
class AuxFileStore(str, enum.Enum):
    """
    Identifiers for the aux file storage policy used by the test fixtures.

    Members are also plain strings (their value), so they compare equal to
    the raw value. Both `repr()` and `str()` render as `'aux-<value>'`,
    including the surrounding single quotes.
    """

    V1 = "V1"
    V2 = "V2"
    CrossValidation = "CrossValidation"

    def __repr__(self) -> str:
        # The single quotes are part of the rendered text.
        return "'aux-" + self.value + "'"

    def __str__(self) -> str:
        # Keep str() and repr() identical.
        return repr(self)

View File

@@ -190,7 +190,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
"trace_read_requests": True,
"walreceiver_connect_timeout": "13m",
"image_layer_creation_check_threshold": 1,
"switch_to_aux_file_v2": True,
"switch_aux_file_policy": "CrossValidation",
}
ps_http = env.pageserver.http_client()

View File

@@ -6,6 +6,7 @@ from string import ascii_lowercase
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
AuxFileStore,
NeonEnv,
NeonEnvBuilder,
logical_replication_sync,
@@ -19,6 +20,19 @@ def random_string(n: int):
return "".join([choice(ascii_lowercase) for _ in range(n)])
@pytest.mark.parametrize(
    "pageserver_aux_file_policy",
    [AuxFileStore.V1, AuxFileStore.V2, AuxFileStore.CrossValidation],
)
def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy: AuxFileStore):
    """
    Check that the initial tenant's effective config reports the aux file
    policy this test was parametrized with.
    """
    with neon_simple_env.pageserver.http_client() as ps_http:
        effective = ps_http.tenant_config(neon_simple_env.initial_tenant).effective_config
    # AuxFileStore is a str subclass, so it compares directly against the
    # value read from the config.
    assert pageserver_aux_file_policy == effective["switch_aux_file_policy"]
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -160,6 +174,9 @@ COMMIT;
# Test that neon.logical_replication_max_snap_files works
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
def slot_removed(ep):
assert (
@@ -281,6 +298,9 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
# Test compute start at an LSN whose page starts with a contrecord
# https://github.com/neondatabase/neon/issues/5749
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -371,6 +391,9 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
# logical replication bug as such, but without logical replication,
# records passed to the WAL redo process are never large enough to hit
# the bug.
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -442,6 +465,9 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
env = neon_simple_env