mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
test_suite: patch, don't replace, the tenant_config field, where appropriate (#7771)
Before this PR, the changed tests would overwrite the entire `tenant_config` because `pageserver_config_override` is merged non-recursively into the `ps_cfg`. This meant they would override the `PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM`, impacting our matrix build for `compaction_algorithm=Tiered|Legacy` in https://github.com/neondatabase/neon/pull/7748. I found the tests fixed in this PR using the `NEON_PAGESERVER_PANIC_ON_UNSPECIFIED_COMPACTION_ALGORITHM` env var that I added in #7748. Therefore, I think this is an exhaustive fix. This is better than just searching the code base for `tenant_config`, which is what I had sketched in #7747. refs #7749
This commit is contained in:
committed by
GitHub
parent
4b8809b280
commit
6d951e69d6
@@ -455,7 +455,7 @@ class NeonEnvBuilder:
|
||||
test_overlay_dir: Optional[Path] = None,
|
||||
pageserver_remote_storage: Optional[RemoteStorage] = None,
|
||||
# toml that will be decomposed into `--config-override` flags during `pageserver --init`
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
pageserver_config_override: Optional[str | Callable[[Dict[str, Any]], None]] = None,
|
||||
num_safekeepers: int = 1,
|
||||
num_pageservers: int = 1,
|
||||
# Use non-standard SK ids to check for various parsing bugs
|
||||
@@ -1127,10 +1127,14 @@ class NeonEnv:
|
||||
)
|
||||
|
||||
if config.pageserver_config_override is not None:
|
||||
for o in config.pageserver_config_override.split(";"):
|
||||
override = toml.loads(o)
|
||||
for key, value in override.items():
|
||||
ps_cfg[key] = value
|
||||
if callable(config.pageserver_config_override):
|
||||
config.pageserver_config_override(ps_cfg)
|
||||
else:
|
||||
assert isinstance(config.pageserver_config_override, str)
|
||||
for o in config.pageserver_config_override.split(";"):
|
||||
override = toml.loads(o)
|
||||
for key, value in override.items():
|
||||
ps_cfg[key] = value
|
||||
|
||||
# Create a corresponding NeonPageserver object
|
||||
self.pageservers.append(
|
||||
|
||||
@@ -11,8 +11,7 @@ from fixtures.utils import print_gc_result, query_scalar
|
||||
#
|
||||
def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
|
||||
error_regexes = [
|
||||
".*invalid branch start lsn.*",
|
||||
|
||||
@@ -67,8 +67,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
|
||||
#
|
||||
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
timeline = env.neon_cli.create_branch("test_gc_aggressive", "main")
|
||||
endpoint = env.endpoints.create_start("test_gc_aggressive")
|
||||
|
||||
@@ -94,13 +93,11 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
#
|
||||
def test_gc_index_upload(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
num_index_uploads = 0
|
||||
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main")
|
||||
endpoint = env.endpoints.create_start("test_gc_index_upload")
|
||||
|
||||
@@ -16,8 +16,7 @@ from fixtures.utils import print_gc_result, query_scalar
|
||||
#
|
||||
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
env.neon_cli.create_branch("test_old_request_lsn", "main")
|
||||
endpoint = env.endpoints.create_start("test_old_request_lsn")
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv, neon_binpath: Path):
|
||||
"listen_http_addr",
|
||||
"pg_auth_type",
|
||||
"http_auth_type",
|
||||
# TODO: only needed for NEON_PAGESERVER_PANIC_ON_UNSPECIFIED_COMPACTION_ALGORITHM in https://github.com/neondatabase/neon/pull/7748
|
||||
# "tenant_config",
|
||||
]
|
||||
required_config_overrides = [
|
||||
f"--config-override={toml.dumps({k: ps_config[k]})}" for k in required_config_keys
|
||||
|
||||
@@ -10,11 +10,9 @@ from fixtures.utils import print_gc_result, query_scalar
|
||||
#
|
||||
def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
|
||||
# Set pitr interval such that we need to keep the data
|
||||
neon_env_builder.pageserver_config_override = (
|
||||
"tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={"pitr_interval": "1 day", "gc_horizon": "0"}
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
endpoint_main = env.endpoints.create_start("main")
|
||||
|
||||
main_pg_conn = endpoint_main.connect()
|
||||
|
||||
@@ -10,9 +10,11 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
#
|
||||
def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
# Override default checkpointer settings to run it more often
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"checkpoint_distance": "1048576",
|
||||
}
|
||||
)
|
||||
env.pageserver.is_testing_enabled_or_skip()
|
||||
|
||||
# We expect the pageserver to exit, which will cause storage storage controller
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
from contextlib import closing
|
||||
from typing import Any, Dict
|
||||
|
||||
import psycopg2.extras
|
||||
from fixtures.common_types import Lsn
|
||||
@@ -14,16 +15,22 @@ from fixtures.utils import wait_until
|
||||
|
||||
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
|
||||
"""Test per tenant configuration"""
|
||||
# set some non-default global config
|
||||
neon_env_builder.pageserver_config_override = """
|
||||
page_cache_size=444;
|
||||
wait_lsn_timeout='111 s';
|
||||
[tenant_config]
|
||||
checkpoint_distance = 10000
|
||||
compaction_target_size = 1048576
|
||||
evictions_low_residence_duration_metric_threshold = "2 days"
|
||||
eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold = "23 hours" }
|
||||
"""
|
||||
|
||||
def set_some_nondefault_global_config(ps_cfg: Dict[str, Any]):
|
||||
ps_cfg["page_cache_size"] = 444
|
||||
ps_cfg["wait_lsn_timeout"] = "111 s"
|
||||
|
||||
tenant_config = ps_cfg.setdefault("tenant_config", {})
|
||||
tenant_config["checkpoint_distance"] = 10000
|
||||
tenant_config["compaction_target_size"] = 1048576
|
||||
tenant_config["evictions_low_residence_duration_metric_threshold"] = "2 days"
|
||||
tenant_config["eviction_policy"] = {
|
||||
"kind": "LayerAccessThreshold",
|
||||
"period": "20s",
|
||||
"threshold": "23 hours",
|
||||
}
|
||||
|
||||
neon_env_builder.pageserver_config_override = set_some_nondefault_global_config
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
# we configure eviction but no remote storage, there might be error lines
|
||||
|
||||
@@ -502,9 +502,14 @@ def test_get_tenant_size_with_multiple_branches(
|
||||
|
||||
gc_horizon = 128 * 1024
|
||||
|
||||
neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"compaction_period": "0s",
|
||||
"gc_period": "0s",
|
||||
"pitr_interval": "0sec",
|
||||
"gc_horizon": gc_horizon,
|
||||
}
|
||||
)
|
||||
|
||||
# FIXME: we have a race condition between GC and delete timeline. GC might fail with this
|
||||
# error. Similar to https://github.com/neondatabase/neon/issues/2671
|
||||
|
||||
@@ -415,11 +415,12 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
|
||||
|
||||
# Disable background compaction as we don't want it to happen after `get_physical_size` request
|
||||
# and before checking the expected size on disk, which makes the assertion failed
|
||||
neon_env_builder.pageserver_config_override = (
|
||||
"tenant_config={checkpoint_distance=100000, compaction_period='10m'}"
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"checkpoint_distance": "100000",
|
||||
"compaction_period": "10m",
|
||||
}
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction")
|
||||
@@ -462,9 +463,14 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Disable background compaction and GC as we don't want it to happen after `get_physical_size` request
|
||||
# and before checking the expected size on disk, which makes the assertion failed
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='0s', gc_period='0s', pitr_interval='1s'}"
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"checkpoint_distance": "100000",
|
||||
"compaction_period": "0s",
|
||||
"gc_period": "0s",
|
||||
"pitr_interval": "1s",
|
||||
}
|
||||
)
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc")
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import time
|
||||
from typing import Any, Dict
|
||||
|
||||
from fixtures.common_types import Lsn, TenantId
|
||||
from fixtures.log_helper import log
|
||||
@@ -42,10 +43,14 @@ def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
|
||||
# Kills one of the safekeepers and ensures that only the active ones are printed in the state.
|
||||
def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuilder):
|
||||
# Trigger WAL wait timeout faster
|
||||
neon_env_builder.pageserver_config_override = """
|
||||
wait_lsn_timeout = "1s"
|
||||
tenant_config={walreceiver_connect_timeout = "2s", lagging_wal_timeout = "2s"}
|
||||
"""
|
||||
def customize_pageserver_toml(ps_cfg: Dict[str, Any]):
|
||||
ps_cfg["wait_lsn_timeout"] = "1s"
|
||||
tenant_config = ps_cfg.setdefault("tenant_config", {})
|
||||
tenant_config["walreceiver_connect_timeout"] = "2s"
|
||||
tenant_config["lagging_wal_timeout"] = "2s"
|
||||
|
||||
neon_env_builder.pageserver_config_override = customize_pageserver_toml
|
||||
|
||||
# Have notable SK ids to ensure we check logs for their presence, not some other random numbers
|
||||
neon_env_builder.safekeepers_id_start = 12345
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
|
||||
Reference in New Issue
Block a user