Merge commit '296c9190b' into problame/standby-horizon-leases

Christian Schwarz
2025-08-06 17:49:50 +02:00
48 changed files with 946 additions and 407 deletions


@@ -5421,6 +5421,7 @@ SKIP_FILES = frozenset(
    (
        "pg_internal.init",
        "pg.log",
        "neon.signal",
        "zenith.signal",
        "pg_hba.conf",
        "postgresql.conf",


@@ -115,8 +115,7 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
    ".*Local data loss suspected.*",
    # Too many frozen layers error is normal during intensive benchmarks
    ".*too many frozen layers.*",
    # Transient errors when resolving tenant shards by page service
    ".*Fail to resolve tenant shard in attempt.*",
    ".*Failed to resolve tenant shard after.*",
    # Expected warnings when pageserver has not refreshed GC info yet
    ".*pitr LSN/interval not found, skipping force image creation LSN calculation.*",
    ".*No broker updates received for a while.*",


@@ -7,6 +7,7 @@ import time
from enum import StrEnum
import pytest
from fixtures.common_types import TenantShardId
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    NeonEnvBuilder,
@@ -960,6 +961,67 @@ def get_layer_map(env, tenant_shard_id, timeline_id, ps_id):
    return image_layer_count, delta_layer_count


def test_image_layer_creation_time_threshold(neon_env_builder: NeonEnvBuilder):
    """
    Tests that image layers can be created when the time threshold is reached on non-0 shards.
    """
    tenant_conf = {
        "compaction_threshold": "100",
        "image_creation_threshold": "100",
        "image_layer_creation_check_threshold": "1",
        # disable distance based image layer creation check
        "checkpoint_distance": 10 * 1024 * 1024 * 1024,
        "checkpoint_timeout": "100ms",
        "image_layer_force_creation_period": "1s",
        "pitr_interval": "10s",
        "gc_period": "1s",
        "compaction_period": "1s",
        "lsn_lease_length": "1s",
    }

    # consider every tenant large to run the image layer generation check more eagerly
    neon_env_builder.pageserver_config_override = (
        "image_layer_generation_large_timeline_threshold=0"
    )
    neon_env_builder.num_pageservers = 1
    neon_env_builder.num_safekeepers = 1
    env = neon_env_builder.init_start(
        initial_tenant_conf=tenant_conf,
        initial_tenant_shard_count=2,
        initial_tenant_shard_stripe_size=1,
    )
    tenant_id = env.initial_tenant
    timeline_id = env.initial_timeline

    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql("CREATE TABLE foo (id INTEGER, val text)")
    for v in range(10):
        endpoint.safe_psql(f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))")

    tenant_shard_id = TenantShardId(tenant_id, 1, 2)

    # Generate some rows.
    for v in range(20):
        endpoint.safe_psql(f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))")

    # restart page server so that logical size on non-0 shards is missing
    env.pageserver.restart()

    (old_images, old_deltas) = get_layer_map(env, tenant_shard_id, timeline_id, 0)
    log.info(f"old images: {old_images}, old deltas: {old_deltas}")

    def check_image_creation():
        (new_images, old_deltas) = get_layer_map(env, tenant_shard_id, timeline_id, 0)
        log.info(f"images: {new_images}, deltas: {old_deltas}")
        assert new_images > old_images

    wait_until(check_image_creation)

    endpoint.stop_and_destroy()


def test_image_layer_force_creation_period(neon_env_builder: NeonEnvBuilder):
    """
    Tests that page server can force creating new images if image_layer_force_creation_period is enabled


@@ -2,6 +2,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from fixtures.neon_fixtures import StorageControllerApiException
if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnvBuilder
@@ -75,3 +78,38 @@ def test_safekeeper_migration_simple(neon_env_builder: NeonEnvBuilder):
    ep.start(safekeeper_generation=1, safekeepers=[3])
    assert ep.safe_psql("SELECT * FROM t") == [(i,) for i in range(1, 4)]


def test_new_sk_set_validation(neon_env_builder: NeonEnvBuilder):
    """
    Test that safekeeper_migrate validates the new_sk_set before starting the migration.
    """
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.storage_controller_config = {
        "timelines_onto_safekeepers": True,
        "timeline_safekeeper_count": 2,
    }
    env = neon_env_builder.init_start()

    def expect_fail(sk_set: list[int], match: str):
        with pytest.raises(StorageControllerApiException, match=match):
            env.storage_controller.migrate_safekeepers(
                env.initial_tenant, env.initial_timeline, sk_set
            )
        # Check that we failed before committing to the database.
        mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
        assert mconf["generation"] == 1

    expect_fail([], "safekeeper set is empty")
    expect_fail([1], "must have at least 2 safekeepers")
    expect_fail([1, 1], "duplicate safekeeper")
    expect_fail([1, 100500], "does not exist")

    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
    sk_set = mconf["sk_set"]
    assert len(sk_set) == 2

    decom_sk = [sk.id for sk in env.safekeepers if sk.id not in sk_set][0]
    env.storage_controller.safekeeper_scheduling_policy(decom_sk, "Decomissioned")
    expect_fail([sk_set[0], decom_sk], "decomissioned")


@@ -1673,6 +1673,91 @@ def test_shard_resolve_during_split_abort(neon_env_builder: NeonEnvBuilder):
    # END_HADRON


# HADRON
@pytest.mark.skip(reason="The backpressure change has not been merged yet.")
def test_back_pressure_per_shard(neon_env_builder: NeonEnvBuilder):
    """
    Tests that backpressure knobs are enforced on a per-shard basis instead of at the tenant level.
    """
    init_shard_count = 4
    neon_env_builder.num_pageservers = init_shard_count
    stripe_size = 1
    env = neon_env_builder.init_start(
        initial_tenant_shard_count=init_shard_count,
        initial_tenant_shard_stripe_size=stripe_size,
        initial_tenant_conf={
            # disable auto-flush of shards and set max_replication_flush_lag to 15MB.
            # The backpressure parameters must be enforced at the shard level to avoid stalling PG.
            "checkpoint_distance": 1 * 1024 * 1024 * 1024,
            "checkpoint_timeout": "1h",
        },
    )

    endpoint = env.endpoints.create(
        "main",
        config_lines=[
            "max_replication_write_lag = 0",
            "max_replication_apply_lag = 0",
            "max_replication_flush_lag = 15MB",
            "neon.max_cluster_size = 10GB",
        ],
    )
    endpoint.respec(skip_pg_catalog_updates=False)  # Needed for databricks_system to get created.
    endpoint.start()

    # generate 20MB of data
    endpoint.safe_psql(
        "CREATE TABLE usertable AS SELECT s AS KEY, repeat('a', 1000) as VALUE from generate_series(1, 20000) s;"
    )

    res = endpoint.safe_psql(
        "SELECT neon.backpressure_throttling_time() as throttling_time", dbname="databricks_system"
    )[0]
    assert res[0] == 0, f"throttling_time should be 0, but got {res[0]}"
    endpoint.stop()


# HADRON
def test_shard_split_page_server_timeout(neon_env_builder: NeonEnvBuilder):
    """
    Tests that shard split can correctly handle page server timeouts and abort the split
    """
    init_shard_count = 2
    neon_env_builder.num_pageservers = 1
    stripe_size = 1
    if neon_env_builder.storage_controller_config is None:
        neon_env_builder.storage_controller_config = {"shard_split_request_timeout": "5s"}
    else:
        neon_env_builder.storage_controller_config["shard_split_request_timeout"] = "5s"
    env = neon_env_builder.init_start(
        initial_tenant_shard_count=init_shard_count,
        initial_tenant_shard_stripe_size=stripe_size,
    )
    env.storage_controller.allowed_errors.extend(
        [
            ".*Enqueuing background abort.*",
            ".*failpoint.*",
            ".*Failed to abort.*",
            ".*Exclusive lock by ShardSplit was held.*",
        ]
    )
    env.pageserver.allowed_errors.extend([".*request was dropped before completing.*"])
    endpoint1 = env.endpoints.create_start(branch_name="main")

    env.pageserver.http_client().configure_failpoints(("shard-split-post-finish-pause", "pause"))
    with pytest.raises(StorageControllerApiException):
        env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=4)
    env.pageserver.http_client().configure_failpoints(("shard-split-post-finish-pause", "off"))

    endpoint1.stop_and_destroy()


def test_sharding_backpressure(neon_env_builder: NeonEnvBuilder):
    """
    Check a scenario when one of the shards is much slower than others.


@@ -209,9 +209,9 @@ def test_ancestor_detach_branched_from(
    client.timeline_delete(env.initial_tenant, env.initial_timeline)
    wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline)

    # because we do the fullbackup from ancestor at the branch_lsn, the zenith.signal is always different
    # as there is always "PREV_LSN: invalid" for "before"
    skip_files = {"zenith.signal"}
    # because we do the fullbackup from ancestor at the branch_lsn, the neon.signal and/or zenith.signal is always
    # different as there is always "PREV_LSN: invalid" for "before"
    skip_files = {"zenith.signal", "neon.signal"}

    assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, skip_files)
@@ -767,7 +767,7 @@ def test_compaction_induced_by_detaches_in_history(
        env.pageserver, env.initial_tenant, branch_timeline_id, branch_lsn, fullbackup_after
    )

    # we don't need to skip any files, because zenith.signal will be identical
    # we don't need to skip any files, because neon.signal will be identical
    assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, set())