mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-20 14:40:37 +00:00
Merge branch 'main' into communicator-rewrite
This commit is contained in:
@@ -453,6 +453,7 @@ class NeonEnvBuilder:
|
||||
pageserver_get_vectored_concurrent_io: str | None = None,
|
||||
pageserver_tracing_config: PageserverTracingConfig | None = None,
|
||||
pageserver_import_config: PageserverImportConfig | None = None,
|
||||
storcon_kick_secondary_downloads: bool | None = True,
|
||||
):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
@@ -514,6 +515,8 @@ class NeonEnvBuilder:
|
||||
self.pageserver_tracing_config = pageserver_tracing_config
|
||||
self.pageserver_import_config = pageserver_import_config
|
||||
|
||||
self.storcon_kick_secondary_downloads = storcon_kick_secondary_downloads
|
||||
|
||||
self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
|
||||
pageserver_default_tenant_config_compaction_algorithm
|
||||
)
|
||||
@@ -1212,6 +1215,13 @@ class NeonEnv:
|
||||
storage_controller_config = storage_controller_config or {}
|
||||
storage_controller_config["use_https_safekeeper_api"] = True
|
||||
|
||||
# TODO(diko): uncomment when timeline_safekeeper_count option is in the release branch,
|
||||
# so the compat tests will not fail because of its presence.
|
||||
# if config.num_safekeepers < 3:
|
||||
# storage_controller_config = storage_controller_config or {}
|
||||
# if "timeline_safekeeper_count" not in storage_controller_config:
|
||||
# storage_controller_config["timeline_safekeeper_count"] = config.num_safekeepers
|
||||
|
||||
if storage_controller_config is not None:
|
||||
cfg["storage_controller"] = storage_controller_config
|
||||
|
||||
@@ -1221,6 +1231,14 @@ class NeonEnv:
|
||||
else:
|
||||
cfg["storage_controller"] = {"use_local_compute_notifications": False}
|
||||
|
||||
if config.storcon_kick_secondary_downloads is not None:
|
||||
# Configure whether storage controller should actively kick off secondary downloads
|
||||
if "storage_controller" not in cfg:
|
||||
cfg["storage_controller"] = {}
|
||||
cfg["storage_controller"]["kick_secondary_downloads"] = (
|
||||
config.storcon_kick_secondary_downloads
|
||||
)
|
||||
|
||||
# Create config for pageserver
|
||||
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||
@@ -2215,6 +2233,21 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
response.raise_for_status()
|
||||
log.info(f"timeline_create success: {response.json()}")
|
||||
|
||||
def migrate_safekeepers(
    self,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    new_sk_set: list[int],
):
    """
    POST a safekeeper migration request for one timeline.

    Sends `{"new_sk_set": new_sk_set}` to the `safekeeper_migrate` endpoint of the
    given tenant/timeline, fails if the response carries an error status, and logs
    the JSON body of a successful response.

    :param tenant_id: tenant owning the timeline
    :param timeline_id: timeline whose safekeeper set should change
    :param new_sk_set: safekeeper ids that make up the requested set
    :raises: whatever `response.raise_for_status()` raises on an error status
    """
    migrate_url = f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate"
    payload = {"new_sk_set": new_sk_set}
    auth_headers = self.headers(TokenScope.PAGE_SERVER_API)

    resp = self.request("POST", migrate_url, json=payload, headers=auth_headers)
    resp.raise_for_status()
    log.info(f"migrate_safekeepers success: {resp.json()}")
|
||||
|
||||
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
|
||||
"""
|
||||
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr": str, "listen_http_port": int}
|
||||
|
||||
@@ -1219,3 +1219,31 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
)
|
||||
self.verbose_error(res)
|
||||
return res.json()
|
||||
|
||||
def force_override_feature_flag(self, flag: str, value: str | None = None):
    """
    Set or clear a forced override for a feature flag.

    With a `value`, issues a PUT to `/v1/feature_flag/{flag}` carrying the value as
    the `value` query parameter. With `value=None`, issues a DELETE to the same URL.
    Either way the response is handed to `verbose_error`.

    :param flag: name of the feature flag
    :param value: override value to install, or None to remove the override
    """
    flag_url = f"http://localhost:{self.port}/v1/feature_flag/{flag}"

    if value is not None:
        res = self.put(flag_url, params={"value": value})
    else:
        res = self.delete(flag_url)

    self.verbose_error(res)
|
||||
|
||||
def evaluate_feature_flag_boolean(self, tenant_id: TenantId, flag: str) -> Any:
    """
    Evaluate a feature flag for a tenant, requesting the boolean form.

    Issues a GET to `/v1/tenant/{tenant_id}/feature_flag/{flag}` with `as=boolean`,
    passes the response through `verbose_error`, and returns the decoded JSON body.

    :param tenant_id: tenant to evaluate the flag for
    :param flag: name of the feature flag
    :return: the decoded JSON response body
    """
    endpoint = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
    response = self.get(endpoint, params={"as": "boolean"})
    self.verbose_error(response)
    return response.json()
|
||||
|
||||
def evaluate_feature_flag_multivariate(self, tenant_id: TenantId, flag: str) -> Any:
    """
    Evaluate a feature flag for a tenant, requesting the multivariate form.

    Issues a GET to `/v1/tenant/{tenant_id}/feature_flag/{flag}` with
    `as=multivariate`, passes the response through `verbose_error`, and returns the
    decoded JSON body.

    :param tenant_id: tenant to evaluate the flag for
    :param flag: name of the feature flag
    :return: the decoded JSON response body
    """
    endpoint = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
    response = self.get(endpoint, params={"as": "multivariate"})
    self.verbose_error(response)
    return response.json()
|
||||
|
||||
@@ -146,8 +146,6 @@ def run_benchmark(env: NeonEnv, pg_bin: PgBin, record, duration_secs: int):
|
||||
ps_http.base_url,
|
||||
"--page-service-connstring",
|
||||
env.pageserver.connstr(password=None),
|
||||
"--gzip-probability",
|
||||
"1",
|
||||
"--runtime",
|
||||
f"{duration_secs}s",
|
||||
# don't specify the targets explicitly, let pagebench auto-discover them
|
||||
|
||||
@@ -62,7 +62,8 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
ps.allowed_errors.extend(
|
||||
[
|
||||
# We shut down pageservers while they might have some compaction work going on
|
||||
".*Compaction failed.*shutting down.*"
|
||||
".*Compaction failed.*shutting down.*",
|
||||
".*flush task cancelled.*",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -172,7 +172,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
|
||||
env.initial_tenant,
|
||||
env.initial_timeline,
|
||||
MembershipConfiguration(generation=1, members=[sk.safekeeper_id()], new_members=None),
|
||||
int(env.pg_version),
|
||||
int(env.pg_version) * 10000,
|
||||
Lsn(0),
|
||||
None,
|
||||
)
|
||||
|
||||
@@ -418,7 +418,7 @@ def test_sql_exporter_metrics_e2e(
|
||||
pg_user = conn_options["user"]
|
||||
pg_dbname = conn_options["dbname"]
|
||||
pg_application_name = f"sql_exporter{stem_suffix}"
|
||||
connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}"
|
||||
connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}&pgaudit.log=none"
|
||||
|
||||
def escape_go_filepath_match_characters(s: str) -> str:
|
||||
"""
|
||||
|
||||
51
test_runner/regress/test_feature_flag.py
Normal file
51
test_runner/regress/test_feature_flag.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fixtures.utils import run_only_on_default_postgres
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
@run_only_on_default_postgres("Pageserver-only test only needs to run on one version")
def test_feature_flag(neon_env_builder: NeonEnvBuilder):
    """
    Exercise the pageserver feature-flag override and evaluation endpoints.

    Forces "test-feature-flag" to "true", then to "false", then removes the
    override, checking the boolean and multivariate evaluation results each time.
    """
    env = neon_env_builder.init_start()
    flag = "test-feature-flag"

    def override(value):
        # A fresh HTTP client per call, as each step of the test does.
        env.pageserver.http_client().force_override_feature_flag(flag, value)

    def boolean_result():
        client = env.pageserver.http_client()
        return client.evaluate_feature_flag_boolean(env.initial_tenant, flag)["result"]

    def multivariate_result():
        client = env.pageserver.http_client()
        return client.evaluate_feature_flag_multivariate(env.initial_tenant, flag)["result"]

    # Override "true": both evaluation modes succeed.
    override("true")
    assert boolean_result()["Ok"]
    assert multivariate_result()["Ok"] == "true"

    # Override "false": boolean evaluation reports that no condition group matched,
    # while multivariate evaluation returns the raw value.
    override("false")
    assert boolean_result()["Err"] == "No condition group is matched"
    assert multivariate_result()["Ok"] == "false"

    # No override: both evaluation modes return an error.
    override(None)
    assert "Err" in boolean_result()
    assert "Err" in multivariate_result()
|
||||
@@ -671,12 +671,6 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu
|
||||
"""
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
|
||||
# On the new mode, the test runs into a cancellation issue, i.e. the walproposer can't shut down
|
||||
# as it is hang-waiting on the timeline_checkpoint call in WalIngest::new.
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": False,
|
||||
}
|
||||
|
||||
# turn off background tasks so that they don't interfere with the downloads
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
|
||||
76
test_runner/regress/test_safekeeper_migration.py
Normal file
76
test_runner/regress/test_safekeeper_migration.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def test_safekeeper_migration_simple(neon_env_builder: NeonEnvBuilder):
    """
    Simple safekeeper migration test.
    Creates 3 safekeepers. The timeline is configured to use only one safekeeper.
    1. Go through all safekeepers, migrating the timeline to each in turn.
    2. Stop the other safekeepers. Validate that an insert still succeeds.
    3. Start the other safekeepers again and move on to the next safekeeper.
    4. Validate that the table contains all inserted values.
    """
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.storage_controller_config = {
        "timelines_onto_safekeepers": True,
        "timeline_safekeeper_count": 1,
    }
    env = neon_env_builder.init_start()

    # TODO(diko): pageserver spams with various errors during safekeeper migration.
    # Fix the code so it handles the migration better.
    migration_noise = [
        ".*Timeline .* was cancelled and cannot be used anymore.*",
        ".*Timeline .* has been deleted.*",
        ".*wal receiver task finished with an error.*",
    ]
    env.pageserver.allowed_errors.extend(migration_noise)

    ep = env.endpoints.create("main", tenant_id=env.initial_tenant)

    # Before any migration: a single safekeeper, no pending new set, generation 1.
    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
    assert mconf["new_sk_set"] is None
    assert len(mconf["sk_set"]) == 1
    assert mconf["generation"] == 1

    ep.start(safekeeper_generation=1, safekeepers=mconf["sk_set"])
    ep.safe_psql("CREATE EXTENSION neon_test_utils;")
    ep.safe_psql("CREATE TABLE t(a int)")

    sk_ids = [1, 2, 3]
    for target_sk in sk_ids:
        env.storage_controller.migrate_safekeepers(
            env.initial_tenant, env.initial_timeline, [target_sk]
        )

        # Safekeeper ids are 1-based, env.safekeepers is 0-based.
        idle_sks = [env.safekeepers[sk - 1] for sk in sk_ids if sk != target_sk]

        for safekeeper in idle_sks:
            safekeeper.stop()

        # Only the migration target is running now.
        ep.safe_psql(f"INSERT INTO t VALUES ({target_sk})")

        for safekeeper in idle_sks:
            safekeeper.start()

    ep.clear_buffers()

    expected_rows = [(sk,) for sk in sk_ids]
    assert ep.safe_psql("SELECT * FROM t") == expected_rows

    # 1 initial generation + 2 migrations on each loop iteration.
    expected_gen = 1 + 2 * len(sk_ids)

    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
    assert mconf["generation"] == expected_gen

    assert ep.safe_psql("SHOW neon.safekeepers")[0][0].startswith(f"g#{expected_gen}:")

    # Restart and check again to make sure data is persistent.
    # NOTE(review): the restart passes safekeeper_generation=1 although the timeline
    # is at expected_gen by now — confirm this is intended.
    ep.stop()
    ep.start(safekeeper_generation=1, safekeepers=[3])

    assert ep.safe_psql("SELECT * FROM t") == expected_rows
||||
@@ -3642,7 +3642,9 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
|
||||
env.start()
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
tenant_id = TenantId.generate()
|
||||
timeline_id = TimelineId.generate()
|
||||
@@ -4166,13 +4168,20 @@ class DeletionSubject(Enum):
|
||||
TENANT = "tenant"
|
||||
|
||||
|
||||
class EmptyTimeline(Enum):
    """Test parameter selecting between the empty and non-empty timeline variants."""

    # Values are plain strings.
    EMPTY = "empty"
    NONEMPTY = "nonempty"
|
||||
|
||||
|
||||
@run_only_on_default_postgres("PG version is not interesting here")
|
||||
@pytest.mark.parametrize("restart_storcon", [RestartStorcon.RESTART, RestartStorcon.ONLINE])
|
||||
@pytest.mark.parametrize("deletetion_subject", [DeletionSubject.TENANT, DeletionSubject.TIMELINE])
|
||||
@pytest.mark.parametrize("empty_timeline", [EmptyTimeline.EMPTY, EmptyTimeline.NONEMPTY])
|
||||
def test_storcon_create_delete_sk_down(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
restart_storcon: RestartStorcon,
|
||||
deletetion_subject: DeletionSubject,
|
||||
empty_timeline: EmptyTimeline,
|
||||
):
|
||||
"""
|
||||
Test that the storcon can create and delete tenants and timelines with a safekeeper being down.
|
||||
@@ -4224,10 +4233,11 @@ def test_storcon_create_delete_sk_down(
|
||||
ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
with env.endpoints.create("child_of_main", tenant_id=tenant_id) as ep:
|
||||
# endpoint should start.
|
||||
ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
if empty_timeline == EmptyTimeline.NONEMPTY:
|
||||
with env.endpoints.create("child_of_main", tenant_id=tenant_id) as ep:
|
||||
# endpoint should start.
|
||||
ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
env.storage_controller.assert_log_contains("writing pending op for sk id 1")
|
||||
env.safekeepers[0].start()
|
||||
@@ -4434,6 +4444,53 @@ def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder,
|
||||
assert initial_ps.http_client().tenant_list_locations()["tenant_shards"] == []
|
||||
|
||||
|
||||
def test_attached_0_graceful_migration(neon_env_builder: NeonEnvBuilder):
    """
    Migrate a tenant shard with the Attached(0) placement policy (no secondaries)
    to another pageserver in the same AZ, using a prewarm migration, and wait until
    the shard is present only on the destination in AttachedSingle mode.
    """
    neon_env_builder.num_pageservers = 4
    neon_env_builder.num_azs = 2
    neon_env_builder.storcon_kick_secondary_downloads = False

    env = neon_env_builder.init_start()
    storcon = env.storage_controller

    # It is default, but we want to ensure that there are no secondary locations requested
    storcon.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 0}})
    storcon.reconcile_until_idle()

    shard_desc = storcon.tenant_describe(env.initial_tenant)["shards"][0]
    src_ps_id = shard_desc["node_attached"]
    src_ps = env.get_pageserver(src_ps_id)
    src_az = shard_desc["preferred_az_id"]

    # There must be no secondary locations with Attached(0) placement policy
    assert len(shard_desc["node_secondary"]) == 0

    # Migrate tenant shard to the same AZ node
    same_az_candidates = [
        ps for ps in env.pageservers if ps.id != src_ps_id and ps.az_id == src_az
    ]
    dst_ps = same_az_candidates[0]

    storcon.tenant_shard_migrate(
        TenantShardId(env.initial_tenant, 0, 0),
        dst_ps.id,
        config=StorageControllerMigrationConfig(prewarm=True),
    )

    def tenant_shard_migrated():
        # The source must no longer hold any location for the tenant ...
        on_source = src_ps.http_client().tenant_list_locations()["tenant_shards"]
        assert len(on_source) == 0
        log.info(f"Tenant shard migrated from {src_ps.id}")
        # ... and the destination must hold exactly one, attached in single mode.
        on_destination = dst_ps.http_client().tenant_list_locations()["tenant_shards"]
        assert len(on_destination) == 1
        assert on_destination[0][1]["mode"] == "AttachedSingle"
        log.info(f"Tenant shard migrated to {dst_ps.id}")

    # After all we expect that tenant shard exists only on dst node.
    # We wait so long because [`DEFAULT_HEATMAP_PERIOD`] and [`DEFAULT_DOWNLOAD_INTERVAL`]
    # are set to 60 seconds by default.
    #
    # TODO: we should consider making these configurable, so the test can run faster.
    wait_until(tenant_shard_migrated, timeout=180, interval=5, status_interval=10)
    log.info("Tenant shard migrated successfully")
|
||||
|
||||
|
||||
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
|
||||
def test_storage_controller_migrate_with_pageserver_restart(
|
||||
neon_env_builder: NeonEnvBuilder, make_httpserver
|
||||
|
||||
@@ -896,6 +896,134 @@ def test_timeline_retain_lsn(
|
||||
assert sum == pre_branch_sum
|
||||
|
||||
|
||||
def test_timeline_offload_delete_race(neon_env_builder: NeonEnvBuilder):
    """
    Regression test for https://github.com/neondatabase/cloud/issues/30406

    Offloads an archived child timeline, starts deleting it on the origin pageserver
    while the deletion queue is paused, migrates the tenant to a second pageserver,
    then resumes the deletion. Afterwards the child timeline prefix must be empty,
    the destination must answer 404 for the timeline, and the scrubber must have
    nothing to clean up.
    """
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.num_pageservers = 2

    env = neon_env_builder.init_start()

    # Turn off gc and compaction loops: we want to issue them manually for better reliability
    tenant_conf = {
        "gc_period": "0s",
        "compaction_period": "0s",
        "checkpoint_distance": f"{1024**2}",
    }
    tenant_id, _ = env.create_tenant(conf=tenant_conf)

    origin_ps = env.get_tenant_pageserver(tenant_id)
    assert origin_ps
    origin_ps.allowed_errors.extend(
        [
            ".*Timed out waiting for deletion queue flush.*",
            ".*Timed out waiting for flush to remote storage.*",
        ]
    )
    origin_ps_http = origin_ps.http_client()

    # We are not sharding this tenant
    tenant_shard_id = TenantShardId(tenant_id, 0, 0)

    # Create a branch and archive it
    branch_name = "test_archived_branch_persisted"
    child_timeline_id = env.create_branch(branch_name, tenant_id)

    with env.endpoints.create_start(branch_name, tenant_id=tenant_id) as endpoint:
        endpoint.safe_psql_many(
            [
                "CREATE TABLE foo(key serial primary key, t text default 'data_content')",
                "INSERT INTO foo SELECT FROM generate_series(1,512)",
            ]
        )
        last_flush_lsn_upload(env, endpoint, tenant_id, child_timeline_id)

    # Both the tenant prefix and its tenant manifest must exist in remote storage.
    tenant_prefix = f"tenants/{str(tenant_id)}/"
    assert_prefix_not_empty(
        neon_env_builder.pageserver_remote_storage,
        prefix=tenant_prefix,
    )
    assert_prefix_not_empty(
        neon_env_builder.pageserver_remote_storage,
        prefix=f"tenants/{str(tenant_id)}/tenant-manifest",
    )

    origin_ps_http.timeline_archival_config(
        tenant_id,
        child_timeline_id,
        state=TimelineArchivalState.ARCHIVED,
    )

    def child_offloaded():
        # Request the offload, then require the child to show up in the offloaded list.
        origin_ps_http.timeline_offload(tenant_id=tenant_id, timeline_id=child_timeline_id)
        listing = origin_ps_http.timeline_and_offloaded_list(tenant_id=tenant_id)
        assert any(
            entry["timeline_id"] == str(child_timeline_id) for entry in listing.offloaded
        )

    wait_until(child_offloaded)

    # Delete the timeline from the origin pageserver, holding up the deletion queue so that it doesn't finish
    failpoint_deletion_queue = "deletion-queue-before-execute-pause"
    origin_ps_http.configure_failpoints((failpoint_deletion_queue, "pause"))
    origin_ps_http.timeline_delete(tenant_id, child_timeline_id)

    other_pageservers = [ps for ps in env.pageservers if ps.id != origin_ps.id]
    dest_ps = other_pageservers[0]
    assert dest_ps
    log.info(f"Migrating {tenant_id} {origin_ps.id}->{dest_ps.id}")
    env.storage_controller.tenant_shard_migrate(tenant_shard_id, dest_ps_id=dest_ps.id)

    log.info("unstuck the DELETE")
    origin_ps_http.configure_failpoints((failpoint_deletion_queue, "off"))

    def child_prefix_empty():
        assert_prefix_empty(
            neon_env_builder.pageserver_remote_storage,
            prefix=f"tenants/{str(tenant_id)}/{str(child_timeline_id)}/",
        )

    wait_until(child_prefix_empty)

    dest_ps_http = dest_ps.http_client()

    # We can't use timeline_delete_wait_completed here: it polls the timeline status,
    # whereas we want the 404 to come from the deletion endpoint itself.
    def timeline_is_missing():
        data = None
        try:
            data = dest_ps_http.timeline_delete(tenant_id, child_timeline_id)
            log.info(f"timeline delete {data}")
        except PageserverApiException as e:
            log.debug(e)
            if e.status_code == 404:
                return

        # Either the delete call succeeded or it failed with a non-404 status.
        raise RuntimeError(f"Timeline exists {data}")

    wait_until(timeline_is_missing)

    #
    # Now ensure that scrubber doesn't have anything to clean up.
    #

    # Sleep some amount larger than min_age_secs
    time.sleep(3)

    # With min_age_secs elapsed, the scrubber must still report no errors and no deletions.
    gc_summary = env.storage_scrubber.pageserver_physical_gc(min_age_secs=1, mode="full")
    assert gc_summary["remote_storage_errors"] == 0
    assert gc_summary["indices_deleted"] == 0
    assert gc_summary["tenant_manifests_deleted"] == 0
|
||||
|
||||
|
||||
def test_timeline_offload_generations(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test for scrubber deleting old generations of manifests
|
||||
|
||||
@@ -1099,7 +1099,9 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||
|
||||
@@ -1221,7 +1223,9 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||
|
||||
|
||||
@@ -25,7 +25,9 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
|
||||
initial_tenant_shard_count=2 if sharded else None,
|
||||
)
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
||||
ps.allowed_errors.extend(
|
||||
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||
)
|
||||
|
||||
if sharded:
|
||||
http = env.storage_controller.pageserver_api()
|
||||
|
||||
Reference in New Issue
Block a user