test(pageserver): add detach ancestor smoke test (#9842)

## Problem Follow up to https://github.com/neondatabase/neon/pull/9682, hopefully we can detect some issues or assure ourselves that this is ready for production. ## Summary of changes * Add a compaction-detach-ancestor smoke test. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
2026-01-09 06:22:57 +00:00 · 2024-11-22 13:21:51 -05:00
parent e939d36dd4
commit 6f8b1eb5a6
3 changed files with 57 additions and 4 deletions
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -343,7 +343,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert isinstance(res_json["tenant_shards"], list)
        return res_json

-    def tenant_get_location(self, tenant_id: TenantShardId):
+    def tenant_get_location(self, tenant_id: TenantId | TenantShardId):
        res = self.get(
            f"http://localhost:{self.port}/v1/location_config/{tenant_id}",
        )
--- a/test_runner/fixtures/workload.py
+++ b/test_runner/fixtures/workload.py
@@ -94,9 +94,10 @@ class Workload:
    def __del__(self):
        self.stop()

-    def init(self, pageserver_id: int | None = None):
+    def init(self, pageserver_id: int | None = None, allow_recreate=False):
        endpoint = self.endpoint(pageserver_id)
-
+        if allow_recreate:
+            endpoint.safe_psql(f"DROP TABLE IF EXISTS {self.table};")
        endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
        endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
        last_flush_lsn_upload(
--- a/test_runner/regress/test_timeline_detach_ancestor.py
+++ b/test_runner/regress/test_timeline_detach_ancestor.py
@@ -23,7 +23,8 @@ from fixtures.neon_fixtures import (
 from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
 from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_timeline_detail_404
 from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
-from fixtures.utils import assert_pageserver_backups_equal, wait_until
+from fixtures.utils import assert_pageserver_backups_equal, skip_in_debug_build, wait_until
+from fixtures.workload import Workload
 from requests import ReadTimeout


@@ -1550,6 +1551,57 @@ def test_timeline_is_deleted_before_timeline_detach_ancestor_completes(
    env.pageserver.assert_log_contains(".* gc_loop.*: 1 timelines need GC", offset)


+@skip_in_debug_build("only run with release build")
+def test_pageserver_compaction_detach_ancestor_smoke(neon_env_builder: NeonEnvBuilder):
+    SMOKE_CONF = {
+        # Run both gc and gc-compaction.
+        "gc_period": "5s",
+        "compaction_period": "5s",
+        # No PiTR interval and small GC horizon
+        "pitr_interval": "0s",
+        "gc_horizon": f"{1024 ** 2}",
+        "lsn_lease_length": "0s",
+        # Small checkpoint distance to create many layers
+        "checkpoint_distance": 1024**2,
+        # Compact small layers
+        "compaction_target_size": 1024**2,
+        "image_creation_threshold": 2,
+    }
+
+    env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    row_count = 10000
+    churn_rounds = 50
+
+    ps_http = env.pageserver.http_client()
+
+    workload_parent = Workload(env, tenant_id, timeline_id)
+    workload_parent.init(env.pageserver.id)
+    log.info("Writing initial data ...")
+    workload_parent.write_rows(row_count, env.pageserver.id)
+    branch_id = env.create_branch("child")
+    workload_child = Workload(env, tenant_id, branch_id, branch_name="child")
+    workload_child.init(env.pageserver.id, allow_recreate=True)
+    log.info("Writing initial data on child...")
+    workload_child.write_rows(row_count, env.pageserver.id)
+
+    for i in range(1, churn_rounds + 1):
+        if i % 10 == 0:
+            log.info(f"Running churn round {i}/{churn_rounds} ...")
+
+        workload_parent.churn_rows(row_count, env.pageserver.id)
+        workload_child.churn_rows(row_count, env.pageserver.id)
+
+    ps_http.detach_ancestor(tenant_id, branch_id)
+
+    log.info("Validating at workload end ...")
+    workload_parent.validate(env.pageserver.id)
+    workload_child.validate(env.pageserver.id)
+
+
 # TODO:
 # - branch near existing L1 boundary, image layers?
 # - investigate: why are layers started at uneven lsn? not just after branching, but in general.