prove timeline resurrection during attach

2026-05-15 04:00:38 +00:00 · 2023-02-20 18:08:12 +02:00
13 changed files with 116 additions and 73 deletions
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-ap-southeast-1
    bucket_region: ap-southeast-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-eu-central-1
    bucket_region: eu-central-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-east-2
    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-west-2
    bucket_region: us-west-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.ap-southeast-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.eu-central-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.cloud.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-west-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/docs/synthetic-size.md
+++ b/docs/synthetic-size.md
@@ -1,41 +1,5 @@
 # Synthetic size

-## How to get the data
-
-Pageserver provides a HTTP API for getting the synthetic size of a tenant
-along with the data that was used to calculate it. Usage examples:
-
-1. This query returns the synthetic size of the tenant, along with the "raw" data
-That is returned in the `segments` and `timeline_inputs` fields.
-
-```
-curl localhost:9898/v1/tenant/5e1de642394b00a0a583a088e8276b98/synthetic_size | jq
-```
-
-2. If `inputs_only=true` is passed, the response will contain only the raw data.
-Actual synthetic size is not calculated.
-
-```
-curl localhost:9898/v1/tenant/5e1de642394b00a0a583a088e8276b98/synthetic_size?inputs_only=true | jq
-
-```
-
-3. 'retention_period' is a cutoff (in bytes) that overrides the cutoff that is used in the size calculation.
-Note, that override is applied only if provided `retnention_period` is shorter than the real cutoff.
-
-```
-curl localhost:9898/v1/tenant/5e1de642394b00a0a583a088e8276b98/synthetic_size?retention_period=1048576 | jq
-```
-
-4. If header `Accept: text/html` is passed, the response will be in HTML format.
-The HTML contains a json with the same data as in the previous examples + SVG diagram of the tenant timelines.
-
-```
-curl -H "Accept: text/html" localhost:9898/v1/tenant/5e1de642394b00a0a583a088e8276b98/synthetic_size > ./size.html |  google-chrome ./size.html
-```
-
-## Overview
-
 Neon storage has copy-on-write branching, which makes it difficult to
 answer the question "how large is my database"? To give one reasonable
 answer, we calculate _synthetic size_ for a project.
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -3758,7 +3758,7 @@ impl Timeline {
                drop(permit);

                Ok(())
-            }.in_current_span(),
+            },
        );

        receiver.await.context("download task cancelled")?
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -100,6 +100,7 @@ impl Timeline {
        #[allow(dead_code)]
        #[derive(Debug, Default)]
        struct EvictionStats {
+            not_considered_due_to_clock_skew: usize,
            candidates: usize,
            evicted: usize,
            errors: usize,
@@ -128,21 +129,9 @@ impl Timeline {
                let no_activity_for = match now.duration_since(last_activity_ts) {
                    Ok(d) => d,
                    Err(_e) => {
-                        // We reach here if `now` < `last_activity_ts`, which can legitimately
-                        // happen if there is an access between us getting `now`, and us getting
-                        // the access stats from the layer.
-                        //
-                        // The other reason why it can happen is system clock skew because
-                        // SystemTime::now() is not monotonic, so, even if there is no access
-                        // to the layer after we get `now` at the beginning of this function,
-                        // it could be that `now`  < `last_activity_ts`.
-                        //
-                        // To distinguish the cases, we would need to record `Instant`s in the
-                        // access stats (i.e., monotonic timestamps), but then, the timestamps
-                        // values in the access stats would need to be `Instant`'s, and hence
-                        // they would be meaningless outside of the pageserver process.
-                        // At the time of writing, the trade-off is that access stats are more
-                        // valuable than detecting clock skew.
+                        // NB: don't log the error. If there are many layers and the system clock
+                        // is skewed, we'd be flooding the log.
+                        stats.not_considered_due_to_clock_skew += 1;
                        continue;
                    }
                };
@@ -199,9 +188,8 @@ impl Timeline {
                }
            }
        }
-        if stats.candidates == stats.not_evictable {
-            debug!(stats=?stats, "eviction iteration complete");
-        } else if stats.errors > 0 || stats.not_evictable > 0 {
+        if stats.not_considered_due_to_clock_skew > 0 || stats.errors > 0 || stats.not_evictable > 0
+        {
            warn!(stats=?stats, "eviction iteration complete");
        } else {
            info!(stats=?stats, "eviction iteration complete");
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -636,4 +636,95 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
    time.sleep(10)


+@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
+def test_timeline_resurrection_on_attach(
+    neon_env_builder: NeonEnvBuilder,
+    remote_storage_kind: RemoteStorageKind,
+):
+    # Use this test to check more realistic SK ids: some etcd key parsing bugs were related,
+    # and this test needs SK to write data to pageserver, so it will be visible
+    neon_env_builder.safekeepers_id_start = 12
+
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_timeline_resurrection",
+    )
+
+    ##### First start: write data on "main" and on a branch, upload both to remote storage
+    env = neon_env_builder.init_start()
+
+    pageserver_http = env.pageserver.http_client()
+    pg = env.postgres.create_start("main")
+
+    client = env.pageserver.http_client()
+
+    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
+    timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
+
+    with pg.cursor() as cur:
+        cur.execute("CREATE TABLE f (i integer);")
+        cur.execute("INSERT INTO f VALUES (generate_series(1,1000));")
+        current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
+
+        # wait until the pageserver has received WAL up to current_lsn on the main timeline
+        wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
+
+        # run checkpoint manually to be sure that data landed in remote storage
+        pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
+
+        # wait until pageserver successfully uploaded a checkpoint to remote storage
+        log.info("waiting for checkpoint upload")
+        wait_for_upload(client, tenant_id, timeline_id, current_lsn)
+        log.info("upload of checkpoint is done")
+
+    new_timeline_id = env.neon_cli.create_branch("new", "main")
+    new_pg = env.postgres.create_start("new")
+
+    with new_pg.cursor() as cur:
+        cur.execute("INSERT INTO f VALUES (generate_series(1,1000));")
+        current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
+
+        # wait until the pageserver has received WAL up to current_lsn on the branch timeline
+        wait_for_last_record_lsn(client, tenant_id, new_timeline_id, current_lsn)
+
+        # run checkpoint manually to be sure that data landed in remote storage
+        pageserver_http.timeline_checkpoint(tenant_id, new_timeline_id)
+
+        # wait until pageserver successfully uploaded a checkpoint to remote storage
+        log.info("waiting for checkpoint upload")
+        wait_for_upload(client, tenant_id, new_timeline_id, current_lsn)
+        log.info("upload of checkpoint is done")
+
+    # delete the branch timeline; the final assertion expects it not to be listed after attach
+    client.timeline_delete(tenant_id=tenant_id, timeline_id=new_timeline_id)
+
+    ##### Stop the pageserver instance, erase all its data
+    env.postgres.stop_all()
+    env.pageserver.stop()
+
+    dir_to_clear = Path(env.repo_dir) / "tenants"
+    shutil.rmtree(dir_to_clear)
+    os.mkdir(dir_to_clear)  # recreate the directory empty
+
+    ##### Second start: attach the tenant again and check which timelines are listed
+    env.pageserver.start()
+
+    client.tenant_attach(tenant_id=tenant_id)
+
+    def tenant_active():
+        all_states = client.tenant_list()
+        [tenant] = [t for t in all_states if TenantId(t["id"]) == tenant_id]
+        print(tenant)
+        assert tenant["state"] == "Active"
+
+    wait_until(
+        number_of_iterations=5,
+        interval=1,
+        func=tenant_active,
+    )
+
+    timelines = client.timeline_list(tenant_id=tenant_id)
+    assert len(timelines) == 1  # a second entry would be the deleted branch coming back
+
+
 # TODO Test that we correctly handle GC of files that are stuck in upload queue.