mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 14:32:57 +00:00
Set the last_record_gauge to the value which was persisted metadata (#3460)
## Describe your changes Whenever a tenant is detached or the pageserver is restarted the pageserver_last_record_lsn metric is dropped This fix resurrects the value from the metadata whenever the tenant is attached again ## Issue ticket number and link [3571](https://github.com/neondatabase/cloud/issues/3571) ## Checklist before requesting a review - [X] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
This commit is contained in:
@@ -945,6 +945,10 @@ impl Timeline {
|
||||
};
|
||||
result.repartition_threshold = result.get_checkpoint_distance() / 10;
|
||||
result
|
||||
.metrics
|
||||
.last_record_gauge
|
||||
.set(disk_consistent_lsn.0 as i64);
|
||||
result
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from threading import Thread
|
||||
import asyncpg
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
@@ -59,11 +60,11 @@ def test_tenant_reattach(
|
||||
# create new nenant
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
with pg.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
# Wait for the all data to be processed by the pageserver and uploaded in remote storage
|
||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
@@ -78,15 +79,34 @@ def test_tenant_reattach(
|
||||
".*failed to perform remote task UploadMetadata.*, will retry.*"
|
||||
)
|
||||
|
||||
ps_metrics = parse_metrics(pageserver_http.get_metrics(), "pageserver")
|
||||
tenant_metric_filter = {
|
||||
"tenant_id": str(tenant_id),
|
||||
"timeline_id": str(timeline_id),
|
||||
}
|
||||
pageserver_last_record_lsn_before_detach = int(
|
||||
ps_metrics.query_one("pageserver_last_record_lsn", filter=tenant_metric_filter).value
|
||||
)
|
||||
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
with pg.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
|
||||
time.sleep(1) # for metrics propagation
|
||||
|
||||
# Check that we had to retry the downloads
|
||||
assert env.pageserver.log_contains(".*list prefixes.*failed, will retry.*")
|
||||
assert env.pageserver.log_contains(".*download.*failed, will retry.*")
|
||||
ps_metrics = parse_metrics(pageserver_http.get_metrics(), "pageserver")
|
||||
pageserver_last_record_lsn = int(
|
||||
ps_metrics.query_one("pageserver_last_record_lsn", filter=tenant_metric_filter).value
|
||||
)
|
||||
|
||||
assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
|
||||
|
||||
# Check that we had to retry the downloads
|
||||
assert env.pageserver.log_contains(".*list prefixes.*failed, will retry.*")
|
||||
assert env.pageserver.log_contains(".*download.*failed, will retry.*")
|
||||
|
||||
|
||||
num_connections = 10
|
||||
|
||||
Reference in New Issue
Block a user