From e7d18bc1884ae7f5448fff99bd02f6d9390d5566 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Wed, 9 Jul 2025 13:55:10 +0100 Subject: [PATCH] Replica promotion in compute_ctl (#12183) Add `/promote` method for `compute_ctl` promoting secondary replica to primary, depends on secondary being prewarmed. Add `compute-ctl` mode to `test_replica_promotes`, testing happy path only (no corner cases yet) Add openapi spec for `/promote` and `/lfc` handlers https://github.com/neondatabase/cloud/issues/19011 Resolves: https://github.com/neondatabase/cloud/issues/29807 --- Cargo.lock | 1 + compute_tools/Cargo.toml | 2 +- compute_tools/src/compute.rs | 7 +- compute_tools/src/compute_promote.rs | 132 +++++++++++++++ compute_tools/src/http/openapi_spec.yaml | 145 +++++++++++++++++ compute_tools/src/http/routes/mod.rs | 1 + compute_tools/src/http/routes/promote.rs | 14 ++ compute_tools/src/http/server.rs | 3 +- compute_tools/src/lib.rs | 1 + libs/compute_api/src/responses.rs | 30 +++- test_runner/fixtures/endpoint/http.py | 15 +- test_runner/regress/test_replica_promotes.py | 162 ++++++++++++------- 12 files changed, 448 insertions(+), 65 deletions(-) create mode 100644 compute_tools/src/compute_promote.rs create mode 100644 compute_tools/src/http/routes/promote.rs diff --git a/Cargo.lock b/Cargo.lock index 558e8e2295..c49a2daba7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1348,6 +1348,7 @@ dependencies = [ "p256 0.13.2", "pageserver_page_api", "postgres", + "postgres-types", "postgres_initdb", "postgres_versioninfo", "regex", diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index 1a03022d89..910bae3bda 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -66,7 +66,7 @@ url.workspace = true uuid.workspace = true walkdir.workspace = true x509-cert.workspace = true - +postgres-types.workspace = true postgres_versioninfo.workspace = true postgres_initdb.workspace = true compute_api.workspace = true diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index ec6e6c1634..0496d38e67 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -3,7 +3,7 @@ use chrono::{DateTime, Utc}; use compute_api::privilege::Privilege; use compute_api::responses::{ ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState, - LfcPrewarmState, TlsConfig, + LfcPrewarmState, PromoteState, TlsConfig, }; use compute_api::spec::{ ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent, @@ -29,8 +29,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::{Arc, Condvar, Mutex, RwLock}; use std::time::{Duration, Instant}; use std::{env, fs}; -use tokio::task::JoinHandle; -use tokio::{spawn, time}; +use tokio::{spawn, sync::watch, task::JoinHandle, time}; use tracing::{Instrument, debug, error, info, instrument, warn}; use url::Url; use utils::id::{TenantId, TimelineId}; @@ -175,6 +174,7 @@ pub struct ComputeState { /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if /// mode == ComputeMode::Primary. None otherwise pub terminate_flush_lsn: Option, + pub promote_state: Option>, pub metrics: ComputeMetrics, } @@ -192,6 +192,7 @@ impl ComputeState { lfc_prewarm_state: LfcPrewarmState::default(), lfc_offload_state: LfcOffloadState::default(), terminate_flush_lsn: None, + promote_state: None, } } diff --git a/compute_tools/src/compute_promote.rs b/compute_tools/src/compute_promote.rs new file mode 100644 index 0000000000..42256faa22 --- /dev/null +++ b/compute_tools/src/compute_promote.rs @@ -0,0 +1,132 @@ +use crate::compute::ComputeNode; +use anyhow::{Context, Result, bail}; +use compute_api::{ + responses::{LfcPrewarmState, PromoteState, SafekeepersLsn}, + spec::ComputeMode, +}; +use std::{sync::Arc, time::Duration}; +use tokio::time::sleep; +use utils::lsn::Lsn; + +impl ComputeNode { + /// Returns only when promote fails or succeeds. If a network error occurs + /// and http client disconnects, this does not stop promotion, and subsequent + /// calls block until promote finishes. + /// Called by control plane on secondary after primary endpoint is terminated + pub async fn promote(self: &Arc, safekeepers_lsn: SafekeepersLsn) -> PromoteState { + let cloned = self.clone(); + let start_promotion = || { + let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted); + tokio::spawn(async move { + tx.send(match cloned.promote_impl(safekeepers_lsn).await { + Ok(_) => PromoteState::Completed, + Err(err) => { + tracing::error!(%err, "promoting"); + PromoteState::Failed { + error: err.to_string(), + } + } + }) + }); + rx + }; + + let mut task; + // self.state is unlocked after block ends so we lock it in promote_impl + // and task.changed() is reached + { + task = self + .state + .lock() + .unwrap() + .promote_state + .get_or_insert_with(start_promotion) + .clone() + } + task.changed().await.expect("promote sender dropped"); + task.borrow().clone() + } + + // Why do we have to supply safekeepers? + // For secondary we use primary_connection_conninfo so safekeepers field is empty + async fn promote_impl(&self, safekeepers_lsn: SafekeepersLsn) -> Result<()> { + { + let state = self.state.lock().unwrap(); + let mode = &state.pspec.as_ref().unwrap().spec.mode; + if *mode != ComputeMode::Replica { + bail!("{} is not replica", mode.to_type_str()); + } + + // we don't need to query Postgres so not self.lfc_prewarm_state() + match &state.lfc_prewarm_state { + LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => { + bail!("prewarm not requested or pending") + } + LfcPrewarmState::Failed { error } => { + tracing::warn!(%error, "replica prewarm failed") + } + _ => {} + } + } + + let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf) + .await + .context("connecting to postgres")?; + + let primary_lsn = safekeepers_lsn.wal_flush_lsn; + let mut last_wal_replay_lsn: Lsn = Lsn::INVALID; + const RETRIES: i32 = 20; + for i in 0..=RETRIES { + let row = client + .query_one("SELECT pg_last_wal_replay_lsn()", &[]) + .await + .context("getting last replay lsn")?; + let lsn: u64 = row.get::(0).into(); + last_wal_replay_lsn = lsn.into(); + if last_wal_replay_lsn >= primary_lsn { + break; + } + tracing::info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}"); + sleep(Duration::from_secs(1)).await; + } + if last_wal_replay_lsn < primary_lsn { + bail!("didn't catch up with primary in {RETRIES} retries"); + } + + // using $1 doesn't work with ALTER SYSTEM SET + let safekeepers_sql = format!( + "ALTER SYSTEM SET neon.safekeepers='{}'", + safekeepers_lsn.safekeepers + ); + client + .query(&safekeepers_sql, &[]) + .await + .context("setting safekeepers")?; + client + .query("SELECT pg_reload_conf()", &[]) + .await + .context("reloading postgres config")?; + let row = client + .query_one("SELECT * FROM pg_promote()", &[]) + .await + .context("pg_promote")?; + if !row.get::(0) { + bail!("pg_promote() returned false"); + } + + let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf) + .await + .context("connecting to postgres")?; + let row = client + .query_one("SHOW transaction_read_only", &[]) + .await + .context("getting transaction_read_only")?; + if row.get::(0) == "on" { + bail!("replica in read only mode after promotion"); + } + + let mut state = self.state.lock().unwrap(); + state.pspec.as_mut().unwrap().spec.mode = ComputeMode::Primary; + Ok(()) + } +} diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index bbdb7d0917..eaf33d1f82 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -83,6 +83,87 @@ paths: schema: $ref: "#/components/schemas/DbsAndRoles" + /promote: + post: + tags: + - Promotion + summary: Promote secondary replica to primary + description: "" + operationId: promoteReplica + requestBody: + description: Promote requests data + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/SafekeepersLsn" + responses: + 200: + description: Promote succeeded or wasn't started + content: + application/json: + schema: + $ref: "#/components/schemas/PromoteState" + 500: + description: Promote failed + content: + application/json: + schema: + $ref: "#/components/schemas/PromoteState" + + /lfc/prewarm: + post: + summary: Request LFC Prewarm + parameters: + - name: from_endpoint + in: query + schema: + type: string + description: "" + operationId: lfcPrewarm + responses: + 202: + description: LFC prewarm started + 429: + description: LFC prewarm ongoing + get: + tags: + - Prewarm + summary: Get LFC prewarm state + description: "" + operationId: getLfcPrewarmState + responses: + 200: + description: Prewarm state + content: + application/json: + schema: + $ref: "#/components/schemas/LfcPrewarmState" + + /lfc/offload: + post: + summary: Request LFC offload + description: "" + operationId: lfcOffload + responses: + 202: + description: LFC offload started + 429: + description: LFC offload ongoing + get: + tags: + - Prewarm + summary: Get LFC offloading state + description: "" + operationId: getLfcOffloadState + responses: + 200: + description: Offload state + content: + application/json: + schema: + $ref: "#/components/schemas/LfcOffloadState" + /database_schema: get: tags: @@ -497,6 +578,70 @@ components: type: string example: "1.0.0" + SafekeepersLsn: + type: object + required: + - safekeepers + - wal_flush_lsn + properties: + safekeepers: + description: Primary replica safekeepers + type: string + wal_flush_lsn: + description: Primary last WAL flush LSN + type: string + + LfcPrewarmState: + type: object + required: + - status + - total + - prewarmed + - skipped + properties: + status: + description: Lfc prewarm status + enum: [not_prewarmed, prewarming, completed, failed] + type: string + error: + description: Lfc prewarm error, if any + type: string + total: + description: Total pages processed + type: integer + prewarmed: + description: Total pages prewarmed + type: integer + skipped: + description: Pages processed but not prewarmed + type: integer + + LfcOffloadState: + type: object + required: + - status + properties: + status: + description: Lfc offload status + enum: [not_offloaded, offloading, completed, failed] + type: string + error: + description: Lfc offload error, if any + type: string + + PromoteState: + type: object + required: + - status + properties: + status: + description: Promote result + enum: [not_promoted, completed, failed] + type: string + error: + description: Promote error, if any + type: string + InstalledExtensions: type: object properties: diff --git a/compute_tools/src/http/routes/mod.rs b/compute_tools/src/http/routes/mod.rs index 432e66a830..dd71f663eb 100644 --- a/compute_tools/src/http/routes/mod.rs +++ b/compute_tools/src/http/routes/mod.rs @@ -14,6 +14,7 @@ pub(in crate::http) mod insights; pub(in crate::http) mod lfc; pub(in crate::http) mod metrics; pub(in crate::http) mod metrics_json; +pub(in crate::http) mod promote; pub(in crate::http) mod status; pub(in crate::http) mod terminate; diff --git a/compute_tools/src/http/routes/promote.rs b/compute_tools/src/http/routes/promote.rs new file mode 100644 index 0000000000..bc5f93b4da --- /dev/null +++ b/compute_tools/src/http/routes/promote.rs @@ -0,0 +1,14 @@ +use crate::http::JsonResponse; +use axum::Form; +use http::StatusCode; + +pub(in crate::http) async fn promote( + compute: axum::extract::State>, + Form(safekeepers_lsn): Form, +) -> axum::response::Response { + let state = compute.promote(safekeepers_lsn).await; + if let compute_api::responses::PromoteState::Failed { error } = state { + return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, error); + } + JsonResponse::success(StatusCode::OK, state) +} diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index d5d2427971..17939e39d4 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -23,7 +23,7 @@ use super::{ middleware::authorize::Authorize, routes::{ check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions, - grants, insights, lfc, metrics, metrics_json, status, terminate, + grants, insights, lfc, metrics, metrics_json, promote, status, terminate, }, }; use crate::compute::ComputeNode; @@ -87,6 +87,7 @@ impl From<&Server> for Router> { let authenticated_router = Router::>::new() .route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm)) .route("/lfc/offload", get(lfc::offload_state).post(lfc::offload)) + .route("/promote", post(promote::promote)) .route("/check_writability", post(check_writability::is_writable)) .route("/configure", post(configure::configure)) .route("/database_schema", get(database_schema::get_schema_dump)) diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index 3899a1ca76..2d5d4565b7 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -12,6 +12,7 @@ pub mod logger; pub mod catalog; pub mod compute; pub mod compute_prewarm; +pub mod compute_promote; pub mod disk_quota; pub mod extension_server; pub mod installed_extensions; diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index a54411b06a..e10c381fb4 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -46,7 +46,7 @@ pub struct ExtensionInstallResponse { pub version: ExtVersion, } -#[derive(Serialize, Default, Debug, Clone)] +#[derive(Serialize, Default, Debug, Clone, PartialEq)] #[serde(tag = "status", rename_all = "snake_case")] pub enum LfcPrewarmState { #[default] @@ -58,6 +58,17 @@ pub enum LfcPrewarmState { }, } +impl Display for LfcPrewarmState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"), + LfcPrewarmState::Prewarming => f.write_str("Prewarming"), + LfcPrewarmState::Completed => f.write_str("Completed"), + LfcPrewarmState::Failed { error } => write!(f, "Error({error})"), + } + } +} + #[derive(Serialize, Default, Debug, Clone, PartialEq)] #[serde(tag = "status", rename_all = "snake_case")] pub enum LfcOffloadState { @@ -70,6 +81,23 @@ pub enum LfcOffloadState { }, } +#[derive(Serialize, Debug, Clone, PartialEq)] +#[serde(tag = "status", rename_all = "snake_case")] +/// Response of /promote +pub enum PromoteState { + NotPromoted, + Completed, + Failed { error: String }, +} + +#[derive(Deserialize, Serialize, Default, Debug, Clone)] +#[serde(rename_all = "snake_case")] +/// Result of /safekeepers_lsn +pub struct SafekeepersLsn { + pub safekeepers: String, + pub wal_flush_lsn: utils::lsn::Lsn, +} + /// Response of the /status API #[derive(Serialize, Debug, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py index 294c52321b..1d278095ce 100644 --- a/test_runner/fixtures/endpoint/http.py +++ b/test_runner/fixtures/endpoint/http.py @@ -2,11 +2,12 @@ from __future__ import annotations import urllib.parse from enum import StrEnum -from typing import TYPE_CHECKING, final +from typing import TYPE_CHECKING, Any, final import requests from requests.adapters import HTTPAdapter from requests.auth import AuthBase +from requests.exceptions import ReadTimeout from typing_extensions import override from fixtures.log_helper import log @@ -102,6 +103,18 @@ class EndpointHttpClient(requests.Session): wait_until(offloaded) + def promote(self, safekeepers_lsn: dict[str, Any], disconnect: bool = False): + url = f"http://localhost:{self.external_port}/promote" + if disconnect: + try: # send first request to start promote and disconnect + self.post(url, data=safekeepers_lsn, timeout=0.001) + except ReadTimeout: + pass # wait on second request which returns on promotion finish + res = self.post(url, data=safekeepers_lsn) + res.raise_for_status() + json: dict[str, str] = res.json() + return json + def database_schema(self, database: str): res = self.get( f"http://localhost:{self.external_port}/database_schema?database={urllib.parse.quote(database, safe='')}", diff --git a/test_runner/regress/test_replica_promotes.py b/test_runner/regress/test_replica_promotes.py index 4486901bae..1f26269f40 100644 --- a/test_runner/regress/test_replica_promotes.py +++ b/test_runner/regress/test_replica_promotes.py @@ -1,29 +1,51 @@ """ -File with secondary->primary promotion testing. - -This far, only contains a test that we don't break and that the data is persisted. +Secondary -> primary promotion testing """ +from enum import StrEnum from typing import cast import psycopg2 +import pytest from fixtures.common_types import Lsn from fixtures.log_helper import log from fixtures.neon_fixtures import Endpoint, NeonEnv, wait_replica_caughtup -from fixtures.pg_version import PgVersion +from fixtures.utils import USE_LFC +from psycopg2.extensions import cursor as Cursor from pytest import raises def stop_and_check_lsn(ep: Endpoint, expected_lsn: Lsn | None): ep.stop(mode="immediate-terminate") lsn = ep.terminate_flush_lsn - if expected_lsn is not None: + assert (lsn is not None) == (expected_lsn is not None), f"{lsn=}, {expected_lsn=}" + if lsn is not None: assert lsn >= expected_lsn, f"{expected_lsn=} < {lsn=}" - else: - assert lsn == expected_lsn, f"{expected_lsn=} != {lsn=}" -def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion): +def get_lsn_triple(cur: Cursor) -> tuple[str, str, str]: + cur.execute( + """ + SELECT pg_current_wal_insert_lsn(), + pg_current_wal_lsn(), + pg_current_wal_flush_lsn() + """ + ) + return cast("tuple[str, str, str]", cur.fetchone()) + + +class PromoteMethod(StrEnum): + COMPUTE_CTL = "compute-ctl" + POSTGRES = "postgres" + + +METHOD_OPTIONS = [e for e in PromoteMethod] +METHOD_IDS = [e.value for e in PromoteMethod] + + +@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping") +@pytest.mark.parametrize("method", METHOD_OPTIONS, ids=METHOD_IDS) +def test_replica_promote(neon_simple_env: NeonEnv, method: PromoteMethod): """ Test that a replica safely promotes, and can commit data updates which show up when the primary boots up after the promoted secondary endpoint @@ -38,29 +60,26 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion): with primary.connect() as primary_conn: primary_cur = primary_conn.cursor() + primary_cur.execute("create extension neon") primary_cur.execute( "create table t(pk bigint GENERATED ALWAYS AS IDENTITY, payload integer)" ) primary_cur.execute("INSERT INTO t(payload) SELECT generate_series(1, 100)") - primary_cur.execute( - """ - SELECT pg_current_wal_insert_lsn(), - pg_current_wal_lsn(), - pg_current_wal_flush_lsn() - """ - ) - lsn_triple = cast("tuple[str, str, str]", primary_cur.fetchone()) + + lsn_triple = get_lsn_triple(primary_cur) log.info(f"Primary: Current LSN after workload is {lsn_triple}") expected_primary_lsn: Lsn = Lsn(lsn_triple[2]) primary_cur.execute("show neon.safekeepers") safekeepers = primary_cur.fetchall()[0][0] - wait_replica_caughtup(primary, secondary) + if method == PromoteMethod.COMPUTE_CTL: + primary.http_client().offload_lfc() + else: + wait_replica_caughtup(primary, secondary) with secondary.connect() as secondary_conn: secondary_cur = secondary_conn.cursor() secondary_cur.execute("select count(*) from t") - assert secondary_cur.fetchone() == (100,) with raises(psycopg2.Error): @@ -71,28 +90,30 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion): secondary_cur.execute("select count(*) from t") assert secondary_cur.fetchone() == (100,) + primary_endpoint_id = primary.endpoint_id stop_and_check_lsn(primary, expected_primary_lsn) # Reconnect to the secondary to make sure we get a read-write connection promo_conn = secondary.connect() promo_cur = promo_conn.cursor() - promo_cur.execute(f"alter system set neon.safekeepers='{safekeepers}'") - promo_cur.execute("select pg_reload_conf()") + if method == PromoteMethod.COMPUTE_CTL: + client = secondary.http_client() + client.prewarm_lfc(primary_endpoint_id) + # control plane knows safekeepers, simulate it by querying primary + assert (lsn := primary.terminate_flush_lsn) + safekeepers_lsn = {"safekeepers": safekeepers, "wal_flush_lsn": lsn} + assert client.promote(safekeepers_lsn)["status"] == "completed" + else: + promo_cur.execute(f"alter system set neon.safekeepers='{safekeepers}'") + promo_cur.execute("select pg_reload_conf()") + promo_cur.execute("SELECT * FROM pg_promote()") + assert promo_cur.fetchone() == (True,) - promo_cur.execute("SELECT * FROM pg_promote()") - assert promo_cur.fetchone() == (True,) - promo_cur.execute( - """ - SELECT pg_current_wal_insert_lsn(), - pg_current_wal_lsn(), - pg_current_wal_flush_lsn() - """ - ) - log.info(f"Secondary: LSN after promotion is {promo_cur.fetchone()}") + lsn_triple = get_lsn_triple(promo_cur) + log.info(f"Secondary: LSN after promotion is {lsn_triple}") # Reconnect to the secondary to make sure we get a read-write connection - with secondary.connect() as new_primary_conn: - new_primary_cur = new_primary_conn.cursor() + with secondary.connect() as conn, conn.cursor() as new_primary_cur: new_primary_cur.execute("select count(*) from t") assert new_primary_cur.fetchone() == (100,) @@ -101,43 +122,34 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion): ) assert new_primary_cur.fetchall() == [(it,) for it in range(101, 201)] - new_primary_cur = new_primary_conn.cursor() + new_primary_cur = conn.cursor() new_primary_cur.execute("select payload from t") assert new_primary_cur.fetchall() == [(it,) for it in range(1, 201)] new_primary_cur.execute("select count(*) from t") assert new_primary_cur.fetchone() == (200,) - new_primary_cur.execute( - """ - SELECT pg_current_wal_insert_lsn(), - pg_current_wal_lsn(), - pg_current_wal_flush_lsn() - """ - ) - log.info(f"Secondary: LSN after workload is {new_primary_cur.fetchone()}") - with secondary.connect() as second_viewpoint_conn: - new_primary_cur = second_viewpoint_conn.cursor() + lsn_triple = get_lsn_triple(new_primary_cur) + log.info(f"Secondary: LSN after workload is {lsn_triple}") + expected_promoted_lsn = Lsn(lsn_triple[2]) + + with secondary.connect() as conn, conn.cursor() as new_primary_cur: new_primary_cur.execute("select payload from t") assert new_primary_cur.fetchall() == [(it,) for it in range(1, 201)] - # wait_for_last_flush_lsn(env, secondary, env.initial_tenant, env.initial_timeline) - - # secondaries don't sync safekeepers on finish so LSN will be None - stop_and_check_lsn(secondary, None) + if method == PromoteMethod.COMPUTE_CTL: + # compute_ctl's /promote switches replica type to Primary so it syncs + # safekeepers on finish + stop_and_check_lsn(secondary, expected_promoted_lsn) + else: + # on testing postgres, we don't update replica type, secondaries don't + # sync so lsn should be None + stop_and_check_lsn(secondary, None) primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary2") - with primary.connect() as new_primary: - new_primary_cur = new_primary.cursor() - new_primary_cur.execute( - """ - SELECT pg_current_wal_insert_lsn(), - pg_current_wal_lsn(), - pg_current_wal_flush_lsn() - """ - ) - lsn_triple = cast("tuple[str, str, str]", new_primary_cur.fetchone()) + with primary.connect() as new_primary, new_primary.cursor() as new_primary_cur: + lsn_triple = get_lsn_triple(new_primary_cur) expected_primary_lsn = Lsn(lsn_triple[2]) log.info(f"New primary: Boot LSN is {lsn_triple}") @@ -146,5 +158,39 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion): new_primary_cur.execute("INSERT INTO t (payload) SELECT generate_series(201, 300)") new_primary_cur.execute("select count(*) from t") assert new_primary_cur.fetchone() == (300,) - stop_and_check_lsn(primary, expected_primary_lsn) + + +@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping") +def test_replica_promote_handler_disconnects(neon_simple_env: NeonEnv): + """ + Test that if a handler disconnects from /promote route of compute_ctl, promotion still happens + once, and no error is thrown + """ + env: NeonEnv = neon_simple_env + primary: Endpoint = env.endpoints.create_start(branch_name="main", endpoint_id="primary") + secondary: Endpoint = env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") + + with primary.connect() as conn, conn.cursor() as cur: + cur.execute("create extension neon") + cur.execute("create table t(pk bigint GENERATED ALWAYS AS IDENTITY, payload integer)") + cur.execute("INSERT INTO t(payload) SELECT generate_series(1, 100)") + cur.execute("show neon.safekeepers") + safekeepers = cur.fetchall()[0][0] + + primary.http_client().offload_lfc() + primary_endpoint_id = primary.endpoint_id + primary.stop(mode="immediate-terminate") + assert (lsn := primary.terminate_flush_lsn) + + client = secondary.http_client() + client.prewarm_lfc(primary_endpoint_id) + safekeepers_lsn = {"safekeepers": safekeepers, "wal_flush_lsn": lsn} + assert client.promote(safekeepers_lsn, disconnect=True)["status"] == "completed" + + with secondary.connect() as conn, conn.cursor() as cur: + cur.execute("select count(*) from t") + assert cur.fetchone() == (100,) + cur.execute("INSERT INTO t (payload) SELECT generate_series(101, 200) RETURNING payload") + cur.execute("select count(*) from t") + assert cur.fetchone() == (200,)