diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs
index 6bdc651668..33ff636a79 100644
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -23,6 +23,7 @@ pub struct TimelineCreateRequest {
     pub tenant_id: TenantId,
     pub timeline_id: TimelineId,
     pub mconf: Configuration,
+    /// In the PG_VERSION_NUM macro format, like 140017.
     pub pg_version: u32,
     pub system_id: Option<u64>,
     // By default WAL_SEGMENT_SIZE
diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs
index 7ae39ef95e..424cd89221 100644
--- a/safekeeper/client/src/mgmt_api.rs
+++ b/safekeeper/client/src/mgmt_api.rs
@@ -81,13 +81,10 @@ impl Client {
         }
     }
 
-    pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<TimelineStatus> {
-        let uri = format!(
-            "{}/v1/tenant/{}/timeline/{}",
-            self.mgmt_api_endpoint, req.tenant_id, req.timeline_id
-        );
+    pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<reqwest::Response> {
+        let uri = format!("{}/v1/tenant/timeline", self.mgmt_api_endpoint);
         let resp = self.post(&uri, req).await?;
-        resp.json().await.map_err(Error::ReceiveBody)
+        Ok(resp)
     }
 
     pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs
index a44fcc27d2..b30237e404 100644
--- a/storage_controller/src/safekeeper_client.rs
+++ b/storage_controller/src/safekeeper_client.rs
@@ -1,6 +1,5 @@
 use safekeeper_api::models::{
     self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
-    TimelineStatus,
 };
 use safekeeper_client::mgmt_api::{Client, Result};
 use utils::id::{NodeId, TenantId, TimelineId};
@@ -60,7 +59,7 @@ impl SafekeeperClient {
     pub(crate) async fn create_timeline(
         &self,
         req: &TimelineCreateRequest,
-    ) -> Result<TimelineStatus> {
+    ) -> Result<reqwest::Response> {
         measured_request!(
             "create_timeline",
             crate::metrics::Method::Post,
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 4e00136e1b..38bf959056 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -3804,7 +3804,7 @@ impl Service {
         create_mode: models::TimelineCreateRequestMode,
     ) -> Result {
         let timeline_id = timeline_info.timeline_id;
-        let pg_version = timeline_info.pg_version;
+        let pg_version = timeline_info.pg_version * 10000;
         // Initially start_lsn is determined by last_record_lsn in pageserver
         // response as it does initdb. However, later we persist it and in sk
         // creation calls replace with the value from the timeline row if it
@@ -8723,6 +8723,8 @@ impl Service {
     pub(crate) async fn safekeepers_for_new_timeline(
        &self,
    ) -> Result<Vec<SafekeeperInfo>, ApiError> {
+        // Number of safekeepers in different AZs we are looking for
+        let wanted_count = 3;
         let mut all_safekeepers = {
             let locked = self.inner.read().unwrap();
             locked
@@ -8768,15 +8770,17 @@
                 continue;
             }
             sks.push(sk_info.clone());
-            if sks.len() == 3 {
+            if sks.len() == wanted_count {
                 break;
             }
         }
-        if sks.len() == 3 {
+        if sks.len() == wanted_count {
             Ok(sks)
         } else {
             Err(ApiError::InternalServerError(anyhow::anyhow!(
-                "couldn't find three safekeepers in different AZs for new timeline"
+                "couldn't find {wanted_count} safekeepers in different AZs for new timeline (found: {}, total active: {})",
+                sks.len(),
+                all_safekeepers.len(),
             )))
         }
     }
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index deff02f0f9..aba8e04977 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1321,6 +1321,29 @@ class NeonEnv:
         for f in futs:
             f.result()
 
+        # Last step: register safekeepers at the storage controller
+        if (
+            self.storage_controller_config is not None
+            and self.storage_controller_config.get("timelines_onto_safekeepers") is True
+        ):
+            # Safekeeper ids are 1-based in NeonEnv, hence start=1.
+            for sk_id, sk in enumerate(self.safekeepers, start=1):
+                body = {
+                    "id": sk_id,
+                    "created_at": "2023-10-25T09:11:25Z",
+                    "updated_at": "2024-08-28T11:32:43Z",
+                    "region_id": "aws-us-east-2",
+                    "host": "127.0.0.1",
+                    "port": sk.port.pg,
+                    "http_port": sk.port.http,
+                    "https_port": None,
+                    "version": 5957,
+                    "availability_zone_id": f"us-east-2b-{sk_id}",
+                }
+
+                self.storage_controller.on_safekeeper_deploy(sk_id, body)
+                self.storage_controller.safekeeper_scheduling_policy(sk_id, "Active")
+
     def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
         """
         After this method returns, there should be no child processes running.
diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py
index 55e38b29a2..89c4a96499 100644
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -2039,6 +2039,29 @@ def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder):
         ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
 
 
+def test_explicit_timeline_creation_storcon(neon_env_builder: NeonEnvBuilder):
+    """
+    Test that a neon.safekeepers value starting with g#n: (with non-zero n)
+    enables generations, which as a side effect disables automatic timeline
+    creation. Like test_explicit_timeline_creation, but asks the storcon to
+    create the membership conf & timeline.
+    """
+    neon_env_builder.num_safekeepers = 3
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+    }
+    env = neon_env_builder.init_start()
+
+    config_lines = [
+        "neon.safekeeper_proto_version = 3",
+    ]
+    ep = env.endpoints.create("main", config_lines=config_lines)
+
+    # endpoint should start.
+    ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
+    ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
+
+
 # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries
 # when compute is active, but there are no writes to the timeline. In that case
 # pageserver should maintain a single connection to safekeeper and don't attempt
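Note on the `pg_version * 10000` change in `storage_controller/src/service.rs` (not part of the patch): the pageserver-reported `timeline_info.pg_version` carries a bare major version, while the safekeeper API now expects the PG_VERSION_NUM encoding documented in `models.rs`, i.e. `major * 10000 + minor` (140017 for PostgreSQL 14.17). A minimal sketch of the conversion; `major_to_pg_version_num` is a hypothetical helper name, not an identifier from the diff:

    /// PG_VERSION_NUM packs "major.minor" as major * 10000 + minor.
    /// With no minor version available, multiplying by 10000 yields the
    /// ".0" encoding, e.g. 14 -> 140000.
    fn major_to_pg_version_num(major: u32) -> u32 {
        major * 10000
    }

    fn main() {
        assert_eq!(major_to_pg_version_num(14), 140000);
        assert_eq!(14 * 10000 + 17, 140017); // 14.17, as in the doc comment
    }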