Add test_explicit_timeline_creation_storcon and make it work (#11261)

Adds a basic test that makes the storcon issue explicit creation of a
timeline on safekeepers (main storcon PR in #11058). It was adapted from
`test_explicit_timeline_creation` from #11002.

Also, do a bunch of fixes needed to get the test to work (the API
definitions weren't correct), and log more details when we can't create a
new timeline due to no safekeepers being active.

Part of #9011

---------

Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
This commit is contained in:
Arpad Müller
2025-03-17 17:28:21 +01:00
committed by GitHub
parent db30e1669c
commit 56149a046a
6 changed files with 58 additions and 12 deletions

View File

@@ -1,6 +1,5 @@
use safekeeper_api::models::{
self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
TimelineStatus,
};
use safekeeper_client::mgmt_api::{Client, Result};
use utils::id::{NodeId, TenantId, TimelineId};
@@ -60,7 +59,7 @@ impl SafekeeperClient {
pub(crate) async fn create_timeline(
&self,
req: &TimelineCreateRequest,
) -> Result<TimelineStatus> {
) -> Result<reqwest::Response> {
measured_request!(
"create_timeline",
crate::metrics::Method::Post,

View File

@@ -3804,7 +3804,7 @@ impl Service {
create_mode: models::TimelineCreateRequestMode,
) -> Result<SafekeepersInfo, ApiError> {
let timeline_id = timeline_info.timeline_id;
let pg_version = timeline_info.pg_version;
let pg_version = timeline_info.pg_version * 10000;
// Initially start_lsn is determined by last_record_lsn in pageserver
// response as it does initdb. However, later we persist it and in sk
// creation calls replace with the value from the timeline row if it
@@ -8723,6 +8723,8 @@ impl Service {
pub(crate) async fn safekeepers_for_new_timeline(
&self,
) -> Result<Vec<SafekeeperInfo>, ApiError> {
// Number of safekeepers in different AZs we are looking for
let wanted_count = 3;
let mut all_safekeepers = {
let locked = self.inner.read().unwrap();
locked
@@ -8768,15 +8770,17 @@ impl Service {
continue;
}
sks.push(sk_info.clone());
if sks.len() == 3 {
if sks.len() == wanted_count {
break;
}
}
if sks.len() == 3 {
if sks.len() == wanted_count {
Ok(sks)
} else {
Err(ApiError::InternalServerError(anyhow::anyhow!(
"couldn't find three safekeepers in different AZs for new timeline"
"couldn't find {wanted_count} safekeepers in different AZs for new timeline (found: {}, total active: {})",
sks.len(),
all_safekeepers.len(),
)))
}
}