mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
Add test_explicit_timeline_creation_storcon and make it work (#11261)
Adds a basic test that makes the storcon issue explicit creation of a timeline on safeekepers (main storcon PR in #11058). It was adapted from `test_explicit_timeline_creation` from #11002. Also, do a bunch of fixes needed to get the test work (the API definitions weren't correct), and log more stuff when we can't create a new timeline due to no safekeepers being active. Part of #9011 --------- Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
This commit is contained in:
@@ -23,6 +23,7 @@ pub struct TimelineCreateRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub mconf: Configuration,
|
||||
/// In the PG_VERSION_NUM macro format, like 140017.
|
||||
pub pg_version: u32,
|
||||
pub system_id: Option<u64>,
|
||||
// By default WAL_SEGMENT_SIZE
|
||||
|
||||
@@ -81,13 +81,10 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<TimelineStatus> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}",
|
||||
self.mgmt_api_endpoint, req.tenant_id, req.timeline_id
|
||||
);
|
||||
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<reqwest::Response> {
|
||||
let uri = format!("{}/v1/tenant/timeline", self.mgmt_api_endpoint);
|
||||
let resp = self.post(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use safekeeper_api::models::{
|
||||
self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
TimelineStatus,
|
||||
};
|
||||
use safekeeper_client::mgmt_api::{Client, Result};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
@@ -60,7 +59,7 @@ impl SafekeeperClient {
|
||||
pub(crate) async fn create_timeline(
|
||||
&self,
|
||||
req: &TimelineCreateRequest,
|
||||
) -> Result<TimelineStatus> {
|
||||
) -> Result<reqwest::Response> {
|
||||
measured_request!(
|
||||
"create_timeline",
|
||||
crate::metrics::Method::Post,
|
||||
|
||||
@@ -3804,7 +3804,7 @@ impl Service {
|
||||
create_mode: models::TimelineCreateRequestMode,
|
||||
) -> Result<SafekeepersInfo, ApiError> {
|
||||
let timeline_id = timeline_info.timeline_id;
|
||||
let pg_version = timeline_info.pg_version;
|
||||
let pg_version = timeline_info.pg_version * 10000;
|
||||
// Initially start_lsn is determined by last_record_lsn in pageserver
|
||||
// response as it does initdb. However, later we persist it and in sk
|
||||
// creation calls replace with the value from the timeline row if it
|
||||
@@ -8723,6 +8723,8 @@ impl Service {
|
||||
pub(crate) async fn safekeepers_for_new_timeline(
|
||||
&self,
|
||||
) -> Result<Vec<SafekeeperInfo>, ApiError> {
|
||||
// Number of safekeepers in different AZs we are looking for
|
||||
let wanted_count = 3;
|
||||
let mut all_safekeepers = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
locked
|
||||
@@ -8768,15 +8770,17 @@ impl Service {
|
||||
continue;
|
||||
}
|
||||
sks.push(sk_info.clone());
|
||||
if sks.len() == 3 {
|
||||
if sks.len() == wanted_count {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if sks.len() == 3 {
|
||||
if sks.len() == wanted_count {
|
||||
Ok(sks)
|
||||
} else {
|
||||
Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"couldn't find three safekeepers in different AZs for new timeline"
|
||||
"couldn't find {wanted_count} safekeepers in different AZs for new timeline (found: {}, total active: {})",
|
||||
sks.len(),
|
||||
all_safekeepers.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1321,6 +1321,28 @@ class NeonEnv:
|
||||
for f in futs:
|
||||
f.result()
|
||||
|
||||
# Last step: register safekeepers at the storage controller
|
||||
if (
|
||||
self.storage_controller_config is not None
|
||||
and self.storage_controller_config.get("timelines_onto_safekeepers") is True
|
||||
):
|
||||
for sk_id, sk in enumerate(self.safekeepers):
|
||||
body = {
|
||||
"id": sk_id,
|
||||
"created_at": "2023-10-25T09:11:25Z",
|
||||
"updated_at": "2024-08-28T11:32:43Z",
|
||||
"region_id": "aws-us-east-2",
|
||||
"host": "127.0.0.1",
|
||||
"port": sk.port.pg,
|
||||
"http_port": sk.port.http,
|
||||
"https_port": None,
|
||||
"version": 5957,
|
||||
"availability_zone_id": f"us-east-2b-{sk_id}",
|
||||
}
|
||||
|
||||
self.storage_controller.on_safekeeper_deploy(sk_id, body)
|
||||
self.storage_controller.safekeeper_scheduling_policy(sk_id, "Active")
|
||||
|
||||
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
|
||||
"""
|
||||
After this method returns, there should be no child processes running.
|
||||
|
||||
@@ -2039,6 +2039,29 @@ def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder):
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
|
||||
def test_explicit_timeline_creation_storcon(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that having neon.safekeepers starting with g#n: with non zero n enables
|
||||
generations, which as a side effect disables automatic timeline creation.
|
||||
Like test_explicit_timeline_creation, but asks the storcon to
|
||||
create membership conf & timeline.
|
||||
"""
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.storage_controller_config = {
|
||||
"timelines_onto_safekeepers": True,
|
||||
}
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
config_lines = [
|
||||
"neon.safekeeper_proto_version = 3",
|
||||
]
|
||||
ep = env.endpoints.create("main", config_lines=config_lines)
|
||||
|
||||
# endpoint should start.
|
||||
ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3])
|
||||
ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)")
|
||||
|
||||
|
||||
# In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries
|
||||
# when compute is active, but there are no writes to the timeline. In that case
|
||||
# pageserver should maintain a single connection to safekeeper and don't attempt
|
||||
|
||||
Reference in New Issue
Block a user