Compare commits

..

69 Commits

Author SHA1 Message Date
a-masterov
f26987deef Merge branch 'main' into amasterov/random-ops-add-snapshots 2025-07-31 12:10:43 +02:00
Alexey Masterov
7c2022c1b5 Modify weights 2025-07-23 15:54:39 +02:00
Alexey Masterov
c233deb1c2 Rename a method for consistency 2025-07-23 15:38:59 +02:00
Alexey Masterov
d550b67c5f Reword the comment 2025-07-23 15:27:36 +02:00
Alexey Masterov
2ca2b05ab5 Remove the redundant condition 2025-07-23 15:21:21 +02:00
Alexey Masterov
5e1057b860 Return sleep before retry 2025-07-23 15:17:01 +02:00
Alexey Masterov
bb6127f495 Update the comment 2025-07-23 15:16:02 +02:00
a-masterov
a41c00e7c1 Merge branch 'main' into amasterov/random-ops-add-snapshots 2025-07-23 15:15:27 +02:00
Alexey Masterov
76832488d0 Add a log with the new branch 2025-07-23 15:09:02 +02:00
Alexey Masterov
3ce2c15c10 Cleanup 2025-07-23 14:30:55 +02:00
Alexey Masterov
1c3f49e231 fix restored_from 2025-07-23 12:57:15 +02:00
Alexey Masterov
b982cf6c84 format 2025-07-23 12:18:27 +02:00
Alexey Masterov
b1b23cdc8e Do not mark a leaf a branch without a parent 2025-07-23 12:10:56 +02:00
Alexey Masterov
556e9cb781 Merge branch 'main' into amasterov/random-ops-add-snapshots
# Conflicts:
#	test_runner/random_ops/test_random_ops.py
2025-07-23 10:58:56 +02:00
Alexey Masterov
8edea1dea3 Do not consider restored branches as leaf ones 2025-07-22 13:41:09 +02:00
Alexey Masterov
f5a553a8e5 Add debug 2025-07-22 11:38:09 +02:00
Alexey Masterov
7423c393c6 Remove parent_id from the restored branch 2025-07-22 11:04:08 +02:00
Alexey Masterov
c3a7158e62 Add debug 2025-07-22 10:43:28 +02:00
Alexey Masterov
848dcd7540 Add debug 2025-07-21 17:00:15 +02:00
Alexey Masterov
783dfe3cce refactoring 2025-07-21 16:05:56 +02:00
Alexey Masterov
cdc2ea110f Cleanup 2025-07-18 16:38:25 +02:00
Alexey Masterov
c7e1183da4 Cleanup 2025-07-18 16:37:39 +02:00
Alexey Masterov
6763925a4d Run all the operations 2025-07-18 16:32:07 +02:00
Alexey Masterov
3bcdbe30f1 Avoid to manipulate restored snapshots 2025-07-18 16:09:46 +02:00
Alexey Masterov
22975426b7 10x more wait 2025-07-18 15:17:29 +02:00
Alexey Masterov
31c6f66a49 Wait before delete 2025-07-18 15:08:45 +02:00
Alexey Masterov
287e01fdf9 retry more 2025-07-18 15:06:52 +02:00
Alexey Masterov
91c81cc5e5 refactor 2025-07-18 14:52:39 +02:00
Alexey Masterov
a8354b0aa3 Delete projects 2025-07-18 14:44:26 +02:00
Alexey Masterov
1102e2aff0 Add connect_env 2025-07-18 14:42:28 +02:00
Alexey Masterov
f6a61c9492 Add commit 2025-07-18 14:08:15 +02:00
Alexey Masterov
cbf8e248fc Do not delete project after failure (debug only, do not merge!) 2025-07-18 09:39:04 +02:00
Alexey Masterov
f0f30076cc Do not delete project after failure (debug only, do not merge!) 2025-07-18 09:35:26 +02:00
Alexey Masterov
42544cf145 Add debug 2025-07-17 19:57:20 +02:00
Alexey Masterov
28b25092ad An attempt 5 2025-07-17 19:49:49 +02:00
Alexey Masterov
b77a1fae04 An attempt 4 2025-07-17 18:58:00 +02:00
Alexey Masterov
73ed7ade70 An attempt 3 2025-07-17 18:53:09 +02:00
Alexey Masterov
74626b94a8 An attempt 2 2025-07-17 18:48:44 +02:00
Alexey Masterov
4ca6d8cecf An attempt 2025-07-17 18:32:57 +02:00
Alexey Masterov
bf0be50df9 Add debug 2025-07-17 15:06:53 +02:00
Alexey Masterov
1adc95758e add the database 2025-07-17 14:52:27 +02:00
Alexey Masterov
03e994f9c7 Connection parameters 2025-07-17 14:40:25 +02:00
Alexey Masterov
f0671c996e Debug 2025-07-17 14:27:09 +02:00
Alexey Masterov
829cb5fe59 use connection parameters instead of connect URI 2025-07-17 14:25:53 +02:00
Alexey Masterov
561083524d finalize restore by default 2025-07-17 13:58:09 +02:00
Alexey Masterov
009303e31f Connect to the target branch, not the main one 2025-07-17 13:44:19 +02:00
Alexey Masterov
0e42cac589 Add debug 2025-07-17 12:48:08 +02:00
Alexey Masterov
f5cebcaf6a Wait for the snapshot to complete 2025-07-17 12:34:43 +02:00
Alexey Masterov
5861d0f9b2 Add the environment 2025-07-17 12:01:50 +02:00
Alexey Masterov
dbedf11191 Add check for snapshot sanity 2025-07-17 11:50:30 +02:00
Alexey Masterov
1e20c4f2b2 format 2025-07-16 13:26:02 +02:00
Alexey Masterov
018f95115a Retry on 423 error "snapshot is in transition" 2025-07-16 13:21:32 +02:00
Alexey Masterov
f222256225 Added a documentation for the new methods 2025-07-15 17:07:07 +02:00
Alexey Masterov
17b5f5e090 Merge remote-tracking branch 'origin/amasterov/random-ops-add' into amasterov/random-ops-add 2025-07-15 16:35:48 +02:00
Alexey Masterov
9bf5d69c01 Cleanup 2025-07-15 16:35:16 +02:00
a-masterov
f816b3d90e Merge branch 'main' into amasterov/random-ops-add 2025-07-15 16:20:14 +02:00
Alexey Masterov
1ec1a82d3d Start benchmark 2025-07-15 15:16:45 +02:00
Alexey Masterov
e97c1d2684 Fix the parameter error 2025-07-15 14:49:27 +02:00
a-masterov
94cfd3f22e Merge branch 'main' into amasterov/random-ops-add 2025-07-15 12:09:32 +02:00
Alexey Masterov
f45ea8fe6b Add snapshots 2025-07-15 12:08:38 +02:00
Alexey Masterov
1443ba65d3 Add reset_to_parent 2025-07-15 12:08:38 +02:00
Alexey Masterov
185f4de0fe refactoring 2025-07-09 19:22:27 +02:00
Alexey Masterov
efb08f82cd Merge branch 'main' into amasterov/random-ops-add
# Conflicts:
#	test_runner/random_ops/test_random_ops.py
2025-07-09 18:59:43 +02:00
Alexey Masterov
c31563f551 formatting 2025-07-09 14:54:13 +02:00
Alexey Masterov
fd6c2cba01 Merge branch 'amasterov/workaround-branch-not-found-problem' into amasterov/random-ops-add
# Conflicts:
#	test_runner/random_ops/test_random_ops.py
2025-07-09 14:53:05 +02:00
Alexey Masterov
899f4a1e77 remove the duplicate log message 2025-07-09 13:58:13 +02:00
Alexey Masterov
e95fcfa0d5 Add retrying if parent branch is not found.
Add clarification and a link to Jira issue.
2025-07-09 13:54:54 +02:00
a-masterov
0ccc649299 Merge branch 'main' into amasterov/random-ops-add 2025-07-08 11:34:39 +02:00
Alexey Masterov
fe2abf3531 fix a mypy discovered error 2025-06-12 10:48:04 +02:00
20 changed files with 819 additions and 977 deletions

View File

@@ -1,99 +0,0 @@
name: verify runner performance with sysbench
on:
# uncomment to run on push for debugging your PR
push:
branches: [ 'bodobolero/sysbench_4_perf_runner' ]
workflow_dispatch:
inputs:
runner_labels_json:
description: JSON array of runner labels to test (e.g. ["small-amd64","large-amd64"])
required: false
default: '["unit-perf-aws-arm"]'
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
group: sysbench-runner-perf
cancel-in-progress: true
permissions:
contents: read
jobs:
sysbench:
strategy:
fail-fast: false
matrix:
runner: ${{ fromJSON((github.event_name == 'workflow_dispatch' && inputs.runner_labels_json != '' && inputs.runner_labels_json) || '["unit-perf-aws-arm"]') }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: ${{ matrix.runner }}
timeout-minutes: 120
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined
steps:
- name: Checkout sysbench source
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: akopytov/sysbench
ref: master
path: sysbench
- name: Build sysbench
run: |
cd "$GITHUB_WORKSPACE/sysbench"
./autogen.sh
./configure --without-mysql
make -j"$(nproc || sysctl -n hw.ncpu || echo 2)"
./src/sysbench --version
- name: sysbench io prepare
run: |
"$GITHUB_WORKSPACE/sysbench/src/sysbench" fileio \
--file-total-size=2G \
--file-test-mode=rndrw \
--file-extra-flags=direct \
--file-fsync-freq=0 \
--threads=4 \
--time=60 prepare
- name: sysbench io run
run: |
"$GITHUB_WORKSPACE/sysbench/src/sysbench" fileio \
--file-total-size=2G \
--file-test-mode=rndrw \
--file-extra-flags=direct \
--file-fsync-freq=0 \
--threads=4 \
--time=60 run
- name: sysbench cpu
run: |
"$GITHUB_WORKSPACE/sysbench/src/sysbench" cpu \
--cpu-max-prime=200000 \
--threads=8 \
--time=60 run
- name: sysbench memory
run: |
"$GITHUB_WORKSPACE/sysbench/src/sysbench" memory \
--memory-block-size=1M \
--memory-total-size=0 \
--threads=8 \
--time=60 \
--memory-oper=write \
run

View File

@@ -2780,7 +2780,7 @@ LIMIT 100",
// 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
if matches!(
prewarm_state,
LfcPrewarmState::Completed { .. }
LfcPrewarmState::Completed
| LfcPrewarmState::NotPrewarmed
| LfcPrewarmState::Skipped
) {

View File

@@ -7,11 +7,19 @@ use http::StatusCode;
use reqwest::Client;
use std::mem::replace;
use std::sync::Arc;
use std::time::Instant;
use tokio::{io::AsyncReadExt, select, spawn};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
#[derive(serde::Serialize, Default)]
pub struct LfcPrewarmStateWithProgress {
#[serde(flatten)]
base: LfcPrewarmState,
total: i32,
prewarmed: i32,
skipped: i32,
}
/// A pair of url and a token to query endpoint storage for LFC prewarm-related tasks
struct EndpointStoragePair {
url: String,
@@ -20,7 +28,7 @@ struct EndpointStoragePair {
const KEY: &str = "lfc_state";
impl EndpointStoragePair {
/// endpoint_id is set to None while prewarming from other endpoint, see compute_promote.rs
/// endpoint_id is set to None while prewarming from other endpoint, see replica promotion
/// If not None, takes precedence over pspec.spec.endpoint_id
fn from_spec_and_endpoint(
pspec: &crate::compute::ParsedSpec,
@@ -46,8 +54,36 @@ impl EndpointStoragePair {
}
impl ComputeNode {
pub async fn lfc_prewarm_state(&self) -> LfcPrewarmState {
self.state.lock().unwrap().lfc_prewarm_state.clone()
// If prewarm failed, we want to get overall number of segments as well as done ones.
// However, this function should be reliable even if querying postgres failed.
pub async fn lfc_prewarm_state(&self) -> LfcPrewarmStateWithProgress {
info!("requesting LFC prewarm state from postgres");
let mut state = LfcPrewarmStateWithProgress::default();
{
state.base = self.state.lock().unwrap().lfc_prewarm_state.clone();
}
let client = match ComputeNode::get_maintenance_client(&self.tokio_conn_conf).await {
Ok(client) => client,
Err(err) => {
error!(%err, "connecting to postgres");
return state;
}
};
let row = match client
.query_one("select * from neon.get_prewarm_info()", &[])
.await
{
Ok(row) => row,
Err(err) => {
error!(%err, "querying LFC prewarm status");
return state;
}
};
state.total = row.try_get(0).unwrap_or_default();
state.prewarmed = row.try_get(1).unwrap_or_default();
state.skipped = row.try_get(2).unwrap_or_default();
state
}
pub fn lfc_offload_state(&self) -> LfcOffloadState {
@@ -97,6 +133,7 @@ impl ComputeNode {
}
/// Request LFC state from endpoint storage and load corresponding pages into Postgres.
/// Returns a result with `false` if the LFC state is not found in endpoint storage.
async fn prewarm_impl(
&self,
from_endpoint: Option<String>,
@@ -111,7 +148,6 @@ impl ComputeNode {
fail::fail_point!("compute-prewarm", |_| bail!("compute-prewarm failpoint"));
info!(%url, "requesting LFC state from endpoint storage");
let mut now = Instant::now();
let request = Client::new().get(&url).bearer_auth(storage_token);
let response = select! {
_ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
@@ -124,8 +160,6 @@ impl ComputeNode {
StatusCode::NOT_FOUND => return Ok(LfcPrewarmState::Skipped),
status => bail!("{status} querying endpoint storage"),
}
let state_download_time_ms = now.elapsed().as_millis() as u32;
now = Instant::now();
let mut uncompressed = Vec::new();
let lfc_state = select! {
@@ -140,8 +174,6 @@ impl ComputeNode {
read = decoder.read_to_end(&mut uncompressed) => read
}
.context("decoding LFC state")?;
let uncompress_time_ms = now.elapsed().as_millis() as u32;
now = Instant::now();
let uncompressed_len = uncompressed.len();
info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}");
@@ -164,34 +196,15 @@ impl ComputeNode {
}
.context("loading LFC state into postgres")
.map(|_| ())?;
let prewarm_time_ms = now.elapsed().as_millis() as u32;
let row = client
.query_one("select * from neon.get_prewarm_info()", &[])
.await
.context("querying prewarm info")?;
let total = row.try_get(0).unwrap_or_default();
let prewarmed = row.try_get(1).unwrap_or_default();
let skipped = row.try_get(2).unwrap_or_default();
Ok(LfcPrewarmState::Completed {
total,
prewarmed,
skipped,
state_download_time_ms,
uncompress_time_ms,
prewarm_time_ms,
})
Ok(LfcPrewarmState::Completed)
}
/// If offload request is ongoing, return false, true otherwise
pub fn offload_lfc(self: &Arc<Self>) -> bool {
{
let state = &mut self.state.lock().unwrap().lfc_offload_state;
if matches!(
replace(state, LfcOffloadState::Offloading),
LfcOffloadState::Offloading
) {
if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
return false;
}
}
@@ -203,10 +216,7 @@ impl ComputeNode {
pub async fn offload_lfc_async(self: &Arc<Self>) {
{
let state = &mut self.state.lock().unwrap().lfc_offload_state;
if matches!(
replace(state, LfcOffloadState::Offloading),
LfcOffloadState::Offloading
) {
if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
return;
}
}
@@ -224,6 +234,7 @@ impl ComputeNode {
LfcOffloadState::Failed { error }
}
};
self.state.lock().unwrap().lfc_offload_state = state;
}
@@ -231,7 +242,6 @@ impl ComputeNode {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
info!(%url, "requesting LFC state from Postgres");
let mut now = Instant::now();
let row = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
.await
.context("connecting to postgres")?
@@ -245,36 +255,25 @@ impl ComputeNode {
info!(%url, "empty LFC state, not exporting");
return Ok(LfcOffloadState::Skipped);
};
let state_query_time_ms = now.elapsed().as_millis() as u32;
now = Instant::now();
let mut compressed = Vec::new();
ZstdEncoder::new(state)
.read_to_end(&mut compressed)
.await
.context("compressing LFC state")?;
let compress_time_ms = now.elapsed().as_millis() as u32;
now = Instant::now();
let compressed_len = compressed.len();
info!(%url, "downloaded LFC state, compressed size {compressed_len}");
info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");
let request = Client::new().put(url).bearer_auth(token).body(compressed);
let response = request
.send()
.await
.context("writing to endpoint storage")?;
let state_upload_time_ms = now.elapsed().as_millis() as u32;
let status = response.status();
if status != StatusCode::OK {
bail!("request to endpoint storage failed: {status}");
match request.send().await {
Ok(res) if res.status() == StatusCode::OK => Ok(LfcOffloadState::Completed),
Ok(res) => bail!(
"Request to endpoint storage failed with status: {}",
res.status()
),
Err(err) => Err(err).context("writing to endpoint storage"),
}
Ok(LfcOffloadState::Completed {
compress_time_ms,
state_query_time_ms,
state_upload_time_ms,
})
}
pub fn cancel_prewarm(self: &Arc<Self>) {

View File

@@ -1,24 +1,32 @@
use crate::compute::ComputeNode;
use anyhow::{Context, bail};
use anyhow::{Context, Result, bail};
use compute_api::responses::{LfcPrewarmState, PromoteConfig, PromoteState};
use std::time::Instant;
use compute_api::spec::ComputeMode;
use itertools::Itertools;
use std::collections::HashMap;
use std::{sync::Arc, time::Duration};
use tokio::time::sleep;
use tracing::info;
use utils::lsn::Lsn;
impl ComputeNode {
/// Returns only when promote fails or succeeds. If http client calling this function
/// disconnects, this does not stop promotion, and subsequent calls block until promote finishes.
/// Returns only when promote fails or succeeds. If a network error occurs
/// and http client disconnects, this does not stop promotion, and subsequent
/// calls block until promote finishes.
/// Called by control plane on secondary after primary endpoint is terminated
/// Has a failpoint "compute-promotion"
pub async fn promote(self: &std::sync::Arc<Self>, cfg: PromoteConfig) -> PromoteState {
let this = self.clone();
let promote_fn = async move || match this.promote_impl(cfg).await {
Ok(state) => state,
Err(err) => {
tracing::error!(%err, "promoting replica");
let error = format!("{err:#}");
PromoteState::Failed { error }
pub async fn promote(self: &Arc<Self>, cfg: PromoteConfig) -> PromoteState {
let cloned = self.clone();
let promote_fn = async move || {
let Err(err) = cloned.promote_impl(cfg).await else {
return PromoteState::Completed;
};
tracing::error!(%err, "promoting");
PromoteState::Failed {
error: format!("{err:#}"),
}
};
let start_promotion = || {
let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
tokio::spawn(async move { tx.send(promote_fn().await) });
@@ -26,31 +34,36 @@ impl ComputeNode {
};
let mut task;
// promote_impl locks self.state so we need to unlock it before calling task.changed()
// self.state is unlocked after block ends so we lock it in promote_impl
// and task.changed() is reached
{
let promote_state = &mut self.state.lock().unwrap().promote_state;
task = promote_state.get_or_insert_with(start_promotion).clone()
}
if task.changed().await.is_err() {
let error = "promote sender dropped".to_string();
return PromoteState::Failed { error };
task = self
.state
.lock()
.unwrap()
.promote_state
.get_or_insert_with(start_promotion)
.clone()
}
task.changed().await.expect("promote sender dropped");
task.borrow().clone()
}
async fn promote_impl(&self, cfg: PromoteConfig) -> anyhow::Result<PromoteState> {
async fn promote_impl(&self, mut cfg: PromoteConfig) -> Result<()> {
{
let state = self.state.lock().unwrap();
let mode = &state.pspec.as_ref().unwrap().spec.mode;
if *mode != compute_api::spec::ComputeMode::Replica {
bail!("compute mode \"{}\" is not replica", mode.to_type_str());
if *mode != ComputeMode::Replica {
bail!("{} is not replica", mode.to_type_str());
}
// we don't need to query Postgres so not self.lfc_prewarm_state()
match &state.lfc_prewarm_state {
status @ (LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming) => {
bail!("compute {status}")
LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
bail!("prewarm not requested or pending")
}
LfcPrewarmState::Failed { error } => {
tracing::warn!(%error, "compute prewarm failed")
tracing::warn!(%error, "replica prewarm failed")
}
_ => {}
}
@@ -59,10 +72,9 @@ impl ComputeNode {
let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
.await
.context("connecting to postgres")?;
let mut now = Instant::now();
let primary_lsn = cfg.wal_flush_lsn;
let mut standby_lsn = utils::lsn::Lsn::INVALID;
let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
const RETRIES: i32 = 20;
for i in 0..=RETRIES {
let row = client
@@ -70,18 +82,16 @@ impl ComputeNode {
.await
.context("getting last replay lsn")?;
let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
standby_lsn = lsn.into();
if standby_lsn >= primary_lsn {
last_wal_replay_lsn = lsn.into();
if last_wal_replay_lsn >= primary_lsn {
break;
}
info!(%standby_lsn, %primary_lsn, "catching up, try {i}");
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
sleep(Duration::from_secs(1)).await;
}
if standby_lsn < primary_lsn {
if last_wal_replay_lsn < primary_lsn {
bail!("didn't catch up with primary in {RETRIES} retries");
}
let lsn_wait_time_ms = now.elapsed().as_millis() as u32;
now = Instant::now();
// using $1 doesn't work with ALTER SYSTEM SET
let safekeepers_sql = format!(
@@ -92,33 +102,27 @@ impl ComputeNode {
.query(&safekeepers_sql, &[])
.await
.context("setting safekeepers")?;
client
.query(
"ALTER SYSTEM SET synchronous_standby_names=walproposer",
&[],
)
.await
.context("setting synchronous_standby_names")?;
client
.query("SELECT pg_catalog.pg_reload_conf()", &[])
.await
.context("reloading postgres config")?;
#[cfg(feature = "testing")]
fail::fail_point!("compute-promotion", |_| bail!(
"compute-promotion failpoint"
));
fail::fail_point!("compute-promotion", |_| {
bail!("promotion configured to fail because of a failpoint")
});
let row = client
.query_one("SELECT * FROM pg_catalog.pg_promote()", &[])
.await
.context("pg_promote")?;
if !row.get::<usize, bool>(0) {
bail!("pg_promote() failed");
bail!("pg_promote() returned false");
}
let pg_promote_time_ms = now.elapsed().as_millis() as u32;
let now = Instant::now();
let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
.await
.context("connecting to postgres")?;
let row = client
.query_one("SHOW transaction_read_only", &[])
.await
@@ -127,47 +131,36 @@ impl ComputeNode {
bail!("replica in read only mode after promotion");
}
// Already checked validity in http handler
#[allow(unused_mut)]
let mut new_pspec = crate::compute::ParsedSpec::try_from(cfg.spec).expect("invalid spec");
{
let mut state = self.state.lock().unwrap();
// Local setup has different ports for pg process (port=) for primary and secondary.
// Primary is stopped so we need secondary's "port" value
#[cfg(feature = "testing")]
{
let old_spec = &state.pspec.as_ref().unwrap().spec;
let Some(old_conf) = old_spec.cluster.postgresql_conf.as_ref() else {
bail!("pspec.spec.cluster.postgresql_conf missing for endpoint");
};
let set: std::collections::HashMap<&str, &str> = old_conf
.split_terminator('\n')
.map(|e| e.split_once("=").expect("invalid item"))
.collect();
let Some(new_conf) = new_pspec.spec.cluster.postgresql_conf.as_mut() else {
bail!("pspec.spec.cluster.postgresql_conf missing for supplied config");
};
new_conf.push_str(&format!("port={}\n", set["port"]));
}
tracing::debug!("applied spec: {:#?}", new_pspec.spec);
if self.params.lakebase_mode {
ComputeNode::set_spec(&self.params, &mut state, new_pspec);
} else {
state.pspec = Some(new_pspec);
}
let spec = &mut state.pspec.as_mut().unwrap().spec;
spec.mode = ComputeMode::Primary;
let new_conf = cfg.spec.cluster.postgresql_conf.as_mut().unwrap();
let existing_conf = spec.cluster.postgresql_conf.as_ref().unwrap();
Self::merge_spec(new_conf, existing_conf);
}
info!("applied new spec, reconfiguring as primary");
self.reconfigure()?;
let reconfigure_time_ms = now.elapsed().as_millis() as u32;
self.reconfigure()
}
Ok(PromoteState::Completed {
lsn_wait_time_ms,
pg_promote_time_ms,
reconfigure_time_ms,
})
/// Merge old and new Postgres conf specs to apply on secondary.
/// Change new spec's port and safekeepers since they are supplied
/// differenly
fn merge_spec(new_conf: &mut String, existing_conf: &str) {
let mut new_conf_set: HashMap<&str, &str> = new_conf
.split_terminator('\n')
.map(|e| e.split_once("=").expect("invalid item"))
.collect();
new_conf_set.remove("neon.safekeepers");
let existing_conf_set: HashMap<&str, &str> = existing_conf
.split_terminator('\n')
.map(|e| e.split_once("=").expect("invalid item"))
.collect();
new_conf_set.insert("port", existing_conf_set["port"]);
*new_conf = new_conf_set
.iter()
.map(|(k, v)| format!("{k}={v}"))
.join("\n");
}
}

View File

@@ -65,19 +65,14 @@ pub fn write_postgres_conf(
writeln!(file, "{conf}")?;
}
// Stripe size GUC should be defined prior to connection string
if let Some(stripe_size) = spec.shard_stripe_size {
writeln!(file, "neon.stripe_size={stripe_size}")?;
}
// Add options for connecting to storage
writeln!(file, "# Neon storage settings")?;
writeln!(file)?;
if let Some(conninfo) = &spec.pageserver_connection_info {
// Stripe size GUC should be defined prior to connection string
if let Some(stripe_size) = conninfo.stripe_size {
writeln!(
file,
"# from compute spec's pageserver_connection_info.stripe_size field"
)?;
writeln!(file, "neon.stripe_size={stripe_size}")?;
}
let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
let num_shards = if conninfo.shard_count.0 == 0 {
1 // unsharded, treat it as a single shard
@@ -115,7 +110,7 @@ pub fn write_postgres_conf(
if let Some(libpq_urls) = libpq_urls {
writeln!(
file,
"# derived from compute spec's pageserver_connection_info field"
"# derived from compute spec's pageserver_conninfo field"
)?;
writeln!(
file,
@@ -125,16 +120,24 @@ pub fn write_postgres_conf(
} else {
writeln!(file, "# no neon.pageserver_connstring")?;
}
} else {
// Stripe size GUC should be defined prior to connection string
if let Some(stripe_size) = spec.shard_stripe_size {
writeln!(file, "# from compute spec's shard_stripe_size field")?;
if let Some(stripe_size) = conninfo.stripe_size {
writeln!(
file,
"# from compute spec's pageserver_conninfo.stripe_size field"
)?;
writeln!(file, "neon.stripe_size={stripe_size}")?;
}
} else {
if let Some(s) = &spec.pageserver_connstring {
writeln!(file, "# from compute spec's pageserver_connstring field")?;
writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
}
if let Some(stripe_size) = spec.shard_stripe_size {
writeln!(file, "# from compute spec's shard_stripe_size field")?;
writeln!(file, "neon.stripe_size={stripe_size}")?;
}
}
if !spec.safekeeper_connstrings.is_empty() {

View File

@@ -617,6 +617,9 @@ components:
type: object
required:
- status
- total
- prewarmed
- skipped
properties:
status:
description: LFC prewarm status
@@ -634,15 +637,6 @@ components:
skipped:
description: Pages processed but not prewarmed
type: integer
state_download_time_ms:
description: Time it takes to download LFC state to compute
type: integer
uncompress_time_ms:
description: Time it takes to uncompress LFC state
type: integer
prewarm_time_ms:
description: Time it takes to prewarm LFC state in Postgres
type: integer
LfcOffloadState:
type: object
@@ -656,16 +650,6 @@ components:
error:
description: LFC offload error, if any
type: string
state_query_time_ms:
description: Time it takes to get LFC state from Postgres
type: integer
compress_time_ms:
description: Time it takes to compress LFC state
type: integer
state_upload_time_ms:
description: Time it takes to upload LFC state to endpoint storage
type: integer
PromoteState:
type: object
@@ -679,15 +663,6 @@ components:
error:
description: Promote error, if any
type: string
lsn_wait_time_ms:
description: Time it takes for secondary to catch up with primary WAL flush LSN
type: integer
pg_promote_time_ms:
description: Time it takes to call pg_promote on secondary
type: integer
reconfigure_time_ms:
description: Time it takes to reconfigure promoted secondary
type: integer
SetRoleGrantsRequest:
type: object

View File

@@ -1,11 +1,12 @@
use crate::compute_prewarm::LfcPrewarmStateWithProgress;
use crate::http::JsonResponse;
use axum::response::{IntoResponse, Response};
use axum::{Json, http::StatusCode};
use axum_extra::extract::OptionalQuery;
use compute_api::responses::{LfcOffloadState, LfcPrewarmState};
use compute_api::responses::LfcOffloadState;
type Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;
pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmState> {
pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmStateWithProgress> {
Json(compute.lfc_prewarm_state().await)
}

View File

@@ -1,22 +1,11 @@
use crate::http::JsonResponse;
use axum::extract::Json;
use compute_api::responses::PromoteConfig;
use http::StatusCode;
pub(in crate::http) async fn promote(
compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
Json(cfg): Json<PromoteConfig>,
Json(cfg): Json<compute_api::responses::PromoteConfig>,
) -> axum::response::Response {
// Return early at the cost of extra parsing spec
let pspec = match crate::compute::ParsedSpec::try_from(cfg.spec) {
Ok(p) => p,
Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
};
let cfg = PromoteConfig {
spec: pspec.spec,
wal_flush_lsn: cfg.wal_flush_lsn,
};
let state = compute.promote(cfg).await;
if let compute_api::responses::PromoteState::Failed { error: _ } = state {
return JsonResponse::create_response(StatusCode::INTERNAL_SERVER_ERROR, state);

View File

@@ -0,0 +1,13 @@
DO $$
DECLARE
query varchar;
BEGIN
FOR query IN
SELECT pg_catalog.format('ALTER FUNCTION %I(%s) OWNER TO {db_owner};', p.oid::regproc, pg_catalog.pg_get_function_identity_arguments(p.oid))
FROM pg_catalog.pg_proc p
WHERE p.pronamespace OPERATOR(pg_catalog.=) 'anon'::regnamespace::oid
LOOP
EXECUTE query;
END LOOP;
END
$$;

View File

@@ -71,9 +71,8 @@ const DEFAULT_PG_VERSION_NUM: &str = "17";
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
/// Neon CLI.
#[derive(clap::Parser)]
#[command(version = GIT_VERSION, name = "Neon CLI")]
#[command(version = GIT_VERSION, about, name = "Neon CLI")]
struct Cli {
#[command(subcommand)]
command: NeonLocalCmd,
@@ -108,31 +107,30 @@ enum NeonLocalCmd {
Stop(StopCmdArgs),
}
/// Initialize a new Neon repository, preparing configs for services to start with.
#[derive(clap::Args)]
#[clap(about = "Initialize a new Neon repository, preparing configs for services to start with")]
struct InitCmdArgs {
/// How many pageservers to create (default 1).
#[clap(long)]
#[clap(long, help("How many pageservers to create (default 1)"))]
num_pageservers: Option<u16>,
#[clap(long)]
config: Option<PathBuf>,
/// Force initialization even if the repository is not empty.
#[clap(long, default_value = "must-not-exist")]
#[clap(long, help("Force initialization even if the repository is not empty"))]
#[arg(value_parser)]
#[clap(default_value = "must-not-exist")]
force: InitForceMode,
}
/// Start pageserver and safekeepers.
#[derive(clap::Args)]
#[clap(about = "Start pageserver and safekeepers")]
struct StartCmdArgs {
#[clap(long = "start-timeout", default_value = "10s")]
timeout: humantime::Duration,
}
/// Stop pageserver and safekeepers.
#[derive(clap::Args)]
#[clap(about = "Stop pageserver and safekeepers")]
struct StopCmdArgs {
#[arg(value_enum)]
#[clap(long, default_value_t = StopMode::Fast)]
@@ -145,8 +143,8 @@ enum StopMode {
Immediate,
}
/// Manage tenants.
#[derive(clap::Subcommand)]
#[clap(about = "Manage tenants")]
enum TenantCmd {
List,
Create(TenantCreateCmdArgs),
@@ -157,36 +155,38 @@ enum TenantCmd {
#[derive(clap::Args)]
struct TenantCreateCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// Use a specific timeline id when creating a tenant and its initial timeline.
#[clap(long)]
#[clap(
long,
help = "Use a specific timeline id when creating a tenant and its initial timeline"
)]
timeline_id: Option<TimelineId>,
#[clap(short = 'c')]
config: Vec<String>,
/// Postgres version to use for the initial timeline.
#[arg(default_value = DEFAULT_PG_VERSION_NUM)]
#[clap(long)]
#[clap(long, help = "Postgres version to use for the initial timeline")]
pg_version: PgMajorVersion,
/// Use this tenant in future CLI commands where tenant_id is needed, but not specified.
#[clap(long)]
#[clap(
long,
help = "Use this tenant in future CLI commands where tenant_id is needed, but not specified"
)]
set_default: bool,
/// Number of shards in the new tenant.
#[clap(long)]
#[clap(long, help = "Number of shards in the new tenant")]
#[arg(default_value_t = 0)]
shard_count: u8,
/// Sharding stripe size in pages.
#[clap(long)]
#[clap(long, help = "Sharding stripe size in pages")]
shard_stripe_size: Option<u32>,
/// Placement policy shards in this tenant.
#[clap(long)]
#[clap(long, help = "Placement policy shards in this tenant")]
#[arg(value_parser = parse_placement_policy)]
placement_policy: Option<PlacementPolicy>,
}
@@ -195,35 +195,44 @@ fn parse_placement_policy(s: &str) -> anyhow::Result<PlacementPolicy> {
Ok(serde_json::from_str::<PlacementPolicy>(s)?)
}
/// Set a particular tenant as default in future CLI commands where tenant_id is needed, but not
/// specified.
#[derive(clap::Args)]
#[clap(
about = "Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"
)]
struct TenantSetDefaultCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: TenantId,
}
#[derive(clap::Args)]
struct TenantConfigCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
#[clap(short = 'c')]
config: Vec<String>,
}
/// Import a tenant that is present in remote storage, and create branches for its timelines.
#[derive(clap::Args)]
#[clap(
about = "Import a tenant that is present in remote storage, and create branches for its timelines"
)]
struct TenantImportCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: TenantId,
}
/// Manage timelines.
#[derive(clap::Subcommand)]
#[clap(about = "Manage timelines")]
enum TimelineCmd {
List(TimelineListCmdArgs),
Branch(TimelineBranchCmdArgs),
@@ -231,87 +240,98 @@ enum TimelineCmd {
Import(TimelineImportCmdArgs),
}
/// List all timelines available to this pageserver.
#[derive(clap::Args)]
#[clap(about = "List all timelines available to this pageserver")]
struct TimelineListCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_shard_id: Option<TenantShardId>,
}
/// Create a new timeline, branching off from another timeline.
#[derive(clap::Args)]
#[clap(about = "Create a new timeline, branching off from another timeline")]
struct TimelineBranchCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// New timeline's ID, as a 32-byte hexadecimal string.
#[clap(long)]
#[clap(long, help = "New timeline's ID")]
timeline_id: Option<TimelineId>,
/// Human-readable alias for the new timeline.
#[clap(long)]
#[clap(long, help = "Human-readable alias for the new timeline")]
branch_name: String,
/// Use last Lsn of another timeline (and its data) as base when creating the new timeline. The
/// timeline gets resolved by its branch name.
#[clap(long)]
#[clap(
long,
help = "Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name."
)]
ancestor_branch_name: Option<String>,
/// When using another timeline as base, use a specific Lsn in it instead of the latest one.
#[clap(long)]
#[clap(
long,
help = "When using another timeline as base, use a specific Lsn in it instead of the latest one"
)]
ancestor_start_lsn: Option<Lsn>,
}
/// Create a new blank timeline.
#[derive(clap::Args)]
#[clap(about = "Create a new blank timeline")]
struct TimelineCreateCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// New timeline's ID, as a 32-byte hexadecimal string.
#[clap(long)]
#[clap(long, help = "New timeline's ID")]
timeline_id: Option<TimelineId>,
/// Human-readable alias for the new timeline.
#[clap(long)]
#[clap(long, help = "Human-readable alias for the new timeline")]
branch_name: String,
/// Postgres version.
#[arg(default_value = DEFAULT_PG_VERSION_NUM)]
#[clap(long)]
#[clap(long, help = "Postgres version")]
pg_version: PgMajorVersion,
}
/// Import a timeline from a basebackup directory.
#[derive(clap::Args)]
#[clap(about = "Import timeline from a basebackup directory")]
struct TimelineImportCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// New timeline's ID, as a 32-byte hexadecimal string.
#[clap(long)]
#[clap(long, help = "New timeline's ID")]
timeline_id: TimelineId,
/// Human-readable alias for the new timeline.
#[clap(long)]
#[clap(long, help = "Human-readable alias for the new timeline")]
branch_name: String,
/// Basebackup tarfile to import.
#[clap(long)]
#[clap(long, help = "Basebackup tarfile to import")]
base_tarfile: PathBuf,
/// LSN the basebackup starts at.
#[clap(long)]
#[clap(long, help = "Lsn the basebackup starts at")]
base_lsn: Lsn,
/// WAL to add after base.
#[clap(long)]
#[clap(long, help = "Wal to add after base")]
wal_tarfile: Option<PathBuf>,
/// LSN the basebackup ends at.
#[clap(long)]
#[clap(long, help = "Lsn the basebackup ends at")]
end_lsn: Option<Lsn>,
/// Postgres version of the basebackup being imported.
#[arg(default_value = DEFAULT_PG_VERSION_NUM)]
#[clap(long)]
#[clap(long, help = "Postgres version of the backup being imported")]
pg_version: PgMajorVersion,
}
/// Manage pageservers.
#[derive(clap::Subcommand)]
#[clap(about = "Manage pageservers")]
enum PageserverCmd {
Status(PageserverStatusCmdArgs),
Start(PageserverStartCmdArgs),
@@ -319,202 +339,223 @@ enum PageserverCmd {
Restart(PageserverRestartCmdArgs),
}
/// Show status of a local pageserver.
#[derive(clap::Args)]
#[clap(about = "Show status of a local pageserver")]
struct PageserverStatusCmdArgs {
/// Pageserver ID.
#[clap(long = "id")]
#[clap(long = "id", help = "pageserver id")]
pageserver_id: Option<NodeId>,
}
/// Start local pageserver.
#[derive(clap::Args)]
#[clap(about = "Start local pageserver")]
struct PageserverStartCmdArgs {
/// Pageserver ID.
#[clap(long = "id")]
#[clap(long = "id", help = "pageserver id")]
pageserver_id: Option<NodeId>,
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Stop local pageserver.
#[derive(clap::Args)]
#[clap(about = "Stop local pageserver")]
struct PageserverStopCmdArgs {
/// Pageserver ID.
#[clap(long = "id")]
#[clap(long = "id", help = "pageserver id")]
pageserver_id: Option<NodeId>,
/// If 'immediate', don't flush repository data at shutdown
#[clap(short = 'm')]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
#[arg(value_enum, default_value = "fast")]
stop_mode: StopMode,
}
/// Restart local pageserver.
#[derive(clap::Args)]
#[clap(about = "Restart local pageserver")]
struct PageserverRestartCmdArgs {
/// Pageserver ID.
#[clap(long = "id")]
#[clap(long = "id", help = "pageserver id")]
pageserver_id: Option<NodeId>,
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Manage storage controller.
#[derive(clap::Subcommand)]
#[clap(about = "Manage storage controller")]
enum StorageControllerCmd {
Start(StorageControllerStartCmdArgs),
Stop(StorageControllerStopCmdArgs),
}
/// Start storage controller.
#[derive(clap::Args)]
#[clap(about = "Start storage controller")]
struct StorageControllerStartCmdArgs {
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
/// Identifier used to distinguish storage controller instances.
#[clap(long)]
#[clap(
long,
help = "Identifier used to distinguish storage controller instances"
)]
#[arg(default_value_t = 1)]
instance_id: u8,
/// Base port for the storage controller instance identified by instance-id (defaults to
/// pageserver cplane api).
#[clap(long)]
#[clap(
long,
help = "Base port for the storage controller instance idenfified by instance-id (defaults to pageserver cplane api)"
)]
base_port: Option<u16>,
/// Whether the storage controller should handle pageserver-reported local disk loss events.
#[clap(long)]
#[clap(
long,
help = "Whether the storage controller should handle pageserver-reported local disk loss events."
)]
handle_ps_local_disk_loss: Option<bool>,
}
/// Stop storage controller.
#[derive(clap::Args)]
#[clap(about = "Stop storage controller")]
struct StorageControllerStopCmdArgs {
/// If 'immediate', don't flush repository data at shutdown
#[clap(short = 'm')]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
#[arg(value_enum, default_value = "fast")]
stop_mode: StopMode,
/// Identifier used to distinguish storage controller instances.
#[clap(long)]
#[clap(
long,
help = "Identifier used to distinguish storage controller instances"
)]
#[arg(default_value_t = 1)]
instance_id: u8,
}
/// Manage storage broker.
#[derive(clap::Subcommand)]
#[clap(about = "Manage storage broker")]
enum StorageBrokerCmd {
Start(StorageBrokerStartCmdArgs),
Stop(StorageBrokerStopCmdArgs),
}
/// Start broker.
#[derive(clap::Args)]
#[clap(about = "Start broker")]
struct StorageBrokerStartCmdArgs {
/// Timeout until we fail the command.
#[clap(short = 't', long, default_value = "10s")]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Stop broker.
#[derive(clap::Args)]
#[clap(about = "stop broker")]
struct StorageBrokerStopCmdArgs {
/// If 'immediate', don't flush repository data on shutdown.
#[clap(short = 'm')]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
#[arg(value_enum, default_value = "fast")]
stop_mode: StopMode,
}
/// Manage safekeepers.
#[derive(clap::Subcommand)]
#[clap(about = "Manage safekeepers")]
enum SafekeeperCmd {
Start(SafekeeperStartCmdArgs),
Stop(SafekeeperStopCmdArgs),
Restart(SafekeeperRestartCmdArgs),
}
/// Manage object storage.
#[derive(clap::Subcommand)]
#[clap(about = "Manage object storage")]
enum EndpointStorageCmd {
Start(EndpointStorageStartCmd),
Stop(EndpointStorageStopCmd),
}
/// Start object storage.
#[derive(clap::Args)]
#[clap(about = "Start object storage")]
struct EndpointStorageStartCmd {
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Stop object storage.
#[derive(clap::Args)]
#[clap(about = "Stop object storage")]
struct EndpointStorageStopCmd {
/// If 'immediate', don't flush repository data on shutdown.
#[clap(short = 'm')]
#[arg(value_enum, default_value = "fast")]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
stop_mode: StopMode,
}
/// Start local safekeeper.
#[derive(clap::Args)]
#[clap(about = "Start local safekeeper")]
struct SafekeeperStartCmdArgs {
/// Safekeeper ID.
#[clap(help = "safekeeper id")]
#[arg(default_value_t = NodeId(1))]
id: NodeId,
/// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
#[clap(short = 'e', long = "safekeeper-extra-opt")]
#[clap(
short = 'e',
long = "safekeeper-extra-opt",
help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
)]
extra_opt: Vec<String>,
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Stop local safekeeper.
#[derive(clap::Args)]
#[clap(about = "Stop local safekeeper")]
struct SafekeeperStopCmdArgs {
/// Safekeeper ID.
#[clap(help = "safekeeper id")]
#[arg(default_value_t = NodeId(1))]
id: NodeId,
/// If 'immediate', don't flush repository data on shutdown.
#[arg(value_enum, default_value = "fast")]
#[clap(short = 'm')]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
stop_mode: StopMode,
}
/// Restart local safekeeper.
#[derive(clap::Args)]
#[clap(about = "Restart local safekeeper")]
struct SafekeeperRestartCmdArgs {
/// Safekeeper ID.
#[clap(help = "safekeeper id")]
#[arg(default_value_t = NodeId(1))]
id: NodeId,
/// If 'immediate', don't flush repository data on shutdown.
#[arg(value_enum, default_value = "fast")]
#[clap(short = 'm')]
#[clap(
short = 'm',
help = "If 'immediate', don't flush repository data at shutdown"
)]
stop_mode: StopMode,
/// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
#[clap(short = 'e', long = "safekeeper-extra-opt")]
#[clap(
short = 'e',
long = "safekeeper-extra-opt",
help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
)]
extra_opt: Vec<String>,
/// Timeout until we fail the command.
#[clap(short = 't', long)]
#[clap(short = 't', long, help = "timeout until we fail the command")]
#[arg(default_value = "10s")]
start_timeout: humantime::Duration,
}
/// Manage Postgres instances.
#[derive(clap::Subcommand)]
#[clap(about = "Manage Postgres instances")]
enum EndpointCmd {
List(EndpointListCmdArgs),
Create(EndpointCreateCmdArgs),
@@ -526,27 +567,33 @@ enum EndpointCmd {
GenerateJwt(EndpointGenerateJwtCmdArgs),
}
/// List endpoints.
#[derive(clap::Args)]
#[clap(about = "List endpoints")]
struct EndpointListCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_shard_id: Option<TenantShardId>,
}
/// Create a compute endpoint.
#[derive(clap::Args)]
#[clap(about = "Create a compute endpoint")]
struct EndpointCreateCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// Postgres endpoint ID.
#[clap(help = "Postgres endpoint id")]
endpoint_id: Option<String>,
/// Name of the branch the endpoint will run on.
#[clap(long)]
#[clap(long, help = "Name of the branch the endpoint will run on")]
branch_name: Option<String>,
/// Specify LSN on the timeline to start from. By default, end of the timeline would be used.
#[clap(long)]
#[clap(
long,
help = "Specify Lsn on the timeline to start from. By default, end of the timeline would be used"
)]
lsn: Option<Lsn>,
#[clap(long)]
pg_port: Option<u16>,
@@ -557,13 +604,16 @@ struct EndpointCreateCmdArgs {
#[clap(long = "pageserver-id")]
endpoint_pageserver_id: Option<NodeId>,
/// Don't do basebackup, create endpoint directory with only config files.
#[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
#[clap(
long,
help = "Don't do basebackup, create endpoint directory with only config files",
action = clap::ArgAction::Set,
default_value_t = false
)]
config_only: bool,
/// Postgres version.
#[arg(default_value = DEFAULT_PG_VERSION_NUM)]
#[clap(long)]
#[clap(long, help = "Postgres version")]
pg_version: PgMajorVersion,
/// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
@@ -574,140 +624,170 @@ struct EndpointCreateCmdArgs {
#[clap(long)]
grpc: bool,
/// If set, the node will be a hot replica on the specified timeline.
#[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
#[clap(
long,
help = "If set, the node will be a hot replica on the specified timeline",
action = clap::ArgAction::Set,
default_value_t = false
)]
hot_standby: bool,
/// If set, will set up the catalog for neon_superuser.
#[clap(long)]
#[clap(long, help = "If set, will set up the catalog for neon_superuser")]
update_catalog: bool,
/// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
/// useful for tests.
#[clap(long)]
#[clap(
long,
help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
)]
allow_multiple: bool,
/// Name of the privileged role for the endpoint.
// Only allow changing it on creation.
#[clap(long)]
/// Only allow changing it on creation
#[clap(long, help = "Name of the privileged role for the endpoint")]
privileged_role_name: Option<String>,
}
/// Start Postgres. If the endpoint doesn't exist yet, it is created.
#[derive(clap::Args)]
#[clap(about = "Start postgres. If the endpoint doesn't exist yet, it is created.")]
struct EndpointStartCmdArgs {
/// Postgres endpoint ID.
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
/// Pageserver ID.
#[clap(long = "pageserver-id")]
endpoint_pageserver_id: Option<NodeId>,
/// Safekeepers membership generation to prefix neon.safekeepers with.
#[clap(long)]
#[clap(
long,
help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
)]
safekeepers_generation: Option<u32>,
/// List of safekeepers endpoint will talk to.
#[clap(long)]
#[clap(
long,
help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
)]
safekeepers: Option<String>,
/// Configure the remote extensions storage proxy gateway URL to request for extensions.
#[clap(long, alias = "remote-ext-config")]
#[clap(
long,
help = "Configure the remote extensions storage proxy gateway URL to request for extensions.",
alias = "remote-ext-config"
)]
remote_ext_base_url: Option<String>,
/// If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`
#[clap(long)]
#[clap(
long,
help = "If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`"
)]
create_test_user: bool,
/// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
/// useful for tests.
#[clap(long)]
#[clap(
long,
help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
)]
allow_multiple: bool,
/// Timeout until we fail the command.
#[clap(short = 't', long, value_parser= humantime::parse_duration)]
#[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
#[arg(default_value = "90s")]
start_timeout: Duration,
/// Download LFC cache from endpoint storage on endpoint startup
#[clap(long, default_value = "false")]
#[clap(
long,
help = "Download LFC cache from endpoint storage on endpoint startup",
default_value = "false"
)]
autoprewarm: bool,
/// Upload LFC cache to endpoint storage periodically
#[clap(long)]
#[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
/// Run in development mode, skipping VM-specific operations like process termination
#[clap(long, action = clap::ArgAction::SetTrue)]
#[clap(
long,
help = "Run in development mode, skipping VM-specific operations like process termination",
action = clap::ArgAction::SetTrue
)]
dev: bool,
}
/// Reconfigure an endpoint.
#[derive(clap::Args)]
#[clap(about = "Reconfigure an endpoint")]
struct EndpointReconfigureCmdArgs {
/// Tenant id. Represented as a hexadecimal string 32 symbols length
#[clap(long = "tenant-id")]
#[clap(
long = "tenant-id",
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: Option<TenantId>,
/// Postgres endpoint ID.
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
/// Pageserver ID.
#[clap(long = "pageserver-id")]
endpoint_pageserver_id: Option<NodeId>,
#[clap(long)]
safekeepers: Option<String>,
}
/// Refresh the endpoint's configuration by forcing it reload it's spec
#[derive(clap::Args)]
#[clap(about = "Refresh the endpoint's configuration by forcing it reload it's spec")]
struct EndpointRefreshConfigurationArgs {
/// Postgres endpoint id
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
}
/// Stop an endpoint.
#[derive(clap::Args)]
#[clap(about = "Stop an endpoint")]
struct EndpointStopCmdArgs {
/// Postgres endpoint ID.
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
/// Also delete data directory (now optional, should be default in future).
#[clap(long)]
#[clap(
long,
help = "Also delete data directory (now optional, should be default in future)"
)]
destroy: bool,
/// Postgres shutdown mode, passed to `pg_ctl -m <mode>`.
#[clap(long)]
#[clap(long, help = "Postgres shutdown mode")]
#[clap(default_value = "fast")]
mode: EndpointTerminateMode,
}
/// Update the pageservers in the spec file of the compute endpoint
#[derive(clap::Args)]
#[clap(about = "Update the pageservers in the spec file of the compute endpoint")]
struct EndpointUpdatePageserversCmdArgs {
/// Postgres endpoint id
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
/// Specified pageserver id
#[clap(short = 'p', long)]
#[clap(short = 'p', long, help = "Specified pageserver id")]
pageserver_id: Option<NodeId>,
}
/// Generate a JWT for an endpoint.
#[derive(clap::Args)]
#[clap(about = "Generate a JWT for an endpoint")]
struct EndpointGenerateJwtCmdArgs {
/// Postgres endpoint ID.
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
/// Scope to generate the JWT with.
#[clap(short = 's', long, value_parser = ComputeClaimsScope::from_str)]
#[clap(short = 's', long, help = "Scope to generate the JWT with", value_parser = ComputeClaimsScope::from_str)]
scope: Option<ComputeClaimsScope>,
}
/// Manage neon_local branch name mappings.
#[derive(clap::Subcommand)]
#[clap(about = "Manage neon_local branch name mappings")]
enum MappingsCmd {
Map(MappingsMapCmdArgs),
}
/// Create new mapping which cannot exist already.
#[derive(clap::Args)]
#[clap(about = "Create new mapping which cannot exist already")]
struct MappingsMapCmdArgs {
/// Tenant ID, as a 32-byte hexadecimal string.
#[clap(long)]
#[clap(
long,
help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
)]
tenant_id: TenantId,
/// Timeline ID, as a 32-byte hexadecimal string.
#[clap(long)]
#[clap(
long,
help = "Timeline id. Represented as a hexadecimal string 32 symbols length"
)]
timeline_id: TimelineId,
/// Branch name to give to the timeline.
#[clap(long)]
#[clap(long, help = "Branch name to give to the timeline")]
branch_name: String,
}

View File

@@ -303,13 +303,6 @@ enum Command {
#[arg(long, required = true, value_delimiter = ',')]
new_sk_set: Vec<NodeId>,
},
/// Abort ongoing safekeeper migration.
TimelineSafekeeperMigrateAbort {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
timeline_id: TimelineId,
},
}
#[derive(Parser)]
@@ -1403,17 +1396,6 @@ async fn main() -> anyhow::Result<()> {
)
.await?;
}
Command::TimelineSafekeeperMigrateAbort {
tenant_id,
timeline_id,
} => {
let path =
format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort");
storcon_client
.dispatch::<(), ()>(Method::POST, path, None)
.await?;
}
}
Ok(())

View File

@@ -1,9 +1,10 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use std::fmt::Display;
use chrono::{DateTime, Utc};
use jsonwebtoken::jwk::JwkSet;
use serde::{Deserialize, Serialize, Serializer};
use std::fmt::Display;
use crate::privilege::Privilege;
use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};
@@ -48,7 +49,7 @@ pub struct ExtensionInstallResponse {
/// Status of the LFC prewarm process. The same state machine is reused for
/// both autoprewarm (prewarm after compute/Postgres start using the previously
/// stored LFC state) and explicit prewarming via API.
#[derive(Serialize, Default, Debug, Clone)]
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum LfcPrewarmState {
/// Default value when compute boots up.
@@ -58,14 +59,7 @@ pub enum LfcPrewarmState {
Prewarming,
/// We found requested LFC state in the endpoint storage and
/// completed prewarming successfully.
Completed {
total: i32,
prewarmed: i32,
skipped: i32,
state_download_time_ms: u32,
uncompress_time_ms: u32,
prewarm_time_ms: u32,
},
Completed,
/// Unexpected error happened during prewarming. Note, `Not Found 404`
/// response from the endpoint storage is explicitly excluded here
/// because it can normally happen on the first compute start,
@@ -90,7 +84,7 @@ impl Display for LfcPrewarmState {
match self {
LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
LfcPrewarmState::Completed { .. } => f.write_str("Completed"),
LfcPrewarmState::Completed => f.write_str("Completed"),
LfcPrewarmState::Skipped => f.write_str("Skipped"),
LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
LfcPrewarmState::Cancelled => f.write_str("Cancelled"),
@@ -98,36 +92,26 @@ impl Display for LfcPrewarmState {
}
}
#[derive(Serialize, Default, Debug, Clone)]
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum LfcOffloadState {
#[default]
NotOffloaded,
Offloading,
Completed {
state_query_time_ms: u32,
compress_time_ms: u32,
state_upload_time_ms: u32,
},
Completed,
Failed {
error: String,
},
/// LFC state was empty so it wasn't offloaded
Skipped,
}
#[derive(Serialize, Debug, Clone)]
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(tag = "status", rename_all = "snake_case")]
/// Response of /promote
pub enum PromoteState {
NotPromoted,
Completed {
lsn_wait_time_ms: u32,
pg_promote_time_ms: u32,
reconfigure_time_ms: u32,
},
Failed {
error: String,
},
Completed,
Failed { error: String },
}
#[derive(Deserialize, Default, Debug)]

View File

@@ -5,17 +5,12 @@ use std::sync::Arc;
use bytes::Bytes;
use http::Method;
use http::header::{
ACCESS_CONTROL_ALLOW_HEADERS, ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN,
ACCESS_CONTROL_EXPOSE_HEADERS, ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_REQUEST_HEADERS, ALLOW,
AUTHORIZATION, CONTENT_TYPE, HOST, ORIGIN,
};
use http::header::{AUTHORIZATION, CONTENT_TYPE, HOST};
use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Empty, Full};
use http_body_util::{BodyExt, Full};
use http_utils::error::ApiError;
use hyper::body::Incoming;
use hyper::http::response::Builder;
use hyper::http::{HeaderMap, HeaderName, HeaderValue};
use hyper::http::{HeaderName, HeaderValue};
use hyper::{Request, Response, StatusCode};
use indexmap::IndexMap;
use moka::sync::Cache;
@@ -72,15 +67,6 @@ use crate::util::deserialize_json_string;
static EMPTY_JSON_SCHEMA: &str = r#"{"schemas":[]}"#;
const INTROSPECTION_SQL: &str = POSTGRESQL_INTROSPECTION_SQL;
const HEADER_VALUE_ALLOW_ALL_ORIGINS: HeaderValue = HeaderValue::from_static("*");
// CORS headers values
const ACCESS_CONTROL_ALLOW_METHODS_VALUE: HeaderValue =
HeaderValue::from_static("GET, POST, PATCH, PUT, DELETE, OPTIONS");
const ACCESS_CONTROL_MAX_AGE_VALUE: HeaderValue = HeaderValue::from_static("86400");
const ACCESS_CONTROL_EXPOSE_HEADERS_VALUE: HeaderValue = HeaderValue::from_static(
"Content-Encoding, Content-Location, Content-Range, Content-Type, Date, Location, Server, Transfer-Encoding, Range-Unit",
);
const ACCESS_CONTROL_ALLOW_HEADERS_VALUE: HeaderValue = HeaderValue::from_static("Authorization");
// A wrapper around the DbSchema that allows for self-referencing
#[self_referencing]
@@ -151,8 +137,6 @@ pub struct ApiConfig {
pub role_claim_key: String,
#[serde(default, deserialize_with = "deserialize_comma_separated_option")]
pub db_extra_search_path: Option<Vec<String>>,
#[serde(default, deserialize_with = "deserialize_comma_separated_option")]
pub server_cors_allowed_origins: Option<Vec<String>>,
}
// The DbSchemaCache is a cache of the ApiConfig and DbSchemaOwned for each endpoint
@@ -181,13 +165,7 @@ impl DbSchemaCache {
}
}
pub fn get_cached(
&self,
endpoint_id: &EndpointCacheKey,
) -> Option<Arc<(ApiConfig, DbSchemaOwned)>> {
count_cache_outcome(CacheKind::Schema, self.0.get(endpoint_id))
}
pub async fn get_remote(
pub async fn get_cached_or_remote(
&self,
endpoint_id: &EndpointCacheKey,
auth_header: &HeaderValue,
@@ -196,42 +174,47 @@ impl DbSchemaCache {
ctx: &RequestContext,
config: &'static ProxyConfig,
) -> Result<Arc<(ApiConfig, DbSchemaOwned)>, RestError> {
info!("db_schema cache miss for endpoint: {:?}", endpoint_id);
let remote_value = self
.internal_get_remote(auth_header, connection_string, client, ctx, config)
.await;
let (api_config, schema_owned) = match remote_value {
Ok((api_config, schema_owned)) => (api_config, schema_owned),
Err(e @ RestError::SchemaTooLarge) => {
// for the case where the schema is too large, we cache an empty dummy value
// all the other requests will fail without triggering the introspection query
let schema_owned = serde_json::from_str::<DbSchemaOwned>(EMPTY_JSON_SCHEMA)
.map_err(|e| JsonDeserialize { source: e })?;
let cache_result = count_cache_outcome(CacheKind::Schema, self.0.get(endpoint_id));
match cache_result {
Some(v) => Ok(v),
None => {
info!("db_schema cache miss for endpoint: {:?}", endpoint_id);
let remote_value = self
.get_remote(auth_header, connection_string, client, ctx, config)
.await;
let (api_config, schema_owned) = match remote_value {
Ok((api_config, schema_owned)) => (api_config, schema_owned),
Err(e @ RestError::SchemaTooLarge) => {
// for the case where the schema is too large, we cache an empty dummy value
// all the other requests will fail without triggering the introspection query
let schema_owned = serde_json::from_str::<DbSchemaOwned>(EMPTY_JSON_SCHEMA)
.map_err(|e| JsonDeserialize { source: e })?;
let api_config = ApiConfig {
db_schemas: vec![],
db_anon_role: None,
db_max_rows: None,
db_allowed_select_functions: vec![],
role_claim_key: String::new(),
db_extra_search_path: None,
server_cors_allowed_origins: None,
let api_config = ApiConfig {
db_schemas: vec![],
db_anon_role: None,
db_max_rows: None,
db_allowed_select_functions: vec![],
role_claim_key: String::new(),
db_extra_search_path: None,
};
let value = Arc::new((api_config, schema_owned));
count_cache_insert(CacheKind::Schema);
self.0.insert(endpoint_id.clone(), value);
return Err(e);
}
Err(e) => {
return Err(e);
}
};
let value = Arc::new((api_config, schema_owned));
count_cache_insert(CacheKind::Schema);
self.0.insert(endpoint_id.clone(), value);
return Err(e);
self.0.insert(endpoint_id.clone(), value.clone());
Ok(value)
}
Err(e) => {
return Err(e);
}
};
let value = Arc::new((api_config, schema_owned));
count_cache_insert(CacheKind::Schema);
self.0.insert(endpoint_id.clone(), value.clone());
Ok(value)
}
}
async fn internal_get_remote(
pub async fn get_remote(
&self,
auth_header: &HeaderValue,
connection_string: &str,
@@ -548,7 +531,7 @@ pub(crate) async fn handle(
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
let result = handle_inner(cancel, config, &ctx, request, backend).await;
let response = match result {
let mut response = match result {
Ok(r) => {
ctx.set_success();
@@ -657,6 +640,9 @@ pub(crate) async fn handle(
}
};
response
.headers_mut()
.insert("Access-Control-Allow-Origin", HeaderValue::from_static("*"));
Ok(response)
}
@@ -736,37 +722,6 @@ async fn handle_inner(
}
}
fn apply_common_cors_headers(
response: &mut Builder,
request_headers: &HeaderMap,
allowed_origins: Option<&Vec<String>>,
) {
let request_origin = request_headers
.get(ORIGIN)
.map(|v| v.to_str().unwrap_or(""));
let response_allow_origin = match (request_origin, allowed_origins) {
(Some(or), Some(allowed_origins)) => {
if allowed_origins.iter().any(|o| o == or) {
Some(HeaderValue::from_str(or).unwrap_or(HEADER_VALUE_ALLOW_ALL_ORIGINS))
} else {
None
}
}
(Some(_), None) => Some(HEADER_VALUE_ALLOW_ALL_ORIGINS),
_ => None,
};
if let Some(h) = response.headers_mut() {
h.insert(
ACCESS_CONTROL_EXPOSE_HEADERS,
ACCESS_CONTROL_EXPOSE_HEADERS_VALUE,
);
if let Some(origin) = response_allow_origin {
h.insert(ACCESS_CONTROL_ALLOW_ORIGIN, origin);
}
}
}
#[allow(clippy::too_many_arguments)]
async fn handle_rest_inner(
config: &'static ProxyConfig,
@@ -778,6 +733,12 @@ async fn handle_rest_inner(
jwt: String,
backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, RestError> {
// validate the jwt token
let jwt_parsed = backend
.authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
.await
.map_err(HttpConnError::from)?;
let db_schema_cache =
config
.rest_config
@@ -793,83 +754,28 @@ async fn handle_rest_inner(
message: "Failed to get endpoint cache key".to_string(),
}))?;
let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;
let (parts, originial_body) = request.into_parts();
// try and get the cached entry for this endpoint
// it contains the api config and the introspected db schema
let cached_entry = db_schema_cache.get_cached(&endpoint_cache_key);
let allowed_origins = cached_entry
.as_ref()
.and_then(|arc| arc.0.server_cors_allowed_origins.as_ref());
let mut response = Response::builder();
apply_common_cors_headers(&mut response, &parts.headers, allowed_origins);
// handle the OPTIONS request
if parts.method == Method::OPTIONS {
let allowed_headers = parts
.headers
.get(ACCESS_CONTROL_REQUEST_HEADERS)
.and_then(|a| a.to_str().ok())
.filter(|v| !v.is_empty())
.map_or_else(
|| "Authorization".to_string(),
|v| format!("{v}, Authorization"),
);
return response
.status(StatusCode::OK)
.header(
ACCESS_CONTROL_ALLOW_METHODS,
ACCESS_CONTROL_ALLOW_METHODS_VALUE,
)
.header(ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_MAX_AGE_VALUE)
.header(
ACCESS_CONTROL_ALLOW_HEADERS,
HeaderValue::from_str(&allowed_headers)
.unwrap_or(ACCESS_CONTROL_ALLOW_HEADERS_VALUE),
)
.header(ALLOW, ACCESS_CONTROL_ALLOW_METHODS_VALUE)
.body(Empty::new().map_err(|x| match x {}).boxed())
.map_err(|e| {
RestError::SubzeroCore(InternalError {
message: e.to_string(),
})
});
}
// validate the jwt token
let jwt_parsed = backend
.authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
.await
.map_err(HttpConnError::from)?;
let auth_header = parts
.headers
.get(AUTHORIZATION)
.ok_or(RestError::SubzeroCore(InternalError {
message: "Authorization header is required".to_string(),
}))?;
let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;
let entry = match cached_entry {
Some(e) => e,
None => {
// if not cached, get the remote entry (will run the introspection query)
db_schema_cache
.get_remote(
&endpoint_cache_key,
auth_header,
connection_string,
&mut client,
ctx,
config,
)
.await?
}
};
let entry = db_schema_cache
.get_cached_or_remote(
&endpoint_cache_key,
auth_header,
connection_string,
&mut client,
ctx,
config,
)
.await?;
let (api_config, db_schema_owned) = entry.as_ref();
let db_schema = db_schema_owned.borrow_schema();
let db_schemas = &api_config.db_schemas; // list of schemas available for the api
@@ -1093,8 +999,8 @@ async fn handle_rest_inner(
let _metrics = client.metrics(ctx); // FIXME: is everything in the context set correctly?
// send the request to the local proxy
let proxy_response = make_raw_local_proxy_request(&mut client, headers, req_body).await?;
let (response_parts, body) = proxy_response.into_parts();
let response = make_raw_local_proxy_request(&mut client, headers, req_body).await?;
let (parts, body) = response.into_parts();
let max_response = config.http_config.max_response_size_bytes;
let bytes = read_body_with_limit(body, max_response)
@@ -1103,7 +1009,7 @@ async fn handle_rest_inner(
// if the response status is greater than 399, then it is an error
// FIXME: check if there are other error codes or shapes of the response
if response_parts.status.as_u16() > 399 {
if parts.status.as_u16() > 399 {
// turn this postgres error from the json into PostgresError
let postgres_error = serde_json::from_slice(&bytes)
.map_err(|e| RestError::SubzeroCore(JsonDeserialize { source: e }))?;
@@ -1269,7 +1175,7 @@ async fn handle_rest_inner(
.boxed();
// build the response
response = response
let mut response = Response::builder()
.status(StatusCode::from_u16(status).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))
.header(CONTENT_TYPE, http_content_type);

View File

@@ -644,7 +644,6 @@ async fn handle_tenant_timeline_safekeeper_migrate(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
// TODO(diko): it's not PS operation, there should be a different permission scope.
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
@@ -666,23 +665,6 @@ async fn handle_tenant_timeline_safekeeper_migrate(
json_response(StatusCode::OK, ())
}
async fn handle_tenant_timeline_safekeeper_migrate_abort(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
// TODO(diko): it's not PS operation, there should be a different permission scope.
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
service
.tenant_timeline_safekeeper_migrate_abort(tenant_id, timeline_id)
.await?;
json_response(StatusCode::OK, ())
}
async fn handle_tenant_timeline_lsn_lease(
service: Arc<Service>,
req: Request<Body>,
@@ -2629,16 +2611,6 @@ pub fn make_router(
)
},
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate_abort",
|r| {
tenant_service_handler(
r,
handle_tenant_timeline_safekeeper_migrate_abort,
RequestName("v1_tenant_timeline_safekeeper_migrate_abort"),
)
},
)
// LSN lease passthrough to all shards
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/lsn_lease",

View File

@@ -1230,7 +1230,10 @@ impl Service {
}
// It it is the same new_sk_set, we can continue the migration (retry).
} else {
if !is_migration_finished(&timeline) {
let prev_finished = timeline.cplane_notified_generation == timeline.generation
&& timeline.sk_set_notified_generation == timeline.generation;
if !prev_finished {
// The previous migration is committed, but the finish step failed.
// Safekeepers/cplane might not know about the last membership configuration.
// Retry the finish step to ensure smooth migration.
@@ -1542,8 +1545,6 @@ impl Service {
timeline_id: TimelineId,
timeline: &TimelinePersistence,
) -> Result<(), ApiError> {
tracing::info!(generation=?timeline.generation, sk_set=?timeline.sk_set, new_sk_set=?timeline.new_sk_set, "retrying finish safekeeper migration");
if timeline.new_sk_set.is_some() {
// Logical error, should never happen.
return Err(ApiError::InternalServerError(anyhow::anyhow!(
@@ -1623,120 +1624,4 @@ impl Service {
Ok(wal_positions[quorum_size - 1])
}
/// Abort ongoing safekeeper migration.
pub(crate) async fn tenant_timeline_safekeeper_migrate_abort(
self: &Arc<Self>,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Result<(), ApiError> {
// TODO(diko): per-tenant lock is too wide. Consider introducing per-timeline locks.
let _tenant_lock = trace_shared_lock(
&self.tenant_op_locks,
tenant_id,
TenantOperations::TimelineSafekeeperMigrate,
)
.await;
// Fetch current timeline configuration from the configuration storage.
let timeline = self
.persistence
.get_timeline(tenant_id, timeline_id)
.await?;
let Some(timeline) = timeline else {
return Err(ApiError::NotFound(
anyhow::anyhow!(
"timeline {tenant_id}/{timeline_id} doesn't exist in timelines table"
)
.into(),
));
};
let mut generation = SafekeeperGeneration::new(timeline.generation as u32);
let Some(new_sk_set) = &timeline.new_sk_set else {
// No new_sk_set -> no active migration that we can abort.
tracing::info!("timeline has no active migration");
if !is_migration_finished(&timeline) {
// The last migration is committed, but the finish step failed.
// Safekeepers/cplane might not know about the last membership configuration.
// Retry the finish step to make the timeline state clean.
self.finish_safekeeper_migration_retry(tenant_id, timeline_id, &timeline)
.await?;
}
return Ok(());
};
tracing::info!(sk_set=?timeline.sk_set, ?new_sk_set, ?generation, "aborting timeline migration");
let cur_safekeepers = self.get_safekeepers(&timeline.sk_set)?;
let new_safekeepers = self.get_safekeepers(new_sk_set)?;
let cur_sk_member_set =
Self::make_member_set(&cur_safekeepers).map_err(ApiError::InternalServerError)?;
// Increment current generation and remove new_sk_set from the timeline to abort the migration.
generation = generation.next();
let mconf = membership::Configuration {
generation,
members: cur_sk_member_set,
new_members: None,
};
// Exclude safekeepers which were added during the current migration.
let cur_ids: HashSet<NodeId> = cur_safekeepers.iter().map(|sk| sk.get_id()).collect();
let exclude_safekeepers = new_safekeepers
.into_iter()
.filter(|sk| !cur_ids.contains(&sk.get_id()))
.collect::<Vec<_>>();
let exclude_requests = exclude_safekeepers
.iter()
.map(|sk| TimelinePendingOpPersistence {
sk_id: sk.skp.id,
tenant_id: tenant_id.to_string(),
timeline_id: timeline_id.to_string(),
generation: generation.into_inner() as i32,
op_kind: SafekeeperTimelineOpKind::Exclude,
})
.collect::<Vec<_>>();
let cur_sk_set = cur_safekeepers
.iter()
.map(|sk| sk.get_id())
.collect::<Vec<_>>();
// Persist new mconf and exclude requests.
self.persistence
.update_timeline_membership(
tenant_id,
timeline_id,
generation,
&cur_sk_set,
None,
&exclude_requests,
)
.await?;
// At this point we have already commited the abort, but still need to notify
// cplane/safekeepers with the new mconf. That's what finish_safekeeper_migration does.
self.finish_safekeeper_migration(
tenant_id,
timeline_id,
&cur_safekeepers,
&mconf,
&exclude_safekeepers,
)
.await?;
Ok(())
}
}
fn is_migration_finished(timeline: &TimelinePersistence) -> bool {
timeline.cplane_notified_generation == timeline.generation
&& timeline.sk_set_notified_generation == timeline.generation
}

View File

@@ -79,6 +79,7 @@ class NeonAPI:
elif resp.status_code == 423 and resp.json()["message"] in {
"endpoint is in some transitive state, could not suspend",
"project already has running conflicting operations, scheduling of new ones is prohibited",
"snapshot is in transition",
}:
retry = True
self.retries4xx += 1
@@ -355,6 +356,63 @@ class NeonAPI:
return cast("dict[str, Any]", resp.json())
def create_snapshot(
self,
project_id: str,
branch_id: str,
lsn: str | None = None,
timestamp: str | None = None,
name: str | None = None,
expires_at: str | None = None,
) -> dict[str, Any]:
params: dict[str, Any] = {
"lsn": lsn,
"timestamp": timestamp,
"name": name,
"expires_at": expires_at,
}
params = {key: value for key, value in params.items() if value is not None}
resp = self.__request(
"POST",
f"/projects/{project_id}/branches/{branch_id}/snapshot",
params=params,
json={},
headers={
"Accept": "application/json",
},
)
return cast("dict[str, Any]", resp.json())
def delete_snapshot(self, project_id: str, snapshot_id: str) -> dict[str, Any]:
resp = self.__request("DELETE", f"/projects/{project_id}/snapshots/{snapshot_id}")
return cast("dict[str, Any]", resp.json())
def restore_snapshot(
self,
project_id: str,
snapshot_id: str,
target_branch_id: str,
name: str | None = None,
finalize_restore: bool = True,
) -> dict[str, Any]:
data: dict[str, Any] = {
"target_branch_id": target_branch_id,
"finalize_restore": finalize_restore,
}
if name is not None:
data["name"] = name
log.info("Restore snapshot data: %s", data)
resp = self.__request(
"POST",
f"/projects/{project_id}/snapshots/{snapshot_id}/restore",
json=data,
headers={
"Accept": "application/json",
"Content-Type": "application/json",
},
)
return cast("dict[str, Any]", resp.json())
def delete_endpoint(self, project_id: str, endpoint_id: str) -> dict[str, Any]:
resp = self.__request("DELETE", f"/projects/{project_id}/endpoints/{endpoint_id}")
return cast("dict[str,Any]", resp.json())
@@ -396,6 +454,14 @@ class NeonAPI:
return cast("dict[str, Any]", resp.json())
def get_branch_endpoints(self, project_id: str, branch_id: str) -> dict[str, Any]:
resp = self.__request(
"GET",
f"/projects/{project_id}/branches/{branch_id}/endpoints",
headers={"Accept": "application/json", "Content-Type": "application/json"},
)
return cast("dict[str, Any]", resp.json())
def get_endpoints(self, project_id: str) -> dict[str, Any]:
resp = self.__request(
"GET",

View File

@@ -2323,19 +2323,6 @@ class NeonStorageController(MetricsGetter, LogUtils):
response.raise_for_status()
log.info(f"migrate_safekeepers success: {response.json()}")
def abort_safekeeper_migration(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
):
response = self.request(
"POST",
f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort",
headers=self.headers(TokenScope.PAGE_SERVER_API),
)
response.raise_for_status()
log.info(f"abort_safekeeper_migration success: {response.json()}")
def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
"""
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr": str, "listen_http_port": int}

View File

@@ -11,6 +11,7 @@ import time
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING, Any
import psycopg2
import pytest
from fixtures.log_helper import log
@@ -22,6 +23,29 @@ if TYPE_CHECKING:
from fixtures.pg_version import PgVersion
class NeonSnapshot:
"""
A snapshot of the Neon Branch
Gets the output of the API call af a snapshot creation
"""
def __init__(self, project: NeonProject, snapshot: dict[str, Any]):
self.project: NeonProject = project
snapshot = snapshot["snapshot"]
self.id: str = snapshot["id"]
self.name: str = snapshot["name"]
self.created_at: datetime = datetime.fromisoformat(snapshot["created_at"])
self.source_branch: NeonBranch = project.branches[snapshot["source_branch_id"]]
project.snapshots[self.id] = self
self.restored: bool = False
def __str__(self) -> str:
return f"id: {self.id}, name: {self.name}, created_at: {self.created_at}"
def delete(self) -> None:
self.project.delete_snapshot(self.id)
class NeonEndpoint:
"""
Neon Endpoint
@@ -70,6 +94,12 @@ class NeonBranch:
def __init__(self, project, branch: dict[str, Any], is_reset=False):
self.id: str = branch["branch"]["id"]
self.desc = branch
self.name: str | None = None
if "name" in branch["branch"]:
self.name = branch["branch"]["name"]
self.restored_from: str | None = None
if "restored_from" in branch["branch"]:
self.restored_from = branch["branch"]["restored_from"]
self.project: NeonProject = project
self.neon_api: NeonAPI = project.neon_api
self.project_id: str = branch["branch"]["project_id"]
@@ -113,10 +143,9 @@ class NeonBranch:
def __str__(self):
"""
Prints the branch's name with all the predecessors
(r) means the branch is a reset one
Prints the branch's information with all the predecessors
"""
return f"{self.id}{'(r)' if self.id in self.project.reset_branches else ''}, parent: {self.parent}"
return f"{self.id}{f'({self.name})' if self.name and self.name != self.id else ''}{f'(restored_from: {self.restored_from})' if self.restored_from else ''}, parent: {self.parent}"
def random_time(self) -> datetime:
min_time = max(
@@ -152,6 +181,9 @@ class NeonBranch:
self.project.terminate_benchmark(self.id)
def reset_to_parent(self) -> None:
"""
Resets the branch to the parent branch
"""
for ep in self.project.endpoints.values():
if ep.type == "read_only":
ep.terminate_benchmark()
@@ -240,6 +272,7 @@ class NeonProject:
# Leaf branches are the branches, which do not have children
self.leaf_branches: dict[str, NeonBranch] = {}
self.branches: dict[str, NeonBranch] = {}
self.branch_num: int = 0
self.reset_branches: set[str] = set()
self.main_branch: NeonBranch = NeonBranch(self, proj)
self.main_branch.connection_parameters = self.connection_parameters
@@ -253,6 +286,8 @@ class NeonProject:
self.limits: dict[str, Any] = self.get_limits()["limits"]
self.read_only_endpoints_total: int = 0
self.min_time: datetime = datetime.now(UTC)
self.snapshots: dict[str, NeonSnapshot] = {}
self.snapshot_num: int = 0
def get_limits(self) -> dict[str, Any]:
return self.neon_api.get_project_limits(self.id)
@@ -280,7 +315,10 @@ class NeonProject:
return False
def create_branch(
self, parent_id: str | None = None, parent_timestamp: datetime | None = None
self,
parent_id: str | None = None,
parent_timestamp: datetime | None = None,
is_reset: bool = False,
) -> NeonBranch | None:
self.wait()
if not self.check_limit_branches():
@@ -293,14 +331,14 @@ class NeonProject:
branch_def = self.neon_api.create_branch(
self.id, parent_id=parent_id, parent_timestamp=parent_timestamp_str
)
new_branch = NeonBranch(self, branch_def)
new_branch = NeonBranch(self, branch_def, is_reset)
self.wait()
return new_branch
def delete_branch(self, branch_id: str) -> None:
parent = self.branches[branch_id].parent
if not parent or branch_id == self.main_branch.id:
raise RuntimeError("Cannot delete the main branch")
raise RuntimeError("Cannot delete the main branch or a branch restored from a snapshot")
if branch_id not in self.leaf_branches and branch_id not in self.reset_branches:
raise RuntimeError(f"The branch {branch_id}, probably, has ancestors")
if branch_id not in self.branches:
@@ -313,7 +351,7 @@ class NeonProject:
if branch_id not in self.reset_branches:
self.terminate_benchmark(branch_id)
self.neon_api.delete_branch(self.id, branch_id)
if len(parent.children) == 1 and parent.id != self.main_branch.id:
if len(parent.children) == 1 and parent.parent is not None:
self.leaf_branches[parent.id] = parent
parent.children.pop(branch_id)
if branch_id in self.leaf_branches:
@@ -333,6 +371,22 @@ class NeonProject:
log.info("No leaf branches found")
return target
def get_random_parent_branch(self) -> NeonBranch:
return self.branches[random.choice(list(set(self.branches.keys()) - self.reset_branches))]
def gen_branch_name(self) -> str:
self.branch_num += 1
return f"branch{self.branch_num}"
def get_random_snapshot(self) -> NeonSnapshot | None:
snapshot: NeonSnapshot | None = None
avail_snapshots = [sn for sn in self.snapshots.values() if not sn.restored]
if avail_snapshots:
snapshot = random.choice(avail_snapshots)
else:
log.info("No snapshots found")
return snapshot
def delete_endpoint(self, endpoint_id: str) -> None:
self.terminate_benchmark(endpoint_id)
self.neon_api.delete_endpoint(self.id, endpoint_id)
@@ -409,6 +463,116 @@ class NeonProject:
self.restore_num += 1
return f"restore{self.restore_num}"
def gen_snapshot_name(self) -> str:
self.snapshot_num += 1
return f"snapshot{self.snapshot_num}"
def create_snapshot(
self,
lsn: str | None = None,
timestamp: datetime | None = None,
) -> NeonSnapshot:
"""
Create a new Neon snapshot for the current project
Two optional arguments: lsn and timestamp are mutually exclusive
they instruct to create a snapshot with the specific lns or timestamp
"""
snapshot_name = self.gen_snapshot_name()
with psycopg2.connect(self.connection_uri) as conn:
with conn.cursor() as cur:
# We will check the value we set now after the snapshot restored to verify consistency
cur.execute(
f"INSERT INTO sanity_check (name, value) VALUES "
f"('snapsot_name', '{snapshot_name}') ON CONFLICT (name) DO UPDATE SET value = EXCLUDED.value"
)
conn.commit()
snapshot = NeonSnapshot(
self,
self.neon_api.create_snapshot(
self.id,
self.main_branch.id,
lsn,
timestamp.isoformat().replace("+00:00", "Z") if timestamp else None,
snapshot_name,
),
)
self.wait()
# Now we taint the value after the snapshot was taken
cur.execute("UPDATE sanity_check SET value = 'tainted' || value")
conn.commit()
return snapshot
def delete_snapshot(self, snapshot_id: str) -> None:
"""
Deletes the snapshot with the given id
"""
self.wait()
self.neon_api.delete_snapshot(self.id, snapshot_id)
self.snapshots.pop(snapshot_id)
self.wait()
def restore_snapshot(self, snapshot_id: str) -> NeonBranch | None:
"""
Creates a new Neon branch for the current project, then restores the snapshot
with the given id
"""
target_branch = self.get_random_parent_branch().create_child_branch()
if not target_branch:
return None
self.snapshots[snapshot_id].restored = True
new_branch_def: dict[str, Any] = self.neon_api.restore_snapshot(
self.id,
snapshot_id,
target_branch.id,
self.gen_branch_name(),
)
self.wait()
new_branch_def = self.neon_api.get_branch_details(self.id, new_branch_def["branch"]["id"])
# The restored branch will lose the parent afterward, but it has it during the restoration.
# So, we delete parent_id
new_branch_def["branch"].pop("parent_id")
new_branch = NeonBranch(self, new_branch_def)
log.info("Restored snapshot to the branch: %s", new_branch)
target_branch_def = self.neon_api.get_branch_details(self.id, target_branch.id)
if "name" in target_branch_def["branch"]:
target_branch.name = target_branch_def["branch"]["name"]
if new_branch.connection_parameters is None:
if not new_branch.endpoints:
for ep in self.neon_api.get_branch_endpoints(self.id, new_branch.id)["endpoints"]:
if ep["id"] not in self.endpoints:
NeonEndpoint(self, ep)
new_branch.connection_parameters = self.connection_parameters.copy()
for ep in new_branch.endpoints.values():
if ep.type == "read_write":
new_branch.connection_parameters["host"] = ep.host
break
new_branch.connect_env = {
"PGHOST": new_branch.connection_parameters["host"],
"PGUSER": new_branch.connection_parameters["role"],
"PGDATABASE": new_branch.connection_parameters["database"],
"PGPASSWORD": new_branch.connection_parameters["password"],
"PGSSLMODE": "require",
}
with psycopg2.connect(
host=new_branch.connection_parameters["host"],
port=5432,
user=new_branch.connection_parameters["role"],
password=new_branch.connection_parameters["password"],
database=new_branch.connection_parameters["database"],
) as conn:
with conn.cursor() as cur:
cur.execute("SELECT value FROM sanity_check WHERE name = 'snapsot_name'")
snapshot_name = None
if row := cur.fetchone():
snapshot_name = row[0]
# We verify here that the value we select from the table matches with the snapshot name
# To ensure consistency
assert snapshot_name == self.snapshots[snapshot_id].name
self.wait()
target_branch.start_benchmark()
new_branch.start_benchmark()
return new_branch
@pytest.fixture()
def setup_class(
@@ -438,9 +602,7 @@ def do_action(project: NeonProject, action: str) -> bool:
if action == "new_branch" or action == "new_branch_random_time":
use_random_time: bool = action == "new_branch_random_time"
log.info("Trying to create a new branch %s", "random time" if use_random_time else "")
parent = project.branches[
random.choice(list(set(project.branches.keys()) - project.reset_branches))
]
parent = project.get_random_parent_branch()
child = parent.create_child_branch(parent.random_time() if use_random_time else None)
if child is None:
return False
@@ -479,6 +641,23 @@ def do_action(project: NeonProject, action: str) -> bool:
return False
log.info("Reset to parent %s", target)
target.reset_to_parent()
elif action == "create_snapshot":
snapshot = project.create_snapshot()
if snapshot is None:
return False
log.info("Created snapshot %s", snapshot)
elif action == "restore_snapshot":
if (snapshot_to_restore := project.get_random_snapshot()) is None:
return False
log.info("Restoring snapshot %s", snapshot_to_restore)
if project.restore_snapshot(snapshot_to_restore.id) is None:
return False
elif action == "delete_snapshot":
snapshot_to_delete = project.get_random_snapshot()
if snapshot_to_delete is None:
return False
snapshot_to_delete.delete()
log.info("Deleted snapshot %s", snapshot_to_delete)
else:
raise ValueError(f"The action {action} is unknown")
return True
@@ -512,12 +691,28 @@ def test_api_random(
("delete_branch", 1.2),
("restore_random_time", 0.9),
("reset_to_parent", 0.3),
("create_snapshot", 0.2),
("restore_snapshot", 0.1),
("delete_snapshot", 0.1),
)
if num_ops_env := os.getenv("NUM_OPERATIONS"):
num_operations = int(num_ops_env)
else:
num_operations = 250
pg_bin.run(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=project.main_branch.connect_env)
# Create a table for sanity check
# We are going to leve some control values there to check, e.g., after restoring a snapshot
pg_bin.run(
[
"psql",
"-c",
"CREATE TABLE IF NOT EXISTS sanity_check (name VARCHAR NOT NULL PRIMARY KEY, value VARCHAR)",
],
env=project.main_branch.connect_env,
)
# To not go to the past where pgbench tables do not exist
time.sleep(1)
project.min_time = datetime.now(UTC)
# To not go to the past where pgbench tables do not exist
time.sleep(1)
project.min_time = datetime.now(UTC)

View File

@@ -145,7 +145,6 @@ def test_replica_promote(neon_simple_env: NeonEnv, method: PromoteMethod):
stop_and_check_lsn(secondary, None)
if method == PromoteMethod.COMPUTE_CTL:
log.info("Restarting primary to check new config")
secondary.stop()
# In production, compute ultimately receives new compute spec from cplane.
secondary.respec(mode="Primary")

View File

@@ -460,91 +460,3 @@ def test_pull_from_most_advanced_sk(neon_env_builder: NeonEnvBuilder):
ep.start(safekeeper_generation=5, safekeepers=new_sk_set2)
assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
def test_abort_safekeeper_migration(neon_env_builder: NeonEnvBuilder):
"""
Test that safekeeper migration can be aborted.
1. Insert failpoints and ensure the abort successfully reverts the timeline state.
2. Check that endpoint is operational after the abort.
"""
neon_env_builder.num_safekeepers = 2
neon_env_builder.storage_controller_config = {
"timelines_onto_safekeepers": True,
"timeline_safekeeper_count": 1,
}
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
assert len(mconf["sk_set"]) == 1
cur_sk = mconf["sk_set"][0]
cur_gen = 1
ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
ep.start(safekeeper_generation=1, safekeepers=mconf["sk_set"])
ep.safe_psql("CREATE EXTENSION neon_test_utils;")
ep.safe_psql("CREATE TABLE t(a int)")
ep.safe_psql("INSERT INTO t VALUES (1)")
another_sk = [sk.id for sk in env.safekeepers if sk.id != cur_sk][0]
failpoints = [
"sk-migration-after-step-3",
"sk-migration-after-step-4",
"sk-migration-after-step-5",
"sk-migration-after-step-7",
]
for fp in failpoints:
env.storage_controller.configure_failpoints((fp, "return(1)"))
with pytest.raises(StorageControllerApiException, match=f"failpoint {fp}"):
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, [another_sk]
)
cur_gen += 1
env.storage_controller.configure_failpoints((fp, "off"))
# We should have a joint mconf after the failure.
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
assert mconf["generation"] == cur_gen
assert mconf["sk_set"] == [cur_sk]
assert mconf["new_sk_set"] == [another_sk]
env.storage_controller.abort_safekeeper_migration(env.initial_tenant, env.initial_timeline)
cur_gen += 1
# Abort should revert the timeline to the previous sk_set and increment the generation.
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
assert mconf["generation"] == cur_gen
assert mconf["sk_set"] == [cur_sk]
assert mconf["new_sk_set"] is None
assert ep.safe_psql("SHOW neon.safekeepers")[0][0].startswith(f"g#{cur_gen}:")
ep.safe_psql(f"INSERT INTO t VALUES ({cur_gen})")
# After step-8 the final mconf is committed and the migration is not abortable anymore.
# So the abort should not abort anything.
env.storage_controller.configure_failpoints(("sk-migration-after-step-8", "return(1)"))
with pytest.raises(StorageControllerApiException, match="failpoint sk-migration-after-step-8"):
env.storage_controller.migrate_safekeepers(
env.initial_tenant, env.initial_timeline, [another_sk]
)
cur_gen += 2
env.storage_controller.configure_failpoints((fp, "off"))
env.storage_controller.abort_safekeeper_migration(env.initial_tenant, env.initial_timeline)
# The migration is fully committed, no abort should have been performed.
mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
assert mconf["generation"] == cur_gen
assert mconf["sk_set"] == [another_sk]
assert mconf["new_sk_set"] is None
ep.safe_psql(f"INSERT INTO t VALUES ({cur_gen})")
ep.clear_buffers()
assert ep.safe_psql("SELECT * FROM t") == [(i + 1,) for i in range(cur_gen) if i % 2 == 0]