Mirror of https://github.com/neondatabase/neon.git, synced 2026-05-17 21:20:37 +00:00.

Compare commits: skyzh/leas...test-proxy (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | d3e1024770 | |
| | 2c317dad14 | |

.github/workflows/proxy-benchmark.yml (vendored), 69 changes
@@ -13,6 +13,12 @@ on:
  # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
  # - cron: '0 5 * * *' # Runs at 5 UTC once a day
  workflow_dispatch: # adds an ability to run this manually
    inputs:
      commit_hash:
        type: string
        description: 'The long neon repo commit hash for the system under test (proxy) to be tested.'
        required: false
        default: ''

defaults:
  run:
@@ -33,20 +39,14 @@ jobs:
      contents: write
      pull-requests: write
    runs-on: [ self-hosted, unit-perf-aws-arm ]
    timeout-minutes: 60 # 1h timeout
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --init
    timeout-minutes: 60 # 1h timeout
    steps:
      - name: Checkout proxy-bench Repo
        uses: actions/checkout@v4
        with:
          repository: neondatabase/proxy-bench
          path: proxy-bench

      - name: Set up the environment which depends on $RUNNER_TEMP on nvme drive
        id: set-env
        shell: bash -euxo pipefail {0}
@@ -54,19 +54,64 @@ jobs:
          PROXY_BENCH_PATH=$(realpath ./proxy-bench)
          {
            echo "PROXY_BENCH_PATH=$PROXY_BENCH_PATH"
            echo "NEON_DIR=${RUNNER_TEMP}/neon"
            echo "NEON_PROXY_PATH=${RUNNER_TEMP}/neon/bin/proxy"
            echo "NEON_DIR=${GITHUB_WORKSPACE}/"
            echo "NEON_PROXY_PATH=${GITHUB_WORKSPACE}/bin/proxy"
            echo "TEST_OUTPUT=${PROXY_BENCH_PATH}/test_output"
            echo "DOCKER_COMPOSE_FILE=${PROXY_BENCH_PATH}/docker-compose.yml"
            echo ""
          } >> "$GITHUB_ENV"

      - name: Determine commit hash
        id: commit_hash
        shell: bash -euxo pipefail {0}
        env:
          INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
        run: |
          if [[ -z "${INPUT_COMMIT_HASH}" ]]; then
            COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')
            echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
            echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
            echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV
          else
            COMMIT_HASH="${INPUT_COMMIT_HASH}"
            echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
            echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
            echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV
          fi
      - name: Checkout the neon repository at given commit hash
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ steps.commit_hash.outputs.commit_hash }}

      - name: Print GITHUB_WORKSPACE
        run: echo "$GITHUB_WORKSPACE"

      - name: List parent dir of workspace
        run: ls -la /__w/neon

      - name: List all env vars
        run: env

      - name: Checkout proxy-bench Repo
        uses: actions/checkout@v4
        with:
          repository: neondatabase/proxy-bench
          path: proxy-bench

      - name: Cache poetry deps
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry/virtualenvs
          key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}

      - name: DEBUG List files for debugging
        working-directory: ${{ env.NEON_DIR }}
        run: |
          pwd
          ls -la

      - name: Install Python deps
        working-directory: ${{ env.NEON_DIR }}
        shell: bash -euxo pipefail {0}
        run: ./scripts/pysync

@@ -77,14 +122,14 @@ jobs:

      - name: Run proxy-bench
        working-directory: ${{ env.PROXY_BENCH_PATH }}
        run: ./run.sh --with-grafana --bare-metal
        run: ./run.sh --bare-metal

      - name: Ingest Bench Results
        if: always()
        working-directory: ${{ env.NEON_DIR }}
        run: |
          mkdir -p $TEST_OUTPUT
          python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT
          python3 ./scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT

      - name: Push Metrics to Proxy perf database
        shell: bash -euxo pipefail {0}
@@ -109,4 +154,4 @@ jobs:
          fi
          if [[ -d "${PROXY_BENCH_PATH}/test_output" ]]; then
            rm -rf ${PROXY_BENCH_PATH}/test_output
          fi
          fi

@@ -2648,11 +2648,7 @@ LIMIT 100",
|
||||
/// the pageserver connection strings has changed.
|
||||
///
|
||||
/// The operation will time out after a specified duration.
|
||||
pub fn wait_timeout_while_pageserver_connstr_unchanged(
|
||||
&self,
|
||||
duration: Duration,
|
||||
request_pageserver_conninfo: &PageserverConnectionInfo,
|
||||
) {
|
||||
pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
|
||||
let state = self.state.lock().unwrap();
|
||||
let old_pageserver_conninfo = state
|
||||
.pspec
|
||||
@@ -2660,10 +2656,6 @@ LIMIT 100",
|
||||
.expect("spec must be set")
|
||||
.pageserver_conninfo
|
||||
.clone();
|
||||
if request_pageserver_conninfo != &old_pageserver_conninfo {
|
||||
info!("Pageserver config changed during the previous request");
|
||||
return;
|
||||
}
|
||||
let mut unchanged = true;
|
||||
let _ = self
|
||||
.state_changed
|
||||
|
||||
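For context, the compute-side wait in this hunk is essentially a timed wait on a condition variable while a predicate keeps holding. A minimal sketch of that pattern, assuming a `state` mutex paired with a `state_changed` condvar as the snippet above suggests (the `Waiter` type and `unchanged` closure are illustrative, not repository code):

```rust
use std::sync::{Condvar, Mutex};
use std::time::Duration;

struct Waiter<T> {
    state: Mutex<T>,
    state_changed: Condvar,
}

impl<T> Waiter<T> {
    /// Block for at most `duration`, waking up early only once `unchanged`
    /// stops holding for the current state.
    fn wait_timeout_while(&self, duration: Duration, mut unchanged: impl FnMut(&T) -> bool) {
        let guard = self.state.lock().unwrap();
        // `wait_timeout_while` re-checks the predicate on every wakeup and
        // returns once it is false or the timeout elapses.
        let _ = self
            .state_changed
            .wait_timeout_while(guard, duration, |state| unchanged(state))
            .unwrap();
    }
}
```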
@@ -1,13 +1,13 @@
use std::str::FromStr;
use std::sync::Arc;
use std::thread;
use std::time::{Duration, SystemTime};

use anyhow::{Result, bail};
use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverProtocol};
use futures::StreamExt;
use pageserver_page_api as page_api;
use postgres::{NoTls, SimpleQueryMessage};
use tracing::{Instrument, info, warn};
use tracing::{info, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::shard::TenantShardId;
@@ -15,7 +15,7 @@ use utils::shard::TenantShardId;
use crate::compute::ComputeNode;

/// Spawns a background thread to periodically renew LSN leases for static compute.
/// Do nothing if the compute is not in static mode. MUST run this within a tokio runtime.
/// Do nothing if the compute is not in static mode.
pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {
    let (tenant_id, timeline_id, lsn) = {
        let state = compute.state.lock().unwrap();
@@ -28,27 +28,24 @@ pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {
    let compute = compute.clone();

    let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id, %lsn);
    tokio::spawn(
        async move {
            if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn).await {
                // TODO: might need stronger error feedback than logging an warning.
                warn!("Exited with error: {e}");
            }
    thread::spawn(move || {
        let _entered = span.entered();
        if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) {
            // TODO: might need stronger error feedback than logging an warning.
            warn!("Exited with error: {e}");
        }
        .instrument(span),
    );
    });
}
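The two variants in this hunk differ only in how the background work is scheduled: one drives an async task on the tokio runtime and attaches the span with `.instrument(span)`, the other runs a dedicated OS thread and enters the span manually. A minimal, self-contained sketch of the thread-based variant, with `do_work` standing in for the renewal loop (illustrative only, not repository code):

```rust
use std::thread;
use tracing::{info_span, warn};

fn spawn_bg_worker() {
    let span = info_span!("lsn_lease_bg_task");
    thread::spawn(move || {
        // Entering the span makes every event emitted on this thread carry its fields.
        let _entered = span.entered();
        if let Err(e) = do_work() {
            warn!("Exited with error: {e}");
        }
    });
}

fn do_work() -> anyhow::Result<()> {
    // Placeholder for the periodic lease-renewal loop.
    Ok(())
}
```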
/// Renews lsn lease periodically so static compute are not affected by GC.
async fn lsn_lease_bg_task(
fn lsn_lease_bg_task(
    compute: Arc<ComputeNode>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
) -> Result<()> {
    loop {
        let (valid_until, last_conninfo) =
            acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn).await?;
        let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?;
        let valid_duration = valid_until
            .duration_since(SystemTime::now())
            .unwrap_or(Duration::ZERO);
@@ -62,22 +59,18 @@ async fn lsn_lease_bg_task(
            "Request succeeded, sleeping for {} seconds",
            sleep_duration.as_secs()
        );
        let compute = compute.clone();
        tokio::task::spawn_blocking(move || {
            compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration, &last_conninfo);
        })
        .await?;
        compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);
    }
}

/// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted.
/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests.
async fn acquire_lsn_lease_with_retry(
fn acquire_lsn_lease_with_retry(
    compute: &Arc<ComputeNode>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
) -> Result<(SystemTime, PageserverConnectionInfo)> {
) -> Result<SystemTime> {
    let mut attempts = 0usize;
    let mut retry_period_ms: f64 = 500.0;
    const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;
@@ -93,17 +86,10 @@ async fn acquire_lsn_lease_with_retry(
        )
    };

    let result = try_acquire_lsn_lease(
        conninfo.clone(),
        auth.as_deref(),
        tenant_id,
        timeline_id,
        lsn,
    )
    .await;
    let result = try_acquire_lsn_lease(conninfo, auth.as_deref(), tenant_id, timeline_id, lsn);
    match result {
        Ok(Some(res)) => {
            return Ok((res, conninfo));
            return Ok(res);
        }
        Ok(None) => {
            bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
@@ -111,15 +97,9 @@ async fn acquire_lsn_lease_with_retry(
        Err(e) => {
            warn!("Failed to acquire lsn lease: {e} (attempt {attempts})");

            let compute = compute.clone();
            tokio::task::spawn_blocking(move || {
                compute.wait_timeout_while_pageserver_connstr_unchanged(
                    Duration::from_millis(retry_period_ms as u64),
                    &conninfo,
                );
            })
            .await?;

            compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(
                retry_period_ms as u64,
            ));
            retry_period_ms *= 1.5;
            retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
        }
@@ -129,16 +109,15 @@ async fn acquire_lsn_lease_with_retry(
    }
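The retry loop above backs off geometrically: it starts around 500 ms, grows by 1.5x per failed attempt, and is capped at one minute, per the constants shown in the hunk. A self-contained sketch of that schedule (illustrative helper, not repository code):

```rust
use std::time::Duration;

/// Yields the successive wait periods used between lease-acquisition retries:
/// 500 ms, 750 ms, 1125 ms, ... capped at 60 s.
fn backoff_schedule(attempts: usize) -> Vec<Duration> {
    const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;
    let mut retry_period_ms: f64 = 500.0;
    let mut periods = Vec::with_capacity(attempts);
    for _ in 0..attempts {
        periods.push(Duration::from_millis(retry_period_ms as u64));
        retry_period_ms = (retry_period_ms * 1.5).min(MAX_RETRY_PERIOD_MS);
    }
    periods
}

fn main() {
    // The cap is reached after roughly a dozen failed attempts.
    for (i, d) in backoff_schedule(15).into_iter().enumerate() {
        println!("attempt {i}: wait {d:?}");
    }
}
```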
/// Tries to acquire LSN leases on all Pageserver shards.
async fn try_acquire_lsn_lease(
fn try_acquire_lsn_lease(
    conninfo: PageserverConnectionInfo,
    auth: Option<&str>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
) -> Result<Option<SystemTime>> {
    const MAX_CONCURRENT_LEASE_CONNECTIONS: usize = 8;
    let mut leases = Vec::new();

    let mut jobs = Vec::new();
    for (shard_index, shard) in conninfo.shards.into_iter() {
        let tenant_shard_id = TenantShardId {
            tenant_id,
@@ -150,68 +129,46 @@ async fn try_acquire_lsn_lease(
        // leas on all of them? Currently, that's what we assume, but this is hypothetical
        // as of this writing, as we never pass the info for more than one pageserver per
        // shard.

        for shard in shard.pageservers {
            let shard = shard.clone();
            jobs.push(async move {
                match conninfo.prefer_protocol {
                    PageserverProtocol::Grpc => {
                        acquire_lsn_lease_grpc(
                            &shard.grpc_url.unwrap(),
                            auth,
                            tenant_shard_id,
                            timeline_id,
                            lsn,
                        )
                        .await
                    }
                    PageserverProtocol::Libpq => {
                        acquire_lsn_lease_libpq(
                            &shard.libpq_url.unwrap(),
                            auth,
                            tenant_shard_id,
                            timeline_id,
                            lsn,
                        )
                        .await
                    }
                }
            });
        for pageserver in shard.pageservers {
            let lease = match conninfo.prefer_protocol {
                PageserverProtocol::Grpc => acquire_lsn_lease_grpc(
                    &pageserver.grpc_url.unwrap(),
                    auth,
                    tenant_shard_id,
                    timeline_id,
                    lsn,
                )?,
                PageserverProtocol::Libpq => acquire_lsn_lease_libpq(
                    &pageserver.libpq_url.unwrap(),
                    auth,
                    tenant_shard_id,
                    timeline_id,
                    lsn,
                )?,
            };
            leases.push(lease);
        }
    }

    let mut stream = futures::stream::iter(jobs).buffer_unordered(MAX_CONCURRENT_LEASE_CONNECTIONS);
    let mut leases = Vec::new();
    while let Some(res) = stream.next().await {
        let lease = res?;
        leases.push(lease);
    }
    Ok(leases.into_iter().flatten().min())
    Ok(leases.into_iter().min().flatten())
}
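Both return expressions shown above reduce a `Vec<Option<SystemTime>>` to a single `Option<SystemTime>`, but they are not equivalent: `.flatten().min()` ignores shards that returned `None`, while `.min().flatten()` yields `None` as soon as any shard returned `None`, because `None` orders before every `Some`. A small, self-contained illustration of the difference:

```rust
use std::time::{Duration, SystemTime, UNIX_EPOCH};

fn main() {
    let earlier = UNIX_EPOCH + Duration::from_secs(100);
    let later = UNIX_EPOCH + Duration::from_secs(200);
    let leases: Vec<Option<SystemTime>> = vec![Some(later), None, Some(earlier)];

    // Drops the None and takes the minimum of the remaining expirations.
    let a = leases.clone().into_iter().flatten().min();
    // `None < Some(_)`, so the minimum is None, and flatten keeps it None.
    let b = leases.into_iter().min().flatten();

    assert_eq!(a, Some(earlier));
    assert_eq!(b, None);
}
```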
/// Acquires an LSN lease on a single shard, using the libpq API. The connstring must use a
/// postgresql:// scheme.
async fn acquire_lsn_lease_libpq(
fn acquire_lsn_lease_libpq(
    connstring: &str,
    auth: Option<&str>,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    lsn: Lsn,
) -> Result<Option<SystemTime>> {
    let mut config = tokio_postgres::Config::from_str(connstring)?;
    let mut config = postgres::Config::from_str(connstring)?;
    if let Some(auth) = auth {
        config.password(auth);
    }
    let (client, connection) = config.connect(NoTls).await?;

    tokio::spawn(async move {
        if let Err(e) = connection.await {
            tracing::warn!("lease lsn connection error: {}", e);
        }
    });

    let mut client = config.connect(NoTls)?;
    let cmd = format!("lease lsn {tenant_shard_id} {timeline_id} {lsn} ");
    let res = client.simple_query(&cmd).await?;
    let res = client.simple_query(&cmd)?;
    let msg = match res.first() {
        Some(msg) => msg,
        None => bail!("empty response"),
@@ -229,34 +186,35 @@ async fn acquire_lsn_lease_libpq(
        .checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
        .expect("Time larger than max SystemTime could handle")
    });

    Ok(valid_until)
}
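With the blocking `postgres` crate, the lease round-trip reduces to parsing a `Config`, connecting, and issuing the `lease lsn` command as a simple query. A minimal sketch under the assumption (suggested by the parsing above) that the response carries the expiration as milliseconds since the Unix epoch; the function name and response handling are illustrative, not the repository's exact code:

```rust
use std::str::FromStr;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use anyhow::{Result, bail};
use postgres::{NoTls, SimpleQueryMessage};

fn lease_lsn_blocking(connstring: &str, cmd: &str) -> Result<Option<SystemTime>> {
    let config = postgres::Config::from_str(connstring)?;
    let mut client = config.connect(NoTls)?;

    // e.g. cmd = "lease lsn <tenant_shard_id> <timeline_id> <lsn> "
    let rows = client.simple_query(cmd)?;
    let row = match rows.first() {
        Some(SimpleQueryMessage::Row(row)) => row,
        _ => bail!("unexpected response"),
    };

    // Interpret the first column as "valid until", in milliseconds since the epoch.
    Ok(row.get(0).map(|ms| {
        UNIX_EPOCH + Duration::from_millis(u64::from_str(ms).expect("not a number"))
    }))
}
```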
/// Acquires an LSN lease on a single shard, using the gRPC API. The connstring must use a
/// grpc:// scheme.
async fn acquire_lsn_lease_grpc(
fn acquire_lsn_lease_grpc(
    connstring: &str,
    auth: Option<&str>,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    lsn: Lsn,
) -> Result<Option<SystemTime>> {
    let mut client = page_api::Client::connect(
        connstring.to_string(),
        tenant_shard_id.tenant_id,
        timeline_id,
        tenant_shard_id.to_index(),
        auth.map(String::from),
        None,
    )
    .await?;
    tokio::runtime::Handle::current().block_on(async move {
        let mut client = page_api::Client::connect(
            connstring.to_string(),
            tenant_shard_id.tenant_id,
            timeline_id,
            tenant_shard_id.to_index(),
            auth.map(String::from),
            None,
        )
        .await?;

    let req = page_api::LeaseLsnRequest { lsn };
    match client.lease_lsn(req).await {
        Ok(expires) => Ok(Some(expires)),
        // Lease couldn't be acquired because the LSN has been garbage collected.
        Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),
        Err(err) => Err(err.into()),
    }
        let req = page_api::LeaseLsnRequest { lsn };
        match client.lease_lsn(req).await {
            Ok(expires) => Ok(Some(expires)),
            // Lease couldn't be acquired because the LSN has been garbage collected.
            Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),
            Err(err) => Err(err.into()),
        }
    })
}
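One side of this hunk drives the async gRPC client from a synchronous function through `tokio::runtime::Handle::current().block_on(...)`. That only works if the calling thread is inside a runtime context, so a common alternative is to capture the handle before spawning the worker thread and `block_on` through it. A minimal, illustrative sketch of that pattern (not the repository's exact wiring; `fetch_value` is a placeholder):

```rust
use std::thread;
use tokio::runtime::Handle;

fn spawn_worker_with_runtime_access() {
    // Capture the handle while still inside the runtime (e.g. from async setup code);
    // Handle::current() panics on a thread with no runtime context.
    let handle = Handle::current();
    thread::spawn(move || {
        // On this plain OS thread, drive async code through the captured handle.
        let value = handle.block_on(async { fetch_value().await });
        println!("got {value}");
    });
}

async fn fetch_value() -> u64 {
    42
}
```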
@@ -341,34 +341,6 @@ extern "C-unwind" fn log_internal(
    }
}

/* BEGIN_HADRON */
extern "C" fn reset_safekeeper_statuses_for_metrics(wp: *mut WalProposer, num_safekeepers: u32) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
        if api.is_null() {
            return;
        }
        (*api).reset_safekeeper_statuses_for_metrics(&mut (*wp), num_safekeepers);
    }
}

extern "C" fn update_safekeeper_status_for_metrics(
    wp: *mut WalProposer,
    sk_index: u32,
    status: u8,
) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
        if api.is_null() {
            return;
        }
        (*api).update_safekeeper_status_for_metrics(&mut (*wp), sk_index, status);
    }
}
/* END_HADRON */

#[derive(Debug, PartialEq)]
pub enum Level {
    Debug5,
@@ -442,10 +414,6 @@ pub(crate) fn create_api() -> walproposer_api {
        finish_sync_safekeepers: Some(finish_sync_safekeepers),
        process_safekeeper_feedback: Some(process_safekeeper_feedback),
        log_internal: Some(log_internal),
        /* BEGIN_HADRON */
        reset_safekeeper_statuses_for_metrics: Some(reset_safekeeper_statuses_for_metrics),
        update_safekeeper_status_for_metrics: Some(update_safekeeper_status_for_metrics),
        /* END_HADRON */
    }
}

@@ -483,8 +451,6 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
        replica_promote: false,
        min_ps_feedback: empty_feedback,
        wal_rate_limiter: empty_wal_rate_limiter,
        num_safekeepers: 0,
        safekeeper_status: [0; 32],
    }
}
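The HADRON callbacks above follow the usual FFI trampoline pattern: the C side stores an opaque `callback_data` pointer, and each `extern "C"` function casts it back to the boxed trait object before dispatching. A stripped-down sketch of the pattern, with hypothetical names (`Config`, `ApiImpl::on_event`) standing in for the real bindings:

```rust
trait ApiImpl {
    fn on_event(&self, value: u32);
}

#[repr(C)]
struct Config {
    // Opaque pointer handed to the C code; assumed to have been created from
    // `Box::into_raw(Box::new(Box::new(impl_) as Box<dyn ApiImpl>))`.
    callback_data: *mut std::ffi::c_void,
}

extern "C" fn on_event_trampoline(config: *mut Config, value: u32) {
    unsafe {
        // Recover the boxed trait object and dispatch, guarding against a null pointer.
        let api = (*config).callback_data as *mut Box<dyn ApiImpl>;
        if api.is_null() {
            return;
        }
        (*api).on_event(value);
    }
}
```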
@@ -159,21 +159,6 @@ pub trait ApiImpl {
    fn after_election(&self, _wp: &mut WalProposer) {
        todo!()
    }

    /* BEGIN_HADRON */
    fn reset_safekeeper_statuses_for_metrics(&self, _wp: &mut WalProposer, _num_safekeepers: u32) {
        // Do nothing for testing purposes.
    }

    fn update_safekeeper_status_for_metrics(
        &self,
        _wp: &mut WalProposer,
        _sk_index: u32,
        _status: u8,
    ) {
        // Do nothing for testing purposes.
    }
    /* END_HADRON */
}

#[derive(Debug)]

@@ -391,12 +391,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
    neon_per_backend_counters totals = {0};
    metric_t *metrics;

    /* BEGIN_HADRON */
    WalproposerShmemState *wp_shmem;
    uint32 num_safekeepers;
    uint32 num_active_safekeepers;
    /* END_HADRON */

    /* We put all the tuples into a tuplestore in one go. */
    InitMaterializedSRF(fcinfo, 0);

@@ -443,32 +437,11 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
    // Not ideal but piggyback our databricks counters into the neon perf counters view
    // so that we don't need to introduce neon--1.x+1.sql to add a new view.
    {
        // Keeping this code in its own block to work around the C90 "don't mix declarations and code" rule when we define
        // the `databricks_metrics` array in the next block. Yes, we are seriously dealing with C90 rules in 2025.

        // Read safekeeper status from wal proposer shared memory first.
        // Note that we are taking a mutex when reading from walproposer shared memory so that the total safekeeper count is
        // consistent with the active wal acceptors count. Assuming that we don't query this view too often the mutex should
        // not be a huge deal.
        wp_shmem = GetWalpropShmemState();
        SpinLockAcquire(&wp_shmem->mutex);
        num_safekeepers = wp_shmem->num_safekeepers;
        num_active_safekeepers = 0;
        for (int i = 0; i < num_safekeepers; i++) {
            if (wp_shmem->safekeeper_status[i] == 1) {
                num_active_safekeepers++;
            }
        }
        SpinLockRelease(&wp_shmem->mutex);
    }
    {
        metric_t databricks_metrics[] = {
            {"sql_index_corruption_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->index_corruption_count)},
            {"sql_data_corruption_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->data_corruption_count)},
            {"sql_internal_error_count", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->internal_error_count)},
            {"ps_corruption_detected", false, 0, (double) pg_atomic_read_u32(&databricks_metrics_shared->ps_corruption_detected)},
            {"num_active_safekeepers", false, 0.0, (double) num_active_safekeepers},
            {"num_configured_safekeepers", false, 0.0, (double) num_safekeepers},
            {NULL, false, 0, 0},
        };
        for (int i = 0; databricks_metrics[i].name != NULL; i++)
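The comment above spells out why both counters are read under a single spinlock acquisition: the configured count and the active count must come from the same snapshot. As an illustrative analogue in Rust (used here only because the rest of this change set is Rust-heavy; this is not repository code), the same consistency argument looks like this:

```rust
use std::sync::Mutex;

struct SafekeeperStatus {
    // true = active, false = inactive, one entry per configured safekeeper.
    status: Mutex<Vec<bool>>,
}

impl SafekeeperStatus {
    /// Returns (configured, active) from one locked snapshot, so the two
    /// numbers can never disagree about which safekeepers they describe.
    fn counts(&self) -> (usize, usize) {
        let status = self.status.lock().unwrap();
        let configured = status.len();
        let active = status.iter().filter(|&&s| s).count();
        (configured, active)
    }
}
```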
@@ -154,9 +154,7 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api)
    wp->safekeeper[wp->n_safekeepers].state = SS_OFFLINE;
    wp->safekeeper[wp->n_safekeepers].active_state = SS_ACTIVE_SEND;
    wp->safekeeper[wp->n_safekeepers].wp = wp;
    /* BEGIN_HADRON */
    wp->safekeeper[wp->n_safekeepers].index = wp->n_safekeepers;
    /* END_HADRON */

    {
        Safekeeper *sk = &wp->safekeeper[wp->n_safekeepers];
        int written = 0;
@@ -185,10 +183,6 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api)
    if (wp->safekeepers_generation > INVALID_GENERATION && wp->config->proto_version < 3)
        wp_log(FATAL, "enabling generations requires protocol version 3");
    wp_log(LOG, "using safekeeper protocol version %d", wp->config->proto_version);

    /* BEGIN_HADRON */
    wp->api.reset_safekeeper_statuses_for_metrics(wp, wp->n_safekeepers);
    /* END_HADRON */

    /* Fill the greeting package */
    wp->greetRequest.pam.tag = 'g';
@@ -361,10 +355,6 @@ ShutdownConnection(Safekeeper *sk)
    sk->state = SS_OFFLINE;
    sk->streamingAt = InvalidXLogRecPtr;

    /* BEGIN_HADRON */
    sk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 0);
    /* END_HADRON */

    MembershipConfigurationFree(&sk->greetResponse.mconf);
    if (sk->voteResponse.termHistory.entries)
        pfree(sk->voteResponse.termHistory.entries);
@@ -1540,10 +1530,6 @@ StartStreaming(Safekeeper *sk)
    sk->active_state = SS_ACTIVE_SEND;
    sk->streamingAt = sk->startStreamingAt;

    /* BEGIN_HADRON */
    sk->wp->api.update_safekeeper_status_for_metrics(sk->wp, sk->index, 1);
    /* END_HADRON */

    /*
     * Donors can only be in SS_ACTIVE state, so we potentially update the
     * donor when we switch one to SS_ACTIVE.

@@ -432,10 +432,6 @@ typedef struct WalproposerShmemState
    /* BEGIN_HADRON */
    /* The WAL rate limiter */
    WalRateLimiter wal_rate_limiter;
    /* Number of safekeepers in the config */
    uint32 num_safekeepers;
    /* Per-safekeeper status flags: 0=inactive, 1=active */
    uint8 safekeeper_status[MAX_SAFEKEEPERS];
    /* END_HADRON */
} WalproposerShmemState;

@@ -487,11 +483,6 @@ typedef struct Safekeeper
    char const *host;
    char const *port;

    /* BEGIN_HADRON */
    /* index of this safekeeper in the WalProposer array */
    uint32 index;
    /* END_HADRON */

    /*
     * connection string for connecting/reconnecting.
     *
@@ -740,23 +731,6 @@ typedef struct walproposer_api
     * handled by elog().
     */
    void (*log_internal) (WalProposer *wp, int level, const char *line);

    /*
     * BEGIN_HADRON
     * APIs manipulating shared memory state used for Safekeeper quorum health metrics.
     */

    /*
     * Reset the safekeeper statuses in shared memory for metric purposes.
     */
    void (*reset_safekeeper_statuses_for_metrics) (WalProposer *wp, uint32 num_safekeepers);

    /*
     * Update the safekeeper status in shared memory for metric purposes.
     */
    void (*update_safekeeper_status_for_metrics) (WalProposer *wp, uint32 sk_index, uint8 status);

    /* END_HADRON */
} walproposer_api;

/*

@@ -2261,27 +2261,6 @@ GetNeonCurrentClusterSize(void)
}
uint64 GetNeonCurrentClusterSize(void);

/* BEGIN_HADRON */
static void
walprop_pg_reset_safekeeper_statuses_for_metrics(WalProposer *wp, uint32 num_safekeepers)
{
    WalproposerShmemState* shmem = wp->api.get_shmem_state(wp);
    SpinLockAcquire(&shmem->mutex);
    shmem->num_safekeepers = num_safekeepers;
    memset(shmem->safekeeper_status, 0, sizeof(shmem->safekeeper_status));
    SpinLockRelease(&shmem->mutex);
}

static void
walprop_pg_update_safekeeper_status_for_metrics(WalProposer *wp, uint32 sk_index, uint8 status)
{
    WalproposerShmemState* shmem = wp->api.get_shmem_state(wp);
    Assert(sk_index < MAX_SAFEKEEPERS);
    SpinLockAcquire(&shmem->mutex);
    shmem->safekeeper_status[sk_index] = status;
    SpinLockRelease(&shmem->mutex);
}
/* END_HADRON */

static const walproposer_api walprop_pg = {
    .get_shmem_state = walprop_pg_get_shmem_state,
@@ -2315,6 +2294,4 @@ static const walproposer_api walprop_pg = {
    .finish_sync_safekeepers = walprop_pg_finish_sync_safekeepers,
    .process_safekeeper_feedback = walprop_pg_process_safekeeper_feedback,
    .log_internal = walprop_pg_log_internal,
    .reset_safekeeper_statuses_for_metrics = walprop_pg_reset_safekeeper_statuses_for_metrics,
    .update_safekeeper_status_for_metrics = walprop_pg_update_safekeeper_status_for_metrics,
};

@@ -233,6 +233,15 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
        log.info(f"`SELECT` query succeed after GC, {ctx=}")
        return offset

    # It's not reliable to let the compute renew the lease in this test case as we have a very tight
    # lease timeout. Therefore, the test case itself will renew the lease.
    #
    # This is a workaround to make the test case more deterministic.
    def renew_lease(env: NeonEnv, lease_lsn: Lsn):
        env.storage_controller.pageserver_api().timeline_lsn_lease(
            env.initial_tenant, env.initial_timeline, lease_lsn
        )

    # Insert some records on main branch
    with env.endpoints.create_start("main", config_lines=["shared_buffers=1MB"]) as ep_main:
        with ep_main.cursor() as cur:
@@ -245,6 +254,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
            XLOG_BLCKSZ = 8192
            lsn = Lsn((int(lsn) // XLOG_BLCKSZ) * XLOG_BLCKSZ)

            # We need to mock the way cplane works: it gets a lease for a branch before starting the compute.
            renew_lease(env, lsn)

            with env.endpoints.create_start(
                branch_name="main",
                endpoint_id="static",
@@ -265,6 +277,8 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
                ps.stop()
                ps.start()

                renew_lease(env, lsn)

                trigger_gc_and_select(
                    env,
                    ep_static,

@@ -2742,7 +2742,6 @@ def test_pull_timeline_partial_segment_integrity(neon_env_builder: NeonEnvBuilde
    wait_until(unevicted)


@pytest.mark.skip(reason="Lakebase mode")
def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
    """
    Test that the timeline disk usage circuit breaker works as expected. We test that:
@@ -2758,32 +2757,18 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
    remote_storage_kind = s3_storage()
    neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)

    # Set a very small disk usage limit (1KB)
    neon_env_builder.safekeeper_extra_opts = ["--max-timeline-disk-usage-bytes=1024"]

    env = neon_env_builder.init_start()

    # Create a timeline and endpoint
    env.create_branch("test_timeline_disk_usage_limit")
    endpoint = env.endpoints.create_start("test_timeline_disk_usage_limit")

    # Install the neon extension in the test database. We need it to query perf counter metrics.
    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS neon")
            # Sanity-check safekeeper connection status in neon_perf_counters in the happy case.
            cur.execute(
                "SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
            )
            assert cur.fetchone() == (1,), "Expected 1 active safekeeper"
            cur.execute(
                "SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
            )
            assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"

    # Get the safekeeper
    sk = env.safekeepers[0]

    # Restart the safekeeper with a very small disk usage limit (1KB)
    sk.stop().start(["--max-timeline-disk-usage-bytes=1024"])

    # Inject a failpoint to stop WAL backup
    with sk.http_client() as http_cli:
        http_cli.configure_failpoints([("backup-lsn-range-pausable", "pause")])
@@ -2809,18 +2794,6 @@ def test_timeline_disk_usage_limit(neon_env_builder: NeonEnvBuilder):
        wait_until(error_logged)
        log.info("Found expected error message in compute log, resuming.")

        with closing(endpoint.connect()) as conn:
            with conn.cursor() as cur:
                # Confirm that neon_perf_counters also indicates that there are no active safekeepers
                cur.execute(
                    "SELECT value FROM neon_perf_counters WHERE metric = 'num_active_safekeepers'"
                )
                assert cur.fetchone() == (0,), "Expected 0 active safekeepers"
                cur.execute(
                    "SELECT value FROM neon_perf_counters WHERE metric = 'num_configured_safekeepers'"
                )
                assert cur.fetchone() == (1,), "Expected 1 configured safekeeper"

        # Sanity check that the hanging insert is indeed still hanging. Otherwise means the circuit breaker we
        # implemented didn't work as expected.
        time.sleep(2)