Always compile failpoints support, add runtime config option to enable it.
It's annoying that many of the tests required a special build with the "testing" feature. I think it's better to have a runtime check. It adds a few CPU instructions at each place a failpoint is defined, even when failpoints are disabled, but that's a small price to pay for the convenience. Fixes issue 2531, although differently from what was discussed on that issue.
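In essence, the change is a cheap runtime gate in front of `fail::fail_point!`. The following is a minimal sketch of that pattern, with names simplified; the actual macro and `TESTING_MODE` cell appear in the diff below.

```rust
// Sketch only: runtime-gated failpoints, assuming this sits at the crate root
// (so `crate::TESTING_MODE` resolves) and the `once_cell` and `fail` crates.
use once_cell::sync::OnceCell;

// Set once at startup from the `testing_mode` config option.
pub static TESTING_MODE: OnceCell<bool> = OnceCell::new();

#[macro_export]
macro_rules! fail_point {
    ($($name:expr),*) => {{
        // One load and one branch per call site, even in production builds.
        if cfg!(test) || *crate::TESTING_MODE.get().expect("testing_mode not initialized") {
            fail::fail_point!($($name),*)
        }
    }};
}
```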
@@ -11,6 +11,3 @@ opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1

[alias]
build_testing = ["build", "--features", "testing"]
.github/workflows/build_and_test.yml
@@ -100,11 +100,11 @@ jobs:
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FEATURES="--features testing"
CARGO_FEATURES=""
CARGO_FLAGS="--locked --timings $CARGO_FEATURES"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FEATURES="--features testing,profiling"
CARGO_FEATURES="--features profiling"
CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES"
fi
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
@@ -539,9 +539,9 @@ jobs:
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
# Regular pageserver version string looks like
# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c features: []
# Bad versions might look like:
# Neon page server git-env:local failpoints: true, features: ["testing"]
# Neon page server git-env:local features: [""]
# Ensure that we don't have bad versions.
- name: Verify image versions
shell: bash # ensure no set -e for better error messages
@@ -555,11 +555,6 @@ jobs:
exit 1
fi

if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
echo "Pageserver version should have no testing feature enabled"
exit 1
fi

- name: Verify docker-compose example
run: env REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
@@ -213,7 +213,7 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
```sh
git clone --recursive https://github.com/neondatabase/neon.git

CARGO_BUILD_FLAGS="--features=testing" make
make

./scripts/pytest
```
@@ -5,10 +5,6 @@ edition = "2021"

[features]
default = []
# Enables test-only APIs, including failpoints. In particular, enables the `fail_point!` macro,
# which adds some runtime cost to run tests on outage conditions
testing = ["fail/failpoints"]

profiling = ["pprof"]

[dependencies]
@@ -12,7 +12,6 @@
//!
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::{BufMut, BytesMut};
use fail::fail_point;
use itertools::Itertools;
use std::fmt::Write as FmtWrite;
use std::io;
@@ -22,6 +21,7 @@ use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
use tracing::*;

use crate::fail_point;
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};
@@ -35,10 +35,6 @@ project_git_version!(GIT_VERSION);
const PID_FILE_NAME: &str = "pageserver.pid";

const FEATURES: &[&str] = &[
#[cfg(feature = "testing")]
"testing",
#[cfg(feature = "fail/failpoints")]
"fail/failpoints",
#[cfg(feature = "profiling")]
"profiling",
];
@@ -178,6 +174,10 @@ fn initialize_config(
let conf = PageServerConf::parse_and_validate(&toml, workdir)
.context("Failed to parse pageserver configuration")?;

if pageserver::TESTING_MODE.set(conf.testing_mode).is_err() {
anyhow::bail!("testing_mode was already initialized");
}

if update_config {
info!("Writing pageserver config to '{}'", cfg_file_path.display());
@@ -206,16 +206,22 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {

// If any failpoints were set from FAILPOINTS environment variable,
// print them to the log for debugging purposes
let failpoints = fail::list();
if !failpoints.is_empty() {
info!(
"started with failpoints: {}",
failpoints
.iter()
.map(|(name, actions)| format!("{name}={actions}"))
.collect::<Vec<String>>()
.join(";")
)
if *pageserver::TESTING_MODE.get().unwrap() {
let failpoints = fail::list();
if !failpoints.is_empty() {
info!(
"started with testing mode enabled, failpoints: {}",
failpoints
.iter()
.map(|(name, actions)| format!("{name}={actions}"))
.collect::<Vec<String>>()
.join(";")
)
} else {
info!("started with testing mode enabled");
}
} else {
info!("started with testing mode disabled");
}

let lock_file_path = conf.workdir.join(PID_FILE_NAME);
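For context on the FAILPOINTS logging above: with the `fail` crate, failpoints set through the environment are conventionally applied via `FailScenario::setup()` and can then be enumerated with `fail::list()`. The following is a standalone, hedged sketch, not pageserver code, and it assumes the `fail` crate is built with its `failpoints` feature.

```rust
// Illustration of FAILPOINTS -> fail::list(); hypothetical standalone program.
fn main() {
    // e.g. run with: FAILPOINTS='before-delete-layer=return' cargo run
    let scenario = fail::FailScenario::setup();

    for (name, actions) in fail::list() {
        println!("failpoint {name} = {actions}");
    }

    scenario.teardown();
}
```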
@@ -53,6 +53,8 @@ pub mod defaults {
pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize =
super::ConfigurableSemaphore::DEFAULT_INITIAL.get();

pub const DEFAULT_TESTING_MODE: bool = false;

///
/// Default built-in configuration file.
///
@@ -75,6 +77,8 @@ pub mod defaults {

#concurrent_tenant_size_logical_size_queries = '{DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES}'

testing_mode = false

# [tenant_config]
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -143,6 +147,9 @@ pub struct PageServerConf {

/// Number of concurrent [`Tenant::gather_size_inputs`] allowed.
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,

/// Enables failpoint support and extra mgmt APIs useful for testing.
pub testing_mode: bool,
}

/// We do not want to store this in a PageServerConf because the latter may be logged
@@ -222,6 +229,8 @@ struct PageServerConfigBuilder {
log_format: BuilderValue<LogFormat>,

concurrent_tenant_size_logical_size_queries: BuilderValue<ConfigurableSemaphore>,

testing_mode: BuilderValue<bool>,
}

impl Default for PageServerConfigBuilder {
@@ -252,6 +261,8 @@ impl Default for PageServerConfigBuilder {
log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),

concurrent_tenant_size_logical_size_queries: Set(ConfigurableSemaphore::default()),

testing_mode: Set(DEFAULT_TESTING_MODE),
}
}
}
@@ -332,6 +343,10 @@ impl PageServerConfigBuilder {
self.concurrent_tenant_size_logical_size_queries = BuilderValue::Set(u);
}

pub fn testing_mode(&mut self, testing_mode: bool) {
self.testing_mode = BuilderValue::Set(testing_mode);
}

pub fn build(self) -> anyhow::Result<PageServerConf> {
Ok(PageServerConf {
listen_pg_addr: self
@@ -380,6 +395,7 @@ impl PageServerConfigBuilder {
.ok_or(anyhow!(
"missing concurrent_tenant_size_logical_size_queries"
))?,
testing_mode: self.testing_mode.ok_or(anyhow!("missing testing_mode"))?,
})
}
}
@@ -560,6 +576,7 @@ impl PageServerConf {
let permits = NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?;
ConfigurableSemaphore::new(permits)
}),
"testing_mode" => builder.testing_mode(parse_toml_bool(key, item)?),
_ => bail!("unrecognized pageserver option '{key}'"),
}
}
@@ -681,6 +698,7 @@ impl PageServerConf {
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
testing_mode: true,
}
}
}
@@ -694,6 +712,12 @@ fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
Ok(s.to_string())
}

fn parse_toml_bool(name: &str, item: &Item) -> Result<bool> {
Ok(item
.as_bool()
.with_context(|| format!("configure option {name} is not a boolean"))?)
}

fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
// A toml integer is signed, so it cannot represent the full range of an u64. That's OK
// for our use, though.
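For reference, `parse_toml_bool` above only wraps `toml_edit`'s `Item::as_bool()`. A minimal sketch of the same lookup, as a hypothetical standalone program rather than repository code:

```rust
// Reading a boolean option such as `testing_mode` with toml_edit,
// mirroring what parse_toml_bool does. Illustrative example only.
use toml_edit::Document;

fn main() -> anyhow::Result<()> {
    let doc: Document = "testing_mode = true".parse()?;
    let enabled = doc["testing_mode"]
        .as_bool()
        .ok_or_else(|| anyhow::anyhow!("testing_mode is not a boolean"))?;
    assert!(enabled);
    Ok(())
}
```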
@@ -870,6 +894,7 @@ log_format = 'json'
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
testing_mode: defaults::DEFAULT_TESTING_MODE,
},
"Correct defaults should be used when no config values are provided"
);
@@ -916,6 +941,7 @@ log_format = 'json'
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::Json,
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
testing_mode: defaults::DEFAULT_TESTING_MODE,
},
"Should be able to parse all basic config values correctly"
);
@@ -7,12 +7,14 @@ use remote_storage::GenericRemoteStorage;
use tracing::*;

use super::models::{
LocalTimelineInfo, RemoteTimelineInfo, StatusResponse, TenantConfigRequest,
TenantCreateRequest, TenantCreateResponse, TenantInfo, TimelineCreateRequest, TimelineInfo,
ConfigureFailpointsRequest, LocalTimelineInfo, RemoteTimelineInfo, StatusResponse,
TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
};
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::tenant::Timeline;
use crate::tenant_config::TenantConfOpt;
use crate::CheckpointConfig;
use crate::{config::PageServerConf, tenant_mgr};
use utils::{
auth::JwtAuth,
@@ -27,12 +29,6 @@ use utils::{
lsn::Lsn,
};

// Imports only used for testing APIs
#[cfg(feature = "testing")]
use super::models::{ConfigureFailpointsRequest, TimelineGcRequest};
#[cfg(feature = "testing")]
use crate::CheckpointConfig;

struct State {
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
@@ -695,7 +691,6 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
json_response(StatusCode::OK, ())
}

#[cfg(feature = "testing")]
async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
if !fail::has_failpoints() {
return Err(ApiError::BadRequest(anyhow!(
@@ -729,7 +724,6 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
}

// Run GC immediately on given timeline.
#[cfg(feature = "testing")]
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -748,7 +742,6 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
}

// Run compaction immediately on given timeline.
#[cfg(feature = "testing")]
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -769,7 +762,6 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
}

// Run checkpoint immediately on given timeline.
#[cfg(feature = "testing")]
async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -814,22 +806,26 @@ pub fn make_router(
}))
}

// A wrapper around a handler function that returns an error if the server
// was not configured with testing_mode enabled. This is used to gate API
// functions that should only be used in tests, never in production.
macro_rules! testing_api {
($handler_desc:literal, $handler:path $(,)?) => {{
#[cfg(not(feature = "testing"))]
async fn cfg_disabled(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
Err(ApiError::BadRequest(anyhow!(concat!(
"Cannot ",
$handler_desc,
" because pageserver was compiled without testing APIs",
))))
use futures::FutureExt;
|req: Request<Body>| {
if conf.testing_mode {
$handler(req).left_future()
} else {
async {
Err(ApiError::BadRequest(anyhow!(concat!(
"Cannot ",
$handler_desc,
" because pageserver was configured without testing APIs",
))))
}
.right_future()
}
}

#[cfg(feature = "testing")]
let handler = $handler;
#[cfg(not(feature = "testing"))]
let handler = cfg_disabled;
handler
}};
}
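The non-obvious part of the new `testing_api!` body is that a single closure must return either the real handler's future or an error future; `left_future()`/`right_future()` from the `futures` crate wrap both into one `Either` type without boxing. A standalone sketch of just that trick, with illustrative names and assuming the `futures` crate:

```rust
use futures::future::FutureExt; // provides left_future / right_future

async fn real_handler(req: &str) -> Result<String, String> {
    Ok(format!("handled {req}"))
}

fn main() {
    let testing_mode = false;

    // One closure, two possible future types, unified by Either.
    let handler = |req: &'static str| {
        if testing_mode {
            real_handler(req).left_future()
        } else {
            async { Err::<String, String>("testing APIs are disabled".to_string()) }
                .right_future()
        }
    };

    let result = futures::executor::block_on(handler("compact timeline"));
    assert_eq!(result, Err("testing APIs are disabled".to_string()));
}
```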
@@ -148,6 +148,28 @@ pub fn is_uninit_mark(path: &Path) -> bool {
}
}

///
/// Wrapper around fail::fail_point! macro that returns quickly if testing_mode was
/// disabled in the pageserver config. Also enabled in unit tests.
///
/// fail::fail_point! is fairly quick, but it does acquire an RwLock and perform a HashMap
/// lookup. This macro is hopefully cheap enough that we don't need to worry about the
/// overhead even in production, and even if the macro is used in hot spots. (This check
/// compiles to two cmp instructions; get_unchecked() would shrink it to one.)
///
#[macro_export]
macro_rules! fail_point {
    ($($name:expr),*) => {{
        if cfg!(test) || *crate::TESTING_MODE.get().expect("testing_mode not initialized") {
            fail::fail_point!($($name), *)
        }
    }};
}

/// This is set early in the pageserver startup, from the "testing_mode" setting in the
/// config file.
pub static TESTING_MODE: once_cell::sync::OnceCell<bool> = once_cell::sync::OnceCell::new();

#[cfg(test)]
mod backoff_defaults_tests {
use super::*;
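End to end, once the gate above is open, a call site behaves like plain `fail` crate usage: the failpoint stays inert until something arms it at runtime, for example a test through the failpoints HTTP API or a direct `fail::cfg` call. A hedged standalone sketch using only the public `fail` crate API (it needs the crate's `failpoints` feature; the real call sites appear in the hunks below):

```rust
// Standalone sketch: an inert failpoint until it is armed at runtime,
// mirroring call sites like "before-delete-layer" in the diff below.
fn delete_layer() -> anyhow::Result<()> {
    fail::fail_point!("before-delete-layer", |_| {
        anyhow::bail!("failpoint before-delete-layer")
    });
    Ok(())
}

fn main() -> anyhow::Result<()> {
    // Disarmed: the failpoint is a no-op.
    assert!(delete_layer().is_ok());

    // Arm it at runtime, as a test would via the failpoints management API.
    fail::cfg("before-delete-layer", "return").map_err(anyhow::Error::msg)?;
    assert!(delete_layer().is_err());

    fail::remove("before-delete-layer");
    Ok(())
}
```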
@@ -12,7 +12,7 @@ pub(super) async fn delete_layer<'a>(
storage: &'a GenericRemoteStorage,
local_layer_path: &'a Path,
) -> anyhow::Result<()> {
fail::fail_point!("before-delete-layer", |_| {
crate::fail_point!("before-delete-layer", |_| {
anyhow::bail!("failpoint before-delete-layer")
});
debug!("Deleting layer from remote storage: {local_layer_path:?}",);

@@ -97,7 +97,7 @@ pub async fn download_layer_file<'a>(
})?;
drop(destination_file);

fail::fail_point!("remote-storage-download-pre-rename", |_| {
crate::fail_point!("remote-storage-download-pre-rename", |_| {
bail!("remote-storage-download-pre-rename failpoint triggered")
});
@@ -1,12 +1,12 @@
//! Helper functions to upload files to remote storage with a RemoteStorage

use anyhow::{bail, Context};
use fail::fail_point;
use std::path::Path;
use tokio::fs;

use super::index::IndexPart;
use crate::config::PageServerConf;
use crate::fail_point;
use crate::storage_sync::LayerFileMetadata;
use remote_storage::GenericRemoteStorage;
use utils::id::{TenantId, TimelineId};
@@ -46,6 +46,7 @@ use std::time::{Duration, Instant};

use self::metadata::TimelineMetadata;
use crate::config::PageServerConf;
use crate::fail_point;
use crate::import_datadir;
use crate::is_uninit_mark;
use crate::metrics::{remove_tenant_metrics, STORAGE_TIME};
@@ -255,7 +256,7 @@ impl UninitializedTimeline<'_> {
// Thus spawning flush loop manually and skipping flush_loop setup in initialize_with_lock
raw_timeline.maybe_spawn_flush_loop();

fail::fail_point!("before-checkpoint-new-timeline", |_| {
fail_point!("before-checkpoint-new-timeline", |_| {
bail!("failpoint before-checkpoint-new-timeline");
});

@@ -2098,7 +2099,7 @@ impl Tenant {
// Thus spawn flush loop manually and skip flush_loop setup in initialize_with_lock
unfinished_timeline.maybe_spawn_flush_loop();

fail::fail_point!("before-checkpoint-new-timeline", |_| {
fail_point!("before-checkpoint-new-timeline", |_| {
anyhow::bail!("failpoint before-checkpoint-new-timeline");
});

@@ -2192,7 +2193,7 @@ impl Tenant {
.context("Failed to create timeline data structure")?;
crashsafe::create_dir_all(timeline_path).context("Failed to create timeline directory")?;

fail::fail_point!("after-timeline-uninit-mark-creation", |_| {
fail_point!("after-timeline-uninit-mark-creation", |_| {
anyhow::bail!("failpoint after-timeline-uninit-mark-creation");
});

@@ -2381,7 +2382,7 @@ fn try_create_target_tenant_dir(
temporary_tenant_timelines_dir.display()
)
})?;
fail::fail_point!("tenant-creation-before-tmp-rename", |_| {
fail_point!("tenant-creation-before-tmp-rename", |_| {
anyhow::bail!("failpoint tenant-creation-before-tmp-rename");
});
@@ -2,7 +2,6 @@

use anyhow::{anyhow, bail, ensure, Context};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use once_cell::sync::OnceCell;
use pageserver_api::models::TimelineState;
@@ -19,6 +18,7 @@ use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock};
use std::time::{Duration, Instant, SystemTime};

use crate::fail_point;
use crate::storage_sync::index::IndexPart;
use crate::storage_sync::RemoteTimelineClient;
use crate::tenant::{
@@ -12,16 +12,19 @@ use once_cell::sync::Lazy;
use tokio::sync::RwLock;
use tracing::*;

use pageserver_api::models::TimelineGcRequest;
use remote_storage::GenericRemoteStorage;
use utils::crashsafe;

use crate::config::PageServerConf;
use crate::repository::GcResult;
use crate::task_mgr::{self, TaskKind};
use crate::tenant::{Tenant, TenantState};
use crate::tenant_config::TenantConfOpt;
use crate::IGNORED_TENANT_FILE_NAME;

use utils::fs_ext::PathExt;
use utils::http::error::ApiError;
use utils::id::{TenantId, TimelineId};

static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
@@ -460,13 +463,6 @@ where
}
}

#[cfg(feature = "testing")]
use {
crate::repository::GcResult, pageserver_api::models::TimelineGcRequest,
utils::http::error::ApiError,
};

#[cfg(feature = "testing")]
pub async fn immediate_gc(
tenant_id: TenantId,
timeline_id: TimelineId,
@@ -494,7 +490,7 @@ pub async fn immediate_gc(
&format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
false,
async move {
fail::fail_point!("immediate_gc_task_pre");
crate::fail_point!("immediate_gc_task_pre");
let result = tenant
.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)
.instrument(info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id))
@@ -9,7 +9,6 @@ use std::{
use anyhow::{bail, ensure, Context};
use bytes::BytesMut;
use chrono::{NaiveDateTime, Utc};
use fail::fail_point;
use futures::StreamExt;
use postgres::{SimpleQueryMessage, SimpleQueryRow};
use postgres_ffi::v14::xlog_utils::normalize_lsn;
@@ -20,6 +19,7 @@ use tokio::{pin, select, sync::watch, time};
use tokio_postgres::{replication::ReplicationStream, Client};
use tracing::{debug, error, info, trace, warn};

use crate::fail_point;
use crate::{metrics::LIVE_CONNECTIONS_COUNT, walreceiver::TaskStateUpdate};
use crate::{
task_mgr,
@@ -13,7 +13,7 @@
# avoid running regular linting script that checks every feature.
if [[ "$OSTYPE" == "darwin"* ]]; then
# no extra features to test currently, add more here when needed
cargo clippy --locked --all --all-targets --features testing -- -A unknown_lints -D warnings
cargo clippy --locked --all --all-targets -- -A unknown_lints -D warnings
else
# * `-A unknown_lints` – do not warn about unknown lint suppressions
# that people with newer toolchains might use
@@ -6,9 +6,6 @@ Prerequisites:
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
- Neon and Postgres binaries
- See the root [README.md](/README.md) for build directions
If you want to test tests with test-only APIs, you would need to add `--features testing` to Rust code build commands.
For convenience, repository cargo config contains `build_testing` alias, that serves as a subcommand, adding the required feature flags.
Usage example: `cargo build_testing --release` is equivalent to `cargo build --features testing --release`
- Tests can be run from the git tree; or see the environment variables
below to run from other directories.
- The neon git repo, including the postgres submodule
@@ -587,6 +587,7 @@ class NeonEnvBuilder:
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name: str = DEFAULT_BRANCH_NAME,
testing_mode: bool = True,
):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
@@ -608,6 +609,7 @@ class NeonEnvBuilder:
self.neon_binpath = neon_binpath
self.pg_distrib_dir = pg_distrib_dir
self.pg_version = pg_version
self.testing_mode = testing_mode

def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -858,6 +860,7 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
pageserver_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
pageserver_testing_mode = "true" if config.testing_mode else "false"

toml += textwrap.dedent(
f"""
@@ -866,6 +869,7 @@ class NeonEnv:
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
auth_type = '{pageserver_auth_type}'
testing_mode = {pageserver_testing_mode}
"""
)

@@ -978,6 +982,10 @@ def _shared_simple_env(
pg_distrib_dir=pg_distrib_dir,
pg_version=pg_version,
run_id=run_id,
# Disable failpoint support. Failpoints could have unexpected consequences
# when the pageserver is shared by concurrent tests. Also, it might affect
# performance, and we use the shared simple env in performance tests.
testing_mode=False,
) as builder:
env = builder.init_start()
@@ -1048,11 +1056,10 @@ class PageserverApiException(Exception):


class PageserverHttpClient(requests.Session):
def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
self.auth_token = auth_token
self.is_testing_enabled_or_skip = is_testing_enabled_or_skip

if auth_token is not None:
self.headers["Authorization"] = f"Bearer {auth_token}"
@@ -1071,8 +1078,6 @@ class PageserverHttpClient(requests.Session):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()

def configure_failpoints(self, config_strings: Tuple[str, str] | List[Tuple[str, str]]):
self.is_testing_enabled_or_skip()

if isinstance(config_strings, tuple):
pairs = [config_strings]
else:
@@ -1212,8 +1217,6 @@ class PageserverHttpClient(requests.Session):
def timeline_gc(
self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int]
) -> dict[str, Any]:
self.is_testing_enabled_or_skip()

log.info(
f"Requesting GC: tenant {tenant_id}, timeline {timeline_id}, gc_horizon {repr(gc_horizon)}"
)
@@ -1229,8 +1232,6 @@ class PageserverHttpClient(requests.Session):
return res_json

def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId):
self.is_testing_enabled_or_skip()

log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}")
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact"
@@ -1254,8 +1255,6 @@ class PageserverHttpClient(requests.Session):
return res_json

def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
self.is_testing_enabled_or_skip()

log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint"
@@ -1815,10 +1814,6 @@ class NeonPageserver(PgProtocol):
):
self.stop(immediate=True)

def is_testing_enabled_or_skip(self):
if '"testing"' not in self.version:
pytest.skip("pageserver was built without 'testing' feature")

def is_profiling_enabled_or_skip(self):
if '"profiling"' not in self.version:
pytest.skip("pageserver was built without 'profiling' feature")
@@ -1827,7 +1822,6 @@ class NeonPageserver(PgProtocol):
return PageserverHttpClient(
port=self.service_port.http,
auth_token=auth_token,
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
)

def assert_no_errors(self):
@@ -3,7 +3,7 @@
First make a release build. The profiling flag is optional, used only for tests that
generate flame graphs. The `-s` flag just silences a lot of output, and makes it
easier to see if you have compile errors without scrolling up.
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=testing,profiling" make -s -j8`
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=profiling" make -s -j8`

NOTE: the `profiling` flag only works on linux because we use linux-specific
libc APIs like `libc::timer_t`.
@@ -327,7 +327,6 @@ def check_neon_works(
auth_token = snapshot_config["pageserver"]["auth_token"]
pageserver_http = PageserverHttpClient(
port=pageserver_port,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
auth_token=auth_token,
)

@@ -13,7 +13,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"

env = neon_env_builder.init()
env.pageserver.is_testing_enabled_or_skip()

neon_env_builder.start()

@@ -58,7 +58,6 @@ def new_pageserver_service(
pageserver_client = PageserverHttpClient(
port=http_port,
auth_token=None,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
)
try:
pageserver_process = start_in_background(
@@ -360,7 +359,6 @@ def test_tenant_relocation(
new_pageserver_http = PageserverHttpClient(
port=new_pageserver_http_port,
auth_token=None,
is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
)

with new_pageserver_service(