diff --git a/.cargo/config.toml b/.cargo/config.toml index d70d57a817..c40783bc1b 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -13,4 +13,4 @@ opt-level = 3 opt-level = 1 [alias] -build_testing = ["build", "--features", "failpoints"] +build_testing = ["build", "--features", "testing"] diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 0b6cb21120..44db968753 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -100,11 +100,11 @@ jobs: run: | if [[ $BUILD_TYPE == "debug" ]]; then cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run" - CARGO_FEATURES="--features failpoints" + CARGO_FEATURES="--features testing" CARGO_FLAGS="--locked --timings $CARGO_FEATURES" elif [[ $BUILD_TYPE == "release" ]]; then cov_prefix="" - CARGO_FEATURES="--features failpoints,profiling" + CARGO_FEATURES="--features testing,profiling" CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES" fi echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV diff --git a/README.md b/README.md index 03ed57a0fa..dc469c36b1 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,12 @@ Ensure your dependencies are installed as described [here](https://github.com/ne ```sh git clone --recursive https://github.com/neondatabase/neon.git -make # builds also postgres and installs it to ./pg_install + +# either: +CARGO_BUILD_FLAGS="--features=testing" make +# or: +make debug + ./scripts/pytest ``` diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index ce09e788bd..85ece97d9b 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -5,10 +5,10 @@ edition = "2021" [features] default = [] +# Enables test-only APIs, including failpoints. In particular, enables the `fail_point!` macro, +# which adds some runtime cost, so that tests can exercise outage conditions +testing = ["fail/failpoints"] -# Feature that enables a special API, fail_point! macro (adds some runtime cost) -# to run tests on outage conditions -failpoints = ["fail/failpoints"] profiling = ["pprof"] [dependencies] diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 92d5eab379..fb79ad3945 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -87,8 +87,8 @@ fn main() -> anyhow::Result<()> { if arg_matches.is_present("enabled-features") { let features: &[&str] = &[ - #[cfg(feature = "failpoints")] - "failpoints", + #[cfg(feature = "testing")] + "testing", #[cfg(feature = "profiling")] "profiling", ]; diff --git a/pageserver/src/http/models.rs b/pageserver/src/http/models.rs index c0dc5b9677..2d7d560d2a 100644 --- a/pageserver/src/http/models.rs +++ b/pageserver/src/http/models.rs @@ -160,3 +160,21 @@ pub struct TimelineInfo { pub local: Option, pub remote: Option, } + +pub type ConfigureFailpointsRequest = Vec<FailpointConfig>; + +/// Information for configuring a single fail point +#[derive(Debug, Serialize, Deserialize)] +pub struct FailpointConfig { + /// Name of the fail point + pub name: String, + /// List of actions to take, using the format described in `fail::cfg` + /// + /// We also support `actions = "exit"`, which causes the process to exit immediately when the fail point is hit. + pub actions: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TimelineGcRequest { + pub gc_horizon: Option<u64>, +}
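For orientation: the new `ConfigureFailpointsRequest` body is just a JSON list of `{name, actions}` objects sent with `PUT /v1/failpoints`. A rough sketch of the wire format follows; the port is a placeholder for the pageserver's configured HTTP address, and the failpoint names are ones used by the tests later in this diff.

```python
import requests

PAGESERVER_HTTP = "http://localhost:9898"  # placeholder: the pageserver's configured HTTP port

# Configure two failpoints in one request: a delay, and the special "exit" action
# that the handler translates into std::process::exit(1).
resp = requests.put(
    f"{PAGESERVER_HTTP}/v1/failpoints",
    json=[
        {"name": "flush-frozen-before-sync", "actions": "sleep(2000)"},
        {"name": "checkpoint-after-sync", "actions": "exit"},
    ],
)
resp.raise_for_status()  # on success the handler returns 200 with a JSON null body
```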
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 2e49429f38..bfc9e4462b 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -29,6 +29,12 @@ use utils::{ lsn::Lsn, }; +// Imports only used for testing APIs +#[cfg(feature = "testing")] +use super::models::{ConfigureFailpointsRequest, TimelineGcRequest}; +#[cfg(feature = "testing")] +use crate::CheckpointConfig; + struct State { conf: &'static PageServerConf, auth: Option>, @@ -661,6 +667,103 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> +#[cfg(feature = "testing")] +async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> { + if !fail::has_failpoints() { + return Err(ApiError::BadRequest( + "Cannot manage failpoints because pageserver was compiled without failpoints support" .to_owned(), + )); + } + + let failpoints: ConfigureFailpointsRequest = json_request(&mut request).await?; + for fp in failpoints { + info!("cfg failpoint: {} {}", fp.name, fp.actions); + + // We recognize one extra "action" that's not natively recognized + // by the failpoints crate: exit, to immediately kill the process + let cfg_result = if fp.actions == "exit" { + fail::cfg_callback(fp.name, || { + info!("Exit requested by failpoint"); + std::process::exit(1); + }) + } else { + fail::cfg(fp.name, &fp.actions) + }; + + if let Err(err_msg) = cfg_result { + return Err(ApiError::BadRequest(format!( + "Failed to configure failpoints: {err_msg}" + ))); + } + } + + json_response(StatusCode::OK, ()) +} + +// Run GC immediately on given timeline. +// FIXME: This is just for tests. See test_runner/regress/test_gc.py. +// This probably should require special authentication or a global flag to +// enable; I don't think we want or need to allow regular clients to invoke +// GC. +// @hllinnaka in commits ec44f4b29, 3aca717f3 +#[cfg(feature = "testing")] +async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> { + let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_id))?; + + // FIXME: currently this will return a 500 error on bad tenant id; it should be 4XX + let repo = tenant_mgr::get_tenant(tenant_id, false)?; + let gc_req: TimelineGcRequest = json_request(&mut request).await?; + + let _span_guard = + info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id).entered(); + let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| repo.get_gc_horizon()); + + // Use tenant's pitr setting + let pitr = repo.get_pitr_interval(); + let result = repo.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)?; + json_response(StatusCode::OK, result) +}
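The GC handler above returns the `GcResult` produced by `gc_iteration`, serialized as JSON; the `Serialize` derive and the milliseconds serializer for `elapsed` are added to `pageserver/src/repository.rs` further down in this diff. A minimal sketch of how a test consumes it, assuming the usual fixtures (`env`, `tenant_id`, `timeline_id`, and the `log` helper) are in scope:

```python
pageserver_http = env.pageserver.http_client()

# gc_horizon=0 asks for the most aggressive horizon; passing None falls back to
# the tenant's configured gc_horizon (see gc_req.gc_horizon.unwrap_or_else(...) above).
result = pageserver_http.timeline_gc(tenant_id, timeline_id, gc_horizon=0)

# The JSON body mirrors GcResult; `elapsed` is an integer number of milliseconds.
log.info(
    "GC removed %s of %s layers in %s ms",
    result["layers_removed"],
    result["layers_total"],
    result["elapsed"],
)
```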
+ +// Run compaction immediately on given timeline. +// FIXME This is just for tests. Don't expect this to be exposed to +// the users or the api. +// @dhammika in commit a0781f229 +#[cfg(feature = "testing")] +async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> { + let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_id))?; + + let repo = tenant_mgr::get_tenant(tenant_id, true)?; + // FIXME: currently this will return a 500 error on bad timeline id; it should be 4XX + let timeline = repo.get_timeline(timeline_id).with_context(|| { + format!("No timeline {timeline_id} in repository for tenant {tenant_id}") + })?; + timeline.compact()?; + + json_response(StatusCode::OK, ()) +} + +// Run checkpoint immediately on given timeline. +#[cfg(feature = "testing")] +async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> { + let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_id))?; + + let repo = tenant_mgr::get_tenant(tenant_id, true)?; + // FIXME: currently this will return a 500 error on bad timeline id; it should be 4XX + let timeline = repo.get_timeline(timeline_id).with_context(|| { + format!("No timeline {timeline_id} in repository for tenant {tenant_id}") + })?; + timeline.checkpoint(CheckpointConfig::Forced)?; + + json_response(StatusCode::OK, ()) +} + async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> { json_response( StatusCode::NOT_FOUND, @@ -687,12 +790,38 @@ pub fn make_router( })) } + macro_rules! testing_api { + ($handler_desc:literal, $handler:path $(,)?) => {{ + #[cfg(not(feature = "testing"))] + async fn cfg_disabled(_req: Request<Body>) -> Result<Response<Body>, ApiError> { + Err(ApiError::BadRequest( + concat!( + "Cannot ", + $handler_desc, + " because pageserver was compiled without testing APIs", + ) + .to_owned(), + )) + } + + #[cfg(feature = "testing")] + let handler = $handler; + #[cfg(not(feature = "testing"))] + let handler = cfg_disabled; + handler + }}; + } + Ok(router .data(Arc::new( State::new(conf, auth, remote_index, remote_storage) .context("Failed to initialize router state")?, )) .get("/v1/status", status_handler) + .put( + "/v1/failpoints", + testing_api!("manage failpoints", failpoints_handler), + ) .get("/v1/tenant", tenant_list_handler) .post("/v1/tenant", tenant_create_handler) .get("/v1/tenant/:tenant_id", tenant_status) @@ -705,6 +834,18 @@ pub fn make_router( "/v1/tenant/:tenant_id/timeline/:timeline_id", timeline_detail_handler, ) + .put( + "/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", + testing_api!("run timeline GC", timeline_gc_handler), + ) + .put( + "/v1/tenant/:tenant_id/timeline/:timeline_id/compact", + testing_api!("run timeline compaction", timeline_compact_handler), + ) + .put( + "/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint", + testing_api!("run timeline checkpoint", timeline_checkpoint_handler), + ) .delete( "/v1/tenant/:tenant_id/timeline/:timeline_id", timeline_delete_handler, diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 1461a6d117..9e159f7391 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -27,7 +27,7 @@ use utils::{ lsn::Lsn, postgres_backend::AuthType, postgres_backend_async::{self, PostgresBackend}, - pq_proto::{BeMessage, FeMessage, RowDescriptor, SINGLE_COL_ROWDESC}, + pq_proto::{BeMessage, FeMessage, RowDescriptor}, simple_rcu::RcuReadGuard, }; @@ -1005,31 +1005,6 @@ impl
postgres_backend_async::Handler for PageServerHandler { // important because psycopg2 executes "SET datestyle TO 'ISO'" // on connect pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; - } else if query_string.starts_with("failpoints ") { - ensure!(fail::has_failpoints(), "Cannot manage failpoints because pageserver was compiled without failpoints support"); - - let (_, failpoints) = query_string.split_at("failpoints ".len()); - - for failpoint in failpoints.split(';') { - if let Some((name, actions)) = failpoint.split_once('=') { - info!("cfg failpoint: {} {}", name, actions); - - // We recognize one extra "action" that's not natively recognized - // by the failpoints crate: exit, to immediately kill the process - if actions == "exit" { - fail::cfg_callback(name, || { - info!("Exit requested by failpoint"); - std::process::exit(1); - }) - .unwrap(); - } else { - fail::cfg(name, actions).unwrap(); - } - } else { - bail!("Invalid failpoints format"); - } - } - pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; } else if query_string.starts_with("show ") { // show let (_, params_raw) = query_string.split_at("show ".len()); @@ -1072,94 +1047,6 @@ impl postgres_backend_async::Handler for PageServerHandler { Some(tenant.get_pitr_interval().as_secs().to_string().as_bytes()), ]))? .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; - } else if query_string.starts_with("do_gc ") { - // Run GC immediately on given timeline. - // FIXME: This is just for tests. See test_runner/regress/test_gc.py. - // This probably should require special authentication or a global flag to - // enable, I don't think we want to or need to allow regular clients to invoke - // GC. - - // do_gc - let re = Regex::new(r"^do_gc ([[:xdigit:]]+)\s([[:xdigit:]]+)($|\s)([[:digit:]]+)?") - .unwrap(); - - let caps = re - .captures(query_string) - .with_context(|| format!("invalid do_gc: '{}'", query_string))?; - - let tenant_id = TenantId::from_str(caps.get(1).unwrap().as_str())?; - let timeline_id = TimelineId::from_str(caps.get(2).unwrap().as_str())?; - - let _span_guard = - info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id).entered(); - - let tenant = tenant_mgr::get_tenant(tenant_id, true)?; - - let gc_horizon: u64 = caps - .get(4) - .map(|h| h.as_str().parse()) - .unwrap_or_else(|| Ok(tenant.get_gc_horizon()))?; - - // Use tenant's pitr setting - let pitr = tenant.get_pitr_interval(); - let result = tenant.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)?; - pgb.write_message(&BeMessage::RowDescription(&[ - RowDescriptor::int8_col(b"layers_total"), - RowDescriptor::int8_col(b"layers_needed_by_cutoff"), - RowDescriptor::int8_col(b"layers_needed_by_pitr"), - RowDescriptor::int8_col(b"layers_needed_by_branches"), - RowDescriptor::int8_col(b"layers_not_updated"), - RowDescriptor::int8_col(b"layers_removed"), - RowDescriptor::int8_col(b"elapsed"), - ]))? - .write_message(&BeMessage::DataRow(&[ - Some(result.layers_total.to_string().as_bytes()), - Some(result.layers_needed_by_cutoff.to_string().as_bytes()), - Some(result.layers_needed_by_pitr.to_string().as_bytes()), - Some(result.layers_needed_by_branches.to_string().as_bytes()), - Some(result.layers_not_updated.to_string().as_bytes()), - Some(result.layers_removed.to_string().as_bytes()), - Some(result.elapsed.as_millis().to_string().as_bytes()), - ]))? - .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; - } else if query_string.starts_with("compact ") { - // Run compaction immediately on given timeline. 
- // FIXME This is just for tests. Don't expect this to be exposed to - // the users or the api. - - // compact - let re = Regex::new(r"^compact ([[:xdigit:]]+)\s([[:xdigit:]]+)($|\s)?").unwrap(); - - let caps = re - .captures(query_string) - .with_context(|| format!("Invalid compact: '{}'", query_string))?; - - let tenant_id = TenantId::from_str(caps.get(1).unwrap().as_str())?; - let timeline_id = TimelineId::from_str(caps.get(2).unwrap().as_str())?; - let timeline = get_local_timeline(tenant_id, timeline_id)?; - timeline.compact()?; - - pgb.write_message(&SINGLE_COL_ROWDESC)? - .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; - } else if query_string.starts_with("checkpoint ") { - // Run checkpoint immediately on given timeline. - - // checkpoint - let re = Regex::new(r"^checkpoint ([[:xdigit:]]+)\s([[:xdigit:]]+)($|\s)?").unwrap(); - - let caps = re - .captures(query_string) - .with_context(|| format!("invalid checkpoint command: '{}'", query_string))?; - - let tenant_id = TenantId::from_str(caps.get(1).unwrap().as_str())?; - let timeline_id = TimelineId::from_str(caps.get(2).unwrap().as_str())?; - let timeline = get_local_timeline(tenant_id, timeline_id)?; - - // Checkpoint the timeline and also compact it (due to `CheckpointConfig::Forced`). - timeline.checkpoint(CheckpointConfig::Forced)?; - - pgb.write_message(&SINGLE_COL_ROWDESC)? - .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; } else if query_string.starts_with("get_lsn_by_timestamp ") { // Locate LSN of last transaction with timestamp less or equal than sppecified // TODO lazy static diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index f6ea9d8c5d..cfcc87a2ed 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -176,7 +176,7 @@ impl Value { /// /// Result of performing GC /// -#[derive(Default)] +#[derive(Default, Serialize)] pub struct GcResult { pub layers_total: u64, pub layers_needed_by_cutoff: u64, @@ -185,9 +185,18 @@ pub struct GcResult { pub layers_not_updated: u64, pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files. + #[serde(serialize_with = "serialize_duration_as_millis")] pub elapsed: Duration, } +// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds +fn serialize_duration_as_millis(d: &Duration, serializer: S) -> Result +where + S: serde::Serializer, +{ + d.as_millis().serialize(serializer) +} + impl AddAssign for GcResult { fn add_assign(&mut self, other: Self) { self.layers_total += other.layers_total; diff --git a/test_runner/README.md b/test_runner/README.md index f17a4a5a5d..79b2418af6 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -6,9 +6,9 @@ Prerequisites: - Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python) - Neon and Postgres binaries - See the root [README.md](/README.md) for build directions - If you want to test tests with failpoints, you would need to add `--features failpoints` to Rust code build commands. + If you want to test tests with test-only APIs, you would need to add `--features testing` to Rust code build commands. For convenience, repository cargo config contains `build_testing` alias, that serves as a subcommand, adding the required feature flags. 
- Usage example: `cargo build_testing --release` is equivalent to `cargo build --features failpoints --release` + Usage example: `cargo build_testing --release` is equivalent to `cargo build --features testing --release` - Tests can be run from the git tree; or see the environment variables below to run from other directories. - The neon git repo, including the postgres submodule diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 0c03429f95..1e83ee3839 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -964,6 +964,24 @@ class NeonPageserverHttpClient(requests.Session): def check_status(self): self.get(f"http://localhost:{self.port}/v1/status").raise_for_status() + def configure_failpoints(self, config_strings: tuple[str, str] | list[tuple[str, str]]) -> None: + if isinstance(config_strings, tuple): + pairs = [config_strings] + else: + pairs = config_strings + + log.info(f"Requesting config failpoints: {repr(pairs)}") + + res = self.put( + f"http://localhost:{self.port}/v1/failpoints", + json=[{"name": name, "actions": actions} for name, actions in pairs], + ) + log.info(f"Got failpoints request response code {res.status_code}") + self.verbose_error(res) + res_json = res.json() + assert res_json is None + return res_json + def tenant_list(self) -> List[Dict[Any, Any]]: res = self.get(f"http://localhost:{self.port}/v1/tenant") self.verbose_error(res) @@ -1061,6 +1079,45 @@ class NeonPageserverHttpClient(requests.Session): assert res_json is None return res_json + def timeline_gc( + self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int] + ) -> dict[str, Any]: + log.info( + f"Requesting GC: tenant {tenant_id}, timeline {timeline_id}, gc_horizon {repr(gc_horizon)}" + ) + res = self.put( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/do_gc", + json={"gc_horizon": gc_horizon}, + ) + log.info(f"Got GC request response code: {res.status_code}") + self.verbose_error(res) + res_json = res.json() + assert res_json is not None + assert isinstance(res_json, dict) + return res_json + + def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId): + log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}") + res = self.put( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact" + ) + log.info(f"Got compact request response code: {res.status_code}") + self.verbose_error(res) + res_json = res.json() + assert res_json is None + return res_json + + def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId): + log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}") + res = self.put( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint" + ) + log.info(f"Got checkpoint request response code: {res.status_code}") + self.verbose_error(res) + res_json = res.json() + assert res_json is None + return res_json + def get_metrics(self) -> str: res = self.get(f"http://localhost:{self.port}/metrics") self.verbose_error(res) diff --git a/test_runner/regress/test_ancestor_branch.py b/test_runner/regress/test_ancestor_branch.py index cb2621ff02..d7aebfb938 100644 --- a/test_runner/regress/test_ancestor_branch.py +++ b/test_runner/regress/test_ancestor_branch.py @@ -9,6 +9,7 @@ from fixtures.utils import query_scalar # def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() + pageserver_http = 
env.pageserver.http_client() # Override defaults, 1M gc_horizon and 4M checkpoint_distance. # Extend compaction_period and gc_period to disable background compaction and gc. @@ -23,7 +24,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): } ) - env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)") + pageserver_http.configure_failpoints(("flush-frozen-before-sync", "sleep(10000)")) pg_branch0 = env.postgres.create_start("main", tenant_id=tenant) branch0_cur = pg_branch0.connect().cursor() @@ -92,9 +93,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): log.info(f"LSN after 300k rows: {lsn_300}") # Run compaction on branch1. - compact = f"compact {tenant} {branch1_timeline} {lsn_200}" + compact = f"compact {tenant} {branch1_timeline}" log.info(compact) - env.pageserver.safe_psql(compact) + pageserver_http.timeline_compact(tenant, branch1_timeline) assert query_scalar(branch0_cur, "SELECT count(*) FROM foo") == 100000 diff --git a/test_runner/regress/test_basebackup_error.py b/test_runner/regress/test_basebackup_error.py index 81a46ee2f0..94d3999d17 100644 --- a/test_runner/regress/test_basebackup_error.py +++ b/test_runner/regress/test_basebackup_error.py @@ -9,9 +9,10 @@ from fixtures.neon_fixtures import NeonEnv def test_basebackup_error(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_basebackup_error", "empty") + pageserver_http = env.pageserver.http_client() # Introduce failpoint - env.pageserver.safe_psql("failpoints basebackup-before-control-file=return") + pageserver_http.configure_failpoints(("basebackup-before-control-file", "return")) with pytest.raises(Exception, match="basebackup-before-control-file"): env.postgres.create_start("test_basebackup_error") diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index c8c5929066..12debe50eb 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -47,6 +47,7 @@ from fixtures.utils import query_scalar # could not find data for key ... at LSN ..., for request at LSN ... def test_branch_and_gc(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http_client = env.pageserver.http_client() tenant, _ = env.neon_cli.create_tenant( conf={ @@ -84,7 +85,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): # Set the GC horizon so that lsn1 is inside the horizon, which means # we can create a new branch starting from lsn1. - env.pageserver.safe_psql(f"do_gc {tenant} {timeline_main} {lsn2 - lsn1 + 1024}") + pageserver_http_client.timeline_gc(tenant, timeline_main, lsn2 - lsn1 + 1024) env.neon_cli.create_branch( "test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1 @@ -113,6 +114,8 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): # For more details, see discussion in https://github.com/neondatabase/neon/pull/2101#issuecomment-1185273447. def test_branch_creation_before_gc(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http_client = env.pageserver.http_client() + # Disable background GC but set the `pitr_interval` to be small, so GC can delete something tenant, _ = env.neon_cli.create_tenant( conf={ @@ -147,10 +150,10 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): # Use `failpoint=sleep` and `threading` to make the GC iteration triggers *before* the # branch creation task but the individual timeline GC iteration happens *after* # the branch creation task. 
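Boiled down, the ordering trick described in the comment above is: park each timeline GC iteration on a sleeping failpoint, start the manual GC in a background thread, and perform the racing operation while GC is still asleep. A condensed sketch of that pattern, reusing the `pageserver_http_client`, `tenant` and `b0` names defined in this test:

```python
import threading
import time

# Delay each timeline GC iteration so the GC request starts first but finishes last.
pageserver_http_client.configure_failpoints(("before-timeline-gc", "sleep(2000)"))

# Run the manual GC in the background; it will block inside the failpoint.
gc_thread = threading.Thread(
    target=lambda: pageserver_http_client.timeline_gc(tenant, b0, 0), daemon=True
)
gc_thread.start()

# Give GC time to reach the failpoint, then run the operation being raced
# (here: branch creation at a soon-to-be-GC'd LSN, which the test expects to fail).
time.sleep(1.0)
# ... racing operation goes here ...
gc_thread.join()
```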
- env.pageserver.safe_psql("failpoints before-timeline-gc=sleep(2000)") + pageserver_http_client.configure_failpoints(("before-timeline-gc", "sleep(2000)")) def do_gc(): - env.pageserver.safe_psql(f"do_gc {tenant} {b0} 0") + pageserver_http_client.timeline_gc(tenant, b0, 0) thread = threading.Thread(target=do_gc, daemon=True) thread.start() @@ -161,7 +164,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): time.sleep(1.0) # The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC. - with pytest.raises(Exception, match="invalid branch start lsn"): + with pytest.raises(Exception, match="invalid branch start lsn: .*"): env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn) thread.join() diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index b0d0737172..0e2a8b346b 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -1,4 +1,3 @@ -import psycopg2.extras import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder @@ -96,7 +95,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): assert pg.safe_psql("SELECT 1")[0][0] == 1 # branch at pre-initdb lsn - with pytest.raises(Exception, match="invalid branch start lsn"): + with pytest.raises(Exception, match="invalid branch start lsn: .*"): env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42")) # branch at pre-ancestor lsn @@ -106,13 +105,11 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): ) # check that we cannot create branch based on garbage collected data - with env.pageserver.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: - # call gc to advace latest_gc_cutoff_lsn - pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0") - row = pscur.fetchone() - print_gc_result(row) + with env.pageserver.http_client() as pageserver_http: + gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) + print_gc_result(gc_result) - with pytest.raises(Exception, match="invalid branch start lsn"): + with pytest.raises(Exception, match="invalid branch start lsn: .*"): # this gced_lsn is pretty random, so if gc is disabled this woudln't fail env.neon_cli.create_branch( "test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index fd81981b2b..7baa67935d 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -113,13 +113,14 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv): def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http = env.pageserver.http_client() tenant_id, _ = env.neon_cli.create_tenant() old_tenant_timelines = env.neon_cli.list_timelines(tenant_id) # Introduce failpoint when creating a new timeline - env.pageserver.safe_psql("failpoints before-checkpoint-new-timeline=return") + pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "return")) with pytest.raises(Exception, match="before-checkpoint-new-timeline"): _ = env.neon_cli.create_timeline("test_fix_broken_timelines", tenant_id) diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py index 88d4ad8a6e..332bef225f 100644 --- a/test_runner/regress/test_gc_aggressive.py +++ 
b/test_runner/regress/test_gc_aggressive.py @@ -1,4 +1,5 @@ import asyncio +import concurrent.futures import random from fixtures.log_helper import log @@ -30,10 +31,15 @@ async def update_table(pg: Postgres): # Perform aggressive GC with 0 horizon async def gc(env: NeonEnv, timeline: TimelineId): - psconn = await env.pageserver.connect_async() + pageserver_http = env.pageserver.http_client() - while updates_performed < updates_to_perform: - await psconn.execute(f"do_gc {env.initial_tenant} {timeline} 0") + loop = asyncio.get_running_loop() + + with concurrent.futures.ThreadPoolExecutor() as pool: + while updates_performed < updates_to_perform: + await loop.run_in_executor( + pool, lambda: pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) + ) # At the same time, run UPDATEs and GC diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 7b61b03b97..885a0dc26f 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -270,8 +270,7 @@ def _import( assert os.path.getsize(tar_output_file) == os.path.getsize(new_tar_output_file) # Check that gc works - psconn = env.pageserver.connect() - pscur = psconn.cursor() - pscur.execute(f"do_gc {tenant} {timeline} 0") + pageserver_http = env.pageserver.http_client() + pageserver_http.timeline_gc(tenant, timeline, 0) return tar_output_file diff --git a/test_runner/regress/test_old_request_lsn.py b/test_runner/regress/test_old_request_lsn.py index c99e13f45f..3e387bb6cc 100644 --- a/test_runner/regress/test_old_request_lsn.py +++ b/test_runner/regress/test_old_request_lsn.py @@ -1,4 +1,3 @@ -import psycopg2.extras from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.types import TimelineId @@ -29,8 +28,7 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): # Get the timeline ID of our branch. We need it for the 'do_gc' command timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id")) - psconn = env.pageserver.connect() - pscur = psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) + pageserver_http = env.pageserver.http_client() # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers. @@ -61,9 +59,8 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): # Make a lot of updates on a single row, generating a lot of WAL. Trigger # garbage collections so that the page server will remove old page versions. for i in range(10): - pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0") - gcrow = pscur.fetchone() - print_gc_result(gcrow) + gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) + print_gc_result(gc_result) for j in range(100): cur.execute("UPDATE foo SET val = val + 1 WHERE id = 1;") diff --git a/test_runner/regress/test_pitr_gc.py b/test_runner/regress/test_pitr_gc.py index 57b2ee1c04..d8b7256577 100644 --- a/test_runner/regress/test_pitr_gc.py +++ b/test_runner/regress/test_pitr_gc.py @@ -1,6 +1,3 @@ -from contextlib import closing - -import psycopg2.extras from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.types import TimelineId @@ -54,13 +51,11 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): log.info(f"LSN after 10000 rows: {debug_lsn} xid {debug_xid}") # run GC - with closing(env.pageserver.connect()) as psconn: - with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: - pscur.execute(f"compact {env.initial_tenant} {timeline}") - # perform aggressive GC. 
Data still should be kept because of the PITR setting. - pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0") - row = pscur.fetchone() - print_gc_result(row) + with env.pageserver.http_client() as pageserver_http: + pageserver_http.timeline_compact(env.initial_tenant, timeline) + # perform aggressive GC. Data still should be kept because of the PITR setting. + gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) + print_gc_result(gc_result) # Branch at the point where only 100 rows were inserted # It must have been preserved by PITR setting diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index 3be64e077f..dfa57aec25 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -106,6 +106,7 @@ def test_readonly_node(neon_simple_env: NeonEnv): # Similar test, but with more data, and we force checkpoints def test_timetravel(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http_client = env.pageserver.http_client() env.neon_cli.create_branch("test_timetravel", "empty") pg = env.postgres.create_start("test_timetravel") @@ -136,7 +137,7 @@ def test_timetravel(neon_simple_env: NeonEnv): wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn) # run checkpoint manually to force a new layer file - env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}") + pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id) ##### Restart pageserver env.postgres.stop_all() diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index 08c15d8f09..d0ba96e8e0 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -1,7 +1,6 @@ import time from contextlib import closing -import psycopg2.extras from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder @@ -19,8 +18,8 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): f = env.neon_cli.pageserver_enabled_features() assert ( - "failpoints" in f["features"] - ), "Build pageserver with --features=failpoints option to run this test" + "testing" in f["features"] + ), "Build pageserver with --features=testing option to run this test" neon_env_builder.start() # Create a branch for us @@ -31,26 +30,28 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): with closing(pg.connect()) as conn: with conn.cursor() as cur: - with closing(env.pageserver.connect()) as psconn: - with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: - # Create and initialize test table - cur.execute("CREATE TABLE foo(x bigint)") - cur.execute("INSERT INTO foo VALUES (generate_series(1,100000))") + with env.pageserver.http_client() as pageserver_http: + # Create and initialize test table + cur.execute("CREATE TABLE foo(x bigint)") + cur.execute("INSERT INTO foo VALUES (generate_series(1,100000))") - # Sleep for some time to let checkpoint create image layers - time.sleep(2) + # Sleep for some time to let checkpoint create image layers + time.sleep(2) - # Configure failpoints - pscur.execute( - "failpoints flush-frozen-before-sync=sleep(2000);checkpoint-after-sync=exit" - ) + # Configure failpoints + pageserver_http.configure_failpoints( + [ + ("flush-frozen-before-sync", "sleep(2000)"), + ("checkpoint-after-sync", "exit"), + ] + ) - # Do some updates until pageserver is crashed - try: - while True: - cur.execute("update foo set x=x+1") - except Exception as err: - log.info(f"Expected server crash 
{err}") + # Do some updates until pageserver is crashed + try: + while True: + cur.execute("update foo set x=x+1") + except Exception as err: + log.info(f"Expected server crash {err}") log.info("Wait before server restart") env.pageserver.stop() diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index cbe74cad5c..3e775b10b0 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -57,6 +57,7 @@ def test_remote_storage_backup_and_restore( ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() + pageserver_http = env.pageserver.http_client() pg = env.postgres.create_start("main") client = env.pageserver.http_client() @@ -80,7 +81,7 @@ def test_remote_storage_backup_and_restore( wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn) # run checkpoint manually to be sure that data landed in remote storage - env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}") + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) log.info(f"waiting for checkpoint {checkpoint_number} upload") # wait until pageserver successfully uploaded a checkpoint to remote storage @@ -99,7 +100,7 @@ def test_remote_storage_backup_and_restore( env.pageserver.start() # Introduce failpoint in download - env.pageserver.safe_psql("failpoints remote-storage-download-pre-rename=return") + pageserver_http.configure_failpoints(("remote-storage-download-pre-rename", "return")) client.tenant_attach(tenant_id) diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index e3c9a091f9..f18e6867a9 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -1,16 +1,21 @@ from threading import Thread -import psycopg2 import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + NeonPageserverApiException, + NeonPageserverHttpClient, +) from fixtures.types import TenantId, TimelineId -def do_gc_target(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId): +def do_gc_target( + pageserver_http: NeonPageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId +): """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211""" try: - env.pageserver.safe_psql(f"do_gc {tenant_id} {timeline_id} 0") + pageserver_http.timeline_gc(tenant_id, timeline_id, 0) except Exception as e: log.error("do_gc failed: %s", e) @@ -44,13 +49,13 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # gc should not try to even start with pytest.raises( - expected_exception=psycopg2.DatabaseError, match="gc target timeline does not exist" + expected_exception=NeonPageserverApiException, match="gc target timeline does not exist" ): bogus_timeline_id = TimelineId.generate() - env.pageserver.safe_psql(f"do_gc {tenant_id} {bogus_timeline_id} 0") + pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0) # try to concurrently run gc and detach - gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id)) + gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id)) gc_thread.start() last_error = None @@ -73,6 +78,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): assert not (env.repo_dir / "tenants" / str(tenant_id)).exists() with pytest.raises( - 
expected_exception=psycopg2.DatabaseError, match=f"Tenant {tenant_id} not found" + expected_exception=NeonPageserverApiException, match=f"Tenant {tenant_id} not found" ): - env.pageserver.safe_psql(f"do_gc {tenant_id} {timeline_id} 0") + pageserver_http.timeline_gc(tenant_id, timeline_id, 0) diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index aa7d92f1fd..2b01546198 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -147,14 +147,13 @@ def populate_branch( def ensure_checkpoint( - pageserver_cur, pageserver_http: NeonPageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId, current_lsn: Lsn, ): # run checkpoint manually to be sure that data landed in remote storage - pageserver_cur.execute(f"checkpoint {tenant_id} {timeline_id}") + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) # wait until pageserver successfully uploaded a checkpoint to remote storage wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) @@ -324,22 +323,19 @@ def test_tenant_relocation( # this requirement introduces a problem # if user creates a branch during migration # it wont appear on the new pageserver - with pg_cur(env.pageserver) as cur: - ensure_checkpoint( - cur, - pageserver_http=pageserver_http, - tenant_id=tenant_id, - timeline_id=timeline_id_main, - current_lsn=current_lsn_main, - ) + ensure_checkpoint( + pageserver_http=pageserver_http, + tenant_id=tenant_id, + timeline_id=timeline_id_main, + current_lsn=current_lsn_main, + ) - ensure_checkpoint( - cur, - pageserver_http=pageserver_http, - tenant_id=tenant_id, - timeline_id=timeline_id_second, - current_lsn=current_lsn_second, - ) + ensure_checkpoint( + pageserver_http=pageserver_http, + tenant_id=tenant_id, + timeline_id=timeline_id_second, + current_lsn=current_lsn_second, + ) log.info("inititalizing new pageserver") # bootstrap second pageserver diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 4500395c8f..52b9e6369c 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -19,7 +19,8 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv): ) initial_tenant_dirs = set([d for d in tenants_dir.iterdir()]) - neon_simple_env.pageserver.safe_psql("failpoints tenant-creation-before-tmp-rename=return") + pageserver_http = neon_simple_env.pageserver.http_client() + pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return")) with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"): _ = neon_simple_env.neon_cli.create_tenant() diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index 85f371c845..83affac062 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -91,5 +91,5 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn) # run final checkpoint manually to flush all the data to remote storage - env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}") + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 83018f46f5..979d1a107f 100644 
--- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -238,6 +238,7 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv): def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint") pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint") @@ -251,7 +252,7 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"checkpoint {env.initial_tenant} {new_timeline_id}") + pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) assert_physical_size(env, env.initial_tenant, new_timeline_id) @@ -264,6 +265,7 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder ) env = neon_env_builder.init_start() + pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") pg = env.postgres.create_start("test_timeline_physical_size_post_compaction") @@ -278,8 +280,8 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"checkpoint {env.initial_tenant} {new_timeline_id}") - env.pageserver.safe_psql(f"compact {env.initial_tenant} {new_timeline_id}") + pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) + pageserver_http.timeline_compact(env.initial_tenant, new_timeline_id) assert_physical_size(env, env.initial_tenant, new_timeline_id) @@ -290,6 +292,7 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}" env = neon_env_builder.init_start() + pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc") pg = env.postgres.create_start("test_timeline_physical_size_post_gc") @@ -304,7 +307,7 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"checkpoint {env.initial_tenant} {new_timeline_id}") + pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) pg.safe_psql( """ @@ -315,9 +318,9 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"checkpoint {env.initial_tenant} {new_timeline_id}") + pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"do_gc {env.initial_tenant} {new_timeline_id} 0") + pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None) assert_physical_size(env, env.initial_tenant, new_timeline_id) @@ -326,6 +329,7 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): # Test the metrics. 
def test_timeline_size_metrics(neon_simple_env: NeonEnv): env = neon_simple_env + pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics") pg = env.postgres.create_start("test_timeline_size_metrics") @@ -340,7 +344,7 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv): ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) - env.pageserver.safe_psql(f"checkpoint {env.initial_tenant} {new_timeline_id}") + pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) # get the metrics and parse the metric for the current timeline's physical size metrics = env.pageserver.http_client().get_metrics() @@ -382,6 +386,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): random.seed(100) env = neon_simple_env + pageserver_http = env.pageserver.http_client() client = env.pageserver.http_client() tenant, timeline = env.neon_cli.create_tenant() @@ -405,7 +410,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): ) wait_for_last_flush_lsn(env, pg, tenant, timeline) - env.pageserver.safe_psql(f"checkpoint {tenant} {timeline}") + pageserver_http.timeline_checkpoint(tenant, timeline) timeline_total_size += get_timeline_physical_size(timeline) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 089ed91c98..931de0f1e3 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -59,9 +59,7 @@ def wait_lsn_force_checkpoint( ) # force checkpoint to advance remote_consistent_lsn - with closing(ps.connect(**pageserver_conn_options)) as psconn: - with psconn.cursor() as pscur: - pscur.execute(f"checkpoint {tenant_id} {timeline_id}") + ps.http_client(auth_token).timeline_checkpoint(tenant_id, timeline_id) # ensure that remote_consistent_lsn is advanced wait_for_upload(
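Taken together, the test changes above replace the old libpq debug commands with the HTTP client methods added in `neon_fixtures.py`. As a rough cheat sheet, assuming `env`, `tenant_id` and `timeline_id` from the usual fixtures:

```python
pageserver_http = env.pageserver.http_client()

# "failpoints <name>=<actions>"          ->  PUT /v1/failpoints
pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return"))

# "do_gc <tenant> <timeline> <horizon>"  ->  PUT .../timeline/<timeline_id>/do_gc
gc_result = pageserver_http.timeline_gc(tenant_id, timeline_id, 0)

# "compact <tenant> <timeline>"          ->  PUT .../timeline/<timeline_id>/compact
pageserver_http.timeline_compact(tenant_id, timeline_id)

# "checkpoint <tenant> <timeline>"       ->  PUT .../timeline/<timeline_id>/checkpoint
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
```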