Compare commits


4 Commits

Author          SHA1        Message                                                            Date
Conrad Ludgate  87c793f58c  log spec json and return parse error; dont cancel e2e test early  2024-10-04 11:37:48 +01:00
Conrad Ludgate  52a7d780ad  test                                                               2024-10-04 10:32:18 +01:00
Conrad Ludgate  2255a8ebac  minor changes to local_proxy                                       2024-10-04 09:18:43 +01:00
Conrad Ludgate  e109d5aac0  add local_proxy to computespec                                     2024-10-04 09:16:21 +01:00
119 changed files with 1284 additions and 1921 deletions

View File

@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }}
cancel-previous-e2e-tests:
needs: [ check-permissions ]
needs: [ check-permissions, promote-images, tag ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
@@ -518,7 +518,7 @@ jobs:
trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag ]
needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ]
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit

Cargo.lock generated
View File

@@ -1265,7 +1265,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"cfg-if",
"chrono",
"clap",
@@ -7335,6 +7334,12 @@ version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"aws-config",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1",
"base64ct",
"bitflags 2.4.1",
@@ -7408,6 +7413,7 @@ dependencies = [
"tracing",
"tracing-core",
"url",
"uuid",
"zeroize",
"zstd",
"zstd-safe",

View File

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }

View File

@@ -1075,20 +1075,6 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=
#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
@@ -1227,10 +1213,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

View File

@@ -19,10 +19,6 @@ commands:
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -11,7 +11,7 @@ testing = []
[dependencies]
anyhow.workspace = true
camino.workspace = true
# camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true

View File

@@ -34,7 +34,6 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -887,11 +886,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
@@ -942,19 +936,6 @@ impl ComputeNode {
});
}
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}
// Write new config
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -1042,19 +1023,6 @@ impl ComputeNode {
});
}
if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}
info!(
"start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions

View File

@@ -264,68 +264,72 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
Ok(())
})
.await
.unwrap()?;
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
}
Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
}
}
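The hunk above replaces `if let Ok(..)` with a full `match`, so a malformed request body is logged and the serde parse error is returned with a 400 instead of a generic "invalid spec" message. A minimal, self-contained sketch of that pattern; the type and helper names are simplified stand-ins, not the real compute_tools handler:

```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct ConfigurationRequest {
    // Stand-in for the real ComputeSpec payload.
    spec: serde_json::Value,
}

fn parse_request(body: &str) -> Result<ConfigurationRequest, (String, u16)> {
    match serde_json::from_str::<ConfigurationRequest>(body) {
        Ok(req) => Ok(req),
        Err(err) => {
            // Log the raw body and surface the parse error to the caller,
            // mirroring the error arm added in the hunk above.
            eprintln!("could not parse spec: {body}");
            Err((format!("invalid spec: {err:?}"), 400))
        }
    }
}

fn main() {
    assert!(parse_request(r#"{"spec": {}}"#).is_ok());
    assert!(parse_request("not json").is_err());
}
```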

View File

@@ -15,7 +15,7 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
pub mod local_proxy;
// pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;

View File

@@ -109,6 +109,7 @@ pub struct ComputeSpec {
/// Local Proxy configuration used for JWT authentication
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>,
}
@@ -282,7 +283,7 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;
/// Configured the local_proxy application with the relevant JWKS and roles it should
/// Configured the local-proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec {
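The new `local_proxy_config` field is optional on both ends: `#[serde(default)]` lets older specs omit it, and `skip_serializing_if` keeps it out of the serialized spec when unset. A small self-contained sketch of that behaviour, assuming a made-up field inside `LocalProxySpec` purely for illustration:

```rust
use serde::{Deserialize, Serialize};

#[derive(Clone, Debug, Deserialize, Serialize)]
struct LocalProxySpec {
    jwks_url: String, // hypothetical field, only for this example
}

#[derive(Debug, Deserialize, Serialize)]
struct ComputeSpec {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    local_proxy_config: Option<LocalProxySpec>,
}

fn main() {
    // Older specs without the field still deserialize...
    let spec: ComputeSpec = serde_json::from_str("{}").unwrap();
    assert!(spec.local_proxy_config.is_none());
    // ...and an unset field is omitted when serializing back out.
    assert_eq!(serde_json::to_string(&spec).unwrap(), "{}");
}
```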

View File

@@ -14,7 +14,7 @@ use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
};
pub struct AzureBlobStorage {
@@ -259,7 +259,6 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() {
match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)),
}
@@ -485,16 +484,11 @@ impl RemoteStorage for AzureBlobStorage {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let mut builder = blob_client.get();
if let Some(ref etag) = opts.etag {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
}
let builder = blob_client.get();
self.download_for_builder(builder, cancel).await
}

View File

@@ -5,8 +5,6 @@ pub enum DownloadError {
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
@@ -26,7 +24,6 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -41,7 +38,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) | NotFound | Unmodified | Cancelled => true,
BadInput(_) | NotFound | Cancelled => true,
Timeout | Other(_) => false,
}
}

View File

@@ -161,14 +161,6 @@ pub struct Listing {
pub keys: Vec<ListingObject>,
}
/// Options for downloads. The default value is a plain GET.
#[derive(Default)]
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
@@ -253,7 +245,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
@@ -410,18 +401,16 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}
/// See [`RemoteStorage::download`]
pub async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => s.download(from, opts, cancel).await,
Self::AwsS3(s) => s.download(from, opts, cancel).await,
Self::AzureBlob(s) => s.download(from, opts, cancel).await,
Self::Unreliable(s) => s.download(from, opts, cancel).await,
Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await,
}
}
@@ -583,7 +572,7 @@ impl GenericRemoteStorage {
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, &DownloadOpts::default(), cancel).await,
None => self.download(from, cancel).await,
}
}
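These hunks revert the conditional-download API: the `DownloadOpts { etag }` argument (If-None-Match semantics plus a dedicated `DownloadError::Unmodified`) is dropped from `RemoteStorage::download`, which goes back to an unconditional GET. A compact stand-alone sketch of the conditional form being removed, with every type reduced to a simplified stand-in:

```rust
#[derive(Default)]
struct DownloadOpts {
    /// If set, ask the backend to report `Unmodified` when the object still
    /// carries this ETag (If-None-Match).
    etag: Option<String>,
}

#[derive(Debug)]
enum DownloadError {
    NotFound,
    Unmodified,
    Other(String),
}

struct Download {
    etag: String,
    body: Vec<u8>,
}

trait RemoteStorage {
    // The conditional signature that the revert removes.
    fn download(&self, path: &str, opts: &DownloadOpts) -> Result<Download, DownloadError>;
}

struct InMemory {
    etag: String,
    body: Vec<u8>,
}

impl RemoteStorage for InMemory {
    fn download(&self, _path: &str, opts: &DownloadOpts) -> Result<Download, DownloadError> {
        // Mirror the local_fs behaviour in the diff: compare ETags up front
        // and short-circuit with `Unmodified` instead of streaming the body.
        if opts.etag.as_deref() == Some(self.etag.as_str()) {
            return Err(DownloadError::Unmodified);
        }
        Ok(Download { etag: self.etag.clone(), body: self.body.clone() })
    }
}

fn main() {
    let store = InMemory { etag: "v1".into(), body: b"hello".to_vec() };
    let first = store.download("key", &DownloadOpts::default()).unwrap();
    assert_eq!(first.body, b"hello".to_vec());
    let again = store.download("key", &DownloadOpts { etag: Some(first.etag) });
    assert!(matches!(again, Err(DownloadError::Unmodified)));
}
```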

View File

@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension;
use crate::{
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};
use super::{RemoteStorage, StorageMetadata};
@@ -494,17 +494,11 @@ impl RemoteStorage for LocalFs {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?;
let etag = mock_etag(&file_metadata);
if opts.etag.as_ref() == Some(&etag) {
return Err(DownloadError::Unmodified);
}
let source = ReaderStream::new(
fs::OpenOptions::new()
@@ -525,6 +519,7 @@ impl RemoteStorage for LocalFs {
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
@@ -697,7 +692,7 @@ mod fs_tests {
) -> anyhow::Result<String> {
let cancel = CancellationToken::new();
let download = storage
.download(remote_storage_path, &DownloadOpts::default(), &cancel)
.download(remote_storage_path, &cancel)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
@@ -778,8 +773,8 @@ mod fs_tests {
"We should upload and download the same contents"
);
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -1106,13 +1101,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body");
@@ -1123,13 +1112,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(shorter, read);
Ok(())
}
@@ -1162,13 +1145,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
Ok(())

View File

@@ -28,7 +28,6 @@ use aws_sdk_s3::{
Client,
};
use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
@@ -45,8 +44,8 @@ use crate::{
error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
@@ -68,7 +67,6 @@ pub struct S3Bucket {
struct GetObjectRequest {
bucket: String,
key: String,
etag: Option<String>,
range: Option<String>,
}
impl S3Bucket {
@@ -250,18 +248,13 @@ impl S3Bucket {
let started_at = start_measuring_requests(kind);
let mut builder = self
let get_object = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range);
if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}
let get_object = builder.send();
.set_range(request.range)
.send();
let get_object = tokio::select! {
res = get_object => res,
@@ -284,20 +277,6 @@ impl S3Bucket {
);
return Err(DownloadError::NotFound);
}
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
@@ -794,7 +773,6 @@ impl RemoteStorage for S3Bucket {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from`
@@ -803,7 +781,6 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: opts.etag.as_ref().map(|e| e.to_string()),
range: None,
},
cancel,
@@ -830,7 +807,6 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: None,
range,
},
cancel,

View File

@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken;
use crate::{
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
RemoteStorage, StorageMetadata, TimeTravelError,
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata, TimeTravelError,
};
pub struct UnreliableWrapper {
@@ -167,12 +167,11 @@ impl RemoteStorage for UnreliableWrapper {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner.download(from, opts, cancel).await
self.inner.download(from, cancel).await
}
async fn download_byte_range(

View File

@@ -1,7 +1,8 @@
use anyhow::Context;
use camino::Utf8Path;
use futures::StreamExt;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use remote_storage::ListingMode;
use remote_storage::RemotePath;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
@@ -283,10 +284,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?;
// Normal download request
let dl = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -339,54 +337,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(())
}
/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;
// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -414,10 +364,7 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?;
let dl = ctx
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path_dest, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
@@ -429,56 +376,3 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
Ok(())
}
/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);
// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);
// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;
assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);
Ok(())
}

View File

@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
use remote_storage::{
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageConfig, RemoteStorageKind, S3Config,
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
@@ -121,8 +121,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
// A little check to ensure that our clock is not too far off from the S3 clock
{
let opts = DownloadOpts::default();
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
@@ -160,12 +159,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content
@@ -176,12 +170,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1
@@ -427,7 +416,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now();
let mut stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
@@ -502,7 +491,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{
let stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;

View File

@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
@@ -153,9 +153,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(src_path, cancel).await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -206,9 +204,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;
let mut download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let mut download = storage.download(src_path, cancel).await?;
pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -348,9 +344,7 @@ async fn do_download_index_part(
let index_part_bytes = download_retry_forever(
|| async {
let download = storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(&remote_path, cancel).await?;
let mut bytes = Vec::new();
@@ -532,15 +526,10 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
let download = match storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
let download = match storage.download(&remote_path, cancel).await {
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
storage
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
storage.download(&remote_preserved_path, cancel).await?
}
Err(other) => Err(other)?,
};

View File

@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument};
@@ -944,34 +944,36 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
};
backoff::retry(
|| async {
let download = match self
let download = self
.remote_storage
.download(&heatmap_path, &opts, cancel)
.download(&heatmap_path, cancel)
.await
{
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
.map_err(UpdateError::from)?;
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
SECONDARY_MODE.download_heatmap.inc();
if Some(&download.etag) == prev_etag {
Ok(HeatMapDownload::Unmodified)
} else {
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
}
},
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -982,7 +984,6 @@ impl<'a> TenantDownloader<'a> {
.await
.ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
}
/// Download heatmap layers that are not present on local disk, or update their
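With the conditional GET gone, the heatmap poller in the hunk above always downloads the object and compares the previous ETag client-side before deciding whether anything changed. A small sketch of that client-side check, using simplified stand-in types rather than the real secondary-mode structures:

```rust
#[derive(Debug, PartialEq)]
enum HeatMapDownload {
    Unmodified,
    Modified { etag: String, bytes: Vec<u8> },
}

fn classify(prev_etag: Option<&str>, downloaded_etag: &str, bytes: Vec<u8>) -> HeatMapDownload {
    // Client-side equivalent of the removed If-None-Match: skip processing
    // when the ETag has not changed since the last poll.
    if prev_etag == Some(downloaded_etag) {
        HeatMapDownload::Unmodified
    } else {
        HeatMapDownload::Modified { etag: downloaded_etag.to_owned(), bytes }
    }
}

fn main() {
    assert_eq!(classify(Some("abc"), "abc", vec![]), HeatMapDownload::Unmodified);
    assert!(matches!(
        classify(None, "abc", b"hot".to_vec()),
        HeatMapDownload::Modified { .. }
    ));
}
```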

View File

@@ -191,14 +191,13 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
if (!wal_reader)
{
XLogRecPtr basebackupLsn = GetRedoStartLsn();
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
/* should never happen */
if (basebackupLsn == 0)
if (epochStartLsn == 0)
{
elog(ERROR, "unable to start walsender when basebackupLsn is 0");
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
}
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
}
xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen;

View File

@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr,
/// Path of the local proxy config file
#[clap(long, default_value = "./local_proxy.json")]
#[clap(long, default_value = "./localproxy.json")]
config_path: Utf8PathBuf,
/// Path of the local proxy PID file
#[clap(long, default_value = "./local_proxy.pid")]
#[clap(long, default_value = "./localproxy.pid")]
pid_path: Utf8PathBuf,
}

View File

@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn};
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
#[derive(Debug, Error)]
pub(crate) enum ConnectionError {

View File

@@ -1,6 +1,5 @@
use crate::{
auth::backend::ComputeCredentialKeys,
compute::COULD_NOT_CONNECT,
compute::{self, PostgresConnection},
config::RetryConfig,
console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
@@ -16,7 +15,7 @@ use crate::{
use async_trait::async_trait;
use pq_proto::StartupMessageParams;
use tokio::time;
use tracing::{debug, info, warn};
use tracing::{error, info, warn};
use super::retry::ShouldRetryWakeCompute;
@@ -117,6 +116,7 @@ where
node_info.set_keys(user_info.get_keys());
node_info.allow_self_signed_compute = allow_self_signed_compute;
// let mut node_info = credentials.get_node_info(ctx, user_info).await?;
mechanism.update_connect_config(&mut node_info.config);
let retry_type = RetryType::ConnectToCompute;
@@ -139,10 +139,10 @@ where
Err(e) => e,
};
debug!(error = ?err, COULD_NOT_CONNECT);
error!(error = ?err, "could not connect to compute node");
let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
// If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one.
if should_retry(&err, num_retries, connect_to_compute_retry_config) {
Metrics::get().proxy.retries_metric.observe(
@@ -191,7 +191,7 @@ where
}
Err(e) => {
if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
// Don't log an error here, caller will print the error
error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node");
Metrics::get().proxy.retries_metric.observe(
RetriesMetricGroup {
outcome: ConnectOutcome::Failed,
@@ -202,7 +202,7 @@ where
return Err(e.into());
}
warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);
warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node");
}
};
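The logging changes above drop the shared `COULD_NOT_CONNECT` constant in favour of inline messages and add structured fields (`num_retries`, `retriable`) to the `tracing` macros. A minimal runnable sketch of that logging style; the subscriber setup exists only so the example prints something:

```rust
use tracing::{error, warn};

fn report(err: &str, num_retries: u32, retriable: bool) {
    if retriable {
        // Structured fields are captured by name alongside the message.
        warn!(error = ?err, num_retries, retriable, "couldn't connect to compute node");
    } else {
        error!(error = ?err, num_retries, retriable, "couldn't connect to compute node");
    }
}

fn main() {
    tracing_subscriber::fmt::init();
    report("connection refused", 2, true);
    report("connection refused", 5, false);
}
```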

View File

@@ -2,29 +2,21 @@ use utils::lsn::Lsn;
use crate::timeline_manager::StateSnapshot;
/// Get oldest LSN we still need to keep.
///
/// We hold WAL till it is consumed by
/// 1) pageserver (remote_consistent_lsn)
/// 2) s3 offloading.
/// 3) Additionally we must store WAL since last local commit_lsn because
/// that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
/// Get oldest LSN we still need to keep. We hold WAL till it is consumed
/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3
/// offloading.
/// While it is safe to use inmem values for determining horizon,
/// we use persistent to make possible normal states less surprising.
/// All segments covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min;
let mut horizon_lsn = state.cfile_remote_consistent_lsn;
let mut horizon_lsn = min(
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
// we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
}
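The restored doc comment above describes the horizon as the minimum over every WAL consumer's LSN, with `cfile_peer_horizon_lsn` added back into the calculation. A tiny worked example of that computation, using plain `u64` values in place of the `Lsn` newtype; the numbers are made up:

```rust
fn calc_horizon(
    remote_consistent_lsn: u64,
    peer_horizon_lsn: u64,
    backup_lsn: u64,
    extra_horizon_lsn: Option<u64>,
) -> u64 {
    // Keep WAL until both the pageserver and the slowest peer have consumed it.
    let mut horizon = remote_consistent_lsn.min(peer_horizon_lsn);
    // WAL not yet offloaded to s3 must also be kept.
    horizon = horizon.min(backup_lsn);
    if let Some(extra) = extra_horizon_lsn {
        horizon = horizon.min(extra);
    }
    horizon
}

fn main() {
    // Everything before the smallest consumer LSN can be removed.
    assert_eq!(calc_horizon(0x5000, 0x4000, 0x4800, None), 0x4000);
    assert_eq!(calc_horizon(0x5000, 0x4000, 0x4800, Some(0x3000)), 0x3000);
}
```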

View File

@@ -47,7 +47,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn,
// persistent control file values
pub(crate) cfile_commit_lsn: Lsn,
pub(crate) cfile_peer_horizon_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn,
@@ -70,7 +70,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_commit_lsn: state.commit_lsn,
cfile_peer_horizon_lsn: state.peer_horizon_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(),

View File

@@ -526,21 +526,6 @@ pub(crate) enum ReconcileResultRequest {
Stop,
}
#[derive(Clone)]
struct MutationLocation {
node: Node,
generation: Generation,
}
#[derive(Clone)]
struct ShardMutationLocations {
latest: MutationLocation,
other: Vec<MutationLocation>,
}
#[derive(Default, Clone)]
struct TenantMutationLocations(BTreeMap<TenantShardId, ShardMutationLocations>);
impl Service {
pub fn get_config(&self) -> &Config {
&self.config
@@ -3002,83 +2987,38 @@ impl Service {
failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock");
self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
};
let (shard_zero_tid, shard_zero_locations) =
targets.0.pop_first().expect("Must have at least one shard");
assert!(shard_zero_tid.is_shard_zero());
let shard_zero = targets.remove(0);
async fn create_one(
tenant_shard_id: TenantShardId,
locations: ShardMutationLocations,
node: Node,
jwt: Option<String>,
create_req: TimelineCreateRequest,
) -> Result<TimelineInfo, ApiError> {
let latest = locations.latest.node;
tracing::info!(
"Creating timeline on shard {}/{}, attached to node {latest} in generation {:?}",
"Creating timeline on shard {}/{}, attached to node {node}",
tenant_shard_id,
create_req.new_timeline_id,
locations.latest.generation
);
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref());
let client =
PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref());
let timeline_info = client
client
.timeline_create(tenant_shard_id, &create_req)
.await
.map_err(|e| passthrough_api_error(&latest, e))?;
// We propagate timeline creations to all attached locations such that a compute
// for the new timeline is able to start regardless of the current state of the
// tenant shard reconciliation.
for location in locations.other {
tracing::info!(
"Creating timeline on shard {}/{}, stale attached to node {} in generation {:?}",
tenant_shard_id,
create_req.new_timeline_id,
location.node,
location.generation
);
let client = PageserverClient::new(
location.node.get_id(),
location.node.base_url(),
jwt.as_deref(),
);
let res = client
.timeline_create(tenant_shard_id, &create_req)
.await;
if let Err(e) = res {
match e {
mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _) => {
// Tenant might have been detached from the stale location,
// so ignore 404s.
},
_ => {
return Err(passthrough_api_error(&location.node, e));
}
}
}
}
Ok(timeline_info)
.map_err(|e| passthrough_api_error(&node, e))
}
// Because the caller might not provide an explicit LSN, we must do the creation first on a single shard, and then
// use whatever LSN that shard picked when creating on subsequent shards. We arbitrarily use shard zero as the shard
// that will get the first creation request, and propagate the LSN to all the >0 shards.
let timeline_info = create_one(
shard_zero_tid,
shard_zero_locations,
shard_zero.0,
shard_zero.1,
self.config.jwt_token.clone(),
create_req.clone(),
)
@@ -3091,24 +3031,14 @@ impl Service {
}
// Create timeline on remaining shards with number >0
if !targets.0.is_empty() {
if !targets.is_empty() {
// If we had multiple shards, issue requests for the remainder now.
let jwt = &self.config.jwt_token;
self.tenant_for_shards(
targets
.0
.iter()
.map(|t| (*t.0, t.1.latest.node.clone()))
.collect(),
|tenant_shard_id: TenantShardId, _node: Node| {
targets.iter().map(|t| (t.0, t.1.clone())).collect(),
|tenant_shard_id: TenantShardId, node: Node| {
let create_req = create_req.clone();
let mutation_locations = targets.0.remove(&tenant_shard_id).unwrap();
Box::pin(create_one(
tenant_shard_id,
mutation_locations,
jwt.clone(),
create_req,
))
Box::pin(create_one(tenant_shard_id, node, jwt.clone(), create_req))
},
)
.await?;
@@ -3138,7 +3068,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3169,9 +3099,8 @@ impl Service {
// no shard needs to go first/last; the operation should be idempotent
// TODO: it would be great to ensure that all shards return the same error
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let results = self
.tenant_for_shards(locations, |tenant_shard_id, node| {
.tenant_for_shards(targets, |tenant_shard_id, node| {
futures::FutureExt::boxed(config_one(
tenant_shard_id,
timeline_id,
@@ -3202,7 +3131,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3250,9 +3179,8 @@ impl Service {
}
// no shard needs to go first/last; the operation should be idempotent
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let mut results = self
.tenant_for_shards(locations, |tenant_shard_id, node| {
.tenant_for_shards(targets, |tenant_shard_id, node| {
futures::FutureExt::boxed(detach_one(
tenant_shard_id,
timeline_id,
@@ -3299,7 +3227,7 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
@@ -3321,12 +3249,7 @@ impl Service {
}
// no shard needs to go first/last; the operation should be idempotent
let locations = targets
.0
.iter()
.map(|t| (*t.0, t.1.latest.node.clone()))
.collect();
self.tenant_for_shards(locations, |tenant_shard_id, node| {
self.tenant_for_shards(targets, |tenant_shard_id, node| {
futures::FutureExt::boxed(do_one(
tenant_shard_id,
timeline_id,
@@ -3421,11 +3344,11 @@ impl Service {
op: O,
) -> Result<R, ApiError>
where
O: FnOnce(TenantMutationLocations) -> F,
O: FnOnce(Vec<(TenantShardId, Node)>) -> F,
F: std::future::Future<Output = R>,
{
let mutation_locations = {
let mut locations = TenantMutationLocations::default();
let target_gens = {
let mut targets = Vec::new();
// Load the currently attached pageservers for the latest generation of each shard. This can
// run concurrently with reconciliations, and it is not guaranteed that the node we find here
@@ -3476,50 +3399,14 @@ impl Service {
.ok_or(ApiError::Conflict(format!(
"Raced with removal of node {node_id}"
)))?;
let generation = generation.expect("Checked above");
let tenant = locked.tenants.get(&tenant_shard_id);
// TODO(vlad): Abstract the logic that finds stale attached locations
// from observed state into a [`Service`] method.
let other_locations = match tenant {
Some(tenant) => {
let mut other = tenant.attached_locations();
let latest_location_index =
other.iter().position(|&l| l == (node.get_id(), generation));
if let Some(idx) = latest_location_index {
other.remove(idx);
}
other
}
None => Vec::default(),
};
let location = ShardMutationLocations {
latest: MutationLocation {
node: node.clone(),
generation,
},
other: other_locations
.into_iter()
.filter_map(|(node_id, generation)| {
let node = locked.nodes.get(&node_id)?;
Some(MutationLocation {
node: node.clone(),
generation,
})
})
.collect(),
};
locations.0.insert(tenant_shard_id, location);
targets.push((tenant_shard_id, node.clone(), generation));
}
locations
targets
};
let result = op(mutation_locations.clone()).await;
let targets = target_gens.iter().map(|t| (t.0, t.1.clone())).collect();
let result = op(targets).await;
// Post-check: are all the generations of all the shards the same as they were initially? This proves that
// our remote operation executed on the latest generation and is therefore persistent.
@@ -3535,10 +3422,9 @@ impl Service {
}| (tenant_shard_id, generation),
)
.collect::<Vec<_>>()
!= mutation_locations
.0
!= target_gens
.into_iter()
.map(|i| (i.0, Some(i.1.latest.generation)))
.map(|i| (i.0, i.2))
.collect::<Vec<_>>()
{
// We raced with something that incremented the generation, and therefore cannot be
@@ -3568,14 +3454,12 @@ impl Service {
.await;
self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
}
let (shard_zero_tid, shard_zero_locations) = targets.0.pop_first().expect("Must have at least one shard");
assert!(shard_zero_tid.is_shard_zero());
let shard_zero = targets.remove(0);
async fn delete_one(
tenant_shard_id: TenantShardId,
@@ -3598,9 +3482,8 @@ impl Service {
})
}
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let statuses = self
.tenant_for_shards(locations, |tenant_shard_id: TenantShardId, node: Node| {
.tenant_for_shards(targets, |tenant_shard_id: TenantShardId, node: Node| {
Box::pin(delete_one(
tenant_shard_id,
timeline_id,
@@ -3618,9 +3501,9 @@ impl Service {
// Delete shard zero last: this is not strictly necessary, but since a caller's GET on a timeline will be routed
// to shard zero, it gives a more obvious behavior that a GET returns 404 once the deletion is done.
let shard_zero_status = delete_one(
shard_zero_tid,
shard_zero.0,
timeline_id,
shard_zero_locations.latest.node,
shard_zero.1,
self.config.jwt_token.clone(),
)
.await?;

View File

@@ -17,7 +17,6 @@ use crate::{
service::ReconcileResultRequest,
};
use futures::future::{self, Either};
use itertools::Itertools;
use pageserver_api::controller_api::{
AvailabilityZone, NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy,
};
@@ -1411,32 +1410,6 @@ impl TenantShard {
pub(crate) fn set_preferred_az(&mut self, preferred_az_id: AvailabilityZone) {
self.preferred_az_id = Some(preferred_az_id);
}
/// Returns all the nodes to which this tenant shard is attached according to the
/// observed state and the generations. Return vector is sorted from latest generation
/// to earliest.
pub(crate) fn attached_locations(&self) -> Vec<(NodeId, Generation)> {
self.observed
.locations
.iter()
.filter_map(|(node_id, observed)| {
use LocationConfigMode::{AttachedMulti, AttachedSingle, AttachedStale};
let conf = observed.conf.as_ref()?;
match (conf.generation, conf.mode) {
(Some(gen), AttachedMulti | AttachedSingle | AttachedStale) => {
Some((*node_id, gen))
}
_ => None,
}
})
.sorted_by(|(_lhs_node_id, lhs_gen), (_rhs_node_id, rhs_gen)| {
lhs_gen.cmp(rhs_gen).reverse()
})
.map(|(node_id, gen)| (node_id, Generation::new(gen)))
.collect()
}
}
#[cfg(test)]

View File

@@ -5,7 +5,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
aws-config.workspace = true
aws-sdk-s3.workspace = true
either.workspace = true
anyhow.workspace = true
@@ -32,6 +31,7 @@ storage_controller_client.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] }
pageserver = { path = "../pageserver" }
pageserver_api = { path = "../libs/pageserver_api" }

View File

@@ -28,9 +28,8 @@ use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_time
use pageserver::tenant::TENANTS_SEGMENT_NAME;
use pageserver_api::shard::TenantShardId;
use remote_storage::{
DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind,
S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
use reqwest::Url;
use serde::{Deserialize, Serialize};
@@ -489,10 +488,7 @@ async fn download_object_with_retries(
let cancel = CancellationToken::new();
for trial in 0..MAX_RETRIES {
let mut buf = Vec::new();
let download = match remote_client
.download(key, &DownloadOpts::default(), &cancel)
.await
{
let download = match remote_client.download(key, &cancel).await {
Ok(response) => response,
Err(e) => {
error!("Failed to download object for key {key}: {e}");

View File

@@ -1,662 +0,0 @@
from __future__ import annotations
import abc
import json
import os
import re
import subprocess
import tempfile
import textwrap
from itertools import chain, product
from pathlib import Path
from typing import (
Any,
Dict,
List,
Optional,
Tuple,
TypeVar,
cast,
)
import toml
from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.pageserver.common_types import IndexPartDump
from fixtures.pg_version import PgVersion
from fixtures.utils import AuxFileStore
T = TypeVar("T")
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
Supports a way to run arbitrary command directly via CLI.
Do not use directly, use specific subclasses instead.
"""
def __init__(self, extra_env: Optional[Dict[str, str]], binpath: Path):
self.extra_env = extra_env
self.binpath = binpath
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
def raw_cli(
self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True,
timeout=None,
) -> "subprocess.CompletedProcess[str]":
"""
Run the command with the specified arguments.
Arguments must be in list form, e.g. ['endpoint', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
If `check_return_code`, on non-zero exit code logs failure and raises.
"""
assert isinstance(arguments, list)
assert isinstance(self.COMMAND, str)
command_path = str(self.binpath / self.COMMAND)
args = [command_path] + arguments
log.info('Running command "{}"'.format(" ".join(args)))
env_vars = os.environ.copy()
# extra env
for extra_env_key, extra_env_value in (self.extra_env or {}).items():
env_vars[extra_env_key] = extra_env_value
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass through coverage settings
var = "LLVM_PROFILE_FILE"
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(
args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=timeout,
)
except subprocess.TimeoutExpired as e:
if e.stderr:
stderr = e.stderr.decode(errors="replace")
else:
stderr = ""
if e.stdout:
stdout = e.stdout.decode(errors="replace")
else:
stdout = ""
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
raise
indent = " "
if not res.returncode:
stripped = res.stdout.strip()
lines = stripped.splitlines()
if len(lines) < 2:
log.debug(f"Run {res.args} success: {stripped}")
else:
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
elif check_return_code:
# this way command output will be in recorded and shown in CI in failure message
indent = indent * 2
msg = textwrap.dedent(
"""\
Run %s failed:
stdout:
%s
stderr:
%s
"""
)
msg = msg % (
res.args,
textwrap.indent(res.stdout.strip(), indent),
textwrap.indent(res.stderr.strip(), indent),
)
log.info(msg)
raise RuntimeError(msg) from subprocess.CalledProcessError(
res.returncode, res.args, res.stdout, res.stderr
)
return res
class NeonLocalCli(AbstractNeonCli):
"""A typed wrapper around the `neon_local` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
Note: The methods in this class are supposed to be faithful wrappers of the underlying
'neon_local' commands. If you're tempted to add any logic here, please consider putting it
in the caller instead!
There are a few exceptions where these wrapper methods intentionally differ from the
underlying commands, however:
- Many 'neon_local' commands take an optional 'tenant_id' argument and use the default from
the config file if it's omitted. The corresponding wrappers require an explicit 'tenant_id'
argument. The idea is that we don't want to rely on the config file's default in tests,
because NeonEnv has its own 'initial_tenant'. They are currently always the same, but we
want to rely on the Neonenv's default instead of the config file default in tests.
- Similarly, --pg_version argument is always required in the wrappers, even when it's
optional in the 'neon_local' command. The default in 'neon_local' is a specific
hardcoded version, but in tests, we never want to accidentally rely on that;, we
always want to use the version from the test fixtures.
- Wrappers for commands that create a new tenant or timeline ID require the new tenant
or timeline ID to be passed by the caller, while the 'neon_local' commands will
generate a random ID if it's not specified. This is because we don't want to have to
parse the ID from the 'neon_local' output. Making it required ensures that the
caller has to generate it.
"""
COMMAND = "neon_local"
def __init__(
self,
extra_env: Optional[Dict[str, str]],
binpath: Path,
repo_dir: Path,
pg_distrib_dir: Path,
):
if extra_env is None:
env_vars = {}
else:
env_vars = extra_env.copy()
env_vars["NEON_REPO_DIR"] = str(repo_dir)
env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir)
super().__init__(env_vars, binpath)
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
return super().raw_cli(*args, **kwargs)
def tenant_create(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
pg_version: PgVersion,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
):
"""
Creates a new tenant with the given tenant ID and initial timeline ID.
"""
args = [
"tenant",
"create",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
]
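# Tenant config overrides are expanded into repeated `-c key:value` flags,
# e.g. conf={"gc_period": "0s"} becomes ["-c", "gc_period:0s"].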
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_policy is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
elif aux_file_policy is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
elif aux_file_policy is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
if set_default:
args.append("--set-default")
if shard_count is not None:
args.extend(["--shard-count", str(shard_count)])
if shard_stripe_size is not None:
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
if placement_policy is not None:
args.extend(["--placement-policy", str(placement_policy)])
res = self.raw_cli(args)
res.check_returncode()
def tenant_import(self, tenant_id: TenantId):
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
res = self.raw_cli(args)
res.check_returncode()
def tenant_set_default(self, tenant_id: TenantId):
"""
Update default tenant for future operations that require tenant_id.
"""
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
res.check_returncode()
def tenant_config(self, tenant_id: TenantId, conf: Dict[str, str]):
"""
Update tenant config.
"""
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
res = self.raw_cli(args)
res.check_returncode()
def tenant_list(self) -> "subprocess.CompletedProcess[str]":
res = self.raw_cli(["tenant", "list"])
res.check_returncode()
return res
def timeline_create(
self,
new_branch_name: str,
tenant_id: TenantId,
timeline_id: TimelineId,
pg_version: PgVersion,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
"--branch-name",
new_branch_name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
]
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
def timeline_branch(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
new_branch_name,
ancestor_branch_name: Optional[str] = None,
ancestor_start_lsn: Optional[Lsn] = None,
):
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(timeline_id),
"--tenant-id",
str(tenant_id),
]
if ancestor_branch_name is not None:
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
if ancestor_start_lsn is not None:
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
res = self.raw_cli(cmd)
res.check_returncode()
def timeline_import(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
new_branch_name: str,
base_lsn: Lsn,
base_tarfile: Path,
pg_version: PgVersion,
end_lsn: Optional[Lsn] = None,
wal_tarfile: Optional[Path] = None,
):
cmd = [
"timeline",
"import",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
pg_version,
"--branch-name",
new_branch_name,
"--base-lsn",
str(base_lsn),
"--base-tarfile",
str(base_tarfile),
]
if end_lsn is not None:
cmd.extend(["--end-lsn", str(end_lsn)])
if wal_tarfile is not None:
cmd.extend(["--wal-tarfile", str(wal_tarfile)])
res = self.raw_cli(cmd)
res.check_returncode()
def timeline_list(self, tenant_id: TenantId) -> List[Tuple[str, TimelineId]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(["timeline", "list", "--tenant-id", str(tenant_id)])
timelines_cli = sorted(
map(
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
)
)
return timelines_cli
def init(
self,
init_config: Dict[str, Any],
force: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
init_config_tmpfile.write(toml.dumps(init_config))
init_config_tmpfile.flush()
cmd = [
"init",
f"--config={init_config_tmpfile.name}",
]
if force is not None:
cmd.extend(["--force", force])
res = self.raw_cli(cmd)
res.check_returncode()
return res
def storage_controller_start(
self,
timeout_in_seconds: Optional[int] = None,
instance_id: Optional[int] = None,
base_port: Optional[int] = None,
):
cmd = ["storage_controller", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
if base_port is not None:
cmd.append(f"--base-port={base_port}")
return self.raw_cli(cmd)
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
cmd = ["storage_controller", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
return self.raw_cli(cmd)
def pageserver_start(
self,
id: int,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}"]
if timeout_in_seconds is not None:
start_args.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop", f"--id={id}"]
if immediate:
cmd.extend(["-m", "immediate"])
log.info(f"Stopping pageserver with {cmd}")
return self.raw_cli(cmd)
def safekeeper_start(
self,
id: int,
extra_opts: Optional[List[str]] = None,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
if extra_opts is not None:
extra_opts = [f"-e={opt}" for opt in extra_opts]
else:
extra_opts = []
if timeout_in_seconds is not None:
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=extra_env_vars
)
def safekeeper_stop(
self, id: Optional[int] = None, immediate=False
) -> "subprocess.CompletedProcess[str]":
args = ["safekeeper", "stop"]
if id is not None:
args.append(str(id))
if immediate:
args.extend(["-m", "immediate"])
return self.raw_cli(args)
def storage_broker_start(
self, timeout_in_seconds: Optional[int] = None
) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(cmd)
def storage_broker_stop(self) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "stop"]
return self.raw_cli(cmd)
def endpoint_create(
self,
branch_name: str,
pg_port: int,
http_port: int,
tenant_id: TenantId,
pg_version: PgVersion,
endpoint_id: Optional[str] = None,
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"create",
"--tenant-id",
str(tenant_id),
"--branch-name",
branch_name,
"--pg-version",
pg_version,
]
if lsn is not None:
args.extend(["--lsn", str(lsn)])
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:
args.extend(["--hot-standby", "true"])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args)
res.check_returncode()
return res
def endpoint_start(
self,
endpoint_id: str,
safekeepers: Optional[List[int]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
basebackup_request_tries: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"start",
]
extra_env_vars = {}
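# The basebackup retry count is passed to the started endpoint through an
# environment variable rather than a CLI flag.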
if basebackup_request_tries is not None:
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
if remote_ext_config is not None:
args.extend(["--remote-ext-config", remote_ext_config])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
if endpoint_id is not None:
args.append(endpoint_id)
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()
return res
def endpoint_reconfigure(
self,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
pageserver_id: Optional[int] = None,
safekeepers: Optional[List[int]] = None,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = ["endpoint", "reconfigure", endpoint_id]
if tenant_id is not None:
args.extend(["--tenant-id", str(tenant_id)])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
return self.raw_cli(args, check_return_code=check_return_code)
def endpoint_stop(
self,
endpoint_id: str,
destroy=False,
check_return_code=True,
mode: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"stop",
]
if destroy:
args.append("--destroy")
if mode is not None:
args.append(f"--mode={mode}")
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
def mappings_map_branch(
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
) -> "subprocess.CompletedProcess[str]":
"""
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
Usually needed when creating branches via PageserverHttpClient and not neon_local.
After creating a name mapping, you can use EndpointFactory.create_start
with this registered branch name.
"""
args = [
"mappings",
"map",
"--branch-name",
name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
]
return self.raw_cli(args, check_return_code=True)
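# Typical flow, as a sketch: after creating a timeline via PageserverHttpClient,
# register a local branch name for it so that endpoints can be started on it
# ("my-branch" is an arbitrary example name):
#   env.neon_cli.mappings_map_branch("my-branch", tenant_id, timeline_id)
#   env.endpoints.create_start("my-branch", tenant_id=tenant_id)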
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["start"], check_return_code=check_return_code)
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["stop"], check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
"""
A typed wrapper around the `wal_craft` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
COMMAND = "wal_craft"
def postgres_config(self) -> List[str]:
res = self.raw_cli(["print-postgres-config"])
res.check_returncode()
return res.stdout.split("\n")
def in_existing(self, type: str, connection: str) -> None:
res = self.raw_cli(["in-existing", type, connection])
res.check_returncode()
class Pagectl(AbstractNeonCli):
"""
A typed wrapper around the `pagectl` utility CLI tool.
"""
COMMAND = "pagectl"
def dump_index_part(self, path: Path) -> IndexPartDump:
res = self.raw_cli(["index-part", "dump", str(path)])
res.check_returncode()
parsed = json.loads(res.stdout)
return IndexPartDump.from_json(parsed)
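# Usage sketch (path_to_index_part is a hypothetical local copy of an index_part.json file):
#   dump = env.pagectl.dump_index_part(path_to_index_part)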

View File

@@ -9,6 +9,8 @@ import os
import re
import shutil
import subprocess
import tempfile
import textwrap
import threading
import time
import uuid
@@ -19,6 +21,7 @@ from datetime import datetime
from enum import Enum
from fcntl import LOCK_EX, LOCK_UN, flock
from functools import cached_property
from itertools import chain, product
from pathlib import Path
from types import TracebackType
from typing import (
@@ -61,12 +64,11 @@ from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, Timeline
from fixtures.endpoint.http import EndpointHttpClient
from fixtures.log_helper import log
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
from fixtures.neon_cli import NeonLocalCli, Pagectl
from fixtures.pageserver.allowed_errors import (
DEFAULT_PAGESERVER_ALLOWED_ERRORS,
DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS,
)
from fixtures.pageserver.common_types import LayerName, parse_layer_file_name
from fixtures.pageserver.common_types import IndexPartDump, LayerName, parse_layer_file_name
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
@@ -489,7 +491,7 @@ class NeonEnvBuilder:
log.debug(
f"Services started, creating initial tenant {env.initial_tenant} and its initial timeline"
)
initial_tenant, initial_timeline = env.create_tenant(
initial_tenant, initial_timeline = env.neon_cli.create_tenant(
tenant_id=env.initial_tenant,
conf=initial_tenant_conf,
timeline_id=env.initial_timeline,
@@ -950,16 +952,10 @@ class NeonEnv:
initial_tenant - tenant ID of the initial tenant created in the repository
neon_cli - can be used to run the 'neon_local' CLI tool
neon_cli - can be used to run the 'neon' CLI tool
create_tenant() - initializes a new tenant and an initial empty timeline on it,
returns the tenant and timeline id
create_branch() - branch a new timeline from an existing one, returns
the new timeline id
create_timeline() - initializes a new timeline by running initdb, returns
the new timeline id
create_tenant() - initializes a new tenant in the page server, returns
the tenant id
"""
BASE_PAGESERVER_ID = 1
@@ -970,6 +966,8 @@ class NeonEnv:
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.neon_cli = NeonCli(env=self)
self.pagectl = Pagectl(env=self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.pageservers: List[NeonPageserver] = []
@@ -989,21 +987,6 @@ class NeonEnv:
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
neon_local_env_vars = {}
if self.rust_log_override is not None:
neon_local_env_vars["RUST_LOG"] = self.rust_log_override
self.neon_cli = NeonLocalCli(
extra_env=neon_local_env_vars,
binpath=self.neon_local_binpath,
repo_dir=self.repo_dir,
pg_distrib_dir=self.pg_distrib_dir,
)
pagectl_env_vars = {}
if self.rust_log_override is not None:
pagectl_env_vars["RUST_LOG"] = self.rust_log_override
self.pagectl = Pagectl(extra_env=pagectl_env_vars, binpath=self.neon_binpath)
# The URL for the pageserver to use as its control_plane_api config
if config.storage_controller_port_override is not None:
log.info(
@@ -1327,74 +1310,6 @@ class NeonEnv:
self.endpoint_counter += 1
return "ep-" + str(self.endpoint_counter)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
tenant_id = tenant_id or TenantId.generate()
timeline_id = timeline_id or TimelineId.generate()
self.neon_cli.tenant_create(
tenant_id=tenant_id,
timeline_id=timeline_id,
pg_version=self.pg_version,
conf=conf,
shard_count=shard_count,
shard_stripe_size=shard_stripe_size,
placement_policy=placement_policy,
set_default=set_default,
aux_file_policy=aux_file_policy,
)
return tenant_id, timeline_id
def config_tenant(self, tenant_id: Optional[TenantId], conf: Dict[str, str]):
"""
Update tenant config.
"""
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.tenant_config(tenant_id, conf)
def create_branch(
self,
new_branch_name: str = DEFAULT_BRANCH_NAME,
tenant_id: Optional[TenantId] = None,
ancestor_branch_name: Optional[str] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
new_timeline_id = new_timeline_id or TimelineId.generate()
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.timeline_branch(
tenant_id, new_timeline_id, new_branch_name, ancestor_branch_name, ancestor_start_lsn
)
return new_timeline_id
def create_timeline(
self,
new_branch_name: str,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
timeline_id = timeline_id or TimelineId.generate()
tenant_id = tenant_id or self.initial_tenant
self.neon_cli.timeline_create(new_branch_name, tenant_id, timeline_id, self.pg_version)
return timeline_id
@pytest.fixture(scope="function")
def neon_simple_env(
@@ -1510,6 +1425,597 @@ class PageserverPort:
http: int
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
Supports a way to run an arbitrary command directly via the CLI.
Do not use directly, use specific subclasses instead.
"""
def __init__(self, env: NeonEnv):
self.env = env
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
def raw_cli(
self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True,
timeout=None,
local_binpath=False,
) -> "subprocess.CompletedProcess[str]":
"""
Run the command with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
If `check_return_code` is set, a non-zero exit code logs the failure and raises.
If `local_binpath` is true, the command is resolved from the test-utility (`neon_local_binpath`) location.
"""
assert isinstance(arguments, list)
assert isinstance(self.COMMAND, str)
if local_binpath:
# Test utility
bin_neon = str(self.env.neon_local_binpath / self.COMMAND)
else:
# Normal binary
bin_neon = str(self.env.neon_binpath / self.COMMAND)
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(" ".join(args)))
env_vars = os.environ.copy()
env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir)
env_vars["POSTGRES_DISTRIB_DIR"] = str(self.env.pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars["RUST_LOG"] = self.env.rust_log_override
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass coverage settings
var = "LLVM_PROFILE_FILE"
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
try:
res = subprocess.run(
args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=timeout,
)
except subprocess.TimeoutExpired as e:
if e.stderr:
stderr = e.stderr.decode(errors="replace")
else:
stderr = ""
if e.stdout:
stdout = e.stdout.decode(errors="replace")
else:
stdout = ""
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
raise
indent = " "
if not res.returncode:
stripped = res.stdout.strip()
lines = stripped.splitlines()
if len(lines) < 2:
log.debug(f"Run {res.args} success: {stripped}")
else:
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
elif check_return_code:
# this way the command output will be recorded and shown in the CI failure message
indent = indent * 2
msg = textwrap.dedent(
"""\
Run %s failed:
stdout:
%s
stderr:
%s
"""
)
msg = msg % (
res.args,
textwrap.indent(res.stdout.strip(), indent),
textwrap.indent(res.stderr.strip(), indent),
)
log.info(msg)
raise RuntimeError(msg) from subprocess.CalledProcessError(
res.returncode, res.args, res.stdout, res.stderr
)
return res
class NeonCli(AbstractNeonCli):
"""
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
COMMAND = "neon_local"
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
kwargs["local_binpath"] = True
return super().raw_cli(*args, **kwargs)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
tenant_id = tenant_id or TenantId.generate()
timeline_id = timeline_id or TimelineId.generate()
args = [
"tenant",
"create",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_policy is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
elif aux_file_policy is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
elif aux_file_policy is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
if set_default:
args.append("--set-default")
if shard_count is not None:
args.extend(["--shard-count", str(shard_count)])
if shard_stripe_size is not None:
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
if placement_policy is not None:
args.extend(["--placement-policy", str(placement_policy)])
res = self.raw_cli(args)
res.check_returncode()
return tenant_id, timeline_id
def import_tenant(self, tenant_id: TenantId):
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
res = self.raw_cli(args)
res.check_returncode()
def set_default(self, tenant_id: TenantId):
"""
Update default tenant for future operations that require tenant_id.
"""
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
res.check_returncode()
def config_tenant(self, tenant_id: TenantId, conf: Dict[str, str]):
"""
Update tenant config.
"""
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
res = self.raw_cli(args)
res.check_returncode()
def list_tenants(self) -> "subprocess.CompletedProcess[str]":
res = self.raw_cli(["tenant", "list"])
res.check_returncode()
return res
def create_timeline(
self,
new_branch_name: str,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
"--branch-name",
new_branch_name,
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
def create_branch(
self,
new_branch_name: str = DEFAULT_BRANCH_NAME,
ancestor_branch_name: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if new_timeline_id is None:
new_timeline_id = TimelineId.generate()
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(new_timeline_id),
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
if ancestor_branch_name is not None:
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
if ancestor_start_lsn is not None:
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
res = self.raw_cli(cmd)
res.check_returncode()
return TimelineId(str(new_timeline_id))
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
)
timelines_cli = sorted(
map(
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
)
)
return timelines_cli
def init(
self,
init_config: Dict[str, Any],
force: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
init_config_tmpfile.write(toml.dumps(init_config))
init_config_tmpfile.flush()
cmd = [
"init",
f"--config={init_config_tmpfile.name}",
]
if force is not None:
cmd.extend(["--force", force])
res = self.raw_cli(cmd)
res.check_returncode()
return res
def storage_controller_start(
self,
timeout_in_seconds: Optional[int] = None,
instance_id: Optional[int] = None,
base_port: Optional[int] = None,
):
cmd = ["storage_controller", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
if base_port is not None:
cmd.append(f"--base-port={base_port}")
return self.raw_cli(cmd)
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
cmd = ["storage_controller", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
if instance_id is not None:
cmd.append(f"--instance-id={instance_id}")
return self.raw_cli(cmd)
def pageserver_start(
self,
id: int,
extra_env_vars: Optional[Dict[str, str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}"]
if timeout_in_seconds is not None:
start_args.append(f"--start-timeout={timeout_in_seconds}s")
storage = self.env.pageserver_remote_storage
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop", f"--id={id}"]
if immediate:
cmd.extend(["-m", "immediate"])
log.info(f"Stopping pageserver with {cmd}")
return self.raw_cli(cmd)
def safekeeper_start(
self,
id: int,
extra_opts: Optional[List[str]] = None,
timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
if extra_opts is not None:
extra_opts = [f"-e={opt}" for opt in extra_opts]
else:
extra_opts = []
if timeout_in_seconds is not None:
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=s3_env_vars
)
def safekeeper_stop(
self, id: Optional[int] = None, immediate=False
) -> "subprocess.CompletedProcess[str]":
args = ["safekeeper", "stop"]
if id is not None:
args.append(str(id))
if immediate:
args.extend(["-m", "immediate"])
return self.raw_cli(args)
def broker_start(
self, timeout_in_seconds: Optional[int] = None
) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "start"]
if timeout_in_seconds is not None:
cmd.append(f"--start-timeout={timeout_in_seconds}s")
return self.raw_cli(cmd)
def broker_stop(self) -> "subprocess.CompletedProcess[str]":
cmd = ["storage_broker", "stop"]
return self.raw_cli(cmd)
def endpoint_create(
self,
branch_name: str,
pg_port: int,
http_port: int,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"create",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--branch-name",
branch_name,
"--pg-version",
self.env.pg_version,
]
if lsn is not None:
args.extend(["--lsn", str(lsn)])
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if endpoint_id is not None:
args.append(endpoint_id)
if hot_standby:
args.extend(["--hot-standby", "true"])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args)
res.check_returncode()
return res
def endpoint_start(
self,
endpoint_id: str,
safekeepers: Optional[List[int]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
allow_multiple=False,
basebackup_request_tries: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"start",
]
extra_env_vars = {}
if basebackup_request_tries is not None:
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
if remote_ext_config is not None:
args.extend(["--remote-ext-config", remote_ext_config])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
if endpoint_id is not None:
args.append(endpoint_id)
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()
return res
def endpoint_reconfigure(
self,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
pageserver_id: Optional[int] = None,
safekeepers: Optional[List[int]] = None,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = ["endpoint", "reconfigure", endpoint_id]
if tenant_id is not None:
args.extend(["--tenant-id", str(tenant_id)])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
return self.raw_cli(args, check_return_code=check_return_code)
def endpoint_stop(
self,
endpoint_id: str,
destroy=False,
check_return_code=True,
mode: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
"stop",
]
if destroy:
args.append("--destroy")
if mode is not None:
args.append(f"--mode={mode}")
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
def map_branch(
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
) -> "subprocess.CompletedProcess[str]":
"""
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
Usually needed when creating branches via PageserverHttpClient and not neon_local.
After creating a name mapping, you can use EndpointFactory.create_start
with this registered branch name.
"""
args = [
"mappings",
"map",
"--branch-name",
name,
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
]
return self.raw_cli(args, check_return_code=True)
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["start"], check_return_code=check_return_code)
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
return self.raw_cli(["stop"], check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
"""
A typed wrapper around the `wal_craft` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
COMMAND = "wal_craft"
def postgres_config(self) -> List[str]:
res = self.raw_cli(["print-postgres-config"])
res.check_returncode()
return res.stdout.split("\n")
def in_existing(self, type: str, connection: str) -> None:
res = self.raw_cli(["in-existing", type, connection])
res.check_returncode()
class ComputeCtl(AbstractNeonCli):
"""
A typed wrapper around the `compute_ctl` CLI tool.
"""
COMMAND = "compute_ctl"
class Pagectl(AbstractNeonCli):
"""
A typed wrapper around the `pagectl` utility CLI tool.
"""
COMMAND = "pagectl"
def dump_index_part(self, path: Path) -> IndexPartDump:
res = self.raw_cli(["index-part", "dump", str(path)])
res.check_returncode()
parsed = json.loads(res.stdout)
return IndexPartDump.from_json(parsed)
class LogUtils:
"""
A mixin class which provides utilities for inspecting the logs of a service.
@@ -2427,10 +2933,6 @@ class NeonPageserver(PgProtocol, LogUtils):
"""
assert self.running is False
storage = self.env.pageserver_remote_storage
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
self.env.neon_cli.pageserver_start(
self.id, extra_env_vars=extra_env_vars, timeout_in_seconds=timeout_in_seconds
)
@@ -3451,7 +3953,6 @@ class Endpoint(PgProtocol, LogUtils):
hot_standby=hot_standby,
pg_port=self.pg_port,
http_port=self.http_port,
pg_version=self.env.pg_version,
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
)
@@ -3894,16 +4395,8 @@ class Safekeeper(LogUtils):
extra_opts = self.extra_opts
assert self.running is False
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
self.env.neon_cli.safekeeper_start(
self.id,
extra_opts=extra_opts,
timeout_in_seconds=timeout_in_seconds,
extra_env_vars=s3_env_vars,
self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds
)
self.running = True
# wait for wal acceptor start by checking its status
@@ -4049,7 +4542,7 @@ class Safekeeper(LogUtils):
1) wait for remote_consistent_lsn and wal_backup_lsn on safekeeper to reach it.
2) checkpoint timeline on safekeeper, which should remove WAL before this LSN; optionally wait for that.
"""
client = self.http_client()
cli = self.http_client()
target_segment_file = lsn.segment_name()
@@ -4061,7 +4554,7 @@ class Safekeeper(LogUtils):
assert all(target_segment_file <= s for s in segments)
def are_lsns_advanced():
stat = client.timeline_status(tenant_id, timeline_id)
stat = cli.timeline_status(tenant_id, timeline_id)
log.info(
f"waiting for remote_consistent_lsn and backup_lsn on sk {self.id} to reach {lsn}, currently remote_consistent_lsn={stat.remote_consistent_lsn}, backup_lsn={stat.backup_lsn}"
)
@@ -4070,7 +4563,7 @@ class Safekeeper(LogUtils):
# xxx: max wait is long because we might be waiting for reconnection from
# pageserver to this safekeeper
wait_until(30, 1, are_lsns_advanced)
client.checkpoint(tenant_id, timeline_id)
cli.checkpoint(tenant_id, timeline_id)
if wait_wal_removal:
wait_until(30, 1, are_segments_removed)
@@ -4098,13 +4591,13 @@ class NeonBroker(LogUtils):
timeout_in_seconds: Optional[int] = None,
):
assert not self.running
self.env.neon_cli.storage_broker_start(timeout_in_seconds)
self.env.neon_cli.broker_start(timeout_in_seconds)
self.running = True
return self
def stop(self):
if self.running:
self.env.neon_cli.storage_broker_stop()
self.env.neon_cli.broker_stop()
self.running = False
return self
@@ -4733,10 +5226,10 @@ def flush_ep_to_pageserver(
commit_lsn: Lsn = Lsn(0)
# In principle in the absense of failures polling single sk would be enough.
for sk in env.safekeepers:
client = sk.http_client()
cli = sk.http_client()
# wait until compute connections are gone
wait_walreceivers_absent(client, tenant, timeline)
commit_lsn = max(client.get_commit_lsn(tenant, timeline), commit_lsn)
wait_walreceivers_absent(cli, tenant, timeline)
commit_lsn = max(cli.get_commit_lsn(tenant, timeline), commit_lsn)
# Note: depending on WAL filtering implementation, probably most shards
# won't be able to reach commit_lsn (unless gaps are also ack'ed), so this
@@ -4789,12 +5282,7 @@ def fork_at_current_lsn(
the WAL up to that LSN to arrive in the pageserver before creating the branch.
"""
current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
return env.create_branch(
new_branch_name=new_branch_name,
tenant_id=tenant_id,
ancestor_branch_name=ancestor_branch_name,
ancestor_start_lsn=current_lsn,
)
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
def import_timeline_from_vanilla_postgres(
@@ -4813,9 +5301,9 @@ def import_timeline_from_vanilla_postgres(
"""
# Take backup of the existing PostgreSQL server with pg_basebackup
basebackup_dir = test_output_dir / "basebackup"
base_tar = basebackup_dir / "base.tar"
wal_tar = basebackup_dir / "pg_wal.tar"
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
os.mkdir(basebackup_dir)
pg_bin.run(
[
@@ -4825,28 +5313,40 @@ def import_timeline_from_vanilla_postgres(
"-d",
vanilla_pg_connstr,
"-D",
str(basebackup_dir),
basebackup_dir,
]
)
# Extract start_lsn and end_lsn from the backup manifest file
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = Lsn(manifest["WAL-Ranges"][0]["Start-LSN"])
end_lsn = Lsn(manifest["WAL-Ranges"][0]["End-LSN"])
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
# Import the backup tarballs into the pageserver
env.neon_cli.timeline_import(
tenant_id=tenant_id,
timeline_id=timeline_id,
new_branch_name=branch_name,
base_lsn=start_lsn,
base_tarfile=base_tar,
end_lsn=end_lsn,
wal_tarfile=wal_tar,
pg_version=env.pg_version,
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base_tar,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal_tar,
"--pg-version",
env.pg_version,
]
)
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, end_lsn)
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, Lsn(end_lsn))
def last_flush_lsn_upload(

View File

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any, List, Tuple
from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv
from fixtures.neon_fixtures import NeonEnv, Pagectl
from fixtures.pageserver.common_types import (
InvalidFileName,
parse_layer_file_name,
@@ -35,7 +35,7 @@ def duplicate_one_tenant(env: NeonEnv, template_tenant: TenantId, new_tenant: Te
for file in tl.iterdir():
shutil.copy2(file, dst_tl_dir)
if "__" in file.name:
env.pagectl.raw_cli(
Pagectl(env).raw_cli(
[
"layer",
"rewrite-summary",

View File

@@ -53,7 +53,7 @@ def setup_env(
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.create_tenant(set_default=True)
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ep = env.endpoints.create_start("main", tenant_id=template_tenant)

View File

@@ -81,7 +81,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.create_tenant(set_default=True)
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -162,7 +162,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.create_tenant(set_default=True)
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ps_http = env.pageserver.http_client()

View File

@@ -41,7 +41,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
pg_bin = neon_compare.pg_bin
# Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{4 * 1024 ** 2}",
@@ -64,7 +64,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
endpoint.stop()
env.create_branch("b0", tenant_id=tenant)
env.neon_cli.create_branch("b0", tenant_id=tenant)
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True))
@@ -78,7 +78,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
p = random.randint(0, i)
timer = timeit.default_timer()
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{p}", tenant_id=tenant)
env.neon_cli.create_branch(f"b{i + 1}", f"b{p}", tenant_id=tenant)
dur = timeit.default_timer() - timer
log.info(f"Creating branch b{i+1} took {dur}s")
@@ -104,7 +104,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
# seed the prng so we will measure the same structure every time
rng = random.Random("2024-02-29")
env.create_branch("b0")
env.neon_cli.create_branch("b0")
endpoint = env.endpoints.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
@@ -121,7 +121,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
timer = timeit.default_timer()
# each of these uploads to remote storage before completion
env.create_branch(f"b{i + 1}", ancestor_branch_name=parent)
env.neon_cli.create_branch(f"b{i + 1}", parent)
dur = timeit.default_timer() - timer
branch_creation_durations.append(dur)
@@ -222,7 +222,7 @@ def wait_and_record_startup_metrics(
def test_branch_creation_many_relations(neon_compare: NeonCompare):
env = neon_compare.env
timeline_id = env.create_branch("root")
timeline_id = env.neon_cli.create_branch("root")
endpoint = env.endpoints.create_start("root")
with closing(endpoint.connect()) as conn:
@@ -238,7 +238,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
)
with neon_compare.record_duration("create_branch_time_not_busy_root"):
env.create_branch("child_not_busy", ancestor_branch_name="root")
env.neon_cli.create_branch("child_not_busy", "root")
# run a concurrent insertion to make the ancestor "busy" during the branch creation
thread = threading.Thread(
@@ -247,6 +247,6 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
thread.start()
with neon_compare.record_duration("create_branch_time_busy_root"):
env.create_branch("child_busy", ancestor_branch_name="root")
env.neon_cli.create_branch("child_busy", "root")
thread.join()

View File

@@ -41,7 +41,7 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
)
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
env.create_branch("root")
env.neon_cli.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", endpoint_root.connstr(), "-s10"])
@@ -55,14 +55,14 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.create_branch("root")
env.neon_cli.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
)
env.create_branch("child", ancestor_branch_name="root")
env.neon_cli.create_branch("child", "root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):
@@ -73,7 +73,7 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.create_branch("root")
env.neon_cli.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql_many(
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
]
)
env.create_branch("child", ancestor_branch_name="root")
env.neon_cli.create_branch("child", "root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):

View File

@@ -26,8 +26,10 @@ def test_bulk_tenant_create(
for i in range(tenants_count):
start = timeit.default_timer()
tenant, _ = env.create_tenant()
env.create_timeline(f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant)
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
# if use_safekeepers == 'with_sa':

View File

@@ -16,7 +16,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
env = neon_env_builder.init_start()
n_records = 1000000
timeline_id = env.create_branch("test_bulk_update")
timeline_id = env.neon_cli.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
endpoint = env.endpoints.create_start("test_bulk_update")
cur = endpoint.connect().cursor()

View File

@@ -17,7 +17,7 @@ def test_compaction(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# Disable background GC and compaction, we'll run compaction manually.
"gc_period": "0s",
@@ -68,7 +68,7 @@ def test_compaction_l0_memory(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# Initially disable compaction so that we will build up a stack of L0s
"compaction_period": "0s",

View File

@@ -11,7 +11,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
tenant_id, _ = env.create_tenant(
tenant_id, _ = env.neon_cli.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -63,7 +63,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
log.info(f"Physical storage size {physical_size}")
if mode == "with_snapshots":
if step == n_steps / 2:
env.create_branch("child")
env.neon_cli.create_branch("child")
max_num_of_deltas_above_image = 0
max_total_num_of_deltas = 0

View File

@@ -15,7 +15,7 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
# We want to have a lot of lot of layer files to exercise the layer map. Disable
# GC, and make checkpoint_distance very small, so that we get a lot of small layer
# files.
tenant, timeline = env.create_tenant(
tenant, timeline = env.neon_cli.create_tenant(
conf={
"gc_period": "0s",
"checkpoint_distance": "16384",

View File

@@ -33,7 +33,7 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark:
env = neon_env_builder.init_start()
lazy_slru_download = "true" if slru == "lazy" else "false"
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"lazy_slru_download": lazy_slru_download,
}

View File

@@ -85,7 +85,7 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
tenants = {}
for tenant_id in set(TenantId.generate() for _i in range(0, tenant_count)):
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
env.neon_cli.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
endpoint = env.endpoints.create("main", tenant_id=tenant_id)
tenants[tenant_id] = TenantState(timeline_id, endpoint)
endpoint.start()

View File

@@ -27,7 +27,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.create_branch("test_startup")
env.neon_cli.create_branch("test_startup")
endpoint = None

View File

@@ -12,7 +12,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"checkpoint_distance": "4194304",
"gc_period": "0s",
@@ -45,9 +45,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 100k rows: {lsn_100}")
# Create branch1.
env.create_branch(
"branch1", ancestor_branch_name="main", ancestor_start_lsn=lsn_100, tenant_id=tenant
)
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
branch1_cur = endpoint_branch1.connect().cursor()
@@ -69,9 +67,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200k rows: {lsn_200}")
# Create branch2.
env.create_branch(
"branch2", ancestor_branch_name="branch1", ancestor_start_lsn=lsn_200, tenant_id=tenant
)
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
branch2_cur = endpoint_branch2.connect().cursor()

View File

@@ -41,7 +41,7 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.create_tenant()
(tenant_id, _) = env.neon_cli.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -109,7 +109,7 @@ def test_empty_config(positive_env: NeonEnv, content_type: Optional[str]):
"""
env = positive_env
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.create_tenant()
(tenant_id, _) = env.neon_cli.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -182,7 +182,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
fully_custom_config.keys()
), "ensure we cover all config options"
(tenant_id, _) = env.create_tenant()
(tenant_id, _) = env.neon_cli.create_tenant()
ps_http.set_tenant_config(tenant_id, fully_custom_config)
our_tenant_config = ps_http.tenant_config(tenant_id)
assert our_tenant_config.tenant_specific_overrides == fully_custom_config

View File

@@ -76,7 +76,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
branch = "test_compute_auth_to_pageserver"
env.create_branch(branch)
env.neon_cli.create_branch(branch)
endpoint = env.endpoints.create_start(branch)
with closing(endpoint.connect()) as conn:
@@ -186,7 +186,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
timeline_id = env.create_branch(branch)
timeline_id = env.neon_cli.create_branch(branch)
env.endpoints.create_start(branch)
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)

View File

@@ -98,7 +98,7 @@ def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.create_branch("test_backpressure")
env.neon_cli.create_branch("test_backpressure")
endpoint = env.endpoints.create(
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]

View File

@@ -22,7 +22,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
pageserver_http.configure_failpoints(("simulated-bad-compute-connection", "50%return(15)"))
env.create_branch("test_compute_pageserver_connection_stress")
env.neon_cli.create_branch("test_compute_pageserver_connection_stress")
endpoint = env.endpoints.create_start("test_compute_pageserver_connection_stress")
pg_conn = endpoint.connect()

View File

@@ -53,7 +53,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
tenant, timeline_main = env.create_tenant(
tenant, timeline_main = env.neon_cli.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -90,7 +90,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
pageserver_http_client.timeline_checkpoint(tenant, timeline_main)
pageserver_http_client.timeline_gc(tenant, timeline_main, lsn2 - lsn1 + 1024)
env.create_branch(
env.neon_cli.create_branch(
"test_branch", ancestor_branch_name="main", ancestor_start_lsn=lsn1, tenant_id=tenant
)
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
@@ -127,7 +127,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env.storage_controller.allowed_errors.extend(error_regexes)
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -145,7 +145,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
}
)
b0 = env.create_branch("b0", tenant_id=tenant)
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
res = endpoint0.safe_psql_many(
queries=[
@@ -176,7 +176,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=lsn, tenant_id=tenant)
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()

View File

@@ -23,7 +23,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
env.storage_controller.allowed_errors.extend(error_regexes)
# Branch at the point where only 100 rows were inserted
branch_behind_timeline_id = env.create_branch("test_branch_behind")
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
endpoint_main = env.endpoints.create_start("test_branch_behind")
main_cur = endpoint_main.connect().cursor()
@@ -58,10 +58,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200100 rows: {lsn_b}")
# Branch at the point where only 100 rows were inserted
env.create_branch(
"test_branch_behind_hundred",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_a,
env.neon_cli.create_branch(
"test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a
)
# Insert many more rows. This generates enough WAL to fill a few segments.
@@ -77,10 +75,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 400100 rows: {lsn_c}")
# Branch at the point where only 200100 rows were inserted
env.create_branch(
"test_branch_behind_more",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_b,
env.neon_cli.create_branch(
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
)
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
@@ -101,17 +97,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# branch at segment boundary
env.create_branch(
"test_branch_segment_boundary",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/3000000"),
env.neon_cli.create_branch(
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
)
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
# branch at pre-initdb lsn (from main branch)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()
@@ -122,10 +116,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.create_branch(
"test_branch_preinitdb",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/42"),
env.neon_cli.create_branch(
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42")
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
@@ -147,10 +139,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
print_gc_result(gc_result)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.create_branch(
"test_branch_create_fail",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=gced_lsn,
env.neon_cli.create_branch(
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):

View File

@@ -38,7 +38,7 @@ def test_branching_with_pgbench(
env = neon_simple_env
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{1024 ** 2}",
@@ -55,7 +55,7 @@ def test_branching_with_pgbench(
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", "-T15", connstr])
env.create_branch("b0", tenant_id=tenant)
env.neon_cli.create_branch("b0", tenant_id=tenant)
endpoints: List[Endpoint] = []
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
@@ -84,9 +84,9 @@ def test_branching_with_pgbench(
threads = []
if ty == "cascade":
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{i}", tenant_id=tenant)
env.neon_cli.create_branch(f"b{i + 1}", f"b{i}", tenant_id=tenant)
else:
env.create_branch(f"b{i + 1}", ancestor_branch_name="b0", tenant_id=tenant)
env.neon_cli.create_branch(f"b{i + 1}", "b0", tenant_id=tenant)
endpoints.append(env.endpoints.create_start(f"b{i + 1}", tenant_id=tenant))
@@ -120,7 +120,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
env = neon_simple_env
env.create_branch("b0")
env.neon_cli.create_branch("b0")
endpoint0 = env.endpoints.create_start("b0")
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
@@ -133,7 +133,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=start_lsn)
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
endpoint1 = env.endpoints.create_start("b1")
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
@@ -173,7 +173,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
wait_until_paused(env, "before-upload-index-pausable")
env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
env.neon_cli.map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"):
env.endpoints.create_start(
@@ -432,7 +432,9 @@ def test_branching_while_stuck_find_gc_cutoffs(neon_env_builder: NeonEnvBuilder)
wait_until_paused(env, failpoint)
env.create_branch("branch", ancestor_branch_name="main")
env.neon_cli.create_branch(
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch"
)
client.configure_failpoints((failpoint, "off"))

View File

@@ -34,7 +34,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
for _ in range(3):
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
@@ -84,11 +84,13 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(env.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id)
executor.submit(
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
)
for i in range(4)
]
for future in futures:
@@ -109,7 +111,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
@@ -121,7 +123,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on the tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -149,11 +151,11 @@ def test_timeline_init_break_before_checkpoint_recreate(
]
)
env.create_tenant(env.initial_tenant)
env.neon_cli.create_tenant(env.initial_tenant)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Some fixed timeline ID (like control plane does)
@@ -174,7 +176,7 @@ def test_timeline_init_break_before_checkpoint_recreate(
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -199,7 +201,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint when creating a new timeline, right after creating its directory
@@ -209,7 +211,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
# Creating the timeline didn't finish. The other timelines on the tenant should still be present and work normally.
# "New" timeline is not present in the list, allowing pageserver to retry the same request
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"

View File

@@ -34,7 +34,7 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver):
ignore_notify
)
env.create_branch("test_change_pageserver")
env.neon_cli.create_branch("test_change_pageserver")
endpoint = env.endpoints.create_start("test_change_pageserver")
# Put this tenant into a dual-attached state

View File

@@ -56,10 +56,8 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.create_branch(
"test_clog_truncate_new",
ancestor_branch_name="main",
ancestor_start_lsn=lsn_after_truncation,
env.neon_cli.create_branch(
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
)
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")

View File

@@ -23,7 +23,7 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
env = neon_simple_env
def start_workload():
env.create_branch("test_lsof_pageserver_pid")
env.neon_cli.create_branch("test_lsof_pageserver_pid")
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:

View File

@@ -517,7 +517,7 @@ def test_historic_storage_formats(
assert metadata_summary["tenant_count"] >= 1
assert metadata_summary["timeline_count"] >= 1
env.neon_cli.tenant_import(dataset.tenant_id)
env.neon_cli.import_tenant(dataset.tenant_id)
# Discover timelines
timelines = env.pageserver.http_client().timeline_list(dataset.tenant_id)

View File

@@ -38,7 +38,7 @@ def test_safekeepers_reconfigure_reorder(
):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.create_branch("test_safekeepers_reconfigure_reorder")
env.neon_cli.create_branch("test_safekeepers_reconfigure_reorder")
endpoint = env.endpoints.create_start("test_safekeepers_reconfigure_reorder")

View File

@@ -1,7 +1,6 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_cli import WalCraft
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
# Restart nodes with WAL end having specially crafted shape, like last record
# crossing segment boundary, to test decoding issues.
@@ -19,7 +18,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
)
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env = neon_env_builder.init_start()
env.create_branch("test_crafted_wal_end")
env.neon_cli.create_branch("test_crafted_wal_end")
env.pageserver.allowed_errors.extend(
[
# seems like pageserver stop triggers these
@@ -28,7 +27,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
)
endpoint = env.endpoints.create("test_crafted_wal_end")
wal_craft = WalCraft(extra_env=None, binpath=env.neon_binpath)
wal_craft = WalCraft(env)
endpoint.config(wal_craft.postgres_config())
endpoint.start()
res = endpoint.safe_psql_many(

View File

@@ -31,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.create_branch("test_createdb2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
@@ -77,14 +77,10 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.create_branch(
"test_before_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_before_drop
)
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.create_branch(
"test_after_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_after_drop
)
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop

View File

@@ -18,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.create_branch("test_createuser2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to the new branch as a new user

View File

@@ -59,11 +59,11 @@ def test_min_resident_size_override_handling(
env.pageserver.stop()
env.pageserver.start()
tenant_id, _ = env.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
assert_overrides(tenant_id, config_level_override)
# Also ensure that specifying the parameter to create_tenant works, in addition to http-level reconfig.
tenant_id, _ = env.create_tenant(conf={"min_resident_size_override": "100"})
tenant_id, _ = env.neon_cli.create_tenant(conf={"min_resident_size_override": "100"})
assert_config(tenant_id, 100, 100)
ps_http.set_tenant_config(tenant_id, {})
assert_config(tenant_id, None, config_level_override)
@@ -280,7 +280,7 @@ def _eviction_env(
def pgbench_init_tenant(
layer_size: int, scale: int, env: NeonEnv, pg_bin: PgBin
) -> Tuple[TenantId, TimelineId]:
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
"gc_period": "0s",
"compaction_period": "0s",

View File

@@ -81,7 +81,7 @@ def test_remote_extensions(
# Start a compute node with remote_extension spec
# and check that it can download the extensions and use them to CREATE EXTENSION.
env = neon_env_builder_local.init_start()
env.create_branch("test_remote_extensions")
env.neon_cli.create_branch("test_remote_extensions")
endpoint = env.endpoints.create(
"test_remote_extensions",
config_lines=["log_min_messages=debug3"],

View File

@@ -15,7 +15,7 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
Test that triggering crash from neon_test_utils crashes the endpoint
"""
env = neon_env_builder.init_start()
env.create_branch("test_endpoint_crash")
env.neon_cli.create_branch("test_endpoint_crash")
endpoint = env.endpoints.create_start("test_endpoint_crash")
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")

View File

@@ -3,7 +3,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_fsm_truncate")
env.neon_cli.create_branch("test_fsm_truncate")
endpoint = env.endpoints.create_start("test_fsm_truncate")
endpoint.safe_psql(
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"

View File

@@ -68,7 +68,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
timeline = env.create_branch("test_gc_aggressive", ancestor_branch_name="main")
timeline = env.neon_cli.create_branch("test_gc_aggressive", "main")
endpoint = env.endpoints.create_start("test_gc_aggressive")
with endpoint.cursor() as cur:
@@ -99,7 +99,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder):
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
tenant_id = env.initial_tenant
timeline_id = env.create_branch("test_gc_index_upload", ancestor_branch_name="main")
timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main")
endpoint = env.endpoints.create_start("test_gc_index_upload")
pageserver_http = env.pageserver.http_client()

View File

@@ -98,15 +98,27 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
)
def import_tar(base, wal):
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_tarfile=base,
base_lsn=start_lsn,
wal_tarfile=wal,
end_lsn=end_lsn,
pg_version=env.pg_version,
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
)
# Importing empty file fails
@@ -146,7 +158,7 @@ def test_import_from_pageserver_small(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.create_branch("test_import_from_pageserver_small")
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
num_rows = 3000
@@ -165,7 +177,7 @@ def test_import_from_pageserver_multisegment(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.create_branch("test_import_from_pageserver_multisegment")
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
@@ -256,13 +268,23 @@ def _import(
branch_name = "import_from_pageserver"
client = env.pageserver.http_client()
env.pageserver.tenant_create(tenant)
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_lsn=lsn,
base_tarfile=tar_output_file,
pg_version=env.pg_version,
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
str(lsn),
"--base-tarfile",
str(tar_output_file),
"--pg-version",
env.pg_version,
]
)
# Wait for data to land in s3

View File

@@ -178,9 +178,9 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
def tenant_update_config(changes):
tenant_config.update(changes)
env.config_tenant(tenant_id, tenant_config)
env.neon_cli.config_tenant(tenant_id, tenant_config)
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
log.info("tenant id is %s", tenant_id)
env.initial_tenant = tenant_id # update_and_gc relies on this
ps_http = env.pageserver.http_client()

View File

@@ -8,7 +8,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",
@@ -52,7 +52,7 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",

View File

@@ -56,7 +56,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
}
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)

View File

@@ -219,7 +219,7 @@ def test_ondemand_wal_download_in_replication_slot_funcs(neon_env_builder: NeonE
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.create_branch("init")
env.neon_cli.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -270,7 +270,7 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.create_branch("init")
env.neon_cli.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -352,7 +352,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.create_branch("init")
env.neon_cli.create_branch("init")
endpoint = env.endpoints.create_start("init")
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
@@ -397,7 +397,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.create_branch("init")
env.neon_cli.create_branch("init")
endpoint = env.endpoints.create_start("init")
cur = endpoint.connect().cursor()
@@ -445,7 +445,7 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
def test_slots_and_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, timeline = env.create_tenant()
tenant, timeline = env.neon_cli.create_tenant()
env.pageserver.http_client()
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
@@ -457,7 +457,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
# Create branch ws.
env.create_branch("ws", ancestor_branch_name="main", tenant_id=tenant)
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
# Check that we can create slot with the same name
@@ -469,10 +469,10 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without getting stuck when a replication job is active and the neon extension is installed
env = neon_simple_env
env.create_branch("test_replication_shutdown_publisher", ancestor_branch_name="main")
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
pub = env.endpoints.create("test_replication_shutdown_publisher")
env.create_branch("test_replication_shutdown_subscriber")
env.neon_cli.create_branch("test_replication_shutdown_subscriber")
sub = env.endpoints.create("test_replication_shutdown_subscriber")
pub.respec(skip_pg_catalog_updates=False)
@@ -575,7 +575,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
vanilla_pg.start()
vanilla_pg.safe_psql("create extension neon;")
env.create_branch("subscriber")
env.neon_cli.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()

View File

@@ -32,7 +32,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
"""
env = neon_env_builder.init_start()
tenant_id, _ = env.create_tenant(
tenant_id, _ = env.neon_cli.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -43,7 +43,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
}
)
timeline_id = env.create_branch("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id)
endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0]
@@ -123,8 +123,8 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
endpoint_here.stop_and_destroy()
# Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff
timeline_id_child = env.create_branch(
"test_lsn_mapping_child", ancestor_branch_name="test_lsn_mapping", tenant_id=tenant_id
timeline_id_child = env.neon_cli.create_branch(
"test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping"
)
# Timestamp is in the unreachable past
@@ -190,7 +190,7 @@ def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.create_branch("test_ts_of_lsn_api")
new_timeline_id = env.neon_cli.create_branch("test_ts_of_lsn_api")
endpoint_main = env.endpoints.create_start("test_ts_of_lsn_api")
cur = endpoint_main.connect().cursor()

View File

@@ -72,7 +72,9 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.create_branch("test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn)
env.neon_cli.create_branch(
"test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
)
endpoint_new = env.endpoints.create_start("test_multixact_new")
next_multixact_id_new = endpoint_new.safe_psql(

View File

@@ -31,7 +31,7 @@ def helper_compare_timeline_list(
)
)
timelines_cli = env.neon_cli.timeline_list(initial_tenant)
timelines_cli = env.neon_cli.list_timelines(initial_tenant)
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
assert timelines_api == cli_timeline_ids
@@ -44,19 +44,17 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.create_branch("test_cli_branch_list_main")
main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main")
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.create_branch(
"test_cli_branch_list_nested", ancestor_branch_name="test_cli_branch_list_main"
nested_timeline_id = env.neon_cli.create_branch(
"test_cli_branch_list_nested", "test_cli_branch_list_main"
)
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
timelines_cli = [
timeline_id for (_, timeline_id) in env.neon_cli.timeline_list(env.initial_tenant)
]
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
assert main_timeline_id in timelines_cli
assert nested_timeline_id in timelines_cli
@@ -66,7 +64,7 @@ def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))
res = env.neon_cli.tenant_list()
res = env.neon_cli.list_tenants()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants_api == tenants_cli
@@ -79,18 +77,18 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant1, _ = env.create_tenant()
tenant1, _ = env.neon_cli.create_tenant()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant2, _ = env.create_tenant()
tenant2, _ = env.neon_cli.create_tenant()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
res = env.neon_cli.tenant_list()
res = env.neon_cli.list_tenants()
tenants = sorted(map(lambda t: TenantId(t.split()[0]), res.stdout.splitlines()))
assert env.initial_tenant in tenants
@@ -100,8 +98,8 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.create_tenant()
timelines = env.neon_cli.timeline_list(tenant_id)
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)
# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
@@ -134,7 +132,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.storage_controller_stop(False)
env.neon_cli.storage_broker_stop()
env.neon_cli.broker_stop()
# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -177,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
# Stop this to get out of the way of the following `start`
env.neon_cli.storage_controller_stop(False)
env.neon_cli.storage_broker_stop()
env.neon_cli.broker_stop()
# Default start
res = env.neon_cli.raw_cli(["start"])

View File

@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
# Verify that the neon extension is installed and has the correct version.
def test_neon_extension(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_create_extension_neon")
env.neon_cli.create_branch("test_create_extension_neon")
endpoint_main = env.endpoints.create("test_create_extension_neon")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -35,7 +35,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be upgraded/downgraded.
def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_neon_extension_compatibility")
env.neon_cli.create_branch("test_neon_extension_compatibility")
endpoint_main = env.endpoints.create("test_neon_extension_compatibility")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -72,7 +72,7 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be auto-upgraded to the latest version.
def test_neon_extension_auto_upgrade(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_neon_extension_auto_upgrade")
env.neon_cli.create_branch("test_neon_extension_auto_upgrade")
endpoint_main = env.endpoints.create("test_neon_extension_auto_upgrade")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon

View File

@@ -1,5 +1,4 @@
import pytest
from fixtures.common_types import TimelineId
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.port_distributor import PortDistributor
@@ -11,36 +10,22 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
# Skipping the init step that creates a local tenant in Pytest tests
try:
env.neon_cli.start()
env.create_tenant(tenant_id=env.initial_tenant, set_default=True)
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)
main_branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
main_branch_name,
pg_port,
http_port,
endpoint_id="ep-basic-main",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
main_branch_name, pg_port, http_port, endpoint_id="ep-basic-main"
)
env.neon_cli.endpoint_start("ep-basic-main")
branch_name = "migration-check"
env.neon_cli.timeline_branch(
tenant_id=env.initial_tenant,
timeline_id=TimelineId.generate(),
new_branch_name=branch_name,
)
env.neon_cli.create_branch(branch_name)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id=f"ep-{branch_name}",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
branch_name, pg_port, http_port, endpoint_id=f"ep-{branch_name}"
)
env.neon_cli.endpoint_start(f"ep-{branch_name}")
finally:
@@ -58,26 +43,12 @@ def test_neon_two_primary_endpoints_fail(
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep1",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep1")
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
# ep1 is not running so create will succeed
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep2",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep2")
env.neon_cli.endpoint_start("ep1")

View File

@@ -6,10 +6,10 @@ from fixtures.utils import wait_until
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.create_branch("test_neon_superuser_publisher", ancestor_branch_name="main")
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
pub = env.endpoints.create("test_neon_superuser_publisher")
env.create_branch("test_neon_superuser_subscriber")
env.neon_cli.create_branch("test_neon_superuser_subscriber")
sub = env.endpoints.create("test_neon_superuser_subscriber")
pub.respec(skip_pg_catalog_updates=False)

View File

@@ -5,7 +5,7 @@ from fixtures.pageserver.http import PageserverHttpClient
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = endpoint.safe_psql_many(

View File

@@ -17,7 +17,7 @@ from fixtures.utils import print_gc_result, query_scalar
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
env.create_branch("test_old_request_lsn", ancestor_branch_name="main")
env.neon_cli.create_branch("test_old_request_lsn", "main")
endpoint = env.endpoints.create_start("test_old_request_lsn")
pg_conn = endpoint.connect()

View File

@@ -545,7 +545,7 @@ def test_compaction_downloads_on_demand_without_image_creation(neon_env_builder:
layer_sizes += layer.layer_file_size
pageserver_http.evict_layer(tenant_id, timeline_id, layer.layer_file_name)
env.config_tenant(tenant_id, {"compaction_threshold": "3"})
env.neon_cli.config_tenant(tenant_id, {"compaction_threshold": "3"})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
@@ -647,7 +647,7 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne
# layers -- threshold of 2 would sound more reasonable, but keeping it as 1
# to be less flaky
conf["image_creation_threshold"] = "1"
env.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
env.neon_cli.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)

View File

@@ -59,7 +59,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
timeline_details = client.timeline_detail(
tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True
@@ -108,7 +108,7 @@ def expect_updated_msg_lsn(
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# insert something to force sk -> ps message

View File

@@ -9,7 +9,7 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.create_branch("test_pageserver_catchup_while_compute_down")
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
endpoint = env.endpoints.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]

View File

@@ -150,7 +150,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline
)
@@ -643,7 +643,9 @@ def test_upgrade_generationless_local_file_paths(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
workload = Workload(env, tenant_id, timeline_id)
workload.init()

View File

@@ -42,7 +42,7 @@ async def run_worker_for_tenant(
async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> Tuple[TenantId, TimelineId, Lsn]:
tenant, timeline = env.create_tenant(conf=tenant_conf)
tenant, timeline = env.neon_cli.create_tenant(conf=tenant_conf)
last_flush_lsn = await run_worker_for_tenant(env, entries, tenant)
return tenant, timeline, last_flush_lsn

View File

@@ -14,7 +14,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.create_branch("test_pageserver_restarts")
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
@@ -46,7 +46,7 @@ def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
# Test handling errors during page server reconnect
def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv):
env = neon_simple_env
env.create_branch("test_pageserver_reconnect")
env.neon_cli.create_branch("test_pageserver_reconnect")
endpoint = env.endpoints.create_start("test_pageserver_reconnect")
con = endpoint.connect()

View File

@@ -169,7 +169,7 @@ def test_pageserver_chaos(
# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"checkpoint_distance": "5000000",
}

View File

@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# running.
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.create_branch("test_pageserver_restarts")
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_restarts = 10
scale = 10

View File

@@ -650,7 +650,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_a = TimelineId.generate()
timeline_b = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_a,
placement_policy='{"Attached":1}',
@@ -658,7 +658,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
# to trigger the upload promptly.
conf={"heatmap_period": f"{upload_period_secs}s"},
)
env.create_timeline("main2", tenant_id, timeline_b)
env.neon_cli.create_timeline("main2", tenant_id, timeline_b)
tenant_timelines[tenant_id] = [timeline_a, timeline_b]
@@ -778,7 +778,9 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
ps_attached = env.get_pageserver(attached_to_id)

View File

@@ -57,7 +57,7 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by the PITR setting
env.create_branch("test_pitr_gc_hundred", ancestor_branch_name="main", ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")

View File

@@ -25,7 +25,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
)
# Create a branch for us
env.create_branch("test_pageserver_recovery", ancestor_branch_name="main")
env.neon_cli.create_branch("test_pageserver_recovery", "main")
endpoint = env.endpoints.create_start("test_pageserver_recovery")

View File

@@ -230,7 +230,7 @@ def test_remote_storage_upload_queue_retries(
# create tenant with config that will deterministically allow
# compaction and gc
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{64 * 1024}",
@@ -640,9 +640,7 @@ def test_empty_branch_remote_storage_upload(neon_env_builder: NeonEnvBuilder):
client = env.pageserver.http_client()
new_branch_name = "new_branch"
new_branch_timeline_id = env.create_branch(
new_branch_name, ancestor_branch_name="main", tenant_id=env.initial_tenant
)
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
timelines_before_detach = set(

View File

@@ -60,7 +60,9 @@ def test_tenant_s3_restore(
last_flush_lsns = []
for timeline in ["first", "second"]:
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
endpoint.safe_psql(f"CREATE TABLE created_{timeline}(id integer);")

View File

@@ -77,7 +77,7 @@ def test_sharding_smoke(
assert all(s < expect_initdb_size // 2 for s in sizes.values())
# Test that timeline creation works on a sharded tenant
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
# Test that we can write data to a sharded tenant
workload = Workload(env, tenant_id, timeline_b, branch_name="branch_b")
@@ -378,7 +378,7 @@ def test_sharding_split_smoke(
env.start()
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -1127,7 +1127,7 @@ def test_sharding_split_failures(
timeline_id = TimelineId.generate()
# Create a tenant with secondary locations enabled
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=initial_shard_count, placement_policy='{"Attached":1}'
)
@@ -1441,7 +1441,7 @@ def test_sharding_unlogged_relation(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=8)
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=8)
# We will create many tables to ensure it's overwhelmingly likely that at least one
# of them doesn't land on shard 0
@@ -1483,7 +1483,7 @@ def test_top_tenants(neon_env_builder: NeonEnvBuilder):
for i in range(0, n_tenants):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)
# Write a different amount of data to each tenant
w = Workload(env, tenant_id, timeline_id)

View File

@@ -96,7 +96,7 @@ def test_storage_controller_smoke(
# Creating several tenants should spread out across the pageservers
for tid in tenant_ids:
env.create_tenant(tid, shard_count=shards_per_tenant)
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
# Repeating a creation should be idempotent (we are just testing it doesn't return an error)
env.storage_controller.tenant_create(
@@ -172,7 +172,7 @@ def test_storage_controller_smoke(
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
for tid in tenant_ids:
env.create_tenant(tid, shard_count=shards_per_tenant)
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
counts = get_node_shard_counts(env, tenant_ids)
# Nothing should have been scheduled on the node in Draining
@@ -806,7 +806,10 @@ def test_storage_controller_s3_time_travel_recovery(
env.storage_controller.consistency_check()
branch_name = "main"
timeline_id = env.create_timeline(branch_name, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_timeline(
branch_name,
tenant_id=tenant_id,
)
# Write some nontrivial amount of data into the endpoint and wait until it is uploaded
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1006,7 +1009,9 @@ def test_storage_controller_tenant_deletion(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}'
)
# Ensure all the locations are configured, including secondaries
env.storage_controller.reconcile_until_idle()
@@ -1212,7 +1217,10 @@ def test_storage_controller_heartbeats(
env.storage_controller.tenant_create(tid)
branch_name = "main"
env.create_timeline(branch_name, tenant_id=tid)
env.neon_cli.create_timeline(
branch_name,
tenant_id=tid,
)
with env.endpoints.create_start("main", tenant_id=tid) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1314,9 +1322,9 @@ def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder):
# We'll have two tenants.
tenant_a = TenantId.generate()
env.create_tenant(tenant_a, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(tenant_a, placement_policy='{"Attached":1}')
tenant_b = TenantId.generate()
env.create_tenant(tenant_b, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(tenant_b, placement_policy='{"Attached":1}')
# Each pageserver will have one attached and one secondary location
env.storage_controller.tenant_shard_migrate(
@@ -1639,7 +1647,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
# Create a second timeline to ensure that import finds both
timeline_a = env.initial_timeline
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
workload_a = Workload(env, tenant_id, timeline_a, branch_name="main")
workload_a.init()
@@ -1681,7 +1689,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
)
# Now import it again
env.neon_cli.tenant_import(tenant_id)
env.neon_cli.import_tenant(tenant_id)
# Check we found the shards
describe = env.storage_controller.tenant_describe(tenant_id)
@@ -1723,7 +1731,7 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1810,7 +1818,7 @@ def test_skip_drain_on_secondary_lag(neon_env_builder: NeonEnvBuilder, pg_bin: P
env = neon_env_builder.init_configs()
env.start()
tid, timeline_id = env.create_tenant(placement_policy='{"Attached":1}')
tid, timeline_id = env.neon_cli.create_tenant(placement_policy='{"Attached":1}')
# Give things a chance to settle.
env.storage_controller.reconcile_until_idle(timeout_secs=30)
@@ -1916,7 +1924,7 @@ def test_background_operation_cancellation(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1976,7 +1984,7 @@ def test_storage_controller_node_deletion(
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -2101,7 +2109,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant with unhealthy scrubber scan result
tenant_b, _ = env.create_tenant(shard_count=shard_count)
tenant_b, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_b_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_b, shard_count=shard_count)
if shard_count is not None
@@ -2109,7 +2117,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant that never gets a health update from scrubber
tenant_c, _ = env.create_tenant(shard_count=shard_count)
tenant_c, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_c_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_c, shard_count=shard_count)
@@ -2509,7 +2517,7 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.storage_controller.pageserver_api().set_tenant_config(tenant_id, TENANT_CONF)
# Write enough data that a compaction would do some work (deleting some L0s)
@@ -2644,7 +2652,7 @@ def test_storage_controller_proxy_during_migration(
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)
# The test stalls a reconcile on purpose to check if the long running
# reconcile alert fires.
@@ -2823,7 +2831,7 @@ def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
# Generate a layer to keep shard split handling on the ps from tripping
# up on a debug assert.
timeline_id = TimelineId.generate()
env.create_timeline("bar", tids[0], timeline_id)
env.neon_cli.create_timeline("bar", tids[0], timeline_id)
workload = Workload(env, tids[0], timeline_id, branch_name="bar")
workload.init()
@@ -2911,97 +2919,3 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise
@run_only_on_default_postgres("Postgres version makes no difference here")
@pytest.mark.parametrize(
"migration_failpoint",
[
MigrationFailpoints.PRE_GENERATION_INC,
MigrationFailpoints.POST_NOTIFY,
MigrationFailpoints.POST_DETACH,
],
)
def test_multi_attached_timeline_creation(neon_env_builder: NeonEnvBuilder, migration_failpoint):
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_configs()
env.start()
tenant_id = TenantId.generate()
env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})
shard_zero = TenantShardId(tenant_id, 0, 0)
locations = env.storage_controller.get_tenants_placement()[str(shard_zero)]
assert locations["observed"] == locations["intent"]
assert locations["observed"]["attached"] is not None
assert len(locations["observed"]["secondary"]) > 0
attached_location = locations["observed"]["attached"]
secondary_location = locations["observed"]["secondary"][0]
env.storage_controller.configure_failpoints((migration_failpoint.value, "pause"))
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
migrate_fut = executor.submit(
env.storage_controller.tenant_shard_migrate,
shard_zero,
secondary_location,
)
def has_hit_migration_failpoint():
expr = f"at failpoint {migration_failpoint.value}"
log.info(expr)
assert env.storage_controller.log_contains(expr)
wait_until(10, 1, has_hit_migration_failpoint)
timeline_id = TimelineId.generate()
env.storage_controller.pageserver_api().timeline_create(
pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
)
# Timeline creation only goes to the origin.
if migration_failpoint == MigrationFailpoints.PRE_GENERATION_INC:
client = env.get_pageserver(attached_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(secondary_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
# Timeline creation goes to both attached locations
if migration_failpoint == MigrationFailpoints.POST_NOTIFY:
for node_id in [attached_location, secondary_location]:
client = env.get_pageserver(node_id).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {node_id}"
# Timeline creation goes to both locations, but storcon gets a 404 from the origin
# which it ignores.
if migration_failpoint == MigrationFailpoints.POST_DETACH:
client = env.get_pageserver(secondary_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
# Eventually migration completes
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
migrate_fut.result()
# Ensure that we detached from the old attached location
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
except:
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise
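A rough sketch of the polling helper that the failpoint dance above relies on, under the assumption that wait_until(retries, interval, func) simply retries a callable that raises until it succeeds; this is illustrative only, not the fixtures' actual implementation.
import time

def wait_until(retries, interval, func):
    # Retry `func` up to `retries` times, sleeping `interval` seconds between
    # attempts; re-raise the last failure if it never succeeds.
    last_exc = None
    for _ in range(retries):
        try:
            return func()
        except Exception as e:
            last_exc = e
            time.sleep(interval)
    if last_exc is None:
        raise RuntimeError("wait_until called with zero retries")
    raise last_exc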

View File

@@ -135,7 +135,7 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
workload = Workload(env, tenant_id, timeline_id)
workload.init()
@@ -185,7 +185,7 @@ def test_scrubber_physical_gc_ancestors(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -303,7 +303,7 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=None,
@@ -385,7 +385,7 @@ def test_scrubber_physical_gc_ancestors_split(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
initial_shard_count = 2
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=initial_shard_count,

View File

@@ -9,11 +9,11 @@ from fixtures.utils import wait_until
# It requires tracking information about replication origins on the pageserver side
def test_subscriber_restart(neon_simple_env: NeonEnv):
env = neon_simple_env
env.create_branch("publisher")
env.neon_cli.create_branch("publisher")
pub = env.endpoints.create("publisher")
pub.start()
sub_timeline_id = env.create_branch("subscriber")
sub_timeline_id = env.neon_cli.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()

View File

@@ -38,7 +38,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# Check that we raise on misspelled configs
invalid_conf_key = "some_invalid_setting_name_blah_blah_123"
try:
env.create_tenant(
env.neon_cli.create_tenant(
conf={
invalid_conf_key: "20000",
}
@@ -54,9 +54,9 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
"evictions_low_residence_duration_metric_threshold": "42s",
"eviction_policy": json.dumps({"kind": "NoEviction"}),
}
tenant, _ = env.create_tenant(conf=new_conf)
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)
env.create_timeline("test_tenant_conf", tenant_id=tenant)
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
env.endpoints.create_start("test_tenant_conf", "main", tenant)
# check the configuration of the default tenant
@@ -121,7 +121,10 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
),
"max_lsn_wal_lag": "13000000",
}
env.config_tenant(tenant_id=tenant, conf=conf_update)
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=conf_update,
)
updated_tenant_config = http_client.tenant_config(tenant_id=tenant)
updated_specific_config = updated_tenant_config.tenant_specific_overrides
@@ -169,8 +172,10 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
final_conf = {
"pitr_interval": "1 min",
}
env.config_tenant(tenant_id=tenant, conf=final_conf)
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=final_conf,
)
final_tenant_config = http_client.tenant_config(tenant_id=tenant)
final_specific_config = final_tenant_config.tenant_specific_overrides
assert final_specific_config["pitr_interval"] == "1m"
@@ -213,7 +218,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
# tenant is created with defaults, i.e. without a config file
(tenant_id, timeline_id) = env.create_tenant()
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
config_path = env.pageserver.tenant_dir(tenant_id) / "config-v1"
http_client = env.pageserver.http_client()
@@ -235,9 +240,9 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
func=lambda: assert_tenant_state(http_client, tenant_id, "Active"),
)
env.config_tenant(tenant_id, {"gc_horizon": "1000000"})
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"})
contents_first = config_path.read_text()
env.config_tenant(tenant_id, {"gc_horizon": "0"})
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "0"})
contents_later = config_path.read_text()
# don't test applying the setting here; we have another test case to show that
@@ -293,7 +298,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric = get_metric()
assert int(metric.value) > 0, "metric is updated"
env.config_tenant(
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": default_value}
)
updated_metric = get_metric()
@@ -301,7 +306,9 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric.value
), "metric is unchanged when setting same value"
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"})
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"}
)
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) == 0
@@ -313,7 +320,9 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) > 0
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"})
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"}
)
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) == 0, "value resets if label changes"
@@ -325,7 +334,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) > 0, "set a non-zero value for next step"
env.config_tenant(tenant_id, {})
env.neon_cli.config_tenant(tenant_id, {})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default"
assert int(metric.value) == 0, "value resets to default"

View File

@@ -78,7 +78,7 @@ def test_tenant_delete_smoke(
# may need to retry on some remote storage errors injected by the test harness
error_tolerant_delete(ps_http, tenant_id)
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id=tenant_id,
conf=many_small_layers_tenant_config(),
)
@@ -89,7 +89,9 @@ def test_tenant_delete_smoke(
# create two timelines, one being the parent of the other
parent = None
for timeline in ["first", "second"]:
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
@@ -337,7 +339,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, make_httpserver, neon_env_builder
ps_http = env.pageserver.http_client()
# create a tenant separate from the main tenant so that we have one remaining
# after we delete it, as the scrubber treats empty buckets as an error.
(tenant_id, timeline_id) = env.create_tenant()
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())

Some files were not shown because too many files have changed in this diff.