mirror of https://github.com/neondatabase/neon.git
synced 2026-01-26 23:00:37 +00:00

Compare commits: 4 commits, branch break-e2e-...local-prox

| Author | SHA1 | Date |
|---|---|---|
|  | 87c793f58c |  |
|  | 52a7d780ad |  |
|  | 2255a8ebac |  |
|  | e109d5aac0 |  |

.github/workflows/build_and_test.yml (vendored) · 4
@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }}

cancel-previous-e2e-tests:
needs: [ check-permissions ]
needs: [ check-permissions, promote-images, tag ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04

@@ -518,7 +518,7 @@ jobs:

trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag ]
needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ]
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit
Cargo.lock (generated) · 8

@@ -1265,7 +1265,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"cfg-if",
"chrono",
"clap",
@@ -7335,6 +7334,12 @@ version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"aws-config",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1",
"base64ct",
"bitflags 2.4.1",
@@ -7408,6 +7413,7 @@ dependencies = [
"tracing",
"tracing-core",
"url",
"uuid",
"zeroize",
"zstd",
"zstd-safe",

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
@@ -1075,20 +1075,6 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=

#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG

USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy

#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
@@ -1227,10 +1213,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini

# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy

# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
@@ -19,10 +19,6 @@ commands:
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
@@ -11,7 +11,7 @@ testing = []

[dependencies]
anyhow.workspace = true
camino.workspace = true
# camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true
@@ -34,7 +34,6 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};

use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -887,11 +886,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);

if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}

// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
@@ -942,19 +936,6 @@ impl ComputeNode {
});
}

if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");

// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}

// Write new config
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -1042,19 +1023,6 @@ impl ComputeNode {
});
}

if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");

// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}

info!(
"start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions
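The hunks above call `local_proxy::configure` from a background thread so that Postgres startup is not blocked. The implementation of that helper is not shown in this diff; the sketch below is a purely hypothetical illustration of why it is blocking work (file I/O plus signalling a separate process). The file paths come from compute.yaml and the Dockerfile in this diff, but the body and the use of SIGHUP as the reload signal are assumptions, not the repository's actual code.

```rust
use anyhow::{Context, Result};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

// Hypothetical sketch: rewrite local_proxy's JSON config and nudge the running process.
fn configure(spec: &LocalProxySpec) -> Result<()> {
    // Rewrite the config file local_proxy was started with (--config-path).
    let config = serde_json::to_string_pretty(spec).context("serialize local_proxy config")?;
    std::fs::write("/etc/local_proxy/config.json", config).context("write local_proxy config")?;

    // Ask the running process (--pid-path) to pick the new config up (signal choice is an assumption).
    let pid: i32 = std::fs::read_to_string("/etc/local_proxy/pid")
        .context("read local_proxy pid file")?
        .trim()
        .parse()
        .context("parse local_proxy pid")?;
    kill(Pid::from_raw(pid), Signal::SIGHUP).context("signal local_proxy")?;
    Ok(())
}
```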
@@ -264,68 +264,72 @@ async fn handle_configure_request(

let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => {
let spec = request.spec;

let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};

// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}

// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);

if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}

Ok(())
})
.await
.unwrap()?;
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);

// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}

Ok(())
})
.await
.unwrap()?;

// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
}
Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
}
}
@@ -15,7 +15,7 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
pub mod local_proxy;
// pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;

@@ -109,6 +109,7 @@ pub struct ComputeSpec {

/// Local Proxy configuration used for JWT authentication
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>,
}

@@ -282,7 +283,7 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;

/// Configured the local_proxy application with the relevant JWKS and roles it should
/// Configured the local-proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec {
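A small self-contained illustration of the serde attributes used on `local_proxy_config` above: `#[serde(default)]` makes the field optional on input, and `skip_serializing_if = "Option::is_none"` keeps it out of the JSON when unset. `Toy` stands in for ComputeSpec/LocalProxySpec, whose full field lists are not shown in this diff.

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize, Serialize)]
struct Toy {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    local_proxy_config: Option<serde_json::Value>,
}

fn main() -> anyhow::Result<()> {
    // Older specs without the field still parse...
    let old: Toy = serde_json::from_str("{}")?;
    assert!(old.local_proxy_config.is_none());
    // ...and serializing them does not emit a `null` field.
    assert_eq!(serde_json::to_string(&old)?, "{}");
    Ok(())
}
```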
@@ -14,7 +14,7 @@ use std::time::SystemTime;

use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff;

use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
};

pub struct AzureBlobStorage {
@@ -259,7 +259,6 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() {
match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)),
}
@@ -485,16 +484,11 @@ impl RemoteStorage for AzureBlobStorage {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));

let mut builder = blob_client.get();

if let Some(ref etag) = opts.etag {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
}
let builder = blob_client.get();

self.download_for_builder(builder, cancel).await
}
@@ -5,8 +5,6 @@ pub enum DownloadError {
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
@@ -26,7 +24,6 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -41,7 +38,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) | NotFound | Unmodified | Cancelled => true,
BadInput(_) | NotFound | Cancelled => true,
Timeout | Other(_) => false,
}
}
@@ -161,14 +161,6 @@ pub struct Listing {
pub keys: Vec<ListingObject>,
}

/// Options for downloads. The default value is a plain GET.
#[derive(Default)]
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
}

/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
@@ -253,7 +245,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;

@@ -410,18 +401,16 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}

/// See [`RemoteStorage::download`]
pub async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => s.download(from, opts, cancel).await,
Self::AwsS3(s) => s.download(from, opts, cancel).await,
Self::AzureBlob(s) => s.download(from, opts, cancel).await,
Self::Unreliable(s) => s.download(from, opts, cancel).await,
Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await,
}
}

@@ -583,7 +572,7 @@ impl GenericRemoteStorage {
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, &DownloadOpts::default(), cancel).await,
None => self.download(from, cancel).await,
}
}
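For orientation, a minimal sketch of how a caller drives the conditional-download API that the `DownloadOpts` hunk above introduces: pass the previously observed ETag and treat `DownloadError::Unmodified` as "cached copy is still valid". The surrounding function name is an illustrative assumption; the types and the `DownloadOpts { etag }` construction mirror the diff.

```rust
use remote_storage::{Download, DownloadError, DownloadOpts, Etag, GenericRemoteStorage, RemotePath};
use tokio_util::sync::CancellationToken;

async fn fetch_if_changed(
    storage: &GenericRemoteStorage,
    path: &RemotePath,
    prev_etag: Option<Etag>,
    cancel: &CancellationToken,
) -> Result<Option<Download>, DownloadError> {
    let opts = DownloadOpts { etag: prev_etag };
    match storage.download(path, &opts, cancel).await {
        // Object changed (or no ETag was supplied): stream the new body.
        Ok(download) => Ok(Some(download)),
        // ETag matched on the server side: skip re-reading the body entirely.
        Err(DownloadError::Unmodified) => Ok(None),
        Err(err) => Err(err),
    }
}
```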
@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension;

use crate::{
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};

use super::{RemoteStorage, StorageMetadata};
@@ -494,17 +494,11 @@ impl RemoteStorage for LocalFs {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);

let file_metadata = file_metadata(&target_path).await?;
let etag = mock_etag(&file_metadata);

if opts.etag.as_ref() == Some(&etag) {
return Err(DownloadError::Unmodified);
}

let source = ReaderStream::new(
fs::OpenOptions::new()
@@ -525,6 +519,7 @@ impl RemoteStorage for LocalFs {
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);

let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
@@ -697,7 +692,7 @@ mod fs_tests {
) -> anyhow::Result<String> {
let cancel = CancellationToken::new();
let download = storage
.download(remote_storage_path, &DownloadOpts::default(), &cancel)
.download(remote_storage_path, &cancel)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
@@ -778,8 +773,8 @@ mod fs_tests {
"We should upload and download the same contents"
);

let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -1106,13 +1101,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}

let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);

let shorter = Bytes::from_static(b"shorter body");
@@ -1123,13 +1112,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}

let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(shorter, read);
Ok(())
}
@@ -1162,13 +1145,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?;
}

let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);

Ok(())
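The LocalFs hunk above compares the caller's ETag against `mock_etag(&file_metadata)`; the real helper is not shown in this diff. A hypothetical sketch of such a mock, assuming it is derived from file length and mtime so that it changes whenever the file is rewritten (not the repository's actual implementation, and it returns a plain `String` rather than the crate's `Etag` type):

```rust
// Hypothetical: derive a deterministic pseudo-ETag from local file metadata.
fn mock_etag(meta: &std::fs::Metadata) -> String {
    use std::time::UNIX_EPOCH;
    let mtime_nanos = meta
        .modified()
        .ok()
        .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
        .map(|d| d.as_nanos())
        .unwrap_or(0);
    // Any deterministic digest of (len, mtime) works for a mock ETag.
    format!("{:x}-{:x}", meta.len(), mtime_nanos)
}
```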
@@ -28,7 +28,6 @@ use aws_sdk_s3::{
Client,
};
use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;

use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
@@ -45,8 +44,8 @@ use crate::{
error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};

@@ -68,7 +67,6 @@ pub struct S3Bucket {
struct GetObjectRequest {
bucket: String,
key: String,
etag: Option<String>,
range: Option<String>,
}
impl S3Bucket {
@@ -250,18 +248,13 @@ impl S3Bucket {

let started_at = start_measuring_requests(kind);

let mut builder = self
let get_object = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range);

if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}

let get_object = builder.send();
.set_range(request.range)
.send();

let get_object = tokio::select! {
res = get_object => res,
@@ -284,20 +277,6 @@ impl S3Bucket {
);
return Err(DownloadError::NotFound);
}
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
@@ -794,7 +773,6 @@ impl RemoteStorage for S3Bucket {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from`
@@ -803,7 +781,6 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: opts.etag.as_ref().map(|e| e.to_string()),
range: None,
},
cancel,
@@ -830,7 +807,6 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
etag: None,
range,
},
cancel,
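Condensed from the S3 hunks above: the conditional GET sets If-None-Match on the request, and a 304 from S3 surfaces as a service error that the caller should treat as "unmodified" rather than a failure. A minimal standalone sketch under those assumptions (the wrapper function and its return shape are illustrative, not the repository's API):

```rust
use aws_sdk_s3::error::SdkError;
use aws_sdk_s3::operation::get_object::GetObjectOutput;

// Hedged sketch: fetch an object only if its ETag no longer matches `etag`.
async fn get_if_changed(
    client: &aws_sdk_s3::Client,
    bucket: &str,
    key: &str,
    etag: Option<String>,
) -> anyhow::Result<Option<GetObjectOutput>> {
    let mut req = client.get_object().bucket(bucket).key(key);
    if let Some(etag) = etag {
        // Ask S3 to answer 304 Not Modified if the object still has this ETag.
        req = req.if_none_match(etag);
    }
    match req.send().await {
        Ok(out) => Ok(Some(out)),
        // 304 arrives as a service error; map it to "nothing new" instead of failing.
        Err(SdkError::ServiceError(e)) if e.raw().status().as_u16() == 304 => Ok(None),
        Err(e) => Err(e.into()),
    }
}
```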
@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken;

use crate::{
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
RemoteStorage, StorageMetadata, TimeTravelError,
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata, TimeTravelError,
};

pub struct UnreliableWrapper {
@@ -167,12 +167,11 @@ impl RemoteStorage for UnreliableWrapper {
async fn download(
&self,
from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner.download(from, opts, cancel).await
self.inner.download(from, cancel).await
}

async fn download_byte_range(
@@ -1,7 +1,8 @@
use anyhow::Context;
use camino::Utf8Path;
use futures::StreamExt;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use remote_storage::ListingMode;
use remote_storage::RemotePath;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
@@ -283,10 +284,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?;

// Normal download request
let dl = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);

@@ -339,54 +337,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(())
}

/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();

// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;

// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;

// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);

// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;

// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");

// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);

Ok(())
}

#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -414,10 +364,7 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?;

let dl = ctx
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let dl = ctx.client.download(&path_dest, &cancel).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);

@@ -429,56 +376,3 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {

Ok(())
}

/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();

let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;

// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);

// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;

// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);

// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;

let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;

assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);

Ok(())
}
@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
use remote_storage::{
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageConfig, RemoteStorageKind, S3Config,
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
@@ -121,8 +121,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:

// A little check to ensure that our clock is not too far off from the S3 clock
{
let opts = DownloadOpts::default();
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
@@ -160,12 +159,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());

// after recovery to t1: path1 is back, path2 has the old content
@@ -176,12 +170,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());

// after recovery to t0: everything is gone except for path1
@@ -427,7 +416,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now();
let mut stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
@@ -502,7 +491,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{
let stream = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
@@ -153,9 +153,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;

let download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(src_path, cancel).await?;

pausable_failpoint!("before-downloading-layer-stream-pausable");

@@ -206,9 +204,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?;

let mut download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
let mut download = storage.download(src_path, cancel).await?;

pausable_failpoint!("before-downloading-layer-stream-pausable");

@@ -348,9 +344,7 @@ async fn do_download_index_part(

let index_part_bytes = download_retry_forever(
|| async {
let download = storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let download = storage.download(&remote_path, cancel).await?;

let mut bytes = Vec::new();

@@ -532,15 +526,10 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;

let download = match storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
let download = match storage.download(&remote_path, cancel).await {
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
storage
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
storage.download(&remote_preserved_path, cancel).await?
}
Err(other) => Err(other)?,
};
@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};

use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument};
@@ -944,34 +944,36 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",);

let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
};

backoff::retry(
|| async {
let download = match self
let download = self
.remote_storage
.download(&heatmap_path, &opts, cancel)
.download(&heatmap_path, cancel)
.await
{
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
.map_err(UpdateError::from)?;

let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
SECONDARY_MODE.download_heatmap.inc();

if Some(&download.etag) == prev_etag {
Ok(HeatMapDownload::Unmodified)
} else {
let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
}
},
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -982,7 +984,6 @@ impl<'a> TenantDownloader<'a> {
.await
.ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
}

/// Download heatmap layers that are not present on local disk, or update their
@@ -191,14 +191,13 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)

if (!wal_reader)
{
XLogRecPtr basebackupLsn = GetRedoStartLsn();
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);

/* should never happen */
if (basebackupLsn == 0)
if (epochStartLsn == 0)
{
elog(ERROR, "unable to start walsender when basebackupLsn is 0");
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
}
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
}
xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen;
@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr,
/// Path of the local proxy config file
#[clap(long, default_value = "./local_proxy.json")]
#[clap(long, default_value = "./localproxy.json")]
config_path: Utf8PathBuf,
/// Path of the local proxy PID file
#[clap(long, default_value = "./local_proxy.pid")]
#[clap(long, default_value = "./localproxy.pid")]
pid_path: Utf8PathBuf,
}
@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn};

pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";

#[derive(Debug, Error)]
pub(crate) enum ConnectionError {
@@ -1,6 +1,5 @@
use crate::{
auth::backend::ComputeCredentialKeys,
compute::COULD_NOT_CONNECT,
compute::{self, PostgresConnection},
config::RetryConfig,
console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
@@ -16,7 +15,7 @@ use crate::{
use async_trait::async_trait;
use pq_proto::StartupMessageParams;
use tokio::time;
use tracing::{debug, info, warn};
use tracing::{error, info, warn};

use super::retry::ShouldRetryWakeCompute;

@@ -117,6 +116,7 @@ where

node_info.set_keys(user_info.get_keys());
node_info.allow_self_signed_compute = allow_self_signed_compute;
// let mut node_info = credentials.get_node_info(ctx, user_info).await?;
mechanism.update_connect_config(&mut node_info.config);
let retry_type = RetryType::ConnectToCompute;

@@ -139,10 +139,10 @@ where
Err(e) => e,
};

debug!(error = ?err, COULD_NOT_CONNECT);
error!(error = ?err, "could not connect to compute node");

let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
// If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one.
if should_retry(&err, num_retries, connect_to_compute_retry_config) {
Metrics::get().proxy.retries_metric.observe(
@@ -191,7 +191,7 @@ where
}
Err(e) => {
if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
// Don't log an error here, caller will print the error
error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node");
Metrics::get().proxy.retries_metric.observe(
RetriesMetricGroup {
outcome: ConnectOutcome::Failed,
@@ -202,7 +202,7 @@ where
return Err(e.into());
}

warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);
warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node");
}
};
@@ -2,29 +2,21 @@ use utils::lsn::Lsn;

use crate::timeline_manager::StateSnapshot;

/// Get oldest LSN we still need to keep.
///
/// We hold WAL till it is consumed by
/// 1) pageserver (remote_consistent_lsn)
/// 2) s3 offloading.
/// 3) Additionally we must store WAL since last local commit_lsn because
///    that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
/// Get oldest LSN we still need to keep. We hold WAL till it is consumed
/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3
/// offloading.
/// While it is safe to use inmem values for determining horizon,
/// we use persistent to make possible normal states less surprising.
/// All segments covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min;

let mut horizon_lsn = state.cfile_remote_consistent_lsn;
let mut horizon_lsn = min(
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
// we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
}

@@ -47,7 +47,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn,

// persistent control file values
pub(crate) cfile_commit_lsn: Lsn,
pub(crate) cfile_peer_horizon_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn,

@@ -70,7 +70,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_commit_lsn: state.commit_lsn,
cfile_peer_horizon_lsn: state.peer_horizon_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(),
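Condensed restatement of the calc_horizon_lsn hunk above, for the variant that keeps the commit_lsn/flush_lsn terms: the WAL removal horizon is simply the minimum of the persisted LSNs that still need the WAL, clamped by an optional extra horizon. A minimal sketch with the same field names (the helper name is illustrative):

```rust
use utils::lsn::Lsn;

fn horizon(s: &StateSnapshot, extra: Option<Lsn>) -> Lsn {
    // WAL below this LSN is no longer needed by the pageserver, s3 offloading,
    // or local recovery on safekeeper restart.
    let mut h = s
        .cfile_remote_consistent_lsn
        .min(s.cfile_backup_lsn)
        .min(s.cfile_commit_lsn)
        .min(s.flush_lsn);
    if let Some(extra) = extra {
        h = h.min(extra);
    }
    h
}
```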
@@ -526,21 +526,6 @@ pub(crate) enum ReconcileResultRequest {
|
||||
Stop,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MutationLocation {
|
||||
node: Node,
|
||||
generation: Generation,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ShardMutationLocations {
|
||||
latest: MutationLocation,
|
||||
other: Vec<MutationLocation>,
|
||||
}
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
struct TenantMutationLocations(BTreeMap<TenantShardId, ShardMutationLocations>);
|
||||
|
||||
impl Service {
|
||||
pub fn get_config(&self) -> &Config {
|
||||
&self.config
|
||||
@@ -3002,83 +2987,38 @@ impl Service {
|
||||
failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock");
|
||||
|
||||
self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
|
||||
if targets.0.is_empty() {
|
||||
if targets.is_empty() {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Tenant not found").into(),
|
||||
));
|
||||
};
|
||||
|
||||
let (shard_zero_tid, shard_zero_locations) =
|
||||
targets.0.pop_first().expect("Must have at least one shard");
|
||||
assert!(shard_zero_tid.is_shard_zero());
|
||||
let shard_zero = targets.remove(0);
|
||||
|
||||
async fn create_one(
|
||||
tenant_shard_id: TenantShardId,
|
||||
locations: ShardMutationLocations,
|
||||
node: Node,
|
||||
jwt: Option<String>,
|
||||
create_req: TimelineCreateRequest,
|
||||
) -> Result<TimelineInfo, ApiError> {
|
||||
let latest = locations.latest.node;
|
||||
|
||||
tracing::info!(
|
||||
"Creating timeline on shard {}/{}, attached to node {latest} in generation {:?}",
|
||||
"Creating timeline on shard {}/{}, attached to node {node}",
|
||||
tenant_shard_id,
|
||||
create_req.new_timeline_id,
|
||||
locations.latest.generation
|
||||
);
|
||||
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref());
|
||||
|
||||
let client =
|
||||
PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref());
|
||||
|
||||
let timeline_info = client
|
||||
client
|
||||
.timeline_create(tenant_shard_id, &create_req)
|
||||
.await
|
||||
.map_err(|e| passthrough_api_error(&latest, e))?;
|
||||
|
||||
// We propagate timeline creations to all attached locations such that a compute
|
||||
// for the new timeline is able to start regardless of the current state of the
|
||||
// tenant shard reconciliation.
|
||||
for location in locations.other {
|
||||
tracing::info!(
|
||||
"Creating timeline on shard {}/{}, stale attached to node {} in generation {:?}",
|
||||
tenant_shard_id,
|
||||
create_req.new_timeline_id,
|
||||
location.node,
|
||||
location.generation
|
||||
);
|
||||
|
||||
let client = PageserverClient::new(
|
||||
location.node.get_id(),
|
||||
location.node.base_url(),
|
||||
jwt.as_deref(),
|
||||
);
|
||||
|
||||
let res = client
|
||||
.timeline_create(tenant_shard_id, &create_req)
|
||||
.await;
|
||||
|
||||
if let Err(e) = res {
|
||||
match e {
|
||||
mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _) => {
|
||||
// Tenant might have been detached from the stale location,
|
||||
// so ignore 404s.
|
||||
},
|
||||
_ => {
|
||||
return Err(passthrough_api_error(&location.node, e));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(timeline_info)
|
||||
.map_err(|e| passthrough_api_error(&node, e))
|
||||
}
|
||||
|
||||
// Because the caller might not provide an explicit LSN, we must do the creation first on a single shard, and then
|
||||
// use whatever LSN that shard picked when creating on subsequent shards. We arbitrarily use shard zero as the shard
|
||||
// that will get the first creation request, and propagate the LSN to all the >0 shards.
|
||||
let timeline_info = create_one(
|
||||
shard_zero_tid,
|
||||
shard_zero_locations,
|
||||
shard_zero.0,
|
||||
shard_zero.1,
|
||||
self.config.jwt_token.clone(),
|
||||
create_req.clone(),
|
||||
)
|
||||
@@ -3091,24 +3031,14 @@ impl Service {
|
||||
}
|
||||
|
||||
// Create timeline on remaining shards with number >0
|
||||
if !targets.0.is_empty() {
|
||||
if !targets.is_empty() {
|
||||
// If we had multiple shards, issue requests for the remainder now.
|
||||
let jwt = &self.config.jwt_token;
|
||||
self.tenant_for_shards(
|
||||
targets
|
||||
.0
|
||||
.iter()
|
||||
.map(|t| (*t.0, t.1.latest.node.clone()))
|
||||
.collect(),
|
||||
|tenant_shard_id: TenantShardId, _node: Node| {
|
||||
targets.iter().map(|t| (t.0, t.1.clone())).collect(),
|
||||
|tenant_shard_id: TenantShardId, node: Node| {
|
||||
let create_req = create_req.clone();
|
||||
let mutation_locations = targets.0.remove(&tenant_shard_id).unwrap();
|
||||
Box::pin(create_one(
|
||||
tenant_shard_id,
|
||||
mutation_locations,
|
||||
jwt.clone(),
|
||||
create_req,
|
||||
))
|
||||
Box::pin(create_one(tenant_shard_id, node, jwt.clone(), create_req))
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
@@ -3138,7 +3068,7 @@ impl Service {
|
||||
.await;
|
||||
|
||||
self.tenant_remote_mutation(tenant_id, move |targets| async move {
|
||||
if targets.0.is_empty() {
|
||||
if targets.is_empty() {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Tenant not found").into(),
|
||||
));
|
||||
@@ -3169,9 +3099,8 @@ impl Service {
|
||||
|
||||
// no shard needs to go first/last; the operation should be idempotent
|
||||
// TODO: it would be great to ensure that all shards return the same error
|
||||
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
|
||||
let results = self
|
||||
.tenant_for_shards(locations, |tenant_shard_id, node| {
|
||||
.tenant_for_shards(targets, |tenant_shard_id, node| {
|
||||
futures::FutureExt::boxed(config_one(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
@@ -3202,7 +3131,7 @@ impl Service {
|
||||
.await;
|
||||
|
||||
self.tenant_remote_mutation(tenant_id, move |targets| async move {
|
||||
if targets.0.is_empty() {
|
||||
if targets.is_empty() {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Tenant not found").into(),
|
||||
));
|
||||
@@ -3250,9 +3179,8 @@ impl Service {
|
||||
}
|
||||
|
||||
// no shard needs to go first/last; the operation should be idempotent
|
||||
let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
|
||||
let mut results = self
|
||||
.tenant_for_shards(locations, |tenant_shard_id, node| {
|
||||
.tenant_for_shards(targets, |tenant_shard_id, node| {
|
||||
futures::FutureExt::boxed(detach_one(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
@@ -3299,7 +3227,7 @@ impl Service {
|
||||
.await;
|
||||
|
||||
self.tenant_remote_mutation(tenant_id, move |targets| async move {
|
||||
if targets.0.is_empty() {
|
||||
if targets.is_empty() {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Tenant not found").into(),
|
||||
));
|
||||
@@ -3321,12 +3249,7 @@ impl Service {
|
||||
}
|
||||
|
||||
// no shard needs to go first/last; the operation should be idempotent
|
||||
let locations = targets
|
||||
.0
|
||||
.iter()
|
||||
.map(|t| (*t.0, t.1.latest.node.clone()))
|
||||
.collect();
|
||||
self.tenant_for_shards(locations, |tenant_shard_id, node| {
|
||||
self.tenant_for_shards(targets, |tenant_shard_id, node| {
|
||||
futures::FutureExt::boxed(do_one(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
@@ -3421,11 +3344,11 @@ impl Service {
op: O,
) -> Result<R, ApiError>
where
O: FnOnce(TenantMutationLocations) -> F,
O: FnOnce(Vec<(TenantShardId, Node)>) -> F,
F: std::future::Future<Output = R>,
{
let mutation_locations = {
let mut locations = TenantMutationLocations::default();
let target_gens = {
let mut targets = Vec::new();

// Load the currently attached pageservers for the latest generation of each shard. This can
// run concurrently with reconciliations, and it is not guaranteed that the node we find here
@@ -3476,50 +3399,14 @@ impl Service {
.ok_or(ApiError::Conflict(format!(
"Raced with removal of node {node_id}"
)))?;
let generation = generation.expect("Checked above");

let tenant = locked.tenants.get(&tenant_shard_id);

// TODO(vlad): Abstract the logic that finds stale attached locations
// from observed state into a [`Service`] method.
let other_locations = match tenant {
Some(tenant) => {
let mut other = tenant.attached_locations();
let latest_location_index =
other.iter().position(|&l| l == (node.get_id(), generation));
if let Some(idx) = latest_location_index {
other.remove(idx);
}

other
}
None => Vec::default(),
};

let location = ShardMutationLocations {
latest: MutationLocation {
node: node.clone(),
generation,
},
other: other_locations
.into_iter()
.filter_map(|(node_id, generation)| {
let node = locked.nodes.get(&node_id)?;

Some(MutationLocation {
node: node.clone(),
generation,
})
})
.collect(),
};
locations.0.insert(tenant_shard_id, location);
targets.push((tenant_shard_id, node.clone(), generation));
}

locations
targets
};

let result = op(mutation_locations.clone()).await;
let targets = target_gens.iter().map(|t| (t.0, t.1.clone())).collect();
let result = op(targets).await;

// Post-check: are all the generations of all the shards the same as they were initially? This proves that
// our remote operation executed on the latest generation and is therefore persistent.
@@ -3535,10 +3422,9 @@ impl Service {
}| (tenant_shard_id, generation),
)
.collect::<Vec<_>>()
!= mutation_locations
.0
!= target_gens
.into_iter()
.map(|i| (i.0, Some(i.1.latest.generation)))
.map(|i| (i.0, i.2))
.collect::<Vec<_>>()
{
// We raced with something that incremented the generation, and therefore cannot be
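The hunk above swaps the `TenantMutationLocations` argument for plain `(TenantShardId, Node)` pairs while keeping the same safety idea: record each shard's generation before the remote call and re-check it afterwards, treating any change as a race. The following is a minimal, self-contained sketch of that post-check only; `ShardId` and `Gen` are hypothetical stand-ins, not the storage controller's real types.

use std::collections::BTreeMap;

// Stand-ins for the controller's TenantShardId / Generation types.
type ShardId = u32;
type Gen = u32;

/// Run `op`, then verify that no shard's generation changed while it ran.
/// If a generation moved, the remote effect may have landed on a stale
/// attachment, so the caller must not treat it as persistent.
fn with_generation_check<R>(
    shards: &BTreeMap<ShardId, Gen>,
    reread: impl Fn() -> BTreeMap<ShardId, Gen>,
    op: impl FnOnce() -> R,
) -> Result<R, String> {
    // Snapshot generations before the mutation, as the hunk above does
    // when it builds `target_gens`.
    let before: Vec<(ShardId, Gen)> = shards.iter().map(|(k, v)| (*k, *v)).collect();

    let result = op();

    // Post-check: the same comparison idea as the diff's `!= target_gens`.
    let after: Vec<(ShardId, Gen)> = reread().into_iter().collect();
    if before != after {
        return Err("generation changed during mutation; result may not be persistent".into());
    }
    Ok(result)
}

fn main() {
    let shards: BTreeMap<ShardId, Gen> = BTreeMap::from([(0, 7), (1, 7)]);
    let stable = shards.clone();
    let out = with_generation_check(&shards, || stable.clone(), || "ok");
    println!("{out:?}"); // Ok("ok") because no generation moved
}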
@@ -3568,14 +3454,12 @@ impl Service {
.await;

self.tenant_remote_mutation(tenant_id, move |mut targets| async move {
if targets.0.is_empty() {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
}

let (shard_zero_tid, shard_zero_locations) = targets.0.pop_first().expect("Must have at least one shard");
assert!(shard_zero_tid.is_shard_zero());
let shard_zero = targets.remove(0);

async fn delete_one(
tenant_shard_id: TenantShardId,
@@ -3598,9 +3482,8 @@ impl Service {
})
}

let locations = targets.0.iter().map(|t| (*t.0, t.1.latest.node.clone())).collect();
let statuses = self
.tenant_for_shards(locations, |tenant_shard_id: TenantShardId, node: Node| {
.tenant_for_shards(targets, |tenant_shard_id: TenantShardId, node: Node| {
Box::pin(delete_one(
tenant_shard_id,
timeline_id,
@@ -3618,9 +3501,9 @@ impl Service {
// Delete shard zero last: this is not strictly necessary, but since a caller's GET on a timeline will be routed
// to shard zero, it gives a more obvious behavior that a GET returns 404 once the deletion is done.
let shard_zero_status = delete_one(
shard_zero_tid,
shard_zero.0,
timeline_id,
shard_zero_locations.latest.node,
shard_zero.1,
self.config.jwt_token.clone(),
)
.await?;

@@ -17,7 +17,6 @@ use crate::{
service::ReconcileResultRequest,
};
use futures::future::{self, Either};
use itertools::Itertools;
use pageserver_api::controller_api::{
AvailabilityZone, NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy,
};
@@ -1411,32 +1410,6 @@ impl TenantShard {
pub(crate) fn set_preferred_az(&mut self, preferred_az_id: AvailabilityZone) {
self.preferred_az_id = Some(preferred_az_id);
}

/// Returns all the nodes to which this tenant shard is attached according to the
/// observed state and the generations. Return vector is sorted from latest generation
/// to earliest.
pub(crate) fn attached_locations(&self) -> Vec<(NodeId, Generation)> {
self.observed
.locations
.iter()
.filter_map(|(node_id, observed)| {
use LocationConfigMode::{AttachedMulti, AttachedSingle, AttachedStale};

let conf = observed.conf.as_ref()?;

match (conf.generation, conf.mode) {
(Some(gen), AttachedMulti | AttachedSingle | AttachedStale) => {
Some((*node_id, gen))
}
_ => None,
}
})
.sorted_by(|(_lhs_node_id, lhs_gen), (_rhs_node_id, rhs_gen)| {
lhs_gen.cmp(rhs_gen).reverse()
})
.map(|(node_id, gen)| (node_id, Generation::new(gen)))
.collect()
}
}

#[cfg(test)]

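For reference, the removed `attached_locations()` helper orders the observed attachments from newest generation to oldest. Below is a minimal standalone sketch of that ordering under simplified assumptions, with plain integers standing in for `NodeId` and `Generation`; it is not the controller's implementation.

// Minimal sketch of the "latest generation first" ordering used by
// `attached_locations()`; u64/u32 stand in for NodeId and Generation.
fn attached_locations(observed: &[(u64, Option<u32>)]) -> Vec<(u64, u32)> {
    let mut locations: Vec<(u64, u32)> = observed
        .iter()
        // Skip locations whose observed config carries no generation.
        .filter_map(|(node_id, generation)| generation.map(|g| (*node_id, g)))
        .collect();
    // Sort by generation, newest first, mirroring the `.sorted_by(...).reverse()` above.
    locations.sort_by(|(_, lhs), (_, rhs)| lhs.cmp(rhs).reverse());
    locations
}

fn main() {
    let observed = [(10, Some(3)), (11, None), (12, Some(5))];
    // Prints [(12, 5), (10, 3)]: node 12 holds the latest generation.
    println!("{:?}", attached_locations(&observed));
}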
@@ -5,7 +5,6 @@ edition.workspace = true
license.workspace = true

[dependencies]
aws-config.workspace = true
aws-sdk-s3.workspace = true
either.workspace = true
anyhow.workspace = true
@@ -32,6 +31,7 @@ storage_controller_client.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] }

pageserver = { path = "../pageserver" }
pageserver_api = { path = "../libs/pageserver_api" }

@@ -28,9 +28,8 @@ use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_time
use pageserver::tenant::TENANTS_SEGMENT_NAME;
use pageserver_api::shard::TenantShardId;
use remote_storage::{
DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind,
S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
use reqwest::Url;
use serde::{Deserialize, Serialize};
@@ -489,10 +488,7 @@ async fn download_object_with_retries(
let cancel = CancellationToken::new();
for trial in 0..MAX_RETRIES {
let mut buf = Vec::new();
let download = match remote_client
.download(key, &DownloadOpts::default(), &cancel)
.await
{
let download = match remote_client.download(key, &cancel).await {
Ok(response) => response,
Err(e) => {
error!("Failed to download object for key {key}: {e}");

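The scrubber hunk above keeps `download_object_with_retries` as a simple bounded loop: attempt the download up to `MAX_RETRIES` times and log each failure. A rough sketch of that shape follows; `fetch` and the backoff are assumptions for illustration and do not reflect the real `remote_storage` API.

// A minimal bounded-retry shape like download_object_with_retries:
// try up to MAX_RETRIES times, log the error, and back off briefly.
// `fetch` is a hypothetical stand-in for the real remote_storage call.
use std::{thread, time::Duration};

const MAX_RETRIES: u32 = 3;

fn download_with_retries(mut fetch: impl FnMut() -> Result<Vec<u8>, String>) -> Result<Vec<u8>, String> {
    let mut last_err = String::new();
    for trial in 0..MAX_RETRIES {
        match fetch() {
            Ok(buf) => return Ok(buf),
            Err(e) => {
                eprintln!("download failed on trial {trial}: {e}");
                last_err = e;
                thread::sleep(Duration::from_millis(100 * (trial as u64 + 1)));
            }
        }
    }
    Err(last_err)
}

fn main() {
    let mut attempts = 0;
    let result = download_with_retries(|| {
        attempts += 1;
        if attempts < 3 { Err("transient".into()) } else { Ok(b"payload".to_vec()) }
    });
    println!("{result:?}"); // succeeds on the third attempt after two logged failures
}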
@@ -1,662 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
import textwrap
|
||||
from itertools import chain, product
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
cast,
|
||||
)
|
||||
|
||||
import toml
|
||||
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.pageserver.common_types import IndexPartDump
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.utils import AuxFileStore
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class AbstractNeonCli(abc.ABC):
|
||||
"""
|
||||
A typed wrapper around an arbitrary Neon CLI tool.
|
||||
Supports a way to run arbitrary command directly via CLI.
|
||||
Do not use directly, use specific subclasses instead.
|
||||
"""
|
||||
|
||||
def __init__(self, extra_env: Optional[Dict[str, str]], binpath: Path):
|
||||
self.extra_env = extra_env
|
||||
self.binpath = binpath
|
||||
|
||||
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
|
||||
|
||||
def raw_cli(
|
||||
self,
|
||||
arguments: List[str],
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
check_return_code=True,
|
||||
timeout=None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
"""
|
||||
Run the command with the specified arguments.
|
||||
|
||||
Arguments must be in list form, e.g. ['endpoint', 'create']
|
||||
|
||||
Return both stdout and stderr, which can be accessed as
|
||||
|
||||
>>> result = env.neon_cli.raw_cli(...)
|
||||
>>> assert result.stderr == ""
|
||||
>>> log.info(result.stdout)
|
||||
|
||||
If `check_return_code`, on non-zero exit code logs failure and raises.
|
||||
"""
|
||||
|
||||
assert isinstance(arguments, list)
|
||||
assert isinstance(self.COMMAND, str)
|
||||
|
||||
command_path = str(self.binpath / self.COMMAND)
|
||||
|
||||
args = [command_path] + arguments
|
||||
log.info('Running command "{}"'.format(" ".join(args)))
|
||||
|
||||
env_vars = os.environ.copy()
|
||||
|
||||
# extra env
|
||||
for extra_env_key, extra_env_value in (self.extra_env or {}).items():
|
||||
env_vars[extra_env_key] = extra_env_value
|
||||
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
|
||||
env_vars[extra_env_key] = extra_env_value
|
||||
|
||||
# Pass through coverage settings
|
||||
var = "LLVM_PROFILE_FILE"
|
||||
val = os.environ.get(var)
|
||||
if val:
|
||||
env_vars[var] = val
|
||||
|
||||
# Intercept CalledProcessError and print more info
|
||||
try:
|
||||
res = subprocess.run(
|
||||
args,
|
||||
env=env_vars,
|
||||
check=False,
|
||||
universal_newlines=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
timeout=timeout,
|
||||
)
|
||||
except subprocess.TimeoutExpired as e:
|
||||
if e.stderr:
|
||||
stderr = e.stderr.decode(errors="replace")
|
||||
else:
|
||||
stderr = ""
|
||||
|
||||
if e.stdout:
|
||||
stdout = e.stdout.decode(errors="replace")
|
||||
else:
|
||||
stdout = ""
|
||||
|
||||
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
|
||||
raise
|
||||
|
||||
indent = " "
|
||||
if not res.returncode:
|
||||
stripped = res.stdout.strip()
|
||||
lines = stripped.splitlines()
|
||||
if len(lines) < 2:
|
||||
log.debug(f"Run {res.args} success: {stripped}")
|
||||
else:
|
||||
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
|
||||
elif check_return_code:
|
||||
# this way command output will be in recorded and shown in CI in failure message
|
||||
indent = indent * 2
|
||||
msg = textwrap.dedent(
|
||||
"""\
|
||||
Run %s failed:
|
||||
stdout:
|
||||
%s
|
||||
stderr:
|
||||
%s
|
||||
"""
|
||||
)
|
||||
msg = msg % (
|
||||
res.args,
|
||||
textwrap.indent(res.stdout.strip(), indent),
|
||||
textwrap.indent(res.stderr.strip(), indent),
|
||||
)
|
||||
log.info(msg)
|
||||
raise RuntimeError(msg) from subprocess.CalledProcessError(
|
||||
res.returncode, res.args, res.stdout, res.stderr
|
||||
)
|
||||
return res
|
||||
|
||||
|
||||
class NeonLocalCli(AbstractNeonCli):
|
||||
"""A typed wrapper around the `neon_local` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
|
||||
Note: The methods in this class are supposed to be faithful wrappers of the underlying
|
||||
'neon_local' commands. If you're tempted to add any logic here, please consider putting it
|
||||
in the caller instead!
|
||||
|
||||
There are a few exceptions where these wrapper methods intentionally differ from the
|
||||
underlying commands, however:
|
||||
- Many 'neon_local' commands take an optional 'tenant_id' argument and use the default from
|
||||
the config file if it's omitted. The corresponding wrappers require an explicit 'tenant_id'
|
||||
argument. The idea is that we don't want to rely on the config file's default in tests,
|
||||
because NeonEnv has its own 'initial_tenant'. They are currently always the same, but we
|
||||
want to rely on the Neonenv's default instead of the config file default in tests.
|
||||
|
||||
- Similarly, --pg_version argument is always required in the wrappers, even when it's
|
||||
optional in the 'neon_local' command. The default in 'neon_local' is a specific
|
||||
hardcoded version, but in tests, we never want to accidentally rely on that;, we
|
||||
always want to use the version from the test fixtures.
|
||||
|
||||
- Wrappers for commands that create a new tenant or timeline ID require the new tenant
|
||||
or timeline ID to be passed by the caller, while the 'neon_local' commands will
|
||||
generate a random ID if it's not specified. This is because we don't want to have to
|
||||
parse the ID from the 'neon_local' output. Making it required ensures that the
|
||||
caller has to generate it.
|
||||
"""
|
||||
|
||||
COMMAND = "neon_local"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
extra_env: Optional[Dict[str, str]],
|
||||
binpath: Path,
|
||||
repo_dir: Path,
|
||||
pg_distrib_dir: Path,
|
||||
):
|
||||
if extra_env is None:
|
||||
env_vars = {}
|
||||
else:
|
||||
env_vars = extra_env.copy()
|
||||
env_vars["NEON_REPO_DIR"] = str(repo_dir)
|
||||
env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir)
|
||||
|
||||
super().__init__(env_vars, binpath)
|
||||
|
||||
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
|
||||
return super().raw_cli(*args, **kwargs)
|
||||
|
||||
def tenant_create(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
pg_version: PgVersion,
|
||||
conf: Optional[Dict[str, Any]] = None,
|
||||
shard_count: Optional[int] = None,
|
||||
shard_stripe_size: Optional[int] = None,
|
||||
placement_policy: Optional[str] = None,
|
||||
set_default: bool = False,
|
||||
aux_file_policy: Optional[AuxFileStore] = None,
|
||||
):
|
||||
"""
|
||||
Creates a new tenant, returns its id and its initial timeline's id.
|
||||
"""
|
||||
args = [
|
||||
"tenant",
|
||||
"create",
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
pg_version,
|
||||
]
|
||||
if conf is not None:
|
||||
args.extend(
|
||||
chain.from_iterable(
|
||||
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
|
||||
)
|
||||
)
|
||||
|
||||
if aux_file_policy is AuxFileStore.V2:
|
||||
args.extend(["-c", "switch_aux_file_policy:v2"])
|
||||
elif aux_file_policy is AuxFileStore.V1:
|
||||
args.extend(["-c", "switch_aux_file_policy:v1"])
|
||||
elif aux_file_policy is AuxFileStore.CrossValidation:
|
||||
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
|
||||
|
||||
if set_default:
|
||||
args.append("--set-default")
|
||||
|
||||
if shard_count is not None:
|
||||
args.extend(["--shard-count", str(shard_count)])
|
||||
|
||||
if shard_stripe_size is not None:
|
||||
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
|
||||
|
||||
if placement_policy is not None:
|
||||
args.extend(["--placement-policy", str(placement_policy)])
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
|
||||
def tenant_import(self, tenant_id: TenantId):
|
||||
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
|
||||
def tenant_set_default(self, tenant_id: TenantId):
|
||||
"""
|
||||
Update default tenant for future operations that require tenant_id.
|
||||
"""
|
||||
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
|
||||
res.check_returncode()
|
||||
|
||||
def tenant_config(self, tenant_id: TenantId, conf: Dict[str, str]):
|
||||
"""
|
||||
Update tenant config.
|
||||
"""
|
||||
|
||||
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
|
||||
if conf is not None:
|
||||
args.extend(
|
||||
chain.from_iterable(
|
||||
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
|
||||
)
|
||||
)
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
|
||||
def tenant_list(self) -> "subprocess.CompletedProcess[str]":
|
||||
res = self.raw_cli(["tenant", "list"])
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def timeline_create(
|
||||
self,
|
||||
new_branch_name: str,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
pg_version: PgVersion,
|
||||
) -> TimelineId:
|
||||
if timeline_id is None:
|
||||
timeline_id = TimelineId.generate()
|
||||
|
||||
cmd = [
|
||||
"timeline",
|
||||
"create",
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
pg_version,
|
||||
]
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
return timeline_id
|
||||
|
||||
def timeline_branch(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
new_branch_name,
|
||||
ancestor_branch_name: Optional[str] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
):
|
||||
cmd = [
|
||||
"timeline",
|
||||
"branch",
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
]
|
||||
if ancestor_branch_name is not None:
|
||||
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
|
||||
if ancestor_start_lsn is not None:
|
||||
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
def timeline_import(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
new_branch_name: str,
|
||||
base_lsn: Lsn,
|
||||
base_tarfile: Path,
|
||||
pg_version: PgVersion,
|
||||
end_lsn: Optional[Lsn] = None,
|
||||
wal_tarfile: Optional[Path] = None,
|
||||
):
|
||||
cmd = [
|
||||
"timeline",
|
||||
"import",
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
pg_version,
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--base-lsn",
|
||||
str(base_lsn),
|
||||
"--base-tarfile",
|
||||
str(base_tarfile),
|
||||
]
|
||||
if end_lsn is not None:
|
||||
cmd.extend(["--end-lsn", str(end_lsn)])
|
||||
if wal_tarfile is not None:
|
||||
cmd.extend(["--wal-tarfile", str(wal_tarfile)])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
def timeline_list(self, tenant_id: TenantId) -> List[Tuple[str, TimelineId]]:
|
||||
"""
|
||||
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
|
||||
"""
|
||||
|
||||
# main [b49f7954224a0ad25cc0013ea107b54b]
|
||||
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
|
||||
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
|
||||
)
|
||||
res = self.raw_cli(["timeline", "list", "--tenant-id", str(tenant_id)])
|
||||
timelines_cli = sorted(
|
||||
map(
|
||||
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
|
||||
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
|
||||
)
|
||||
)
|
||||
return timelines_cli
|
||||
|
||||
def init(
|
||||
self,
|
||||
init_config: Dict[str, Any],
|
||||
force: Optional[str] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
|
||||
init_config_tmpfile.write(toml.dumps(init_config))
|
||||
init_config_tmpfile.flush()
|
||||
|
||||
cmd = [
|
||||
"init",
|
||||
f"--config={init_config_tmpfile.name}",
|
||||
]
|
||||
|
||||
if force is not None:
|
||||
cmd.extend(["--force", force])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def storage_controller_start(
|
||||
self,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
instance_id: Optional[int] = None,
|
||||
base_port: Optional[int] = None,
|
||||
):
|
||||
cmd = ["storage_controller", "start"]
|
||||
if timeout_in_seconds is not None:
|
||||
cmd.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
if instance_id is not None:
|
||||
cmd.append(f"--instance-id={instance_id}")
|
||||
if base_port is not None:
|
||||
cmd.append(f"--base-port={base_port}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
|
||||
cmd = ["storage_controller", "stop"]
|
||||
if immediate:
|
||||
cmd.extend(["-m", "immediate"])
|
||||
if instance_id is not None:
|
||||
cmd.append(f"--instance-id={instance_id}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def pageserver_start(
|
||||
self,
|
||||
id: int,
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
start_args = ["pageserver", "start", f"--id={id}"]
|
||||
if timeout_in_seconds is not None:
|
||||
start_args.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
|
||||
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["pageserver", "stop", f"--id={id}"]
|
||||
if immediate:
|
||||
cmd.extend(["-m", "immediate"])
|
||||
|
||||
log.info(f"Stopping pageserver with {cmd}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def safekeeper_start(
|
||||
self,
|
||||
id: int,
|
||||
extra_opts: Optional[List[str]] = None,
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
if extra_opts is not None:
|
||||
extra_opts = [f"-e={opt}" for opt in extra_opts]
|
||||
else:
|
||||
extra_opts = []
|
||||
if timeout_in_seconds is not None:
|
||||
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(
|
||||
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=extra_env_vars
|
||||
)
|
||||
|
||||
def safekeeper_stop(
|
||||
self, id: Optional[int] = None, immediate=False
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = ["safekeeper", "stop"]
|
||||
if id is not None:
|
||||
args.append(str(id))
|
||||
if immediate:
|
||||
args.extend(["-m", "immediate"])
|
||||
return self.raw_cli(args)
|
||||
|
||||
def storage_broker_start(
|
||||
self, timeout_in_seconds: Optional[int] = None
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "start"]
|
||||
if timeout_in_seconds is not None:
|
||||
cmd.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def storage_broker_stop(self) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "stop"]
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def endpoint_create(
|
||||
self,
|
||||
branch_name: str,
|
||||
pg_port: int,
|
||||
http_port: int,
|
||||
tenant_id: TenantId,
|
||||
pg_version: PgVersion,
|
||||
endpoint_id: Optional[str] = None,
|
||||
hot_standby: bool = False,
|
||||
lsn: Optional[Lsn] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
allow_multiple=False,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"create",
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--branch-name",
|
||||
branch_name,
|
||||
"--pg-version",
|
||||
pg_version,
|
||||
]
|
||||
if lsn is not None:
|
||||
args.extend(["--lsn", str(lsn)])
|
||||
if pg_port is not None:
|
||||
args.extend(["--pg-port", str(pg_port)])
|
||||
if http_port is not None:
|
||||
args.extend(["--http-port", str(http_port)])
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
if hot_standby:
|
||||
args.extend(["--hot-standby", "true"])
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if allow_multiple:
|
||||
args.extend(["--allow-multiple"])
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def endpoint_start(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
safekeepers: Optional[List[int]] = None,
|
||||
remote_ext_config: Optional[str] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
allow_multiple=False,
|
||||
basebackup_request_tries: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"start",
|
||||
]
|
||||
extra_env_vars = {}
|
||||
if basebackup_request_tries is not None:
|
||||
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
|
||||
if remote_ext_config is not None:
|
||||
args.extend(["--remote-ext-config", remote_ext_config])
|
||||
|
||||
if safekeepers is not None:
|
||||
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if allow_multiple:
|
||||
args.extend(["--allow-multiple"])
|
||||
|
||||
res = self.raw_cli(args, extra_env_vars)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def endpoint_reconfigure(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
safekeepers: Optional[List[int]] = None,
|
||||
check_return_code=True,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = ["endpoint", "reconfigure", endpoint_id]
|
||||
if tenant_id is not None:
|
||||
args.extend(["--tenant-id", str(tenant_id)])
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if safekeepers is not None:
|
||||
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
|
||||
return self.raw_cli(args, check_return_code=check_return_code)
|
||||
|
||||
def endpoint_stop(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
destroy=False,
|
||||
check_return_code=True,
|
||||
mode: Optional[str] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"stop",
|
||||
]
|
||||
if destroy:
|
||||
args.append("--destroy")
|
||||
if mode is not None:
|
||||
args.append(f"--mode={mode}")
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
|
||||
return self.raw_cli(args, check_return_code=check_return_code)
|
||||
|
||||
def mappings_map_branch(
|
||||
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
"""
|
||||
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
|
||||
Usually needed when creating branches via PageserverHttpClient and not neon_local.
|
||||
|
||||
After creating a name mapping, you can use EndpointFactory.create_start
|
||||
with this registered branch name.
|
||||
"""
|
||||
args = [
|
||||
"mappings",
|
||||
"map",
|
||||
"--branch-name",
|
||||
name,
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
]
|
||||
|
||||
return self.raw_cli(args, check_return_code=True)
|
||||
|
||||
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
|
||||
return self.raw_cli(["start"], check_return_code=check_return_code)
|
||||
|
||||
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
|
||||
return self.raw_cli(["stop"], check_return_code=check_return_code)
|
||||
|
||||
|
||||
class WalCraft(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `wal_craft` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
"""
|
||||
|
||||
COMMAND = "wal_craft"
|
||||
|
||||
def postgres_config(self) -> List[str]:
|
||||
res = self.raw_cli(["print-postgres-config"])
|
||||
res.check_returncode()
|
||||
return res.stdout.split("\n")
|
||||
|
||||
def in_existing(self, type: str, connection: str) -> None:
|
||||
res = self.raw_cli(["in-existing", type, connection])
|
||||
res.check_returncode()
|
||||
|
||||
|
||||
class Pagectl(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `pagectl` utility CLI tool.
|
||||
"""
|
||||
|
||||
COMMAND = "pagectl"
|
||||
|
||||
def dump_index_part(self, path: Path) -> IndexPartDump:
|
||||
res = self.raw_cli(["index-part", "dump", str(path)])
|
||||
res.check_returncode()
|
||||
parsed = json.loads(res.stdout)
|
||||
return IndexPartDump.from_json(parsed)
|
||||
@@ -9,6 +9,8 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import textwrap
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
@@ -19,6 +21,7 @@ from datetime import datetime
|
||||
from enum import Enum
|
||||
from fcntl import LOCK_EX, LOCK_UN, flock
|
||||
from functools import cached_property
|
||||
from itertools import chain, product
|
||||
from pathlib import Path
|
||||
from types import TracebackType
|
||||
from typing import (
|
||||
@@ -61,12 +64,11 @@ from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, Timeline
|
||||
from fixtures.endpoint.http import EndpointHttpClient
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
|
||||
from fixtures.neon_cli import NeonLocalCli, Pagectl
|
||||
from fixtures.pageserver.allowed_errors import (
|
||||
DEFAULT_PAGESERVER_ALLOWED_ERRORS,
|
||||
DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS,
|
||||
)
|
||||
from fixtures.pageserver.common_types import LayerName, parse_layer_file_name
|
||||
from fixtures.pageserver.common_types import IndexPartDump, LayerName, parse_layer_file_name
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import (
|
||||
wait_for_last_record_lsn,
|
||||
@@ -489,7 +491,7 @@ class NeonEnvBuilder:
|
||||
log.debug(
|
||||
f"Services started, creating initial tenant {env.initial_tenant} and its initial timeline"
|
||||
)
|
||||
initial_tenant, initial_timeline = env.create_tenant(
|
||||
initial_tenant, initial_timeline = env.neon_cli.create_tenant(
|
||||
tenant_id=env.initial_tenant,
|
||||
conf=initial_tenant_conf,
|
||||
timeline_id=env.initial_timeline,
|
||||
@@ -950,16 +952,10 @@ class NeonEnv:
|
||||
|
||||
initial_tenant - tenant ID of the initial tenant created in the repository
|
||||
|
||||
neon_cli - can be used to run the 'neon_local' CLI tool
|
||||
neon_cli - can be used to run the 'neon' CLI tool
|
||||
|
||||
create_tenant() - initializes a new tenant and an initial empty timeline on it,
|
||||
returns the tenant and timeline id
|
||||
|
||||
create_branch() - branch a new timeline from an existing one, returns
|
||||
the new timeline id
|
||||
|
||||
create_timeline() - initializes a new timeline by running initdb, returns
|
||||
the new timeline id
|
||||
create_tenant() - initializes a new tenant in the page server, returns
|
||||
the tenant id
|
||||
"""
|
||||
|
||||
BASE_PAGESERVER_ID = 1
|
||||
@@ -970,6 +966,8 @@ class NeonEnv:
|
||||
self.rust_log_override = config.rust_log_override
|
||||
self.port_distributor = config.port_distributor
|
||||
self.s3_mock_server = config.mock_s3_server
|
||||
self.neon_cli = NeonCli(env=self)
|
||||
self.pagectl = Pagectl(env=self)
|
||||
self.endpoints = EndpointFactory(self)
|
||||
self.safekeepers: List[Safekeeper] = []
|
||||
self.pageservers: List[NeonPageserver] = []
|
||||
@@ -989,21 +987,6 @@ class NeonEnv:
|
||||
self.initial_tenant = config.initial_tenant
|
||||
self.initial_timeline = config.initial_timeline
|
||||
|
||||
neon_local_env_vars = {}
|
||||
if self.rust_log_override is not None:
|
||||
neon_local_env_vars["RUST_LOG"] = self.rust_log_override
|
||||
self.neon_cli = NeonLocalCli(
|
||||
extra_env=neon_local_env_vars,
|
||||
binpath=self.neon_local_binpath,
|
||||
repo_dir=self.repo_dir,
|
||||
pg_distrib_dir=self.pg_distrib_dir,
|
||||
)
|
||||
|
||||
pagectl_env_vars = {}
|
||||
if self.rust_log_override is not None:
|
||||
pagectl_env_vars["RUST_LOG"] = self.rust_log_override
|
||||
self.pagectl = Pagectl(extra_env=pagectl_env_vars, binpath=self.neon_binpath)
|
||||
|
||||
# The URL for the pageserver to use as its control_plane_api config
|
||||
if config.storage_controller_port_override is not None:
|
||||
log.info(
|
||||
@@ -1327,74 +1310,6 @@ class NeonEnv:
|
||||
self.endpoint_counter += 1
|
||||
return "ep-" + str(self.endpoint_counter)
|
||||
|
||||
def create_tenant(
|
||||
self,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
timeline_id: Optional[TimelineId] = None,
|
||||
conf: Optional[Dict[str, Any]] = None,
|
||||
shard_count: Optional[int] = None,
|
||||
shard_stripe_size: Optional[int] = None,
|
||||
placement_policy: Optional[str] = None,
|
||||
set_default: bool = False,
|
||||
aux_file_policy: Optional[AuxFileStore] = None,
|
||||
) -> Tuple[TenantId, TimelineId]:
|
||||
"""
|
||||
Creates a new tenant, returns its id and its initial timeline's id.
|
||||
"""
|
||||
tenant_id = tenant_id or TenantId.generate()
|
||||
timeline_id = timeline_id or TimelineId.generate()
|
||||
|
||||
self.neon_cli.tenant_create(
|
||||
tenant_id=tenant_id,
|
||||
timeline_id=timeline_id,
|
||||
pg_version=self.pg_version,
|
||||
conf=conf,
|
||||
shard_count=shard_count,
|
||||
shard_stripe_size=shard_stripe_size,
|
||||
placement_policy=placement_policy,
|
||||
set_default=set_default,
|
||||
aux_file_policy=aux_file_policy,
|
||||
)
|
||||
|
||||
return tenant_id, timeline_id
|
||||
|
||||
def config_tenant(self, tenant_id: Optional[TenantId], conf: Dict[str, str]):
|
||||
"""
|
||||
Update tenant config.
|
||||
"""
|
||||
tenant_id = tenant_id or self.initial_tenant
|
||||
self.neon_cli.tenant_config(tenant_id, conf)
|
||||
|
||||
def create_branch(
|
||||
self,
|
||||
new_branch_name: str = DEFAULT_BRANCH_NAME,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
ancestor_branch_name: Optional[str] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
new_timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
new_timeline_id = new_timeline_id or TimelineId.generate()
|
||||
tenant_id = tenant_id or self.initial_tenant
|
||||
|
||||
self.neon_cli.timeline_branch(
|
||||
tenant_id, new_timeline_id, new_branch_name, ancestor_branch_name, ancestor_start_lsn
|
||||
)
|
||||
|
||||
return new_timeline_id
|
||||
|
||||
def create_timeline(
|
||||
self,
|
||||
new_branch_name: str,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
timeline_id = timeline_id or TimelineId.generate()
|
||||
tenant_id = tenant_id or self.initial_tenant
|
||||
|
||||
self.neon_cli.timeline_create(new_branch_name, tenant_id, timeline_id, self.pg_version)
|
||||
|
||||
return timeline_id
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def neon_simple_env(
|
||||
@@ -1510,6 +1425,597 @@ class PageserverPort:
|
||||
http: int
|
||||
|
||||
|
||||
class AbstractNeonCli(abc.ABC):
|
||||
"""
|
||||
A typed wrapper around an arbitrary Neon CLI tool.
|
||||
Supports a way to run arbitrary command directly via CLI.
|
||||
Do not use directly, use specific subclasses instead.
|
||||
"""
|
||||
|
||||
def __init__(self, env: NeonEnv):
|
||||
self.env = env
|
||||
|
||||
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
|
||||
|
||||
def raw_cli(
|
||||
self,
|
||||
arguments: List[str],
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
check_return_code=True,
|
||||
timeout=None,
|
||||
local_binpath=False,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
"""
|
||||
Run the command with the specified arguments.
|
||||
|
||||
Arguments must be in list form, e.g. ['pg', 'create']
|
||||
|
||||
Return both stdout and stderr, which can be accessed as
|
||||
|
||||
>>> result = env.neon_cli.raw_cli(...)
|
||||
>>> assert result.stderr == ""
|
||||
>>> log.info(result.stdout)
|
||||
|
||||
If `check_return_code`, on non-zero exit code logs failure and raises.
|
||||
|
||||
If `local_binpath` is true, then we are invoking a test utility
|
||||
"""
|
||||
|
||||
assert isinstance(arguments, list)
|
||||
assert isinstance(self.COMMAND, str)
|
||||
|
||||
if local_binpath:
|
||||
# Test utility
|
||||
bin_neon = str(self.env.neon_local_binpath / self.COMMAND)
|
||||
else:
|
||||
# Normal binary
|
||||
bin_neon = str(self.env.neon_binpath / self.COMMAND)
|
||||
|
||||
args = [bin_neon] + arguments
|
||||
log.info('Running command "{}"'.format(" ".join(args)))
|
||||
|
||||
env_vars = os.environ.copy()
|
||||
env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir)
|
||||
env_vars["POSTGRES_DISTRIB_DIR"] = str(self.env.pg_distrib_dir)
|
||||
if self.env.rust_log_override is not None:
|
||||
env_vars["RUST_LOG"] = self.env.rust_log_override
|
||||
for extra_env_key, extra_env_value in (extra_env_vars or {}).items():
|
||||
env_vars[extra_env_key] = extra_env_value
|
||||
|
||||
# Pass coverage settings
|
||||
var = "LLVM_PROFILE_FILE"
|
||||
val = os.environ.get(var)
|
||||
if val:
|
||||
env_vars[var] = val
|
||||
|
||||
# Intercept CalledProcessError and print more info
|
||||
try:
|
||||
res = subprocess.run(
|
||||
args,
|
||||
env=env_vars,
|
||||
check=False,
|
||||
universal_newlines=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
timeout=timeout,
|
||||
)
|
||||
except subprocess.TimeoutExpired as e:
|
||||
if e.stderr:
|
||||
stderr = e.stderr.decode(errors="replace")
|
||||
else:
|
||||
stderr = ""
|
||||
|
||||
if e.stdout:
|
||||
stdout = e.stdout.decode(errors="replace")
|
||||
else:
|
||||
stdout = ""
|
||||
|
||||
log.warn(f"CLI timeout: stderr={stderr}, stdout={stdout}")
|
||||
raise
|
||||
|
||||
indent = " "
|
||||
if not res.returncode:
|
||||
stripped = res.stdout.strip()
|
||||
lines = stripped.splitlines()
|
||||
if len(lines) < 2:
|
||||
log.debug(f"Run {res.args} success: {stripped}")
|
||||
else:
|
||||
log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
|
||||
elif check_return_code:
|
||||
# this way command output will be in recorded and shown in CI in failure message
|
||||
indent = indent * 2
|
||||
msg = textwrap.dedent(
|
||||
"""\
|
||||
Run %s failed:
|
||||
stdout:
|
||||
%s
|
||||
stderr:
|
||||
%s
|
||||
"""
|
||||
)
|
||||
msg = msg % (
|
||||
res.args,
|
||||
textwrap.indent(res.stdout.strip(), indent),
|
||||
textwrap.indent(res.stderr.strip(), indent),
|
||||
)
|
||||
log.info(msg)
|
||||
raise RuntimeError(msg) from subprocess.CalledProcessError(
|
||||
res.returncode, res.args, res.stdout, res.stderr
|
||||
)
|
||||
return res
|
||||
|
||||
|
||||
class NeonCli(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `neon` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
"""
|
||||
|
||||
COMMAND = "neon_local"
|
||||
|
||||
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
|
||||
kwargs["local_binpath"] = True
|
||||
return super().raw_cli(*args, **kwargs)
|
||||
|
||||
def create_tenant(
|
||||
self,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
timeline_id: Optional[TimelineId] = None,
|
||||
conf: Optional[Dict[str, Any]] = None,
|
||||
shard_count: Optional[int] = None,
|
||||
shard_stripe_size: Optional[int] = None,
|
||||
placement_policy: Optional[str] = None,
|
||||
set_default: bool = False,
|
||||
aux_file_policy: Optional[AuxFileStore] = None,
|
||||
) -> Tuple[TenantId, TimelineId]:
|
||||
"""
|
||||
Creates a new tenant, returns its id and its initial timeline's id.
|
||||
"""
|
||||
tenant_id = tenant_id or TenantId.generate()
|
||||
timeline_id = timeline_id or TimelineId.generate()
|
||||
|
||||
args = [
|
||||
"tenant",
|
||||
"create",
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
self.env.pg_version,
|
||||
]
|
||||
if conf is not None:
|
||||
args.extend(
|
||||
chain.from_iterable(
|
||||
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
|
||||
)
|
||||
)
|
||||
|
||||
if aux_file_policy is AuxFileStore.V2:
|
||||
args.extend(["-c", "switch_aux_file_policy:v2"])
|
||||
elif aux_file_policy is AuxFileStore.V1:
|
||||
args.extend(["-c", "switch_aux_file_policy:v1"])
|
||||
elif aux_file_policy is AuxFileStore.CrossValidation:
|
||||
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
|
||||
|
||||
if set_default:
|
||||
args.append("--set-default")
|
||||
|
||||
if shard_count is not None:
|
||||
args.extend(["--shard-count", str(shard_count)])
|
||||
|
||||
if shard_stripe_size is not None:
|
||||
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
|
||||
|
||||
if placement_policy is not None:
|
||||
args.extend(["--placement-policy", str(placement_policy)])
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
return tenant_id, timeline_id
|
||||
|
||||
def import_tenant(self, tenant_id: TenantId):
|
||||
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
|
||||
def set_default(self, tenant_id: TenantId):
|
||||
"""
|
||||
Update default tenant for future operations that require tenant_id.
|
||||
"""
|
||||
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
|
||||
res.check_returncode()
|
||||
|
||||
def config_tenant(self, tenant_id: TenantId, conf: Dict[str, str]):
|
||||
"""
|
||||
Update tenant config.
|
||||
"""
|
||||
|
||||
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
|
||||
if conf is not None:
|
||||
args.extend(
|
||||
chain.from_iterable(
|
||||
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
|
||||
)
|
||||
)
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
|
||||
def list_tenants(self) -> "subprocess.CompletedProcess[str]":
|
||||
res = self.raw_cli(["tenant", "list"])
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def create_timeline(
|
||||
self,
|
||||
new_branch_name: str,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
if timeline_id is None:
|
||||
timeline_id = TimelineId.generate()
|
||||
|
||||
cmd = [
|
||||
"timeline",
|
||||
"create",
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--tenant-id",
|
||||
str(tenant_id or self.env.initial_tenant),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
self.env.pg_version,
|
||||
]
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
return timeline_id
|
||||
|
||||
def create_branch(
|
||||
self,
|
||||
new_branch_name: str = DEFAULT_BRANCH_NAME,
|
||||
ancestor_branch_name: Optional[str] = None,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
new_timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
if new_timeline_id is None:
|
||||
new_timeline_id = TimelineId.generate()
|
||||
cmd = [
|
||||
"timeline",
|
||||
"branch",
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--timeline-id",
|
||||
str(new_timeline_id),
|
||||
"--tenant-id",
|
||||
str(tenant_id or self.env.initial_tenant),
|
||||
]
|
||||
if ancestor_branch_name is not None:
|
||||
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
|
||||
if ancestor_start_lsn is not None:
|
||||
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
return TimelineId(str(new_timeline_id))
|
||||
|
||||
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
|
||||
"""
|
||||
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
|
||||
"""
|
||||
|
||||
# main [b49f7954224a0ad25cc0013ea107b54b]
|
||||
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
|
||||
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
|
||||
)
|
||||
res = self.raw_cli(
|
||||
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
|
||||
)
|
||||
timelines_cli = sorted(
|
||||
map(
|
||||
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
|
||||
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
|
||||
)
|
||||
)
|
||||
return timelines_cli
|
||||
|
||||
def init(
|
||||
self,
|
||||
init_config: Dict[str, Any],
|
||||
force: Optional[str] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
|
||||
init_config_tmpfile.write(toml.dumps(init_config))
|
||||
init_config_tmpfile.flush()
|
||||
|
||||
cmd = [
|
||||
"init",
|
||||
f"--config={init_config_tmpfile.name}",
|
||||
]
|
||||
|
||||
if force is not None:
|
||||
cmd.extend(["--force", force])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def storage_controller_start(
|
||||
self,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
instance_id: Optional[int] = None,
|
||||
base_port: Optional[int] = None,
|
||||
):
|
||||
cmd = ["storage_controller", "start"]
|
||||
if timeout_in_seconds is not None:
|
||||
cmd.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
if instance_id is not None:
|
||||
cmd.append(f"--instance-id={instance_id}")
|
||||
if base_port is not None:
|
||||
cmd.append(f"--base-port={base_port}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
|
||||
cmd = ["storage_controller", "stop"]
|
||||
if immediate:
|
||||
cmd.extend(["-m", "immediate"])
|
||||
if instance_id is not None:
|
||||
cmd.append(f"--instance-id={instance_id}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def pageserver_start(
|
||||
self,
|
||||
id: int,
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
start_args = ["pageserver", "start", f"--id={id}"]
|
||||
if timeout_in_seconds is not None:
|
||||
start_args.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
storage = self.env.pageserver_remote_storage
|
||||
|
||||
if isinstance(storage, S3Storage):
|
||||
s3_env_vars = storage.access_env_vars()
|
||||
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
|
||||
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["pageserver", "stop", f"--id={id}"]
|
||||
if immediate:
|
||||
cmd.extend(["-m", "immediate"])
|
||||
|
||||
log.info(f"Stopping pageserver with {cmd}")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def safekeeper_start(
|
||||
self,
|
||||
id: int,
|
||||
extra_opts: Optional[List[str]] = None,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
s3_env_vars = None
|
||||
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
|
||||
|
||||
if extra_opts is not None:
|
||||
extra_opts = [f"-e={opt}" for opt in extra_opts]
|
||||
else:
|
||||
extra_opts = []
|
||||
if timeout_in_seconds is not None:
|
||||
extra_opts.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(
|
||||
["safekeeper", "start", str(id), *extra_opts], extra_env_vars=s3_env_vars
|
||||
)
|
||||
|
||||
def safekeeper_stop(
|
||||
self, id: Optional[int] = None, immediate=False
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = ["safekeeper", "stop"]
|
||||
if id is not None:
|
||||
args.append(str(id))
|
||||
if immediate:
|
||||
args.extend(["-m", "immediate"])
|
||||
return self.raw_cli(args)
|
||||
|
||||
def broker_start(
|
||||
self, timeout_in_seconds: Optional[int] = None
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "start"]
|
||||
if timeout_in_seconds is not None:
|
||||
cmd.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def broker_stop(self) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "stop"]
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def endpoint_create(
|
||||
self,
|
||||
branch_name: str,
|
||||
pg_port: int,
|
||||
http_port: int,
|
||||
endpoint_id: Optional[str] = None,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
hot_standby: bool = False,
|
||||
lsn: Optional[Lsn] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
allow_multiple=False,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"create",
|
||||
"--tenant-id",
|
||||
str(tenant_id or self.env.initial_tenant),
|
||||
"--branch-name",
|
||||
branch_name,
|
||||
"--pg-version",
|
||||
self.env.pg_version,
|
||||
]
|
||||
if lsn is not None:
|
||||
args.extend(["--lsn", str(lsn)])
|
||||
if pg_port is not None:
|
||||
args.extend(["--pg-port", str(pg_port)])
|
||||
if http_port is not None:
|
||||
args.extend(["--http-port", str(http_port)])
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
if hot_standby:
|
||||
args.extend(["--hot-standby", "true"])
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if allow_multiple:
|
||||
args.extend(["--allow-multiple"])
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def endpoint_start(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
safekeepers: Optional[List[int]] = None,
|
||||
remote_ext_config: Optional[str] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
allow_multiple=False,
|
||||
basebackup_request_tries: Optional[int] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"start",
|
||||
]
|
||||
extra_env_vars = {}
|
||||
if basebackup_request_tries is not None:
|
||||
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
|
||||
if remote_ext_config is not None:
|
||||
args.extend(["--remote-ext-config", remote_ext_config])
|
||||
|
||||
if safekeepers is not None:
|
||||
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if allow_multiple:
|
||||
args.extend(["--allow-multiple"])
|
||||
|
||||
res = self.raw_cli(args, extra_env_vars)
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def endpoint_reconfigure(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
safekeepers: Optional[List[int]] = None,
|
||||
check_return_code=True,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = ["endpoint", "reconfigure", endpoint_id]
|
||||
if tenant_id is not None:
|
||||
args.extend(["--tenant-id", str(tenant_id)])
|
||||
if pageserver_id is not None:
|
||||
args.extend(["--pageserver-id", str(pageserver_id)])
|
||||
if safekeepers is not None:
|
||||
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
|
||||
return self.raw_cli(args, check_return_code=check_return_code)
|
||||
|
||||
def endpoint_stop(
|
||||
self,
|
||||
endpoint_id: str,
|
||||
destroy=False,
|
||||
check_return_code=True,
|
||||
mode: Optional[str] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
args = [
|
||||
"endpoint",
|
||||
"stop",
|
||||
]
|
||||
if destroy:
|
||||
args.append("--destroy")
|
||||
if mode is not None:
|
||||
args.append(f"--mode={mode}")
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
|
||||
return self.raw_cli(args, check_return_code=check_return_code)
|
||||
|
||||
def map_branch(
|
||||
self, name: str, tenant_id: TenantId, timeline_id: TimelineId
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
"""
|
||||
Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
|
||||
Usually needed when creating branches via PageserverHttpClient and not neon_local.
|
||||
|
||||
After creating a name mapping, you can use EndpointFactory.create_start
|
||||
with this registered branch name.
|
||||
"""
|
||||
args = [
|
||||
"mappings",
|
||||
"map",
|
||||
"--branch-name",
|
||||
name,
|
||||
"--tenant-id",
|
||||
str(tenant_id),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
]
|
||||
|
||||
return self.raw_cli(args, check_return_code=True)
|
||||
|
||||
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
|
||||
return self.raw_cli(["start"], check_return_code=check_return_code)
|
||||
|
||||
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
|
||||
return self.raw_cli(["stop"], check_return_code=check_return_code)
|
||||
|
||||
|
||||
class WalCraft(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `wal_craft` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
"""
|
||||
|
||||
COMMAND = "wal_craft"
|
||||
|
||||
def postgres_config(self) -> List[str]:
|
||||
res = self.raw_cli(["print-postgres-config"])
|
||||
res.check_returncode()
|
||||
return res.stdout.split("\n")
|
||||
|
||||
def in_existing(self, type: str, connection: str) -> None:
|
||||
res = self.raw_cli(["in-existing", type, connection])
|
||||
res.check_returncode()
|
||||
|
||||
|
||||
class ComputeCtl(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `compute_ctl` CLI tool.
|
||||
"""
|
||||
|
||||
COMMAND = "compute_ctl"
|
||||
|
||||
|
||||
class Pagectl(AbstractNeonCli):
|
||||
"""
|
||||
A typed wrapper around the `pagectl` utility CLI tool.
|
||||
"""
|
||||
|
||||
COMMAND = "pagectl"
|
||||
|
||||
def dump_index_part(self, path: Path) -> IndexPartDump:
|
||||
res = self.raw_cli(["index-part", "dump", str(path)])
|
||||
res.check_returncode()
|
||||
parsed = json.loads(res.stdout)
|
||||
return IndexPartDump.from_json(parsed)
|
||||
|
||||
|
||||
class LogUtils:
|
||||
"""
|
||||
A mixin class which provides utilities for inspecting the logs of a service.
|
||||
@@ -2427,10 +2933,6 @@ class NeonPageserver(PgProtocol, LogUtils):
|
||||
"""
|
||||
assert self.running is False
|
||||
|
||||
storage = self.env.pageserver_remote_storage
|
||||
if isinstance(storage, S3Storage):
|
||||
s3_env_vars = storage.access_env_vars()
|
||||
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||
self.env.neon_cli.pageserver_start(
|
||||
self.id, extra_env_vars=extra_env_vars, timeout_in_seconds=timeout_in_seconds
|
||||
)
|
||||
@@ -3451,7 +3953,6 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
hot_standby=hot_standby,
|
||||
pg_port=self.pg_port,
|
||||
http_port=self.http_port,
|
||||
pg_version=self.env.pg_version,
|
||||
pageserver_id=pageserver_id,
|
||||
allow_multiple=allow_multiple,
|
||||
)
|
||||
@@ -3894,16 +4395,8 @@ class Safekeeper(LogUtils):
|
||||
extra_opts = self.extra_opts
|
||||
|
||||
assert self.running is False
|
||||
|
||||
s3_env_vars = None
|
||||
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
|
||||
|
||||
self.env.neon_cli.safekeeper_start(
|
||||
self.id,
|
||||
extra_opts=extra_opts,
|
||||
timeout_in_seconds=timeout_in_seconds,
|
||||
extra_env_vars=s3_env_vars,
|
||||
self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds
|
||||
)
|
||||
self.running = True
|
||||
# wait for wal acceptor start by checking its status
|
||||
@@ -4049,7 +4542,7 @@ class Safekeeper(LogUtils):
        1) wait for remote_consistent_lsn and wal_backup_lsn on safekeeper to reach it.
        2) checkpoint timeline on safekeeper, which should remove WAL before this LSN; optionally wait for that.
        """
        client = self.http_client()
        cli = self.http_client()

        target_segment_file = lsn.segment_name()

@@ -4061,7 +4554,7 @@ class Safekeeper(LogUtils):
            assert all(target_segment_file <= s for s in segments)

        def are_lsns_advanced():
            stat = client.timeline_status(tenant_id, timeline_id)
            stat = cli.timeline_status(tenant_id, timeline_id)
            log.info(
                f"waiting for remote_consistent_lsn and backup_lsn on sk {self.id} to reach {lsn}, currently remote_consistent_lsn={stat.remote_consistent_lsn}, backup_lsn={stat.backup_lsn}"
            )
@@ -4070,7 +4563,7 @@ class Safekeeper(LogUtils):
        # xxx: max wait is long because we might be waiting for reconnection from
        # pageserver to this safekeeper
        wait_until(30, 1, are_lsns_advanced)
        client.checkpoint(tenant_id, timeline_id)
        cli.checkpoint(tenant_id, timeline_id)
        if wait_wal_removal:
            wait_until(30, 1, are_segments_removed)
@@ -4098,13 +4591,13 @@ class NeonBroker(LogUtils):
        timeout_in_seconds: Optional[int] = None,
    ):
        assert not self.running
        self.env.neon_cli.storage_broker_start(timeout_in_seconds)
        self.env.neon_cli.broker_start(timeout_in_seconds)
        self.running = True
        return self

    def stop(self):
        if self.running:
            self.env.neon_cli.storage_broker_stop()
            self.env.neon_cli.broker_stop()
            self.running = False
        return self

@@ -4733,10 +5226,10 @@ def flush_ep_to_pageserver(
    commit_lsn: Lsn = Lsn(0)
    # In principle, in the absence of failures, polling a single sk would be enough.
    for sk in env.safekeepers:
        client = sk.http_client()
        cli = sk.http_client()
        # wait until compute connections are gone
        wait_walreceivers_absent(client, tenant, timeline)
        commit_lsn = max(client.get_commit_lsn(tenant, timeline), commit_lsn)
        wait_walreceivers_absent(cli, tenant, timeline)
        commit_lsn = max(cli.get_commit_lsn(tenant, timeline), commit_lsn)

    # Note: depending on WAL filtering implementation, probably most shards
    # won't be able to reach commit_lsn (unless gaps are also ack'ed), so this
@@ -4789,12 +5282,7 @@ def fork_at_current_lsn(
    the WAL up to that LSN to arrive in the pageserver before creating the branch.
    """
    current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
    return env.create_branch(
        new_branch_name=new_branch_name,
        tenant_id=tenant_id,
        ancestor_branch_name=ancestor_branch_name,
        ancestor_start_lsn=current_lsn,
    )
    return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)


def import_timeline_from_vanilla_postgres(
@@ -4813,9 +5301,9 @@ def import_timeline_from_vanilla_postgres(
    """

    # Take backup of the existing PostgreSQL server with pg_basebackup
    basebackup_dir = test_output_dir / "basebackup"
    base_tar = basebackup_dir / "base.tar"
    wal_tar = basebackup_dir / "pg_wal.tar"
    basebackup_dir = os.path.join(test_output_dir, "basebackup")
    base_tar = os.path.join(basebackup_dir, "base.tar")
    wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
    os.mkdir(basebackup_dir)
    pg_bin.run(
        [
@@ -4825,28 +5313,40 @@ def import_timeline_from_vanilla_postgres(
            "-d",
            vanilla_pg_connstr,
            "-D",
            str(basebackup_dir),
            basebackup_dir,
        ]
    )

    # Extract start_lsn and end_lsn from the backup manifest file
    with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
        manifest = json.load(f)
        start_lsn = Lsn(manifest["WAL-Ranges"][0]["Start-LSN"])
        end_lsn = Lsn(manifest["WAL-Ranges"][0]["End-LSN"])
        start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
        end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]

    # Import the backup tarballs into the pageserver
    env.neon_cli.timeline_import(
        tenant_id=tenant_id,
        timeline_id=timeline_id,
        new_branch_name=branch_name,
        base_lsn=start_lsn,
        base_tarfile=base_tar,
        end_lsn=end_lsn,
        wal_tarfile=wal_tar,
        pg_version=env.pg_version,
    env.neon_cli.raw_cli(
        [
            "timeline",
            "import",
            "--tenant-id",
            str(tenant_id),
            "--timeline-id",
            str(timeline_id),
            "--branch-name",
            branch_name,
            "--base-lsn",
            start_lsn,
            "--base-tarfile",
            base_tar,
            "--end-lsn",
            end_lsn,
            "--wal-tarfile",
            wal_tar,
            "--pg-version",
            env.pg_version,
        ]
    )
    wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, end_lsn)
    wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, Lsn(end_lsn))


def last_flush_lsn_upload(

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any, List, Tuple

from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv
from fixtures.neon_fixtures import NeonEnv, Pagectl
from fixtures.pageserver.common_types import (
    InvalidFileName,
    parse_layer_file_name,
@@ -35,7 +35,7 @@ def duplicate_one_tenant(env: NeonEnv, template_tenant: TenantId, new_tenant: Te
        for file in tl.iterdir():
            shutil.copy2(file, dst_tl_dir)
            if "__" in file.name:
                env.pagectl.raw_cli(
                Pagectl(env).raw_cli(
                    [
                        "layer",
                        "rewrite-summary",

@@ -53,7 +53,7 @@ def setup_env(
        "checkpoint_distance": 268435456,
        "image_creation_threshold": 3,
    }
    template_tenant, template_timeline = env.create_tenant(set_default=True)
    template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
    env.pageserver.tenant_detach(template_tenant)
    env.pageserver.tenant_attach(template_tenant, config)
    ep = env.endpoints.create_start("main", tenant_id=template_tenant)

@@ -81,7 +81,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
        "image_creation_threshold": 3,
    }

    template_tenant, template_timeline = env.create_tenant(set_default=True)
    template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
    env.pageserver.tenant_detach(template_tenant)
    env.pageserver.tenant_attach(template_tenant, config)


@@ -162,7 +162,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
        "checkpoint_distance": 268435456,
        "image_creation_threshold": 3,
    }
    template_tenant, template_timeline = env.create_tenant(set_default=True)
    template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
    env.pageserver.tenant_detach(template_tenant)
    env.pageserver.tenant_attach(template_tenant, config)
    ps_http = env.pageserver.http_client()

@@ -41,7 +41,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
    pg_bin = neon_compare.pg_bin

    # Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
    tenant, _ = env.create_tenant(
    tenant, _ = env.neon_cli.create_tenant(
        conf={
            "gc_period": "5 s",
            "gc_horizon": f"{4 * 1024 ** 2}",
@@ -64,7 +64,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)

    endpoint.stop()

    env.create_branch("b0", tenant_id=tenant)
    env.neon_cli.create_branch("b0", tenant_id=tenant)

    threads: List[threading.Thread] = []
    threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True))
@@ -78,7 +78,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
        p = random.randint(0, i)

        timer = timeit.default_timer()
        env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{p}", tenant_id=tenant)
        env.neon_cli.create_branch(f"b{i + 1}", f"b{p}", tenant_id=tenant)
        dur = timeit.default_timer() - timer

        log.info(f"Creating branch b{i+1} took {dur}s")
@@ -104,7 +104,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
    # seed the prng so we will measure the same structure every time
    rng = random.Random("2024-02-29")

    env.create_branch("b0")
    env.neon_cli.create_branch("b0")

    endpoint = env.endpoints.create_start("b0")
    neon_compare.pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
@@ -121,7 +121,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:

        timer = timeit.default_timer()
        # each of these uploads to remote storage before completion
        env.create_branch(f"b{i + 1}", ancestor_branch_name=parent)
        env.neon_cli.create_branch(f"b{i + 1}", parent)
        dur = timeit.default_timer() - timer
        branch_creation_durations.append(dur)

@@ -222,7 +222,7 @@ def wait_and_record_startup_metrics(
def test_branch_creation_many_relations(neon_compare: NeonCompare):
    env = neon_compare.env

    timeline_id = env.create_branch("root")
    timeline_id = env.neon_cli.create_branch("root")

    endpoint = env.endpoints.create_start("root")
    with closing(endpoint.connect()) as conn:
@@ -238,7 +238,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
    )

    with neon_compare.record_duration("create_branch_time_not_busy_root"):
        env.create_branch("child_not_busy", ancestor_branch_name="root")
        env.neon_cli.create_branch("child_not_busy", "root")

    # run a concurrent insertion to make the ancestor "busy" during the branch creation
    thread = threading.Thread(
@@ -247,6 +247,6 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
    thread.start()

    with neon_compare.record_duration("create_branch_time_busy_root"):
        env.create_branch("child_busy", ancestor_branch_name="root")
        env.neon_cli.create_branch("child_busy", "root")

    thread.join()

@@ -41,7 +41,7 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
|
||||
)
|
||||
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
|
||||
|
||||
env.create_branch("root")
|
||||
env.neon_cli.create_branch("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", endpoint_root.connstr(), "-s10"])
|
||||
|
||||
@@ -55,14 +55,14 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
|
||||
|
||||
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
env.create_branch("root")
|
||||
env.neon_cli.create_branch("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
|
||||
endpoint_root.safe_psql(
|
||||
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
|
||||
)
|
||||
|
||||
env.create_branch("child", ancestor_branch_name="root")
|
||||
env.neon_cli.create_branch("child", "root")
|
||||
endpoint_child = env.endpoints.create_start("child")
|
||||
|
||||
with neon_compare.record_duration("root_run_duration"):
|
||||
@@ -73,7 +73,7 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
|
||||
|
||||
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
env.create_branch("root")
|
||||
env.neon_cli.create_branch("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
|
||||
endpoint_root.safe_psql_many(
|
||||
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
|
||||
]
|
||||
)
|
||||
|
||||
env.create_branch("child", ancestor_branch_name="root")
|
||||
env.neon_cli.create_branch("child", "root")
|
||||
endpoint_child = env.endpoints.create_start("child")
|
||||
|
||||
with neon_compare.record_duration("root_run_duration"):
|
||||
|
||||
@@ -26,8 +26,10 @@ def test_bulk_tenant_create(
|
||||
for i in range(tenants_count):
|
||||
start = timeit.default_timer()
|
||||
|
||||
tenant, _ = env.create_tenant()
|
||||
env.create_timeline(f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant)
|
||||
tenant, _ = env.neon_cli.create_tenant()
|
||||
env.neon_cli.create_timeline(
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
|
||||
)
|
||||
|
||||
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
|
||||
# if use_safekeepers == 'with_sa':
|
||||
|
||||
@@ -16,7 +16,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
|
||||
env = neon_env_builder.init_start()
|
||||
n_records = 1000000
|
||||
|
||||
timeline_id = env.create_branch("test_bulk_update")
|
||||
timeline_id = env.neon_cli.create_branch("test_bulk_update")
|
||||
tenant_id = env.initial_tenant
|
||||
endpoint = env.endpoints.create_start("test_bulk_update")
|
||||
cur = endpoint.connect().cursor()
|
||||
|
||||
@@ -17,7 +17,7 @@ def test_compaction(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# Disable background GC and compaction, we'll run compaction manually.
|
||||
"gc_period": "0s",
|
||||
@@ -68,7 +68,7 @@ def test_compaction_l0_memory(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# Initially disable compaction so that we will build up a stack of L0s
|
||||
"compaction_period": "0s",
|
||||
|
||||
@@ -11,7 +11,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
|
||||
env = neon_env_builder.init_start()
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id, _ = env.create_tenant(
|
||||
tenant_id, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable default GC and compaction
|
||||
"gc_period": "1000 m",
|
||||
@@ -63,7 +63,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
|
||||
log.info(f"Physical storage size {physical_size}")
|
||||
if mode == "with_snapshots":
|
||||
if step == n_steps / 2:
|
||||
env.create_branch("child")
|
||||
env.neon_cli.create_branch("child")
|
||||
|
||||
max_num_of_deltas_above_image = 0
|
||||
max_total_num_of_deltas = 0
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
|
||||
# We want to have a lot of layer files to exercise the layer map. Disable
|
||||
# GC, and make checkpoint_distance very small, so that we get a lot of small layer
|
||||
# files.
|
||||
tenant, timeline = env.create_tenant(
|
||||
tenant, timeline = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"gc_period": "0s",
|
||||
"checkpoint_distance": "16384",
|
||||
|
||||
@@ -33,7 +33,7 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark:
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
lazy_slru_download = "true" if slru == "lazy" else "false"
|
||||
tenant, _ = env.create_tenant(
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"lazy_slru_download": lazy_slru_download,
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
tenants = {}
|
||||
for tenant_id in set(TenantId.generate() for _i in range(0, tenant_count)):
|
||||
timeline_id = TimelineId.generate()
|
||||
env.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
|
||||
env.neon_cli.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
|
||||
endpoint = env.endpoints.create("main", tenant_id=tenant_id)
|
||||
tenants[tenant_id] = TenantState(timeline_id, endpoint)
|
||||
endpoint.start()
|
||||
|
||||
@@ -27,7 +27,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.create_branch("test_startup")
|
||||
env.neon_cli.create_branch("test_startup")
|
||||
|
||||
endpoint = None
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
|
||||
tenant, _ = env.create_tenant(
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"checkpoint_distance": "4194304",
|
||||
"gc_period": "0s",
|
||||
@@ -45,9 +45,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f"LSN after 100k rows: {lsn_100}")
|
||||
|
||||
# Create branch1.
|
||||
env.create_branch(
|
||||
"branch1", ancestor_branch_name="main", ancestor_start_lsn=lsn_100, tenant_id=tenant
|
||||
)
|
||||
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
|
||||
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
|
||||
|
||||
branch1_cur = endpoint_branch1.connect().cursor()
|
||||
@@ -69,9 +67,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f"LSN after 200k rows: {lsn_200}")
|
||||
|
||||
# Create branch2.
|
||||
env.create_branch(
|
||||
"branch2", ancestor_branch_name="branch1", ancestor_start_lsn=lsn_200, tenant_id=tenant
|
||||
)
|
||||
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
|
||||
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
|
||||
branch2_cur = endpoint_branch2.connect().cursor()
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
(tenant_id, _) = env.create_tenant()
|
||||
(tenant_id, _) = env.neon_cli.create_tenant()
|
||||
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
|
||||
config_pre_detach = ps_http.tenant_config(tenant_id)
|
||||
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
|
||||
@@ -109,7 +109,7 @@ def test_empty_config(positive_env: NeonEnv, content_type: Optional[str]):
|
||||
"""
|
||||
env = positive_env
|
||||
ps_http = env.pageserver.http_client()
|
||||
(tenant_id, _) = env.create_tenant()
|
||||
(tenant_id, _) = env.neon_cli.create_tenant()
|
||||
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
|
||||
config_pre_detach = ps_http.tenant_config(tenant_id)
|
||||
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
|
||||
@@ -182,7 +182,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
fully_custom_config.keys()
|
||||
), "ensure we cover all config options"
|
||||
|
||||
(tenant_id, _) = env.create_tenant()
|
||||
(tenant_id, _) = env.neon_cli.create_tenant()
|
||||
ps_http.set_tenant_config(tenant_id, fully_custom_config)
|
||||
our_tenant_config = ps_http.tenant_config(tenant_id)
|
||||
assert our_tenant_config.tenant_specific_overrides == fully_custom_config
|
||||
|
||||
@@ -76,7 +76,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
branch = "test_compute_auth_to_pageserver"
|
||||
env.create_branch(branch)
|
||||
env.neon_cli.create_branch(branch)
|
||||
endpoint = env.endpoints.create_start(branch)
|
||||
|
||||
with closing(endpoint.connect()) as conn:
|
||||
@@ -186,7 +186,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
|
||||
timeline_id = env.create_branch(branch)
|
||||
timeline_id = env.neon_cli.create_branch(branch)
|
||||
env.endpoints.create_start(branch)
|
||||
|
||||
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
|
||||
|
||||
@@ -98,7 +98,7 @@ def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_
|
||||
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
# Create a branch for us
|
||||
env.create_branch("test_backpressure")
|
||||
env.neon_cli.create_branch("test_backpressure")
|
||||
|
||||
endpoint = env.endpoints.create(
|
||||
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]
|
||||
|
||||
@@ -22,7 +22,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pageserver_http.configure_failpoints(("simulated-bad-compute-connection", "50%return(15)"))
|
||||
|
||||
env.create_branch("test_compute_pageserver_connection_stress")
|
||||
env.neon_cli.create_branch("test_compute_pageserver_connection_stress")
|
||||
endpoint = env.endpoints.create_start("test_compute_pageserver_connection_stress")
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
|
||||
@@ -53,7 +53,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
|
||||
env = neon_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
|
||||
tenant, timeline_main = env.create_tenant(
|
||||
tenant, timeline_main = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable background GC
|
||||
"gc_period": "0s",
|
||||
@@ -90,7 +90,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
|
||||
pageserver_http_client.timeline_checkpoint(tenant, timeline_main)
|
||||
pageserver_http_client.timeline_gc(tenant, timeline_main, lsn2 - lsn1 + 1024)
|
||||
|
||||
env.create_branch(
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch", ancestor_branch_name="main", ancestor_start_lsn=lsn1, tenant_id=tenant
|
||||
)
|
||||
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
|
||||
@@ -127,7 +127,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
env.storage_controller.allowed_errors.extend(error_regexes)
|
||||
|
||||
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
|
||||
tenant, _ = env.create_tenant(
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable background GC
|
||||
"gc_period": "0s",
|
||||
@@ -145,7 +145,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
}
|
||||
)
|
||||
|
||||
b0 = env.create_branch("b0", tenant_id=tenant)
|
||||
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
|
||||
res = endpoint0.safe_psql_many(
|
||||
queries=[
|
||||
@@ -176,7 +176,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
|
||||
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=lsn, tenant_id=tenant)
|
||||
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
env.storage_controller.allowed_errors.extend(error_regexes)
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
branch_behind_timeline_id = env.create_branch("test_branch_behind")
|
||||
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
|
||||
endpoint_main = env.endpoints.create_start("test_branch_behind")
|
||||
|
||||
main_cur = endpoint_main.connect().cursor()
|
||||
@@ -58,10 +58,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f"LSN after 200100 rows: {lsn_b}")
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.create_branch(
|
||||
"test_branch_behind_hundred",
|
||||
ancestor_branch_name="test_branch_behind",
|
||||
ancestor_start_lsn=lsn_a,
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a
|
||||
)
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
@@ -77,10 +75,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f"LSN after 400100 rows: {lsn_c}")
|
||||
|
||||
# Branch at the point where only 200100 rows were inserted
|
||||
env.create_branch(
|
||||
"test_branch_behind_more",
|
||||
ancestor_branch_name="test_branch_behind",
|
||||
ancestor_start_lsn=lsn_b,
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
|
||||
)
|
||||
|
||||
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
|
||||
@@ -101,17 +97,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# branch at segment boundary
|
||||
env.create_branch(
|
||||
"test_branch_segment_boundary",
|
||||
ancestor_branch_name="test_branch_behind",
|
||||
ancestor_start_lsn=Lsn("0/3000000"),
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
|
||||
)
|
||||
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
|
||||
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
|
||||
|
||||
# branch at pre-initdb lsn (from main branch)
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
env.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
|
||||
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
@@ -122,10 +116,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# branch at pre-ancestor lsn
|
||||
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
|
||||
env.create_branch(
|
||||
"test_branch_preinitdb",
|
||||
ancestor_branch_name="test_branch_behind",
|
||||
ancestor_start_lsn=Lsn("0/42"),
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42")
|
||||
)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
@@ -147,10 +139,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
print_gc_result(gc_result)
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
|
||||
env.create_branch(
|
||||
"test_branch_create_fail",
|
||||
ancestor_branch_name="test_branch_behind",
|
||||
ancestor_start_lsn=gced_lsn,
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
|
||||
)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
|
||||
@@ -38,7 +38,7 @@ def test_branching_with_pgbench(
|
||||
env = neon_simple_env
|
||||
|
||||
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
|
||||
tenant, _ = env.create_tenant(
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"gc_period": "5 s",
|
||||
"gc_horizon": f"{1024 ** 2}",
|
||||
@@ -55,7 +55,7 @@ def test_branching_with_pgbench(
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-T15", connstr])
|
||||
|
||||
env.create_branch("b0", tenant_id=tenant)
|
||||
env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
endpoints: List[Endpoint] = []
|
||||
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
|
||||
|
||||
@@ -84,9 +84,9 @@ def test_branching_with_pgbench(
|
||||
threads = []
|
||||
|
||||
if ty == "cascade":
|
||||
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{i}", tenant_id=tenant)
|
||||
env.neon_cli.create_branch(f"b{i + 1}", f"b{i}", tenant_id=tenant)
|
||||
else:
|
||||
env.create_branch(f"b{i + 1}", ancestor_branch_name="b0", tenant_id=tenant)
|
||||
env.neon_cli.create_branch(f"b{i + 1}", "b0", tenant_id=tenant)
|
||||
|
||||
endpoints.append(env.endpoints.create_start(f"b{i + 1}", tenant_id=tenant))
|
||||
|
||||
@@ -120,7 +120,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
|
||||
|
||||
env = neon_simple_env
|
||||
|
||||
env.create_branch("b0")
|
||||
env.neon_cli.create_branch("b0")
|
||||
endpoint0 = env.endpoints.create_start("b0")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
|
||||
@@ -133,7 +133,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
|
||||
start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
|
||||
|
||||
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
|
||||
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=start_lsn)
|
||||
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
|
||||
endpoint1 = env.endpoints.create_start("b1")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
|
||||
@@ -173,7 +173,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
|
||||
|
||||
wait_until_paused(env, "before-upload-index-pausable")
|
||||
|
||||
env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
|
||||
env.neon_cli.map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
|
||||
|
||||
with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"):
|
||||
env.endpoints.create_start(
|
||||
@@ -432,7 +432,9 @@ def test_branching_while_stuck_find_gc_cutoffs(neon_env_builder: NeonEnvBuilder)
|
||||
|
||||
wait_until_paused(env, failpoint)
|
||||
|
||||
env.create_branch("branch", ancestor_branch_name="main")
|
||||
env.neon_cli.create_branch(
|
||||
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch"
|
||||
)
|
||||
|
||||
client.configure_failpoints((failpoint, "off"))
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
|
||||
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
|
||||
|
||||
for _ in range(3):
|
||||
tenant_id, timeline_id = env.create_tenant()
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with endpoint.cursor() as cur:
|
||||
@@ -84,11 +84,13 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
|
||||
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
tenant_id, _ = env.create_tenant()
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
|
||||
futures = [
|
||||
executor.submit(env.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id)
|
||||
executor.submit(
|
||||
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
|
||||
)
|
||||
for i in range(4)
|
||||
]
|
||||
for future in futures:
|
||||
@@ -109,7 +111,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
|
||||
tenant_id = env.initial_tenant
|
||||
|
||||
timelines_dir = env.pageserver.timeline_dir(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
|
||||
|
||||
# Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
|
||||
@@ -121,7 +123,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
|
||||
env.pageserver.restart(immediate=True)
|
||||
|
||||
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
|
||||
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
assert (
|
||||
new_tenant_timelines == old_tenant_timelines
|
||||
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
|
||||
@@ -149,11 +151,11 @@ def test_timeline_init_break_before_checkpoint_recreate(
|
||||
]
|
||||
)
|
||||
|
||||
env.create_tenant(env.initial_tenant)
|
||||
env.neon_cli.create_tenant(env.initial_tenant)
|
||||
tenant_id = env.initial_tenant
|
||||
|
||||
timelines_dir = env.pageserver.timeline_dir(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
|
||||
|
||||
# Some fixed timeline ID (like control plane does)
|
||||
@@ -174,7 +176,7 @@ def test_timeline_init_break_before_checkpoint_recreate(
|
||||
env.pageserver.restart(immediate=True)
|
||||
|
||||
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
|
||||
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
assert (
|
||||
new_tenant_timelines == old_tenant_timelines
|
||||
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
|
||||
@@ -199,7 +201,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
|
||||
tenant_id = env.initial_tenant
|
||||
|
||||
timelines_dir = env.pageserver.timeline_dir(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
|
||||
|
||||
# Introduce failpoint when creating a new timeline, right after creating its directory
|
||||
@@ -209,7 +211,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
|
||||
|
||||
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
|
||||
# "New" timeline is not present in the list, allowing pageserver to retry the same request
|
||||
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
|
||||
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
assert (
|
||||
new_tenant_timelines == old_tenant_timelines
|
||||
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
|
||||
|
||||
@@ -34,7 +34,7 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver):
|
||||
ignore_notify
|
||||
)
|
||||
|
||||
env.create_branch("test_change_pageserver")
|
||||
env.neon_cli.create_branch("test_change_pageserver")
|
||||
endpoint = env.endpoints.create_start("test_change_pageserver")
|
||||
|
||||
# Put this tenant into a dual-attached state
|
||||
|
||||
@@ -56,10 +56,8 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
|
||||
env.create_branch(
|
||||
"test_clog_truncate_new",
|
||||
ancestor_branch_name="main",
|
||||
ancestor_start_lsn=lsn_after_truncation,
|
||||
env.neon_cli.create_branch(
|
||||
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
|
||||
)
|
||||
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
def start_workload():
|
||||
env.create_branch("test_lsof_pageserver_pid")
|
||||
env.neon_cli.create_branch("test_lsof_pageserver_pid")
|
||||
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -517,7 +517,7 @@ def test_historic_storage_formats(
|
||||
assert metadata_summary["tenant_count"] >= 1
|
||||
assert metadata_summary["timeline_count"] >= 1
|
||||
|
||||
env.neon_cli.tenant_import(dataset.tenant_id)
|
||||
env.neon_cli.import_tenant(dataset.tenant_id)
|
||||
|
||||
# Discover timelines
|
||||
timelines = env.pageserver.http_client().timeline_list(dataset.tenant_id)
|
||||
|
||||
@@ -38,7 +38,7 @@ def test_safekeepers_reconfigure_reorder(
|
||||
):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
env.create_branch("test_safekeepers_reconfigure_reorder")
|
||||
env.neon_cli.create_branch("test_safekeepers_reconfigure_reorder")
|
||||
|
||||
endpoint = env.endpoints.create_start("test_safekeepers_reconfigure_reorder")
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_cli import WalCraft
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
|
||||
|
||||
# Restart nodes with WAL end having specially crafted shape, like last record
|
||||
# crossing segment boundary, to test decoding issues.
|
||||
@@ -19,7 +18,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
)
|
||||
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
|
||||
env = neon_env_builder.init_start()
|
||||
env.create_branch("test_crafted_wal_end")
|
||||
env.neon_cli.create_branch("test_crafted_wal_end")
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# seems like pageserver stop triggers these
|
||||
@@ -28,7 +27,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
|
||||
)
|
||||
|
||||
endpoint = env.endpoints.create("test_crafted_wal_end")
|
||||
wal_craft = WalCraft(extra_env=None, binpath=env.neon_binpath)
|
||||
wal_craft = WalCraft(env)
|
||||
endpoint.config(wal_craft.postgres_config())
|
||||
endpoint.start()
|
||||
res = endpoint.safe_psql_many(
|
||||
|
||||
@@ -31,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
|
||||
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create a branch
|
||||
env.create_branch("test_createdb2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
|
||||
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
|
||||
endpoint2 = env.endpoints.create_start("test_createdb2")
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
@@ -77,14 +77,10 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
env.create_branch(
|
||||
"test_before_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_before_drop
|
||||
)
|
||||
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
|
||||
endpoint_before = env.endpoints.create_start("test_before_dropdb")
|
||||
|
||||
env.create_branch(
|
||||
"test_after_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_after_drop
|
||||
)
|
||||
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
|
||||
endpoint_after = env.endpoints.create_start("test_after_dropdb")
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
|
||||
@@ -18,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
|
||||
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create a branch
|
||||
env.create_branch("test_createuser2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
|
||||
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
|
||||
endpoint2 = env.endpoints.create_start("test_createuser2")
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
|
||||
@@ -59,11 +59,11 @@ def test_min_resident_size_override_handling(
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
tenant_id, _ = env.create_tenant()
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
assert_overrides(tenant_id, config_level_override)
|
||||
|
||||
# Also ensure that specifying the parameter to create_tenant works, in addition to http-level reconfig.
|
||||
tenant_id, _ = env.create_tenant(conf={"min_resident_size_override": "100"})
|
||||
tenant_id, _ = env.neon_cli.create_tenant(conf={"min_resident_size_override": "100"})
|
||||
assert_config(tenant_id, 100, 100)
|
||||
ps_http.set_tenant_config(tenant_id, {})
|
||||
assert_config(tenant_id, None, config_level_override)
|
||||
@@ -280,7 +280,7 @@ def _eviction_env(
|
||||
def pgbench_init_tenant(
|
||||
layer_size: int, scale: int, env: NeonEnv, pg_bin: PgBin
|
||||
) -> Tuple[TenantId, TimelineId]:
|
||||
tenant_id, timeline_id = env.create_tenant(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"gc_period": "0s",
|
||||
"compaction_period": "0s",
|
||||
|
||||
@@ -81,7 +81,7 @@ def test_remote_extensions(
|
||||
# Start a compute node with remote_extension spec
|
||||
# and check that it can download the extensions and use them to CREATE EXTENSION.
|
||||
env = neon_env_builder_local.init_start()
|
||||
env.create_branch("test_remote_extensions")
|
||||
env.neon_cli.create_branch("test_remote_extensions")
|
||||
endpoint = env.endpoints.create(
|
||||
"test_remote_extensions",
|
||||
config_lines=["log_min_messages=debug3"],
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
|
||||
Test that triggering crash from neon_test_utils crashes the endpoint
|
||||
"""
|
||||
env = neon_env_builder.init_start()
|
||||
env.create_branch("test_endpoint_crash")
|
||||
env.neon_cli.create_branch("test_endpoint_crash")
|
||||
endpoint = env.endpoints.create_start("test_endpoint_crash")
|
||||
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
|
||||
|
||||
@@ -3,7 +3,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
env.create_branch("test_fsm_truncate")
|
||||
env.neon_cli.create_branch("test_fsm_truncate")
|
||||
endpoint = env.endpoints.create_start("test_fsm_truncate")
|
||||
endpoint.safe_psql(
|
||||
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
|
||||
|
||||
@@ -68,7 +68,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
|
||||
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
timeline = env.create_branch("test_gc_aggressive", ancestor_branch_name="main")
|
||||
timeline = env.neon_cli.create_branch("test_gc_aggressive", "main")
|
||||
endpoint = env.endpoints.create_start("test_gc_aggressive")
|
||||
|
||||
with endpoint.cursor() as cur:
|
||||
@@ -99,7 +99,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder):
|
||||
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.create_branch("test_gc_index_upload", ancestor_branch_name="main")
|
||||
timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main")
|
||||
endpoint = env.endpoints.create_start("test_gc_index_upload")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
@@ -98,15 +98,27 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
)
|
||||
|
||||
def import_tar(base, wal):
|
||||
env.neon_cli.timeline_import(
|
||||
tenant_id=tenant,
|
||||
timeline_id=timeline,
|
||||
new_branch_name=branch_name,
|
||||
base_tarfile=base,
|
||||
base_lsn=start_lsn,
|
||||
wal_tarfile=wal,
|
||||
end_lsn=end_lsn,
|
||||
pg_version=env.pg_version,
|
||||
env.neon_cli.raw_cli(
|
||||
[
|
||||
"timeline",
|
||||
"import",
|
||||
"--tenant-id",
|
||||
str(tenant),
|
||||
"--timeline-id",
|
||||
str(timeline),
|
||||
"--branch-name",
|
||||
branch_name,
|
||||
"--base-lsn",
|
||||
start_lsn,
|
||||
"--base-tarfile",
|
||||
base,
|
||||
"--end-lsn",
|
||||
end_lsn,
|
||||
"--wal-tarfile",
|
||||
wal,
|
||||
"--pg-version",
|
||||
env.pg_version,
|
||||
]
|
||||
)
|
||||
|
||||
# Importing empty file fails
|
||||
@@ -146,7 +158,7 @@ def test_import_from_pageserver_small(
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
timeline = env.create_branch("test_import_from_pageserver_small")
|
||||
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
|
||||
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
|
||||
|
||||
num_rows = 3000
|
||||
@@ -165,7 +177,7 @@ def test_import_from_pageserver_multisegment(
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
timeline = env.create_branch("test_import_from_pageserver_multisegment")
|
||||
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
|
||||
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
|
||||
|
||||
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
|
||||
@@ -256,13 +268,23 @@ def _import(
|
||||
branch_name = "import_from_pageserver"
|
||||
client = env.pageserver.http_client()
|
||||
env.pageserver.tenant_create(tenant)
|
||||
env.neon_cli.timeline_import(
|
||||
tenant_id=tenant,
|
||||
timeline_id=timeline,
|
||||
new_branch_name=branch_name,
|
||||
base_lsn=lsn,
|
||||
base_tarfile=tar_output_file,
|
||||
pg_version=env.pg_version,
|
||||
env.neon_cli.raw_cli(
|
||||
[
|
||||
"timeline",
|
||||
"import",
|
||||
"--tenant-id",
|
||||
str(tenant),
|
||||
"--timeline-id",
|
||||
str(timeline),
|
||||
"--branch-name",
|
||||
branch_name,
|
||||
"--base-lsn",
|
||||
str(lsn),
|
||||
"--base-tarfile",
|
||||
str(tar_output_file),
|
||||
"--pg-version",
|
||||
env.pg_version,
|
||||
]
|
||||
)
|
||||
|
||||
# Wait for data to land in s3
|
||||
|
||||
@@ -178,9 +178,9 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
def tenant_update_config(changes):
|
||||
tenant_config.update(changes)
|
||||
env.config_tenant(tenant_id, tenant_config)
|
||||
env.neon_cli.config_tenant(tenant_id, tenant_config)
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
|
||||
log.info("tenant id is %s", tenant_id)
|
||||
env.initial_tenant = tenant_id # update_and_gc relies on this
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
@@ -8,7 +8,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# small checkpoint distance to create more delta layer files
|
||||
"checkpoint_distance": f"{1024 ** 2}",
|
||||
@@ -52,7 +52,7 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# small checkpoint distance to create more delta layer files
|
||||
"checkpoint_distance": f"{1024 ** 2}",
|
||||
|
||||
@@ -56,7 +56,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
|
||||
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
|
||||
}
|
||||
|
||||
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
|
||||
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ def test_ondemand_wal_download_in_replication_slot_funcs(neon_env_builder: NeonE
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.create_branch("init")
|
||||
env.neon_cli.create_branch("init")
|
||||
endpoint = env.endpoints.create_start("init")
|
||||
|
||||
with endpoint.connect().cursor() as cur:
|
||||
@@ -270,7 +270,7 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.create_branch("init")
|
||||
env.neon_cli.create_branch("init")
|
||||
endpoint = env.endpoints.create_start("init")
|
||||
|
||||
with endpoint.connect().cursor() as cur:
|
||||
@@ -352,7 +352,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
|
||||
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
env = neon_simple_env
|
||||
|
||||
env.create_branch("init")
|
||||
env.neon_cli.create_branch("init")
|
||||
endpoint = env.endpoints.create_start("init")
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
@@ -397,7 +397,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
env = neon_simple_env
|
||||
|
||||
env.create_branch("init")
|
||||
env.neon_cli.create_branch("init")
|
||||
endpoint = env.endpoints.create_start("init")
|
||||
|
||||
cur = endpoint.connect().cursor()
|
||||
@@ -445,7 +445,7 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_slots_and_branching(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
tenant, timeline = env.create_tenant()
|
||||
tenant, timeline = env.neon_cli.create_tenant()
|
||||
env.pageserver.http_client()
|
||||
|
||||
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
|
||||
@@ -457,7 +457,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
|
||||
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
|
||||
|
||||
# Create branch ws.
|
||||
env.create_branch("ws", ancestor_branch_name="main", tenant_id=tenant)
|
||||
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
|
||||
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
|
||||
|
||||
# Check that we can create slot with the same name
|
||||
@@ -469,10 +469,10 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
|
||||
def test_replication_shutdown(neon_simple_env: NeonEnv):
|
||||
# Ensure Postgres can exit without getting stuck when a replication job is active + neon extension installed
|
||||
env = neon_simple_env
|
||||
env.create_branch("test_replication_shutdown_publisher", ancestor_branch_name="main")
|
||||
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
|
||||
pub = env.endpoints.create("test_replication_shutdown_publisher")
|
||||
|
||||
env.create_branch("test_replication_shutdown_subscriber")
|
||||
env.neon_cli.create_branch("test_replication_shutdown_subscriber")
|
||||
sub = env.endpoints.create("test_replication_shutdown_subscriber")
|
||||
|
||||
pub.respec(skip_pg_catalog_updates=False)
|
||||
@@ -575,7 +575,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
vanilla_pg.start()
|
||||
vanilla_pg.safe_psql("create extension neon;")
|
||||
|
||||
env.create_branch("subscriber")
|
||||
env.neon_cli.create_branch("subscriber")
|
||||
sub = env.endpoints.create("subscriber")
|
||||
sub.start()
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
|
||||
"""
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
tenant_id, _ = env.create_tenant(
|
||||
tenant_id, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable default GC and compaction
|
||||
"gc_period": "1000 m",
|
||||
@@ -43,7 +43,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
|
||||
}
|
||||
)
|
||||
|
||||
timeline_id = env.create_branch("test_lsn_mapping", tenant_id=tenant_id)
|
||||
timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id)
|
||||
endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id)
|
||||
timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
@@ -123,8 +123,8 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
|
||||
endpoint_here.stop_and_destroy()
|
||||
|
||||
# Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff
|
||||
timeline_id_child = env.create_branch(
|
||||
"test_lsn_mapping_child", ancestor_branch_name="test_lsn_mapping", tenant_id=tenant_id
|
||||
timeline_id_child = env.neon_cli.create_branch(
|
||||
"test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping"
|
||||
)
|
||||
|
||||
# Timestamp is in the unreachable past
|
||||
@@ -190,7 +190,7 @@ def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.create_branch("test_ts_of_lsn_api")
|
||||
new_timeline_id = env.neon_cli.create_branch("test_ts_of_lsn_api")
|
||||
endpoint_main = env.endpoints.create_start("test_ts_of_lsn_api")
|
||||
|
||||
cur = endpoint_main.connect().cursor()
|
||||
|
||||
@@ -72,7 +72,9 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
assert int(next_multixact_id) > int(next_multixact_id_old)
|
||||
|
||||
# Branch at this point
|
||||
env.create_branch("test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn)
|
||||
env.neon_cli.create_branch(
|
||||
"test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
|
||||
)
|
||||
endpoint_new = env.endpoints.create_start("test_multixact_new")
|
||||
|
||||
next_multixact_id_new = endpoint_new.safe_psql(
|
||||
|
||||
@@ -31,7 +31,7 @@ def helper_compare_timeline_list(
|
||||
)
|
||||
)
|
||||
|
||||
timelines_cli = env.neon_cli.timeline_list(initial_tenant)
|
||||
timelines_cli = env.neon_cli.list_timelines(initial_tenant)
|
||||
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
|
||||
assert timelines_api == cli_timeline_ids
|
||||
|
||||
@@ -44,19 +44,17 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a branch for us
|
||||
main_timeline_id = env.create_branch("test_cli_branch_list_main")
|
||||
main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main")
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a nested branch
|
||||
nested_timeline_id = env.create_branch(
|
||||
"test_cli_branch_list_nested", ancestor_branch_name="test_cli_branch_list_main"
|
||||
nested_timeline_id = env.neon_cli.create_branch(
|
||||
"test_cli_branch_list_nested", "test_cli_branch_list_main"
|
||||
)
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Check that all new branches are visible via CLI
|
||||
timelines_cli = [
|
||||
timeline_id for (_, timeline_id) in env.neon_cli.timeline_list(env.initial_tenant)
|
||||
]
|
||||
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
|
||||
|
||||
assert main_timeline_id in timelines_cli
|
||||
assert nested_timeline_id in timelines_cli
|
||||
@@ -66,7 +64,7 @@ def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env
|
||||
tenants = pageserver_http_client.tenant_list()
|
||||
tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))
|
||||
|
||||
res = env.neon_cli.tenant_list()
|
||||
res = env.neon_cli.list_tenants()
|
||||
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert tenants_api == tenants_cli
|
||||
@@ -79,18 +77,18 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
helper_compare_tenant_list(pageserver_http_client, env)

# Create new tenant
tenant1, _ = env.create_tenant()
tenant1, _ = env.neon_cli.create_tenant()

# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)

# Create new tenant
tenant2, _ = env.create_tenant()
tenant2, _ = env.neon_cli.create_tenant()

# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)

res = env.neon_cli.tenant_list()
res = env.neon_cli.list_tenants()
tenants = sorted(map(lambda t: TenantId(t.split()[0]), res.stdout.splitlines()))

assert env.initial_tenant in tenants
@@ -100,8 +98,8 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):

def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.create_tenant()
timelines = env.neon_cli.timeline_list(tenant_id)
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)

# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
@@ -134,7 +132,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.storage_controller_stop(False)
env.neon_cli.storage_broker_stop()
env.neon_cli.broker_stop()

# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -177,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):

# Stop this to get out of the way of the following `start`
env.neon_cli.storage_controller_stop(False)
env.neon_cli.storage_broker_stop()
env.neon_cli.broker_stop()

# Default start
res = env.neon_cli.raw_cli(["start"])

@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
# Verify that the neon extension is installed and has the correct version.
def test_neon_extension(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_create_extension_neon")
env.neon_cli.create_branch("test_create_extension_neon")

endpoint_main = env.endpoints.create("test_create_extension_neon")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -35,7 +35,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be upgraded/downgraded.
def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_neon_extension_compatibility")
env.neon_cli.create_branch("test_neon_extension_compatibility")

endpoint_main = env.endpoints.create("test_neon_extension_compatibility")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -72,7 +72,7 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be auto-upgraded to the latest version.
def test_neon_extension_auto_upgrade(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.create_branch("test_neon_extension_auto_upgrade")
env.neon_cli.create_branch("test_neon_extension_auto_upgrade")

endpoint_main = env.endpoints.create("test_neon_extension_auto_upgrade")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon

@@ -1,5 +1,4 @@
import pytest
from fixtures.common_types import TimelineId
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.port_distributor import PortDistributor

@@ -11,36 +10,22 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
# Skipping the init step that creates a local tenant in Pytest tests
try:
env.neon_cli.start()
env.create_tenant(tenant_id=env.initial_tenant, set_default=True)
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)

main_branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
main_branch_name,
pg_port,
http_port,
endpoint_id="ep-basic-main",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
main_branch_name, pg_port, http_port, endpoint_id="ep-basic-main"
)
env.neon_cli.endpoint_start("ep-basic-main")

branch_name = "migration-check"
env.neon_cli.timeline_branch(
tenant_id=env.initial_tenant,
timeline_id=TimelineId.generate(),
new_branch_name=branch_name,
)
env.neon_cli.create_branch(branch_name)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id=f"ep-{branch_name}",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
branch_name, pg_port, http_port, endpoint_id=f"ep-{branch_name}"
)
env.neon_cli.endpoint_start(f"ep-{branch_name}")
finally:
@@ -58,26 +43,12 @@ def test_neon_two_primary_endpoints_fail(

pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep1",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep1")

pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
# ep1 is not running so create will succeed
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep2",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep2")

env.neon_cli.endpoint_start("ep1")


@@ -6,10 +6,10 @@ from fixtures.utils import wait_until

def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.create_branch("test_neon_superuser_publisher", ancestor_branch_name="main")
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
pub = env.endpoints.create("test_neon_superuser_publisher")

env.create_branch("test_neon_superuser_subscriber")
env.neon_cli.create_branch("test_neon_superuser_subscriber")
sub = env.endpoints.create("test_neon_superuser_subscriber")

pub.respec(skip_pg_catalog_updates=False)

@@ -5,7 +5,7 @@ from fixtures.pageserver.http import PageserverHttpClient


def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = endpoint.safe_psql_many(

@@ -17,7 +17,7 @@ from fixtures.utils import print_gc_result, query_scalar
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
env.create_branch("test_old_request_lsn", ancestor_branch_name="main")
env.neon_cli.create_branch("test_old_request_lsn", "main")
endpoint = env.endpoints.create_start("test_old_request_lsn")

pg_conn = endpoint.connect()

@@ -545,7 +545,7 @@ def test_compaction_downloads_on_demand_without_image_creation(neon_env_builder:
layer_sizes += layer.layer_file_size
pageserver_http.evict_layer(tenant_id, timeline_id, layer.layer_file_name)

env.config_tenant(tenant_id, {"compaction_threshold": "3"})
env.neon_cli.config_tenant(tenant_id, {"compaction_threshold": "3"})

pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
@@ -647,7 +647,7 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne
# layers -- threshold of 2 would sound more reasonable, but keeping it as 1
# to be less flaky
conf["image_creation_threshold"] = "1"
env.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
env.neon_cli.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})

pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)

@@ -59,7 +59,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()

timeline_details = client.timeline_detail(
tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True
@@ -108,7 +108,7 @@ def expect_updated_msg_lsn(
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)

# insert something to force sk -> ps message

@@ -9,7 +9,7 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()

env.create_branch("test_pageserver_catchup_while_compute_down")
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
endpoint = env.endpoints.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]

@@ -150,7 +150,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})

env.create_tenant(
env.neon_cli.create_tenant(
tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline
)

@@ -643,7 +643,9 @@ def test_upgrade_generationless_local_file_paths(

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)

workload = Workload(env, tenant_id, timeline_id)
workload.init()

@@ -42,7 +42,7 @@ async def run_worker_for_tenant(


async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> Tuple[TenantId, TimelineId, Lsn]:
tenant, timeline = env.create_tenant(conf=tenant_conf)
tenant, timeline = env.neon_cli.create_tenant(conf=tenant_conf)
last_flush_lsn = await run_worker_for_tenant(env, entries, tenant)
return tenant, timeline, last_flush_lsn


@@ -14,7 +14,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.create_branch("test_pageserver_restarts")
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
@@ -46,7 +46,7 @@ def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
# Test handling errors during page server reconnect
def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv):
env = neon_simple_env
env.create_branch("test_pageserver_reconnect")
env.neon_cli.create_branch("test_pageserver_reconnect")
endpoint = env.endpoints.create_start("test_pageserver_reconnect")

con = endpoint.connect()

@@ -169,7 +169,7 @@ def test_pageserver_chaos(

# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
"checkpoint_distance": "5000000",
}

@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# running.
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.create_branch("test_pageserver_restarts")
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_restarts = 10
scale = 10

@@ -650,7 +650,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_a = TimelineId.generate()
timeline_b = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_a,
placement_policy='{"Attached":1}',
@@ -658,7 +658,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
# to trigger the upload promptly.
conf={"heatmap_period": f"{upload_period_secs}s"},
)
env.create_timeline("main2", tenant_id, timeline_b)
env.neon_cli.create_timeline("main2", tenant_id, timeline_b)

tenant_timelines[tenant_id] = [timeline_a, timeline_b]

@@ -778,7 +778,9 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()

env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)

attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
ps_attached = env.get_pageserver(attached_to_id)

@@ -57,7 +57,7 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):

# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.create_branch("test_pitr_gc_hundred", ancestor_branch_name="main", ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)

endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")


@@ -25,7 +25,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
)

# Create a branch for us
env.create_branch("test_pageserver_recovery", ancestor_branch_name="main")
env.neon_cli.create_branch("test_pageserver_recovery", "main")

endpoint = env.endpoints.create_start("test_pageserver_recovery")


@@ -230,7 +230,7 @@ def test_remote_storage_upload_queue_retries(

# create tenant with config that will determinstically allow
# compaction and gc
tenant_id, timeline_id = env.create_tenant(
tenant_id, timeline_id = env.neon_cli.create_tenant(
conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{64 * 1024}",
@@ -640,9 +640,7 @@ def test_empty_branch_remote_storage_upload(neon_env_builder: NeonEnvBuilder):
client = env.pageserver.http_client()

new_branch_name = "new_branch"
new_branch_timeline_id = env.create_branch(
new_branch_name, ancestor_branch_name="main", tenant_id=env.initial_tenant
)
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)

timelines_before_detach = set(

@@ -60,7 +60,9 @@ def test_tenant_s3_restore(
last_flush_lsns = []

for timeline in ["first", "second"]:
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
endpoint.safe_psql(f"CREATE TABLE created_{timeline}(id integer);")

@@ -77,7 +77,7 @@ def test_sharding_smoke(
assert all(s < expect_initdb_size // 2 for s in sizes.values())

# Test that timeline creation works on a sharded tenant
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)

# Test that we can write data to a sharded tenant
workload = Workload(env, tenant_id, timeline_b, branch_name="branch_b")
@@ -378,7 +378,7 @@ def test_sharding_split_smoke(
env.start()
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -1127,7 +1127,7 @@ def test_sharding_split_failures(
timeline_id = TimelineId.generate()

# Create a tenant with secondary locations enabled
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=initial_shard_count, placement_policy='{"Attached":1}'
)

@@ -1441,7 +1441,7 @@ def test_sharding_unlogged_relation(neon_env_builder: NeonEnvBuilder):

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=8)
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=8)

# We will create many tables to ensure it's overwhelmingly likely that at least one
# of them doesn't land on shard 0
@@ -1483,7 +1483,7 @@ def test_top_tenants(neon_env_builder: NeonEnvBuilder):
for i in range(0, n_tenants):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)

# Write a different amount of data to each tenant
w = Workload(env, tenant_id, timeline_id)

@@ -96,7 +96,7 @@ def test_storage_controller_smoke(

# Creating several tenants should spread out across the pageservers
for tid in tenant_ids:
env.create_tenant(tid, shard_count=shards_per_tenant)
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)

# Repeating a creation should be idempotent (we are just testing it doesn't return an error)
env.storage_controller.tenant_create(
@@ -172,7 +172,7 @@ def test_storage_controller_smoke(
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
for tid in tenant_ids:
env.create_tenant(tid, shard_count=shards_per_tenant)
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)

counts = get_node_shard_counts(env, tenant_ids)
# Nothing should have been scheduled on the node in Draining
@@ -806,7 +806,10 @@ def test_storage_controller_s3_time_travel_recovery(
env.storage_controller.consistency_check()

branch_name = "main"
timeline_id = env.create_timeline(branch_name, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_timeline(
branch_name,
tenant_id=tenant_id,
)
# Write some nontrivial amount of data into the endpoint and wait until it is uploaded
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1006,7 +1009,9 @@ def test_storage_controller_tenant_deletion(

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}'
)

# Ensure all the locations are configured, including secondaries
env.storage_controller.reconcile_until_idle()
@@ -1212,7 +1217,10 @@ def test_storage_controller_heartbeats(
env.storage_controller.tenant_create(tid)

branch_name = "main"
env.create_timeline(branch_name, tenant_id=tid)
env.neon_cli.create_timeline(
branch_name,
tenant_id=tid,
)

with env.endpoints.create_start("main", tenant_id=tid) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1314,9 +1322,9 @@ def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder):

# We'll have two tenants.
tenant_a = TenantId.generate()
env.create_tenant(tenant_a, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(tenant_a, placement_policy='{"Attached":1}')
tenant_b = TenantId.generate()
env.create_tenant(tenant_b, placement_policy='{"Attached":1}')
env.neon_cli.create_tenant(tenant_b, placement_policy='{"Attached":1}')

# Each pageserver will have one attached and one secondary location
env.storage_controller.tenant_shard_migrate(
@@ -1639,7 +1647,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto

# Create a second timeline to ensure that import finds both
timeline_a = env.initial_timeline
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)

workload_a = Workload(env, tenant_id, timeline_a, branch_name="main")
workload_a.init()
@@ -1681,7 +1689,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
)

# Now import it again
env.neon_cli.tenant_import(tenant_id)
env.neon_cli.import_tenant(tenant_id)

# Check we found the shards
describe = env.storage_controller.tenant_describe(tenant_id)
@@ -1723,7 +1731,7 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)

@@ -1810,7 +1818,7 @@ def test_skip_drain_on_secondary_lag(neon_env_builder: NeonEnvBuilder, pg_bin: P
env = neon_env_builder.init_configs()
env.start()

tid, timeline_id = env.create_tenant(placement_policy='{"Attached":1}')
tid, timeline_id = env.neon_cli.create_tenant(placement_policy='{"Attached":1}')

# Give things a chance to settle.
env.storage_controller.reconcile_until_idle(timeout_secs=30)
@@ -1916,7 +1924,7 @@ def test_background_operation_cancellation(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)

@@ -1976,7 +1984,7 @@ def test_storage_controller_node_deletion(
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.create_tenant(
env.neon_cli.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)

@@ -2101,7 +2109,7 @@ def test_storage_controller_metadata_health(
)

# Mock tenant with unhealthy scrubber scan result
tenant_b, _ = env.create_tenant(shard_count=shard_count)
tenant_b, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_b_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_b, shard_count=shard_count)
if shard_count is not None
@@ -2109,7 +2117,7 @@ def test_storage_controller_metadata_health(
)

# Mock tenant that never gets a health update from scrubber
tenant_c, _ = env.create_tenant(shard_count=shard_count)
tenant_c, _ = env.neon_cli.create_tenant(shard_count=shard_count)

tenant_c_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_c, shard_count=shard_count)
@@ -2509,7 +2517,7 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB

tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.storage_controller.pageserver_api().set_tenant_config(tenant_id, TENANT_CONF)

# Write enough data that a compaction would do some work (deleting some L0s)
@@ -2644,7 +2652,7 @@ def test_storage_controller_proxy_during_migration(

tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.create_tenant(tenant_id, timeline_id)
env.neon_cli.create_tenant(tenant_id, timeline_id)

# The test stalls a reconcile on purpose to check if the long running
# reconcile alert fires.
@@ -2823,7 +2831,7 @@ def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
# Generate a layer to avoid shard split handling on ps from tripping
# up on debug assert.
timeline_id = TimelineId.generate()
env.create_timeline("bar", tids[0], timeline_id)
env.neon_cli.create_timeline("bar", tids[0], timeline_id)

workload = Workload(env, tids[0], timeline_id, branch_name="bar")
workload.init()
@@ -2911,97 +2919,3 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise


@run_only_on_default_postgres("Postgres version makes no difference here")
@pytest.mark.parametrize(
"migration_failpoint",
[
MigrationFailpoints.PRE_GENERATION_INC,
MigrationFailpoints.POST_NOTIFY,
MigrationFailpoints.POST_DETACH,
],
)
def test_multi_attached_timeline_creation(neon_env_builder: NeonEnvBuilder, migration_failpoint):
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_configs()
env.start()

tenant_id = TenantId.generate()
env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})

shard_zero = TenantShardId(tenant_id, 0, 0)
locations = env.storage_controller.get_tenants_placement()[str(shard_zero)]

assert locations["observed"] == locations["intent"]
assert locations["observed"]["attached"] is not None
assert len(locations["observed"]["secondary"]) > 0

attached_location = locations["observed"]["attached"]
secondary_location = locations["observed"]["secondary"][0]

env.storage_controller.configure_failpoints((migration_failpoint.value, "pause"))

try:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
migrate_fut = executor.submit(
env.storage_controller.tenant_shard_migrate,
shard_zero,
secondary_location,
)

def has_hit_migration_failpoint():
expr = f"at failpoint {migration_failpoint.value}"
log.info(expr)
assert env.storage_controller.log_contains(expr)

wait_until(10, 1, has_hit_migration_failpoint)

timeline_id = TimelineId.generate()
env.storage_controller.pageserver_api().timeline_create(
pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
)

# Timeline creation only goes to the origin.
if migration_failpoint == MigrationFailpoints.PRE_GENERATION_INC:
client = env.get_pageserver(attached_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"

with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(secondary_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404

# Timeline creations goes to both attached locations
if migration_failpoint == MigrationFailpoints.POST_NOTIFY:
for node_id in [attached_location, secondary_location]:
client = env.get_pageserver(node_id).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {node_id}"

# Timeline creation goes both locations, but storcon gets a 404 from the origin
# which it ignores.
if migration_failpoint == MigrationFailpoints.POST_DETACH:
client = env.get_pageserver(secondary_location).http_client()
assert timeline_id in {
TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
}, f"new timeline not found on {attached_location}"

with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404

# Eventually migration completes
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
migrate_fut.result()

# Ensure that we detached from the old attached location
with pytest.raises(PageserverApiException) as exc:
env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
assert exc.value.status_code == 404
except:
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
raise

@@ -135,7 +135,7 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=shard_count)

workload = Workload(env, tenant_id, timeline_id)
workload.init()
@@ -185,7 +185,7 @@ def test_scrubber_physical_gc_ancestors(

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -303,7 +303,7 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder

tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=None,
@@ -385,7 +385,7 @@ def test_scrubber_physical_gc_ancestors_split(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
initial_shard_count = 2
env.create_tenant(
env.neon_cli.create_tenant(
tenant_id,
timeline_id,
shard_count=initial_shard_count,

@@ -9,11 +9,11 @@ from fixtures.utils import wait_until
# It requires tracking information about replication origins at page server side
def test_subscriber_restart(neon_simple_env: NeonEnv):
env = neon_simple_env
env.create_branch("publisher")
env.neon_cli.create_branch("publisher")
pub = env.endpoints.create("publisher")
pub.start()

sub_timeline_id = env.create_branch("subscriber")
sub_timeline_id = env.neon_cli.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()

@@ -38,7 +38,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# Check that we raise on misspelled configs
invalid_conf_key = "some_invalid_setting_name_blah_blah_123"
try:
env.create_tenant(
env.neon_cli.create_tenant(
conf={
invalid_conf_key: "20000",
}
@@ -54,9 +54,9 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
"evictions_low_residence_duration_metric_threshold": "42s",
"eviction_policy": json.dumps({"kind": "NoEviction"}),
}
tenant, _ = env.create_tenant(conf=new_conf)
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)

env.create_timeline("test_tenant_conf", tenant_id=tenant)
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
env.endpoints.create_start("test_tenant_conf", "main", tenant)

# check the configuration of the default tenant
@@ -121,7 +121,10 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
),
"max_lsn_wal_lag": "13000000",
}
env.config_tenant(tenant_id=tenant, conf=conf_update)
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=conf_update,
)

updated_tenant_config = http_client.tenant_config(tenant_id=tenant)
updated_specific_config = updated_tenant_config.tenant_specific_overrides
@@ -169,8 +172,10 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
final_conf = {
"pitr_interval": "1 min",
}
env.config_tenant(tenant_id=tenant, conf=final_conf)

env.neon_cli.config_tenant(
tenant_id=tenant,
conf=final_conf,
)
final_tenant_config = http_client.tenant_config(tenant_id=tenant)
final_specific_config = final_tenant_config.tenant_specific_overrides
assert final_specific_config["pitr_interval"] == "1m"
@@ -213,7 +218,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)

# tenant is created with defaults, as in without config file
(tenant_id, timeline_id) = env.create_tenant()
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
config_path = env.pageserver.tenant_dir(tenant_id) / "config-v1"

http_client = env.pageserver.http_client()
@@ -235,9 +240,9 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
func=lambda: assert_tenant_state(http_client, tenant_id, "Active"),
)

env.config_tenant(tenant_id, {"gc_horizon": "1000000"})
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"})
contents_first = config_path.read_text()
env.config_tenant(tenant_id, {"gc_horizon": "0"})
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "0"})
contents_later = config_path.read_text()

# dont test applying the setting here, we have that another test case to show it
@@ -293,7 +298,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric = get_metric()
assert int(metric.value) > 0, "metric is updated"

env.config_tenant(
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": default_value}
)
updated_metric = get_metric()
@@ -301,7 +306,9 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric.value
), "metric is unchanged when setting same value"

env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"})
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"}
)
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) == 0
@@ -313,7 +320,9 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) > 0

env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"})
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"}
)
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) == 0, "value resets if label changes"
@@ -325,7 +334,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) > 0, "set a non-zero value for next step"

env.config_tenant(tenant_id, {})
env.neon_cli.config_tenant(tenant_id, {})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default"
assert int(metric.value) == 0, "value resets to default"

@@ -78,7 +78,7 @@ def test_tenant_delete_smoke(
# may need to retry on some remote storage errors injected by the test harness
error_tolerant_delete(ps_http, tenant_id)

env.create_tenant(
env.neon_cli.create_tenant(
tenant_id=tenant_id,
conf=many_small_layers_tenant_config(),
)
@@ -89,7 +89,9 @@ def test_tenant_delete_smoke(
# create two timelines one being the parent of another
parent = None
for timeline in ["first", "second"]:
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
@@ -337,7 +339,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, make_httpserver, neon_env_builder
ps_http = env.pageserver.http_client()
# create a tenant separate from the main tenant so that we have one remaining
# after we deleted it, as the scrubber treats empty buckets as an error.
(tenant_id, timeline_id) = env.create_tenant()
(tenant_id, timeline_id) = env.neon_cli.create_tenant()

with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())

Some files were not shown because too many files have changed in this diff