Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-25 14:20:38 +00:00)

Compare commits: release-pr ... add-pg_tra

1 commit: 791f181034
@@ -1388,6 +1388,38 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control

#########################################################################################
# Layer "pg_tracing"
# compile pg_tracing extension
#
#########################################################################################
FROM build-deps AS pg_tracing-src
ARG PG_VERSION
WORKDIR /ext-src
RUN case "${PG_VERSION:?}" in \
"v14" | "v15") \
echo "pg_tracing not supported on this PostgreSQL version." && exit 0 \
;; \
*) \
;; \
esac && \
wget https://github.com/DataDog/pg_tracing/archive/refs/tags/v0.1.3.tar.gz -O pg_tracing.tar.gz && \
echo "d0a7cca7279bb29601ba6c4c1aaeb3a44d71e6afa3b78aae1e3b7269e688f907 pg_tracing.tar.gz" | sha256sum --check && \
mkdir pg_tracing-src && cd pg_tracing-src && tar xzf ../pg_tracing.tar.gz --strip-components=1 -C .

FROM pg-build AS pg_tracing-build
COPY --from=pg_tracing-src /ext-src/ /ext-src/
WORKDIR /ext-src/pg_tracing-src
RUN case "${PG_VERSION:?}" in \
"v14" | "v15") \
echo "pg_tracing not supported on this PostgreSQL version." && exit 0 \
;; \
*) \
;; \
esac && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install

#########################################################################################
#
# Layer "pg_mooncake"
@@ -1617,6 +1649,7 @@ COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_tracing-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1793,6 +1826,7 @@ COPY --from=pg_semver-src /ext-src/ /ext-src/
#COPY --from=wal2json-src /ext-src/ /ext-src/
COPY --from=pg_ivm-src /ext-src/ /ext-src/
COPY --from=pg_partman-src /ext-src/ /ext-src/
COPY --from=pg_tracing-src /ext-src/ /ext-src/
#COPY --from=pg_mooncake-src /ext-src/ /ext-src/
COPY --from=pg_repack-src /ext-src/ /ext-src/
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/

@@ -80,22 +80,10 @@ pub enum TenantState {
///
/// Transitions out of this state are possible through `set_broken()`.
Stopping {
/// The barrier can be used to wait for shutdown to complete. The first caller to set
/// Some(Barrier) is responsible for driving shutdown to completion. Subsequent callers
/// will wait for the first caller's existing barrier.
///
/// None is set when an attach is cancelled, to signal to shutdown that the attach has in
/// fact cancelled:
///
/// 1. `shutdown` sees `TenantState::Attaching`, and cancels the tenant.
/// 2. `attach` sets `TenantState::Stopping(None)` and exits.
/// 3. `set_stopping` waits for `TenantState::Stopping(None)` and sets
///    `TenantState::Stopping(Some)` to claim the barrier as the shutdown owner.
//
// Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
// otherwise it will not be skipped during deserialization
#[serde(skip)]
progress: Option<completion::Barrier>,
progress: completion::Barrier,
},
/// The tenant is recognized by the pageserver, but can no longer be used for
/// any operations.
@@ -2731,15 +2719,10 @@ mod tests {
"Activating",
),
(line!(), TenantState::Active, "Active"),
(
line!(),
TenantState::Stopping { progress: None },
"Stopping",
),
(
line!(),
TenantState::Stopping {
progress: Some(completion::Barrier::default()),
progress: utils::completion::Barrier::default(),
},
"Stopping",
),

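The doc comments above describe a small hand-off protocol around `TenantState::Stopping`: a cancelled attach leaves `progress: None`, and the first `set_stopping` caller claims ownership of shutdown by filling in the barrier. The sketch below models that hand-off with plain tokio primitives; the `TenantState` enum and the watch-receiver "barrier" are simplified stand-ins, not the pageserver's real types.

```rust
// Minimal sketch of the hand-off; not the real pageserver types. The barrier is
// modelled as a watch receiver that callers could wait on for shutdown completion.
use tokio::sync::watch;

enum TenantState {
    Attaching,
    Stopping { progress: Option<watch::Receiver<bool>> },
}

// Step 2 of the documented protocol: a cancelled attach exits, leaving `progress: None`.
fn attach_cancelled(state: &watch::Sender<TenantState>) {
    state.send_modify(|s| *s = TenantState::Stopping { progress: None });
}

// Step 3: the first `set_stopping` caller claims the empty slot and becomes the
// shutdown owner; later callers see `Some` and would wait on the existing barrier.
fn claim_shutdown(state: &watch::Sender<TenantState>, barrier: watch::Receiver<bool>) -> bool {
    let mut owner = false;
    state.send_modify(|s| {
        if let TenantState::Stopping { progress: slot @ None } = s {
            *slot = Some(barrier);
            owner = true;
        }
    });
    owner
}

fn main() {
    let (state_tx, _state_rx) = watch::channel(TenantState::Attaching);
    attach_cancelled(&state_tx);

    let (_done_tx, done_rx) = watch::channel(false);
    assert!(claim_shutdown(&state_tx, done_rx)); // first caller wins the slot
    let (_done_tx2, done_rx2) = watch::channel(false);
    assert!(!claim_shutdown(&state_tx, done_rx2)); // subsequent callers do not
}
```
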
@@ -45,7 +45,6 @@ use remote_timeline_client::manifest::{
};
use remote_timeline_client::{
FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError,
download_tenant_manifest,
};
use secondary::heatmap::{HeatMapTenant, HeatMapTimeline};
use storage_broker::BrokerClientChannel;
@@ -227,8 +226,7 @@ struct TimelinePreload {
}

pub(crate) struct TenantPreload {
/// The tenant manifest from remote storage, or None if no manifest was found.
tenant_manifest: Option<TenantManifest>,
tenant_manifest: TenantManifest,
/// Map from timeline ID to a possible timeline preload. It is None iff the timeline is offloaded according to the manifest.
timelines: HashMap<TimelineId, Option<TimelinePreload>>,
}
@@ -284,15 +282,12 @@ pub struct Tenant {
/// **Lock order**: if acquiring all (or a subset), acquire them in order `timelines`, `timelines_offloaded`, `timelines_creating`
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,

/// The last tenant manifest known to be in remote storage. None if the manifest has not yet
/// been either downloaded or uploaded. Always Some after tenant attach.
/// Serialize writes of the tenant manifest to remote storage. If there are concurrent operations
/// affecting the manifest, such as timeline deletion and timeline offload, they must wait for
/// each other (this could be optimized to coalesce writes if necessary).
///
/// Initially populated during tenant attach, updated via `maybe_upload_tenant_manifest`.
///
/// Do not modify this directly. It is used to check whether a new manifest needs to be
/// uploaded. The manifest is constructed in `build_tenant_manifest`, and uploaded via
/// `maybe_upload_tenant_manifest`.
remote_tenant_manifest: tokio::sync::Mutex<Option<TenantManifest>>,
/// The contents of the Mutex are the last manifest we successfully uploaded
tenant_manifest_upload: tokio::sync::Mutex<Option<TenantManifest>>,

// This mutex prevents creation of new timelines during GC.
// Adding yet another mutex (in addition to `timelines`) is needed because holding
@@ -1359,41 +1354,36 @@ impl Tenant {
}
}

fn make_broken_or_stopping(t: &Tenant, err: anyhow::Error) {
t.state.send_modify(|state| match state {
// TODO: the old code alluded to DeleteTenantFlow sometimes setting
// TenantState::Stopping before we get here, but this may be outdated.
// Let's find out with a testing assertion. If this doesn't fire, and the
// logs don't show this happening in production, remove the Stopping cases.
TenantState::Stopping{..} if cfg!(any(test, feature = "testing")) => {
panic!("unexpected TenantState::Stopping during attach")
}
// If the tenant is cancelled, assume the error was caused by cancellation.
TenantState::Attaching if t.cancel.is_cancelled() => {
info!("attach cancelled, setting tenant state to Stopping: {err}");
// NB: progress None tells `set_stopping` that attach has cancelled.
*state = TenantState::Stopping { progress: None };
}
// According to the old code, DeleteTenantFlow may already have set this to
// Stopping. Retain its progress.
// TODO: there is no DeleteTenantFlow. Is this still needed? See above.
TenantState::Stopping { progress } if t.cancel.is_cancelled() => {
assert!(progress.is_some(), "concurrent attach cancellation");
info!("attach cancelled, already Stopping: {err}");
}
// Mark the tenant as broken.
TenantState::Attaching | TenantState::Stopping { .. } => {
error!("attach failed, setting tenant state to Broken (was {state}): {err:?}");
*state = TenantState::broken_from_reason(err.to_string())
}
// The attach task owns the tenant state until activated.
state => panic!("invalid tenant state {state} during attach: {err:?}"),
});
// Ideally we should use Tenant::set_broken_no_wait, but it is not supposed to be used when tenant is in loading state.
enum BrokenVerbosity {
Error,
Info
}
let make_broken =
|t: &Tenant, err: anyhow::Error, verbosity: BrokenVerbosity| {
match verbosity {
BrokenVerbosity::Info => {
info!("attach cancelled, setting tenant state to Broken: {err}");
},
BrokenVerbosity::Error => {
error!("attach failed, setting tenant state to Broken: {err:?}");
}
}
t.state.send_modify(|state| {
// The Stopping case is for when we have passed control on to DeleteTenantFlow:
// if it errors, we will call make_broken when tenant is already in Stopping.
assert!(
matches!(*state, TenantState::Attaching | TenantState::Stopping { .. }),
"the attach task owns the tenant state until activation is complete"
);

*state = TenantState::broken_from_reason(err.to_string());
});
};

// TODO: should also be rejecting tenant conf changes that violate this check.
if let Err(e) = crate::tenant::storage_layer::inmemory_layer::IndexEntry::validate_checkpoint_distance(tenant_clone.get_checkpoint_distance()) {
make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e));
make_broken(&tenant_clone, anyhow::anyhow!(e), BrokenVerbosity::Error);
return Ok(());
}

@@ -1445,8 +1435,10 @@ impl Tenant {
// stayed in Activating for such a long time that shutdown found it in
// that state.
tracing::info!(state=%tenant_clone.current_state(), "Tenant shut down before activation");
// Set the tenant to Stopping to signal `set_stopping` that we're done.
make_broken_or_stopping(&tenant_clone, anyhow::anyhow!("Shut down while Attaching"));
// Make the tenant broken so that set_stopping will not hang waiting for it to leave
// the Attaching state. This is an over-reaction (nothing really broke, the tenant is
// just shutting down), but ensures progress.
make_broken(&tenant_clone, anyhow::anyhow!("Shut down while Attaching"), BrokenVerbosity::Info);
return Ok(());
},
)
@@ -1465,7 +1457,7 @@ impl Tenant {
match res {
Ok(p) => Some(p),
Err(e) => {
make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e));
make_broken(&tenant_clone, anyhow::anyhow!(e), BrokenVerbosity::Error);
return Ok(());
}
}
@@ -1491,7 +1483,9 @@ impl Tenant {
info!("attach finished, activating");
tenant_clone.activate(broker_client, None, &ctx);
}
Err(e) => make_broken_or_stopping(&tenant_clone, anyhow::anyhow!(e)),
Err(e) => {
make_broken(&tenant_clone, anyhow::anyhow!(e), BrokenVerbosity::Error);
}
}

// If we are doing an opportunistic warmup attachment at startup, initialize
@@ -1531,27 +1525,28 @@ impl Tenant {
cancel.clone(),
)
.await?;

let tenant_manifest = match download_tenant_manifest(
remote_storage,
&self.tenant_shard_id,
self.generation,
&cancel,
)
.await
{
Ok((tenant_manifest, _, _)) => Some(tenant_manifest),
Err(DownloadError::NotFound) => None,
Err(err) => return Err(err.into()),
};
let (offloaded_add, tenant_manifest) =
match remote_timeline_client::download_tenant_manifest(
remote_storage,
&self.tenant_shard_id,
self.generation,
&cancel,
)
.await
{
Ok((tenant_manifest, _generation, _manifest_mtime)) => (
format!("{} offloaded", tenant_manifest.offloaded_timelines.len()),
tenant_manifest,
),
Err(DownloadError::NotFound) => {
("no manifest".to_string(), TenantManifest::empty())
}
Err(e) => Err(e)?,
};

info!(
"found {} timelines ({} offloaded timelines)",
remote_timeline_ids.len(),
tenant_manifest
.as_ref()
.map(|m| m.offloaded_timelines.len())
.unwrap_or(0)
"found {} timelines, and {offloaded_add}",
remote_timeline_ids.len()
);

for k in other_keys {
@@ -1560,13 +1555,11 @@ impl Tenant {

// Avoid downloading IndexPart of offloaded timelines.
let mut offloaded_with_prefix = HashSet::new();
if let Some(tenant_manifest) = &tenant_manifest {
for offloaded in tenant_manifest.offloaded_timelines.iter() {
if remote_timeline_ids.remove(&offloaded.timeline_id) {
offloaded_with_prefix.insert(offloaded.timeline_id);
} else {
// We'll take care later of timelines in the manifest without a prefix
}
for offloaded in tenant_manifest.offloaded_timelines.iter() {
if remote_timeline_ids.remove(&offloaded.timeline_id) {
offloaded_with_prefix.insert(offloaded.timeline_id);
} else {
// We'll take care later of timelines in the manifest without a prefix
}
}

@@ -1640,14 +1633,12 @@ impl Tenant {

let mut offloaded_timeline_ids = HashSet::new();
let mut offloaded_timelines_list = Vec::new();
if let Some(tenant_manifest) = &preload.tenant_manifest {
for timeline_manifest in tenant_manifest.offloaded_timelines.iter() {
let timeline_id = timeline_manifest.timeline_id;
let offloaded_timeline =
OffloadedTimeline::from_manifest(self.tenant_shard_id, timeline_manifest);
offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline)));
offloaded_timeline_ids.insert(timeline_id);
}
for timeline_manifest in preload.tenant_manifest.offloaded_timelines.iter() {
let timeline_id = timeline_manifest.timeline_id;
let offloaded_timeline =
OffloadedTimeline::from_manifest(self.tenant_shard_id, timeline_manifest);
offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline)));
offloaded_timeline_ids.insert(timeline_id);
}
// Complete deletions for offloaded timeline id's from manifest.
// The manifest will be uploaded later in this function.
@@ -1805,21 +1796,15 @@ impl Tenant {
.context("resume_deletion")
.map_err(LoadLocalTimelineError::ResumeDeletion)?;
}
let needs_manifest_upload =
offloaded_timelines_list.len() != preload.tenant_manifest.offloaded_timelines.len();
{
let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap();
offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter());
}

// Stash the preloaded tenant manifest, and upload a new manifest if changed.
//
// NB: this must happen after the tenant is fully populated above. In particular the
// offloaded timelines, which are included in the manifest.
{
let mut guard = self.remote_tenant_manifest.lock().await;
assert!(guard.is_none(), "tenant manifest set before preload"); // first populated here
*guard = preload.tenant_manifest;
if needs_manifest_upload {
self.store_tenant_manifest().await?;
}
self.maybe_upload_tenant_manifest().await?;

// The local filesystem contents are a cache of what's in the remote IndexPart;
// IndexPart is the source of truth.
@@ -2233,7 +2218,7 @@ impl Tenant {
};

// Upload new list of offloaded timelines to S3
self.maybe_upload_tenant_manifest().await?;
self.store_tenant_manifest().await?;

// Activate the timeline (if it makes sense)
if !(timeline.is_broken() || timeline.is_stopping()) {
@@ -3444,7 +3429,7 @@ impl Tenant {
shutdown_mode
};

match self.set_stopping(shutdown_progress).await {
match self.set_stopping(shutdown_progress, false, false).await {
Ok(()) => {}
Err(SetStoppingError::Broken) => {
// assume that this is acceptable
@@ -3524,13 +3509,25 @@ impl Tenant {
/// This function waits for the tenant to become active if it isn't already, before transitioning it into Stopping state.
///
/// This function is not cancel-safe!
async fn set_stopping(&self, progress: completion::Barrier) -> Result<(), SetStoppingError> {
///
/// `allow_transition_from_loading` is needed for the special case of loading task deleting the tenant.
/// `allow_transition_from_attaching` is needed for the special case of attaching deleted tenant.
async fn set_stopping(
&self,
progress: completion::Barrier,
_allow_transition_from_loading: bool,
allow_transition_from_attaching: bool,
) -> Result<(), SetStoppingError> {
let mut rx = self.state.subscribe();

// cannot stop before we're done activating, so wait out until we're done activating
rx.wait_for(|state| match state {
TenantState::Attaching if allow_transition_from_attaching => true,
TenantState::Activating(_) | TenantState::Attaching => {
info!("waiting for {state} to turn Active|Broken|Stopping");
info!(
"waiting for {} to turn Active|Broken|Stopping",
<&'static str>::from(state)
);
false
}
TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,
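
The `rx.wait_for(...)` call above leans on tokio's watch channel to park until the tenant state satisfies a predicate. A self-contained illustration of that primitive, with a plain `&'static str` standing in for the real `TenantState`:

```rust
// Minimal illustration of waiting on a watch channel until a predicate holds,
// the same primitive `set_stopping` uses to wait out Activating/Attaching.
use tokio::sync::watch;

#[tokio::main]
async fn main() -> Result<(), watch::error::RecvError> {
    let (tx, mut rx) = watch::channel("Attaching");

    // Another task drives the state forward, as attach/activate would.
    tokio::spawn(async move {
        tx.send("Activating").ok();
        tx.send("Active").ok();
    });

    // Parks until the closure returns true for the current value.
    let state = rx
        .wait_for(|s| matches!(*s, "Active" | "Broken" | "Stopping"))
        .await?;
    println!("proceeding with shutdown from state {}", *state);
    Ok(())
}
```
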
@@ -3541,24 +3538,25 @@ impl Tenant {
// we now know we're done activating, let's see whether this task is the winner to transition into Stopping
let mut err = None;
let stopping = self.state.send_if_modified(|current_state| match current_state {
TenantState::Activating(_) | TenantState::Attaching => {
unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
TenantState::Activating(_) => {
unreachable!("1we ensured above that we're done with activation, and, there is no re-activation")
}
TenantState::Attaching => {
if !allow_transition_from_attaching {
unreachable!("2we ensured above that we're done with activation, and, there is no re-activation")
};
*current_state = TenantState::Stopping { progress };
true
}
TenantState::Active => {
// FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines
// are created after the transition to Stopping. That's harmless, as the Timelines
// won't be accessible to anyone afterwards, because the Tenant is in Stopping state.
*current_state = TenantState::Stopping { progress: Some(progress) };
*current_state = TenantState::Stopping { progress };
// Continue stopping outside the closure. We need to grab timelines.lock()
// and we plan to turn it into a tokio::sync::Mutex in a future patch.
true
}
TenantState::Stopping { progress: None } => {
// An attach was cancelled, and the attach transitioned the tenant from Attaching to
// Stopping(None) to let us know it exited. Register our progress and continue.
*current_state = TenantState::Stopping { progress: Some(progress) };
true
}
TenantState::Broken { reason, .. } => {
info!(
"Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"
@@ -3566,7 +3564,7 @@ impl Tenant {
err = Some(SetStoppingError::Broken);
false
}
TenantState::Stopping { progress: Some(progress) } => {
TenantState::Stopping { progress } => {
info!("Tenant is already in Stopping state");
err = Some(SetStoppingError::AlreadyStopping(progress.clone()));
false
@@ -4067,19 +4065,18 @@ impl Tenant {

/// Generate an up-to-date TenantManifest based on the state of this Tenant.
fn build_tenant_manifest(&self) -> TenantManifest {
// Collect the offloaded timelines, and sort them for deterministic output.
let offloaded_timelines = self
.timelines_offloaded
.lock()
.unwrap()
.values()
.map(|tli| tli.manifest())
.sorted_by_key(|m| m.timeline_id)
.collect_vec();
let timelines_offloaded = self.timelines_offloaded.lock().unwrap();

let mut timeline_manifests = timelines_offloaded
.iter()
.map(|(_timeline_id, offloaded)| offloaded.manifest())
.collect::<Vec<_>>();
// Sort the manifests so that our output is deterministic
timeline_manifests.sort_by_key(|timeline_manifest| timeline_manifest.timeline_id);

TenantManifest {
version: LATEST_TENANT_MANIFEST_VERSION,
offloaded_timelines,
offloaded_timelines: timeline_manifests,
}
}

@@ -4302,7 +4299,7 @@ impl Tenant {
timelines: Mutex::new(HashMap::new()),
timelines_creating: Mutex::new(HashSet::new()),
timelines_offloaded: Mutex::new(HashMap::new()),
remote_tenant_manifest: Default::default(),
tenant_manifest_upload: Default::default(),
gc_cs: tokio::sync::Mutex::new(()),
walredo_mgr,
remote_storage,
@@ -5535,35 +5532,27 @@ impl Tenant {
.unwrap_or(0)
}

/// Builds a new tenant manifest, and uploads it if it differs from the last-known tenant
/// manifest in `Self::remote_tenant_manifest`.
///
/// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
/// changing any `Tenant` state that's included in the manifest, consider making the manifest
/// the authoritative source of data with an API that automatically uploads on changes. Revisit
/// this when the manifest is more widely used and we have a better idea of the data model.
pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
// Multiple tasks may call this function concurrently after mutating the Tenant runtime
// state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
// to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
// simple coalescing mechanism.
/// Serialize and write the latest TenantManifest to remote storage.
pub(crate) async fn store_tenant_manifest(&self) -> Result<(), TenantManifestError> {
// Only one manifest write may be done at at time, and the contents of the manifest
// must be loaded while holding this lock. This makes it safe to call this function
// from anywhere without worrying about colliding updates.
let mut guard = tokio::select! {
guard = self.remote_tenant_manifest.lock() => guard,
_ = self.cancel.cancelled() => return Err(TenantManifestError::Cancelled),
g = self.tenant_manifest_upload.lock() => {
g
},
_ = self.cancel.cancelled() => {
return Err(TenantManifestError::Cancelled);
}
};

// Build a new manifest.
let manifest = self.build_tenant_manifest();

// Check if the manifest has changed. We ignore the version number here, to avoid
// uploading every manifest on version number bumps.
if let Some(old) = guard.as_ref() {
if manifest.eq_ignoring_version(old) {
return Ok(());
}
if Some(&manifest) == (*guard).as_ref() {
// Optimisation: skip uploads that don't change anything.
return Ok(());
}

// Upload the manifest. Remote storage does no retries internally, so retry here.
// Remote storage does no retries internally, so wrap it
match backoff::retry(
|| async {
upload_tenant_manifest(
@@ -5575,7 +5564,7 @@ impl Tenant {
)
.await
},
|_| self.cancel.is_cancelled(),
|_e| self.cancel.is_cancelled(),
FAILED_UPLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"uploading tenant manifest",

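Both versions of this function share the same shape: take an async mutex so manifest writes are serialized, rebuild the manifest from current state, skip the write if nothing material changed, and only then upload. A stripped-down sketch of that coalescing pattern follows, with a stand-in `Manifest` type and a hypothetical `upload` stub rather than the pageserver's real types:

```rust
use tokio::sync::Mutex;

#[derive(Clone, PartialEq, Eq)]
struct Manifest {
    version: usize,
    offloaded_timelines: Vec<u64>, // stand-in payload
}

impl Manifest {
    // Compare everything except the version, so a version bump alone does not
    // force a re-upload of otherwise identical manifests.
    fn eq_ignoring_version(&self, other: &Self) -> bool {
        self.offloaded_timelines == other.offloaded_timelines
    }
}

struct Store {
    // Last manifest known to be in remote storage; None before the first write.
    remote_manifest: Mutex<Option<Manifest>>,
}

impl Store {
    async fn maybe_upload(&self, new: Manifest) -> anyhow::Result<()> {
        // Serialize concurrent callers; whoever holds the lock decides whether
        // an upload is still needed.
        let mut guard = self.remote_manifest.lock().await;
        if let Some(old) = guard.as_ref() {
            if new.eq_ignoring_version(old) {
                return Ok(()); // nothing changed, coalesce the write away
            }
        }
        upload(&new).await?; // hypothetical remote-storage call
        *guard = Some(new);
        Ok(())
    }
}

async fn upload(_m: &Manifest) -> anyhow::Result<()> {
    Ok(()) // placeholder for the real upload + retry logic
}
```
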
@@ -3,15 +3,11 @@ use serde::{Deserialize, Serialize};
use utils::id::TimelineId;
use utils::lsn::Lsn;

/// Tenant shard manifest, stored in remote storage. Contains offloaded timelines and other tenant
/// shard-wide information that must be persisted in remote storage.
///
/// The manifest is always updated on tenant attach, and as needed.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
/// Tenant-shard scoped manifest
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TenantManifest {
/// The manifest version. Incremented on manifest format changes, even non-breaking ones.
/// Manifests must generally always be backwards and forwards compatible for one release, to
/// allow release rollbacks.
/// Debugging aid describing the version of this manifest.
/// Can also be used for distinguishing breaking changes later on.
pub version: usize,

/// The list of offloaded timelines together with enough information
@@ -20,7 +16,6 @@ pub struct TenantManifest {
/// Note: the timelines mentioned in this list might be deleted, i.e.
/// we don't hold an invariant that the references aren't dangling.
/// Existence of index-part.json is the actual indicator of timeline existence.
#[serde(default)]
pub offloaded_timelines: Vec<OffloadedTimelineManifest>,
}

@@ -29,7 +24,7 @@ pub struct TenantManifest {
/// Very similar to [`pageserver_api::models::OffloadedTimelineInfo`],
/// but the two datastructures serve different needs, this is for a persistent disk format
/// that must be backwards compatible, while the other is only for informative purposes.
#[derive(Clone, Debug, Serialize, Deserialize, Copy, PartialEq, Eq)]
#[derive(Clone, Serialize, Deserialize, Copy, PartialEq, Eq)]
pub struct OffloadedTimelineManifest {
pub timeline_id: TimelineId,
/// Whether the timeline has a parent it has been branched off from or not
@@ -40,114 +35,20 @@ pub struct OffloadedTimelineManifest {
pub archived_at: NaiveDateTime,
}

/// The newest manifest version. This should be incremented on changes, even non-breaking ones. We
/// do not use deny_unknown_fields, so new fields are not breaking.
pub const LATEST_TENANT_MANIFEST_VERSION: usize = 1;

impl TenantManifest {
/// Returns true if the manifests are equal, ignoring the version number. This avoids
/// re-uploading all manifests just because the version number is bumped.
pub fn eq_ignoring_version(&self, other: &Self) -> bool {
// Fast path: if the version is equal, just compare directly.
if self.version == other.version {
return self == other;
pub(crate) fn empty() -> Self {
Self {
version: LATEST_TENANT_MANIFEST_VERSION,
offloaded_timelines: vec![],
}

// We could alternatively just clone and modify the version here.
let Self {
version: _, // ignore version
offloaded_timelines,
} = self;

offloaded_timelines == &other.offloaded_timelines
}

/// Decodes a manifest from JSON.
pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
serde_json::from_slice(bytes)
serde_json::from_slice::<Self>(bytes)
}

/// Encodes a manifest as JSON.
pub fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {
pub(crate) fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {
serde_json::to_vec(self)
}
}

#[cfg(test)]
mod tests {
use std::str::FromStr;

use utils::id::TimelineId;

use super::*;

/// Empty manifests should be parsed. Version is required.
#[test]
fn parse_empty() -> anyhow::Result<()> {
let json = r#"{
"version": 0
}"#;
let expected = TenantManifest {
version: 0,
offloaded_timelines: Vec::new(),
};
assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);
Ok(())
}

/// Unknown fields should be ignored, for forwards compatibility.
#[test]
fn parse_unknown_fields() -> anyhow::Result<()> {
let json = r#"{
"version": 1,
"foo": "bar"
}"#;
let expected = TenantManifest {
version: 1,
offloaded_timelines: Vec::new(),
};
assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);
Ok(())
}

/// v1 manifests should be parsed, for backwards compatibility.
#[test]
fn parse_v1() -> anyhow::Result<()> {
let json = r#"{
"version": 1,
"offloaded_timelines": [
{
"timeline_id": "5c4df612fd159e63c1b7853fe94d97da",
"archived_at": "2025-03-07T11:07:11.373105434"
},
{
"timeline_id": "f3def5823ad7080d2ea538d8e12163fa",
"ancestor_timeline_id": "5c4df612fd159e63c1b7853fe94d97da",
"ancestor_retain_lsn": "0/1F79038",
"archived_at": "2025-03-05T11:10:22.257901390"
}
]
}"#;
let expected = TenantManifest {
version: 1,
offloaded_timelines: vec![
OffloadedTimelineManifest {
timeline_id: TimelineId::from_str("5c4df612fd159e63c1b7853fe94d97da")?,
ancestor_timeline_id: None,
ancestor_retain_lsn: None,
archived_at: NaiveDateTime::from_str("2025-03-07T11:07:11.373105434")?,
},
OffloadedTimelineManifest {
timeline_id: TimelineId::from_str("f3def5823ad7080d2ea538d8e12163fa")?,
ancestor_timeline_id: Some(TimelineId::from_str(
"5c4df612fd159e63c1b7853fe94d97da",
)?),
ancestor_retain_lsn: Some(Lsn::from_str("0/1F79038")?),
archived_at: NaiveDateTime::from_str("2025-03-05T11:10:22.257901390")?,
},
],
};
assert_eq!(expected, TenantManifest::from_json_bytes(json.as_bytes())?);
Ok(())
}
}

@@ -61,7 +61,6 @@ pub(crate) async fn upload_index_part(
.await
.with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
}

/// Serializes and uploads the given tenant manifest data to the remote storage.
pub(crate) async fn upload_tenant_manifest(
storage: &GenericRemoteStorage,
@@ -77,14 +76,16 @@ pub(crate) async fn upload_tenant_manifest(
});
pausable_failpoint!("before-upload-manifest-pausable");

let serialized = Bytes::from(tenant_manifest.to_json_bytes()?);
let tenant_manifest_size = serialized.len();
let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);
let serialized = tenant_manifest.to_json_bytes()?;
let serialized = Bytes::from(serialized);

let tenant_manifest_site = serialized.len();

let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);
storage
.upload_storage_object(
futures::stream::once(futures::future::ready(Ok(serialized))),
tenant_manifest_size,
tenant_manifest_site,
&remote_path,
cancel,
)

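`upload_tenant_manifest` performs a single PUT; the retries come from the caller (the `backoff::retry(...)` wrapper shown earlier in `store_tenant_manifest`), since remote storage does no retries internally. A generic sketch of such a retry-with-cancellation wrapper, using tokio and tokio_util's `CancellationToken` rather than the pageserver's own `backoff` helper:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

// Retry an async operation with exponential backoff, giving up when the
// cancellation token fires. Returns None if cancelled while backing off.
// This only models the role backoff::retry plays around upload_tenant_manifest;
// the real helper also warns after a configurable failure threshold.
async fn retry_with_backoff<T, E, F, Fut>(
    mut op: F,
    max_attempts: u32,
    cancel: &CancellationToken,
) -> Option<Result<T, E>>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut backoff = Duration::from_millis(100);
    for attempt in 1..=max_attempts {
        match op().await {
            Ok(v) => return Some(Ok(v)),
            Err(e) if attempt == max_attempts || cancel.is_cancelled() => {
                return Some(Err(e));
            }
            Err(_) => {
                // Wait out the backoff, but return early if cancelled meanwhile.
                tokio::select! {
                    _ = tokio::time::sleep(backoff) => {}
                    _ = cancel.cancelled() => return None,
                }
                backoff = (backoff * 2).min(Duration::from_secs(10));
            }
        }
    }
    None
}
```
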
@@ -268,12 +268,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
error_run += 1;
let backoff =
exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS);
log_compaction_error(
&err,
Some((error_run, backoff)),
cancel.is_cancelled(),
false,
);
log_compaction_error(&err, Some((error_run, backoff)), cancel.is_cancelled());
continue;
}
}
@@ -290,7 +285,6 @@ pub(crate) fn log_compaction_error(
err: &CompactionError,
retry_info: Option<(u32, Duration)>,
task_cancelled: bool,
degrade_to_warning: bool,
) {
use CompactionError::*;

@@ -339,7 +333,6 @@ pub(crate) fn log_compaction_error(
}
} else {
match level {
Level::ERROR if degrade_to_warning => warn!("Compaction failed and discarded: {err:#}"),
Level::ERROR => error!("Compaction failed: {err:#}"),
Level::INFO => info!("Compaction failed: {err:#}"),
level => unimplemented!("unexpected level {level:?}"),

@@ -1940,7 +1940,7 @@ impl Timeline {
)
.await;
if let Err(err) = &res {
log_compaction_error(err, None, cancel.is_cancelled(), false);
log_compaction_error(err, None, cancel.is_cancelled());
}
res
}
@@ -6353,33 +6353,10 @@ impl Timeline {

/// Reconstruct a value, using the given base image and WAL records in 'data'.
async fn reconstruct_value(
&self,
key: Key,
request_lsn: Lsn,
data: ValueReconstructState,
) -> Result<Bytes, PageReconstructError> {
self.reconstruct_value_inner(key, request_lsn, data, false)
.await
}

/// Reconstruct a value, using the given base image and WAL records in 'data'. It does not fire critical errors because
/// sometimes it is expected to fail due to unreplayable history described in <https://github.com/neondatabase/neon/issues/10395>.
async fn reconstruct_value_wo_critical_error(
&self,
key: Key,
request_lsn: Lsn,
data: ValueReconstructState,
) -> Result<Bytes, PageReconstructError> {
self.reconstruct_value_inner(key, request_lsn, data, true)
.await
}

async fn reconstruct_value_inner(
&self,
key: Key,
request_lsn: Lsn,
mut data: ValueReconstructState,
no_critical_error: bool,
) -> Result<Bytes, PageReconstructError> {
// Perform WAL redo if needed
data.records.reverse();
@@ -6436,9 +6413,7 @@ impl Timeline {
Ok(img) => img,
Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled),
Err(walredo::Error::Other(err)) => {
if !no_critical_error {
critical!("walredo failure during page reconstruction: {err:?}");
}
critical!("walredo failure during page reconstruction: {err:?}");
return Err(PageReconstructError::WalRedo(
err.context("reconstruct a page image"),
));

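The two public variants above differ only in whether a WAL-redo failure is escalated via `critical!` before being returned; both delegate to one `_inner` function carrying a boolean flag. A toy sketch of that wrapper-with-flag shape, with invented names and a dummy redo step, assuming nothing about the real `Timeline` API:

```rust
use anyhow::{anyhow, Result};

// Two thin public entry points share one implementation; the flag only controls
// whether a failure is escalated (here: logged loudly) before being propagated.
fn reconstruct(key: u64) -> Result<Vec<u8>> {
    reconstruct_inner(key, false)
}

fn reconstruct_wo_critical_error(key: u64) -> Result<Vec<u8>> {
    reconstruct_inner(key, true)
}

fn reconstruct_inner(key: u64, no_critical_error: bool) -> Result<Vec<u8>> {
    match redo(key) {
        Ok(page) => Ok(page),
        Err(err) => {
            if !no_critical_error {
                eprintln!("critical: redo failure for key {key}: {err:?}");
            }
            Err(err)
        }
    }
}

// Stand-in for WAL redo; fails for odd keys just to exercise both paths.
fn redo(key: u64) -> Result<Vec<u8>> {
    if key % 2 == 0 {
        Ok(vec![0u8; 8192])
    } else {
        Err(anyhow!("unreplayable history for key {key}"))
    }
}
```
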
@@ -448,7 +448,7 @@ impl GcCompactionQueue {
) -> Result<CompactionOutcome, CompactionError> {
let res = self.iteration_inner(cancel, ctx, gc_block, timeline).await;
if let Err(err) = &res {
log_compaction_error(err, None, cancel.is_cancelled(), true);
log_compaction_error(err, None, cancel.is_cancelled());
}
match res {
Ok(res) => Ok(res),
@@ -2410,9 +2410,7 @@ impl Timeline {
} else {
lsn_split_points[i]
};
let img = self
.reconstruct_value_wo_critical_error(key, request_lsn, state)
.await?;
let img = self.reconstruct_value(key, request_lsn, state).await?;
Some((request_lsn, img))
} else {
None
@@ -3108,6 +3106,8 @@ impl Timeline {
// the key and LSN range are determined. However, to keep things simple here, we still
// create this writer, and discard the writer in the end.

let mut keys_processed = 0;

while let Some(((key, lsn, val), desc)) = merge_iter
.next_with_trace()
.await
@@ -3118,7 +3118,9 @@ impl Timeline {
return Err(CompactionError::ShuttingDown);
}

keys_processed += 1;
let should_yield = yield_for_l0
&& keys_processed % 1000 == 0
&& self
.l0_compaction_trigger
.notified()

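The hunk above adds a counter so the compaction loop only polls its yield condition every 1000 keys instead of on every record. A minimal stand-alone sketch of that periodic-check idea, with an atomic flag standing in for the real `l0_compaction_trigger` notification:

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Process a batch of keys, but only look at the (comparatively expensive) yield
// signal every 1000 items; return how many keys were handled before yielding.
fn process_until_yield(keys: &[u64], l0_pressure: &AtomicBool) -> usize {
    let mut keys_processed = 0;
    for _key in keys {
        // ... per-key compaction work would happen here ...
        keys_processed += 1;

        let should_yield = keys_processed % 1000 == 0 && l0_pressure.load(Ordering::Relaxed);
        if should_yield {
            break; // hand control back so L0 compaction can run first
        }
    }
    keys_processed
}

fn main() {
    let keys: Vec<u64> = (0..10_000).collect();
    let pressure = AtomicBool::new(true);
    let done = process_until_yield(&keys, &pressure);
    assert_eq!(done, 1000); // yields at the first check once pressure is signalled
}
```
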
@@ -410,13 +410,10 @@ impl DeleteTimelineFlow {
// So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted.
// However, we handle this case in tenant loading code so the next time we attach, the issue is
// resolved.
tenant
.maybe_upload_tenant_manifest()
.await
.map_err(|err| match err {
TenantManifestError::Cancelled => DeleteTimelineError::Cancelled,
err => DeleteTimelineError::Other(err.into()),
})?;
tenant.store_tenant_manifest().await.map_err(|e| match e {
TenantManifestError::Cancelled => DeleteTimelineError::Cancelled,
_ => DeleteTimelineError::Other(e.into()),
})?;

*guard = Self::Finished;

@@ -111,7 +111,7 @@ pub(crate) async fn offload_timeline(
// at the next restart attach it again.
// For that to happen, we'd need to make the manifest reflect our *intended* state,
// not our actual state of offloaded timelines.
tenant.maybe_upload_tenant_manifest().await?;
tenant.store_tenant_manifest().await?;

tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})");

@@ -1563,12 +1563,8 @@ local_cache_pages(PG_FUNCTION_ARGS)
hash_seq_init(&status, lfc_hash);
while ((entry = hash_seq_search(&status)) != NULL)
{
/* Skip hole tags */
if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
{
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
n_pages += GET_STATE(entry, i) == AVAILABLE;
}
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
n_pages += GET_STATE(entry, i) == AVAILABLE;
}
}
}
@@ -1596,19 +1592,16 @@ local_cache_pages(PG_FUNCTION_ARGS)
{
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
{
if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
if (GET_STATE(entry, i) == AVAILABLE)
{
if (GET_STATE(entry, i) == AVAILABLE)
{
fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i;
fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].forknum = entry->key.forkNum;
fctx->record[n].blocknum = entry->key.blockNum + i;
fctx->record[n].accesscount = entry->access_count;
n += 1;
}
fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i;
fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n].forknum = entry->key.forkNum;
fctx->record[n].blocknum = entry->key.blockNum + i;
fctx->record[n].accesscount = entry->access_count;
n += 1;
}
}
}

@@ -49,8 +49,6 @@ def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
conn = endpoint.connect()
cur = conn.cursor()

cur.execute("create extension neon")

def get_lfc_size() -> tuple[int, int]:
lfc_file_path = endpoint.lfc_path()
lfc_file_size = lfc_file_path.stat().st_size
@@ -105,23 +103,3 @@ def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
time.sleep(1)

assert int(lfc_file_blocks) <= 128 * 1024

# Now test that number of rows returned by local_cache is the same as file_cache_used_pages.
# Perform several iterations to make cache cache content stabilized.
nretries = 10
while True:
cur.execute("select count(*) from local_cache")
local_cache_size = cur.fetchall()[0][0]

cur.execute(
"select lfc_value::bigint FROM neon_lfc_stats where lfc_key='file_cache_used_pages'"
)
used_pages = cur.fetchall()[0][0]

if local_cache_size == used_pages or nretries == 0:
break

nretries = nretries - 1
time.sleep(1)

assert local_cache_size == used_pages

@@ -4109,7 +4109,6 @@ def test_storcon_create_delete_sk_down(neon_env_builder: NeonEnvBuilder, restart
env.storage_controller.allowed_errors.extend(
[
".*Call to safekeeper.* management API still failed after.*",
".*Call to safekeeper.* management API failed, will retry.*",
".*reconcile_one.*tenant_id={tenant_id}.*Call to safekeeper.* management API still failed after.*",
]
)

@@ -318,7 +318,7 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timel
neon_env_builder.pageserver_remote_storage,
prefix=f"tenants/{str(tenant_id)}/",
)
assert_prefix_not_empty(
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix=f"tenants/{str(tenant_id)}/tenant-manifest",
)
@@ -387,7 +387,7 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timel
sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key < 500")
assert sum == sum_again

assert_prefix_not_empty(
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix=f"tenants/{str(env.initial_tenant)}/tenant-manifest",
)
@@ -924,7 +924,7 @@ def test_timeline_offload_generations(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_remote_storage,
prefix=f"tenants/{str(tenant_id)}/",
)
assert_prefix_not_empty(
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
prefix=f"tenants/{str(tenant_id)}/tenant-manifest",
)