diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs
index f64cd9e206..e9c24ac723 100644
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -824,6 +824,7 @@ impl RemoteStorage for AzureBlobStorage {
         timestamp: SystemTime,
         done_if_after: SystemTime,
         cancel: &CancellationToken,
+        _complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError> {
         let msg = "PLEASE NOTE: Azure Blob storage time-travel recovery may not work as expected "
             .to_string()
diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs
index b265d37a62..9e445dd72f 100644
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -440,6 +440,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
         timestamp: SystemTime,
         done_if_after: SystemTime,
         cancel: &CancellationToken,
+        complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError>;
 }
 
@@ -651,22 +652,23 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
         timestamp: SystemTime,
         done_if_after: SystemTime,
         cancel: &CancellationToken,
+        complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError> {
         match self {
             Self::LocalFs(s) => {
-                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
+                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
                     .await
             }
             Self::AwsS3(s) => {
-                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
+                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
                     .await
             }
             Self::AzureBlob(s) => {
-                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
+                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
                     .await
             }
             Self::Unreliable(s) => {
-                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
+                s.time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
                     .await
             }
         }
diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs
index 6607b55f1a..8320d7afdc 100644
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -610,6 +610,7 @@ impl RemoteStorage for LocalFs {
         _timestamp: SystemTime,
         _done_if_after: SystemTime,
         _cancel: &CancellationToken,
+        _complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError> {
         Err(TimeTravelError::Unimplemented)
     }
diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs
index 004aad447e..8a2e5bd10e 100644
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -981,22 +981,16 @@ impl RemoteStorage for S3Bucket {
         timestamp: SystemTime,
         done_if_after: SystemTime,
         cancel: &CancellationToken,
+        complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError> {
         let kind = RequestKind::TimeTravel;
         let permit = self.permit(kind, cancel).await?;
         tracing::trace!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
 
-        // Limit the number of versions deletions, mostly so that we don't
-        // keep requesting forever if the list is too long, as we'd put the
-        // list in RAM.
-        // Building a list of 100k entries that reaches the limit roughly takes
-        // 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
-        const COMPLEXITY_LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);
-
         let mode = ListingMode::NoDelimiter;
         let version_listing = self
-            .list_versions_with_permit(&permit, prefix, mode, COMPLEXITY_LIMIT, cancel)
+            .list_versions_with_permit(&permit, prefix, mode, complexity_limit, cancel)
            .await
            .map_err(|err| match err {
                DownloadError::Other(e) => TimeTravelError::Other(e),
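Note on the change above (not part of the diff): the S3 backend no longer hard-codes `COMPLEXITY_LIMIT` and instead forwards the caller-supplied `complexity_limit` to `list_versions_with_permit`. A minimal, self-contained sketch of the intended semantics, assuming the listing accumulates versions page by page and aborts once the limit is exceeded; the helper name and error type here are hypothetical, not the crate's API:

```rust
use std::num::NonZeroU32;

/// Hypothetical sketch (not the crate's real API): accumulate listed versions
/// page by page and bail out once the caller-provided complexity limit is
/// exceeded, instead of buffering an unbounded list in RAM.
fn accumulate_versions(
    pages: impl IntoIterator<Item = Vec<String>>,
    complexity_limit: Option<NonZeroU32>,
) -> Result<Vec<String>, String> {
    let mut versions = Vec::new();
    for page in pages {
        versions.extend(page);
        if let Some(limit) = complexity_limit {
            if versions.len() > limit.get() as usize {
                return Err(format!("version listing exceeded limit of {limit}"));
            }
        }
    }
    Ok(versions)
}

fn main() {
    let pages = vec![
        vec!["a@v1".to_string(), "a@v2".to_string()],
        vec!["b@v1".to_string()],
    ];
    // With a limit of 2, the third listed version pushes us over the cap.
    assert!(accumulate_versions(pages.clone(), NonZeroU32::new(2)).is_err());
    // `None` means "no limit", as at the test and CLI call sites below.
    assert_eq!(accumulate_versions(pages, None).unwrap().len(), 3);
}
```

Using `Option<NonZeroU32>` for the parameter makes "no limit" (`None`) distinct from an accidental zero.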
diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs
index 894cf600be..f9856a5856 100644
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -240,11 +240,12 @@ impl RemoteStorage for UnreliableWrapper {
         timestamp: SystemTime,
         done_if_after: SystemTime,
         cancel: &CancellationToken,
+        complexity_limit: Option<NonZeroU32>,
     ) -> Result<(), TimeTravelError> {
         self.attempt(RemoteOp::TimeTravelRecover(prefix.map(|p| p.to_owned())))
             .map_err(TimeTravelError::Other)?;
         self.inner
-            .time_travel_recover(prefix, timestamp, done_if_after, cancel)
+            .time_travel_recover(prefix, timestamp, done_if_after, cancel, complexity_limit)
             .await
     }
 }
diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs
index d38e13fd05..6b893edf75 100644
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -157,7 +157,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow
     // No changes after recovery to t2 (no-op)
     let t_final = time_point().await;
     ctx.client
-        .time_travel_recover(None, t2, t_final, &cancel)
+        .time_travel_recover(None, t2, t_final, &cancel, None)
         .await?;
     let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
     println!("after recovery to t2: {t2_files_recovered:?}");
@@ -173,7 +173,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow
     // after recovery to t1: path1 is back, path2 has the old content
     let t_final = time_point().await;
     ctx.client
-        .time_travel_recover(None, t1, t_final, &cancel)
+        .time_travel_recover(None, t1, t_final, &cancel, None)
         .await?;
     let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
     println!("after recovery to t1: {t1_files_recovered:?}");
@@ -189,7 +189,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow
     // after recovery to t0: everything is gone except for path1
     let t_final = time_point().await;
     ctx.client
-        .time_travel_recover(None, t0, t_final, &cancel)
+        .time_travel_recover(None, t0, t_final, &cancel, None)
         .await?;
     let t0_files_recovered = list_files(&ctx.client, &cancel).await?;
     println!("after recovery to t0: {t0_files_recovered:?}");
diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs
index 1d81b839a8..3cd4faaf2e 100644
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -176,9 +176,11 @@ async fn main() -> anyhow::Result<()> {
             let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?;
             let storage = remote_storage::GenericRemoteStorage::from_config(&config).await;
             let cancel = CancellationToken::new();
+            // Complexity limit: as this command runs locally, we should have plenty of
+            // memory available and don't need to limit the number of versions to delete.
             storage
                 .unwrap()
-                .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel)
+                .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel, None)
                 .await?;
         }
         Commands::Key(dkc) => dkc.execute(),
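The CLI call site above passes `None`, i.e. no limit. If an operator-facing knob were ever wanted, a tiny helper could map an optional count onto the new parameter; this is purely a hypothetical sketch, no such flag exists in `pageserver_ctl`:

```rust
use std::num::NonZeroU32;

/// Hypothetical helper (no such flag exists in `pageserver_ctl` today): map an
/// optional operator-supplied count onto the `complexity_limit` argument,
/// treating absence or 0 as "no limit".
fn parse_complexity_limit(arg: Option<u32>) -> Option<NonZeroU32> {
    arg.and_then(NonZeroU32::new)
}

fn main() {
    assert_eq!(parse_complexity_limit(None), None);
    assert_eq!(parse_complexity_limit(Some(0)), None);
    assert_eq!(parse_complexity_limit(Some(100_000)), NonZeroU32::new(100_000));
}
```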
diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs
index 89f6136530..ffb4717d9f 100644
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -1,6 +1,7 @@
 //! Helper functions to upload files to remote storage with a RemoteStorage
 
 use std::io::{ErrorKind, SeekFrom};
+use std::num::NonZeroU32;
 use std::time::SystemTime;
 
 use anyhow::{Context, bail};
@@ -228,11 +229,25 @@ pub(crate) async fn time_travel_recover_tenant(
         let timelines_path = super::remote_timelines_path(tenant_shard_id);
         prefixes.push(timelines_path);
     }
+
+    // Limit the number of version deletions, mostly so that we don't
+    // keep requesting forever if the list is too long, as we'd put the
+    // list in RAM.
+    // Building a list of 100k entries that reaches the limit roughly takes
+    // 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
+    const COMPLEXITY_LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);
+
     for prefix in &prefixes {
         backoff::retry(
             || async {
                 storage
-                    .time_travel_recover(Some(prefix), timestamp, done_if_after, cancel)
+                    .time_travel_recover(
+                        Some(prefix),
+                        timestamp,
+                        done_if_after,
+                        cancel,
+                        COMPLEXITY_LIMIT,
+                    )
                     .await
             },
             |e| !matches!(e, TimeTravelError::Other(_)),
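The `const COMPLEXITY_LIMIT` item above works because `NonZeroU32::new` is a `const fn` returning `Option<NonZeroU32>`. A quick standalone check of that pattern:

```rust
use std::num::NonZeroU32;

// `NonZeroU32::new` is a `const fn`, which is what lets `upload.rs` build
// `COMPLEXITY_LIMIT` as a `const` item; a zero argument would yield `None`.
const LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);

fn main() {
    assert_eq!(LIMIT.map(NonZeroU32::get), Some(100_000));
    assert_eq!(NonZeroU32::new(0), None);
}
```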