storcon: Fix migration for Attached(0) tenants (#12256)

## Problem

`Attached(0)` tenant migrations can get stuck if the heatmap file has
not been uploaded.

## Summary of Changes

- Added a test to reproduce the issue.
- Introduced a `kick_secondary_downloads` config flag:
  - Enabled in testing environments.
  - Disabled in production (and in the new test).
- Updated `Attached(0)` locations to consider the number of secondaries
in their intent when deciding whether to download the heatmap.
This commit is contained in:
Aleksandr Sarantsev
2025-06-23 22:55:26 +04:00
committed by GitHub
parent 85164422d0
commit 5eecde461d
8 changed files with 108 additions and 6 deletions

View File

@@ -5,6 +5,9 @@ use std::time::Duration;
use anyhow::{Context, anyhow};
use camino::Utf8PathBuf;
#[cfg(feature = "testing")]
use clap::ArgAction;
use clap::Parser;
use futures::future::OptionFuture;
use http_utils::tls_certs::ReloadingCertificateResolver;
@@ -213,6 +216,13 @@ struct Cli {
/// This option exists primarily for testing purposes.
#[arg(long, default_value = "3", value_parser = clap::value_parser!(i64).range(1..))]
timeline_safekeeper_count: i64,
/// When set, actively checks and initiates heatmap downloads/uploads during reconciliation.
/// This speeds up migrations by avoiding the default wait for the heatmap download interval.
/// Primarily useful for testing to reduce test execution time.
#[cfg(feature = "testing")]
#[arg(long, default_value = "true", action=ArgAction::Set)]
kick_secondary_downloads: bool,
}
enum StrictMode {
@@ -445,6 +455,8 @@ async fn async_main() -> anyhow::Result<()> {
timelines_onto_safekeepers: args.timelines_onto_safekeepers,
use_local_compute_notifications: args.use_local_compute_notifications,
timeline_safekeeper_count: args.timeline_safekeeper_count,
#[cfg(feature = "testing")]
kick_secondary_downloads: args.kick_secondary_downloads,
};
// Validate that we can connect to the database