mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-24 00:20:37 +00:00
storage_controller: reconcile completed imports at start-up (#11614)
## Problem In https://github.com/neondatabase/neon/pull/11345 coordination of imports moved to the storage controller. It involves notifying cplane when the import has been completed by calling an idempotent endpoint. If the storage controller shuts down in the middle of finalizing an import, it would never be retried. ## Summary of changes Reconcile imports at start-up by fetching the complete imports from the database and spawning a background task which notifies cplane. Closes: https://github.com/neondatabase/neon/issues/11570
This commit is contained in:
@@ -133,6 +133,7 @@ pub(crate) enum DatabaseOperation {
|
||||
InsertTimelineImport,
|
||||
UpdateTimelineImport,
|
||||
DeleteTimelineImport,
|
||||
ListTimelineImports,
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -1640,6 +1641,35 @@ impl Persistence {
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn list_complete_timeline_imports(
|
||||
&self,
|
||||
) -> DatabaseResult<Vec<TimelineImport>> {
|
||||
use crate::schema::timeline_imports::dsl;
|
||||
let persistent = self
|
||||
.with_measured_conn(DatabaseOperation::ListTimelineImports, move |conn| {
|
||||
Box::pin(async move {
|
||||
let from_db: Vec<TimelineImportPersistence> =
|
||||
dsl::timeline_imports.load(conn).await?;
|
||||
Ok(from_db)
|
||||
})
|
||||
})
|
||||
.await?;
|
||||
|
||||
let imports: Result<Vec<TimelineImport>, _> = persistent
|
||||
.into_iter()
|
||||
.map(TimelineImport::from_persistent)
|
||||
.collect();
|
||||
match imports {
|
||||
Ok(ok) => Ok(ok
|
||||
.into_iter()
|
||||
.filter(|import| import.is_complete())
|
||||
.collect()),
|
||||
Err(err) => Err(DatabaseError::Logical(format!(
|
||||
"failed to deserialize import: {err}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn delete_timeline_import(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
|
||||
@@ -878,6 +878,22 @@ impl Service {
|
||||
});
|
||||
}
|
||||
|
||||
// Fetch the list of completed imports and attempt to finalize them in the background.
|
||||
// This handles the case where the previous storage controller instance shut down
|
||||
// whilst finalizing imports.
|
||||
let complete_imports = self.persistence.list_complete_timeline_imports().await;
|
||||
match complete_imports {
|
||||
Ok(ok) => {
|
||||
tokio::task::spawn({
|
||||
let finalize_imports_self = self.clone();
|
||||
async move { finalize_imports_self.finalize_timeline_imports(ok).await }
|
||||
});
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!("Could not retrieve completed imports from database: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)"
|
||||
);
|
||||
@@ -3869,13 +3885,10 @@ impl Service {
|
||||
self: &Arc<Self>,
|
||||
import: TimelineImport,
|
||||
) -> anyhow::Result<()> {
|
||||
// TODO(vlad): On start-up, load up the imports and notify cplane of the
|
||||
// ones that have been completed. This assumes the new cplane API will
|
||||
// be idempotent. If that's not possible, bang a flag in the database.
|
||||
// https://github.com/neondatabase/neon/issues/11570
|
||||
|
||||
tracing::info!("Finalizing timeline import");
|
||||
|
||||
pausable_failpoint!("timeline-import-pre-cplane-notification");
|
||||
|
||||
let import_failed = import.completion_error().is_some();
|
||||
|
||||
if !import_failed {
|
||||
@@ -3926,6 +3939,15 @@ impl Service {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn finalize_timeline_imports(self: &Arc<Self>, imports: Vec<TimelineImport>) {
|
||||
futures::future::join_all(
|
||||
imports
|
||||
.into_iter()
|
||||
.map(|import| self.finalize_timeline_import(import)),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn timeline_active_on_all_shards(
|
||||
self: &Arc<Self>,
|
||||
import: &TimelineImport,
|
||||
|
||||
@@ -103,7 +103,7 @@ impl TimelineImport {
|
||||
let crnt = occ.get_mut();
|
||||
if *crnt == status {
|
||||
Ok(TimelineImportUpdateFollowUp::None)
|
||||
} else if crnt.is_terminal() && !status.is_terminal() {
|
||||
} else if crnt.is_terminal() && *crnt != status {
|
||||
Err(TimelineImportUpdateError::UnexpectedUpdate)
|
||||
} else {
|
||||
*crnt = status;
|
||||
|
||||
Reference in New Issue
Block a user