From 8e9f2ffce42e2aa4fed1da166c0a649ed31c450d Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Wed, 30 Aug 2023 14:59:50 +0800 Subject: [PATCH] fix: skip procedure if target route is not found (#2277) * fix: skip procedure if target route is not found * chore: apply suggestions from CR --- src/meta-srv/src/procedure/region_failover.rs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/meta-srv/src/procedure/region_failover.rs b/src/meta-srv/src/procedure/region_failover.rs index 08e4f9ad67..af19ce9c24 100644 --- a/src/meta-srv/src/procedure/region_failover.rs +++ b/src/meta-srv/src/procedure/region_failover.rs @@ -26,6 +26,7 @@ use std::time::Duration; use async_trait::async_trait; use common_meta::ident::TableIdent; +use common_meta::key::datanode_table::DatanodeTableKey; use common_meta::key::TableMetadataManagerRef; use common_meta::{ClusterId, RegionIdent}; use common_procedure::error::{ @@ -168,6 +169,11 @@ impl RegionFailoverManager { return Ok(()); } + if !self.failed_region_exists(failed_region).await? { + // The failed region could be failover by another procedure. + return Ok(()); + } + let context = self.create_context(); let procedure = RegionFailoverProcedure::new(failed_region.clone(), context); let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure)); @@ -207,6 +213,27 @@ impl RegionFailoverManager { .context(TableMetadataManagerSnafu)? .is_some()) } + + async fn failed_region_exists(&self, failed_region: &RegionIdent) -> Result { + let table_id = failed_region.table_ident.table_id; + let datanode_id = failed_region.datanode_id; + + let value = self + .table_metadata_manager + .datanode_table_manager() + .get(&DatanodeTableKey::new(datanode_id, table_id)) + .await + .context(TableMetadataManagerSnafu)?; + + Ok(value + .map(|value| { + value + .regions + .iter() + .any(|region| *region == failed_region.region_number) + }) + .unwrap_or_default()) + } } /// A "Node" in the state machine of region failover procedure.