diff --git a/config/config.md b/config/config.md
index f34a41d861..f3230190c9 100644
--- a/config/config.md
+++ b/config/config.md
@@ -319,6 +319,7 @@
| `selector` | String | `round_robin` | Datanode selector type.
- `round_robin` (default value)
- `lease_based`
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.
This feature is only available on GreptimeDB running on cluster mode and
- Using Remote WAL
- Using shared storage (e.g., s3). |
+| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.
**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml
index 89c92352b2..0e7f9b74f0 100644
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -50,6 +50,10 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
+## Whether to allow region failover on local WAL.
+## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
+allow_region_failover_on_local_wal = false
+
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"
diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs
index 34b3cac25e..6c9111dd9c 100644
--- a/src/meta-srv/src/metasrv.rs
+++ b/src/meta-srv/src/metasrv.rs
@@ -111,6 +111,11 @@ pub struct MetasrvOptions {
pub use_memory_store: bool,
/// Whether to enable region failover.
pub enable_region_failover: bool,
+ /// Whether to allow region failover on local WAL.
+ ///
+ /// If true, region failover is allowed even when the local WAL is used.
+ /// Note: enabling this option is not recommended, because it may lead to data loss during failover.
+ pub allow_region_failover_on_local_wal: bool,
/// The HTTP server options.
pub http: HttpOptions,
/// The logging options.
@@ -173,6 +178,7 @@ impl Default for MetasrvOptions {
selector: SelectorType::default(),
use_memory_store: false,
enable_region_failover: false,
+ allow_region_failover_on_local_wal: false,
http: HttpOptions::default(),
logging: LoggingOptions {
dir: format!("{METASRV_HOME}/logs"),
diff --git a/src/meta-srv/src/metasrv/builder.rs b/src/meta-srv/src/metasrv/builder.rs
index 02f835e226..0c93e4e4c7 100644
--- a/src/meta-srv/src/metasrv/builder.rs
+++ b/src/meta-srv/src/metasrv/builder.rs
@@ -40,7 +40,8 @@ use common_meta::state_store::KvStateStore;
use common_meta::wal_options_allocator::{build_kafka_client, build_wal_options_allocator};
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::ProcedureManagerRef;
-use snafu::ResultExt;
+use common_telemetry::warn;
+use snafu::{ensure, ResultExt};
use crate::cache_invalidator::MetasrvCacheInvalidator;
use crate::cluster::{MetaPeerClientBuilder, MetaPeerClientRef};
@@ -276,18 +277,25 @@ impl MetasrvBuilder {
},
));
let peer_lookup_service = Arc::new(MetaPeerLookupService::new(meta_peer_client.clone()));
+
if !is_remote_wal && options.enable_region_failover {
- return error::UnexpectedSnafu {
- violated: "Region failover is not supported in the local WAL implementation!",
+ ensure!(
+ options.allow_region_failover_on_local_wal,
+ error::UnexpectedSnafu {
+ violated: "Region failover is not supported in the local WAL implementation!
+ If you want to enable region failover for local WAL, please set `allow_region_failover_on_local_wal` to true.",
+ }
+ );
+ if options.allow_region_failover_on_local_wal {
+ warn!("Region failover is force enabled in the local WAL implementation! This may lead to data loss during failover!");
}
- .fail();
}
let (tx, rx) = RegionSupervisor::channel();
let (region_failure_detector_controller, region_supervisor_ticker): (
RegionFailureDetectorControllerRef,
Option>,
- ) = if options.enable_region_failover && is_remote_wal {
+ ) = if options.enable_region_failover {
(
Arc::new(RegionFailureDetectorControl::new(tx.clone())) as _,
Some(Arc::new(RegionSupervisorTicker::new(
@@ -313,7 +321,7 @@ impl MetasrvBuilder {
));
region_migration_manager.try_start()?;
- let region_failover_handler = if options.enable_region_failover && is_remote_wal {
+ let region_failover_handler = if options.enable_region_failover {
let region_supervisor = RegionSupervisor::new(
rx,
options.failure_detector,