diff --git a/config/config.md b/config/config.md
index aaa92c7f35..107da0b35b 100644
--- a/config/config.md
+++ b/config/config.md
@@ -319,6 +319,7 @@
| `selector` | String | `round_robin` | Datanode selector type. - `round_robin` (default value) - `lease_based` - `load_based` For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover. This feature is only available on GreptimeDB running on cluster mode and - Using Remote WAL - Using shared storage (e.g., s3). |
+| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml
index 18b203f204..842ac21530 100644
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -50,6 +50,9 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
+## Max allowed idle time before removing node info from metasrv memory.
+node_max_idle_time = "24hours"
+
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true
diff --git a/src/common/meta/src/cluster.rs b/src/common/meta/src/cluster.rs
index bb2429c0e6..f73dcf1537 100644
--- a/src/common/meta/src/cluster.rs
+++ b/src/common/meta/src/cluster.rs
@@ -57,12 +57,10 @@ pub trait ClusterInfo {
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
-///
-/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
-/// a `cluster_id`, it serves multiple clusters.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct NodeInfoKey {
/// The cluster id.
+ // todo(hl): remove cluster_id as it is not assigned anywhere.
pub cluster_id: ClusterId,
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
pub role: Role,
@@ -232,8 +230,8 @@ impl TryFrom> for NodeInfoKey {
}
}
-impl From for Vec {
- fn from(key: NodeInfoKey) -> Self {
+impl From<&NodeInfoKey> for Vec {
+ fn from(key: &NodeInfoKey) -> Self {
format!(
"{}-{}-{}-{}",
CLUSTER_NODE_INFO_PREFIX,
@@ -315,7 +313,7 @@ mod tests {
node_id: 2,
};
- let key_bytes: Vec = key.into();
+ let key_bytes: Vec = (&key).into();
let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
assert_eq!(1, new_key.cluster_id);
diff --git a/src/common/meta/src/lib.rs b/src/common/meta/src/lib.rs
index fd6fc775a4..7479a14337 100644
--- a/src/common/meta/src/lib.rs
+++ b/src/common/meta/src/lib.rs
@@ -34,6 +34,7 @@ pub mod kv_backend;
pub mod leadership_notifier;
pub mod lock_key;
pub mod metrics;
+pub mod node_expiry_listener;
pub mod node_manager;
pub mod peer;
pub mod range_stream;
diff --git a/src/common/meta/src/node_expiry_listener.rs b/src/common/meta/src/node_expiry_listener.rs
new file mode 100644
index 0000000000..c5da2936a5
--- /dev/null
+++ b/src/common/meta/src/node_expiry_listener.rs
@@ -0,0 +1,152 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Mutex;
+use std::time::Duration;
+
+use common_telemetry::{debug, error, info, warn};
+use tokio::task::JoinHandle;
+use tokio::time::{interval, MissedTickBehavior};
+
+use crate::cluster::{NodeInfo, NodeInfoKey};
+use crate::error;
+use crate::kv_backend::ResettableKvBackendRef;
+use crate::leadership_notifier::LeadershipChangeListener;
+use crate::rpc::store::RangeRequest;
+use crate::rpc::KeyValue;
+
+/// [NodeExpiryListener] periodically checks all node info in memory and removes
+/// expired node info to prevent memory leaks.
+pub struct NodeExpiryListener {
+ handle: Mutex