Skip to main content

common_meta/
key.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! This mod defines all the keys used in the metadata store (Metasrv).
16//! Specifically, there are these kinds of keys:
17//!
18//! 1. Datanode table key: `__dn_table/{datanode_id}/{table_id}`
19//!     - The value is a [DatanodeTableValue] struct; it contains `table_id` and the regions that
20//!       belong to this Datanode.
//!     - This key is primarily used in the startup of Datanode, to let Datanode know which tables
22//!       and regions it should open.
23//!
24//! 2. Table info key: `__table_info/{table_id}`
25//!     - The value is a [TableInfoValue] struct; it contains the whole table info (like column
26//!       schemas).
27//!     - This key is mainly used in constructing the table in Datanode and Frontend.
28//!
29//! 3. Catalog name key: `__catalog_name/{catalog_name}`
//!     - Indexes all catalog names.
31//!
32//! 4. Schema name key: `__schema_name/{catalog_name}/{schema_name}`
//!     - Indexes all schema names that belong to the {catalog_name}.
34//!
35//! 5. Table name key: `__table_name/{catalog_name}/{schema_name}/{table_name}`
36//!     - The value is a [TableNameValue] struct; it contains the table id.
37//!     - Used in the table name to table id lookup.
38//!
39//! 6. Flow info key: `__flow/info/{flow_id}`
40//!     - Stores metadata of the flow.
41//!
42//! 7. Flow route key: `__flow/route/{flow_id}/{partition_id}`
43//!     - Stores route of the flow.
44//!
45//! 8. Flow name key: `__flow/name/{catalog}/{flow_name}`
46//!     - Mapping {catalog}/{flow_name} to {flow_id}
47//!
48//! 9. Flownode flow key: `__flow/flownode/{flownode_id}/{flow_id}/{partition_id}`
49//!     - Mapping {flownode_id} to {flow_id}
50//!
51//! 10. Table flow key: `__flow/source_table/{table_id}/{flownode_id}/{flow_id}/{partition_id}`
52//!     - Mapping source table's {table_id} to {flownode_id}
53//!     - Used in `Flownode` booting.
54//!
55//! 11. View info key: `__view_info/{view_id}`
56//!     - The value is a [ViewInfoValue] struct; it contains the encoded logical plan.
57//!     - This key is mainly used in constructing the view in Datanode and Frontend.
58//!
59//! 12. Kafka topic key: `__topic_name/kafka/{topic_name}`
60//!     - The key is used to track existing topics in Kafka.
61//!     - The value is a [TopicNameValue](crate::key::topic_name::TopicNameValue) struct; it contains the `pruned_entry_id` which represents
62//!       the highest entry id that has been pruned from the remote WAL.
63//!     - When a region uses this topic, it should start replaying entries from `pruned_entry_id + 1` (minimum available entry id).
64//!
65//! 13. Topic name to region map key `__topic_region/{topic_name}/{region_id}`
66//!     - Mapping {topic_name} to {region_id}
67//!
68//! All keys have related managers. The managers take care of the serialization and deserialization
69//! of keys and values, and the interaction with the underlying KV store backend.
70//!
//! To simplify how managers are passed around in struct fields and function parameters, we define
//! two "unified" managers: the table metadata manager [TableMetadataManager]
//! and the flow metadata manager [FlowMetadataManager](crate::key::flow::FlowMetadataManager).
//! Between them they contain all the managers defined above. It's recommended to use only these unified managers.
75//!
76//! The whole picture of flow keys will be like this:
77//!
78//! __flow/
79//!   info/
80//!     {flow_id}
81//!   route/
82//!     {flow_id}/
83//!      {partition_id}
84//!
85//!    name/
86//!      {catalog_name}
87//!        {flow_name}
88//!
89//!    flownode/
90//!      {flownode_id}/
91//!        {flow_id}/
92//!          {partition_id}
93//!
94//!    source_table/
95//!      {table_id}/
96//!        {flownode_id}/
97//!          {flow_id}/
98//!            {partition_id}
99
100pub mod catalog_name;
101pub mod datanode_table;
102pub mod flow;
103pub mod node_address;
104pub mod runtime_switch;
105mod schema_metadata_manager;
106pub mod schema_name;
107pub mod table_info;
108pub mod table_name;
109pub mod table_repart;
110pub mod table_route;
111#[cfg(any(test, feature = "testing"))]
112pub mod test_utils;
113pub mod tombstone;
114pub mod topic_name;
115pub mod topic_region;
116pub mod txn_helper;
117pub mod view_info;
118
119use std::collections::{BTreeMap, HashMap, HashSet};
120use std::fmt::Debug;
121use std::ops::{Deref, DerefMut};
122use std::sync::Arc;
123
124use bytes::Bytes;
125use common_base::regex_pattern::NAME_PATTERN;
126use common_catalog::consts::{
127    DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
128};
129use common_telemetry::warn;
130use common_wal::options::WalOptions;
131use datanode_table::{DatanodeTableKey, DatanodeTableManager, DatanodeTableValue};
132use flow::flow_route::FlowRouteValue;
133use flow::table_flow::TableFlowValue;
134use futures_util::TryStreamExt;
135use lazy_static::lazy_static;
136use regex::Regex;
137pub use schema_metadata_manager::{SchemaMetadataManager, SchemaMetadataManagerRef};
138use serde::de::DeserializeOwned;
139use serde::{Deserialize, Serialize};
140use snafu::{OptionExt, ResultExt, ensure};
141use store_api::storage::RegionNumber;
142use table::metadata::{TableId, TableInfo};
143use table::table_name::TableName;
144use table_info::{TableInfoKey, TableInfoManager, TableInfoValue};
145use table_name::{TableNameKey, TableNameManager, TableNameValue};
146use topic_name::TopicNameManager;
147use topic_region::{TopicRegionKey, TopicRegionManager};
148use view_info::{ViewInfoKey, ViewInfoManager, ViewInfoValue};
149
150use self::catalog_name::{CatalogManager, CatalogNameKey, CatalogNameValue};
151use self::datanode_table::RegionInfo;
152use self::flow::flow_info::FlowInfoValue;
153use self::flow::flow_name::FlowNameValue;
154use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue};
155use self::table_route::{TableRouteManager, TableRouteValue};
156use self::tombstone::TombstoneManager;
157use crate::DatanodeId;
158use crate::error::{self, Result, SerdeJsonSnafu};
159use crate::key::flow::flow_state::FlowStateValue;
160use crate::key::node_address::NodeAddressValue;
161use crate::key::table_repart::{TableRepartKey, TableRepartManager};
162use crate::key::table_route::TableRouteKey;
163use crate::key::topic_region::TopicRegionValue;
164use crate::key::txn_helper::TxnOpGetResponseSet;
165use crate::kv_backend::KvBackendRef;
166use crate::kv_backend::txn::{Txn, TxnOp};
167use crate::rpc::router::{LeaderState, RegionRoute, region_distribution};
168use crate::rpc::store::BatchDeleteRequest;
169use crate::state_store::PoisonValue;
170
/// Regex source for a topic name: one character from `[a-zA-Z0-9_:-]` followed by any
/// number of characters that may additionally be `.`, `@` or `#`.
pub const TOPIC_NAME_PATTERN: &str = r"[a-zA-Z0-9_:-][a-zA-Z0-9_:\-\.@#]*";

// ---- Switch keys ----

/// Legacy maintenance key (kept next to [`MAINTENANCE_KEY`]).
pub const LEGACY_MAINTENANCE_KEY: &str = "__maintenance";
/// Maintenance switch key.
pub const MAINTENANCE_KEY: &str = "__switches/maintenance";
/// Pause-procedure switch key.
pub const PAUSE_PROCEDURE_KEY: &str = "__switches/pause_procedure";
/// Recovery-mode switch key.
pub const RECOVERY_MODE_KEY: &str = "__switches/recovery";

// ---- Metadata key prefixes ----

/// Prefix of datanode table keys: `__dn_table/{datanode_id}/{table_id}`.
pub const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
/// Prefix of table info keys: `__table_info/{table_id}`.
pub const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
/// Prefix of view info keys: `__view_info/{view_id}`.
pub const VIEW_INFO_KEY_PREFIX: &str = "__view_info";
/// Prefix of table name keys: `__table_name/{catalog_name}/{schema_name}/{table_name}`.
pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
/// Prefix of catalog name keys: `__catalog_name/{catalog_name}`.
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
/// Prefix of schema name keys: `__schema_name/{catalog_name}/{schema_name}`.
pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
/// Prefix of table route keys.
pub const TABLE_ROUTE_PREFIX: &str = "__table_route";
/// Prefix of table repartition keys.
pub const TABLE_REPART_PREFIX: &str = "__table_repart";
/// Prefix of node address keys.
pub const NODE_ADDRESS_PREFIX: &str = "__node_address";
/// Prefix of Kafka topic keys: `__topic_name/kafka/{topic_name}`.
pub const KAFKA_TOPIC_KEY_PREFIX: &str = "__topic_name/kafka";
// The legacy topic key prefix is used to store the topic name in previous versions.
pub const LEGACY_TOPIC_KEY_PREFIX: &str = "__created_wal_topics/kafka";
/// Prefix of topic-to-region keys: `__topic_region/{topic_name}/{region_id}`.
pub const TOPIC_REGION_PREFIX: &str = "__topic_region";

// ---- Metasrv election ----

/// The election key.
pub const ELECTION_KEY: &str = "__metasrv_election";
/// The root key of metasrv election candidates.
pub const CANDIDATES_ROOT: &str = "__metasrv_election_candidates/";

/// The keys with these prefixes will be loaded into the cache when the leader starts.
pub const CACHE_KEY_PREFIXES: [&str; 5] = [
    TABLE_NAME_KEY_PREFIX,
    CATALOG_NAME_KEY_PREFIX,
    SCHEMA_NAME_KEY_PREFIX,
    TABLE_ROUTE_PREFIX,
    NODE_ADDRESS_PREFIX,
];
204
205/// A set of regions with the same role.
206#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
207pub struct RegionRoleSet {
208    /// Leader regions.
209    pub leader_regions: Vec<RegionNumber>,
210    /// Follower regions.
211    pub follower_regions: Vec<RegionNumber>,
212}
213
214impl<'de> Deserialize<'de> for RegionRoleSet {
215    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
216    where
217        D: serde::Deserializer<'de>,
218    {
219        #[derive(Deserialize)]
220        #[serde(untagged)]
221        enum RegionRoleSetOrLeaderOnly {
222            Full {
223                leader_regions: Vec<RegionNumber>,
224                follower_regions: Vec<RegionNumber>,
225            },
226            LeaderOnly(Vec<RegionNumber>),
227        }
228        match RegionRoleSetOrLeaderOnly::deserialize(deserializer)? {
229            RegionRoleSetOrLeaderOnly::Full {
230                leader_regions,
231                follower_regions,
232            } => Ok(RegionRoleSet::new(leader_regions, follower_regions)),
233            RegionRoleSetOrLeaderOnly::LeaderOnly(leader_regions) => {
234                Ok(RegionRoleSet::new(leader_regions, vec![]))
235            }
236        }
237    }
238}
239
240impl RegionRoleSet {
241    /// Create a new region role set.
242    pub fn new(leader_regions: Vec<RegionNumber>, follower_regions: Vec<RegionNumber>) -> Self {
243        Self {
244            leader_regions,
245            follower_regions,
246        }
247    }
248
249    /// Add a leader region to the set.
250    pub fn add_leader_region(&mut self, region_number: RegionNumber) {
251        self.leader_regions.push(region_number);
252    }
253
254    /// Add a follower region to the set.
255    pub fn add_follower_region(&mut self, region_number: RegionNumber) {
256        self.follower_regions.push(region_number);
257    }
258
259    /// Sort the regions.
260    pub fn sort(&mut self) {
261        self.follower_regions.sort();
262        self.leader_regions.sort();
263    }
264}
265
266/// The distribution of regions.
267///
268/// The key is the datanode id, the value is the region role set.
269pub type RegionDistribution = BTreeMap<DatanodeId, RegionRoleSet>;
270
271/// The id of flow.
272pub type FlowId = u32;
273/// The partition of flow.
274pub type FlowPartitionId = u32;
275
276lazy_static! {
277    pub static ref TOPIC_NAME_PATTERN_REGEX: Regex = Regex::new(TOPIC_NAME_PATTERN).unwrap();
278}
279
280lazy_static! {
281    static ref TABLE_INFO_KEY_PATTERN: Regex =
282        Regex::new(&format!("^{TABLE_INFO_KEY_PREFIX}/([0-9]+)$")).unwrap();
283}
284
285lazy_static! {
286    static ref VIEW_INFO_KEY_PATTERN: Regex =
287        Regex::new(&format!("^{VIEW_INFO_KEY_PREFIX}/([0-9]+)$")).unwrap();
288}
289
290lazy_static! {
291    static ref TABLE_ROUTE_KEY_PATTERN: Regex =
292        Regex::new(&format!("^{TABLE_ROUTE_PREFIX}/([0-9]+)$")).unwrap();
293}
294
295lazy_static! {
296    pub(crate) static ref TABLE_REPART_KEY_PATTERN: Regex =
297        Regex::new(&format!("^{TABLE_REPART_PREFIX}/([0-9]+)$")).unwrap();
298}
299
300lazy_static! {
301    static ref DATANODE_TABLE_KEY_PATTERN: Regex =
302        Regex::new(&format!("^{DATANODE_TABLE_KEY_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap();
303}
304
305lazy_static! {
306    static ref TABLE_NAME_KEY_PATTERN: Regex = Regex::new(&format!(
307        "^{TABLE_NAME_KEY_PREFIX}/({NAME_PATTERN})/({NAME_PATTERN})/({NAME_PATTERN})$"
308    ))
309    .unwrap();
310}
311
312lazy_static! {
313    /// CATALOG_NAME_KEY: {CATALOG_NAME_KEY_PREFIX}/{catalog_name}
314    static ref CATALOG_NAME_KEY_PATTERN: Regex = Regex::new(&format!(
315        "^{CATALOG_NAME_KEY_PREFIX}/({NAME_PATTERN})$"
316    ))
317        .unwrap();
318}
319
320lazy_static! {
321    /// SCHEMA_NAME_KEY: {SCHEMA_NAME_KEY_PREFIX}/{catalog_name}/{schema_name}
322    static ref SCHEMA_NAME_KEY_PATTERN:Regex=Regex::new(&format!(
323        "^{SCHEMA_NAME_KEY_PREFIX}/({NAME_PATTERN})/({NAME_PATTERN})$"
324    ))
325        .unwrap();
326}
327
328lazy_static! {
329    static ref NODE_ADDRESS_PATTERN: Regex =
330        Regex::new(&format!("^{NODE_ADDRESS_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap();
331}
332
333lazy_static! {
334    pub static ref KAFKA_TOPIC_KEY_PATTERN: Regex =
335        Regex::new(&format!("^{KAFKA_TOPIC_KEY_PREFIX}/(.*)$")).unwrap();
336}
337
338lazy_static! {
339    pub static ref TOPIC_REGION_PATTERN: Regex = Regex::new(&format!(
340        "^{TOPIC_REGION_PREFIX}/({TOPIC_NAME_PATTERN})/([0-9]+)$"
341    ))
342    .unwrap();
343}
344
345/// The key of metadata.
346pub trait MetadataKey<'a, T> {
347    fn to_bytes(&self) -> Vec<u8>;
348
349    fn from_bytes(bytes: &'a [u8]) -> Result<T>;
350}
351
352#[derive(Debug, Clone, PartialEq)]
353pub struct BytesAdapter(Vec<u8>);
354
355impl From<Vec<u8>> for BytesAdapter {
356    fn from(value: Vec<u8>) -> Self {
357        Self(value)
358    }
359}
360
361impl<'a> MetadataKey<'a, BytesAdapter> for BytesAdapter {
362    fn to_bytes(&self) -> Vec<u8> {
363        self.0.clone()
364    }
365
366    fn from_bytes(bytes: &'a [u8]) -> Result<BytesAdapter> {
367        Ok(BytesAdapter(bytes.to_vec()))
368    }
369}
370
371pub(crate) trait MetadataKeyGetTxnOp {
372    fn build_get_op(
373        &self,
374    ) -> (
375        TxnOp,
376        impl for<'a> FnMut(&'a mut TxnOpGetResponseSet) -> Option<Vec<u8>>,
377    );
378}
379
380pub trait MetadataValue {
381    fn try_from_raw_value(raw_value: &[u8]) -> Result<Self>
382    where
383        Self: Sized;
384
385    fn try_as_raw_value(&self) -> Result<Vec<u8>>;
386}
387
388pub type TableMetadataManagerRef = Arc<TableMetadataManager>;
389
390pub struct TableMetadataManager {
391    table_name_manager: TableNameManager,
392    table_info_manager: TableInfoManager,
393    view_info_manager: ViewInfoManager,
394    datanode_table_manager: DatanodeTableManager,
395    catalog_manager: CatalogManager,
396    schema_manager: SchemaManager,
397    table_route_manager: TableRouteManager,
398    table_repart_manager: TableRepartManager,
399    tombstone_manager: TombstoneManager,
400    topic_name_manager: TopicNameManager,
401    topic_region_manager: TopicRegionManager,
402    kv_backend: KvBackendRef,
403}
404
/// Returns early with an `Unexpected` error unless `$got == $expected_value`.
///
/// - `$got`: the value that was read.
/// - `$expected_value`: the value it must be equal to.
/// - `$name`: a description of the running operation, included in the error message.
///
/// The expansion relies on `ensure!` and `error::UnexpectedSnafu` being in scope at the
/// call site, and must be used in a function returning a compatible `Result`.
#[macro_export]
macro_rules! ensure_values {
    ($got:expr, $expected_value:expr, $name:expr) => {
        // Bind both operands once, by reference (the same approach `assert_eq!` takes), so
        // each expression is evaluated exactly once. Expanding them both in the comparison
        // and in the message would evaluate them a second time on the failure path.
        match (&$got, &$expected_value) {
            (got, expected) => {
                ensure!(
                    *got == *expected,
                    error::UnexpectedSnafu {
                        err_msg: format!(
                            "Reads the different value: {:?} during {}, expected: {:?}",
                            got, $name, expected
                        )
                    }
                );
            }
        }
    };
}
419
420/// A struct containing a deserialized value(`inner`) and an original bytes.
421///
422/// - Serialize behaviors:
423///
424/// The `inner` field will be ignored.
425///
426/// - Deserialize behaviors:
427///
428/// The `inner` field will be deserialized from the `bytes` field.
429pub struct DeserializedValueWithBytes<T: DeserializeOwned + Serialize> {
430    // The original bytes of the inner.
431    bytes: Bytes,
432    // The value was deserialized from the original bytes.
433    inner: T,
434}
435
436#[derive(Debug, Clone, PartialEq, Eq)]
437pub struct DroppedTableName {
438    /// Table id stored in the tombstoned table-name mapping.
439    pub table_id: TableId,
440    /// Original fully qualified table name.
441    pub table_name: TableName,
442}
443
444#[derive(Debug, Clone)]
445pub struct DroppedTableMetadata {
446    /// Table id of the dropped table.
447    pub table_id: TableId,
448    /// Original fully qualified table name.
449    pub table_name: TableName,
450    /// Tombstoned table info value.
451    pub table_info_value: TableInfoValue,
452    /// Tombstoned table route value.
453    pub table_route_value: TableRouteValue,
454    /// Per-region WAL options recovered from tombstoned datanode metadata.
455    pub region_wal_options: HashMap<RegionNumber, WalOptions>,
456}
457
458impl<T: DeserializeOwned + Serialize> Deref for DeserializedValueWithBytes<T> {
459    type Target = T;
460
461    fn deref(&self) -> &Self::Target {
462        &self.inner
463    }
464}
465
466impl<T: DeserializeOwned + Serialize> DerefMut for DeserializedValueWithBytes<T> {
467    fn deref_mut(&mut self) -> &mut Self::Target {
468        &mut self.inner
469    }
470}
471
472impl<T: DeserializeOwned + Serialize + Debug> Debug for DeserializedValueWithBytes<T> {
473    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
474        write!(
475            f,
476            "DeserializedValueWithBytes(inner: {:?}, bytes: {:?})",
477            self.inner, self.bytes
478        )
479    }
480}
481
482impl<T: DeserializeOwned + Serialize> Serialize for DeserializedValueWithBytes<T> {
483    /// - Serialize behaviors:
484    ///
485    /// The `inner` field will be ignored.
486    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
487    where
488        S: serde::Serializer,
489    {
490        // Safety: The original bytes are always JSON encoded.
491        // It's more efficiently than `serialize_bytes`.
492        serializer.serialize_str(&String::from_utf8_lossy(&self.bytes))
493    }
494}
495
496impl<'de, T: DeserializeOwned + Serialize + MetadataValue> Deserialize<'de>
497    for DeserializedValueWithBytes<T>
498{
499    /// - Deserialize behaviors:
500    ///
501    /// The `inner` field will be deserialized from the `bytes` field.
502    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
503    where
504        D: serde::Deserializer<'de>,
505    {
506        let buf = String::deserialize(deserializer)?;
507        let bytes = Bytes::from(buf);
508
509        let value = DeserializedValueWithBytes::from_inner_bytes(bytes)
510            .map_err(|err| serde::de::Error::custom(err.to_string()))?;
511
512        Ok(value)
513    }
514}
515
516impl<T: Serialize + DeserializeOwned + Clone> Clone for DeserializedValueWithBytes<T> {
517    fn clone(&self) -> Self {
518        Self {
519            bytes: self.bytes.clone(),
520            inner: self.inner.clone(),
521        }
522    }
523}
524
525impl<T: Serialize + DeserializeOwned + MetadataValue> DeserializedValueWithBytes<T> {
526    /// Returns a struct containing a deserialized value and an original `bytes`.
527    /// It accepts original bytes of inner.
528    pub fn from_inner_bytes(bytes: Bytes) -> Result<Self> {
529        let inner = T::try_from_raw_value(&bytes)?;
530        Ok(Self { bytes, inner })
531    }
532
533    /// Returns a struct containing a deserialized value and an original `bytes`.
534    /// It accepts original bytes of inner.
535    pub fn from_inner_slice(bytes: &[u8]) -> Result<Self> {
536        Self::from_inner_bytes(Bytes::copy_from_slice(bytes))
537    }
538
539    pub fn into_inner(self) -> T {
540        self.inner
541    }
542
543    pub fn get_inner_ref(&self) -> &T {
544        &self.inner
545    }
546
547    /// Returns original `bytes`
548    pub fn get_raw_bytes(&self) -> Vec<u8> {
549        self.bytes.to_vec()
550    }
551
552    #[cfg(any(test, feature = "testing"))]
553    pub fn from_inner(inner: T) -> Self {
554        let bytes = serde_json::to_vec(&inner).unwrap();
555
556        Self {
557            bytes: Bytes::from(bytes),
558            inner,
559        }
560    }
561}
562
563impl TableMetadataManager {
564    pub fn new(kv_backend: KvBackendRef) -> Self {
565        TableMetadataManager {
566            table_name_manager: TableNameManager::new(kv_backend.clone()),
567            table_info_manager: TableInfoManager::new(kv_backend.clone()),
568            view_info_manager: ViewInfoManager::new(kv_backend.clone()),
569            datanode_table_manager: DatanodeTableManager::new(kv_backend.clone()),
570            catalog_manager: CatalogManager::new(kv_backend.clone()),
571            schema_manager: SchemaManager::new(kv_backend.clone()),
572            table_route_manager: TableRouteManager::new(kv_backend.clone()),
573            table_repart_manager: TableRepartManager::new(kv_backend.clone()),
574            tombstone_manager: TombstoneManager::new(kv_backend.clone()),
575            topic_name_manager: TopicNameManager::new(kv_backend.clone()),
576            topic_region_manager: TopicRegionManager::new(kv_backend.clone()),
577            kv_backend,
578        }
579    }
580
581    /// Creates a new `TableMetadataManager` with a custom tombstone prefix.
582    pub fn new_with_custom_tombstone_prefix(
583        kv_backend: KvBackendRef,
584        tombstone_prefix: &str,
585    ) -> Self {
586        Self {
587            table_name_manager: TableNameManager::new(kv_backend.clone()),
588            table_info_manager: TableInfoManager::new(kv_backend.clone()),
589            view_info_manager: ViewInfoManager::new(kv_backend.clone()),
590            datanode_table_manager: DatanodeTableManager::new(kv_backend.clone()),
591            catalog_manager: CatalogManager::new(kv_backend.clone()),
592            schema_manager: SchemaManager::new(kv_backend.clone()),
593            table_route_manager: TableRouteManager::new(kv_backend.clone()),
594            table_repart_manager: TableRepartManager::new(kv_backend.clone()),
595            tombstone_manager: TombstoneManager::new_with_prefix(
596                kv_backend.clone(),
597                tombstone_prefix,
598            ),
599            topic_name_manager: TopicNameManager::new(kv_backend.clone()),
600            topic_region_manager: TopicRegionManager::new(kv_backend.clone()),
601            kv_backend,
602        }
603    }
604
605    pub async fn init(&self) -> Result<()> {
606        let catalog_name = CatalogNameKey::new(DEFAULT_CATALOG_NAME);
607
608        self.catalog_manager().create(catalog_name, true).await?;
609
610        let internal_schemas = [
611            DEFAULT_SCHEMA_NAME,
612            INFORMATION_SCHEMA_NAME,
613            DEFAULT_PRIVATE_SCHEMA_NAME,
614        ];
615
616        for schema_name in internal_schemas {
617            let schema_key = SchemaNameKey::new(DEFAULT_CATALOG_NAME, schema_name);
618
619            self.schema_manager().create(schema_key, None, true).await?;
620        }
621
622        Ok(())
623    }
624
625    pub fn table_name_manager(&self) -> &TableNameManager {
626        &self.table_name_manager
627    }
628
629    pub fn table_info_manager(&self) -> &TableInfoManager {
630        &self.table_info_manager
631    }
632
633    pub fn view_info_manager(&self) -> &ViewInfoManager {
634        &self.view_info_manager
635    }
636
637    pub fn datanode_table_manager(&self) -> &DatanodeTableManager {
638        &self.datanode_table_manager
639    }
640
641    pub fn catalog_manager(&self) -> &CatalogManager {
642        &self.catalog_manager
643    }
644
645    pub fn schema_manager(&self) -> &SchemaManager {
646        &self.schema_manager
647    }
648
649    pub fn table_route_manager(&self) -> &TableRouteManager {
650        &self.table_route_manager
651    }
652
653    pub fn table_repart_manager(&self) -> &TableRepartManager {
654        &self.table_repart_manager
655    }
656
657    pub fn topic_name_manager(&self) -> &TopicNameManager {
658        &self.topic_name_manager
659    }
660
661    pub fn topic_region_manager(&self) -> &TopicRegionManager {
662        &self.topic_region_manager
663    }
664
665    pub fn kv_backend(&self) -> &KvBackendRef {
666        &self.kv_backend
667    }
668
669    pub async fn get_full_table_info(
670        &self,
671        table_id: TableId,
672    ) -> Result<(
673        Option<DeserializedValueWithBytes<TableInfoValue>>,
674        Option<DeserializedValueWithBytes<TableRouteValue>>,
675    )> {
676        let table_info_key = TableInfoKey::new(table_id);
677        let table_route_key = TableRouteKey::new(table_id);
678        let (table_info_txn, table_info_filter) = table_info_key.build_get_op();
679        let (table_route_txn, table_route_filter) = table_route_key.build_get_op();
680
681        let txn = Txn::new().and_then(vec![table_info_txn, table_route_txn]);
682        let mut res = self.kv_backend.txn(txn).await?;
683        let mut set = TxnOpGetResponseSet::from(&mut res.responses);
684        let table_info_value = TxnOpGetResponseSet::decode_with(table_info_filter)(&mut set)?;
685        let mut table_route_value = TxnOpGetResponseSet::decode_with(table_route_filter)(&mut set)?;
686        if let Some(table_route_value) = &mut table_route_value {
687            self.table_route_manager()
688                .table_route_storage()
689                .remap_table_route(table_route_value)
690                .await?;
691        }
692        Ok((table_info_value, table_route_value))
693    }
694
695    /// Creates metadata for view and returns an error if different metadata exists.
696    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
697    /// Parameters include:
698    /// - `view_info`: the encoded logical plan
699    /// - `table_names`: the resolved fully table names in logical plan
700    /// - `columns`: the view columns
701    /// - `plan_columns`: the original plan columns
702    /// - `definition`: The SQL to create the view
703    ///
704    pub async fn create_view_metadata(
705        &self,
706        view_info: TableInfo,
707        raw_logical_plan: Vec<u8>,
708        table_names: HashSet<TableName>,
709        columns: Vec<String>,
710        plan_columns: Vec<String>,
711        definition: String,
712    ) -> Result<()> {
713        let view_id = view_info.ident.table_id;
714
715        // Creates view name.
716        let view_name = TableNameKey::new(
717            &view_info.catalog_name,
718            &view_info.schema_name,
719            &view_info.name,
720        );
721        let create_table_name_txn = self
722            .table_name_manager()
723            .build_create_txn(&view_name, view_id)?;
724
725        // Creates table info.
726        let table_info_value = TableInfoValue::new(view_info);
727
728        let (create_table_info_txn, on_create_table_info_failure) = self
729            .table_info_manager()
730            .build_create_txn(view_id, &table_info_value)?;
731
732        // Creates view info
733        let view_info_value = ViewInfoValue::new(
734            raw_logical_plan.into(),
735            table_names,
736            columns,
737            plan_columns,
738            definition,
739        );
740        let (create_view_info_txn, on_create_view_info_failure) = self
741            .view_info_manager()
742            .build_create_txn(view_id, &view_info_value)?;
743
744        let txn = Txn::merge_all(vec![
745            create_table_name_txn,
746            create_table_info_txn,
747            create_view_info_txn,
748        ]);
749
750        let mut r = self.kv_backend.txn(txn).await?;
751
752        // Checks whether metadata was already created.
753        if !r.succeeded {
754            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
755            let remote_table_info = on_create_table_info_failure(&mut set)?
756                .context(error::UnexpectedSnafu {
757                    err_msg: "Reads the empty table info in comparing operation of creating table metadata",
758                })?
759                .into_inner();
760
761            let remote_view_info = on_create_view_info_failure(&mut set)?
762                .context(error::UnexpectedSnafu {
763                    err_msg: "Reads the empty view info in comparing operation of creating view metadata",
764                })?
765                .into_inner();
766
767            let op_name = "the creating view metadata";
768            ensure_values!(remote_table_info, table_info_value, op_name);
769            ensure_values!(remote_view_info, view_info_value, op_name);
770        }
771
772        Ok(())
773    }
774
775    /// Creates metadata for table and returns an error if different metadata exists.
776    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
777    pub async fn create_table_metadata(
778        &self,
779        table_info: TableInfo,
780        table_route_value: TableRouteValue,
781        region_wal_options: HashMap<RegionNumber, String>,
782    ) -> Result<()> {
783        let table_id = table_info.ident.table_id;
784        let engine = table_info.meta.engine.clone();
785
786        // Creates table name.
787        let table_name = TableNameKey::new(
788            &table_info.catalog_name,
789            &table_info.schema_name,
790            &table_info.name,
791        );
792        let create_table_name_txn = self
793            .table_name_manager()
794            .build_create_txn(&table_name, table_id)?;
795
796        let region_options = table_info.to_region_options();
797        // Creates table info.
798        let table_info_value = TableInfoValue::new(table_info);
799        let (create_table_info_txn, on_create_table_info_failure) = self
800            .table_info_manager()
801            .build_create_txn(table_id, &table_info_value)?;
802
803        let (create_table_route_txn, on_create_table_route_failure) = self
804            .table_route_manager()
805            .table_route_storage()
806            .build_create_txn(table_id, &table_route_value)?;
807
808        let create_topic_region_txn = self
809            .topic_region_manager
810            .build_create_txn(table_id, &region_wal_options)?;
811
812        let mut txn = Txn::merge_all(vec![
813            create_table_name_txn,
814            create_table_info_txn,
815            create_table_route_txn,
816            create_topic_region_txn,
817        ]);
818
819        if let TableRouteValue::Physical(x) = &table_route_value {
820            let region_storage_path = table_info_value.region_storage_path();
821            let create_datanode_table_txn = self.datanode_table_manager().build_create_txn(
822                table_id,
823                &engine,
824                &region_storage_path,
825                region_options,
826                region_wal_options,
827                region_distribution(&x.region_routes),
828            )?;
829            txn = txn.merge(create_datanode_table_txn);
830        }
831
832        let mut r = self.kv_backend.txn(txn).await?;
833
834        // Checks whether metadata was already created.
835        if !r.succeeded {
836            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
837            let remote_table_info = on_create_table_info_failure(&mut set)?
838                .context(error::UnexpectedSnafu {
839                    err_msg: "Reads the empty table info in comparing operation of creating table metadata",
840                })?
841                .into_inner();
842
843            let remote_table_route = on_create_table_route_failure(&mut set)?
844                .context(error::UnexpectedSnafu {
845                    err_msg: "Reads the empty table route in comparing operation of creating table metadata",
846                })?
847                .into_inner();
848
849            let op_name = "the creating table metadata";
850            ensure_values!(remote_table_info, table_info_value, op_name);
851            ensure_values!(remote_table_route, table_route_value, op_name);
852        }
853
854        Ok(())
855    }
856
857    pub fn create_logical_tables_metadata_chunk_size(&self) -> usize {
858        // The batch size is max_txn_size / 3 because the size of the `tables_data`
859        // is 3 times the size of the `tables_data`.
860        self.kv_backend.max_txn_ops() / 3
861    }
862
863    /// Creates metadata for multiple logical tables and return an error if different metadata exists.
864    pub async fn create_logical_tables_metadata(
865        &self,
866        tables_data: Vec<(TableInfo, TableRouteValue)>,
867    ) -> Result<()> {
868        let len = tables_data.len();
869        let mut txns = Vec::with_capacity(3 * len);
870        struct OnFailure<F1, R1, F2, R2>
871        where
872            F1: FnOnce(&mut TxnOpGetResponseSet) -> R1,
873            F2: FnOnce(&mut TxnOpGetResponseSet) -> R2,
874        {
875            table_info_value: TableInfoValue,
876            on_create_table_info_failure: F1,
877            table_route_value: TableRouteValue,
878            on_create_table_route_failure: F2,
879        }
880        let mut on_failures = Vec::with_capacity(len);
881        for (table_info, table_route_value) in tables_data {
882            let table_id = table_info.ident.table_id;
883
884            // Creates table name.
885            let table_name = TableNameKey::new(
886                &table_info.catalog_name,
887                &table_info.schema_name,
888                &table_info.name,
889            );
890            let create_table_name_txn = self
891                .table_name_manager()
892                .build_create_txn(&table_name, table_id)?;
893            txns.push(create_table_name_txn);
894
895            // Creates table info.
896            let table_info_value = TableInfoValue::new(table_info);
897            let (create_table_info_txn, on_create_table_info_failure) =
898                self.table_info_manager()
899                    .build_create_txn(table_id, &table_info_value)?;
900            txns.push(create_table_info_txn);
901
902            let (create_table_route_txn, on_create_table_route_failure) = self
903                .table_route_manager()
904                .table_route_storage()
905                .build_create_txn(table_id, &table_route_value)?;
906            txns.push(create_table_route_txn);
907
908            on_failures.push(OnFailure {
909                table_info_value,
910                on_create_table_info_failure,
911                table_route_value,
912                on_create_table_route_failure,
913            });
914        }
915
916        let txn = Txn::merge_all(txns);
917        let mut r = self.kv_backend.txn(txn).await?;
918
919        // Checks whether metadata was already created.
920        if !r.succeeded {
921            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
922            for on_failure in on_failures {
923                let remote_table_info = (on_failure.on_create_table_info_failure)(&mut set)?
924                    .context(error::UnexpectedSnafu {
925                        err_msg: "Reads the empty table info in comparing operation of creating table metadata",
926                    })?
927                    .into_inner();
928
929                let remote_table_route = (on_failure.on_create_table_route_failure)(&mut set)?
930                    .context(error::UnexpectedSnafu {
931                        err_msg: "Reads the empty table route in comparing operation of creating table metadata",
932                    })?
933                    .into_inner();
934
935                let op_name = "the creating logical tables metadata";
936                ensure_values!(remote_table_info, on_failure.table_info_value, op_name);
937                ensure_values!(remote_table_route, on_failure.table_route_value, op_name);
938            }
939        }
940
941        Ok(())
942    }
943
944    fn table_metadata_keys(
945        &self,
946        table_id: TableId,
947        table_name: &TableName,
948        table_route_value: &TableRouteValue,
949        region_wal_options: &HashMap<RegionNumber, WalOptions>,
950    ) -> Result<Vec<Vec<u8>>> {
951        // Builds keys
952        let datanode_ids = if table_route_value.is_physical() {
953            region_distribution(table_route_value.region_routes()?)
954                .into_keys()
955                .collect()
956        } else {
957            vec![]
958        };
959        let mut keys = Vec::with_capacity(3 + datanode_ids.len());
960        let table_name = TableNameKey::new(
961            &table_name.catalog_name,
962            &table_name.schema_name,
963            &table_name.table_name,
964        );
965        let table_info_key = TableInfoKey::new(table_id);
966        let table_route_key = TableRouteKey::new(table_id);
967        let table_repart_key = TableRepartKey::new(table_id);
968        let datanode_table_keys = datanode_ids
969            .into_iter()
970            .map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
971            .collect::<HashSet<_>>();
972        let topic_region_map = self
973            .topic_region_manager
974            .get_topic_region_mapping(table_id, region_wal_options);
975        let topic_region_keys = topic_region_map
976            .iter()
977            .map(|(region_id, topic)| TopicRegionKey::new(*region_id, topic))
978            .collect::<Vec<_>>();
979        keys.push(table_name.to_bytes());
980        keys.push(table_info_key.to_bytes());
981        keys.push(table_route_key.to_bytes());
982        keys.push(table_repart_key.to_bytes());
983        for key in &datanode_table_keys {
984            keys.push(key.to_bytes());
985        }
986        for key in topic_region_keys {
987            keys.push(key.to_bytes());
988        }
989        Ok(keys)
990    }
991
992    /// Deletes metadata for table **logically**.
993    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
994    pub async fn delete_table_metadata(
995        &self,
996        table_id: TableId,
997        table_name: &TableName,
998        table_route_value: &TableRouteValue,
999        region_wal_options: &HashMap<RegionNumber, WalOptions>,
1000    ) -> Result<()> {
1001        let keys =
1002            self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
1003        self.tombstone_manager.create(keys).await.map(|_| ())
1004    }
1005
1006    /// Lists dropped tables from tombstoned table-name entries.
1007    pub async fn list_dropped_tables(&self) -> Result<Vec<DroppedTableName>> {
1008        let mut stream = self.tombstone_manager.tombstoned_table_names();
1009        let mut dropped_tables = Vec::new();
1010
1011        while let Some(kv) = stream.try_next().await? {
1012            let raw_key = self.tombstone_manager.strip_tombstone_prefix(&kv.key)?;
1013            let table_name = TableNameKey::from_bytes(raw_key)?.into();
1014            let table_id = TableNameValue::try_from_raw_value(&kv.value)?.table_id();
1015            dropped_tables.push(DroppedTableName {
1016                table_id,
1017                table_name,
1018            });
1019        }
1020
1021        Ok(dropped_tables)
1022    }
1023
1024    /// Gets dropped table metadata by its original full table name.
1025    pub async fn get_dropped_table(
1026        &self,
1027        table_name: &TableName,
1028    ) -> Result<Option<DroppedTableMetadata>> {
1029        let table_name_key = TableNameKey::from(table_name);
1030        let Some(kv) = self
1031            .tombstone_manager
1032            .get(&table_name_key.to_bytes())
1033            .await?
1034        else {
1035            return Ok(None);
1036        };
1037
1038        let table_id = TableNameValue::try_from_raw_value(&kv.value)?.table_id();
1039        self.get_dropped_table_metadata(table_id, table_name.clone())
1040            .await
1041    }
1042
1043    /// Gets dropped table metadata by table id.
1044    pub async fn get_dropped_table_by_id(
1045        &self,
1046        table_id: TableId,
1047    ) -> Result<Option<DroppedTableMetadata>> {
1048        self.get_dropped_table_metadata(table_id, None).await
1049    }
1050
1051    /// Deletes metadata tombstone for table **permanently**.
1052    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
1053    pub async fn delete_table_metadata_tombstone(
1054        &self,
1055        table_id: TableId,
1056        table_name: &TableName,
1057        table_route_value: &TableRouteValue,
1058        region_wal_options: &HashMap<RegionNumber, WalOptions>,
1059    ) -> Result<()> {
1060        let table_metadata_keys =
1061            self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
1062        self.tombstone_manager
1063            .delete(table_metadata_keys)
1064            .await
1065            .map(|_| ())
1066    }
1067
1068    /// Restores metadata for table.
1069    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
1070    pub async fn restore_table_metadata(
1071        &self,
1072        table_id: TableId,
1073        table_name: &TableName,
1074        table_route_value: &TableRouteValue,
1075        region_wal_options: &HashMap<RegionNumber, WalOptions>,
1076    ) -> Result<()> {
1077        let keys =
1078            self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
1079        self.tombstone_manager.restore(keys).await.map(|_| ())
1080    }
1081
1082    /// Deletes metadata for table **permanently**.
1083    /// The caller MUST ensure it has the exclusive access to `TableNameKey`.
1084    pub async fn destroy_table_metadata(
1085        &self,
1086        table_id: TableId,
1087        table_name: &TableName,
1088        table_route_value: &TableRouteValue,
1089        region_wal_options: &HashMap<RegionNumber, WalOptions>,
1090    ) -> Result<()> {
1091        let keys =
1092            self.table_metadata_keys(table_id, table_name, table_route_value, region_wal_options)?;
1093        let _ = self
1094            .kv_backend
1095            .batch_delete(BatchDeleteRequest::new().with_keys(keys))
1096            .await?;
1097        Ok(())
1098    }
1099
1100    /// Rebuilds dropped table metadata from tombstoned keys.
1101    async fn get_dropped_table_metadata<T>(
1102        &self,
1103        table_id: TableId,
1104        table_name: T,
1105    ) -> Result<Option<DroppedTableMetadata>>
1106    where
1107        T: Into<Option<TableName>>,
1108    {
1109        let table_info_key = TableInfoKey::new(table_id);
1110        let Some(table_info_kv) = self
1111            .tombstone_manager
1112            .get(&table_info_key.to_bytes())
1113            .await?
1114        else {
1115            return Ok(None);
1116        };
1117
1118        let table_info_value = TableInfoValue::try_from_raw_value(&table_info_kv.value)?;
1119        let table_name = table_name
1120            .into()
1121            .unwrap_or_else(|| table_info_value.table_name());
1122
1123        let table_route_key = TableRouteKey::new(table_id);
1124        let table_route_kv = self
1125            .tombstone_manager
1126            .get(&table_route_key.to_bytes())
1127            .await?
1128            .with_context(|| error::UnexpectedSnafu {
1129                err_msg: format!("Missing tombstoned table route metadata for table id {table_id}"),
1130            })?;
1131        let mut table_route_value = TableRouteValue::try_from_raw_value(&table_route_kv.value)?;
1132        self.table_route_manager
1133            .table_route_storage()
1134            .remap_table_route(&mut table_route_value)
1135            .await?;
1136
1137        let region_wal_options = self
1138            .dropped_region_wal_options(table_id, &table_route_value)
1139            .await?;
1140
1141        Ok(Some(DroppedTableMetadata {
1142            table_id,
1143            table_name,
1144            table_info_value,
1145            table_route_value,
1146            region_wal_options,
1147        }))
1148    }
1149
1150    /// Rebuilds region WAL options from tombstoned datanode-table entries.
1151    async fn dropped_region_wal_options(
1152        &self,
1153        table_id: TableId,
1154        table_route_value: &TableRouteValue,
1155    ) -> Result<HashMap<RegionNumber, WalOptions>> {
1156        let mut region_wal_options = HashMap::new();
1157        let datanode_table_keys = region_distribution(table_route_value.region_routes()?)
1158            .into_keys()
1159            .map(|datanode_id| DatanodeTableKey::new(datanode_id, table_id))
1160            .collect::<Vec<_>>();
1161        let datanode_table_key_bytes = datanode_table_keys
1162            .iter()
1163            .map(|key| key.to_bytes())
1164            .collect::<Vec<_>>();
1165        let datanode_table_values = self
1166            .tombstone_manager
1167            .batch_get(&datanode_table_key_bytes)
1168            .await?;
1169
1170        for datanode_table_key in datanode_table_keys {
1171            let Some(kv) = datanode_table_values.get(&datanode_table_key.to_bytes()) else {
1172                continue;
1173            };
1174
1175            let datanode_table_value = DatanodeTableValue::try_from_raw_value(&kv.value)?;
1176            for (region_number, wal_options) in &datanode_table_value.region_info.region_wal_options
1177            {
1178                region_wal_options.insert(
1179                    *region_number,
1180                    serde_json::from_str(wal_options).context(error::SerdeJsonSnafu)?,
1181                );
1182            }
1183        }
1184
1185        Ok(region_wal_options)
1186    }
1187
1188    fn view_info_keys(&self, view_id: TableId, view_name: &TableName) -> Result<Vec<Vec<u8>>> {
1189        let mut keys = Vec::with_capacity(3);
1190        let view_name = TableNameKey::new(
1191            &view_name.catalog_name,
1192            &view_name.schema_name,
1193            &view_name.table_name,
1194        );
1195        let table_info_key = TableInfoKey::new(view_id);
1196        let view_info_key = ViewInfoKey::new(view_id);
1197        keys.push(view_name.to_bytes());
1198        keys.push(table_info_key.to_bytes());
1199        keys.push(view_info_key.to_bytes());
1200
1201        Ok(keys)
1202    }
1203
1204    /// Deletes metadata for view **permanently**.
1205    /// The caller MUST ensure it has the exclusive access to `ViewNameKey`.
1206    pub async fn destroy_view_info(&self, view_id: TableId, view_name: &TableName) -> Result<()> {
1207        let keys = self.view_info_keys(view_id, view_name)?;
1208        let _ = self
1209            .kv_backend
1210            .batch_delete(BatchDeleteRequest::new().with_keys(keys))
1211            .await?;
1212        Ok(())
1213    }
1214
1215    /// Renames the table name and returns an error if different metadata exists.
1216    /// The caller MUST ensure it has the exclusive access to old and new `TableNameKey`s,
1217    /// and the new `TableNameKey` MUST be empty.
1218    pub async fn rename_table(
1219        &self,
1220        current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
1221        new_table_name: String,
1222    ) -> Result<()> {
1223        let current_table_info = &current_table_info_value.table_info;
1224        let table_id = current_table_info.ident.table_id;
1225
1226        let table_name_key = TableNameKey::new(
1227            &current_table_info.catalog_name,
1228            &current_table_info.schema_name,
1229            &current_table_info.name,
1230        );
1231
1232        let new_table_name_key = TableNameKey::new(
1233            &current_table_info.catalog_name,
1234            &current_table_info.schema_name,
1235            &new_table_name,
1236        );
1237
1238        // Updates table name.
1239        let update_table_name_txn = self.table_name_manager().build_update_txn(
1240            &table_name_key,
1241            &new_table_name_key,
1242            table_id,
1243        )?;
1244
1245        let new_table_info_value = current_table_info_value
1246            .inner
1247            .with_update(move |table_info| {
1248                table_info.name = new_table_name;
1249            });
1250
1251        // Updates table info.
1252        let (update_table_info_txn, on_update_table_info_failure) = self
1253            .table_info_manager()
1254            .build_update_txn(table_id, current_table_info_value, &new_table_info_value)?;
1255
1256        let txn = Txn::merge_all(vec![update_table_name_txn, update_table_info_txn]);
1257
1258        let mut r = self.kv_backend.txn(txn).await?;
1259
1260        // Checks whether metadata was already updated.
1261        if !r.succeeded {
1262            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1263            let remote_table_info = on_update_table_info_failure(&mut set)?
1264                .context(error::UnexpectedSnafu {
1265                    err_msg: "Reads the empty table info in comparing operation of the rename table metadata",
1266                })?
1267                .into_inner();
1268
1269            let op_name = "the renaming table metadata";
1270            ensure_values!(remote_table_info, new_table_info_value, op_name);
1271        }
1272
1273        Ok(())
1274    }
1275
1276    /// Updates table info and returns an error if different metadata exists.
1277    /// And cascade-ly update all redundant table options for each region
1278    /// if region_distribution is present.
1279    pub async fn update_table_info(
1280        &self,
1281        current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
1282        region_distribution: Option<RegionDistribution>,
1283        new_table_info: TableInfo,
1284    ) -> Result<()> {
1285        let table_id = current_table_info_value.table_info.ident.table_id;
1286        let new_table_info_value = current_table_info_value.update(new_table_info);
1287
1288        // Updates table info.
1289        let (update_table_info_txn, on_update_table_info_failure) = self
1290            .table_info_manager()
1291            .build_update_txn(table_id, current_table_info_value, &new_table_info_value)?;
1292
1293        let txn = if let Some(region_distribution) = region_distribution {
1294            // region options induced from table info.
1295            let new_region_options = new_table_info_value.table_info.to_region_options();
1296            let update_datanode_table_options_txn = self
1297                .datanode_table_manager
1298                .build_update_table_options_txn(table_id, region_distribution, new_region_options)
1299                .await?;
1300            Txn::merge_all([update_table_info_txn, update_datanode_table_options_txn])
1301        } else {
1302            update_table_info_txn
1303        };
1304
1305        let mut r = self.kv_backend.txn(txn).await?;
1306        // Checks whether metadata was already updated.
1307        if !r.succeeded {
1308            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1309            let remote_table_info = on_update_table_info_failure(&mut set)?
1310                .context(error::UnexpectedSnafu {
1311                    err_msg: "Reads the empty table info in comparing operation of the updating table info",
1312                })?
1313                .into_inner();
1314
1315            let op_name = "the updating table info";
1316            ensure_values!(remote_table_info, new_table_info_value, op_name);
1317        }
1318        Ok(())
1319    }
1320
1321    /// Updates view info and returns an error if different metadata exists.
1322    /// Parameters include:
1323    /// - `view_id`: the view id
1324    /// - `current_view_info_value`: the current view info for CAS checking
1325    /// - `new_view_info`: the encoded logical plan
1326    /// - `table_names`: the resolved fully table names in logical plan
1327    /// - `columns`: the view columns
1328    /// - `plan_columns`: the original plan columns
1329    /// - `definition`: The SQL to create the view
1330    ///
1331    #[allow(clippy::too_many_arguments)]
1332    pub async fn update_view_info(
1333        &self,
1334        view_id: TableId,
1335        current_view_info_value: &DeserializedValueWithBytes<ViewInfoValue>,
1336        new_view_info: Vec<u8>,
1337        table_names: HashSet<TableName>,
1338        columns: Vec<String>,
1339        plan_columns: Vec<String>,
1340        definition: String,
1341    ) -> Result<()> {
1342        let new_view_info_value = current_view_info_value.update(
1343            new_view_info.into(),
1344            table_names,
1345            columns,
1346            plan_columns,
1347            definition,
1348        );
1349
1350        // Updates view info.
1351        let (update_view_info_txn, on_update_view_info_failure) = self
1352            .view_info_manager()
1353            .build_update_txn(view_id, current_view_info_value, &new_view_info_value)?;
1354
1355        let mut r = self.kv_backend.txn(update_view_info_txn).await?;
1356
1357        // Checks whether metadata was already updated.
1358        if !r.succeeded {
1359            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1360            let remote_view_info = on_update_view_info_failure(&mut set)?
1361                .context(error::UnexpectedSnafu {
1362                    err_msg: "Reads the empty view info in comparing operation of the updating view info",
1363                })?
1364                .into_inner();
1365
1366            let op_name = "the updating view info";
1367            ensure_values!(remote_view_info, new_view_info_value, op_name);
1368        }
1369        Ok(())
1370    }
1371
1372    pub fn batch_update_table_info_value_chunk_size(&self) -> usize {
1373        self.kv_backend.max_txn_ops()
1374    }
1375
1376    pub async fn batch_update_table_info_values(
1377        &self,
1378        table_info_value_pairs: Vec<(DeserializedValueWithBytes<TableInfoValue>, TableInfo)>,
1379    ) -> Result<()> {
1380        let len = table_info_value_pairs.len();
1381        let mut txns = Vec::with_capacity(len);
1382        struct OnFailure<F, R>
1383        where
1384            F: FnOnce(&mut TxnOpGetResponseSet) -> R,
1385        {
1386            table_info_value: TableInfoValue,
1387            on_update_table_info_failure: F,
1388        }
1389        let mut on_failures = Vec::with_capacity(len);
1390
1391        for (table_info_value, new_table_info) in table_info_value_pairs {
1392            let table_id = table_info_value.table_info.ident.table_id;
1393
1394            let new_table_info_value = table_info_value.update(new_table_info);
1395
1396            let (update_table_info_txn, on_update_table_info_failure) =
1397                self.table_info_manager().build_update_txn(
1398                    table_id,
1399                    &table_info_value,
1400                    &new_table_info_value,
1401                )?;
1402
1403            txns.push(update_table_info_txn);
1404
1405            on_failures.push(OnFailure {
1406                table_info_value: new_table_info_value,
1407                on_update_table_info_failure,
1408            });
1409        }
1410
1411        let txn = Txn::merge_all(txns);
1412        let mut r = self.kv_backend.txn(txn).await?;
1413
1414        if !r.succeeded {
1415            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1416            for on_failure in on_failures {
1417                let remote_table_info = (on_failure.on_update_table_info_failure)(&mut set)?
1418                    .context(error::UnexpectedSnafu {
1419                        err_msg: "Reads the empty table info in comparing operation of the updating table info",
1420                    })?
1421                    .into_inner();
1422
1423                let op_name = "the batch updating table info";
1424                ensure_values!(remote_table_info, on_failure.table_info_value, op_name);
1425            }
1426        }
1427
1428        Ok(())
1429    }
1430
1431    pub async fn update_table_route(
1432        &self,
1433        table_id: TableId,
1434        region_info: RegionInfo,
1435        current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
1436        new_region_routes: Vec<RegionRoute>,
1437        new_region_options: &HashMap<String, String>,
1438        new_region_wal_options: &HashMap<RegionNumber, String>,
1439    ) -> Result<()> {
1440        // Updates the datanode table key value pairs.
1441        let current_region_distribution =
1442            region_distribution(current_table_route_value.region_routes()?);
1443        let new_region_distribution = region_distribution(&new_region_routes);
1444
1445        let update_topic_region_txn = self.topic_region_manager.build_update_txn(
1446            table_id,
1447            &region_info.region_wal_options,
1448            new_region_wal_options,
1449        )?;
1450        let update_datanode_table_txn = self.datanode_table_manager().build_update_txn(
1451            table_id,
1452            region_info,
1453            current_region_distribution,
1454            new_region_distribution,
1455            new_region_options,
1456            new_region_wal_options,
1457        )?;
1458
1459        // Updates the table_route.
1460        let new_table_route_value = current_table_route_value.update(new_region_routes)?;
1461        let (update_table_route_txn, on_update_table_route_failure) = self
1462            .table_route_manager()
1463            .table_route_storage()
1464            .build_update_txn(table_id, current_table_route_value, &new_table_route_value)?;
1465
1466        let txn = Txn::merge_all(vec![
1467            update_datanode_table_txn,
1468            update_table_route_txn,
1469            update_topic_region_txn,
1470        ]);
1471
1472        let mut r = self.kv_backend.txn(txn).await?;
1473
1474        // Checks whether metadata was already updated.
1475        if !r.succeeded {
1476            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1477            let remote_table_route = on_update_table_route_failure(&mut set)?
1478                .context(error::UnexpectedSnafu {
1479                    err_msg: "Reads the empty table route in comparing operation of the updating table route",
1480                })?
1481                .into_inner();
1482
1483            let op_name = "the updating table route";
1484            ensure_values!(remote_table_route, new_table_route_value, op_name);
1485        }
1486
1487        Ok(())
1488    }
1489
1490    /// Updates the leader status of the [RegionRoute].
1491    pub async fn update_leader_region_status<F>(
1492        &self,
1493        table_id: TableId,
1494        current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
1495        next_region_route_status: F,
1496    ) -> Result<()>
1497    where
1498        F: Fn(&RegionRoute) -> Option<Option<LeaderState>>,
1499    {
1500        let mut new_region_routes = current_table_route_value.region_routes()?.clone();
1501
1502        let mut updated = 0;
1503        for route in &mut new_region_routes {
1504            if let Some(state) = next_region_route_status(route)
1505                && route.set_leader_state(state)
1506            {
1507                updated += 1;
1508            }
1509        }
1510
1511        if updated == 0 {
1512            warn!("No leader status updated");
1513            return Ok(());
1514        }
1515
1516        // Updates the table_route.
1517        let new_table_route_value = current_table_route_value.update(new_region_routes)?;
1518
1519        let (update_table_route_txn, on_update_table_route_failure) = self
1520            .table_route_manager()
1521            .table_route_storage()
1522            .build_update_txn(table_id, current_table_route_value, &new_table_route_value)?;
1523
1524        let mut r = self.kv_backend.txn(update_table_route_txn).await?;
1525
1526        // Checks whether metadata was already updated.
1527        if !r.succeeded {
1528            let mut set = TxnOpGetResponseSet::from(&mut r.responses);
1529            let remote_table_route = on_update_table_route_failure(&mut set)?
1530                .context(error::UnexpectedSnafu {
1531                    err_msg: "Reads the empty table route in comparing operation of the updating leader region status",
1532                })?
1533                .into_inner();
1534
1535            let op_name = "the updating leader region status";
1536            ensure_values!(remote_table_route, new_table_route_value, op_name);
1537        }
1538
1539        Ok(())
1540    }
1541}
1542
/// Implements `$crate::key::MetadataValue` for every listed type, using `serde_json`
/// for both directions of the conversion.
///
/// The expansion refers to `Result`, `serde_json`, `context` and `SerdeJsonSnafu` by
/// their bare names, so these must be in scope at the call site.
#[macro_export]
macro_rules! impl_metadata_value {
    ($($value_type: ty), *) => {
        $(
            impl $crate::key::MetadataValue for $value_type {
                /// Decodes the value from its JSON bytes.
                fn try_from_raw_value(bytes: &[u8]) -> Result<Self> {
                    serde_json::from_slice(bytes).context(SerdeJsonSnafu)
                }

                /// Encodes the value as JSON bytes.
                fn try_as_raw_value(&self) -> Result<Vec<u8>> {
                    serde_json::to_vec(self).context(SerdeJsonSnafu)
                }
            }
        )*
    }
}
1559
1560macro_rules! impl_metadata_key_get_txn_op {
1561    ($($key: ty), *) => {
1562        $(
1563            impl $crate::key::MetadataKeyGetTxnOp for $key {
1564                /// Returns a [TxnOp] to retrieve the corresponding value
1565                /// and a filter to retrieve the value from the [TxnOpGetResponseSet]
1566                fn build_get_op(
1567                    &self,
1568                ) -> (
1569                    TxnOp,
1570                    impl for<'a> FnMut(
1571                        &'a mut TxnOpGetResponseSet,
1572                    ) -> Option<Vec<u8>>,
1573                ) {
1574                    let raw_key = self.to_bytes();
1575                    (
1576                        TxnOp::Get(raw_key.clone()),
1577                        TxnOpGetResponseSet::filter(raw_key),
1578                    )
1579                }
1580            }
1581        )*
1582    }
1583}
1584
1585impl_metadata_key_get_txn_op! {
1586    TableNameKey<'_>,
1587    TableInfoKey,
1588    ViewInfoKey,
1589    TableRouteKey,
1590    DatanodeTableKey
1591}
1592
1593#[macro_export]
1594macro_rules! impl_optional_metadata_value {
1595    ($($val_ty: ty), *) => {
1596        $(
1597            impl $val_ty {
1598                pub fn try_from_raw_value(raw_value: &[u8]) -> Result<Option<Self>> {
1599                    serde_json::from_slice(raw_value).context(SerdeJsonSnafu)
1600                }
1601
1602                pub fn try_as_raw_value(&self) -> Result<Vec<u8>> {
1603                    serde_json::to_vec(self).context(SerdeJsonSnafu)
1604                }
1605            }
1606        )*
1607    }
1608}
1609
1610impl_metadata_value! {
1611    TableNameValue,
1612    TableInfoValue,
1613    ViewInfoValue,
1614    DatanodeTableValue,
1615    FlowInfoValue,
1616    FlowNameValue,
1617    FlowRouteValue,
1618    TableFlowValue,
1619    NodeAddressValue,
1620    SchemaNameValue,
1621    FlowStateValue,
1622    PoisonValue,
1623    TopicRegionValue
1624}
1625
1626impl_optional_metadata_value! {
1627    CatalogNameValue,
1628    SchemaNameValue
1629}
1630
1631#[cfg(test)]
1632mod tests {
1633    use std::collections::{BTreeMap, HashMap, HashSet};
1634    use std::sync::Arc;
1635
1636    use bytes::Bytes;
1637    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
1638    use common_time::util::current_time_millis;
1639    use common_wal::options::{KafkaWalOptions, WalOptions};
1640    use futures::TryStreamExt;
1641    use store_api::storage::{RegionId, RegionNumber};
1642    use table::metadata::TableInfo;
1643    use table::table_name::TableName;
1644
1645    use super::datanode_table::DatanodeTableKey;
1646    use super::test_utils;
1647    use crate::ddl::allocator::wal_options::WalOptionsAllocator;
1648    use crate::ddl::test_util::create_table::test_create_table_task;
1649    use crate::ddl::utils::region_storage_path;
1650    use crate::error::Result;
1651    use crate::key::datanode_table::RegionInfo;
1652    use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
1653    use crate::key::table_info::TableInfoValue;
1654    use crate::key::table_name::TableNameKey;
1655    use crate::key::table_route::TableRouteValue;
1656    use crate::key::topic_region::TopicRegionKey;
1657    use crate::key::{
1658        DeserializedValueWithBytes, MetadataValue, RegionDistribution, RegionRoleSet,
1659        TOPIC_REGION_PREFIX, TableMetadataManager, ViewInfoValue,
1660    };
1661    use crate::kv_backend::KvBackend;
1662    use crate::kv_backend::memory::MemoryKvBackend;
1663    use crate::peer::Peer;
1664    use crate::rpc::router::{LeaderState, Region, RegionRoute, region_distribution};
1665    use crate::rpc::store::{PutRequest, RangeRequest};
1666    use crate::wal_provider::WalProvider;
1667
1668    #[test]
1669    fn test_deserialized_value_with_bytes() {
1670        let region_route = new_test_region_route();
1671        let region_routes = vec![region_route.clone()];
1672
1673        let expected_region_routes =
1674            TableRouteValue::physical(vec![region_route.clone(), region_route.clone()]);
1675        let expected = serde_json::to_vec(&expected_region_routes).unwrap();
1676
1677        // Serialize behaviors:
1678        // The inner field will be ignored.
1679        let value = DeserializedValueWithBytes {
1680            // ignored
1681            inner: TableRouteValue::physical(region_routes.clone()),
1682            bytes: Bytes::from(expected.clone()),
1683        };
1684
1685        let encoded = serde_json::to_vec(&value).unwrap();
1686
1687        // Deserialize behaviors:
1688        // The inner field will be deserialized from the bytes field.
1689        let decoded: DeserializedValueWithBytes<TableRouteValue> =
1690            serde_json::from_slice(&encoded).unwrap();
1691
1692        assert_eq!(decoded.inner, expected_region_routes);
1693        assert_eq!(decoded.bytes, expected);
1694    }
1695
1696    fn new_test_region_route() -> RegionRoute {
1697        new_region_route(1, 2)
1698    }
1699
1700    fn new_region_route(region_id: u64, datanode: u64) -> RegionRoute {
1701        RegionRoute {
1702            region: Region {
1703                id: region_id.into(),
1704                name: "r1".to_string(),
1705                attrs: BTreeMap::new(),
1706                partition_expr: Default::default(),
1707            },
1708            leader_peer: Some(Peer::new(datanode, "a2")),
1709            follower_peers: vec![],
1710            leader_state: None,
1711            leader_down_since: None,
1712            write_route_policy: None,
1713        }
1714    }
1715
1716    fn new_test_table_info() -> TableInfo {
1717        test_utils::new_test_table_info(10)
1718    }
1719
1720    fn new_test_table_names() -> HashSet<TableName> {
1721        let mut set = HashSet::new();
1722        set.insert(TableName {
1723            catalog_name: "greptime".to_string(),
1724            schema_name: "public".to_string(),
1725            table_name: "a_table".to_string(),
1726        });
1727        set.insert(TableName {
1728            catalog_name: "greptime".to_string(),
1729            schema_name: "public".to_string(),
1730            table_name: "b_table".to_string(),
1731        });
1732        set
1733    }
1734
1735    async fn create_physical_table_metadata(
1736        table_metadata_manager: &TableMetadataManager,
1737        table_info: TableInfo,
1738        region_routes: Vec<RegionRoute>,
1739        region_wal_options: HashMap<RegionNumber, String>,
1740    ) -> Result<()> {
1741        table_metadata_manager
1742            .create_table_metadata(
1743                table_info,
1744                TableRouteValue::physical(region_routes),
1745                region_wal_options,
1746            )
1747            .await
1748    }
1749
1750    fn create_mock_region_wal_options() -> HashMap<RegionNumber, WalOptions> {
1751        let topics = (0..2)
1752            .map(|i| format!("greptimedb_topic{}", i))
1753            .collect::<Vec<_>>();
1754        let wal_options = topics
1755            .iter()
1756            .map(|topic| {
1757                WalOptions::Kafka(KafkaWalOptions {
1758                    topic: topic.clone(),
1759                })
1760            })
1761            .collect::<Vec<_>>();
1762
1763        (0..16)
1764            .enumerate()
1765            .map(|(i, region_number)| (region_number, wal_options[i % wal_options.len()].clone()))
1766            .collect()
1767    }
1768
1769    fn create_mixed_region_wal_options() -> HashMap<RegionNumber, WalOptions> {
1770        HashMap::from([
1771            (
1772                0,
1773                WalOptions::Kafka(KafkaWalOptions {
1774                    topic: "greptimedb_topic0".to_string(),
1775                }),
1776            ),
1777            (1, WalOptions::RaftEngine),
1778            (2, WalOptions::Noop),
1779            (
1780                3,
1781                WalOptions::Kafka(KafkaWalOptions {
1782                    topic: "greptimedb_topic1".to_string(),
1783                }),
1784            ),
1785        ])
1786    }
1787
1788    #[tokio::test]
1789    async fn test_raft_engine_topic_region_map() {
1790        let mem_kv = Arc::new(MemoryKvBackend::default());
1791        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
1792        let region_route = new_test_region_route();
1793        let region_routes = &vec![region_route.clone()];
1794        let table_info = new_test_table_info();
1795        let wal_provider = WalProvider::RaftEngine;
1796        let regions: Vec<_> = (0..16).collect();
1797        let region_wal_options = wal_provider.allocate(&regions, false).await.unwrap();
1798        create_physical_table_metadata(
1799            &table_metadata_manager,
1800            table_info.clone(),
1801            region_routes.clone(),
1802            region_wal_options.clone(),
1803        )
1804        .await
1805        .unwrap();
1806
1807        let topic_region_key = TOPIC_REGION_PREFIX.to_string();
1808        let range_req = RangeRequest::new().with_prefix(topic_region_key);
1809        let resp = mem_kv.range(range_req).await.unwrap();
1810        // Should be empty because the topic region map is empty for raft engine.
1811        assert!(resp.kvs.is_empty());
1812    }
1813
1814    #[tokio::test]
1815    async fn test_create_table_metadata() {
1816        let mem_kv = Arc::new(MemoryKvBackend::default());
1817        let table_metadata_manager = TableMetadataManager::new(mem_kv);
1818        let region_route = new_test_region_route();
1819        let region_routes = &vec![region_route.clone()];
1820        let table_info = new_test_table_info();
1821        let region_wal_options = create_mock_region_wal_options()
1822            .into_iter()
1823            .map(|(k, v)| (k, serde_json::to_string(&v).unwrap()))
1824            .collect::<HashMap<_, _>>();
1825
1826        // creates metadata.
1827        create_physical_table_metadata(
1828            &table_metadata_manager,
1829            table_info.clone(),
1830            region_routes.clone(),
1831            region_wal_options.clone(),
1832        )
1833        .await
1834        .unwrap();
1835
1836        // if metadata was already created, it should be ok.
1837        assert!(
1838            create_physical_table_metadata(
1839                &table_metadata_manager,
1840                table_info.clone(),
1841                region_routes.clone(),
1842                region_wal_options.clone(),
1843            )
1844            .await
1845            .is_ok()
1846        );
1847
1848        let mut modified_region_routes = region_routes.clone();
1849        modified_region_routes.push(region_route.clone());
1850        // if remote metadata was exists, it should return an error.
1851        assert!(
1852            create_physical_table_metadata(
1853                &table_metadata_manager,
1854                table_info.clone(),
1855                modified_region_routes,
1856                region_wal_options.clone(),
1857            )
1858            .await
1859            .is_err()
1860        );
1861
1862        let (remote_table_info, remote_table_route) = table_metadata_manager
1863            .get_full_table_info(10)
1864            .await
1865            .unwrap();
1866
1867        assert_eq!(
1868            remote_table_info.unwrap().into_inner().table_info,
1869            table_info
1870        );
1871        assert_eq!(
1872            remote_table_route
1873                .unwrap()
1874                .into_inner()
1875                .region_routes()
1876                .unwrap(),
1877            region_routes
1878        );
1879
1880        for i in 0..2 {
1881            let region_number = i as u32;
1882            let region_id = RegionId::new(table_info.ident.table_id, region_number);
1883            let topic = format!("greptimedb_topic{}", i);
1884            let regions = table_metadata_manager
1885                .topic_region_manager
1886                .regions(&topic)
1887                .await
1888                .unwrap()
1889                .into_keys()
1890                .collect::<Vec<_>>();
1891            assert_eq!(regions.len(), 8);
1892            assert!(regions.contains(&region_id));
1893        }
1894    }
1895
1896    #[tokio::test]
1897    async fn test_get_full_table_info_remaps_route_address() {
1898        let mem_kv = Arc::new(MemoryKvBackend::default());
1899        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
1900
1901        let mut region_route = new_test_region_route();
1902        region_route.follower_peers = vec![Peer::empty(3)];
1903        let region_routes = vec![region_route];
1904        let table_info = new_test_table_info();
1905        let table_id = table_info.ident.table_id;
1906
1907        create_physical_table_metadata(
1908            &table_metadata_manager,
1909            table_info,
1910            region_routes,
1911            HashMap::new(),
1912        )
1913        .await
1914        .unwrap();
1915
1916        mem_kv
1917            .put(PutRequest {
1918                key: NodeAddressKey::with_datanode(2).to_string().into_bytes(),
1919                value: NodeAddressValue::new(Peer::new(2, "new-a2"))
1920                    .try_as_raw_value()
1921                    .unwrap(),
1922                ..Default::default()
1923            })
1924            .await
1925            .unwrap();
1926        mem_kv
1927            .put(PutRequest {
1928                key: NodeAddressKey::with_datanode(3).to_string().into_bytes(),
1929                value: NodeAddressValue::new(Peer::new(3, "new-a3"))
1930                    .try_as_raw_value()
1931                    .unwrap(),
1932                ..Default::default()
1933            })
1934            .await
1935            .unwrap();
1936
1937        let (_, table_route) = table_metadata_manager
1938            .get_full_table_info(table_id)
1939            .await
1940            .unwrap();
1941        let table_route = table_route.unwrap().into_inner();
1942        let region_routes = table_route.region_routes().unwrap();
1943
1944        assert_eq!(
1945            region_routes[0].leader_peer.as_ref().unwrap().addr,
1946            "new-a2"
1947        );
1948        assert_eq!(region_routes[0].follower_peers[0].addr, "new-a3");
1949    }
1950
1951    #[tokio::test]
1952    async fn test_create_logic_tables_metadata() {
1953        let mem_kv = Arc::new(MemoryKvBackend::default());
1954        let table_metadata_manager = TableMetadataManager::new(mem_kv);
1955        let region_route = new_test_region_route();
1956        let region_routes = vec![region_route.clone()];
1957        let table_info = new_test_table_info();
1958        let table_id = table_info.ident.table_id;
1959        let table_route_value = TableRouteValue::physical(region_routes.clone());
1960
1961        let tables_data = vec![(table_info.clone(), table_route_value.clone())];
1962        // creates metadata.
1963        table_metadata_manager
1964            .create_logical_tables_metadata(tables_data.clone())
1965            .await
1966            .unwrap();
1967
1968        // if metadata was already created, it should be ok.
1969        assert!(
1970            table_metadata_manager
1971                .create_logical_tables_metadata(tables_data)
1972                .await
1973                .is_ok()
1974        );
1975
1976        let mut modified_region_routes = region_routes.clone();
1977        modified_region_routes.push(new_region_route(2, 3));
1978        let modified_table_route_value = TableRouteValue::physical(modified_region_routes.clone());
1979        let modified_tables_data = vec![(table_info.clone(), modified_table_route_value)];
1980        // if remote metadata was exists, it should return an error.
1981        assert!(
1982            table_metadata_manager
1983                .create_logical_tables_metadata(modified_tables_data)
1984                .await
1985                .is_err()
1986        );
1987
1988        let (remote_table_info, remote_table_route) = table_metadata_manager
1989            .get_full_table_info(table_id)
1990            .await
1991            .unwrap();
1992
1993        assert_eq!(
1994            remote_table_info.unwrap().into_inner().table_info,
1995            table_info
1996        );
1997        assert_eq!(
1998            remote_table_route
1999                .unwrap()
2000                .into_inner()
2001                .region_routes()
2002                .unwrap(),
2003            &region_routes
2004        );
2005    }
2006
2007    #[tokio::test]
2008    async fn test_create_many_logical_tables_metadata() {
2009        let kv_backend = Arc::new(MemoryKvBackend::default());
2010        let table_metadata_manager = TableMetadataManager::new(kv_backend);
2011
2012        let mut tables_data = vec![];
2013        for i in 0..128 {
2014            let table_id = i + 1;
2015            let regin_number = table_id * 3;
2016            let region_id = RegionId::new(table_id, regin_number);
2017            let region_route = new_region_route(region_id.as_u64(), 2);
2018            let region_routes = vec![region_route.clone()];
2019            let table_info = test_utils::new_test_table_info_with_name(
2020                table_id,
2021                &format!("my_table_{}", table_id),
2022            );
2023            let table_route_value = TableRouteValue::physical(region_routes.clone());
2024
2025            tables_data.push((table_info, table_route_value));
2026        }
2027
2028        // creates metadata.
2029        table_metadata_manager
2030            .create_logical_tables_metadata(tables_data)
2031            .await
2032            .unwrap();
2033    }
2034
2035    #[tokio::test]
2036    async fn test_delete_table_metadata() {
2037        let mem_kv = Arc::new(MemoryKvBackend::default());
2038        let table_metadata_manager = TableMetadataManager::new(mem_kv);
2039        let region_route = new_test_region_route();
2040        let region_routes = &vec![region_route.clone()];
2041        let table_info = new_test_table_info();
2042        let table_id = table_info.ident.table_id;
2043        let datanode_id = 2;
2044        let region_wal_options = create_mock_region_wal_options();
2045        let serialized_region_wal_options = region_wal_options
2046            .iter()
2047            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
2048            .collect::<HashMap<_, _>>();
2049
2050        // creates metadata.
2051        create_physical_table_metadata(
2052            &table_metadata_manager,
2053            table_info.clone(),
2054            region_routes.clone(),
2055            serialized_region_wal_options,
2056        )
2057        .await
2058        .unwrap();
2059
2060        let table_name = TableName::new(
2061            table_info.catalog_name,
2062            table_info.schema_name,
2063            table_info.name,
2064        );
2065        let table_route_value = &TableRouteValue::physical(region_routes.clone());
2066        // deletes metadata.
2067        table_metadata_manager
2068            .delete_table_metadata(
2069                table_id,
2070                &table_name,
2071                table_route_value,
2072                &region_wal_options,
2073            )
2074            .await
2075            .unwrap();
2076        // Should be ignored.
2077        table_metadata_manager
2078            .delete_table_metadata(
2079                table_id,
2080                &table_name,
2081                table_route_value,
2082                &region_wal_options,
2083            )
2084            .await
2085            .unwrap();
2086        assert!(
2087            table_metadata_manager
2088                .table_info_manager()
2089                .get(table_id)
2090                .await
2091                .unwrap()
2092                .is_none()
2093        );
2094        assert!(
2095            table_metadata_manager
2096                .table_route_manager()
2097                .table_route_storage()
2098                .get(table_id)
2099                .await
2100                .unwrap()
2101                .is_none()
2102        );
2103        assert!(
2104            table_metadata_manager
2105                .datanode_table_manager()
2106                .tables(datanode_id)
2107                .try_collect::<Vec<_>>()
2108                .await
2109                .unwrap()
2110                .is_empty()
2111        );
2112        // Checks removed values
2113        let table_info = table_metadata_manager
2114            .table_info_manager()
2115            .get(table_id)
2116            .await
2117            .unwrap();
2118        assert!(table_info.is_none());
2119        let table_route = table_metadata_manager
2120            .table_route_manager()
2121            .table_route_storage()
2122            .get(table_id)
2123            .await
2124            .unwrap();
2125        assert!(table_route.is_none());
2126        // Logical delete removes the topic region mapping as well.
2127        let regions = table_metadata_manager
2128            .topic_region_manager
2129            .regions("greptimedb_topic0")
2130            .await
2131            .unwrap();
2132        assert_eq!(regions.len(), 0);
2133        let regions = table_metadata_manager
2134            .topic_region_manager
2135            .regions("greptimedb_topic1")
2136            .await
2137            .unwrap();
2138        assert_eq!(regions.len(), 0);
2139    }
2140
2141    #[tokio::test]
2142    async fn test_rename_table() {
2143        let mem_kv = Arc::new(MemoryKvBackend::default());
2144        let table_metadata_manager = TableMetadataManager::new(mem_kv);
2145        let region_route = new_test_region_route();
2146        let region_routes = vec![region_route.clone()];
2147        let table_info = new_test_table_info();
2148        let table_id = table_info.ident.table_id;
2149        // creates metadata.
2150        create_physical_table_metadata(
2151            &table_metadata_manager,
2152            table_info.clone(),
2153            region_routes.clone(),
2154            HashMap::new(),
2155        )
2156        .await
2157        .unwrap();
2158
2159        let new_table_name = "another_name".to_string();
2160        let table_info_value =
2161            DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
2162
2163        table_metadata_manager
2164            .rename_table(&table_info_value, new_table_name.clone())
2165            .await
2166            .unwrap();
2167        // if remote metadata was updated, it should be ok.
2168        table_metadata_manager
2169            .rename_table(&table_info_value, new_table_name.clone())
2170            .await
2171            .unwrap();
2172        let mut modified_table_info = table_info.clone();
2173        modified_table_info.name = "hi".to_string();
2174        let modified_table_info_value =
2175            DeserializedValueWithBytes::from_inner(table_info_value.update(modified_table_info));
2176        // if the table_info_value is wrong, it should return an error.
2177        // The ABA problem.
2178        assert!(
2179            table_metadata_manager
2180                .rename_table(&modified_table_info_value, new_table_name.clone())
2181                .await
2182                .is_err()
2183        );
2184
2185        let old_table_name = TableNameKey::new(
2186            &table_info.catalog_name,
2187            &table_info.schema_name,
2188            &table_info.name,
2189        );
2190        let new_table_name = TableNameKey::new(
2191            &table_info.catalog_name,
2192            &table_info.schema_name,
2193            &new_table_name,
2194        );
2195
2196        assert!(
2197            table_metadata_manager
2198                .table_name_manager()
2199                .get(old_table_name)
2200                .await
2201                .unwrap()
2202                .is_none()
2203        );
2204
2205        assert_eq!(
2206            table_metadata_manager
2207                .table_name_manager()
2208                .get(new_table_name)
2209                .await
2210                .unwrap()
2211                .unwrap()
2212                .table_id(),
2213            table_id
2214        );
2215    }
2216
2217    #[tokio::test]
2218    async fn test_update_table_info() {
2219        let mem_kv = Arc::new(MemoryKvBackend::default());
2220        let table_metadata_manager = TableMetadataManager::new(mem_kv);
2221        let region_route = new_test_region_route();
2222        let region_routes = vec![region_route.clone()];
2223        let table_info = new_test_table_info();
2224        let table_id = table_info.ident.table_id;
2225        // creates metadata.
2226        create_physical_table_metadata(
2227            &table_metadata_manager,
2228            table_info.clone(),
2229            region_routes.clone(),
2230            HashMap::new(),
2231        )
2232        .await
2233        .unwrap();
2234
2235        let mut new_table_info = table_info.clone();
2236        new_table_info.name = "hi".to_string();
2237        let current_table_info_value =
2238            DeserializedValueWithBytes::from_inner(TableInfoValue::new(table_info.clone()));
2239        // should be ok.
2240        table_metadata_manager
2241            .update_table_info(&current_table_info_value, None, new_table_info.clone())
2242            .await
2243            .unwrap();
2244        // if table info was updated, it should be ok.
2245        table_metadata_manager
2246            .update_table_info(&current_table_info_value, None, new_table_info.clone())
2247            .await
2248            .unwrap();
2249
2250        // updated table_info should equal the `new_table_info`
2251        let updated_table_info = table_metadata_manager
2252            .table_info_manager()
2253            .get(table_id)
2254            .await
2255            .unwrap()
2256            .unwrap()
2257            .into_inner();
2258        assert_eq!(updated_table_info.table_info, new_table_info);
2259
2260        let mut wrong_table_info = table_info.clone();
2261        wrong_table_info.name = "wrong".to_string();
2262        let wrong_table_info_value = DeserializedValueWithBytes::from_inner(
2263            current_table_info_value.update(wrong_table_info),
2264        );
2265        // if the current_table_info_value is wrong, it should return an error.
2266        // The ABA problem.
2267        assert!(
2268            table_metadata_manager
2269                .update_table_info(&wrong_table_info_value, None, new_table_info)
2270                .await
2271                .is_err()
2272        )
2273    }
2274
2275    #[tokio::test]
2276    async fn test_update_table_leader_region_status() {
2277        let mem_kv = Arc::new(MemoryKvBackend::default());
2278        let table_metadata_manager = TableMetadataManager::new(mem_kv);
2279        let datanode = 1;
2280        let region_routes = vec![
2281            RegionRoute {
2282                region: Region {
2283                    id: 1.into(),
2284                    name: "r1".to_string(),
2285                    attrs: BTreeMap::new(),
2286                    partition_expr: Default::default(),
2287                },
2288                leader_peer: Some(Peer::new(datanode, "a2")),
2289                leader_state: Some(LeaderState::Downgrading),
2290                follower_peers: vec![],
2291                leader_down_since: Some(current_time_millis()),
2292                write_route_policy: None,
2293            },
2294            RegionRoute {
2295                region: Region {
2296                    id: 2.into(),
2297                    name: "r2".to_string(),
2298                    attrs: BTreeMap::new(),
2299                    partition_expr: Default::default(),
2300                },
2301                leader_peer: Some(Peer::new(datanode, "a1")),
2302                leader_state: None,
2303                follower_peers: vec![],
2304                leader_down_since: None,
2305                write_route_policy: None,
2306            },
2307        ];
2308        let table_info = new_test_table_info();
2309        let table_id = table_info.ident.table_id;
2310        let current_table_route_value = DeserializedValueWithBytes::from_inner(
2311            TableRouteValue::physical(region_routes.clone()),
2312        );
2313
2314        // creates metadata.
2315        create_physical_table_metadata(
2316            &table_metadata_manager,
2317            table_info.clone(),
2318            region_routes.clone(),
2319            HashMap::new(),
2320        )
2321        .await
2322        .unwrap();
2323
2324        table_metadata_manager
2325            .update_leader_region_status(table_id, &current_table_route_value, |region_route| {
2326                if region_route.leader_state.is_some() {
2327                    None
2328                } else {
2329                    Some(Some(LeaderState::Downgrading))
2330                }
2331            })
2332            .await
2333            .unwrap();
2334
2335        let updated_route_value = table_metadata_manager
2336            .table_route_manager()
2337            .table_route_storage()
2338            .get(table_id)
2339            .await
2340            .unwrap()
2341            .unwrap();
2342
2343        assert_eq!(
2344            updated_route_value.region_routes().unwrap()[0].leader_state,
2345            Some(LeaderState::Downgrading)
2346        );
2347
2348        assert!(
2349            updated_route_value.region_routes().unwrap()[0]
2350                .leader_down_since
2351                .is_some()
2352        );
2353
2354        assert_eq!(
2355            updated_route_value.region_routes().unwrap()[1].leader_state,
2356            Some(LeaderState::Downgrading)
2357        );
2358        assert!(
2359            updated_route_value.region_routes().unwrap()[1]
2360                .leader_down_since
2361                .is_some()
2362        );
2363    }
2364
2365    async fn assert_datanode_table(
2366        table_metadata_manager: &TableMetadataManager,
2367        table_id: u32,
2368        region_routes: &[RegionRoute],
2369    ) {
2370        let region_distribution = region_distribution(region_routes);
2371        for (datanode, regions) in region_distribution {
2372            let got = table_metadata_manager
2373                .datanode_table_manager()
2374                .get(&DatanodeTableKey::new(datanode, table_id))
2375                .await
2376                .unwrap()
2377                .unwrap();
2378
2379            assert_eq!(got.regions, regions.leader_regions);
2380            assert_eq!(got.follower_regions, regions.follower_regions);
2381        }
2382    }
2383
2384    #[tokio::test]
2385    async fn test_update_table_route() {
2386        let mem_kv = Arc::new(MemoryKvBackend::default());
2387        let table_metadata_manager = TableMetadataManager::new(mem_kv);
2388        let region_route = new_test_region_route();
2389        let region_routes = vec![region_route.clone()];
2390        let table_info = new_test_table_info();
2391        let table_id = table_info.ident.table_id;
2392        let engine = table_info.meta.engine.as_str();
2393        let region_storage_path =
2394            region_storage_path(&table_info.catalog_name, &table_info.schema_name);
2395        let current_table_route_value = DeserializedValueWithBytes::from_inner(
2396            TableRouteValue::physical(region_routes.clone()),
2397        );
2398
2399        // creates metadata.
2400        create_physical_table_metadata(
2401            &table_metadata_manager,
2402            table_info.clone(),
2403            region_routes.clone(),
2404            HashMap::new(),
2405        )
2406        .await
2407        .unwrap();
2408
2409        assert_datanode_table(&table_metadata_manager, table_id, &region_routes).await;
2410        let new_region_routes = vec![
2411            new_region_route(1, 1),
2412            new_region_route(2, 2),
2413            new_region_route(3, 3),
2414        ];
2415        // it should be ok.
2416        table_metadata_manager
2417            .update_table_route(
2418                table_id,
2419                RegionInfo {
2420                    engine: engine.to_string(),
2421                    region_storage_path: region_storage_path.clone(),
2422                    region_options: HashMap::new(),
2423                    region_wal_options: HashMap::new(),
2424                },
2425                &current_table_route_value,
2426                new_region_routes.clone(),
2427                &HashMap::new(),
2428                &HashMap::new(),
2429            )
2430            .await
2431            .unwrap();
2432        assert_datanode_table(&table_metadata_manager, table_id, &new_region_routes).await;
2433
2434        // if the table route was updated. it should be ok.
2435        table_metadata_manager
2436            .update_table_route(
2437                table_id,
2438                RegionInfo {
2439                    engine: engine.to_string(),
2440                    region_storage_path: region_storage_path.clone(),
2441                    region_options: HashMap::new(),
2442                    region_wal_options: HashMap::new(),
2443                },
2444                &current_table_route_value,
2445                new_region_routes.clone(),
2446                &HashMap::new(),
2447                &HashMap::new(),
2448            )
2449            .await
2450            .unwrap();
2451
2452        let current_table_route_value = DeserializedValueWithBytes::from_inner(
2453            current_table_route_value
2454                .inner
2455                .update(new_region_routes.clone())
2456                .unwrap(),
2457        );
2458        let new_region_routes = vec![new_region_route(2, 4), new_region_route(5, 5)];
2459        // it should be ok.
2460        table_metadata_manager
2461            .update_table_route(
2462                table_id,
2463                RegionInfo {
2464                    engine: engine.to_string(),
2465                    region_storage_path: region_storage_path.clone(),
2466                    region_options: HashMap::new(),
2467                    region_wal_options: HashMap::new(),
2468                },
2469                &current_table_route_value,
2470                new_region_routes.clone(),
2471                &HashMap::new(),
2472                &HashMap::new(),
2473            )
2474            .await
2475            .unwrap();
2476        assert_datanode_table(&table_metadata_manager, table_id, &new_region_routes).await;
2477
2478        // if the current_table_route_value is wrong, it should return an error.
2479        // The ABA problem.
2480        let wrong_table_route_value = DeserializedValueWithBytes::from_inner(
2481            current_table_route_value
2482                .update(vec![
2483                    new_region_route(1, 1),
2484                    new_region_route(2, 2),
2485                    new_region_route(3, 3),
2486                    new_region_route(4, 4),
2487                ])
2488                .unwrap(),
2489        );
2490        assert!(
2491            table_metadata_manager
2492                .update_table_route(
2493                    table_id,
2494                    RegionInfo {
2495                        engine: engine.to_string(),
2496                        region_storage_path: region_storage_path.clone(),
2497                        region_options: HashMap::new(),
2498                        region_wal_options: HashMap::new(),
2499                    },
2500                    &wrong_table_route_value,
2501                    new_region_routes,
2502                    &HashMap::new(),
2503                    &HashMap::new(),
2504                )
2505                .await
2506                .is_err()
2507        );
2508    }
2509
2510    #[tokio::test]
2511    async fn test_update_table_route_with_topic_region_mapping() {
2512        let mem_kv = Arc::new(MemoryKvBackend::default());
2513        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
2514        let region_route = new_test_region_route();
2515        let region_routes = vec![region_route.clone()];
2516        let table_info = new_test_table_info();
2517        let table_id = table_info.ident.table_id;
2518        let engine = table_info.meta.engine.as_str();
2519        let region_storage_path =
2520            region_storage_path(&table_info.catalog_name, &table_info.schema_name);
2521
2522        // Create initial metadata with Kafka WAL options
2523        let old_region_wal_options: HashMap<RegionNumber, String> = vec![
2524            (
2525                1,
2526                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2527                    topic: "topic_1".to_string(),
2528                }))
2529                .unwrap(),
2530            ),
2531            (
2532                2,
2533                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2534                    topic: "topic_2".to_string(),
2535                }))
2536                .unwrap(),
2537            ),
2538        ]
2539        .into_iter()
2540        .collect();
2541
2542        create_physical_table_metadata(
2543            &table_metadata_manager,
2544            table_info.clone(),
2545            region_routes.clone(),
2546            old_region_wal_options.clone(),
2547        )
2548        .await
2549        .unwrap();
2550
2551        let current_table_route_value = DeserializedValueWithBytes::from_inner(
2552            TableRouteValue::physical(region_routes.clone()),
2553        );
2554
2555        // Verify initial topic region mappings exist
2556        let region_id_1 = RegionId::new(table_id, 1);
2557        let region_id_2 = RegionId::new(table_id, 2);
2558        let topic_1_key = TopicRegionKey::new(region_id_1, "topic_1");
2559        let topic_2_key = TopicRegionKey::new(region_id_2, "topic_2");
2560        assert!(
2561            table_metadata_manager
2562                .topic_region_manager
2563                .get(topic_1_key.clone())
2564                .await
2565                .unwrap()
2566                .is_some()
2567        );
2568        assert!(
2569            table_metadata_manager
2570                .topic_region_manager
2571                .get(topic_2_key.clone())
2572                .await
2573                .unwrap()
2574                .is_some()
2575        );
2576
2577        // Test 1: Add new region with new topic
2578        let new_region_routes = vec![
2579            new_region_route(1, 1),
2580            new_region_route(2, 2),
2581            new_region_route(3, 3), // New region
2582        ];
2583        let new_region_wal_options: HashMap<RegionNumber, String> = vec![
2584            (
2585                1,
2586                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2587                    topic: "topic_1".to_string(), // Unchanged
2588                }))
2589                .unwrap(),
2590            ),
2591            (
2592                2,
2593                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2594                    topic: "topic_2".to_string(), // Unchanged
2595                }))
2596                .unwrap(),
2597            ),
2598            (
2599                3,
2600                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2601                    topic: "topic_3".to_string(), // New topic
2602                }))
2603                .unwrap(),
2604            ),
2605        ]
2606        .into_iter()
2607        .collect();
2608        let current_table_route_value_updated = DeserializedValueWithBytes::from_inner(
2609            current_table_route_value
2610                .inner
2611                .update(new_region_routes.clone())
2612                .unwrap(),
2613        );
2614        table_metadata_manager
2615            .update_table_route(
2616                table_id,
2617                RegionInfo {
2618                    engine: engine.to_string(),
2619                    region_storage_path: region_storage_path.clone(),
2620                    region_options: HashMap::new(),
2621                    region_wal_options: old_region_wal_options.clone(),
2622                },
2623                &current_table_route_value,
2624                new_region_routes.clone(),
2625                &HashMap::new(),
2626                &new_region_wal_options,
2627            )
2628            .await
2629            .unwrap();
2630        // Verify new topic region mapping was created
2631        let region_id_3 = RegionId::new(table_id, 3);
2632        let topic_3_key = TopicRegionKey::new(region_id_3, "topic_3");
2633        assert!(
2634            table_metadata_manager
2635                .topic_region_manager
2636                .get(topic_3_key)
2637                .await
2638                .unwrap()
2639                .is_some()
2640        );
2641        // Test 2: Remove a region and change topic for another
2642        let newer_region_routes = vec![
2643            new_region_route(1, 1),
2644            // Region 2 removed
2645            // Region 3 now has different topic
2646        ];
2647        let newer_region_wal_options: HashMap<RegionNumber, String> = vec![
2648            (
2649                1,
2650                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2651                    topic: "topic_1".to_string(), // Unchanged
2652                }))
2653                .unwrap(),
2654            ),
2655            (
2656                3,
2657                serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
2658                    topic: "topic_3_new".to_string(), // Changed topic
2659                }))
2660                .unwrap(),
2661            ),
2662        ]
2663        .into_iter()
2664        .collect();
2665        table_metadata_manager
2666            .update_table_route(
2667                table_id,
2668                RegionInfo {
2669                    engine: engine.to_string(),
2670                    region_storage_path: region_storage_path.clone(),
2671                    region_options: HashMap::new(),
2672                    region_wal_options: new_region_wal_options.clone(),
2673                },
2674                &current_table_route_value_updated,
2675                newer_region_routes.clone(),
2676                &HashMap::new(),
2677                &newer_region_wal_options,
2678            )
2679            .await
2680            .unwrap();
2681        // Verify region 2 mapping was deleted
2682        let topic_2_key_new = TopicRegionKey::new(region_id_2, "topic_2");
2683        assert!(
2684            table_metadata_manager
2685                .topic_region_manager
2686                .get(topic_2_key_new)
2687                .await
2688                .unwrap()
2689                .is_none()
2690        );
2691        // Verify region 3 old topic mapping was deleted
2692        let topic_3_key_old = TopicRegionKey::new(region_id_3, "topic_3");
2693        assert!(
2694            table_metadata_manager
2695                .topic_region_manager
2696                .get(topic_3_key_old)
2697                .await
2698                .unwrap()
2699                .is_none()
2700        );
2701        // Verify region 3 new topic mapping was created
2702        let topic_3_key_new = TopicRegionKey::new(region_id_3, "topic_3_new");
2703        assert!(
2704            table_metadata_manager
2705                .topic_region_manager
2706                .get(topic_3_key_new)
2707                .await
2708                .unwrap()
2709                .is_some()
2710        );
2711        // Verify region 1 mapping still exists (unchanged)
2712        assert!(
2713            table_metadata_manager
2714                .topic_region_manager
2715                .get(topic_1_key)
2716                .await
2717                .unwrap()
2718                .is_some()
2719        );
2720    }
2721
2722    #[tokio::test]
2723    async fn test_destroy_table_metadata() {
2724        let mem_kv = Arc::new(MemoryKvBackend::default());
2725        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
2726        let table_id = 1025;
2727        let table_name = "foo";
2728        let task = test_create_table_task(table_name, table_id);
2729        let options = create_mixed_region_wal_options();
2730        let serialized_options = options
2731            .iter()
2732            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
2733            .collect::<HashMap<_, _>>();
2734        table_metadata_manager
2735            .create_table_metadata(
2736                task.table_info,
2737                TableRouteValue::physical(vec![
2738                    RegionRoute {
2739                        region: Region::new_test(RegionId::new(table_id, 1)),
2740                        leader_peer: Some(Peer::empty(1)),
2741                        follower_peers: vec![Peer::empty(5)],
2742                        leader_state: None,
2743                        leader_down_since: None,
2744                        write_route_policy: None,
2745                    },
2746                    RegionRoute {
2747                        region: Region::new_test(RegionId::new(table_id, 2)),
2748                        leader_peer: Some(Peer::empty(2)),
2749                        follower_peers: vec![Peer::empty(4)],
2750                        leader_state: None,
2751                        leader_down_since: None,
2752                        write_route_policy: None,
2753                    },
2754                    RegionRoute {
2755                        region: Region::new_test(RegionId::new(table_id, 3)),
2756                        leader_peer: Some(Peer::empty(3)),
2757                        follower_peers: vec![],
2758                        leader_state: None,
2759                        leader_down_since: None,
2760                        write_route_policy: None,
2761                    },
2762                ]),
2763                serialized_options,
2764            )
2765            .await
2766            .unwrap();
2767        let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
2768        let table_route_value = table_metadata_manager
2769            .table_route_manager
2770            .table_route_storage()
2771            .get_with_raw_bytes(table_id)
2772            .await
2773            .unwrap()
2774            .unwrap();
2775        table_metadata_manager
2776            .destroy_table_metadata(table_id, &table_name, &table_route_value, &options)
2777            .await
2778            .unwrap();
2779        assert!(mem_kv.is_empty());
2780    }
2781
2782    #[tokio::test]
2783    async fn test_restore_table_metadata() {
2784        let mem_kv = Arc::new(MemoryKvBackend::default());
2785        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
2786        let table_id = 1025;
2787        let table_name = "foo";
2788        let task = test_create_table_task(table_name, table_id);
2789        let options = create_mixed_region_wal_options();
2790        let serialized_options = options
2791            .iter()
2792            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
2793            .collect::<HashMap<_, _>>();
2794        table_metadata_manager
2795            .create_table_metadata(
2796                task.table_info,
2797                TableRouteValue::physical(vec![
2798                    RegionRoute {
2799                        region: Region::new_test(RegionId::new(table_id, 1)),
2800                        leader_peer: Some(Peer::empty(1)),
2801                        follower_peers: vec![Peer::empty(5)],
2802                        leader_state: None,
2803                        leader_down_since: None,
2804                        write_route_policy: None,
2805                    },
2806                    RegionRoute {
2807                        region: Region::new_test(RegionId::new(table_id, 2)),
2808                        leader_peer: Some(Peer::empty(2)),
2809                        follower_peers: vec![Peer::empty(4)],
2810                        leader_state: None,
2811                        leader_down_since: None,
2812                        write_route_policy: None,
2813                    },
2814                    RegionRoute {
2815                        region: Region::new_test(RegionId::new(table_id, 3)),
2816                        leader_peer: Some(Peer::empty(3)),
2817                        follower_peers: vec![],
2818                        leader_state: None,
2819                        leader_down_since: None,
2820                        write_route_policy: None,
2821                    },
2822                ]),
2823                serialized_options,
2824            )
2825            .await
2826            .unwrap();
2827        let expected_result = mem_kv.dump();
2828        let table_route_value = table_metadata_manager
2829            .table_route_manager
2830            .table_route_storage()
2831            .get_with_raw_bytes(table_id)
2832            .await
2833            .unwrap()
2834            .unwrap();
2835        let region_routes = table_route_value.region_routes().unwrap();
2836        let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
2837        let table_route_value = TableRouteValue::physical(region_routes.clone());
2838        table_metadata_manager
2839            .delete_table_metadata(table_id, &table_name, &table_route_value, &options)
2840            .await
2841            .unwrap();
2842        table_metadata_manager
2843            .restore_table_metadata(table_id, &table_name, &table_route_value, &options)
2844            .await
2845            .unwrap();
2846        let kvs = mem_kv.dump();
2847        assert_eq!(kvs, expected_result);
2848        // Should be ignored.
2849        table_metadata_manager
2850            .restore_table_metadata(table_id, &table_name, &table_route_value, &options)
2851            .await
2852            .unwrap();
2853        let kvs = mem_kv.dump();
2854        assert_eq!(kvs, expected_result);
2855    }
2856
2857    #[tokio::test]
2858    async fn test_dropped_table_metadata_enumeration_and_lookup() {
2859        let mem_kv = Arc::new(MemoryKvBackend::default());
2860        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
2861        let table_id = 1025;
2862        let table_name = "foo";
2863        let task = test_create_table_task(table_name, table_id);
2864        let table_info = task.table_info.clone();
2865        let options = create_mixed_region_wal_options();
2866        let serialized_options = options
2867            .iter()
2868            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
2869            .collect::<HashMap<_, _>>();
2870        table_metadata_manager
2871            .create_table_metadata(
2872                table_info.clone(),
2873                TableRouteValue::physical(vec![
2874                    RegionRoute {
2875                        region: Region::new_test(RegionId::new(table_id, 1)),
2876                        leader_peer: Some(Peer::empty(1)),
2877                        follower_peers: vec![Peer::empty(5)],
2878                        leader_state: None,
2879                        leader_down_since: None,
2880                        write_route_policy: None,
2881                    },
2882                    RegionRoute {
2883                        region: Region::new_test(RegionId::new(table_id, 2)),
2884                        leader_peer: Some(Peer::empty(2)),
2885                        follower_peers: vec![Peer::empty(4)],
2886                        leader_state: None,
2887                        leader_down_since: None,
2888                        write_route_policy: None,
2889                    },
2890                    RegionRoute {
2891                        region: Region::new_test(RegionId::new(table_id, 3)),
2892                        leader_peer: Some(Peer::empty(3)),
2893                        follower_peers: vec![],
2894                        leader_state: None,
2895                        leader_down_since: None,
2896                        write_route_policy: None,
2897                    },
2898                ]),
2899                serialized_options,
2900            )
2901            .await
2902            .unwrap();
2903        let table_route_value = table_metadata_manager
2904            .table_route_manager
2905            .table_route_storage()
2906            .get_with_raw_bytes(table_id)
2907            .await
2908            .unwrap()
2909            .unwrap();
2910        let region_routes = table_route_value.region_routes().unwrap();
2911        let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
2912        let table_route_value = TableRouteValue::physical(region_routes.clone());
2913
2914        table_metadata_manager
2915            .delete_table_metadata(table_id, &table_name, &table_route_value, &options)
2916            .await
2917            .unwrap();
2918
2919        let dropped_tables = table_metadata_manager.list_dropped_tables().await.unwrap();
2920        assert_eq!(dropped_tables.len(), 1);
2921        assert_eq!(dropped_tables[0].table_id, table_id);
2922        assert_eq!(dropped_tables[0].table_name, table_name);
2923
2924        let dropped_table = table_metadata_manager
2925            .get_dropped_table(&table_name)
2926            .await
2927            .unwrap()
2928            .unwrap();
2929        assert_eq!(dropped_table.table_id, table_id);
2930        assert_eq!(dropped_table.table_name, table_name);
2931        assert_eq!(dropped_table.table_info_value.table_info, table_info);
2932        assert_eq!(
2933            dropped_table.table_route_value.region_routes().unwrap(),
2934            region_routes
2935        );
2936        assert_eq!(dropped_table.region_wal_options, options);
2937
2938        let dropped_table_by_id = table_metadata_manager
2939            .get_dropped_table_by_id(table_id)
2940            .await
2941            .unwrap()
2942            .unwrap();
2943        assert_eq!(dropped_table_by_id.table_id, table_id);
2944        assert_eq!(dropped_table_by_id.table_name, table_name);
2945        assert_eq!(dropped_table_by_id.table_info_value.table_info, table_info);
2946        assert_eq!(
2947            dropped_table_by_id
2948                .table_route_value
2949                .region_routes()
2950                .unwrap(),
2951            region_routes
2952        );
2953        assert_eq!(dropped_table_by_id.region_wal_options, options);
2954    }
2955
2956    #[tokio::test]
2957    async fn test_dropped_table_lookup_survives_live_name_recreation() {
2958        let mem_kv = Arc::new(MemoryKvBackend::default());
2959        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
2960        let dropped_table_id = 1025;
2961        let recreated_table_id = 1026;
2962        let table_name = "foo";
2963        let dropped_task = test_create_table_task(table_name, dropped_table_id);
2964        let dropped_table_info = dropped_task.table_info.clone();
2965        let options = create_mock_region_wal_options();
2966        let serialized_options = options
2967            .iter()
2968            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
2969            .collect::<HashMap<_, _>>();
2970        table_metadata_manager
2971            .create_table_metadata(
2972                dropped_table_info.clone(),
2973                TableRouteValue::physical(vec![
2974                    RegionRoute {
2975                        region: Region::new_test(RegionId::new(dropped_table_id, 1)),
2976                        leader_peer: Some(Peer::empty(1)),
2977                        follower_peers: vec![Peer::empty(5)],
2978                        leader_state: None,
2979                        leader_down_since: None,
2980                        write_route_policy: None,
2981                    },
2982                    RegionRoute {
2983                        region: Region::new_test(RegionId::new(dropped_table_id, 2)),
2984                        leader_peer: Some(Peer::empty(2)),
2985                        follower_peers: vec![Peer::empty(4)],
2986                        leader_state: None,
2987                        leader_down_since: None,
2988                        write_route_policy: None,
2989                    },
2990                ]),
2991                serialized_options.clone(),
2992            )
2993            .await
2994            .unwrap();
2995
2996        let dropped_table_name =
2997            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
2998        let dropped_table_route = table_metadata_manager
2999            .table_route_manager
3000            .table_route_storage()
3001            .get_with_raw_bytes(dropped_table_id)
3002            .await
3003            .unwrap()
3004            .unwrap();
3005        let dropped_table_route =
3006            TableRouteValue::physical(dropped_table_route.region_routes().unwrap().clone());
3007        table_metadata_manager
3008            .delete_table_metadata(
3009                dropped_table_id,
3010                &dropped_table_name,
3011                &dropped_table_route,
3012                &options,
3013            )
3014            .await
3015            .unwrap();
3016
3017        let recreated_task = test_create_table_task(table_name, recreated_table_id);
3018        table_metadata_manager
3019            .create_table_metadata(
3020                recreated_task.table_info,
3021                TableRouteValue::physical(vec![RegionRoute {
3022                    region: Region::new_test(RegionId::new(recreated_table_id, 1)),
3023                    leader_peer: Some(Peer::empty(4)),
3024                    follower_peers: vec![],
3025                    leader_state: None,
3026                    leader_down_since: None,
3027                    write_route_policy: None,
3028                }]),
3029                serialized_options,
3030            )
3031            .await
3032            .unwrap();
3033
3034        assert_eq!(
3035            table_metadata_manager
3036                .table_name_manager()
3037                .get(TableNameKey::from(&dropped_table_name))
3038                .await
3039                .unwrap()
3040                .unwrap()
3041                .table_id(),
3042            recreated_table_id
3043        );
3044
3045        let dropped_table = table_metadata_manager
3046            .get_dropped_table(&dropped_table_name)
3047            .await
3048            .unwrap()
3049            .unwrap();
3050        assert_eq!(dropped_table.table_id, dropped_table_id);
3051        assert_eq!(dropped_table.table_name, dropped_table_name);
3052        assert_eq!(
3053            dropped_table.table_info_value.table_info,
3054            dropped_table_info
3055        );
3056
3057        let dropped_tables = table_metadata_manager.list_dropped_tables().await.unwrap();
3058        assert_eq!(dropped_tables.len(), 1);
3059        assert_eq!(dropped_tables[0].table_id, dropped_table_id);
3060        assert_eq!(dropped_tables[0].table_name, dropped_table_name);
3061    }
3062
3063    #[tokio::test]
3064    async fn test_dropped_table_lookup_ignores_unrelated_malformed_datanode_tombstones() {
3065        let mem_kv = Arc::new(MemoryKvBackend::default());
3066        let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
3067        let table_id = 1025;
3068        let table_name = "foo";
3069        let task = test_create_table_task(table_name, table_id);
3070        let table_info = task.table_info.clone();
3071        let options = create_mixed_region_wal_options();
3072        let serialized_options = options
3073            .iter()
3074            .map(|(k, v)| (*k, serde_json::to_string(v).unwrap()))
3075            .collect::<HashMap<_, _>>();
3076        table_metadata_manager
3077            .create_table_metadata(
3078                table_info.clone(),
3079                TableRouteValue::physical(vec![
3080                    RegionRoute {
3081                        region: Region::new_test(RegionId::new(table_id, 1)),
3082                        leader_peer: Some(Peer::empty(1)),
3083                        follower_peers: vec![Peer::empty(5)],
3084                        leader_state: None,
3085                        leader_down_since: None,
3086                        write_route_policy: None,
3087                    },
3088                    RegionRoute {
3089                        region: Region::new_test(RegionId::new(table_id, 2)),
3090                        leader_peer: Some(Peer::empty(2)),
3091                        follower_peers: vec![Peer::empty(4)],
3092                        leader_state: None,
3093                        leader_down_since: None,
3094                        write_route_policy: None,
3095                    },
3096                ]),
3097                serialized_options,
3098            )
3099            .await
3100            .unwrap();
3101
3102        let table_route_value = table_metadata_manager
3103            .table_route_manager
3104            .table_route_storage()
3105            .get_with_raw_bytes(table_id)
3106            .await
3107            .unwrap()
3108            .unwrap();
3109        let region_routes = table_route_value.region_routes().unwrap();
3110        let table_name = TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name);
3111        let table_route_value = TableRouteValue::physical(region_routes.clone());
3112
3113        table_metadata_manager
3114            .delete_table_metadata(table_id, &table_name, &table_route_value, &options)
3115            .await
3116            .unwrap();
3117
3118        mem_kv
3119            .put(
3120                PutRequest::new()
3121                    .with_key("__tombstone/__dn_table/not-a-datanode-table-key")
3122                    .with_value("malformed"),
3123            )
3124            .await
3125            .unwrap();
3126
3127        let dropped_table = table_metadata_manager
3128            .get_dropped_table(&table_name)
3129            .await
3130            .unwrap()
3131            .unwrap();
3132        assert_eq!(dropped_table.table_id, table_id);
3133        assert_eq!(dropped_table.table_name, table_name);
3134        assert_eq!(dropped_table.table_info_value.table_info, table_info);
3135        assert_eq!(dropped_table.region_wal_options, options);
3136    }
3137
3138    #[tokio::test]
3139    async fn test_create_update_view_info() {
3140        let mem_kv = Arc::new(MemoryKvBackend::default());
3141        let table_metadata_manager = TableMetadataManager::new(mem_kv);
3142
3143        let view_info = new_test_table_info();
3144
3145        let view_id = view_info.ident.table_id;
3146
3147        let logical_plan: Vec<u8> = vec![1, 2, 3];
3148        let columns = vec!["a".to_string()];
3149        let plan_columns = vec!["number".to_string()];
3150        let table_names = new_test_table_names();
3151        let definition = "CREATE VIEW test AS SELECT * FROM numbers";
3152
3153        // Create metadata
3154        table_metadata_manager
3155            .create_view_metadata(
3156                view_info.clone(),
3157                logical_plan.clone(),
3158                table_names.clone(),
3159                columns.clone(),
3160                plan_columns.clone(),
3161                definition.to_string(),
3162            )
3163            .await
3164            .unwrap();
3165
3166        {
3167            // assert view info
3168            let current_view_info = table_metadata_manager
3169                .view_info_manager()
3170                .get(view_id)
3171                .await
3172                .unwrap()
3173                .unwrap()
3174                .into_inner();
3175            assert_eq!(current_view_info.view_info, logical_plan);
3176            assert_eq!(current_view_info.table_names, table_names);
3177            assert_eq!(current_view_info.definition, definition);
3178            assert_eq!(current_view_info.columns, columns);
3179            assert_eq!(current_view_info.plan_columns, plan_columns);
3180            // assert table info
3181            let current_table_info = table_metadata_manager
3182                .table_info_manager()
3183                .get(view_id)
3184                .await
3185                .unwrap()
3186                .unwrap()
3187                .into_inner();
3188            assert_eq!(current_table_info.table_info, view_info);
3189        }
3190
3191        let new_logical_plan: Vec<u8> = vec![4, 5, 6];
3192        let new_table_names = {
3193            let mut set = HashSet::new();
3194            set.insert(TableName {
3195                catalog_name: "greptime".to_string(),
3196                schema_name: "public".to_string(),
3197                table_name: "b_table".to_string(),
3198            });
3199            set.insert(TableName {
3200                catalog_name: "greptime".to_string(),
3201                schema_name: "public".to_string(),
3202                table_name: "c_table".to_string(),
3203            });
3204            set
3205        };
3206        let new_columns = vec!["b".to_string()];
3207        let new_plan_columns = vec!["number2".to_string()];
3208        let new_definition = "CREATE VIEW test AS SELECT * FROM b_table join c_table";
3209
3210        let current_view_info_value = DeserializedValueWithBytes::from_inner(ViewInfoValue::new(
3211            logical_plan.clone().into(),
3212            table_names,
3213            columns,
3214            plan_columns,
3215            definition.to_string(),
3216        ));
3217        // should be ok.
3218        table_metadata_manager
3219            .update_view_info(
3220                view_id,
3221                &current_view_info_value,
3222                new_logical_plan.clone(),
3223                new_table_names.clone(),
3224                new_columns.clone(),
3225                new_plan_columns.clone(),
3226                new_definition.to_string(),
3227            )
3228            .await
3229            .unwrap();
3230        // if table info was updated, it should be ok.
3231        table_metadata_manager
3232            .update_view_info(
3233                view_id,
3234                &current_view_info_value,
3235                new_logical_plan.clone(),
3236                new_table_names.clone(),
3237                new_columns.clone(),
3238                new_plan_columns.clone(),
3239                new_definition.to_string(),
3240            )
3241            .await
3242            .unwrap();
3243
3244        // updated view_info should equal the `new_logical_plan`
3245        let updated_view_info = table_metadata_manager
3246            .view_info_manager()
3247            .get(view_id)
3248            .await
3249            .unwrap()
3250            .unwrap()
3251            .into_inner();
3252        assert_eq!(updated_view_info.view_info, new_logical_plan);
3253        assert_eq!(updated_view_info.table_names, new_table_names);
3254        assert_eq!(updated_view_info.definition, new_definition);
3255        assert_eq!(updated_view_info.columns, new_columns);
3256        assert_eq!(updated_view_info.plan_columns, new_plan_columns);
3257
3258        let wrong_view_info = logical_plan.clone();
3259        let wrong_definition = "wrong_definition";
3260        let wrong_view_info_value =
3261            DeserializedValueWithBytes::from_inner(current_view_info_value.update(
3262                wrong_view_info.into(),
3263                new_table_names.clone(),
3264                new_columns.clone(),
3265                new_plan_columns.clone(),
3266                wrong_definition.to_string(),
3267            ));
3268        // if the current_view_info_value is wrong, it should return an error.
3269        // The ABA problem.
3270        assert!(
3271            table_metadata_manager
3272                .update_view_info(
3273                    view_id,
3274                    &wrong_view_info_value,
3275                    new_logical_plan.clone(),
3276                    new_table_names.clone(),
3277                    vec!["c".to_string()],
3278                    vec!["number3".to_string()],
3279                    wrong_definition.to_string(),
3280                )
3281                .await
3282                .is_err()
3283        );
3284
3285        // The view_info is not changed.
3286        let current_view_info = table_metadata_manager
3287            .view_info_manager()
3288            .get(view_id)
3289            .await
3290            .unwrap()
3291            .unwrap()
3292            .into_inner();
3293        assert_eq!(current_view_info.view_info, new_logical_plan);
3294        assert_eq!(current_view_info.table_names, new_table_names);
3295        assert_eq!(current_view_info.definition, new_definition);
3296        assert_eq!(current_view_info.columns, new_columns);
3297        assert_eq!(current_view_info.plan_columns, new_plan_columns);
3298    }
3299
3300    #[test]
3301    fn test_region_role_set_deserialize() {
3302        let s = r#"{"leader_regions": [1, 2, 3], "follower_regions": [4, 5, 6]}"#;
3303        let region_role_set: RegionRoleSet = serde_json::from_str(s).unwrap();
3304        assert_eq!(region_role_set.leader_regions, vec![1, 2, 3]);
3305        assert_eq!(region_role_set.follower_regions, vec![4, 5, 6]);
3306
3307        let s = r#"[1, 2, 3]"#;
3308        let region_role_set: RegionRoleSet = serde_json::from_str(s).unwrap();
3309        assert_eq!(region_role_set.leader_regions, vec![1, 2, 3]);
3310        assert!(region_role_set.follower_regions.is_empty());
3311    }
3312
3313    #[test]
3314    fn test_region_distribution_deserialize() {
3315        let s = r#"{"1": [1,2,3], "2": {"leader_regions": [7, 8, 9], "follower_regions": [10, 11, 12]}}"#;
3316        let region_distribution: RegionDistribution = serde_json::from_str(s).unwrap();
3317        assert_eq!(region_distribution.len(), 2);
3318        assert_eq!(region_distribution[&1].leader_regions, vec![1, 2, 3]);
3319        assert!(region_distribution[&1].follower_regions.is_empty());
3320        assert_eq!(region_distribution[&2].leader_regions, vec![7, 8, 9]);
3321        assert_eq!(region_distribution[&2].follower_regions, vec![10, 11, 12]);
3322    }
3323}