// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Datanode implementation.

use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};

use common_base::Plugins;
use common_error::ext::BoxedError;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::cache::{LayeredCacheRegistry, SchemaCacheRef, TableSchemaCacheRef};
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::datanode::TopicStatsReporter;
use common_meta::key::runtime_switch::RuntimeSwitchManager;
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
pub use common_procedure::options::ProcedureConfig;
use common_query::prelude::set_default_prefix;
use common_stat::ResourceStatImpl;
use common_telemetry::{error, info, warn};
use common_wal::config::DatanodeWalConfig;
use common_wal::config::kafka::DatanodeKafkaConfig;
use common_wal::config::raft_engine::RaftEngineConfig;
use file_engine::engine::FileRegionEngine;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::kafka::{GlobalIndexCollector, default_index_file};
use log_store::noop::log_store::NoopLogStore;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::MetaClientRef;
use metric_engine::engine::MetricEngine;
use mito2::config::MitoConfig;
use mito2::engine::{MitoEngine, MitoEngineBuilder};
use mito2::region::opener::PartitionExprFetcherRef;
use mito2::sst::file_ref::{FileReferenceManager, FileReferenceManagerRef};
use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
use object_store::util::normalize_dir;
use query::QueryEngineFactory;
use query::dummy_catalog::{DummyCatalogManager, TableProviderFactoryRef};
use servers::server::ServerHandlers;
use snafu::{OptionExt, ResultExt, ensure};
use store_api::path_utils::WAL_DIR;
use store_api::region_engine::{
    RegionEngineRef, RegionRole, SetRegionRoleStateResponse, SettableRegionRoleState,
};
use tokio::fs;
use tokio::sync::Notify;

use crate::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
use crate::error::{
    self, BuildDatanodeSnafu, BuildMetricEngineSnafu, BuildMitoEngineSnafu, CreateDirSnafu,
    GetMetadataSnafu, MissingCacheSnafu, MissingNodeIdSnafu, OpenLogStoreSnafu, Result,
    ShutdownInstanceSnafu, ShutdownServerSnafu, StartServerSnafu,
};
use crate::event_listener::{
    NoopRegionServerEventListener, RegionServerEventListenerRef, RegionServerEventReceiver,
    new_region_server_event_channel,
};
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::HeartbeatTask;
use crate::partition_expr_fetcher::MetaPartitionExprFetcher;
use crate::region_server::{DummyTableProviderFactory, RegionServer};
use crate::store::{self, new_object_store_without_cache};
use crate::utils::{RegionOpenRequests, build_region_open_requests};

/// Datanode service.
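///
/// A minimal sketch of the service lifecycle; `datanode` and `services` are
/// assumed to be prepared by the caller (see [DatanodeBuilder]):
///
/// ```rust,ignore
/// datanode.setup_services(services);
/// datanode.start().await?;
/// // ... serve until shutdown is requested ...
/// datanode.shutdown().await?;
/// ```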
pub struct Datanode {
    services: ServerHandlers,
    heartbeat_task: Option<HeartbeatTask>,
    region_event_receiver: Option<RegionServerEventReceiver>,
    region_server: RegionServer,
    greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
    leases_notifier: Option<Arc<Notify>>,
    plugins: Plugins,
}

impl Datanode {
    pub async fn start(&mut self) -> Result<()> {
        info!("Starting datanode instance...");

        self.start_heartbeat().await?;
        self.wait_coordinated().await;

        self.start_telemetry();

        self.services.start_all().await.context(StartServerSnafu)
    }

    pub fn server_handlers(&self) -> &ServerHandlers {
        &self.services
    }

    pub fn start_telemetry(&self) {
        if let Err(e) = self.greptimedb_telemetry_task.start() {
            warn!(e; "Failed to start telemetry task!");
        }
    }

    pub async fn start_heartbeat(&mut self) -> Result<()> {
        if let Some(task) = &self.heartbeat_task {
            // Safety: a heartbeat task implies metasrv control, so the event
            // receiver was created alongside it and must exist here.
            let receiver = self.region_event_receiver.take().unwrap();

            task.start(receiver, self.leases_notifier.clone()).await?;
        }
        Ok(())
    }
    /// If `leases_notifier` is present, waits until leases have been obtained for all regions.
    pub async fn wait_coordinated(&mut self) {
        if let Some(notifier) = self.leases_notifier.take() {
            notifier.notified().await;
        }
    }

    pub fn setup_services(&mut self, services: ServerHandlers) {
        self.services = services;
    }

    pub async fn shutdown(&mut self) -> Result<()> {
        self.services
            .shutdown_all()
            .await
            .context(ShutdownServerSnafu)?;

        let _ = self.greptimedb_telemetry_task.stop().await;
        if let Some(heartbeat_task) = &self.heartbeat_task {
            heartbeat_task
                .close()
                .map_err(BoxedError::new)
                .context(ShutdownInstanceSnafu)?;
        }
        self.region_server.stop().await?;
        Ok(())
    }

    pub fn region_server(&self) -> RegionServer {
        self.region_server.clone()
    }

    pub fn plugins(&self) -> Plugins {
        self.plugins.clone()
    }
}

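/// Builder for [Datanode].
///
/// A minimal construction sketch, assuming `opts` ([DatanodeOptions]), `plugins`,
/// `kv_backend`, and `cache_registry` are prepared by the caller:
///
/// ```rust,ignore
/// let mut builder = DatanodeBuilder::new(opts, plugins, kv_backend);
/// builder.with_cache_registry(cache_registry);
/// let mut datanode = builder.build().await?;
/// ```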
pub struct DatanodeBuilder {
    opts: DatanodeOptions,
    table_provider_factory: Option<TableProviderFactoryRef>,
    plugins: Plugins,
    meta_client: Option<MetaClientRef>,
    kv_backend: KvBackendRef,
    cache_registry: Option<Arc<LayeredCacheRegistry>>,
    topic_stats_reporter: Option<Box<dyn TopicStatsReporter>>,
    open_regions_writable_override: Option<bool>,
    #[cfg(feature = "enterprise")]
    extension_range_provider_factory: Option<mito2::extension::BoxedExtensionRangeProviderFactory>,
}

impl DatanodeBuilder {
    pub fn new(opts: DatanodeOptions, plugins: Plugins, kv_backend: KvBackendRef) -> Self {
        Self {
            opts,
            table_provider_factory: None,
            plugins,
            meta_client: None,
            kv_backend,
            cache_registry: None,
            open_regions_writable_override: None,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
            topic_stats_reporter: None,
        }
    }

    pub fn options(&self) -> &DatanodeOptions {
        &self.opts
    }

    pub fn with_meta_client(&mut self, client: MetaClientRef) -> &mut Self {
        self.meta_client = Some(client);
        self
    }

    pub fn with_cache_registry(&mut self, registry: Arc<LayeredCacheRegistry>) -> &mut Self {
        self.cache_registry = Some(registry);
        self
    }

    pub fn kv_backend(&self) -> &KvBackendRef {
        &self.kv_backend
    }

    pub fn with_table_provider_factory(&mut self, factory: TableProviderFactoryRef) -> &mut Self {
        self.table_provider_factory = Some(factory);
        self
    }

    /// Overrides whether regions opened during datanode startup should become writable.
    ///
    /// When unset, the builder uses its default writable policy for reopened regions
    /// (writable only when no metasrv client is configured).
    ///
    /// Warning: setting this to `true` on a metasrv-controlled datanode (one built
    /// with `with_meta_client`) will promote regions to Leader before heartbeat and
    /// lease coordination begin, bypassing the metasrv safety contract and creating a
    /// potential split-brain window during startup.
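    ///
    /// A hypothetical use: keeping regions read-only after open on a
    /// self-controlled datanode, e.g. for offline inspection:
    ///
    /// ```rust,ignore
    /// builder.with_open_regions_writable_override(false);
    /// ```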
    pub fn with_open_regions_writable_override(&mut self, writable: bool) -> &mut Self {
        self.open_regions_writable_override = Some(writable);
        self
    }

    #[cfg(feature = "enterprise")]
    pub fn with_extension_range_provider(
        &mut self,
        extension_range_provider_factory: mito2::extension::BoxedExtensionRangeProviderFactory,
    ) -> &mut Self {
        self.extension_range_provider_factory = Some(extension_range_provider_factory);
        self
    }

    pub async fn build(mut self) -> Result<Datanode> {
        let node_id = self.opts.node_id.context(MissingNodeIdSnafu)?;
        set_default_prefix(self.opts.default_column_prefix.as_deref())
            .map_err(BoxedError::new)
            .context(BuildDatanodeSnafu)?;

        let meta_client = self.meta_client.take();

        // If a metasrv client is provided, it controls the region server.
        // Otherwise the region server is self-controlled: it sends no heartbeats
        // and regions become writable immediately upon open.
        let controlled_by_metasrv = meta_client.is_some();

        // Build and initialize the region server.
        let (region_event_listener, region_event_receiver) = if controlled_by_metasrv {
            let (tx, rx) = new_region_server_event_channel();
            (Box::new(tx) as _, Some(rx))
        } else {
            (Box::new(NoopRegionServerEventListener) as _, None)
        };
        let cache_registry = self.cache_registry.take().context(MissingCacheSnafu)?;
        let schema_cache: SchemaCacheRef = cache_registry.get().context(MissingCacheSnafu)?;
        let table_id_schema_cache: TableSchemaCacheRef =
            cache_registry.get().context(MissingCacheSnafu)?;

        let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(
            table_id_schema_cache,
            schema_cache,
        ));

        let gc_enabled = self.opts.region_engine.iter().any(|engine| {
            if let RegionEngineConfig::Mito(config) = engine {
                config.gc.enable
            } else {
                false
            }
        });

        let file_ref_manager = Arc::new(FileReferenceManager::with_gc_enabled(
            Some(node_id),
            gc_enabled,
        ));
        let region_server = self
            .new_region_server(
                schema_metadata_manager,
                region_event_listener,
                file_ref_manager,
            )
            .await?;

        // TODO(weny): Consider introducing a readonly kv_backend trait.
        let runtime_switch_manager = RuntimeSwitchManager::new(self.kv_backend.clone());
        let is_recovery_mode = runtime_switch_manager
            .recovery_mode()
            .await
            .context(GetMetadataSnafu)?;

        let region_open_requests =
            build_region_open_requests(node_id, self.kv_backend.clone()).await?;
        let open_with_writable = self
            .open_regions_writable_override
            .unwrap_or(!controlled_by_metasrv);
        let open_all_regions = open_all_regions(
            region_server.clone(),
            region_open_requests,
            open_with_writable,
            self.opts.init_regions_parallelism,
            // Ignore nonexistent regions in recovery mode.
            is_recovery_mode,
        );

        if self.opts.init_regions_in_background {
            // Open regions in the background.
            common_runtime::spawn_global(async move {
                if let Err(err) = open_all_regions.await {
                    error!(err; "Failed to open regions during startup.");
                }
            });
        } else {
            open_all_regions.await?;
        }

        let heartbeat_task = if let Some(meta_client) = meta_client {
            let task = self
                .create_heartbeat_task(&region_server, meta_client, cache_registry)
                .await?;
            Some(task)
        } else {
            None
        };

        let is_standalone = heartbeat_task.is_none();
        let greptimedb_telemetry_task = get_greptimedb_telemetry_task(
            Some(self.opts.storage.data_home.clone()),
            is_standalone && self.opts.enable_telemetry,
        )
        .await;

        let leases_notifier = if self.opts.require_lease_before_startup && !is_standalone {
            Some(Arc::new(Notify::new()))
        } else {
            None
        };

        Ok(Datanode {
            services: ServerHandlers::default(),
            heartbeat_task,
            region_server,
            greptimedb_telemetry_task,
            region_event_receiver,
            leases_notifier,
            plugins: self.plugins.clone(),
        })
    }

    async fn create_heartbeat_task(
        &self,
        region_server: &RegionServer,
        meta_client: MetaClientRef,
        cache_invalidator: CacheInvalidatorRef,
    ) -> Result<HeartbeatTask> {
        let stat = {
            let mut stat = ResourceStatImpl::default();
            stat.start_collect_cpu_usage();
            Arc::new(stat)
        };

        HeartbeatTask::try_new(
            &self.opts,
            region_server.clone(),
            meta_client,
            self.kv_backend.clone(),
            cache_invalidator,
            self.plugins.clone(),
            stat,
        )
        .await
    }

    /// Builds an [ObjectStoreManager] from [StorageConfig]: the default store
    /// comes from `cfg.store`, and each store in `cfg.providers` is registered
    /// under its `config_name()`.
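    ///
    /// A minimal usage sketch, assuming a prepared [DatanodeOptions] in `opts`:
    ///
    /// ```rust,ignore
    /// let manager = DatanodeBuilder::build_object_store_manager(&opts.storage).await?;
    /// let default_store = manager.default_object_store();
    /// ```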
    pub async fn build_object_store_manager(cfg: &StorageConfig) -> Result<ObjectStoreManagerRef> {
        let object_store = store::new_object_store(cfg.store.clone(), &cfg.data_home).await?;
        let default_name = cfg.store.config_name();
        let mut object_store_manager = ObjectStoreManager::new(default_name, object_store);
        for store in &cfg.providers {
            object_store_manager.add(
                store.config_name(),
                store::new_object_store(store.clone(), &cfg.data_home).await?,
            );
        }
        Ok(Arc::new(object_store_manager))
    }

    #[cfg(test)]
    /// Opens all regions belonging to this datanode.
    async fn initialize_region_server(
        &self,
        region_server: &RegionServer,
        open_with_writable: bool,
    ) -> Result<()> {
        let node_id = self.opts.node_id.context(MissingNodeIdSnafu)?;

        // TODO(weny): Consider introducing a readonly kv_backend trait.
        let runtime_switch_manager = RuntimeSwitchManager::new(self.kv_backend.clone());
        let is_recovery_mode = runtime_switch_manager
            .recovery_mode()
            .await
            .context(GetMetadataSnafu)?;
        let region_open_requests =
            build_region_open_requests(node_id, self.kv_backend.clone()).await?;

        open_all_regions(
            region_server.clone(),
            region_open_requests,
            open_with_writable,
            self.opts.init_regions_parallelism,
            is_recovery_mode,
        )
        .await
    }

    async fn new_region_server(
        &mut self,
        schema_metadata_manager: SchemaMetadataManagerRef,
        event_listener: RegionServerEventListenerRef,
        file_ref_manager: FileReferenceManagerRef,
    ) -> Result<RegionServer> {
        let opts: &DatanodeOptions = &self.opts;

        let query_engine_factory = QueryEngineFactory::new_with_plugins(
            // The query engine in the datanode only executes plans with resolved table sources.
            DummyCatalogManager::arc(),
            None,
            None,
            None,
            None,
            None,
            false,
            self.plugins.clone(),
            opts.query.clone(),
        );
        let query_engine = query_engine_factory.query_engine();

        let table_provider_factory = self
            .table_provider_factory
            .clone()
            .unwrap_or_else(|| Arc::new(DummyTableProviderFactory));

        let mut region_server = RegionServer::with_table_provider(
            query_engine,
            common_runtime::global_runtime(),
            event_listener,
            table_provider_factory,
            opts.max_concurrent_queries,
            // TODO: re-evaluate the hardcoded timeout in the next version of the datanode concurrency limiter.
            Duration::from_millis(100),
            opts.grpc.flight_compression,
        );

        let object_store_manager = Self::build_object_store_manager(&opts.storage).await?;
        let engines = self
            .build_store_engines(
                object_store_manager,
                schema_metadata_manager,
                file_ref_manager,
                self.plugins.clone(),
            )
            .await?;
        for engine in engines {
            region_server.register_engine(engine);
        }
        if let Some(topic_stats_reporter) = self.topic_stats_reporter.take() {
            region_server.set_topic_stats_reporter(topic_stats_reporter);
        }

        Ok(region_server)
    }

    // Internal utils.

    /// Builds a [RegionEngineRef] for each engine configured in the
    /// `region_engine` section of `opts`.
    async fn build_store_engines(
        &mut self,
        object_store_manager: ObjectStoreManagerRef,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        plugins: Plugins,
    ) -> Result<Vec<RegionEngineRef>> {
        let mut metric_engine_config = metric_engine::config::EngineConfig::default();
        let mut mito_engine_config = MitoConfig::default();
        let mut file_engine_config = file_engine::config::EngineConfig::default();

        for engine in &self.opts.region_engine {
            match engine {
                RegionEngineConfig::Mito(config) => {
                    mito_engine_config = config.clone();
                }
                RegionEngineConfig::File(config) => {
                    file_engine_config = config.clone();
                }
                RegionEngineConfig::Metric(metric_config) => {
                    metric_engine_config = metric_config.clone();
                }
            }
        }

        // Build a fetcher to backfill partition_expr on open.
        let fetcher = Arc::new(MetaPartitionExprFetcher::new(self.kv_backend.clone()));
        let mito_engine = self
            .build_mito_engine(
                object_store_manager.clone(),
                mito_engine_config,
                schema_metadata_manager.clone(),
                file_ref_manager.clone(),
                fetcher.clone(),
                plugins.clone(),
            )
            .await?;

        // The metric engine is layered on top of the mito engine.
        let metric_engine = MetricEngine::try_new(mito_engine.clone(), metric_engine_config)
            .context(BuildMetricEngineSnafu)?;

        let file_engine = FileRegionEngine::new(
            file_engine_config,
            // TODO: implement custom storage for the file engine.
            object_store_manager.default_object_store().clone(),
        );

        Ok(vec![
            Arc::new(mito_engine) as _,
            Arc::new(metric_engine) as _,
            Arc::new(file_engine) as _,
        ])
    }

    /// Builds a [MitoEngine] according to the options.
    ///
    /// The WAL backend (raft-engine, Kafka, or noop) is selected from `opts.wal`.
    async fn build_mito_engine(
        &mut self,
        object_store_manager: ObjectStoreManagerRef,
        mut config: MitoConfig,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        partition_expr_fetcher: PartitionExprFetcherRef,
        plugins: Plugins,
    ) -> Result<MitoEngine> {
        let opts = &self.opts;
        if opts.storage.is_object_storage() {
            // Enable the write cache when the default store is object storage.
            config.enable_write_cache = true;
            info!("Configured 'enable_write_cache=true' for mito engine.");
        }

        let mito_engine = match &opts.wal {
            DatanodeWalConfig::RaftEngine(raft_engine_config) => {
                let log_store =
                    Self::build_raft_engine_log_store(&opts.storage.data_home, raft_engine_config)
                        .await?;

                let builder = MitoEngineBuilder::new(
                    &opts.storage.data_home,
                    config,
                    log_store,
                    object_store_manager,
                    schema_metadata_manager,
                    file_ref_manager,
                    partition_expr_fetcher.clone(),
                    plugins,
                );

                #[cfg(feature = "enterprise")]
                let builder = builder.with_extension_range_provider_factory(
                    self.extension_range_provider_factory.take(),
                );

                builder.try_build().await.context(BuildMitoEngineSnafu)?
            }
            DatanodeWalConfig::Kafka(kafka_config) => {
                if kafka_config.create_index && opts.node_id.is_none() {
                    warn!("WAL index creation is only available in distributed mode.")
                }
                let global_index_collector = if kafka_config.create_index
                    && let Some(node_id) = opts.node_id
                {
                    let operator = new_object_store_without_cache(
                        &opts.storage.store,
                        &opts.storage.data_home,
                    )
                    .await?;
                    let path = default_index_file(node_id);
                    Some(Self::build_global_index_collector(
                        kafka_config.dump_index_interval,
                        operator,
                        path,
                    ))
                } else {
                    None
                };

                let log_store =
                    Self::build_kafka_log_store(kafka_config, global_index_collector).await?;
                self.topic_stats_reporter = Some(log_store.topic_stats_reporter());
                let builder = MitoEngineBuilder::new(
                    &opts.storage.data_home,
                    config,
                    log_store,
                    object_store_manager,
                    schema_metadata_manager,
                    file_ref_manager,
                    partition_expr_fetcher,
                    plugins,
                );

                #[cfg(feature = "enterprise")]
                let builder = builder.with_extension_range_provider_factory(
                    self.extension_range_provider_factory.take(),
                );

                builder.try_build().await.context(BuildMitoEngineSnafu)?
            }
            DatanodeWalConfig::Noop => {
                let log_store = Arc::new(NoopLogStore);

                let builder = MitoEngineBuilder::new(
                    &opts.storage.data_home,
                    config,
                    log_store,
                    object_store_manager,
                    schema_metadata_manager,
                    file_ref_manager,
                    partition_expr_fetcher.clone(),
                    plugins,
                );

                #[cfg(feature = "enterprise")]
                let builder = builder.with_extension_range_provider_factory(
                    self.extension_range_provider_factory.take(),
                );

                builder.try_build().await.context(BuildMitoEngineSnafu)?
            }
        };
        Ok(mito_engine)
    }

    /// Builds a [RaftEngineLogStore].
    ///
    /// The WAL directory is `config.dir` when set, otherwise [WAL_DIR] under `data_home`.
    async fn build_raft_engine_log_store(
        data_home: &str,
        config: &RaftEngineConfig,
    ) -> Result<Arc<RaftEngineLogStore>> {
        let data_home = normalize_dir(data_home);
        let wal_dir = match &config.dir {
            Some(dir) => dir.clone(),
            None => format!("{}{WAL_DIR}", data_home),
        };

        // Create the WAL directory if it does not exist.
        fs::create_dir_all(Path::new(&wal_dir))
            .await
            .context(CreateDirSnafu { dir: &wal_dir })?;
        info!(
            "Creating raft-engine logstore with config: {:?} and storage path: {}",
            config, &wal_dir
        );
        let logstore = RaftEngineLogStore::try_new(wal_dir, config)
            .await
            .map_err(Box::new)
            .context(OpenLogStoreSnafu)?;

        Ok(Arc::new(logstore))
    }

    /// Builds a [`KafkaLogStore`].
    async fn build_kafka_log_store(
        config: &DatanodeKafkaConfig,
        global_index_collector: Option<GlobalIndexCollector>,
    ) -> Result<Arc<KafkaLogStore>> {
        KafkaLogStore::try_new(config, global_index_collector)
            .await
            .map_err(Box::new)
            .context(OpenLogStoreSnafu)
            .map(Arc::new)
    }

    /// Builds a [`GlobalIndexCollector`].
    fn build_global_index_collector(
        dump_index_interval: Duration,
        operator: object_store::ObjectStore,
        path: String,
    ) -> GlobalIndexCollector {
        GlobalIndexCollector::new(dump_index_interval, operator, path)
    }
}

/// Opens all regions belonging to this datanode.
///
/// Leader regions are opened in batches with `init_regions_parallelism` workers.
/// When `ignore_nonexistent_region` is false, failing to open any expected region
/// is a hard error; otherwise a mismatch is only logged.
async fn open_all_regions(
    region_server: RegionServer,
    region_open_requests: RegionOpenRequests,
    open_with_writable: bool,
    init_regions_parallelism: usize,
    ignore_nonexistent_region: bool,
) -> Result<()> {
    let RegionOpenRequests {
        leader_regions,
        #[cfg(feature = "enterprise")]
        follower_regions,
    } = region_open_requests;

    let leader_region_num = leader_regions.len();
    info!("going to open {} region(s)", leader_region_num);
    let now = Instant::now();
    let open_regions = region_server
        .handle_batch_open_requests(
            init_regions_parallelism,
            leader_regions,
            ignore_nonexistent_region,
        )
        .await?;
    info!(
        "Opened {} regions in {:?}",
        open_regions.len(),
        now.elapsed()
    );
    if !ignore_nonexistent_region {
        ensure!(
            open_regions.len() == leader_region_num,
            error::UnexpectedSnafu {
                violated: format!(
                    "Expected to open {} regions, but only {} opened",
                    leader_region_num,
                    open_regions.len()
                )
            }
        );
    } else if open_regions.len() != leader_region_num {
        warn!(
            "Ignoring nonexistent regions: expected to open {} regions, but only {} opened",
            leader_region_num,
            open_regions.len()
        );
    }

    for region_id in open_regions {
        if open_with_writable {
            let res = region_server.set_region_role(region_id, RegionRole::Leader);
            match res {
                Ok(_) => {
                    // Finalize leadership: persist backfilled metadata.
                    if let SetRegionRoleStateResponse::InvalidTransition(err) = region_server
                        .set_region_role_state_gracefully(
                            region_id,
                            SettableRegionRoleState::Leader,
                        )
                        .await?
                    {
                        error!(err; "failed to convert region {region_id} to leader");
                    }
                }
                Err(e) => {
                    error!(e; "failed to convert region {region_id} to leader");
                }
            }
        }
    }

    #[cfg(feature = "enterprise")]
    if !follower_regions.is_empty() {
        use tokio::time::Instant;

        let follower_region_num = follower_regions.len();
        info!("going to open {} follower region(s)", follower_region_num);

        let now = Instant::now();
        let open_regions = region_server
            .handle_batch_open_requests(
                init_regions_parallelism,
                follower_regions,
                ignore_nonexistent_region,
            )
            .await?;
        info!(
            "Opened {} follower regions in {:?}",
            open_regions.len(),
            now.elapsed()
        );

        if !ignore_nonexistent_region {
            ensure!(
                open_regions.len() == follower_region_num,
                error::UnexpectedSnafu {
                    violated: format!(
                        "Expected to open {} follower regions, but only {} opened",
                        follower_region_num,
                        open_regions.len()
                    )
                }
            );
        } else if open_regions.len() != follower_region_num {
            warn!(
                "Ignoring nonexistent regions: expected to open {} follower regions, but only {} opened",
                follower_region_num,
                open_regions.len()
            );
        }
    }

    info!("All regions have been opened");

    Ok(())
}

#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;
    use std::collections::{BTreeMap, HashMap};
    use std::sync::Arc;

    use cache::build_datanode_cache_registry;
    use common_base::Plugins;
    use common_meta::cache::LayeredCacheRegistryBuilder;
    use common_meta::key::RegionRoleSet;
    use common_meta::key::datanode_table::DatanodeTableManager;
    use common_meta::kv_backend::KvBackendRef;
    use common_meta::kv_backend::memory::MemoryKvBackend;
    use mito2::engine::MITO_ENGINE_NAME;
    use store_api::region_request::RegionRequest;
    use store_api::storage::RegionId;

    use crate::config::DatanodeOptions;
    use crate::datanode::DatanodeBuilder;
    use crate::tests::{MockRegionEngine, mock_region_server};

    async fn setup_table_datanode(kv: &KvBackendRef) {
        let mgr = DatanodeTableManager::new(kv.clone());
        let txn = mgr
            .build_create_txn(
                1028,
                MITO_ENGINE_NAME,
                "foo/bar/weny",
                HashMap::from([("foo".to_string(), "bar".to_string())]),
                HashMap::default(),
                BTreeMap::from([(0, RegionRoleSet::new(vec![0, 1, 2], vec![]))]),
            )
            .unwrap();

        let r = kv.txn(txn).await.unwrap();
        assert!(r.succeeded);
    }

    #[tokio::test]
    async fn test_initialize_region_server() {
        common_telemetry::init_default_ut_logging();
        let mut mock_region_server = mock_region_server();
        let (mock_region, mut mock_region_handler) = MockRegionEngine::new(MITO_ENGINE_NAME);

        mock_region_server.register_engine(mock_region.clone());

        let kv_backend = Arc::new(MemoryKvBackend::new());
        let layered_cache_registry = Arc::new(
            LayeredCacheRegistryBuilder::default()
                .add_cache_registry(build_datanode_cache_registry(kv_backend.clone()))
                .build(),
        );

        let mut builder = DatanodeBuilder::new(
            DatanodeOptions {
                node_id: Some(0),
                ..Default::default()
            },
            Plugins::default(),
            kv_backend.clone(),
        );
        builder.with_cache_registry(layered_cache_registry);
        setup_table_datanode(&(kv_backend as _)).await;

        builder
            .initialize_region_server(&mock_region_server, false)
            .await
            .unwrap();

        for i in 0..3 {
            let (region_id, req) = mock_region_handler.recv().await.unwrap();
            assert_eq!(region_id, RegionId::new(1028, i));
            if let RegionRequest::Open(req) = req {
                assert_eq!(
                    req.options,
                    HashMap::from([("foo".to_string(), "bar".to_string())])
                )
            } else {
                unreachable!()
            }
        }

        assert_matches!(
            mock_region_handler.try_recv(),
            Err(tokio::sync::mpsc::error::TryRecvError::Empty)
        );
    }
}