Skip to main content

cmd/
standalone.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16use std::net::SocketAddr;
17use std::path::Path;
18use std::sync::Arc;
19use std::{fs, path};
20
21use async_trait::async_trait;
22use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
23use catalog::CatalogManagerRef;
24use catalog::information_schema::InformationExtensionRef;
25use catalog::kvbackend::{CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder};
26use catalog::process_manager::ProcessManager;
27use clap::Parser;
28use common_base::Plugins;
29use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
30use common_config::{Configurable, metadata_store_dir};
31use common_error::ext::BoxedError;
32use common_meta::DatanodeId;
33use common_meta::cache::{LayeredCacheRegistryBuilder, LayeredCacheRegistryRef};
34use common_meta::ddl::flow_meta::FlowMetadataAllocator;
35use common_meta::ddl::table_meta::TableMetadataAllocator;
36use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
37use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef, DdlManagerRef};
38use common_meta::key::flow::FlowMetadataManager;
39use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
40use common_meta::kv_backend::KvBackendRef;
41use common_meta::node_manager::{FlownodeRef, NodeManagerRef};
42use common_meta::procedure_executor::{LocalProcedureExecutor, ProcedureExecutorRef};
43use common_meta::region_keeper::MemoryRegionKeeper;
44use common_meta::region_registry::LeaderRegionRegistry;
45use common_meta::sequence::{Sequence, SequenceBuilder};
46use common_meta::wal_provider::{WalProviderRef, build_wal_provider};
47use common_options::plugin_options::StandaloneFlag;
48use common_procedure::ProcedureManagerRef;
49use common_query::prelude::set_default_prefix;
50use common_telemetry::info;
51use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
52use common_time::timezone::set_default_timezone;
53use common_version::{short_version, verbose_version};
54use datanode::config::DatanodeOptions;
55use datanode::datanode::{Datanode, DatanodeBuilder};
56use datanode::region_server::RegionServer;
57use flow::{
58    FlowDualEngineRef, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
59    FrontendInvoker, GrpcQueryHandlerWithBoxedError,
60};
61use frontend::frontend::Frontend;
62use frontend::instance::StandaloneDatanodeManager;
63use frontend::instance::builder::FrontendBuilder;
64use frontend::server::Services;
65use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
66use plugins::PluginOptions;
67use plugins::frontend::context::{
68    CatalogManagerConfigureContext, StandaloneCatalogManagerConfigureContext,
69};
70use plugins::standalone::context::DdlManagerConfigureContext;
71use servers::tls::{TlsMode, TlsOption, merge_tls_option};
72use snafu::ResultExt;
73use standalone::options::StandaloneOptions;
74use standalone::{StandaloneInformationExtension, StandaloneRepartitionProcedureFactory};
75use tracing_appender::non_blocking::WorkerGuard;
76
77use crate::error::{OtherSnafu, Result, StartFlownodeSnafu};
78use crate::options::{GlobalOptions, GreptimeOptions};
79use crate::{App, create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile};
80
81pub const APP_NAME: &str = "greptime-standalone";
82
// Top-level CLI entry of the standalone binary; it only wraps the subcommand to run.
// NOTE: plain `//` comments are used on purpose. With clap's derive, `///` doc comments
// on this type or its fields are picked up as help text, which would change the output.
#[derive(Parser)]
pub struct Command {
    // The subcommand selected on the command line.
    #[clap(subcommand)]
    subcmd: SubCommand,
}
88
89impl Command {
90    pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
91        self.subcmd.build(opts).await
92    }
93
94    pub fn load_options(
95        &self,
96        global_options: &GlobalOptions,
97    ) -> Result<GreptimeOptions<StandaloneOptions>> {
98        self.subcmd.load_options(global_options)
99    }
100}
101
// Subcommands accepted by the standalone CLI.
// NOTE: plain `//` comments are used on purpose. With clap's derive, `///` doc comments
// on the enum or its variants become help text, which would change the output.
#[derive(Parser)]
enum SubCommand {
    // Carries the flags parsed for the start command.
    Start(StartCommand),
}
106
107impl SubCommand {
108    async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
109        match self {
110            SubCommand::Start(cmd) => cmd.build(opts).await,
111        }
112    }
113
114    fn load_options(
115        &self,
116        global_options: &GlobalOptions,
117    ) -> Result<GreptimeOptions<StandaloneOptions>> {
118        match self {
119            SubCommand::Start(cmd) => cmd.load_options(global_options),
120        }
121    }
122}
123
/// A fully assembled standalone instance: the datanode, frontend and flownode components
/// bundled together with the handles needed to start and stop its leader services.
pub struct Instance {
    /// Datanode component; shut down in [`App::stop`] and exposed via [`Instance::datanode`].
    datanode: Datanode,
    /// Frontend component; started and shut down by the [`App`] implementation.
    frontend: Frontend,
    /// Flownode component; started last and shut down last by the [`App`] implementation.
    flownode: FlownodeInstance,
    /// Handed to the leader services controller when the instance is stopped.
    procedure_manager: ProcedureManagerRef,
    /// Starts/stops the leader services around the lifecycle of this instance.
    leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
    /// Cloned and passed to the leader services controller on start.
    leader_services_context: LeaderServicesContext,
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
}
134
135impl Instance {
136    /// Find the socket addr of a server by its `name`.
137    pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
138        self.frontend.server_handlers().addr(name)
139    }
140
141    /// Get the mutable Frontend component of this Standalone instance for externally modification
142    /// by others (might not be in this code base, so don't delete this function).
143    pub fn mut_frontend(&mut self) -> &mut Frontend {
144        &mut self.frontend
145    }
146
147    /// Get the Datanode component of this Standalone instance for externally usage
148    /// by others (might not be in this code base, so don't delete this function).
149    pub fn datanode(&self) -> &Datanode {
150        &self.datanode
151    }
152}
153
154#[async_trait]
155impl App for Instance {
156    fn name(&self) -> &str {
157        APP_NAME
158    }
159
160    async fn start(&mut self) -> Result<()> {
161        self.datanode.start_telemetry();
162
163        self.leader_services_controller
164            .start(self.leader_services_context.clone())
165            .await?;
166
167        plugins::start_frontend_plugins(&self.frontend.instance)
168            .await
169            .context(error::StartFrontendSnafu)?;
170
171        self.frontend
172            .start()
173            .await
174            .context(error::StartFrontendSnafu)?;
175
176        self.flownode.start().await.context(StartFlownodeSnafu)?;
177
178        Ok(())
179    }
180
181    async fn stop(&mut self) -> Result<()> {
182        self.frontend
183            .shutdown()
184            .await
185            .context(error::ShutdownFrontendSnafu)?;
186
187        self.leader_services_controller
188            .stop(
189                self.procedure_manager.clone(),
190                self.datanode.region_server(),
191            )
192            .await?;
193
194        self.datanode
195            .shutdown()
196            .await
197            .context(error::ShutdownDatanodeSnafu)?;
198
199        self.flownode
200            .shutdown()
201            .await
202            .context(error::ShutdownFlownodeSnafu)?;
203
204        info!("Datanode instance stopped.");
205
206        Ok(())
207    }
208}
209
// Command-line flags of the standalone start command.
// NOTE: new comments here are plain `//` on purpose. With clap's derive, `///` doc comments
// on fields become help text, which would change the program's output.
#[derive(Debug, Default, Parser)]
pub struct StartCommand {
    // Overrides the HTTP server address when given.
    #[clap(long)]
    http_addr: Option<String>,
    // Overrides the gRPC bind address when given; rejected if it equals the default
    // datanode gRPC bind address (see `merge_with_cli_options`).
    #[clap(long = "grpc-bind-addr", alias = "rpc-bind-addr", alias = "rpc-addr")]
    grpc_bind_addr: Option<String>,
    // Enables the MySQL server and overrides its address when given.
    #[clap(long)]
    mysql_addr: Option<String>,
    // Enables the PostgreSQL server and overrides its address when given.
    #[clap(long)]
    postgres_addr: Option<String>,
    // Enables InfluxDB support when set; it never disables it.
    #[clap(short, long)]
    influxdb_enable: bool,
    // Path of the config file passed to the layered options loader.
    #[clap(short, long)]
    pub config_file: Option<String>,
    // TLS flags; merged into the gRPC/MySQL/PostgreSQL options only when the matching
    // address flag is also given (see `merge_with_cli_options`).
    #[clap(long)]
    tls_mode: Option<TlsMode>,
    #[clap(long)]
    tls_cert_path: Option<String>,
    #[clap(long)]
    tls_key_path: Option<String>,
    #[clap(long)]
    tls_watch: bool,
    // Overrides the user provider when given.
    #[clap(long)]
    user_provider: Option<String>,
    // Prefix passed to the layered options loader for environment variables.
    #[clap(long, default_value = "GREPTIMEDB_STANDALONE")]
    pub env_prefix: String,
    /// The working home directory of this standalone instance.
    #[clap(long)]
    data_home: Option<String>,
}
240
241impl StartCommand {
242    /// Load the GreptimeDB options from various sources (command line, config file or env).
243    pub fn load_options(
244        &self,
245        global_options: &GlobalOptions,
246    ) -> Result<GreptimeOptions<StandaloneOptions>> {
247        let mut opts = GreptimeOptions::<StandaloneOptions>::load_layered_options(
248            self.config_file.as_deref(),
249            self.env_prefix.as_ref(),
250        )
251        .context(error::LoadLayeredConfigSnafu)?;
252
253        self.merge_with_cli_options(global_options, &mut opts.component)?;
254        opts.component.sanitize();
255
256        Ok(opts)
257    }
258
259    // The precedence order is: cli > config file > environment variables > default values.
260    pub fn merge_with_cli_options(
261        &self,
262        global_options: &GlobalOptions,
263        opts: &mut StandaloneOptions,
264    ) -> Result<()> {
265        if let Some(dir) = &global_options.log_dir {
266            opts.logging.dir.clone_from(dir);
267        }
268
269        if global_options.log_level.is_some() {
270            opts.logging.level.clone_from(&global_options.log_level);
271        }
272
273        opts.tracing = TracingOptions {
274            #[cfg(feature = "tokio-console")]
275            tokio_console_addr: global_options.tokio_console_addr.clone(),
276        };
277
278        let tls_opts = TlsOption::new(
279            self.tls_mode,
280            self.tls_cert_path.clone(),
281            self.tls_key_path.clone(),
282            self.tls_watch,
283        );
284
285        if let Some(addr) = &self.http_addr {
286            opts.http.addr.clone_from(addr);
287        }
288
289        if let Some(data_home) = &self.data_home {
290            opts.storage.data_home.clone_from(data_home);
291        }
292
293        // If the logging dir is not set, use the default logs dir in the data home.
294        if opts.logging.dir.is_empty() {
295            opts.logging.dir = Path::new(&opts.storage.data_home)
296                .join(DEFAULT_LOGGING_DIR)
297                .to_string_lossy()
298                .to_string();
299        }
300
301        if let Some(addr) = &self.grpc_bind_addr {
302            // frontend grpc addr conflict with datanode default grpc addr
303            let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
304            if addr.eq(&datanode_grpc_addr) {
305                return error::IllegalConfigSnafu {
306                    msg: format!(
307                        "gRPC listen address conflicts with datanode reserved gRPC addr: {datanode_grpc_addr}",
308                    ),
309                }.fail();
310            }
311            opts.grpc.bind_addr.clone_from(addr);
312            opts.grpc.tls = merge_tls_option(&opts.grpc.tls, tls_opts.clone());
313        }
314
315        if let Some(addr) = &self.mysql_addr {
316            opts.mysql.enable = true;
317            opts.mysql.addr.clone_from(addr);
318            opts.mysql.tls = merge_tls_option(&opts.mysql.tls, tls_opts.clone());
319        }
320
321        if let Some(addr) = &self.postgres_addr {
322            opts.postgres.enable = true;
323            opts.postgres.addr.clone_from(addr);
324            opts.postgres.tls = merge_tls_option(&opts.postgres.tls, tls_opts.clone());
325        }
326
327        if self.influxdb_enable {
328            opts.influxdb.enable = self.influxdb_enable;
329        }
330
331        if let Some(user_provider) = &self.user_provider {
332            opts.user_provider = Some(user_provider.clone());
333        }
334
335        Ok(())
336    }
337
338    #[allow(unreachable_code)]
339    #[allow(unused_variables)]
340    #[allow(clippy::diverging_sub_expression)]
341    /// Build GreptimeDB instance with the loaded options.
342    pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
343        common_runtime::init_global_runtimes(&opts.runtime);
344
345        let guard = common_telemetry::init_global_logging(
346            APP_NAME,
347            &opts.component.logging,
348            &opts.component.tracing,
349            None,
350            Some(&opts.component.slow_query),
351        );
352
353        log_versions(verbose_version(), short_version(), APP_NAME);
354        maybe_activate_heap_profile(&opts.component.memory);
355        create_resource_limit_metrics(APP_NAME);
356
357        info!("Standalone start command: {:#?}", self);
358        info!("Standalone options: {opts:#?}");
359
360        let (mut instance, _) =
361            Self::build_with(opts.component, opts.plugins, InstanceCreator::default()).await?;
362        instance._guard.extend(guard);
363        Ok(instance)
364    }
365
    /// Assembles a standalone [`Instance`] from already-loaded options, using `creator` for
    /// the customizable pieces (metadata KV backend, node manager, table id allocator,
    /// procedure executor, leader services controller).
    ///
    /// The steps, in order: plugin pre-build hooks, default column prefix and timezone,
    /// data home creation, metadata KV backend and procedure manager, cache registry,
    /// datanode, catalog manager, flownode, node manager, DDL manager, procedure executor,
    /// frontend instance and its servers. Only `build`-style calls are made in the visible
    /// code; starting the components is left to [`App::start`].
    ///
    /// Returns the instance together with an [`InstanceCreatorResult`] holding the KV
    /// backend, node manager and table id allocator that were created along the way. The
    /// instance's `_guard` is left empty; [`Self::build`] fills it with the logging guards.
    ///
    /// # Errors
    ///
    /// Any failing step aborts the build and returns its error.
    pub async fn build_with(
        mut opts: StandaloneOptions,
        plugin_opts: Vec<PluginOptions>,
        creator: InstanceCreator,
    ) -> Result<(Instance, InstanceCreatorResult)> {
        let mut plugins = Plugins::new();
        // Marker plugin inserted before any plugin hook runs.
        plugins.insert(StandaloneFlag);
        set_default_prefix(opts.default_column_prefix.as_deref())
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;

        // The frontend/datanode option views are derived after the gRPC server address
        // detection has updated `opts`.
        opts.grpc.detect_server_addr();
        let fe_opts = opts.frontend_options();
        let dn_opts = opts.datanode_options();
        // Captured now because `dn_opts` is moved into the datanode builder below.
        let node_id = dn_opts.node_id;
        let init_regions_parallelism = dn_opts.init_regions_parallelism;

        plugins::setup_frontend_plugins_pre_build(&mut plugins, &plugin_opts, &fe_opts, None)
            .await
            .context(error::StartFrontendSnafu)?;

        plugins::setup_datanode_plugins_pre_build(&mut plugins, &plugin_opts, &dn_opts)
            .await
            .context(error::StartDatanodeSnafu)?;

        set_default_timezone(fe_opts.default_timezone.as_deref())
            .context(error::InitTimezoneSnafu)?;

        let data_home = &dn_opts.storage.data_home;
        // Ensure the data_home directory exists.
        fs::create_dir_all(path::Path::new(data_home))
            .context(error::CreateDirSnafu { dir: data_home })?;

        // The metadata KV backend comes from the (replaceable) creator; the procedure
        // manager is built on top of it.
        let metadata_dir = metadata_store_dir(data_home);
        let kv_backend = creator
            .metadata_kv_backend_creator
            .create(metadata_dir, &opts)
            .await?;
        let procedure_manager =
            standalone::build_procedure_manager(kv_backend.clone(), opts.procedure);

        plugins::setup_standalone_plugins(&mut plugins, &plugin_opts, &opts, kv_backend.clone())
            .await
            .context(error::SetupStandalonePluginsSnafu)?;

        // Builds cache registry
        let layered_cache_builder = LayeredCacheRegistryBuilder::default();
        let fundamental_cache_registry = build_fundamental_cache_registry(kv_backend.clone());
        let mut layered_cache_builder = with_default_composite_cache_registry(
            layered_cache_builder.add_cache_registry(fundamental_cache_registry),
        )
        .context(error::BuildCacheRegistrySnafu)?;

        // Plugins may contribute one more cache registry layer.
        if let Some(plugin_cache_builder) = plugins::standalone::configure_cache_registry(&plugins)
        {
            layered_cache_builder =
                layered_cache_builder.add_cache_registry(plugin_cache_builder.build());
        }

        let layered_cache_registry = Arc::new(layered_cache_builder.build());

        // Datanode.
        let mut builder = DatanodeBuilder::new(dn_opts, plugins.clone(), kv_backend.clone());
        builder.with_cache_registry(layered_cache_registry.clone());
        if let Some(writable) = creator.open_regions_writable_override {
            builder.with_open_regions_writable_override(writable);
        }

        // The post-build hook takes `&mut plugins`; the builder is given a fresh clone
        // afterwards, presumably so that it sees anything the hook added.
        plugins::setup_datanode_plugins_post_build(&mut plugins, &plugin_opts, &builder)
            .await
            .context(error::StartDatanodeSnafu)?;
        builder.set_plugins(plugins.clone());

        let datanode = builder.build().await.context(error::StartDatanodeSnafu)?;

        let information_extension = Arc::new(StandaloneInformationExtension::new(
            datanode.region_server(),
            procedure_manager.clone(),
        ));

        plugins.insert::<InformationExtensionRef>(information_extension.clone());

        let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));

        // Standalone does not go through gRPC here: create a frontend client backed by an
        // empty handler, without actually making a connection. The handler is filled in
        // once the frontend instance has been built (see below).
        let (frontend_client, frontend_instance_handler) =
            FrontendClient::from_empty_grpc_handler(opts.query.clone());
        let frontend_client = Arc::new(frontend_client);

        // Catalog manager, optionally customized by a plugin-provided configurator.
        let builder = KvBackendCatalogManagerBuilder::new(
            information_extension.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
        )
        .with_procedure_manager(procedure_manager.clone())
        .with_process_manager(process_manager.clone());
        let builder = if let Some(configurator) =
            plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
        {
            let ctx = StandaloneCatalogManagerConfigureContext {
                fe_client: frontend_client.clone(),
            };
            let ctx = CatalogManagerConfigureContext::Standalone(ctx);
            configurator
                .configure(builder, ctx)
                .await
                .context(OtherSnafu)?
        } else {
            builder
        };
        let catalog_manager = builder.build();

        let table_metadata_manager =
            Self::create_table_metadata_manager(kv_backend.clone()).await?;

        // Flownode: only the `flow` section is taken from `opts`, the rest is default.
        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let flownode_options = FlownodeOptions {
            flow: opts.flow.clone(),
            ..Default::default()
        };

        let mut flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
            frontend_client.clone(),
        );

        // Same hook-then-reset pattern as for the datanode builder above.
        plugins::setup_flownode_plugins_post_build(&mut plugins, &plugin_opts, &flow_builder)
            .await
            .context(error::StartFlownodeSnafu)?;
        flow_builder.set_plugins(plugins.clone());

        let flownode = flow_builder
            .build()
            .await
            .map_err(BoxedError::new)
            .context(error::OtherSnafu)?;
        let flow_engine = flownode.flow_engine();

        // set the ref to query for the local flow state
        {
            information_extension
                .set_flow_engine(flow_engine.clone())
                .await;
        }

        let node_manager = creator
            .node_manager_creator
            .create(&kv_backend, datanode.region_server(), flow_engine.clone())
            .await?;

        // Id allocators: table ids come from the (replaceable) creator, flow ids from a
        // sequence built here.
        let table_id_allocator = creator.table_id_allocator_creator.create(&kv_backend);
        let flow_id_sequence = Arc::new(
            SequenceBuilder::new(FLOW_ID_SEQ, kv_backend.clone())
                .initial(MIN_USER_FLOW_ID as u64)
                .step(10)
                .build(),
        );
        // NOTE(review): despite its name, `kafka_options` is simply whatever `opts.wal`
        // converts into for `build_wal_provider`; the concrete type is not visible here.
        let kafka_options = opts
            .wal
            .clone()
            .try_into()
            .context(error::InvalidWalProviderSnafu)?;
        let wal_provider = build_wal_provider(&kafka_options, kv_backend.clone())
            .await
            .context(error::BuildWalProviderSnafu)?;
        let wal_provider = Arc::new(wal_provider);
        let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
            table_id_allocator.clone(),
            wal_provider.clone(),
        ));
        let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
            flow_id_sequence,
        ));

        // DDL manager, optionally customized by a plugin-provided configurator.
        let ddl_context = DdlContext {
            node_manager: node_manager.clone(),
            cache_invalidator: layered_cache_registry.clone(),
            memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
            leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
            table_metadata_manager: table_metadata_manager.clone(),
            table_metadata_allocator: table_metadata_allocator.clone(),
            flow_metadata_manager: flow_metadata_manager.clone(),
            flow_metadata_allocator: flow_metadata_allocator.clone(),
            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
        };

        // NOTE(review): the meaning of the trailing `true` flag is defined by
        // `DdlManager::try_new` and is not visible from this file.
        let ddl_manager = DdlManager::try_new(
            ddl_context,
            procedure_manager.clone(),
            Arc::new(StandaloneRepartitionProcedureFactory),
            true,
        )
        .context(error::InitDdlManagerSnafu)?;

        let ddl_manager = if let Some(configurator) =
            plugins.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>()
        {
            let ctx = DdlManagerConfigureContext {
                kv_backend: kv_backend.clone(),
                fe_client: frontend_client.clone(),
                catalog_manager: catalog_manager.clone(),
            };
            configurator
                .configure(ddl_manager, ctx)
                .await
                .context(OtherSnafu)?
        } else {
            ddl_manager
        };

        let procedure_executor = creator
            .procedure_executor_creator
            .create(Arc::new(ddl_manager), procedure_manager.clone())
            .await?;

        // Frontend instance. Here `fe_instance` is still the builder; it is shadowed by the
        // built instance further down.
        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
            catalog_manager.clone(),
            node_manager.clone(),
            procedure_executor.clone(),
            process_manager,
        );

        plugins::setup_frontend_plugins_post_build(&mut plugins, &plugin_opts, &fe_instance)
            .await
            .context(error::StartFrontendSnafu)?;

        let fe_instance = fe_instance
            .with_plugin(plugins.clone())
            .try_build()
            .await
            .context(error::StartFrontendSnafu)?;
        let fe_instance = Arc::new(fe_instance);

        // set the frontend client for flownode
        // Only a weak reference is handed over, so the handler does not keep the frontend
        // instance alive by itself.
        let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
        let weak_grpc_handler = Arc::downgrade(&grpc_handler);
        frontend_instance_handler
            .set_handler(weak_grpc_handler)
            .await;

        // set the frontend invoker for flownode
        let flow_streaming_engine = flow_engine.streaming_engine();
        // flow server need to be able to use frontend to write insert requests back
        let invoker = FrontendInvoker::build_from(
            flow_streaming_engine.clone(),
            catalog_manager.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
            procedure_executor,
            node_manager.clone(),
            fe_instance.frontend_peer_addr().to_string(),
        )
        .await
        .context(StartFlownodeSnafu)?;
        flow_streaming_engine.set_frontend_invoker(invoker).await;

        // `opts` is consumed here; everything needed from it was read above.
        let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
            .build()
            .context(error::StartFrontendSnafu)?;

        let frontend = Frontend {
            instance: fe_instance,
            servers,
            heartbeat_task: None,
        };
        // Handles made available to the leader services controller at start time.
        let leader_services_context = LeaderServicesContext {
            procedure_manager: procedure_manager.clone(),
            wal_provider: wal_provider.clone(),
            region_server: datanode.region_server(),
            kv_backend: kv_backend.clone(),
            cache_registry: layered_cache_registry,
            catalog_manager,
            flow_engine,
            frontend_client,
            node_id,
            init_regions_parallelism,
            plugin_options: plugin_opts,
        };

        let instance = Instance {
            datanode,
            frontend,
            flownode,
            procedure_manager,
            leader_services_controller: creator.leader_services_controller,
            leader_services_context,
            // Left empty here; `build` extends it with the logging guards.
            _guard: vec![],
        };
        let result = InstanceCreatorResult {
            kv_backend,
            node_manager,
            table_id_allocator,
        };
        Ok((instance, result))
    }
668
669    pub async fn create_table_metadata_manager(
670        kv_backend: KvBackendRef,
671    ) -> Result<TableMetadataManagerRef> {
672        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
673
674        table_metadata_manager
675            .init()
676            .await
677            .context(error::InitMetadataSnafu)?;
678
679        Ok(table_metadata_manager)
680    }
681}
682
/// Hook for creating the [`NodeManagerRef`] used by the standalone instance.
///
/// `StartCommand::build_with` calls it with the metadata KV backend, the datanode's region
/// server and the flow engine.
#[async_trait]
pub trait NodeManagerCreator: Send + Sync {
    /// Creates the node manager from the given standalone components.
    async fn create(
        &self,
        kv_backend: &KvBackendRef,
        region_server: RegionServer,
        flow_server: FlownodeRef,
    ) -> Result<NodeManagerRef>;
}
692
693pub struct DefaultNodeManagerCreator;
694
695#[async_trait]
696impl NodeManagerCreator for DefaultNodeManagerCreator {
697    async fn create(
698        &self,
699        _: &KvBackendRef,
700        region_server: RegionServer,
701        flow_server: FlownodeRef,
702    ) -> Result<NodeManagerRef> {
703        Ok(Arc::new(StandaloneDatanodeManager {
704            region_server,
705            flow_server,
706        }))
707    }
708}
709
/// Customizes how standalone opens its metadata KV backend.
///
/// The default implementation preserves the built-in raft-engine path. Other
/// callers can provide a custom implementation without changing standalone
/// configuration types.
#[async_trait]
pub trait MetadataKvBackendCreator: Send + Sync {
    /// Opens the metadata KV backend.
    ///
    /// When called from `StartCommand::build_with`, `metadata_dir` is the result of
    /// `metadata_store_dir` applied to the data home, and `opts` are the full standalone
    /// options.
    async fn create(&self, metadata_dir: String, opts: &StandaloneOptions) -> Result<KvBackendRef>;
}
719
720pub struct DefaultMetadataKvBackendCreator;
721
722#[async_trait]
723impl MetadataKvBackendCreator for DefaultMetadataKvBackendCreator {
724    async fn create(&self, metadata_dir: String, opts: &StandaloneOptions) -> Result<KvBackendRef> {
725        standalone::build_metadata_kvbackend(metadata_dir, opts.metadata_store)
726            .context(error::BuildMetadataKvbackendSnafu)
727    }
728}
729
/// Hook for creating the table id allocator used by the standalone instance.
///
/// `StartCommand::build_with` hands the returned sequence to the table metadata allocator
/// and also returns it to the caller in the creator result.
pub trait TableIdAllocatorCreator: Send + Sync {
    /// Creates the table id sequence on top of `kv_backend`.
    fn create(&self, kv_backend: &KvBackendRef) -> Arc<Sequence>;
}
733
734struct DefaultTableIdAllocatorCreator;
735
736impl TableIdAllocatorCreator for DefaultTableIdAllocatorCreator {
737    fn create(&self, kv_backend: &KvBackendRef) -> Arc<Sequence> {
738        Arc::new(
739            SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
740                .initial(MIN_USER_TABLE_ID as u64)
741                .step(10)
742                .build(),
743        )
744    }
745}
746
/// Hook for creating the [`ProcedureExecutorRef`] used by the standalone instance.
///
/// `StartCommand::build_with` calls it with the (possibly plugin-configured) DDL manager
/// and the procedure manager.
#[async_trait]
pub trait ProcedureExecutorCreator: Send + Sync {
    /// Creates the procedure executor from the DDL manager and the procedure manager.
    async fn create(
        &self,
        ddl_manager: DdlManagerRef,
        procedure_manager: ProcedureManagerRef,
    ) -> Result<ProcedureExecutorRef>;
}
755
756pub struct DefaultProcedureExecutorCreator;
757
758#[async_trait]
759impl ProcedureExecutorCreator for DefaultProcedureExecutorCreator {
760    async fn create(
761        &self,
762        ddl_manager: DdlManagerRef,
763        procedure_manager: ProcedureManagerRef,
764    ) -> Result<ProcedureExecutorRef> {
765        Ok(Arc::new(LocalProcedureExecutor::new(
766            ddl_manager,
767            procedure_manager,
768        )))
769    }
770}
771
/// Controls the lifecycle of the leader services of a standalone instance.
///
/// The instance calls [`StandaloneLeaderServicesController::start`] while starting up
/// (before the frontend is started) and [`StandaloneLeaderServicesController::stop`] while
/// shutting down (after the frontend has been shut down).
#[async_trait]
pub trait StandaloneLeaderServicesController: Send + Sync {
    /// Starts leader services that manage standalone metadata or WAL state.
    ///
    /// The default implementation starts the procedure manager and WAL provider
    /// during instance startup.
    async fn start(&self, context: LeaderServicesContext) -> Result<()>;

    /// Stops services started by [`StandaloneLeaderServicesController::start`].
    ///
    /// NOTE(review): unlike `start`, this method receives no WAL provider handle; the
    /// default implementation only stops the procedure manager.
    async fn stop(
        &self,
        procedure_manager: ProcedureManagerRef,
        region_server: RegionServer,
    ) -> Result<()>;
}
787
#[derive(Clone)]
/// Additional runtime handles for custom leader-service controllers.
///
/// The default standalone startup only needs to start/stop the procedure
/// manager and WAL provider. Some embedders need to do more work around
/// leader-service startup, for example reconciling metadata-backed runtime
/// state before publishing writable leadership. Grouping those handles here
/// keeps `Instance` small and avoids expanding
/// [`StandaloneLeaderServicesController::start`] every time a custom lifecycle
/// needs one more standalone component.
///
/// The context is populated at the end of `StartCommand::build_with` and cloned for every
/// call to `start`.
pub struct LeaderServicesContext {
    /// Started by the default controller.
    pub procedure_manager: ProcedureManagerRef,
    /// Started by the default controller.
    pub wal_provider: WalProviderRef,
    /// The region server of the standalone datanode.
    pub region_server: RegionServer,
    /// The metadata KV backend.
    pub kv_backend: KvBackendRef,
    /// The layered cache registry built for this instance.
    pub cache_registry: LayeredCacheRegistryRef,
    pub catalog_manager: CatalogManagerRef,
    /// The flow engine of the standalone flownode.
    pub flow_engine: FlowDualEngineRef,
    pub frontend_client: Arc<FrontendClient>,
    /// Taken from the datanode options.
    pub node_id: Option<DatanodeId>,
    /// Taken from the datanode options.
    pub init_regions_parallelism: usize,
    /// The plugin options the instance was built with.
    pub plugin_options: Vec<PluginOptions>,
}
811
812pub struct DefaultStandaloneLeaderServicesController;
813
814#[async_trait]
815impl StandaloneLeaderServicesController for DefaultStandaloneLeaderServicesController {
816    async fn start(&self, context: LeaderServicesContext) -> Result<()> {
817        context
818            .procedure_manager
819            .start()
820            .await
821            .context(error::StartProcedureManagerSnafu)?;
822        context
823            .wal_provider
824            .start()
825            .await
826            .context(error::StartWalProviderSnafu)
827    }
828
829    async fn stop(
830        &self,
831        procedure_manager: ProcedureManagerRef,
832        _region_server: RegionServer,
833    ) -> Result<()> {
834        procedure_manager
835            .stop()
836            .await
837            .context(error::StopProcedureManagerSnafu)
838    }
839}
840
/// `InstanceCreator` is used for grouping various component creators for building the
/// Standalone instance, suitable for customizing how the instance can be built.
pub struct InstanceCreator {
    /// Hook for replacing metadata KV construction while reusing the rest of the
    /// standalone build flow.
    metadata_kv_backend_creator: Box<dyn MetadataKvBackendCreator>,
    /// Hook for creating the node manager.
    node_manager_creator: Box<dyn NodeManagerCreator>,
    /// Hook for creating the table id allocator.
    table_id_allocator_creator: Box<dyn TableIdAllocatorCreator>,
    /// Hook for creating the procedure executor.
    procedure_executor_creator: Box<dyn ProcedureExecutorCreator>,
    /// Moved into the built `Instance`, which uses it to start/stop leader services.
    leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
    /// When `Some`, forwarded to the datanode builder's
    /// `with_open_regions_writable_override`; when `None`, the builder is left untouched.
    open_regions_writable_override: Option<bool>,
}
853
854impl InstanceCreator {
855    pub fn new(
856        node_manager_creator: Box<dyn NodeManagerCreator>,
857        table_id_allocator_creator: Box<dyn TableIdAllocatorCreator>,
858        procedure_executor_creator: Box<dyn ProcedureExecutorCreator>,
859    ) -> Self {
860        Self {
861            metadata_kv_backend_creator: Box::new(DefaultMetadataKvBackendCreator),
862            node_manager_creator,
863            table_id_allocator_creator,
864            procedure_executor_creator,
865            leader_services_controller: Box::new(DefaultStandaloneLeaderServicesController),
866            open_regions_writable_override: None,
867        }
868    }
869
870    pub fn with_metadata_kv_backend_creator(
871        mut self,
872        metadata_kv_backend_creator: Box<dyn MetadataKvBackendCreator>,
873    ) -> Self {
874        self.metadata_kv_backend_creator = metadata_kv_backend_creator;
875        self
876    }
877
878    /// Wraps the metadata backend creator while retaining the default creator.
879    ///
880    /// This is useful for callers that need to add runtime behavior around
881    /// metadata access without reimplementing backend selection.
882    pub fn map_metadata_kv_backend_creator<F>(mut self, f: F) -> Self
883    where
884        F: FnOnce(Box<dyn MetadataKvBackendCreator>) -> Box<dyn MetadataKvBackendCreator>,
885    {
886        self.metadata_kv_backend_creator = f(self.metadata_kv_backend_creator);
887        self
888    }
889
890    /// Wraps node-manager creation while preserving the selected standalone node manager.
891    pub fn map_node_manager_creator<F>(mut self, f: F) -> Self
892    where
893        F: FnOnce(Box<dyn NodeManagerCreator>) -> Box<dyn NodeManagerCreator>,
894    {
895        self.node_manager_creator = f(self.node_manager_creator);
896        self
897    }
898
899    /// Wraps procedure-executor creation while preserving the current setup.
900    pub fn map_procedure_executor_creator<F>(mut self, f: F) -> Self
901    where
902        F: FnOnce(Box<dyn ProcedureExecutorCreator>) -> Box<dyn ProcedureExecutorCreator>,
903    {
904        self.procedure_executor_creator = f(self.procedure_executor_creator);
905        self
906    }
907
908    /// Replaces startup/shutdown ownership for procedure manager and WAL provider.
909    pub fn with_leader_services_controller(
910        mut self,
911        leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
912    ) -> Self {
913        self.leader_services_controller = leader_services_controller;
914        self
915    }
916
917    /// Overrides whether regions opened during startup should become writable.
918    ///
919    /// `None` keeps the default startup behavior (regions open writable).
920    ///
921    /// Warning: setting this to `false` in standalone mode will leave reopened regions
922    /// permanently read-only. Standalone has no metasrv heartbeat or region-role
923    /// reconciliation, so there is no path to promote regions to Leader after startup.
924    pub fn with_open_regions_writable_override(mut self, writable: bool) -> Self {
925        self.open_regions_writable_override = Some(writable);
926        self
927    }
928}
929
930impl Default for InstanceCreator {
931    fn default() -> Self {
932        Self {
933            metadata_kv_backend_creator: Box::new(DefaultMetadataKvBackendCreator),
934            node_manager_creator: Box::new(DefaultNodeManagerCreator),
935            table_id_allocator_creator: Box::new(DefaultTableIdAllocatorCreator),
936            procedure_executor_creator: Box::new(DefaultProcedureExecutorCreator),
937            leader_services_controller: Box::new(DefaultStandaloneLeaderServicesController),
938            open_regions_writable_override: None,
939        }
940    }
941}
942
/// `InstanceCreatorResult` is meant to be used together with [InstanceCreator].
/// It holds components produced while building the instance so that callers can
/// reuse them afterwards.
pub struct InstanceCreatorResult {
    /// The metadata KV backend.
    pub kv_backend: KvBackendRef,
    /// The node manager.
    pub node_manager: NodeManagerRef,
    /// The sequence used to allocate table ids.
    pub table_id_allocator: Arc<Sequence>,
}
950
951#[cfg(test)]
952mod tests {
953    use std::default::Default;
954    use std::io::Write;
955    use std::time::Duration;
956
957    use auth::{Identity, Password, UserProviderRef};
958    use clap::{CommandFactory, Parser};
959    use common_base::readable_size::ReadableSize;
960    use common_config::ENV_VAR_SEP;
961    use common_options::plugin_options::StandaloneFlag;
962    use common_test_util::temp_dir::create_named_temp_file;
963    use common_wal::config::DatanodeWalConfig;
964    use frontend::frontend::FrontendOptions;
965    use object_store::config::{FileConfig, GcsConfig};
966    use servers::grpc::GrpcOptions;
967
968    use super::*;
969    use crate::options::GlobalOptions;
970
971    #[tokio::test]
972    async fn test_try_from_start_command_to_anymap() {
973        let fe_opts = FrontendOptions {
974            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
975            ..Default::default()
976        };
977
978        let mut plugins = Plugins::new();
979        plugins.insert(StandaloneFlag);
980        plugins::setup_frontend_plugins_pre_build(&mut plugins, &[], &fe_opts, None)
981            .await
982            .unwrap();
983
984        let provider = plugins.get::<UserProviderRef>().unwrap();
985        let result = provider
986            .authenticate(
987                Identity::UserId("test", None),
988                Password::PlainText("test".to_string().into()),
989            )
990            .await;
991        let _ = result.unwrap();
992    }
993
994    #[test]
995    fn test_toml() {
996        let opts = StandaloneOptions::default();
997        let toml_string = toml::to_string(&opts).unwrap();
998        let _parsed: StandaloneOptions = toml::from_str(&toml_string).unwrap();
999    }
1000
1001    #[test]
1002    fn test_read_from_config_file() {
1003        let mut file = create_named_temp_file();
1004        let toml_str = r#"
1005            enable_memory_catalog = true
1006
1007            [wal]
1008            provider = "raft_engine"
1009            dir = "./greptimedb_data/test/wal"
1010            file_size = "1GB"
1011            purge_threshold = "50GB"
1012            purge_interval = "10m"
1013            read_batch_size = 128
1014            sync_write = false
1015
1016            [storage]
1017            data_home = "./greptimedb_data/"
1018            type = "File"
1019
1020            [[storage.providers]]
1021            type = "Gcs"
1022            bucket = "foo"
1023            endpoint = "bar"
1024
1025            [[storage.providers]]
1026            type = "S3"
1027            access_key_id = "access_key_id"
1028            secret_access_key = "secret_access_key"
1029
1030            [storage.compaction]
1031            max_inflight_tasks = 3
1032            max_files_in_level0 = 7
1033            max_purge_tasks = 32
1034
1035            [storage.manifest]
1036            checkpoint_margin = 9
1037            gc_duration = '7s'
1038
1039            [http]
1040            addr = "127.0.0.1:4000"
1041            timeout = "33s"
1042            body_limit = "128MB"
1043
1044            [opentsdb]
1045            enable = true
1046
1047            [logging]
1048            level = "debug"
1049            dir = "./greptimedb_data/test/logs"
1050        "#;
1051        write!(file, "{}", toml_str).unwrap();
1052        let cmd = StartCommand {
1053            config_file: Some(file.path().to_str().unwrap().to_string()),
1054            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
1055            ..Default::default()
1056        };
1057
1058        let options = cmd
1059            .load_options(&GlobalOptions::default())
1060            .unwrap()
1061            .component;
1062        let fe_opts = options.frontend_options();
1063        let dn_opts = options.datanode_options();
1064        let logging_opts = options.logging;
1065        assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
1066        assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
1067        assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
1068        assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
1069        assert!(fe_opts.mysql.enable);
1070        assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
1071        assert_eq!(2, fe_opts.mysql.runtime_size);
1072        assert_eq!(None, fe_opts.mysql.reject_no_database);
1073        assert!(fe_opts.influxdb.enable);
1074        assert!(fe_opts.opentsdb.enable);
1075
1076        let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
1077            unreachable!()
1078        };
1079        assert_eq!(
1080            "./greptimedb_data/test/wal",
1081            raft_engine_config.dir.unwrap()
1082        );
1083
1084        assert!(matches!(
1085            &dn_opts.storage.store,
1086            object_store::config::ObjectStoreConfig::File(FileConfig { .. })
1087        ));
1088        assert_eq!(dn_opts.storage.providers.len(), 2);
1089        assert!(matches!(
1090            dn_opts.storage.providers[0],
1091            object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
1092        ));
1093        match &dn_opts.storage.providers[1] {
1094            object_store::config::ObjectStoreConfig::S3(s3_config) => {
1095                assert_eq!(
1096                    "SecretBox<alloc::string::String>([REDACTED])".to_string(),
1097                    format!("{:?}", s3_config.connection.access_key_id)
1098                );
1099            }
1100            _ => {
1101                unreachable!()
1102            }
1103        }
1104
1105        assert_eq!("debug", logging_opts.level.as_ref().unwrap());
1106        assert_eq!("./greptimedb_data/test/logs".to_string(), logging_opts.dir);
1107    }
1108
1109    #[test]
1110    fn test_load_log_options_from_cli() {
1111        let cmd = StartCommand {
1112            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
1113            mysql_addr: Some("127.0.0.1:4002".to_string()),
1114            postgres_addr: Some("127.0.0.1:4003".to_string()),
1115            ..Default::default()
1116        };
1117
1118        let opts = cmd
1119            .load_options(&GlobalOptions {
1120                log_dir: Some("./greptimedb_data/test/logs".to_string()),
1121                log_level: Some("debug".to_string()),
1122
1123                #[cfg(feature = "tokio-console")]
1124                tokio_console_addr: None,
1125            })
1126            .unwrap()
1127            .component;
1128
1129        assert_eq!("./greptimedb_data/test/logs", opts.logging.dir);
1130        assert_eq!("debug", opts.logging.level.unwrap());
1131    }
1132
1133    #[test]
1134    fn test_config_precedence_order() {
1135        let mut file = create_named_temp_file();
1136        let toml_str = r#"
1137            [http]
1138            addr = "127.0.0.1:4000"
1139
1140            [logging]
1141            level = "debug"
1142        "#;
1143        write!(file, "{}", toml_str).unwrap();
1144
1145        let env_prefix = "STANDALONE_UT";
1146        temp_env::with_vars(
1147            [
1148                (
1149                    // logging.dir = /other/log/dir
1150                    [
1151                        env_prefix.to_string(),
1152                        "logging".to_uppercase(),
1153                        "dir".to_uppercase(),
1154                    ]
1155                    .join(ENV_VAR_SEP),
1156                    Some("/other/log/dir"),
1157                ),
1158                (
1159                    // logging.level = info
1160                    [
1161                        env_prefix.to_string(),
1162                        "logging".to_uppercase(),
1163                        "level".to_uppercase(),
1164                    ]
1165                    .join(ENV_VAR_SEP),
1166                    Some("info"),
1167                ),
1168                (
1169                    // http.addr = 127.0.0.1:24000
1170                    [
1171                        env_prefix.to_string(),
1172                        "http".to_uppercase(),
1173                        "addr".to_uppercase(),
1174                    ]
1175                    .join(ENV_VAR_SEP),
1176                    Some("127.0.0.1:24000"),
1177                ),
1178            ],
1179            || {
1180                let command = StartCommand {
1181                    config_file: Some(file.path().to_str().unwrap().to_string()),
1182                    http_addr: Some("127.0.0.1:14000".to_string()),
1183                    env_prefix: env_prefix.to_string(),
1184                    ..Default::default()
1185                };
1186
1187                let opts = command.load_options(&Default::default()).unwrap().component;
1188
1189                // Should be read from env, env > default values.
1190                assert_eq!(opts.logging.dir, "/other/log/dir");
1191
1192                // Should be read from config file, config file > env > default values.
1193                assert_eq!(opts.logging.level.as_ref().unwrap(), "debug");
1194
1195                // Should be read from cli, cli > config file > env > default values.
1196                let fe_opts = opts.frontend_options();
1197                assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");
1198                assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);
1199
1200                // Should be default value.
1201                assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
1202            },
1203        );
1204    }
1205
1206    #[test]
1207    fn test_parse_grpc_bind_addr_aliases() {
1208        let command =
1209            StartCommand::try_parse_from(["standalone", "--grpc-bind-addr", "127.0.0.1:14001"])
1210                .unwrap();
1211        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:14001"));
1212
1213        let command =
1214            StartCommand::try_parse_from(["standalone", "--rpc-bind-addr", "127.0.0.1:24001"])
1215                .unwrap();
1216        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:24001"));
1217
1218        let command =
1219            StartCommand::try_parse_from(["standalone", "--rpc-addr", "127.0.0.1:34001"]).unwrap();
1220        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:34001"));
1221    }
1222
1223    #[test]
1224    fn test_help_uses_grpc_option_names() {
1225        let mut cmd = StartCommand::command();
1226        let mut help = Vec::new();
1227        cmd.write_long_help(&mut help).unwrap();
1228        let help = String::from_utf8(help).unwrap();
1229
1230        assert!(help.contains("--grpc-bind-addr"));
1231        assert!(!help.contains("--rpc-bind-addr"));
1232        assert!(!help.contains("--rpc-addr"));
1233    }
1234
1235    #[test]
1236    fn test_load_default_standalone_options() {
1237        let options =
1238            StandaloneOptions::load_layered_options(None, "GREPTIMEDB_STANDALONE").unwrap();
1239        let default_options = StandaloneOptions::default();
1240        assert_eq!(options.enable_telemetry, default_options.enable_telemetry);
1241        assert_eq!(options.http, default_options.http);
1242        assert_eq!(options.grpc, default_options.grpc);
1243        assert_eq!(options.mysql, default_options.mysql);
1244        assert_eq!(options.postgres, default_options.postgres);
1245        assert_eq!(options.opentsdb, default_options.opentsdb);
1246        assert_eq!(options.influxdb, default_options.influxdb);
1247        assert_eq!(options.prom_store, default_options.prom_store);
1248        assert_eq!(options.wal, default_options.wal);
1249        assert_eq!(options.metadata_store, default_options.metadata_store);
1250        assert_eq!(options.procedure, default_options.procedure);
1251        assert_eq!(options.logging, default_options.logging);
1252        assert_eq!(options.region_engine, default_options.region_engine);
1253    }
1254
1255    #[test]
1256    fn test_cache_config() {
1257        let toml_str = r#"
1258            [storage]
1259            data_home = "test_data_home"
1260            type = "S3"
1261            [storage.cache_config]
1262            enable_read_cache = true
1263        "#;
1264        let mut opts: StandaloneOptions = toml::from_str(toml_str).unwrap();
1265        opts.sanitize();
1266        assert!(opts.storage.store.cache_config().unwrap().enable_read_cache);
1267        assert_eq!(
1268            opts.storage.store.cache_config().unwrap().cache_path,
1269            "test_data_home"
1270        );
1271    }
1272}