Skip to main content

cmd/
standalone.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16use std::net::SocketAddr;
17use std::path::Path;
18use std::sync::Arc;
19use std::{fs, path};
20
21use async_trait::async_trait;
22use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
23use catalog::CatalogManagerRef;
24use catalog::information_schema::InformationExtensionRef;
25use catalog::kvbackend::{CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder};
26use catalog::process_manager::ProcessManager;
27use clap::Parser;
28use common_base::Plugins;
29use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
30use common_config::{Configurable, metadata_store_dir};
31use common_error::ext::BoxedError;
32use common_meta::DatanodeId;
33use common_meta::cache::{LayeredCacheRegistryBuilder, LayeredCacheRegistryRef};
34use common_meta::ddl::flow_meta::FlowMetadataAllocator;
35use common_meta::ddl::table_meta::TableMetadataAllocator;
36use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
37use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef, DdlManagerRef};
38use common_meta::key::flow::FlowMetadataManager;
39use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
40use common_meta::kv_backend::KvBackendRef;
41use common_meta::node_manager::{FlownodeRef, NodeManagerRef};
42use common_meta::procedure_executor::{LocalProcedureExecutor, ProcedureExecutorRef};
43use common_meta::region_keeper::MemoryRegionKeeper;
44use common_meta::region_registry::LeaderRegionRegistry;
45use common_meta::sequence::{Sequence, SequenceBuilder};
46use common_meta::wal_provider::{WalProviderRef, build_wal_provider};
47use common_options::plugin_options::StandaloneFlag;
48use common_procedure::ProcedureManagerRef;
49use common_query::prelude::set_default_prefix;
50use common_telemetry::info;
51use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
52use common_time::timezone::set_default_timezone;
53use common_version::{short_version, verbose_version};
54use datanode::config::DatanodeOptions;
55use datanode::datanode::{Datanode, DatanodeBuilder};
56use datanode::region_server::RegionServer;
57use flow::{
58    FlowDualEngineRef, FlownodeBuilder, FlownodeInstance, FlownodeOptions, FrontendClient,
59    FrontendInvoker, GrpcQueryHandlerWithBoxedError,
60};
61use frontend::frontend::Frontend;
62use frontend::instance::StandaloneDatanodeManager;
63use frontend::instance::builder::FrontendBuilder;
64use frontend::server::Services;
65use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
66use plugins::PluginOptions;
67use plugins::frontend::context::{
68    CatalogManagerConfigureContext, StandaloneCatalogManagerConfigureContext,
69};
70use plugins::standalone::context::DdlManagerConfigureContext;
71use servers::tls::{TlsMode, TlsOption, merge_tls_option};
72use snafu::ResultExt;
73use standalone::options::StandaloneOptions;
74use standalone::{StandaloneInformationExtension, StandaloneRepartitionProcedureFactory};
75use tracing_appender::non_blocking::WorkerGuard;
76
77use crate::error::{OtherSnafu, Result, StartFlownodeSnafu};
78use crate::options::{GlobalOptions, GreptimeOptions};
79use crate::{App, create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile};
80
/// Application name of the standalone binary.
///
/// Returned by `App::name` for [`Instance`] and passed to the logging, version
/// and resource-metric initialisation calls in `StartCommand::build`.
pub const APP_NAME: &str = "greptime-standalone";
82
// Top-level CLI entry of the standalone binary; it only wraps a subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn `///`
// doc comments into help text, so a doc comment here would change the CLI output.
#[derive(Parser)]
pub struct Command {
    // The subcommand selected on the command line (see `SubCommand`).
    #[clap(subcommand)]
    subcmd: SubCommand,
}
88
89impl Command {
90    pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
91        self.subcmd.build(opts).await
92    }
93
94    pub fn load_options(
95        &self,
96        global_options: &GlobalOptions,
97    ) -> Result<GreptimeOptions<StandaloneOptions>> {
98        self.subcmd.load_options(global_options)
99    }
100}
101
// Subcommands of the standalone binary.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn `///`
// doc comments into help text, so a doc comment here would change the CLI output.
#[derive(Parser)]
enum SubCommand {
    // Runs the standalone instance described by `StartCommand`.
    Start(StartCommand),
}
106
107impl SubCommand {
108    async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
109        match self {
110            SubCommand::Start(cmd) => cmd.build(opts).await,
111        }
112    }
113
114    fn load_options(
115        &self,
116        global_options: &GlobalOptions,
117    ) -> Result<GreptimeOptions<StandaloneOptions>> {
118        match self {
119            SubCommand::Start(cmd) => cmd.load_options(global_options),
120        }
121    }
122}
123
/// A standalone GreptimeDB instance: datanode, frontend and flownode bundled
/// together with the handles needed to start and stop them (see the [`App`]
/// implementation).
pub struct Instance {
    /// The embedded datanode; shut down in `stop` and exposed through `datanode()`.
    datanode: Datanode,
    /// The embedded frontend, including its servers; exposed through `mut_frontend()`.
    frontend: Frontend,
    /// The embedded flownode; started last in `start`.
    flownode: FlownodeInstance,
    /// Procedure manager handed to the leader-services controller on `stop`.
    procedure_manager: ProcedureManagerRef,
    /// Starts and stops the leader services around the instance lifecycle.
    leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
    /// Handles cloned into the controller's `start` call.
    leader_services_context: LeaderServicesContext,
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
}
134
135impl Instance {
136    /// Find the socket addr of a server by its `name`.
137    pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
138        self.frontend.server_handlers().addr(name)
139    }
140
141    /// Get the mutable Frontend component of this Standalone instance for externally modification
142    /// by others (might not be in this code base, so don't delete this function).
143    pub fn mut_frontend(&mut self) -> &mut Frontend {
144        &mut self.frontend
145    }
146
147    /// Get the Datanode component of this Standalone instance for externally usage
148    /// by others (might not be in this code base, so don't delete this function).
149    pub fn datanode(&self) -> &Datanode {
150        &self.datanode
151    }
152}
153
154#[async_trait]
155impl App for Instance {
156    fn name(&self) -> &str {
157        APP_NAME
158    }
159
160    async fn start(&mut self) -> Result<()> {
161        self.datanode.start_telemetry();
162
163        self.leader_services_controller
164            .start(self.leader_services_context.clone())
165            .await?;
166
167        plugins::start_frontend_plugins(self.frontend.instance.plugins().clone())
168            .await
169            .context(error::StartFrontendSnafu)?;
170
171        self.frontend
172            .start()
173            .await
174            .context(error::StartFrontendSnafu)?;
175
176        self.flownode.start().await.context(StartFlownodeSnafu)?;
177
178        Ok(())
179    }
180
181    async fn stop(&mut self) -> Result<()> {
182        self.frontend
183            .shutdown()
184            .await
185            .context(error::ShutdownFrontendSnafu)?;
186
187        self.leader_services_controller
188            .stop(
189                self.procedure_manager.clone(),
190                self.datanode.region_server(),
191            )
192            .await?;
193
194        self.datanode
195            .shutdown()
196            .await
197            .context(error::ShutdownDatanodeSnafu)?;
198
199        self.flownode
200            .shutdown()
201            .await
202            .context(error::ShutdownFlownodeSnafu)?;
203
204        info!("Datanode instance stopped.");
205
206        Ok(())
207    }
208}
209
// Command-line flags of `standalone start`.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn `///`
// doc comments into help text, so new doc comments would change the CLI output.
// How each flag is applied is visible in `StartCommand::merge_with_cli_options`.
#[derive(Debug, Default, Parser)]
pub struct StartCommand {
    // Overrides `http.addr`.
    #[clap(long)]
    http_addr: Option<String>,
    // Overrides `grpc.bind_addr`; rejected when equal to the default datanode
    // gRPC bind address. Also reachable as `--rpc-bind-addr` / `--rpc-addr`.
    #[clap(long = "grpc-bind-addr", alias = "rpc-bind-addr", alias = "rpc-addr")]
    grpc_bind_addr: Option<String>,
    // Enables the MySQL server and overrides its address.
    #[clap(long)]
    mysql_addr: Option<String>,
    // Enables the PostgreSQL server and overrides its address.
    #[clap(long)]
    postgres_addr: Option<String>,
    // When set, turns `influxdb.enable` on; it never turns it off.
    #[clap(short, long)]
    influxdb_enable: bool,
    // Config file path handed to the layered options loader.
    #[clap(short, long)]
    pub config_file: Option<String>,
    // The four `tls_*` flags are combined into one `TlsOption` that is merged
    // into the gRPC / MySQL / PostgreSQL TLS settings, but only for the protocols
    // whose address flag is also given.
    #[clap(long)]
    tls_mode: Option<TlsMode>,
    #[clap(long)]
    tls_cert_path: Option<String>,
    #[clap(long)]
    tls_key_path: Option<String>,
    #[clap(long)]
    tls_watch: bool,
    // Overrides `user_provider` when given.
    #[clap(long)]
    user_provider: Option<String>,
    // Prefix handed to the layered options loader for environment variables.
    #[clap(long, default_value = "GREPTIMEDB_STANDALONE")]
    pub env_prefix: String,
    /// The working home directory of this standalone instance.
    #[clap(long)]
    data_home: Option<String>,
}
240
241impl StartCommand {
242    /// Load the GreptimeDB options from various sources (command line, config file or env).
243    pub fn load_options(
244        &self,
245        global_options: &GlobalOptions,
246    ) -> Result<GreptimeOptions<StandaloneOptions>> {
247        let mut opts = GreptimeOptions::<StandaloneOptions>::load_layered_options(
248            self.config_file.as_deref(),
249            self.env_prefix.as_ref(),
250        )
251        .context(error::LoadLayeredConfigSnafu)?;
252
253        self.merge_with_cli_options(global_options, &mut opts.component)?;
254        opts.component.sanitize();
255
256        Ok(opts)
257    }
258
259    // The precedence order is: cli > config file > environment variables > default values.
260    pub fn merge_with_cli_options(
261        &self,
262        global_options: &GlobalOptions,
263        opts: &mut StandaloneOptions,
264    ) -> Result<()> {
265        if let Some(dir) = &global_options.log_dir {
266            opts.logging.dir.clone_from(dir);
267        }
268
269        if global_options.log_level.is_some() {
270            opts.logging.level.clone_from(&global_options.log_level);
271        }
272
273        opts.tracing = TracingOptions {
274            #[cfg(feature = "tokio-console")]
275            tokio_console_addr: global_options.tokio_console_addr.clone(),
276        };
277
278        let tls_opts = TlsOption::new(
279            self.tls_mode,
280            self.tls_cert_path.clone(),
281            self.tls_key_path.clone(),
282            self.tls_watch,
283        );
284
285        if let Some(addr) = &self.http_addr {
286            opts.http.addr.clone_from(addr);
287        }
288
289        if let Some(data_home) = &self.data_home {
290            opts.storage.data_home.clone_from(data_home);
291        }
292
293        // If the logging dir is not set, use the default logs dir in the data home.
294        if opts.logging.dir.is_empty() {
295            opts.logging.dir = Path::new(&opts.storage.data_home)
296                .join(DEFAULT_LOGGING_DIR)
297                .to_string_lossy()
298                .to_string();
299        }
300
301        if let Some(addr) = &self.grpc_bind_addr {
302            // frontend grpc addr conflict with datanode default grpc addr
303            let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
304            if addr.eq(&datanode_grpc_addr) {
305                return error::IllegalConfigSnafu {
306                    msg: format!(
307                        "gRPC listen address conflicts with datanode reserved gRPC addr: {datanode_grpc_addr}",
308                    ),
309                }.fail();
310            }
311            opts.grpc.bind_addr.clone_from(addr);
312            opts.grpc.tls = merge_tls_option(&opts.grpc.tls, tls_opts.clone());
313        }
314
315        if let Some(addr) = &self.mysql_addr {
316            opts.mysql.enable = true;
317            opts.mysql.addr.clone_from(addr);
318            opts.mysql.tls = merge_tls_option(&opts.mysql.tls, tls_opts.clone());
319        }
320
321        if let Some(addr) = &self.postgres_addr {
322            opts.postgres.enable = true;
323            opts.postgres.addr.clone_from(addr);
324            opts.postgres.tls = merge_tls_option(&opts.postgres.tls, tls_opts.clone());
325        }
326
327        if self.influxdb_enable {
328            opts.influxdb.enable = self.influxdb_enable;
329        }
330
331        if let Some(user_provider) = &self.user_provider {
332            opts.user_provider = Some(user_provider.clone());
333        }
334
335        Ok(())
336    }
337
338    #[allow(unreachable_code)]
339    #[allow(unused_variables)]
340    #[allow(clippy::diverging_sub_expression)]
341    /// Build GreptimeDB instance with the loaded options.
342    pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
343        common_runtime::init_global_runtimes(&opts.runtime);
344
345        let guard = common_telemetry::init_global_logging(
346            APP_NAME,
347            &opts.component.logging,
348            &opts.component.tracing,
349            None,
350            Some(&opts.component.slow_query),
351        );
352
353        log_versions(verbose_version(), short_version(), APP_NAME);
354        maybe_activate_heap_profile(&opts.component.memory);
355        create_resource_limit_metrics(APP_NAME);
356
357        info!("Standalone start command: {:#?}", self);
358        info!("Standalone options: {opts:#?}");
359
360        let (mut instance, _) =
361            Self::build_with(opts.component, opts.plugins, InstanceCreator::default()).await?;
362        instance._guard.extend(guard);
363        Ok(instance)
364    }
365
    /// Builds a standalone [`Instance`] from `opts`, using `creator` for the
    /// pluggable pieces (metadata KV backend, node manager, table id allocator,
    /// procedure executor and leader-services controller).
    ///
    /// Returns the instance together with an `InstanceCreatorResult` carrying the
    /// KV backend, node manager and table id allocator created along the way.
    /// The instance's `_guard` list is left empty; callers such as `build`
    /// attach the logging guards themselves.
    ///
    /// The steps below are order-sensitive: plugins are registered incrementally
    /// and cloned into the components as they are built.
    pub async fn build_with(
        mut opts: StandaloneOptions,
        plugin_opts: Vec<PluginOptions>,
        creator: InstanceCreator,
    ) -> Result<(Instance, InstanceCreatorResult)> {
        // Mark the plugin set as belonging to a standalone deployment.
        let mut plugins = Plugins::new();
        plugins.insert(StandaloneFlag);
        set_default_prefix(opts.default_column_prefix.as_deref())
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;

        // Derive the frontend and datanode option views after the gRPC server
        // address has been detected on `opts`.
        opts.grpc.detect_server_addr();
        let fe_opts = opts.frontend_options();
        let dn_opts = opts.datanode_options();
        // Copied out now because `dn_opts` is moved into the datanode builder below.
        let node_id = dn_opts.node_id;
        let init_regions_parallelism = dn_opts.init_regions_parallelism;

        plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
            .await
            .context(error::StartFrontendSnafu)?;

        plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
            .await
            .context(error::StartDatanodeSnafu)?;

        set_default_timezone(fe_opts.default_timezone.as_deref())
            .context(error::InitTimezoneSnafu)?;

        let data_home = &dn_opts.storage.data_home;
        // Ensure the data_home directory exists.
        fs::create_dir_all(path::Path::new(data_home))
            .context(error::CreateDirSnafu { dir: data_home })?;

        // Metadata KV backend (customisable through the creator) and the
        // procedure manager that sits on top of it.
        let metadata_dir = metadata_store_dir(data_home);
        let kv_backend = creator
            .metadata_kv_backend_creator
            .create(metadata_dir, &opts)
            .await?;
        let procedure_manager =
            standalone::build_procedure_manager(kv_backend.clone(), opts.procedure);

        plugins::setup_standalone_plugins(&mut plugins, &plugin_opts, &opts, kv_backend.clone())
            .await
            .context(error::SetupStandalonePluginsSnafu)?;

        // Builds cache registry
        let layered_cache_builder = LayeredCacheRegistryBuilder::default();
        let fundamental_cache_registry = build_fundamental_cache_registry(kv_backend.clone());
        let mut layered_cache_builder = with_default_composite_cache_registry(
            layered_cache_builder.add_cache_registry(fundamental_cache_registry),
        )
        .context(error::BuildCacheRegistrySnafu)?;

        // Plugins may contribute an additional cache registry layer.
        if let Some(plugin_cache_builder) = plugins::standalone::configure_cache_registry(&plugins)
        {
            layered_cache_builder =
                layered_cache_builder.add_cache_registry(plugin_cache_builder.build());
        }

        let layered_cache_registry = Arc::new(layered_cache_builder.build());

        // Datanode. `dn_opts` is consumed here.
        let mut builder = DatanodeBuilder::new(dn_opts, plugins.clone(), kv_backend.clone());
        builder.with_cache_registry(layered_cache_registry.clone());
        if let Some(writable) = creator.open_regions_writable_override {
            builder.with_open_regions_writable_override(writable);
        }
        let datanode = builder.build().await.context(error::StartDatanodeSnafu)?;

        let information_extension = Arc::new(StandaloneInformationExtension::new(
            datanode.region_server(),
            procedure_manager.clone(),
        ));

        // Registered after the datanode was built, so only components that clone
        // `plugins` from here on see the information extension.
        plugins.insert::<InformationExtensionRef>(information_extension.clone());

        let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));

        // Standalone does not use gRPC here: obtain a frontend client backed by an
        // initially empty handler, without actually making a connection. The
        // handler is filled in once the frontend instance exists (see below).
        let (frontend_client, frontend_instance_handler) =
            FrontendClient::from_empty_grpc_handler(opts.query.clone());
        let frontend_client = Arc::new(frontend_client);

        // Catalog manager, optionally customised by a plugin-provided configurator.
        let builder = KvBackendCatalogManagerBuilder::new(
            information_extension.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
        )
        .with_procedure_manager(procedure_manager.clone())
        .with_process_manager(process_manager.clone());
        let builder = if let Some(configurator) =
            plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
        {
            let ctx = StandaloneCatalogManagerConfigureContext {
                fe_client: frontend_client.clone(),
            };
            let ctx = CatalogManagerConfigureContext::Standalone(ctx);
            configurator
                .configure(builder, ctx)
                .await
                .context(OtherSnafu)?
        } else {
            builder
        };
        let catalog_manager = builder.build();

        let table_metadata_manager =
            Self::create_table_metadata_manager(kv_backend.clone()).await?;

        // Flownode: only the `flow` section is taken from `opts`, everything else
        // in `FlownodeOptions` stays at its default.
        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let flownode_options = FlownodeOptions {
            flow: opts.flow.clone(),
            ..Default::default()
        };

        let flow_builder = FlownodeBuilder::new(
            flownode_options,
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
            frontend_client.clone(),
        );
        let flownode = flow_builder
            .build()
            .await
            .map_err(BoxedError::new)
            .context(error::OtherSnafu)?;
        let flow_engine = flownode.flow_engine();

        // set the ref to query for the local flow state
        {
            information_extension
                .set_flow_engine(flow_engine.clone())
                .await;
        }

        let node_manager = creator
            .node_manager_creator
            .create(&kv_backend, datanode.region_server(), flow_engine.clone())
            .await?;

        // Id allocation: table ids via the creator, flow ids via a sequence
        // starting at `MIN_USER_FLOW_ID`.
        let table_id_allocator = creator.table_id_allocator_creator.create(&kv_backend);
        let flow_id_sequence = Arc::new(
            SequenceBuilder::new(FLOW_ID_SEQ, kv_backend.clone())
                .initial(MIN_USER_FLOW_ID as u64)
                .step(10)
                .build(),
        );
        // The WAL options are converted before building the provider; the
        // conversion can fail with an invalid-WAL-provider error.
        let kafka_options = opts
            .wal
            .clone()
            .try_into()
            .context(error::InvalidWalProviderSnafu)?;
        let wal_provider = build_wal_provider(&kafka_options, kv_backend.clone())
            .await
            .context(error::BuildWalProviderSnafu)?;
        let wal_provider = Arc::new(wal_provider);
        let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
            table_id_allocator.clone(),
            wal_provider.clone(),
        ));
        let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
            flow_id_sequence,
        ));

        // DDL manager, optionally customised by a plugin-provided configurator.
        let ddl_context = DdlContext {
            node_manager: node_manager.clone(),
            cache_invalidator: layered_cache_registry.clone(),
            memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
            leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
            table_metadata_manager: table_metadata_manager.clone(),
            table_metadata_allocator: table_metadata_allocator.clone(),
            flow_metadata_manager: flow_metadata_manager.clone(),
            flow_metadata_allocator: flow_metadata_allocator.clone(),
            region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
        };

        // NOTE(review): the meaning of the trailing `true` is defined by
        // `DdlManager::try_new` and is not visible here.
        let ddl_manager = DdlManager::try_new(
            ddl_context,
            procedure_manager.clone(),
            Arc::new(StandaloneRepartitionProcedureFactory),
            true,
        )
        .context(error::InitDdlManagerSnafu)?;

        let ddl_manager = if let Some(configurator) =
            plugins.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>()
        {
            let ctx = DdlManagerConfigureContext {
                kv_backend: kv_backend.clone(),
                fe_client: frontend_client.clone(),
                catalog_manager: catalog_manager.clone(),
            };
            configurator
                .configure(ddl_manager, ctx)
                .await
                .context(OtherSnafu)?
        } else {
            ddl_manager
        };

        let procedure_executor = creator
            .procedure_executor_creator
            .create(Arc::new(ddl_manager), procedure_manager.clone())
            .await?;

        // Frontend instance.
        let fe_instance = FrontendBuilder::new(
            fe_opts.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
            catalog_manager.clone(),
            node_manager.clone(),
            procedure_executor.clone(),
            process_manager,
        )
        .with_plugin(plugins.clone())
        .try_build()
        .await
        .context(error::StartFrontendSnafu)?;
        let fe_instance = Arc::new(fe_instance);

        // set the frontend client for flownode
        // Only a weak reference to the frontend instance is handed over.
        let grpc_handler = fe_instance.clone() as Arc<dyn GrpcQueryHandlerWithBoxedError>;
        let weak_grpc_handler = Arc::downgrade(&grpc_handler);
        frontend_instance_handler
            .set_handler(weak_grpc_handler)
            .await;

        // set the frontend invoker for flownode
        let flow_streaming_engine = flow_engine.streaming_engine();
        // flow server need to be able to use frontend to write insert requests back
        let invoker = FrontendInvoker::build_from(
            flow_streaming_engine.clone(),
            catalog_manager.clone(),
            kv_backend.clone(),
            layered_cache_registry.clone(),
            procedure_executor,
            node_manager.clone(),
            fe_instance.frontend_peer_addr().to_string(),
        )
        .await
        .context(StartFlownodeSnafu)?;
        flow_streaming_engine.set_frontend_invoker(invoker).await;

        // Servers exposed by the frontend. `opts` is consumed here.
        let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
            .build()
            .context(error::StartFrontendSnafu)?;

        // No heartbeat task is configured for the standalone frontend.
        let frontend = Frontend {
            instance: fe_instance,
            servers,
            heartbeat_task: None,
        };
        let leader_services_context = LeaderServicesContext {
            procedure_manager: procedure_manager.clone(),
            wal_provider: wal_provider.clone(),
            region_server: datanode.region_server(),
            kv_backend: kv_backend.clone(),
            cache_registry: layered_cache_registry,
            catalog_manager,
            flow_engine,
            frontend_client,
            node_id,
            init_regions_parallelism,
            plugin_options: plugin_opts,
        };

        let instance = Instance {
            datanode,
            frontend,
            flownode,
            procedure_manager,
            leader_services_controller: creator.leader_services_controller,
            leader_services_context,
            _guard: vec![],
        };
        let result = InstanceCreatorResult {
            kv_backend,
            node_manager,
            table_id_allocator,
        };
        Ok((instance, result))
    }
650
651    pub async fn create_table_metadata_manager(
652        kv_backend: KvBackendRef,
653    ) -> Result<TableMetadataManagerRef> {
654        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
655
656        table_metadata_manager
657            .init()
658            .await
659            .context(error::InitMetadataSnafu)?;
660
661        Ok(table_metadata_manager)
662    }
663}
664
/// Customizes how standalone creates its node manager.
///
/// `StartCommand::build_with` calls [`NodeManagerCreator::create`] once, after
/// the datanode and flownode have been built.
#[async_trait]
pub trait NodeManagerCreator: Send + Sync {
    /// Creates the node manager from the metadata KV backend, the datanode's
    /// region server and the flow engine of the flownode.
    async fn create(
        &self,
        kv_backend: &KvBackendRef,
        region_server: RegionServer,
        flow_server: FlownodeRef,
    ) -> Result<NodeManagerRef>;
}
674
675pub struct DefaultNodeManagerCreator;
676
677#[async_trait]
678impl NodeManagerCreator for DefaultNodeManagerCreator {
679    async fn create(
680        &self,
681        _: &KvBackendRef,
682        region_server: RegionServer,
683        flow_server: FlownodeRef,
684    ) -> Result<NodeManagerRef> {
685        Ok(Arc::new(StandaloneDatanodeManager {
686            region_server,
687            flow_server,
688        }))
689    }
690}
691
/// Customizes how standalone opens its metadata KV backend.
///
/// The default implementation preserves the built-in raft-engine path. Other
/// callers can provide a custom implementation without changing standalone
/// configuration types.
#[async_trait]
pub trait MetadataKvBackendCreator: Send + Sync {
    /// Opens the metadata KV backend.
    ///
    /// `metadata_dir` is the metadata store directory derived from the data home,
    /// and `opts` are the full standalone options in effect for this build.
    async fn create(&self, metadata_dir: String, opts: &StandaloneOptions) -> Result<KvBackendRef>;
}
701
702pub struct DefaultMetadataKvBackendCreator;
703
704#[async_trait]
705impl MetadataKvBackendCreator for DefaultMetadataKvBackendCreator {
706    async fn create(&self, metadata_dir: String, opts: &StandaloneOptions) -> Result<KvBackendRef> {
707        standalone::build_metadata_kvbackend(metadata_dir, opts.metadata_store)
708            .context(error::BuildMetadataKvbackendSnafu)
709    }
710}
711
/// Customizes how standalone creates the sequence used to allocate table ids.
pub trait TableIdAllocatorCreator: Send + Sync {
    /// Creates the table id sequence on top of the metadata KV backend.
    fn create(&self, kv_backend: &KvBackendRef) -> Arc<Sequence>;
}
715
716struct DefaultTableIdAllocatorCreator;
717
718impl TableIdAllocatorCreator for DefaultTableIdAllocatorCreator {
719    fn create(&self, kv_backend: &KvBackendRef) -> Arc<Sequence> {
720        Arc::new(
721            SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
722                .initial(MIN_USER_TABLE_ID as u64)
723                .step(10)
724                .build(),
725        )
726    }
727}
728
/// Customizes how standalone creates its procedure executor.
#[async_trait]
pub trait ProcedureExecutorCreator: Send + Sync {
    /// Creates the procedure executor from the (possibly plugin-configured) DDL
    /// manager and the procedure manager.
    async fn create(
        &self,
        ddl_manager: DdlManagerRef,
        procedure_manager: ProcedureManagerRef,
    ) -> Result<ProcedureExecutorRef>;
}
737
738pub struct DefaultProcedureExecutorCreator;
739
740#[async_trait]
741impl ProcedureExecutorCreator for DefaultProcedureExecutorCreator {
742    async fn create(
743        &self,
744        ddl_manager: DdlManagerRef,
745        procedure_manager: ProcedureManagerRef,
746    ) -> Result<ProcedureExecutorRef> {
747        Ok(Arc::new(LocalProcedureExecutor::new(
748            ddl_manager,
749            procedure_manager,
750        )))
751    }
752}
753
/// Controls the lifecycle of the leader services of a standalone instance.
///
/// `Instance::start` calls [`StandaloneLeaderServicesController::start`] right
/// after starting datanode telemetry and before the frontend is started;
/// `Instance::stop` calls [`StandaloneLeaderServicesController::stop`] after the
/// frontend has been shut down and before the datanode is shut down.
#[async_trait]
pub trait StandaloneLeaderServicesController: Send + Sync {
    /// Starts leader services that manage standalone metadata or WAL state.
    ///
    /// The default implementation starts the procedure manager and WAL provider
    /// during instance startup.
    async fn start(&self, context: LeaderServicesContext) -> Result<()>;

    /// Stops services started by [`StandaloneLeaderServicesController::start`].
    ///
    /// Receives the instance's procedure manager and the datanode's region server.
    async fn stop(
        &self,
        procedure_manager: ProcedureManagerRef,
        region_server: RegionServer,
    ) -> Result<()>;
}
769
#[derive(Clone)]
/// Additional runtime handles for custom leader-service controllers.
///
/// The default standalone startup only needs to start/stop the procedure
/// manager and WAL provider. Some embedders need to do more work around
/// leader-service startup, for example reconciling metadata-backed runtime
/// state before publishing writable leadership. Grouping those handles here
/// keeps `Instance` small and avoids expanding
/// [`StandaloneLeaderServicesController::start`] every time a custom lifecycle
/// needs one more standalone component.
///
/// The context is populated in `StartCommand::build_with` and cloned on every
/// `Instance::start`.
pub struct LeaderServicesContext {
    /// Procedure manager built on the metadata KV backend.
    pub procedure_manager: ProcedureManagerRef,
    /// WAL provider built from the standalone WAL options.
    pub wal_provider: WalProviderRef,
    /// Region server of the embedded datanode.
    pub region_server: RegionServer,
    /// Metadata KV backend.
    pub kv_backend: KvBackendRef,
    /// Layered cache registry shared by the standalone components.
    pub cache_registry: LayeredCacheRegistryRef,
    /// Catalog manager of the standalone instance.
    pub catalog_manager: CatalogManagerRef,
    /// Flow engine of the embedded flownode.
    pub flow_engine: FlowDualEngineRef,
    /// Frontend client whose handler is wired to the local frontend instance.
    pub frontend_client: Arc<FrontendClient>,
    /// `node_id` taken from the datanode options.
    pub node_id: Option<DatanodeId>,
    /// `init_regions_parallelism` taken from the datanode options.
    pub init_regions_parallelism: usize,
    /// Plugin options the instance was built with.
    pub plugin_options: Vec<PluginOptions>,
}
793
794pub struct DefaultStandaloneLeaderServicesController;
795
796#[async_trait]
797impl StandaloneLeaderServicesController for DefaultStandaloneLeaderServicesController {
798    async fn start(&self, context: LeaderServicesContext) -> Result<()> {
799        context
800            .procedure_manager
801            .start()
802            .await
803            .context(error::StartProcedureManagerSnafu)?;
804        context
805            .wal_provider
806            .start()
807            .await
808            .context(error::StartWalProviderSnafu)
809    }
810
811    async fn stop(
812        &self,
813        procedure_manager: ProcedureManagerRef,
814        _region_server: RegionServer,
815    ) -> Result<()> {
816        procedure_manager
817            .stop()
818            .await
819            .context(error::StopProcedureManagerSnafu)
820    }
821}
822
/// `InstanceCreator` is used for grouping various component creators for building the
/// Standalone instance, suitable for customizing how the instance can be built.
pub struct InstanceCreator {
    /// Hook for replacing metadata KV construction while reusing the rest of the
    /// standalone build flow.
    metadata_kv_backend_creator: Box<dyn MetadataKvBackendCreator>,
    /// Creates the node manager once the datanode and flownode exist.
    node_manager_creator: Box<dyn NodeManagerCreator>,
    /// Creates the sequence used to allocate table ids.
    table_id_allocator_creator: Box<dyn TableIdAllocatorCreator>,
    /// Creates the procedure executor from the DDL manager and procedure manager.
    procedure_executor_creator: Box<dyn ProcedureExecutorCreator>,
    /// Controller moved into the built `Instance` to start/stop leader services.
    leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
    /// When `Some`, forwarded to the datanode builder's
    /// `with_open_regions_writable_override`; when `None`, the builder is left untouched.
    open_regions_writable_override: Option<bool>,
}
835
836impl InstanceCreator {
837    pub fn new(
838        node_manager_creator: Box<dyn NodeManagerCreator>,
839        table_id_allocator_creator: Box<dyn TableIdAllocatorCreator>,
840        procedure_executor_creator: Box<dyn ProcedureExecutorCreator>,
841    ) -> Self {
842        Self {
843            metadata_kv_backend_creator: Box::new(DefaultMetadataKvBackendCreator),
844            node_manager_creator,
845            table_id_allocator_creator,
846            procedure_executor_creator,
847            leader_services_controller: Box::new(DefaultStandaloneLeaderServicesController),
848            open_regions_writable_override: None,
849        }
850    }
851
852    pub fn with_metadata_kv_backend_creator(
853        mut self,
854        metadata_kv_backend_creator: Box<dyn MetadataKvBackendCreator>,
855    ) -> Self {
856        self.metadata_kv_backend_creator = metadata_kv_backend_creator;
857        self
858    }
859
860    /// Wraps the metadata backend creator while retaining the default creator.
861    ///
862    /// This is useful for callers that need to add runtime behavior around
863    /// metadata access without reimplementing backend selection.
864    pub fn map_metadata_kv_backend_creator<F>(mut self, f: F) -> Self
865    where
866        F: FnOnce(Box<dyn MetadataKvBackendCreator>) -> Box<dyn MetadataKvBackendCreator>,
867    {
868        self.metadata_kv_backend_creator = f(self.metadata_kv_backend_creator);
869        self
870    }
871
872    /// Wraps node-manager creation while preserving the selected standalone node manager.
873    pub fn map_node_manager_creator<F>(mut self, f: F) -> Self
874    where
875        F: FnOnce(Box<dyn NodeManagerCreator>) -> Box<dyn NodeManagerCreator>,
876    {
877        self.node_manager_creator = f(self.node_manager_creator);
878        self
879    }
880
881    /// Wraps procedure-executor creation while preserving the current setup.
882    pub fn map_procedure_executor_creator<F>(mut self, f: F) -> Self
883    where
884        F: FnOnce(Box<dyn ProcedureExecutorCreator>) -> Box<dyn ProcedureExecutorCreator>,
885    {
886        self.procedure_executor_creator = f(self.procedure_executor_creator);
887        self
888    }
889
890    /// Replaces startup/shutdown ownership for procedure manager and WAL provider.
891    pub fn with_leader_services_controller(
892        mut self,
893        leader_services_controller: Box<dyn StandaloneLeaderServicesController>,
894    ) -> Self {
895        self.leader_services_controller = leader_services_controller;
896        self
897    }
898
899    /// Overrides whether regions opened during startup should become writable.
900    ///
901    /// `None` keeps the default startup behavior (regions open writable).
902    ///
903    /// Warning: setting this to `false` in standalone mode will leave reopened regions
904    /// permanently read-only. Standalone has no metasrv heartbeat or region-role
905    /// reconciliation, so there is no path to promote regions to Leader after startup.
906    pub fn with_open_regions_writable_override(mut self, writable: bool) -> Self {
907        self.open_regions_writable_override = Some(writable);
908        self
909    }
910}
911
912impl Default for InstanceCreator {
913    fn default() -> Self {
914        Self {
915            metadata_kv_backend_creator: Box::new(DefaultMetadataKvBackendCreator),
916            node_manager_creator: Box::new(DefaultNodeManagerCreator),
917            table_id_allocator_creator: Box::new(DefaultTableIdAllocatorCreator),
918            procedure_executor_creator: Box::new(DefaultProcedureExecutorCreator),
919            leader_services_controller: Box::new(DefaultStandaloneLeaderServicesController),
920            open_regions_writable_override: None,
921        }
922    }
923}
924
/// `InstanceCreatorResult` is meant to be used together with [InstanceCreator].
/// It keeps hold of the created components, and other important ones, so they can be
/// reused later.
pub struct InstanceCreatorResult {
    /// Key-value backend handle.
    // NOTE(review): presumably the metadata KV backend built by the creator — confirm.
    pub kv_backend: KvBackendRef,
    /// Node manager handle.
    // NOTE(review): presumably the output of the node manager creator — confirm.
    pub node_manager: NodeManagerRef,
    /// Shared sequence used as the table id allocator.
    pub table_id_allocator: Arc<Sequence>,
}
932
933#[cfg(test)]
934mod tests {
935    use std::default::Default;
936    use std::io::Write;
937    use std::time::Duration;
938
939    use auth::{Identity, Password, UserProviderRef};
940    use clap::{CommandFactory, Parser};
941    use common_base::readable_size::ReadableSize;
942    use common_config::ENV_VAR_SEP;
943    use common_options::plugin_options::StandaloneFlag;
944    use common_test_util::temp_dir::create_named_temp_file;
945    use common_wal::config::DatanodeWalConfig;
946    use frontend::frontend::FrontendOptions;
947    use object_store::config::{FileConfig, GcsConfig};
948    use servers::grpc::GrpcOptions;
949
950    use super::*;
951    use crate::options::GlobalOptions;
952
953    #[tokio::test]
954    async fn test_try_from_start_command_to_anymap() {
955        let fe_opts = FrontendOptions {
956            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
957            ..Default::default()
958        };
959
960        let mut plugins = Plugins::new();
961        plugins.insert(StandaloneFlag);
962        plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
963            .await
964            .unwrap();
965
966        let provider = plugins.get::<UserProviderRef>().unwrap();
967        let result = provider
968            .authenticate(
969                Identity::UserId("test", None),
970                Password::PlainText("test".to_string().into()),
971            )
972            .await;
973        let _ = result.unwrap();
974    }
975
976    #[test]
977    fn test_toml() {
978        let opts = StandaloneOptions::default();
979        let toml_string = toml::to_string(&opts).unwrap();
980        let _parsed: StandaloneOptions = toml::from_str(&toml_string).unwrap();
981    }
982
983    #[test]
984    fn test_read_from_config_file() {
985        let mut file = create_named_temp_file();
986        let toml_str = r#"
987            enable_memory_catalog = true
988
989            [wal]
990            provider = "raft_engine"
991            dir = "./greptimedb_data/test/wal"
992            file_size = "1GB"
993            purge_threshold = "50GB"
994            purge_interval = "10m"
995            read_batch_size = 128
996            sync_write = false
997
998            [storage]
999            data_home = "./greptimedb_data/"
1000            type = "File"
1001
1002            [[storage.providers]]
1003            type = "Gcs"
1004            bucket = "foo"
1005            endpoint = "bar"
1006
1007            [[storage.providers]]
1008            type = "S3"
1009            access_key_id = "access_key_id"
1010            secret_access_key = "secret_access_key"
1011
1012            [storage.compaction]
1013            max_inflight_tasks = 3
1014            max_files_in_level0 = 7
1015            max_purge_tasks = 32
1016
1017            [storage.manifest]
1018            checkpoint_margin = 9
1019            gc_duration = '7s'
1020
1021            [http]
1022            addr = "127.0.0.1:4000"
1023            timeout = "33s"
1024            body_limit = "128MB"
1025
1026            [opentsdb]
1027            enable = true
1028
1029            [logging]
1030            level = "debug"
1031            dir = "./greptimedb_data/test/logs"
1032        "#;
1033        write!(file, "{}", toml_str).unwrap();
1034        let cmd = StartCommand {
1035            config_file: Some(file.path().to_str().unwrap().to_string()),
1036            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
1037            ..Default::default()
1038        };
1039
1040        let options = cmd
1041            .load_options(&GlobalOptions::default())
1042            .unwrap()
1043            .component;
1044        let fe_opts = options.frontend_options();
1045        let dn_opts = options.datanode_options();
1046        let logging_opts = options.logging;
1047        assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
1048        assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
1049        assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
1050        assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
1051        assert!(fe_opts.mysql.enable);
1052        assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
1053        assert_eq!(2, fe_opts.mysql.runtime_size);
1054        assert_eq!(None, fe_opts.mysql.reject_no_database);
1055        assert!(fe_opts.influxdb.enable);
1056        assert!(fe_opts.opentsdb.enable);
1057
1058        let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
1059            unreachable!()
1060        };
1061        assert_eq!(
1062            "./greptimedb_data/test/wal",
1063            raft_engine_config.dir.unwrap()
1064        );
1065
1066        assert!(matches!(
1067            &dn_opts.storage.store,
1068            object_store::config::ObjectStoreConfig::File(FileConfig { .. })
1069        ));
1070        assert_eq!(dn_opts.storage.providers.len(), 2);
1071        assert!(matches!(
1072            dn_opts.storage.providers[0],
1073            object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
1074        ));
1075        match &dn_opts.storage.providers[1] {
1076            object_store::config::ObjectStoreConfig::S3(s3_config) => {
1077                assert_eq!(
1078                    "SecretBox<alloc::string::String>([REDACTED])".to_string(),
1079                    format!("{:?}", s3_config.connection.access_key_id)
1080                );
1081            }
1082            _ => {
1083                unreachable!()
1084            }
1085        }
1086
1087        assert_eq!("debug", logging_opts.level.as_ref().unwrap());
1088        assert_eq!("./greptimedb_data/test/logs".to_string(), logging_opts.dir);
1089    }
1090
1091    #[test]
1092    fn test_load_log_options_from_cli() {
1093        let cmd = StartCommand {
1094            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
1095            mysql_addr: Some("127.0.0.1:4002".to_string()),
1096            postgres_addr: Some("127.0.0.1:4003".to_string()),
1097            ..Default::default()
1098        };
1099
1100        let opts = cmd
1101            .load_options(&GlobalOptions {
1102                log_dir: Some("./greptimedb_data/test/logs".to_string()),
1103                log_level: Some("debug".to_string()),
1104
1105                #[cfg(feature = "tokio-console")]
1106                tokio_console_addr: None,
1107            })
1108            .unwrap()
1109            .component;
1110
1111        assert_eq!("./greptimedb_data/test/logs", opts.logging.dir);
1112        assert_eq!("debug", opts.logging.level.unwrap());
1113    }
1114
1115    #[test]
1116    fn test_config_precedence_order() {
1117        let mut file = create_named_temp_file();
1118        let toml_str = r#"
1119            [http]
1120            addr = "127.0.0.1:4000"
1121
1122            [logging]
1123            level = "debug"
1124        "#;
1125        write!(file, "{}", toml_str).unwrap();
1126
1127        let env_prefix = "STANDALONE_UT";
1128        temp_env::with_vars(
1129            [
1130                (
1131                    // logging.dir = /other/log/dir
1132                    [
1133                        env_prefix.to_string(),
1134                        "logging".to_uppercase(),
1135                        "dir".to_uppercase(),
1136                    ]
1137                    .join(ENV_VAR_SEP),
1138                    Some("/other/log/dir"),
1139                ),
1140                (
1141                    // logging.level = info
1142                    [
1143                        env_prefix.to_string(),
1144                        "logging".to_uppercase(),
1145                        "level".to_uppercase(),
1146                    ]
1147                    .join(ENV_VAR_SEP),
1148                    Some("info"),
1149                ),
1150                (
1151                    // http.addr = 127.0.0.1:24000
1152                    [
1153                        env_prefix.to_string(),
1154                        "http".to_uppercase(),
1155                        "addr".to_uppercase(),
1156                    ]
1157                    .join(ENV_VAR_SEP),
1158                    Some("127.0.0.1:24000"),
1159                ),
1160            ],
1161            || {
1162                let command = StartCommand {
1163                    config_file: Some(file.path().to_str().unwrap().to_string()),
1164                    http_addr: Some("127.0.0.1:14000".to_string()),
1165                    env_prefix: env_prefix.to_string(),
1166                    ..Default::default()
1167                };
1168
1169                let opts = command.load_options(&Default::default()).unwrap().component;
1170
1171                // Should be read from env, env > default values.
1172                assert_eq!(opts.logging.dir, "/other/log/dir");
1173
1174                // Should be read from config file, config file > env > default values.
1175                assert_eq!(opts.logging.level.as_ref().unwrap(), "debug");
1176
1177                // Should be read from cli, cli > config file > env > default values.
1178                let fe_opts = opts.frontend_options();
1179                assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");
1180                assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);
1181
1182                // Should be default value.
1183                assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
1184            },
1185        );
1186    }
1187
1188    #[test]
1189    fn test_parse_grpc_bind_addr_aliases() {
1190        let command =
1191            StartCommand::try_parse_from(["standalone", "--grpc-bind-addr", "127.0.0.1:14001"])
1192                .unwrap();
1193        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:14001"));
1194
1195        let command =
1196            StartCommand::try_parse_from(["standalone", "--rpc-bind-addr", "127.0.0.1:24001"])
1197                .unwrap();
1198        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:24001"));
1199
1200        let command =
1201            StartCommand::try_parse_from(["standalone", "--rpc-addr", "127.0.0.1:34001"]).unwrap();
1202        assert_eq!(command.grpc_bind_addr.as_deref(), Some("127.0.0.1:34001"));
1203    }
1204
1205    #[test]
1206    fn test_help_uses_grpc_option_names() {
1207        let mut cmd = StartCommand::command();
1208        let mut help = Vec::new();
1209        cmd.write_long_help(&mut help).unwrap();
1210        let help = String::from_utf8(help).unwrap();
1211
1212        assert!(help.contains("--grpc-bind-addr"));
1213        assert!(!help.contains("--rpc-bind-addr"));
1214        assert!(!help.contains("--rpc-addr"));
1215    }
1216
1217    #[test]
1218    fn test_load_default_standalone_options() {
1219        let options =
1220            StandaloneOptions::load_layered_options(None, "GREPTIMEDB_STANDALONE").unwrap();
1221        let default_options = StandaloneOptions::default();
1222        assert_eq!(options.enable_telemetry, default_options.enable_telemetry);
1223        assert_eq!(options.http, default_options.http);
1224        assert_eq!(options.grpc, default_options.grpc);
1225        assert_eq!(options.mysql, default_options.mysql);
1226        assert_eq!(options.postgres, default_options.postgres);
1227        assert_eq!(options.opentsdb, default_options.opentsdb);
1228        assert_eq!(options.influxdb, default_options.influxdb);
1229        assert_eq!(options.prom_store, default_options.prom_store);
1230        assert_eq!(options.wal, default_options.wal);
1231        assert_eq!(options.metadata_store, default_options.metadata_store);
1232        assert_eq!(options.procedure, default_options.procedure);
1233        assert_eq!(options.logging, default_options.logging);
1234        assert_eq!(options.region_engine, default_options.region_engine);
1235    }
1236
1237    #[test]
1238    fn test_cache_config() {
1239        let toml_str = r#"
1240            [storage]
1241            data_home = "test_data_home"
1242            type = "S3"
1243            [storage.cache_config]
1244            enable_read_cache = true
1245        "#;
1246        let mut opts: StandaloneOptions = toml::from_str(toml_str).unwrap();
1247        opts.sanitize();
1248        assert!(opts.storage.store.cache_config().unwrap().enable_read_cache);
1249        assert_eq!(
1250            opts.storage.store.cache_config().unwrap().cache_path,
1251            "test_data_home"
1252        );
1253    }
1254}