Skip to main content

cmd/
flownode.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16use std::path::Path;
17use std::sync::Arc;
18use std::time::Duration;
19
20use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
21use catalog::information_extension::DistributedInformationExtension;
22use catalog::kvbackend::{
23    CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, new_read_only_meta_kv_backend,
24};
25use clap::Parser;
26use client::client_manager::NodeClients;
27use common_base::Plugins;
28use common_config::{Configurable, DEFAULT_DATA_HOME};
29use common_grpc::channel_manager::ChannelConfig;
30use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
31use common_meta::heartbeat::handler::HandlerGroupExecutor;
32use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
33use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
34use common_meta::key::TableMetadataManager;
35use common_meta::key::flow::FlowMetadataManager;
36use common_stat::ResourceStatImpl;
37use common_telemetry::info;
38use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
39use common_version::{short_version, verbose_version};
40use flow::{
41    FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
42    get_flow_auth_options,
43};
44use meta_client::{MetaClientOptions, MetaClientType};
45use plugins::flownode::context::GrpcConfigureContext;
46use servers::addrs;
47use servers::configurator::GrpcBuilderConfiguratorRef;
48use snafu::{OptionExt, ResultExt, ensure};
49use tracing_appender::non_blocking::WorkerGuard;
50
51use crate::error::{
52    BuildCacheRegistrySnafu, LoadLayeredConfigSnafu, MetaClientInitSnafu, MissingConfigSnafu,
53    OtherSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
54};
55use crate::options::{GlobalOptions, GreptimeOptions};
56use crate::{App, create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
57
/// Application name of the flownode: returned by [`App::name`] and passed to the
/// logging, version-logging and resource-metrics initialization in `StartCommand::build`.
pub const APP_NAME: &str = "greptime-flownode";

/// Top-level flownode options: the shared [`GreptimeOptions`] wrapper specialized
/// with the flow crate's own options as its `component`.
type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
61
62pub struct Instance {
63    flownode: FlownodeInstance,
64    // Keep the logging guard to prevent the worker from being dropped.
65    _guard: Vec<WorkerGuard>,
66}
67
68impl Instance {
69    pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
70        Self {
71            flownode,
72            _guard: guard,
73        }
74    }
75
76    pub fn flownode(&self) -> &FlownodeInstance {
77        &self.flownode
78    }
79
80    /// allow customizing flownode for downstream projects
81    pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
82        &mut self.flownode
83    }
84}
85
86#[async_trait::async_trait]
87impl App for Instance {
88    fn name(&self) -> &str {
89        APP_NAME
90    }
91
92    async fn start(&mut self) -> Result<()> {
93        plugins::start_flownode_plugins(self.flownode.flow_engine().plugins().clone())
94            .await
95            .context(StartFlownodeSnafu)?;
96
97        self.flownode.start().await.context(StartFlownodeSnafu)
98    }
99
100    async fn stop(&mut self) -> Result<()> {
101        self.flownode
102            .shutdown()
103            .await
104            .context(ShutdownFlownodeSnafu)
105    }
106}
107
108#[derive(Parser)]
109pub struct Command {
110    #[clap(subcommand)]
111    subcmd: SubCommand,
112}
113
114impl Command {
115    pub async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
116        self.subcmd.build(opts).await
117    }
118
119    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
120        match &self.subcmd {
121            SubCommand::Start(cmd) => cmd.load_options(global_options),
122        }
123    }
124}
125
126#[derive(Parser)]
127enum SubCommand {
128    Start(StartCommand),
129}
130
131impl SubCommand {
132    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
133        match self {
134            SubCommand::Start(cmd) => cmd.build(opts).await,
135        }
136    }
137}
138
139#[derive(Debug, Parser, Default)]
140struct StartCommand {
141    /// Flownode's id
142    #[clap(long)]
143    node_id: Option<u64>,
144    /// Bind address for the gRPC server.
145    #[clap(long = "grpc-bind-addr", alias = "rpc-bind-addr", alias = "rpc-addr")]
146    grpc_bind_addr: Option<String>,
147    /// The address advertised to the metasrv, and used for connections from outside the host.
148    /// If left empty or unset, the server will automatically use the IP address of the first network interface
149    /// on the host, with the same port number as the one specified in `grpc_bind_addr`.
150    #[clap(
151        long = "grpc-server-addr",
152        alias = "rpc-server-addr",
153        alias = "rpc-hostname"
154    )]
155    grpc_server_addr: Option<String>,
156    /// Metasrv address list;
157    #[clap(long, value_delimiter = ',', num_args = 1..)]
158    metasrv_addrs: Option<Vec<String>>,
159    /// The configuration file for flownode
160    #[clap(short, long)]
161    config_file: Option<String>,
162    /// The prefix of environment variables, default is `GREPTIMEDB_FLOWNODE`;
163    #[clap(long, default_value = "GREPTIMEDB_FLOWNODE")]
164    env_prefix: String,
165    #[clap(long)]
166    http_addr: Option<String>,
167    /// HTTP request timeout in seconds.
168    #[clap(long)]
169    http_timeout: Option<u64>,
170    /// User Provider cfg, for auth, currently only support static user provider
171    #[clap(long)]
172    user_provider: Option<String>,
173}
174
175impl StartCommand {
176    fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
177        let mut opts = FlownodeOptions::load_layered_options(
178            self.config_file.as_deref(),
179            self.env_prefix.as_ref(),
180        )
181        .context(LoadLayeredConfigSnafu)?;
182
183        self.merge_with_cli_options(global_options, &mut opts)?;
184
185        Ok(opts)
186    }
187
188    // The precedence order is: cli > config file > environment variables > default values.
189    fn merge_with_cli_options(
190        &self,
191        global_options: &GlobalOptions,
192        opts: &mut FlownodeOptions,
193    ) -> Result<()> {
194        let opts = &mut opts.component;
195
196        if let Some(dir) = &global_options.log_dir {
197            opts.logging.dir.clone_from(dir);
198        }
199
200        // If the logging dir is not set, use the default logs dir in the data home.
201        if opts.logging.dir.is_empty() {
202            opts.logging.dir = Path::new(DEFAULT_DATA_HOME)
203                .join(DEFAULT_LOGGING_DIR)
204                .to_string_lossy()
205                .to_string();
206        }
207
208        if global_options.log_level.is_some() {
209            opts.logging.level.clone_from(&global_options.log_level);
210        }
211
212        opts.tracing = TracingOptions {
213            #[cfg(feature = "tokio-console")]
214            tokio_console_addr: global_options.tokio_console_addr.clone(),
215        };
216
217        if let Some(addr) = &self.grpc_bind_addr {
218            opts.grpc.bind_addr.clone_from(addr);
219        }
220
221        if let Some(server_addr) = &self.grpc_server_addr {
222            opts.grpc.server_addr.clone_from(server_addr);
223        }
224
225        if let Some(node_id) = self.node_id {
226            opts.node_id = Some(node_id);
227        }
228
229        if let Some(metasrv_addrs) = &self.metasrv_addrs {
230            opts.meta_client
231                .get_or_insert_with(MetaClientOptions::default)
232                .metasrv_addrs
233                .clone_from(metasrv_addrs);
234        }
235
236        if let Some(http_addr) = &self.http_addr {
237            opts.http.addr.clone_from(http_addr);
238        }
239
240        if let Some(http_timeout) = self.http_timeout {
241            opts.http.timeout = Duration::from_secs(http_timeout);
242        }
243
244        if let Some(user_provider) = &self.user_provider {
245            opts.user_provider = Some(user_provider.clone());
246        }
247
248        ensure!(
249            opts.node_id.is_some(),
250            MissingConfigSnafu {
251                msg: "Missing node id option"
252            }
253        );
254
255        Ok(())
256    }
257
258    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
259        common_runtime::init_global_runtimes(&opts.runtime);
260
261        let guard = common_telemetry::init_global_logging(
262            APP_NAME,
263            &opts.component.logging,
264            &opts.component.tracing,
265            opts.component.node_id.map(|x| x.to_string()),
266            None,
267        );
268
269        log_versions(verbose_version(), short_version(), APP_NAME);
270        maybe_activate_heap_profile(&opts.component.memory);
271        create_resource_limit_metrics(APP_NAME);
272
273        info!("Flownode start command: {:#?}", self);
274        info!("Flownode options: {:#?}", opts);
275
276        let plugin_opts = opts.plugins;
277        let mut opts = opts.component;
278        opts.grpc.detect_server_addr();
279
280        let mut plugins = Plugins::new();
281        plugins::setup_flownode_plugins(&mut plugins, &plugin_opts, &opts)
282            .await
283            .context(StartFlownodeSnafu)?;
284
285        let member_id = opts
286            .node_id
287            .context(MissingConfigSnafu { msg: "'node_id'" })?;
288
289        let meta_config = opts.meta_client.as_ref().context(MissingConfigSnafu {
290            msg: "'meta_client_options'",
291        })?;
292
293        let meta_client = meta_client::create_meta_client(
294            MetaClientType::Flownode { member_id },
295            meta_config,
296            None,
297            None,
298        )
299        .await
300        .context(MetaClientInitSnafu)?;
301
302        let cache_max_capacity = meta_config.metadata_cache_max_capacity;
303        let cache_ttl = meta_config.metadata_cache_ttl;
304        let cache_tti = meta_config.metadata_cache_tti;
305
306        let readonly_meta_backend = new_read_only_meta_kv_backend(meta_client.clone());
307
308        // TODO(discord9): add helper function to ease the creation of cache registry&such
309        let cached_meta_backend = CachedKvBackendBuilder::new(readonly_meta_backend.clone())
310            .cache_max_capacity(cache_max_capacity)
311            .cache_ttl(cache_ttl)
312            .cache_tti(cache_tti)
313            .build();
314        let cached_meta_backend = Arc::new(cached_meta_backend);
315
316        // Builds cache registry
317        let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
318            CacheRegistryBuilder::default()
319                .add_cache(cached_meta_backend.clone())
320                .build(),
321        );
322        let fundamental_cache_registry =
323            build_fundamental_cache_registry(readonly_meta_backend.clone());
324        let layered_cache_registry = Arc::new(
325            with_default_composite_cache_registry(
326                layered_cache_builder.add_cache_registry(fundamental_cache_registry),
327            )
328            .context(BuildCacheRegistrySnafu)?
329            .build(),
330        );
331
332        // flownode's frontend to datanode need not timeout.
333        // Some queries are expected to take long time.
334        let channel_config = ChannelConfig {
335            timeout: None,
336            ..Default::default()
337        };
338        let client = Arc::new(NodeClients::new(channel_config));
339
340        let information_extension = Arc::new(DistributedInformationExtension::new(
341            meta_client.clone(),
342            client.clone(),
343        ));
344        let catalog_manager = KvBackendCatalogManagerBuilder::new(
345            information_extension,
346            cached_meta_backend.clone(),
347            layered_cache_registry.clone(),
348        )
349        .build();
350
351        let table_metadata_manager =
352            Arc::new(TableMetadataManager::new(cached_meta_backend.clone()));
353
354        let executor = HandlerGroupExecutor::new(vec![
355            Arc::new(ParseMailboxMessageHandler),
356            Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
357        ]);
358
359        let mut resource_stat = ResourceStatImpl::default();
360        resource_stat.start_collect_cpu_usage();
361
362        let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
363            &opts,
364            meta_client.clone(),
365            Arc::new(executor),
366            Arc::new(resource_stat),
367        );
368
369        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
370        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
371        let frontend_client = FrontendClient::from_meta_client(
372            meta_client.clone(),
373            flow_auth_header,
374            opts.query.clone(),
375            opts.flow.batching_mode.clone(),
376        )
377        .context(StartFlownodeSnafu)?;
378        let frontend_client = Arc::new(frontend_client);
379        let flownode_builder = FlownodeBuilder::new(
380            opts.clone(),
381            plugins.clone(),
382            table_metadata_manager,
383            catalog_manager.clone(),
384            flow_metadata_manager,
385            frontend_client.clone(),
386        )
387        .with_heartbeat_task(heartbeat_task);
388
389        let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
390
391        let builder =
392            FlownodeServiceBuilder::grpc_server_builder(&opts, flownode.flownode_server());
393        let builder = if let Some(configurator) =
394            plugins.get::<GrpcBuilderConfiguratorRef<GrpcConfigureContext>>()
395        {
396            let context = GrpcConfigureContext {
397                kv_backend: cached_meta_backend.clone(),
398                fe_client: frontend_client.clone(),
399                flownode_id: member_id,
400                catalog_manager: catalog_manager.clone(),
401            };
402            configurator
403                .configure(builder, context)
404                .await
405                .context(OtherSnafu)?
406        } else {
407            builder
408        };
409        let grpc_server = builder.build();
410
411        let services = FlownodeServiceBuilder::new(&opts)
412            .with_grpc_server(grpc_server)
413            .enable_http_service()
414            .build()
415            .context(StartFlownodeSnafu)?;
416        flownode.setup_services(services);
417        let flownode = flownode;
418
419        let invoker = FrontendInvoker::build_from(
420            flownode.flow_engine().streaming_engine(),
421            catalog_manager.clone(),
422            cached_meta_backend.clone(),
423            layered_cache_registry.clone(),
424            meta_client.clone(),
425            client,
426            addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
427        )
428        .await
429        .context(StartFlownodeSnafu)?;
430        flownode
431            .flow_engine()
432            .streaming_engine()
433            // TODO(discord9): refactor and avoid circular reference
434            .set_frontend_invoker(invoker)
435            .await;
436
437        Ok(Instance::new(flownode, guard))
438    }
439}
440
#[cfg(test)]
mod tests {
    use clap::{CommandFactory, Parser};

    use super::*;

    /// The canonical `--grpc-*` flags and each of their `--rpc-*` aliases must
    /// fill the same `grpc_bind_addr` / `grpc_server_addr` fields.
    #[test]
    fn test_parse_grpc_cli_aliases() {
        // (bind flag, bind value, server flag, server value)
        let cases = [
            (
                "--grpc-bind-addr",
                "127.0.0.1:14004",
                "--grpc-server-addr",
                "10.0.0.1:14004",
            ),
            (
                "--rpc-bind-addr",
                "127.0.0.1:24004",
                "--rpc-server-addr",
                "10.0.0.2:24004",
            ),
            (
                "--rpc-addr",
                "127.0.0.1:34004",
                "--rpc-hostname",
                "10.0.0.3:34004",
            ),
        ];

        for (bind_flag, bind_addr, server_flag, server_addr) in cases {
            let args = ["flownode", bind_flag, bind_addr, server_flag, server_addr];
            let command = StartCommand::try_parse_from(args).unwrap();

            assert_eq!(command.grpc_bind_addr.as_deref(), Some(bind_addr));
            assert_eq!(command.grpc_server_addr.as_deref(), Some(server_addr));
        }
    }

    /// The long help must show only the `--grpc-*` names; the `--rpc-*` aliases
    /// must not appear in it.
    #[test]
    fn test_help_uses_grpc_option_names() {
        let mut rendered = Vec::new();
        StartCommand::command()
            .write_long_help(&mut rendered)
            .unwrap();
        let help = String::from_utf8(rendered).unwrap();

        for shown in ["--grpc-bind-addr", "--grpc-server-addr"] {
            assert!(help.contains(shown), "help should mention {shown}");
        }

        let aliases = [
            "--rpc-bind-addr",
            "--rpc-server-addr",
            "--rpc-addr",
            "--rpc-hostname",
        ];
        for alias in aliases {
            assert!(!help.contains(alias), "help should not mention {alias}");
        }
    }
}