Skip to main content

cmd/
flownode.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16use std::path::Path;
17use std::sync::Arc;
18use std::time::Duration;
19
20use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
21use catalog::information_extension::DistributedInformationExtension;
22use catalog::kvbackend::{
23    CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, new_read_only_meta_kv_backend,
24};
25use clap::Parser;
26use client::client_manager::NodeClients;
27use common_base::Plugins;
28use common_config::{Configurable, DEFAULT_DATA_HOME};
29use common_grpc::channel_manager::ChannelConfig;
30use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
31use common_meta::heartbeat::handler::HandlerGroupExecutor;
32use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
33use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
34use common_meta::key::TableMetadataManager;
35use common_meta::key::flow::FlowMetadataManager;
36use common_stat::ResourceStatImpl;
37use common_telemetry::info;
38use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
39use common_version::{short_version, verbose_version};
40use flow::{
41    FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
42    get_flow_auth_options,
43};
44use meta_client::{MetaClientOptions, MetaClientType};
45use plugins::flownode::context::GrpcConfigureContext;
46use servers::configurator::GrpcBuilderConfiguratorRef;
47use snafu::{OptionExt, ResultExt, ensure};
48use tracing_appender::non_blocking::WorkerGuard;
49
50use crate::error::{
51    BuildCacheRegistrySnafu, LoadLayeredConfigSnafu, MetaClientInitSnafu, MissingConfigSnafu,
52    OtherSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
53};
54use crate::options::{GlobalOptions, GreptimeOptions};
55use crate::{App, create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
56
/// Application name of the flownode. It is what `App::name` returns for `Instance` and
/// what is handed to the logging, version-logging and resource-metrics setup during build.
pub const APP_NAME: &str = "greptime-flownode";
58
59type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
60
61pub struct Instance {
62    flownode: FlownodeInstance,
63    // Keep the logging guard to prevent the worker from being dropped.
64    _guard: Vec<WorkerGuard>,
65}
66
67impl Instance {
68    pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
69        Self {
70            flownode,
71            _guard: guard,
72        }
73    }
74
75    pub fn flownode(&self) -> &FlownodeInstance {
76        &self.flownode
77    }
78
79    /// allow customizing flownode for downstream projects
80    pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
81        &mut self.flownode
82    }
83}
84
85#[async_trait::async_trait]
86impl App for Instance {
87    fn name(&self) -> &str {
88        APP_NAME
89    }
90
91    async fn start(&mut self) -> Result<()> {
92        plugins::start_flownode_plugins(self.flownode.flow_engine().plugins().clone())
93            .await
94            .context(StartFlownodeSnafu)?;
95
96        self.flownode.start().await.context(StartFlownodeSnafu)
97    }
98
99    async fn stop(&mut self) -> Result<()> {
100        self.flownode
101            .shutdown()
102            .await
103            .context(ShutdownFlownodeSnafu)
104    }
105}
106
107#[derive(Parser)]
108pub struct Command {
109    #[clap(subcommand)]
110    subcmd: SubCommand,
111}
112
113impl Command {
114    pub async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
115        self.subcmd.build(opts).await
116    }
117
118    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
119        match &self.subcmd {
120            SubCommand::Start(cmd) => cmd.load_options(global_options),
121        }
122    }
123}
124
125#[derive(Parser)]
126enum SubCommand {
127    Start(StartCommand),
128}
129
130impl SubCommand {
131    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
132        match self {
133            SubCommand::Start(cmd) => cmd.build(opts).await,
134        }
135    }
136}
137
138#[derive(Debug, Parser, Default)]
139struct StartCommand {
140    /// Flownode's id
141    #[clap(long)]
142    node_id: Option<u64>,
143    /// Bind address for the gRPC server.
144    #[clap(long = "grpc-bind-addr", alias = "rpc-bind-addr", alias = "rpc-addr")]
145    grpc_bind_addr: Option<String>,
146    /// The address advertised to the metasrv, and used for connections from outside the host.
147    /// If left empty or unset, the server will automatically use the IP address of the first network interface
148    /// on the host, with the same port number as the one specified in `grpc_bind_addr`.
149    #[clap(
150        long = "grpc-server-addr",
151        alias = "rpc-server-addr",
152        alias = "rpc-hostname"
153    )]
154    grpc_server_addr: Option<String>,
155    /// Metasrv address list;
156    #[clap(long, value_delimiter = ',', num_args = 1..)]
157    metasrv_addrs: Option<Vec<String>>,
158    /// The configuration file for flownode
159    #[clap(short, long)]
160    config_file: Option<String>,
161    /// The prefix of environment variables, default is `GREPTIMEDB_FLOWNODE`;
162    #[clap(long, default_value = "GREPTIMEDB_FLOWNODE")]
163    env_prefix: String,
164    #[clap(long)]
165    http_addr: Option<String>,
166    /// HTTP request timeout in seconds.
167    #[clap(long)]
168    http_timeout: Option<u64>,
169    /// User Provider cfg, for auth, currently only support static user provider
170    #[clap(long)]
171    user_provider: Option<String>,
172}
173
174impl StartCommand {
175    fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
176        let mut opts = FlownodeOptions::load_layered_options(
177            self.config_file.as_deref(),
178            self.env_prefix.as_ref(),
179        )
180        .context(LoadLayeredConfigSnafu)?;
181
182        self.merge_with_cli_options(global_options, &mut opts)?;
183
184        Ok(opts)
185    }
186
187    // The precedence order is: cli > config file > environment variables > default values.
188    fn merge_with_cli_options(
189        &self,
190        global_options: &GlobalOptions,
191        opts: &mut FlownodeOptions,
192    ) -> Result<()> {
193        let opts = &mut opts.component;
194
195        if let Some(dir) = &global_options.log_dir {
196            opts.logging.dir.clone_from(dir);
197        }
198
199        // If the logging dir is not set, use the default logs dir in the data home.
200        if opts.logging.dir.is_empty() {
201            opts.logging.dir = Path::new(DEFAULT_DATA_HOME)
202                .join(DEFAULT_LOGGING_DIR)
203                .to_string_lossy()
204                .to_string();
205        }
206
207        if global_options.log_level.is_some() {
208            opts.logging.level.clone_from(&global_options.log_level);
209        }
210
211        opts.tracing = TracingOptions {
212            #[cfg(feature = "tokio-console")]
213            tokio_console_addr: global_options.tokio_console_addr.clone(),
214        };
215
216        if let Some(addr) = &self.grpc_bind_addr {
217            opts.grpc.bind_addr.clone_from(addr);
218        }
219
220        if let Some(server_addr) = &self.grpc_server_addr {
221            opts.grpc.server_addr.clone_from(server_addr);
222        }
223
224        if let Some(node_id) = self.node_id {
225            opts.node_id = Some(node_id);
226        }
227
228        if let Some(metasrv_addrs) = &self.metasrv_addrs {
229            opts.meta_client
230                .get_or_insert_with(MetaClientOptions::default)
231                .metasrv_addrs
232                .clone_from(metasrv_addrs);
233        }
234
235        if let Some(http_addr) = &self.http_addr {
236            opts.http.addr.clone_from(http_addr);
237        }
238
239        if let Some(http_timeout) = self.http_timeout {
240            opts.http.timeout = Duration::from_secs(http_timeout);
241        }
242
243        if let Some(user_provider) = &self.user_provider {
244            opts.user_provider = Some(user_provider.clone());
245        }
246
247        ensure!(
248            opts.node_id.is_some(),
249            MissingConfigSnafu {
250                msg: "Missing node id option"
251            }
252        );
253
254        Ok(())
255    }
256
257    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
258        common_runtime::init_global_runtimes(&opts.runtime);
259
260        let guard = common_telemetry::init_global_logging(
261            APP_NAME,
262            &opts.component.logging,
263            &opts.component.tracing,
264            opts.component.node_id.map(|x| x.to_string()),
265            None,
266        );
267
268        log_versions(verbose_version(), short_version(), APP_NAME);
269        maybe_activate_heap_profile(&opts.component.memory);
270        create_resource_limit_metrics(APP_NAME);
271
272        info!("Flownode start command: {:#?}", self);
273        info!("Flownode options: {:#?}", opts);
274
275        let plugin_opts = opts.plugins;
276        let mut opts = opts.component;
277        opts.grpc.detect_server_addr();
278
279        let mut plugins = Plugins::new();
280        plugins::setup_flownode_plugins(&mut plugins, &plugin_opts, &opts)
281            .await
282            .context(StartFlownodeSnafu)?;
283
284        let member_id = opts
285            .node_id
286            .context(MissingConfigSnafu { msg: "'node_id'" })?;
287
288        let meta_config = opts.meta_client.as_ref().context(MissingConfigSnafu {
289            msg: "'meta_client_options'",
290        })?;
291
292        let meta_client = meta_client::create_meta_client(
293            MetaClientType::Flownode { member_id },
294            meta_config,
295            None,
296            None,
297        )
298        .await
299        .context(MetaClientInitSnafu)?;
300
301        let cache_max_capacity = meta_config.metadata_cache_max_capacity;
302        let cache_ttl = meta_config.metadata_cache_ttl;
303        let cache_tti = meta_config.metadata_cache_tti;
304
305        let readonly_meta_backend = new_read_only_meta_kv_backend(meta_client.clone());
306
307        // TODO(discord9): add helper function to ease the creation of cache registry&such
308        let cached_meta_backend = CachedKvBackendBuilder::new(readonly_meta_backend.clone())
309            .cache_max_capacity(cache_max_capacity)
310            .cache_ttl(cache_ttl)
311            .cache_tti(cache_tti)
312            .build();
313        let cached_meta_backend = Arc::new(cached_meta_backend);
314
315        // Builds cache registry
316        let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
317            CacheRegistryBuilder::default()
318                .add_cache(cached_meta_backend.clone())
319                .build(),
320        );
321        let fundamental_cache_registry =
322            build_fundamental_cache_registry(readonly_meta_backend.clone());
323        let layered_cache_registry = Arc::new(
324            with_default_composite_cache_registry(
325                layered_cache_builder.add_cache_registry(fundamental_cache_registry),
326            )
327            .context(BuildCacheRegistrySnafu)?
328            .build(),
329        );
330
331        // flownode's frontend to datanode need not timeout.
332        // Some queries are expected to take long time.
333        let channel_config = ChannelConfig {
334            timeout: None,
335            ..Default::default()
336        };
337        let client = Arc::new(NodeClients::new(channel_config));
338
339        let information_extension = Arc::new(DistributedInformationExtension::new(
340            meta_client.clone(),
341            client.clone(),
342        ));
343        let catalog_manager = KvBackendCatalogManagerBuilder::new(
344            information_extension,
345            cached_meta_backend.clone(),
346            layered_cache_registry.clone(),
347        )
348        .build();
349
350        let table_metadata_manager =
351            Arc::new(TableMetadataManager::new(cached_meta_backend.clone()));
352
353        let executor = HandlerGroupExecutor::new(vec![
354            Arc::new(ParseMailboxMessageHandler),
355            Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
356        ]);
357
358        let mut resource_stat = ResourceStatImpl::default();
359        resource_stat.start_collect_cpu_usage();
360
361        let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
362            &opts,
363            meta_client.clone(),
364            Arc::new(executor),
365            Arc::new(resource_stat),
366        );
367
368        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
369        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
370        let frontend_client = FrontendClient::from_meta_client(
371            meta_client.clone(),
372            flow_auth_header,
373            opts.query.clone(),
374            opts.flow.batching_mode.clone(),
375        )
376        .context(StartFlownodeSnafu)?;
377        let frontend_client = Arc::new(frontend_client);
378        let flownode_builder = FlownodeBuilder::new(
379            opts.clone(),
380            plugins.clone(),
381            table_metadata_manager,
382            catalog_manager.clone(),
383            flow_metadata_manager,
384            frontend_client.clone(),
385        )
386        .with_heartbeat_task(heartbeat_task);
387
388        let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
389
390        let builder =
391            FlownodeServiceBuilder::grpc_server_builder(&opts, flownode.flownode_server());
392        let builder = if let Some(configurator) =
393            plugins.get::<GrpcBuilderConfiguratorRef<GrpcConfigureContext>>()
394        {
395            let context = GrpcConfigureContext {
396                kv_backend: cached_meta_backend.clone(),
397                fe_client: frontend_client.clone(),
398                flownode_id: member_id,
399                catalog_manager: catalog_manager.clone(),
400            };
401            configurator
402                .configure(builder, context)
403                .await
404                .context(OtherSnafu)?
405        } else {
406            builder
407        };
408        let grpc_server = builder.build();
409
410        let services = FlownodeServiceBuilder::new(&opts)
411            .with_grpc_server(grpc_server)
412            .enable_http_service()
413            .build()
414            .context(StartFlownodeSnafu)?;
415        flownode.setup_services(services);
416        let flownode = flownode;
417
418        let invoker = FrontendInvoker::build_from(
419            flownode.flow_engine().streaming_engine(),
420            catalog_manager.clone(),
421            cached_meta_backend.clone(),
422            layered_cache_registry.clone(),
423            meta_client.clone(),
424            client,
425        )
426        .await
427        .context(StartFlownodeSnafu)?;
428        flownode
429            .flow_engine()
430            .streaming_engine()
431            // TODO(discord9): refactor and avoid circular reference
432            .set_frontend_invoker(invoker)
433            .await;
434
435        Ok(Instance::new(flownode, guard))
436    }
437}
438
#[cfg(test)]
mod tests {
    use clap::{CommandFactory, Parser};

    use super::*;

    /// The canonical `--grpc-*` flags and both generations of legacy `--rpc-*` aliases
    /// must all populate the same two fields.
    #[test]
    fn test_parse_grpc_cli_aliases() {
        // (bind flag, bind address, server flag, server address)
        let cases = [
            (
                "--grpc-bind-addr",
                "127.0.0.1:14004",
                "--grpc-server-addr",
                "10.0.0.1:14004",
            ),
            (
                "--rpc-bind-addr",
                "127.0.0.1:24004",
                "--rpc-server-addr",
                "10.0.0.2:24004",
            ),
            (
                "--rpc-addr",
                "127.0.0.1:34004",
                "--rpc-hostname",
                "10.0.0.3:34004",
            ),
        ];

        for (bind_flag, bind_addr, server_flag, server_addr) in cases {
            let command = StartCommand::try_parse_from([
                "flownode",
                bind_flag,
                bind_addr,
                server_flag,
                server_addr,
            ])
            .unwrap();
            assert_eq!(command.grpc_bind_addr.as_deref(), Some(bind_addr));
            assert_eq!(command.grpc_server_addr.as_deref(), Some(server_addr));
        }
    }

    /// The long help must advertise only the `--grpc-*` names; the legacy aliases stay
    /// hidden.
    #[test]
    fn test_help_uses_grpc_option_names() {
        let mut rendered = Vec::new();
        StartCommand::command()
            .write_long_help(&mut rendered)
            .unwrap();
        let help = String::from_utf8(rendered).unwrap();

        for shown in ["--grpc-bind-addr", "--grpc-server-addr"] {
            assert!(help.contains(shown));
        }

        for hidden in [
            "--rpc-bind-addr",
            "--rpc-server-addr",
            "--rpc-addr",
            "--rpc-hostname",
        ] {
            assert!(!help.contains(hidden));
        }
    }
}