cmd/
flownode.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16use std::path::Path;
17use std::sync::Arc;
18use std::time::Duration;
19
20use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
21use catalog::information_extension::DistributedInformationExtension;
22use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
23use clap::Parser;
24use client::client_manager::NodeClients;
25use common_base::Plugins;
26use common_config::{Configurable, DEFAULT_DATA_HOME};
27use common_grpc::channel_manager::ChannelConfig;
28use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
29use common_meta::heartbeat::handler::HandlerGroupExecutor;
30use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
31use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
32use common_meta::key::TableMetadataManager;
33use common_meta::key::flow::FlowMetadataManager;
34use common_stat::ResourceStatImpl;
35use common_telemetry::info;
36use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
37use common_version::{short_version, verbose_version};
38use const_format::concatcp;
39use flow::{
40    FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder, FrontendClient, FrontendInvoker,
41    get_flow_auth_options,
42};
43use meta_client::{MetaClientOptions, MetaClientType};
44use plugins::flownode::context::GrpcConfigureContext;
45use servers::configurator::GrpcBuilderConfiguratorRef;
46use snafu::{OptionExt, ResultExt, ensure};
47use tracing_appender::non_blocking::WorkerGuard;
48
49use crate::error::{
50    BuildCacheRegistrySnafu, InitMetadataSnafu, LoadLayeredConfigSnafu, MetaClientInitSnafu,
51    MissingConfigSnafu, OtherSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
52};
53use crate::options::{GlobalOptions, GreptimeOptions};
54use crate::{App, create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
55
/// Application name of the flownode: the product name followed by `-flownode`.
pub const APP_NAME: &str = concatcp!(common_version::product_name(), "-flownode");

/// Top-level flownode options: [`GreptimeOptions`] wrapping the flownode component options.
type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
59
/// A flownode application: the [`FlownodeInstance`] itself plus the logging guards that have
/// to stay alive for as long as it exists.
pub struct Instance {
    /// The wrapped flownode.
    flownode: FlownodeInstance,
    // Keep the logging guard to prevent the worker from being dropped.
    _guard: Vec<WorkerGuard>,
}
65
66impl Instance {
67    pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
68        Self {
69            flownode,
70            _guard: guard,
71        }
72    }
73
74    pub fn flownode(&self) -> &FlownodeInstance {
75        &self.flownode
76    }
77
78    /// allow customizing flownode for downstream projects
79    pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
80        &mut self.flownode
81    }
82}
83
84#[async_trait::async_trait]
85impl App for Instance {
86    fn name(&self) -> &str {
87        APP_NAME
88    }
89
90    async fn start(&mut self) -> Result<()> {
91        plugins::start_flownode_plugins(self.flownode.flow_engine().plugins().clone())
92            .await
93            .context(StartFlownodeSnafu)?;
94
95        self.flownode.start().await.context(StartFlownodeSnafu)
96    }
97
98    async fn stop(&mut self) -> Result<()> {
99        self.flownode
100            .shutdown()
101            .await
102            .context(ShutdownFlownodeSnafu)
103    }
104}
105
// Command-line entry point of the flownode; all work is delegated to the chosen subcommand.
// (Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.)
#[derive(Parser)]
pub struct Command {
    // The subcommand selected on the command line.
    #[clap(subcommand)]
    subcmd: SubCommand,
}
111
112impl Command {
113    pub async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
114        self.subcmd.build(opts).await
115    }
116
117    pub fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
118        match &self.subcmd {
119            SubCommand::Start(cmd) => cmd.load_options(global_options),
120        }
121    }
122}
123
// Subcommands accepted by the flownode command; `Start` is currently the only one.
// (Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.)
#[derive(Parser)]
enum SubCommand {
    // Build and run a flownode from the given `StartCommand` options.
    Start(StartCommand),
}
128
129impl SubCommand {
130    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
131        match self {
132            SubCommand::Start(cmd) => cmd.build(opts).await,
133        }
134    }
135}
136
137#[derive(Debug, Parser, Default)]
138struct StartCommand {
139    /// Flownode's id
140    #[clap(long)]
141    node_id: Option<u64>,
142    /// Bind address for the gRPC server.
143    #[clap(long, alias = "rpc-addr")]
144    rpc_bind_addr: Option<String>,
145    /// The address advertised to the metasrv, and used for connections from outside the host.
146    /// If left empty or unset, the server will automatically use the IP address of the first network interface
147    /// on the host, with the same port number as the one specified in `rpc_bind_addr`.
148    #[clap(long, alias = "rpc-hostname")]
149    rpc_server_addr: Option<String>,
150    /// Metasrv address list;
151    #[clap(long, value_delimiter = ',', num_args = 1..)]
152    metasrv_addrs: Option<Vec<String>>,
153    /// The configuration file for flownode
154    #[clap(short, long)]
155    config_file: Option<String>,
156    /// The prefix of environment variables, default is `GREPTIMEDB_FLOWNODE`;
157    #[clap(long, default_value = "GREPTIMEDB_FLOWNODE")]
158    env_prefix: String,
159    #[clap(long)]
160    http_addr: Option<String>,
161    /// HTTP request timeout in seconds.
162    #[clap(long)]
163    http_timeout: Option<u64>,
164    /// User Provider cfg, for auth, currently only support static user provider
165    #[clap(long)]
166    user_provider: Option<String>,
167}
168
169impl StartCommand {
170    fn load_options(&self, global_options: &GlobalOptions) -> Result<FlownodeOptions> {
171        let mut opts = FlownodeOptions::load_layered_options(
172            self.config_file.as_deref(),
173            self.env_prefix.as_ref(),
174        )
175        .context(LoadLayeredConfigSnafu)?;
176
177        self.merge_with_cli_options(global_options, &mut opts)?;
178
179        Ok(opts)
180    }
181
182    // The precedence order is: cli > config file > environment variables > default values.
183    fn merge_with_cli_options(
184        &self,
185        global_options: &GlobalOptions,
186        opts: &mut FlownodeOptions,
187    ) -> Result<()> {
188        let opts = &mut opts.component;
189
190        if let Some(dir) = &global_options.log_dir {
191            opts.logging.dir.clone_from(dir);
192        }
193
194        // If the logging dir is not set, use the default logs dir in the data home.
195        if opts.logging.dir.is_empty() {
196            opts.logging.dir = Path::new(DEFAULT_DATA_HOME)
197                .join(DEFAULT_LOGGING_DIR)
198                .to_string_lossy()
199                .to_string();
200        }
201
202        if global_options.log_level.is_some() {
203            opts.logging.level.clone_from(&global_options.log_level);
204        }
205
206        opts.tracing = TracingOptions {
207            #[cfg(feature = "tokio-console")]
208            tokio_console_addr: global_options.tokio_console_addr.clone(),
209        };
210
211        if let Some(addr) = &self.rpc_bind_addr {
212            opts.grpc.bind_addr.clone_from(addr);
213        }
214
215        if let Some(server_addr) = &self.rpc_server_addr {
216            opts.grpc.server_addr.clone_from(server_addr);
217        }
218
219        if let Some(node_id) = self.node_id {
220            opts.node_id = Some(node_id);
221        }
222
223        if let Some(metasrv_addrs) = &self.metasrv_addrs {
224            opts.meta_client
225                .get_or_insert_with(MetaClientOptions::default)
226                .metasrv_addrs
227                .clone_from(metasrv_addrs);
228        }
229
230        if let Some(http_addr) = &self.http_addr {
231            opts.http.addr.clone_from(http_addr);
232        }
233
234        if let Some(http_timeout) = self.http_timeout {
235            opts.http.timeout = Duration::from_secs(http_timeout);
236        }
237
238        if let Some(user_provider) = &self.user_provider {
239            opts.user_provider = Some(user_provider.clone());
240        }
241
242        ensure!(
243            opts.node_id.is_some(),
244            MissingConfigSnafu {
245                msg: "Missing node id option"
246            }
247        );
248
249        Ok(())
250    }
251
    /// Builds a ready-to-start flownode [`Instance`] from the loaded options.
    ///
    /// In order, this initializes the global runtimes and logging, sets up the flownode plugins,
    /// creates the meta client, builds the cached metadata backend and the cache registries,
    /// constructs the catalog / table / flow metadata managers, the heartbeat task and the
    /// frontend client, builds the flownode together with its gRPC and HTTP services, and
    /// finally attaches a frontend invoker to the streaming engine.
    ///
    /// # Errors
    ///
    /// Fails if `node_id` or the meta client options are missing, or if any of the setup steps
    /// above returns an error.
    async fn build(&self, opts: FlownodeOptions) -> Result<Instance> {
        common_runtime::init_global_runtimes(&opts.runtime);

        // The returned guards are handed to the `Instance` at the end of this function.
        let guard = common_telemetry::init_global_logging(
            APP_NAME,
            &opts.component.logging,
            &opts.component.tracing,
            opts.component.node_id.map(|x| x.to_string()),
            None,
        );

        log_versions(verbose_version(), short_version(), APP_NAME);
        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);

        info!("Flownode start command: {:#?}", self);
        info!("Flownode options: {:#?}", opts);

        // From here on `opts` refers to the flownode component options only.
        let plugin_opts = opts.plugins;
        let mut opts = opts.component;
        // NOTE(review): presumably fills in the advertised gRPC server address when it is
        // unset — confirm in `detect_server_addr`.
        opts.grpc.detect_server_addr();

        let mut plugins = Plugins::new();
        plugins::setup_flownode_plugins(&mut plugins, &plugin_opts, &opts)
            .await
            .context(StartFlownodeSnafu)?;

        // The node id doubles as the member id used when talking to the metasrv.
        let member_id = opts
            .node_id
            .context(MissingConfigSnafu { msg: "'node_id'" })?;

        let meta_config = opts.meta_client.as_ref().context(MissingConfigSnafu {
            msg: "'meta_client_options'",
        })?;

        let meta_client = meta_client::create_meta_client(
            MetaClientType::Flownode { member_id },
            meta_config,
            None,
            None,
        )
        .await
        .context(MetaClientInitSnafu)?;

        let cache_max_capacity = meta_config.metadata_cache_max_capacity;
        let cache_ttl = meta_config.metadata_cache_ttl;
        let cache_tti = meta_config.metadata_cache_tti;

        // TODO(discord9): add helper function to ease the creation of cache registry&such
        // Metadata kv backend backed by the meta client, with a cache in front of it.
        let cached_meta_backend =
            CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone())))
                .cache_max_capacity(cache_max_capacity)
                .cache_ttl(cache_ttl)
                .cache_tti(cache_tti)
                .build();
        let cached_meta_backend = Arc::new(cached_meta_backend);

        // Builds cache registry
        let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
            CacheRegistryBuilder::default()
                .add_cache(cached_meta_backend.clone())
                .build(),
        );
        // The fundamental registry is built on a separate `MetaKvBackend`, not on the cached
        // backend created above.
        let fundamental_cache_registry =
            build_fundamental_cache_registry(Arc::new(MetaKvBackend::new(meta_client.clone())));
        let layered_cache_registry = Arc::new(
            with_default_composite_cache_registry(
                layered_cache_builder.add_cache_registry(fundamental_cache_registry),
            )
            .context(BuildCacheRegistrySnafu)?
            .build(),
        );

        // flownode's frontend to datanode need not timeout.
        // Some queries are expected to take long time.
        let channel_config = ChannelConfig {
            timeout: None,
            ..Default::default()
        };
        let client = Arc::new(NodeClients::new(channel_config));

        let information_extension = Arc::new(DistributedInformationExtension::new(
            meta_client.clone(),
            client.clone(),
        ));
        let catalog_manager = KvBackendCatalogManagerBuilder::new(
            information_extension,
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
        )
        .build();

        let table_metadata_manager =
            Arc::new(TableMetadataManager::new(cached_meta_backend.clone()));
        table_metadata_manager
            .init()
            .await
            .context(InitMetadataSnafu)?;

        // Heartbeat handlers: parse mailbox messages and invalidate entries of the layered
        // cache registry.
        let executor = HandlerGroupExecutor::new(vec![
            Arc::new(ParseMailboxMessageHandler),
            Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
        ]);

        // CPU usage collection is started before the stats are handed to the heartbeat task.
        let mut resource_stat = ResourceStatImpl::default();
        resource_stat.start_collect_cpu_usage();

        let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
            &opts,
            meta_client.clone(),
            Arc::new(executor),
            Arc::new(resource_stat),
        );

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
        let flow_auth_header = get_flow_auth_options(&opts).context(StartFlownodeSnafu)?;
        let frontend_client = FrontendClient::from_meta_client(
            meta_client.clone(),
            flow_auth_header,
            opts.query.clone(),
            opts.flow.batching_mode.clone(),
        )
        .context(StartFlownodeSnafu)?;
        let frontend_client = Arc::new(frontend_client);
        let flownode_builder = FlownodeBuilder::new(
            opts.clone(),
            plugins.clone(),
            table_metadata_manager,
            catalog_manager.clone(),
            flow_metadata_manager,
            frontend_client.clone(),
        )
        .with_heartbeat_task(heartbeat_task);

        let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;

        // Build the gRPC server; a configurator registered in the plugins, if any, gets the
        // chance to customize the builder first.
        let builder =
            FlownodeServiceBuilder::grpc_server_builder(&opts, flownode.flownode_server());
        let builder = if let Some(configurator) =
            plugins.get::<GrpcBuilderConfiguratorRef<GrpcConfigureContext>>()
        {
            let context = GrpcConfigureContext {
                kv_backend: cached_meta_backend.clone(),
                fe_client: frontend_client.clone(),
                flownode_id: member_id,
                catalog_manager: catalog_manager.clone(),
            };
            configurator
                .configure(builder, context)
                .await
                .context(OtherSnafu)?
        } else {
            builder
        };
        let grpc_server = builder.build();

        let services = FlownodeServiceBuilder::new(&opts)
            .with_grpc_server(grpc_server)
            .enable_http_service()
            .build()
            .context(StartFlownodeSnafu)?;
        flownode.setup_services(services);
        // Rebind immutably: the flownode is not mutated through this binding any more.
        let flownode = flownode;

        let invoker = FrontendInvoker::build_from(
            flownode.flow_engine().streaming_engine(),
            catalog_manager.clone(),
            cached_meta_backend.clone(),
            layered_cache_registry.clone(),
            meta_client.clone(),
            client,
        )
        .await
        .context(StartFlownodeSnafu)?;
        flownode
            .flow_engine()
            .streaming_engine()
            // TODO(discord9): refactor and avoid circular reference
            .set_frontend_invoker(invoker)
            .await;

        Ok(Instance::new(flownode, guard))
    }
435}