// flow/server.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Implementation of grpc service for flow node
16
17use std::net::SocketAddr;
18use std::sync::Arc;
19
20use api::v1::flow::DirtyWindowRequests;
21use api::v1::{RowDeleteRequests, RowInsertRequests};
22use cache::{PARTITION_INFO_CACHE_NAME, TABLE_FLOWNODE_SET_CACHE_NAME, TABLE_ROUTE_CACHE_NAME};
23use catalog::CatalogManagerRef;
24use common_base::Plugins;
25use common_error::ext::BoxedError;
26use common_meta::cache::{LayeredCacheRegistryRef, TableFlownodeSetCacheRef, TableRouteCacheRef};
27use common_meta::key::TableMetadataManagerRef;
28use common_meta::key::flow::FlowMetadataManagerRef;
29use common_meta::kv_backend::KvBackendRef;
30use common_meta::node_manager::{Flownode, NodeManagerRef};
31use common_meta::procedure_executor::ProcedureExecutorRef;
32use common_query::Output;
33use common_runtime::JoinHandle;
34use common_telemetry::tracing::info;
35use futures::TryStreamExt;
36use greptime_proto::v1::flow::{FlowRequest, FlowResponse, InsertRequests, flow_server};
37use itertools::Itertools;
38use operator::delete::Deleter;
39use operator::insert::Inserter;
40use operator::statement::StatementExecutor;
41use partition::cache::PartitionInfoCacheRef;
42use partition::manager::PartitionRuleManager;
43use query::{QueryEngine, QueryEngineFactory};
44use servers::add_service;
45use servers::grpc::builder::GrpcServerBuilder;
46use servers::grpc::{GrpcServer, GrpcServerConfig};
47use servers::http::HttpServerBuilder;
48use servers::metrics_handler::MetricsHandler;
49use servers::server::{ServerHandler, ServerHandlers};
50use session::context::QueryContextRef;
51use snafu::{OptionExt, ResultExt};
52use tokio::sync::{Mutex, broadcast, oneshot};
53use tonic::codec::CompressionEncoding;
54use tonic::{Request, Response, Status};
55
56use crate::adapter::flownode_impl::{FlowDualEngine, FlowDualEngineRef};
57use crate::adapter::{FlowStreamingEngineRef, create_worker};
58use crate::batching_mode::engine::BatchingEngine;
59use crate::error::{
60    CacheRequiredSnafu, ExternalSnafu, ListFlowsSnafu, ParseAddrSnafu, ShutdownServerSnafu,
61    StartServerSnafu, UnexpectedSnafu, to_status_with_last_err,
62};
63use crate::heartbeat::HeartbeatTask;
64use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
65use crate::transform::register_function_to_query_engine;
66use crate::utils::{SizeReportSender, StateReportHandler};
67use crate::{Error, FlownodeOptions, FrontendClient, StreamingEngine};
68
/// Name of the flow node server.
pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER";
/// Wrapper around the flow node manager ([`FlowDualEngineRef`]) so that the
/// gRPC `Flow` trait can be implemented on a local type, avoiding the orphan
/// rule that would apply to `Arc<...>`.
#[derive(Clone)]
pub struct FlowService {
    /// Engine handle that every RPC of this service is forwarded to.
    pub dual_engine: FlowDualEngineRef,
}
75
76impl FlowService {
77    pub fn new(manager: FlowDualEngineRef) -> Self {
78        Self {
79            dual_engine: manager,
80        }
81    }
82}
83
84#[async_trait::async_trait]
85impl flow_server::Flow for FlowService {
86    async fn handle_create_remove(
87        &self,
88        request: Request<FlowRequest>,
89    ) -> Result<Response<FlowResponse>, Status> {
90        let _timer = METRIC_FLOW_PROCESSING_TIME
91            .with_label_values(&["ddl"])
92            .start_timer();
93
94        let request = request.into_inner();
95        self.dual_engine
96            .handle(request)
97            .await
98            .map_err(|err| {
99                common_telemetry::error!(err; "Failed to handle flow request");
100                err
101            })
102            .map(Response::new)
103            .map_err(to_status_with_last_err)
104    }
105
106    async fn handle_mirror_request(
107        &self,
108        request: Request<InsertRequests>,
109    ) -> Result<Response<FlowResponse>, Status> {
110        let _timer = METRIC_FLOW_PROCESSING_TIME
111            .with_label_values(&["insert"])
112            .start_timer();
113
114        let request = request.into_inner();
115        // TODO(discord9): fix protobuf import order shenanigans to remove this duplicated define
116        let mut row_count = 0;
117        let request = api::v1::region::InsertRequests {
118            requests: request
119                .requests
120                .into_iter()
121                .map(|insert| {
122                    insert.rows.as_ref().inspect(|x| row_count += x.rows.len());
123                    api::v1::region::InsertRequest {
124                        region_id: insert.region_id,
125                        rows: insert.rows,
126                        partition_expr_version: insert.partition_expr_version,
127                    }
128                })
129                .collect_vec(),
130        };
131
132        METRIC_FLOW_ROWS
133            .with_label_values(&["in"])
134            .inc_by(row_count as u64);
135
136        self.dual_engine
137            .handle_inserts(request)
138            .await
139            .map(Response::new)
140            .map_err(to_status_with_last_err)
141    }
142
143    async fn handle_mark_dirty_time_window(
144        &self,
145        reqs: Request<DirtyWindowRequests>,
146    ) -> Result<Response<FlowResponse>, Status> {
147        self.dual_engine
148            .handle_mark_window_dirty(reqs.into_inner())
149            .await
150            .map(Response::new)
151            .map_err(to_status_with_last_err)
152    }
153}
154
/// Cloneable handle to the flow node server; all state lives in the shared
/// `FlownodeServerInner`.
#[derive(Clone)]
pub struct FlownodeServer {
    /// Reference-counted inner state, so every clone refers to the same server.
    inner: Arc<FlownodeServerInner>,
}
159
/// FlownodeServerInner is the inner state of FlownodeServer;
/// this struct is mostly useful for constructing, starting and stopping
/// the flow node server.
struct FlownodeServerInner {
    /// worker shutdown signal, not to be confused with server_shutdown_tx
    worker_shutdown_tx: Mutex<broadcast::Sender<()>>,
    /// server shutdown signal for shutting down the grpc server
    server_shutdown_tx: Mutex<broadcast::Sender<()>>,
    /// handle of the streaming background task, `None` until workers are started
    streaming_task_handler: Mutex<Option<JoinHandle<()>>>,
    /// handle of the state report task, `None` until workers are started
    state_report_task_handler: Mutex<Option<JoinHandle<()>>>,
    /// the gRPC-facing service that wraps the dual engine
    flow_service: FlowService,
}
174
175impl FlownodeServer {
176    pub fn new(flow_service: FlowService) -> Self {
177        let (tx, _rx) = broadcast::channel::<()>(1);
178        let (server_tx, _server_rx) = broadcast::channel::<()>(1);
179        Self {
180            inner: Arc::new(FlownodeServerInner {
181                flow_service,
182                worker_shutdown_tx: Mutex::new(tx),
183                server_shutdown_tx: Mutex::new(server_tx),
184                streaming_task_handler: Mutex::new(None),
185                state_report_task_handler: Mutex::new(None),
186            }),
187        }
188    }
189
190    /// Start the background task for streaming computation.
191    ///
192    /// Should be called only after heartbeat is establish, hence can get cluster info
193    async fn start_workers(&self) -> Result<(), Error> {
194        let manager_ref = self.inner.flow_service.dual_engine.clone();
195        let mut state_report_task_handler = self.inner.state_report_task_handler.lock().await;
196        let started_state_report_task = state_report_task_handler.is_none();
197        if state_report_task_handler.is_none() {
198            *state_report_task_handler = manager_ref.clone().start_state_report_task().await;
199        }
200        drop(state_report_task_handler);
201        let handle = manager_ref
202            .streaming_engine()
203            .run_background(Some(self.inner.worker_shutdown_tx.lock().await.subscribe()));
204        self.inner
205            .streaming_task_handler
206            .lock()
207            .await
208            .replace(handle);
209
210        if let Err(err) = self
211            .inner
212            .flow_service
213            .dual_engine
214            .start_flow_consistent_check_task()
215            .await
216        {
217            self.rollback_started_workers(started_state_report_task)
218                .await;
219            return Err(err);
220        }
221
222        Ok(())
223    }
224
225    async fn rollback_started_workers(&self, abort_state_report_task: bool) {
226        let tx = self.inner.worker_shutdown_tx.lock().await;
227        if tx.send(()).is_err() {
228            info!("Receiver dropped, the flow node server has already shutdown");
229        }
230        drop(tx);
231
232        if let Some(handle) = self.inner.streaming_task_handler.lock().await.take() {
233            handle.abort();
234        }
235
236        if abort_state_report_task
237            && let Some(handle) = self.inner.state_report_task_handler.lock().await.take()
238        {
239            handle.abort();
240        }
241    }
242
243    /// Stop the background task for streaming computation.
244    async fn stop_workers(&self) -> Result<(), Error> {
245        let tx = self.inner.worker_shutdown_tx.lock().await;
246        if tx.send(()).is_err() {
247            info!("Receiver dropped, the flow node server has already shutdown");
248        }
249        // Keep state_report_task_handler alive across worker restarts.
250        // Dropping it here would permanently lose the report channel receiver.
251        self.inner
252            .flow_service
253            .dual_engine
254            .stop_flow_consistent_check_task()
255            .await?;
256        Ok(())
257    }
258}
259
260impl FlownodeServer {
261    pub fn create_flow_service(&self) -> flow_server::FlowServer<impl flow_server::Flow> {
262        flow_server::FlowServer::new(self.inner.flow_service.clone())
263            .accept_compressed(CompressionEncoding::Gzip)
264            .send_compressed(CompressionEncoding::Gzip)
265            .accept_compressed(CompressionEncoding::Zstd)
266            .send_compressed(CompressionEncoding::Zstd)
267    }
268}
269
/// The flownode server instance.
///
/// Bundles the flow node server, the network services and the optional
/// heartbeat task so they can be started and shut down together.
pub struct FlownodeInstance {
    /// Server owning the flow engine and its background workers.
    flownode_server: FlownodeServer,
    /// Network services started and shut down with this instance.
    services: ServerHandlers,
    /// Heartbeat task, if one was configured on the builder.
    heartbeat_task: Option<HeartbeatTask>,
}
276
277impl FlownodeInstance {
278    pub async fn start(&mut self) -> Result<(), crate::Error> {
279        if let Some(task) = &self.heartbeat_task {
280            task.start().await?;
281        }
282
283        self.flownode_server.start_workers().await?;
284
285        self.services.start_all().await.context(StartServerSnafu)?;
286
287        Ok(())
288    }
289    pub async fn shutdown(&mut self) -> Result<(), Error> {
290        self.services
291            .shutdown_all()
292            .await
293            .context(ShutdownServerSnafu)?;
294
295        self.flownode_server.stop_workers().await?;
296
297        if let Some(task) = &self.heartbeat_task {
298            task.shutdown();
299        }
300
301        Ok(())
302    }
303
304    pub fn flownode_server(&self) -> &FlownodeServer {
305        &self.flownode_server
306    }
307
308    pub fn flow_engine(&self) -> FlowDualEngineRef {
309        self.flownode_server.inner.flow_service.dual_engine.clone()
310    }
311
312    pub fn setup_services(&mut self, services: ServerHandlers) {
313        self.services = services;
314    }
315}
316
/// [`FlownodeInstance`] Builder
pub struct FlownodeBuilder {
    /// Flownode options; worker count, node id, query and batching settings are read from here.
    opts: FlownodeOptions,
    /// Plugins handed to the dual engine.
    plugins: Plugins,
    /// Table metadata manager shared with the streaming and batching engines.
    table_meta: TableMetadataManagerRef,
    /// Catalog manager shared with the query engine and both flow engines.
    catalog_manager: CatalogManagerRef,
    /// Flow metadata manager shared with the batching and dual engines.
    flow_metadata_manager: FlowMetadataManagerRef,
    /// Heartbeat task, set through `with_heartbeat_task`.
    heartbeat_task: Option<HeartbeatTask>,
    /// receives a oneshot sender used to send the state size report
    state_report_handler: Option<StateReportHandler>,
    /// Frontend client handed to the batching engine.
    frontend_client: Arc<FrontendClient>,
}
329
330impl FlownodeBuilder {
331    /// init flownode builder
332    pub fn new(
333        opts: FlownodeOptions,
334        plugins: Plugins,
335        table_meta: TableMetadataManagerRef,
336        catalog_manager: CatalogManagerRef,
337        flow_metadata_manager: FlowMetadataManagerRef,
338        frontend_client: Arc<FrontendClient>,
339    ) -> Self {
340        Self {
341            opts,
342            plugins,
343            table_meta,
344            catalog_manager,
345            flow_metadata_manager,
346            heartbeat_task: None,
347            state_report_handler: None,
348            frontend_client,
349        }
350    }
351
352    pub fn with_heartbeat_task(self, heartbeat_task: HeartbeatTask) -> Self {
353        let (sender, receiver) = SizeReportSender::new();
354        Self {
355            heartbeat_task: Some(heartbeat_task.with_query_stat_size(sender)),
356            state_report_handler: Some(receiver),
357            ..self
358        }
359    }
360
361    pub fn opts(&self) -> &FlownodeOptions {
362        &self.opts
363    }
364
365    pub fn table_meta(&self) -> &TableMetadataManagerRef {
366        &self.table_meta
367    }
368
369    pub fn catalog_manager(&self) -> &CatalogManagerRef {
370        &self.catalog_manager
371    }
372
373    pub fn flow_metadata_manager(&self) -> &FlowMetadataManagerRef {
374        &self.flow_metadata_manager
375    }
376
377    pub fn frontend_client(&self) -> &Arc<FrontendClient> {
378        &self.frontend_client
379    }
380
381    pub fn set_plugins(&mut self, plugins: Plugins) {
382        self.plugins = plugins;
383    }
384
385    pub async fn build(mut self) -> Result<FlownodeInstance, Error> {
386        // TODO(discord9): does this query engine need those?
387        let query_engine_factory = QueryEngineFactory::new_with_plugins(
388            // query engine in flownode is only used for translate plan with resolved table source.
389            self.catalog_manager.clone(),
390            None,
391            None,
392            None,
393            None,
394            None,
395            false,
396            Default::default(),
397            self.opts.query.clone(),
398        );
399        let manager = Arc::new(
400            self.build_manager(query_engine_factory.query_engine())
401                .await?,
402        );
403        let batching = Arc::new(BatchingEngine::new(
404            self.frontend_client.clone(),
405            query_engine_factory.query_engine(),
406            self.flow_metadata_manager.clone(),
407            self.table_meta.clone(),
408            self.catalog_manager.clone(),
409            self.opts.flow.batching_mode.clone(),
410        ));
411        let dual = Arc::new(FlowDualEngine::new(
412            manager.clone(),
413            batching,
414            self.flow_metadata_manager.clone(),
415            self.catalog_manager.clone(),
416            self.plugins.clone(),
417        ));
418        if let Some(handler) = self.state_report_handler.take() {
419            dual.set_state_report_handler(handler).await;
420        }
421
422        let server = FlownodeServer::new(FlowService::new(dual));
423
424        let heartbeat_task = self.heartbeat_task;
425
426        let instance = FlownodeInstance {
427            flownode_server: server,
428            services: ServerHandlers::default(),
429            heartbeat_task,
430        };
431        Ok(instance)
432    }
433
434    /// build [`FlowWorkerManager`], note this doesn't take ownership of `self`,
435    /// nor does it actually start running the worker.
436    async fn build_manager(
437        &mut self,
438        query_engine: Arc<dyn QueryEngine>,
439    ) -> Result<StreamingEngine, Error> {
440        let table_meta = self.table_meta.clone();
441
442        register_function_to_query_engine(&query_engine);
443
444        let num_workers = self.opts.flow.num_workers;
445
446        let node_id = self.opts.node_id.map(|id| id as u32);
447
448        let mut man = StreamingEngine::new(node_id, query_engine, table_meta);
449        for worker_id in 0..num_workers {
450            let (tx, rx) = oneshot::channel();
451
452            let _handle = std::thread::Builder::new()
453                .name(format!("flow-worker-{}", worker_id))
454                .spawn(move || {
455                    let (handle, mut worker) = create_worker();
456                    let _ = tx.send(handle);
457                    info!("Flow Worker started in new thread");
458                    worker.run();
459                });
460            let worker_handle = rx.await.map_err(|e| {
461                UnexpectedSnafu {
462                    reason: format!("Failed to receive worker handle: {}", e),
463                }
464                .build()
465            })?;
466            man.add_worker_handle(worker_handle);
467        }
468        info!("Flow Node Manager started");
469        Ok(man)
470    }
471}
472
/// Builder for the network services (gRPC and optionally HTTP) of a flownode.
///
/// Useful in distributed mode
pub struct FlownodeServiceBuilder<'a> {
    /// Options the bind addresses and server settings are read from.
    opts: &'a FlownodeOptions,
    /// gRPC server to register, if one was provided.
    grpc_server: Option<GrpcServer>,
    /// Whether an HTTP server should be built as well.
    enable_http_service: bool,
}
479
480impl<'a> FlownodeServiceBuilder<'a> {
481    pub fn new(opts: &'a FlownodeOptions) -> Self {
482        Self {
483            opts,
484            grpc_server: None,
485            enable_http_service: false,
486        }
487    }
488
489    pub fn enable_http_service(self) -> Self {
490        Self {
491            enable_http_service: true,
492            ..self
493        }
494    }
495
496    pub fn with_grpc_server(self, grpc_server: GrpcServer) -> Self {
497        Self {
498            grpc_server: Some(grpc_server),
499            ..self
500        }
501    }
502
503    pub fn with_default_grpc_server(mut self, flownode_server: &FlownodeServer) -> Self {
504        let grpc_server = Self::grpc_server_builder(self.opts, flownode_server).build();
505        self.grpc_server = Some(grpc_server);
506        self
507    }
508
509    pub fn build(mut self) -> Result<ServerHandlers, Error> {
510        let handlers = ServerHandlers::default();
511        if let Some(grpc_server) = self.grpc_server.take() {
512            let addr: SocketAddr = self.opts.grpc.bind_addr.parse().context(ParseAddrSnafu {
513                addr: &self.opts.grpc.bind_addr,
514            })?;
515            let handler: ServerHandler = (Box::new(grpc_server), addr);
516            handlers.insert(handler);
517        }
518
519        if self.enable_http_service {
520            let http_server = HttpServerBuilder::new(self.opts.http.clone())
521                .with_metrics_handler(MetricsHandler)
522                .build();
523            let addr: SocketAddr = self.opts.http.addr.parse().context(ParseAddrSnafu {
524                addr: &self.opts.http.addr,
525            })?;
526            let handler: ServerHandler = (Box::new(http_server), addr);
527            handlers.insert(handler);
528        }
529        Ok(handlers)
530    }
531
532    pub fn grpc_server_builder(
533        opts: &FlownodeOptions,
534        flownode_server: &FlownodeServer,
535    ) -> GrpcServerBuilder {
536        let config = GrpcServerConfig {
537            max_recv_message_size: opts.grpc.max_recv_message_size.as_bytes() as usize,
538            max_send_message_size: opts.grpc.max_send_message_size.as_bytes() as usize,
539            tls: opts.grpc.tls.clone(),
540            max_connection_age: opts.grpc.max_connection_age,
541        };
542        let service = flownode_server.create_flow_service();
543        let runtime = common_runtime::global_runtime();
544        let mut builder = GrpcServerBuilder::new(config, runtime);
545        add_service!(builder, service);
546        builder
547    }
548}
549
/// Basically a tiny frontend that communicates with datanodes. It differs from [`FrontendClient`],
/// which connects to a real frontend; this one is used by flow's streaming engine for simple queries.
///
/// For heavy queries use [`FrontendClient`], which offloads computation to the frontend, lifting the load from the flownode.
#[derive(Clone)]
pub struct FrontendInvoker {
    /// Handles row insert requests.
    inserter: Arc<Inserter>,
    /// Handles row delete requests.
    deleter: Arc<Deleter>,
    /// Statement executor passed along with row inserts and exposed to callers.
    statement_executor: Arc<StatementExecutor>,
}
560
561impl FrontendInvoker {
562    pub fn new(
563        inserter: Arc<Inserter>,
564        deleter: Arc<Deleter>,
565        statement_executor: Arc<StatementExecutor>,
566    ) -> Self {
567        Self {
568            inserter,
569            deleter,
570            statement_executor,
571        }
572    }
573
574    pub async fn build_from(
575        flow_streaming_engine: FlowStreamingEngineRef,
576        catalog_manager: CatalogManagerRef,
577        kv_backend: KvBackendRef,
578        layered_cache_registry: LayeredCacheRegistryRef,
579        procedure_executor: ProcedureExecutorRef,
580        node_manager: NodeManagerRef,
581        origin_frontend_addr: String,
582    ) -> Result<FrontendInvoker, Error> {
583        let table_route_cache: TableRouteCacheRef =
584            layered_cache_registry.get().context(CacheRequiredSnafu {
585                name: TABLE_ROUTE_CACHE_NAME,
586            })?;
587        let partition_info_cache: PartitionInfoCacheRef =
588            layered_cache_registry.get().context(CacheRequiredSnafu {
589                name: PARTITION_INFO_CACHE_NAME,
590            })?;
591
592        let partition_manager = Arc::new(PartitionRuleManager::new(
593            kv_backend.clone(),
594            table_route_cache.clone(),
595            partition_info_cache.clone(),
596        ));
597
598        let table_flownode_cache: TableFlownodeSetCacheRef =
599            layered_cache_registry.get().context(CacheRequiredSnafu {
600                name: TABLE_FLOWNODE_SET_CACHE_NAME,
601            })?;
602
603        // TODO(auto_create_table): flow sink tables are created through a controlled
604        // `CREATE FLOW` path, not client writes, so they are intentionally exempt from
605        // the frontend's global auto-create switch. Revisit if flow should honor it.
606        let inserter = Arc::new(Inserter::new(
607            catalog_manager.clone(),
608            partition_manager.clone(),
609            node_manager.clone(),
610            table_flownode_cache,
611            true,
612        ));
613
614        let deleter = Arc::new(Deleter::new(
615            catalog_manager.clone(),
616            partition_manager.clone(),
617            node_manager.clone(),
618        ));
619
620        let query_engine = flow_streaming_engine.query_engine.clone();
621
622        let statement_executor = Arc::new(StatementExecutor::new(
623            catalog_manager.clone(),
624            query_engine.clone(),
625            procedure_executor.clone(),
626            kv_backend.clone(),
627            layered_cache_registry.clone(),
628            inserter.clone(),
629            partition_manager,
630            None,
631            origin_frontend_addr,
632        ));
633
634        let invoker = FrontendInvoker::new(inserter, deleter, statement_executor);
635        Ok(invoker)
636    }
637}
638
639impl FrontendInvoker {
640    pub async fn row_inserts(
641        &self,
642        requests: RowInsertRequests,
643        ctx: QueryContextRef,
644    ) -> common_frontend::error::Result<Output> {
645        let _timer = METRIC_FLOW_PROCESSING_TIME
646            .with_label_values(&["output_insert"])
647            .start_timer();
648
649        self.inserter
650            .handle_row_inserts(requests, ctx, &self.statement_executor, false, false)
651            .await
652            .map_err(BoxedError::new)
653            .context(common_frontend::error::ExternalSnafu)
654    }
655
656    pub async fn row_deletes(
657        &self,
658        requests: RowDeleteRequests,
659        ctx: QueryContextRef,
660    ) -> common_frontend::error::Result<Output> {
661        let _timer = METRIC_FLOW_PROCESSING_TIME
662            .with_label_values(&["output_delete"])
663            .start_timer();
664
665        self.deleter
666            .handle_row_deletes(requests, ctx)
667            .await
668            .map_err(BoxedError::new)
669            .context(common_frontend::error::ExternalSnafu)
670    }
671
672    pub fn statement_executor(&self) -> Arc<StatementExecutor> {
673        self.statement_executor.clone()
674    }
675}
676
677/// get all flow ids in this flownode
678pub(crate) async fn get_all_flow_ids(
679    flow_metadata_manager: &FlowMetadataManagerRef,
680    catalog_manager: &CatalogManagerRef,
681    nodeid: Option<u64>,
682) -> Result<Vec<u32>, Error> {
683    let ret = if let Some(nodeid) = nodeid {
684        let flow_ids_one_node = flow_metadata_manager
685            .flownode_flow_manager()
686            .flows(nodeid)
687            .try_collect::<Vec<_>>()
688            .await
689            .context(ListFlowsSnafu { id: Some(nodeid) })?;
690        flow_ids_one_node.into_iter().map(|(id, _)| id).collect()
691    } else {
692        let all_catalogs = catalog_manager
693            .catalog_names()
694            .await
695            .map_err(BoxedError::new)
696            .context(ExternalSnafu)?;
697        let mut all_flow_ids = vec![];
698        for catalog in all_catalogs {
699            let flows = flow_metadata_manager
700                .flow_name_manager()
701                .flow_names(&catalog)
702                .await
703                .try_collect::<Vec<_>>()
704                .await
705                .map_err(BoxedError::new)
706                .context(ExternalSnafu)?;
707
708            all_flow_ids.extend(flows.into_iter().map(|(_, id)| id.flow_id()));
709        }
710        all_flow_ids
711    };
712
713    Ok(ret)
714}
715
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::time::Duration;

    use api::v1::meta::Role;
    use catalog::memory::new_memory_catalog_manager;
    use common_base::Plugins;
    use common_meta::key::TableMetadataManager;
    use common_meta::key::flow::FlowMetadataManager;
    use common_meta::kv_backend::memory::MemoryKvBackend;
    use meta_client::client::MetaClient;
    use query::options::QueryOptions;

    use super::*;
    use crate::adapter::flownode_impl::FlowDualEngine;
    use crate::batching_mode::BatchingModeOptions;
    use crate::batching_mode::engine::BatchingEngine;
    use crate::utils::SizeReportSender;

    /// Builds a test server backed by an empty gRPC handler frontend client,
    /// default batching options and no node id.
    async fn new_test_flownode_server() -> (FlownodeServer, SizeReportSender) {
        let (frontend_client, _handler) =
            FrontendClient::from_empty_grpc_handler(QueryOptions::default());
        let batching_opts = BatchingModeOptions::default();

        new_test_flownode_server_with_frontend_client(frontend_client, batching_opts, None).await
    }

    /// Builds a test server on in-memory metadata with the given frontend
    /// client, and returns it together with the sender half of the state
    /// report channel installed on its dual engine.
    async fn new_test_flownode_server_with_frontend_client(
        frontend_client: FrontendClient,
        batching_opts: BatchingModeOptions,
        node_id: Option<u32>,
    ) -> (FlownodeServer, SizeReportSender) {
        let kv_backend = Arc::new(MemoryKvBackend::new());
        let table_meta = Arc::new(TableMetadataManager::new(kv_backend.clone()));
        table_meta.init().await.unwrap();
        let flow_meta = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let catalog_manager = new_memory_catalog_manager().unwrap();
        let query_engine = crate::test_utils::create_test_query_engine();

        let streaming = StreamingEngine::new(node_id, query_engine.clone(), table_meta.clone());
        let batching = BatchingEngine::new(
            Arc::new(frontend_client),
            query_engine,
            flow_meta.clone(),
            table_meta,
            catalog_manager.clone(),
            batching_opts,
        );
        let dual_engine = Arc::new(FlowDualEngine::new(
            Arc::new(streaming),
            Arc::new(batching),
            flow_meta,
            catalog_manager,
            Plugins::new(),
        ));

        let (report_sender, report_handler) = SizeReportSender::new();
        dual_engine.set_state_report_handler(report_handler).await;

        (
            FlownodeServer::new(FlowService::new(dual_engine)),
            report_sender,
        )
    }

    /// State reports must keep being answered while workers are running,
    /// after they are stopped, and after they are started again.
    #[tokio::test]
    async fn test_state_report_handler_survives_worker_restart() {
        let query_timeout = Duration::from_secs(3);
        let (server, report_sender) = new_test_flownode_server().await;

        server.start_workers().await.unwrap();
        report_sender.query(query_timeout).await.unwrap();

        server.stop_workers().await.unwrap();
        report_sender.query(query_timeout).await.unwrap();

        server.start_workers().await.unwrap();
        report_sender.query(query_timeout).await.unwrap();

        server.stop_workers().await.unwrap();
    }

    /// When `start_workers` fails, neither the streaming task handle nor the
    /// state report task handle may be left registered.
    #[tokio::test]
    async fn test_start_workers_rolls_back_on_check_task_start_failure() {
        let batching_opts = BatchingModeOptions {
            experimental_frontend_scan_timeout: Duration::from_millis(1),
            ..Default::default()
        };
        let meta_client = Arc::new(MetaClient::new(0, Role::Frontend));
        let frontend_client = FrontendClient::from_meta_client(
            meta_client,
            QueryOptions::default(),
            batching_opts.clone(),
        )
        .unwrap();
        let (server, _report_sender) =
            new_test_flownode_server_with_frontend_client(frontend_client, batching_opts, Some(1))
                .await;

        server.start_workers().await.unwrap_err();

        let streaming_task_cleared = server.inner.streaming_task_handler.lock().await.is_none();
        assert!(streaming_task_cleared);

        let state_report_task_cleared = server
            .inner
            .state_report_task_handler
            .lock()
            .await
            .is_none();
        assert!(state_report_task_cleared);
    }
}