Skip to main content

flow/batching_mode/
frontend_client.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! Frontend client used to run a flow as a batching task, i.e. a time-window-aware normal query that is triggered on every tick configured by the user.
16
17use std::collections::HashMap;
18use std::sync::{Arc, Mutex, Weak};
19
20use api::v1::greptime_request::Request;
21use api::v1::query_request::Query;
22use api::v1::{CreateTableExpr, QueryRequest};
23use client::{Client, Database};
24use common_error::ext::BoxedError;
25use common_grpc::channel_manager::{ChannelConfig, ChannelManager, load_client_tls_config};
26use common_meta::peer::{Peer, PeerDiscovery};
27use common_query::Output;
28use common_telemetry::warn;
29use meta_client::client::MetaClient;
30use query::datafusion::QUERY_PARALLELISM_HINT;
31use query::options::QueryOptions;
32use rand::rng;
33use rand::seq::SliceRandom;
34use servers::query_handler::grpc::GrpcQueryHandler;
35use session::context::{QueryContextBuilder, QueryContextRef};
36use session::hints::READ_PREFERENCE_HINT;
37use snafu::{OptionExt, ResultExt};
38use tokio::sync::SetOnce;
39
40use crate::batching_mode::BatchingModeOptions;
41use crate::error::{
42    CreateSinkTableSnafu, ExternalSnafu, InvalidClientConfigSnafu, InvalidRequestSnafu,
43    NoAvailableFrontendSnafu, UnexpectedSnafu,
44};
45use crate::{Error, FlowAuthHeader};
46
47/// Adapter trait for [`GrpcQueryHandler`] that boxes the underlying error into [`BoxedError`].
48///
49/// This is mainly used by flownode to invoke a frontend instance in standalone mode.
50#[async_trait::async_trait]
51pub trait GrpcQueryHandlerWithBoxedError: Send + Sync + 'static {
52    async fn do_query(
53        &self,
54        query: Request,
55        ctx: QueryContextRef,
56    ) -> std::result::Result<Output, BoxedError>;
57}
58
59/// auto impl
60#[async_trait::async_trait]
61impl<T: GrpcQueryHandler + Send + Sync + 'static> GrpcQueryHandlerWithBoxedError for T {
62    async fn do_query(
63        &self,
64        query: Request,
65        ctx: QueryContextRef,
66    ) -> std::result::Result<Output, BoxedError> {
67        self.do_query(query, ctx).await.map_err(BoxedError::new)
68    }
69}
70
71#[derive(Debug, Clone)]
72pub struct HandlerMutable {
73    handler: Arc<Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>,
74    is_initialized: Arc<SetOnce<()>>,
75}
76
77impl HandlerMutable {
78    pub async fn set_handler(&self, handler: Weak<dyn GrpcQueryHandlerWithBoxedError>) {
79        *self.handler.lock().unwrap() = Some(handler);
80        // Ignore the error, as we allow the handler to be set multiple times.
81        let _ = self.is_initialized.set(());
82    }
83}
84
85/// A simple frontend client able to execute sql using grpc protocol
86///
87/// This is for computation-heavy query which need to offload computation to frontend, lifting the load from flownode
88#[derive(Debug, Clone)]
89pub enum FrontendClient {
90    Distributed {
91        meta_client: Arc<MetaClient>,
92        chnl_mgr: ChannelManager,
93        auth: Option<FlowAuthHeader>,
94        query: QueryOptions,
95        batch_opts: BatchingModeOptions,
96    },
97    Standalone {
98        /// for the sake of simplicity still use grpc even in standalone mode
99        /// notice the client here should all be lazy, so that can wait after frontend is booted then make conn
100        database_client: HandlerMutable,
101        query: QueryOptions,
102    },
103}
104
105impl FrontendClient {
106    /// Create a new empty frontend client, with a `HandlerMutable` to set the grpc handler later
107    pub fn from_empty_grpc_handler(query: QueryOptions) -> (Self, HandlerMutable) {
108        let is_initialized = Arc::new(SetOnce::new());
109        let handler = HandlerMutable {
110            handler: Arc::new(Mutex::new(None)),
111            is_initialized,
112        };
113        (
114            Self::Standalone {
115                database_client: handler.clone(),
116                query,
117            },
118            handler,
119        )
120    }
121
122    /// Waits until the frontend client is initialized.
123    pub async fn wait_initialized(&self) {
124        if let FrontendClient::Standalone {
125            database_client, ..
126        } = self
127        {
128            database_client.is_initialized.wait().await;
129        }
130    }
131
132    pub fn from_meta_client(
133        meta_client: Arc<MetaClient>,
134        auth: Option<FlowAuthHeader>,
135        query: QueryOptions,
136        batch_opts: BatchingModeOptions,
137    ) -> Result<Self, Error> {
138        common_telemetry::info!("Frontend client build with auth={:?}", auth);
139        Ok(Self::Distributed {
140            meta_client,
141            chnl_mgr: {
142                let cfg = ChannelConfig::new()
143                    .connect_timeout(batch_opts.grpc_conn_timeout)
144                    .timeout(Some(batch_opts.query_timeout));
145
146                let tls_config = load_client_tls_config(batch_opts.frontend_tls.clone())
147                    .context(InvalidClientConfigSnafu)?;
148                ChannelManager::with_config(cfg, tls_config)
149            },
150            auth,
151            query,
152            batch_opts,
153        })
154    }
155
156    pub fn from_grpc_handler(
157        grpc_handler: Weak<dyn GrpcQueryHandlerWithBoxedError>,
158        query: QueryOptions,
159    ) -> Self {
160        let is_initialized = Arc::new(SetOnce::new_with(Some(())));
161        let handler = HandlerMutable {
162            handler: Arc::new(Mutex::new(Some(grpc_handler))),
163            is_initialized: is_initialized.clone(),
164        };
165
166        Self::Standalone {
167            database_client: handler,
168            query,
169        }
170    }
171}
172
173#[derive(Debug, Clone)]
174pub struct DatabaseWithPeer {
175    pub database: Database,
176    pub peer: Peer,
177}
178
179impl DatabaseWithPeer {
180    fn new(database: Database, peer: Peer) -> Self {
181        Self { database, peer }
182    }
183
184    /// Try sending a "SELECT 1" to the database
185    async fn try_select_one(&self) -> Result<(), Error> {
186        // notice here use `sql` for `SELECT 1` return 1 row
187        let _ = self
188            .database
189            .sql("SELECT 1")
190            .await
191            .with_context(|_| InvalidRequestSnafu {
192                context: format!("Failed to handle `SELECT 1` request at {:?}", self.peer),
193            })?;
194        Ok(())
195    }
196}
197
198impl FrontendClient {
199    // TODO: support more fine-grained load balancing strategies for frontend
200    // selection, such as AZ (availability zone) awareness, to prefer frontends
201    // in the same zone as the flownode and reduce cross-AZ latency.
202    /// scan for available frontend from metadata
203    pub(crate) async fn scan_for_frontend(&self) -> Result<Vec<Peer>, Error> {
204        let Self::Distributed { meta_client, .. } = self else {
205            return Ok(vec![]);
206        };
207
208        meta_client
209            .active_frontends()
210            .await
211            .map(|nodes| nodes.into_iter().map(|node| node.peer).collect())
212            .map_err(BoxedError::new)
213            .context(ExternalSnafu)
214    }
215
216    /// Get a frontend discovered by metasrv and verified with a query probe.
217    async fn get_random_active_frontend(
218        &self,
219        catalog: &str,
220        schema: &str,
221    ) -> Result<DatabaseWithPeer, Error> {
222        let Self::Distributed {
223            meta_client: _,
224            chnl_mgr,
225            auth,
226            query: _,
227            batch_opts,
228        } = self
229        else {
230            return UnexpectedSnafu {
231                reason: "Expect distributed mode",
232            }
233            .fail();
234        };
235
236        let mut interval = tokio::time::interval(batch_opts.grpc_conn_timeout);
237        interval.tick().await;
238        for retry in 0..batch_opts.experimental_grpc_max_retries {
239            let mut frontends = self.scan_for_frontend().await?;
240            // shuffle the frontends to avoid always pick the same one
241            frontends.shuffle(&mut rng());
242
243            for peer in frontends {
244                let addr = peer.addr.clone();
245                let client = Client::with_manager_and_urls(chnl_mgr.clone(), vec![addr.clone()]);
246                let database = {
247                    let mut db = Database::new(catalog, schema, client);
248                    if let Some(auth) = auth {
249                        db.set_auth(auth.auth().clone());
250                    }
251                    db
252                };
253                let db = DatabaseWithPeer::new(database, peer);
254                match db.try_select_one().await {
255                    Ok(_) => return Ok(db),
256                    Err(e) => {
257                        warn!(
258                            "Failed to connect to frontend {} on retry={}: \n{e:?}",
259                            addr, retry
260                        );
261                    }
262                }
263            }
264            // no available frontend
265            // sleep and retry
266            interval.tick().await;
267        }
268
269        NoAvailableFrontendSnafu {
270            timeout: batch_opts.grpc_conn_timeout,
271            context: "No available frontend found that is able to process query",
272        }
273        .fail()
274    }
275
276    pub async fn create(
277        &self,
278        create: CreateTableExpr,
279        catalog: &str,
280        schema: &str,
281    ) -> Result<u32, Error> {
282        self.handle(
283            Request::Ddl(api::v1::DdlRequest {
284                expr: Some(api::v1::ddl_request::Expr::CreateTable(create.clone())),
285            }),
286            catalog,
287            schema,
288            &mut None,
289        )
290        .await
291        .map_err(BoxedError::new)
292        .with_context(|_| CreateSinkTableSnafu {
293            create: create.clone(),
294        })
295    }
296
297    /// Execute a SQL statement on the frontend.
298    pub async fn sql(&self, catalog: &str, schema: &str, sql: &str) -> Result<Output, Error> {
299        match self {
300            FrontendClient::Distributed { .. } => {
301                let db = self.get_random_active_frontend(catalog, schema).await?;
302                db.database
303                    .sql(sql)
304                    .await
305                    .map_err(BoxedError::new)
306                    .context(ExternalSnafu)
307            }
308            FrontendClient::Standalone {
309                database_client, ..
310            } => {
311                let ctx = QueryContextBuilder::default()
312                    .current_catalog(catalog.to_string())
313                    .current_schema(schema.to_string())
314                    .build();
315                let ctx = Arc::new(ctx);
316                {
317                    let database_client = {
318                        database_client
319                            .handler
320                            .lock()
321                            .unwrap()
322                            .as_ref()
323                            .context(UnexpectedSnafu {
324                                reason: "Standalone's frontend instance is not set",
325                            })?
326                            .upgrade()
327                            .context(UnexpectedSnafu {
328                                reason: "Failed to upgrade database client",
329                            })?
330                    };
331                    let req = Request::Query(QueryRequest {
332                        query: Some(Query::Sql(sql.to_string())),
333                    });
334                    database_client
335                        .do_query(req, ctx)
336                        .await
337                        .map_err(BoxedError::new)
338                        .context(ExternalSnafu)
339                }
340            }
341        }
342    }
343
344    /// Handle a request to frontend
345    pub(crate) async fn handle(
346        &self,
347        req: api::v1::greptime_request::Request,
348        catalog: &str,
349        schema: &str,
350        peer_desc: &mut Option<PeerDesc>,
351    ) -> Result<u32, Error> {
352        match self {
353            FrontendClient::Distributed {
354                query, batch_opts, ..
355            } => {
356                let db = self.get_random_active_frontend(catalog, schema).await?;
357
358                *peer_desc = Some(PeerDesc::Dist {
359                    peer: db.peer.clone(),
360                });
361
362                db.database
363                    .handle_with_retry(
364                        req.clone(),
365                        batch_opts.experimental_grpc_max_retries,
366                        &[
367                            (QUERY_PARALLELISM_HINT, &query.parallelism.to_string()),
368                            (READ_PREFERENCE_HINT, batch_opts.read_preference.as_ref()),
369                        ],
370                    )
371                    .await
372                    .with_context(|_| InvalidRequestSnafu {
373                        context: format!("Failed to handle request at {:?}: {:?}", db.peer, req),
374                    })
375            }
376            FrontendClient::Standalone {
377                database_client,
378                query,
379            } => {
380                let ctx = QueryContextBuilder::default()
381                    .current_catalog(catalog.to_string())
382                    .current_schema(schema.to_string())
383                    .extensions(HashMap::from([(
384                        QUERY_PARALLELISM_HINT.to_string(),
385                        query.parallelism.to_string(),
386                    )]))
387                    .build();
388                let ctx = Arc::new(ctx);
389                {
390                    let database_client = {
391                        database_client
392                            .handler
393                            .lock()
394                            .unwrap()
395                            .as_ref()
396                            .context(UnexpectedSnafu {
397                                reason: "Standalone's frontend instance is not set",
398                            })?
399                            .upgrade()
400                            .context(UnexpectedSnafu {
401                                reason: "Failed to upgrade database client",
402                            })?
403                    };
404                    let resp: common_query::Output = database_client
405                        .do_query(req, ctx)
406                        .await
407                        .map_err(BoxedError::new)
408                        .context(ExternalSnafu)?;
409                    match resp.data {
410                        common_query::OutputData::AffectedRows(rows) => {
411                            Ok(rows.try_into().map_err(|_| {
412                                UnexpectedSnafu {
413                                    reason: format!("Failed to convert rows to u32: {}", rows),
414                                }
415                                .build()
416                            })?)
417                        }
418                        _ => UnexpectedSnafu {
419                            reason: "Unexpected output data",
420                        }
421                        .fail(),
422                    }
423                }
424            }
425        }
426    }
427}
428
429/// Describe a peer of frontend
430#[derive(Debug, Default)]
431pub(crate) enum PeerDesc {
432    /// Distributed mode's frontend peer address
433    Dist {
434        /// frontend peer address
435        peer: Peer,
436    },
437    /// Standalone mode
438    #[default]
439    Standalone,
440}
441
442impl std::fmt::Display for PeerDesc {
443    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444        match self {
445            PeerDesc::Dist { peer } => write!(f, "{}", peer.addr),
446            PeerDesc::Standalone => write!(f, "standalone"),
447        }
448    }
449}
450
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use common_query::Output;
    use tokio::time::timeout;

    use super::*;

    /// Handler stub that answers every query with zero affected rows.
    #[derive(Debug)]
    struct NoopHandler;

    #[async_trait::async_trait]
    impl GrpcQueryHandlerWithBoxedError for NoopHandler {
        async fn do_query(
            &self,
            _query: Request,
            _ctx: QueryContextRef,
        ) -> std::result::Result<Output, BoxedError> {
            Ok(Output::new_with_affected_rows(0))
        }
    }

    /// Covers `wait_initialized` for all three ways of constructing a client.
    #[tokio::test]
    async fn wait_initialized() {
        let short = Duration::from_millis(10);

        // 1. Empty standalone client: blocks until a handler is installed.
        let (pending_client, handler_mut) =
            FrontendClient::from_empty_grpc_handler(QueryOptions::default());

        let before_set = timeout(
            Duration::from_millis(50),
            pending_client.wait_initialized(),
        )
        .await;
        assert!(before_set.is_err());

        let first_handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
        handler_mut
            .set_handler(Arc::downgrade(&first_handler))
            .await;

        timeout(Duration::from_secs(1), pending_client.wait_initialized())
            .await
            .expect("wait_initialized should complete after handler is set");

        timeout(short, pending_client.wait_initialized())
            .await
            .expect("wait_initialized should be a no-op once initialized");

        // 2. Standalone client built with a handler: initialized from the start.
        let second_handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
        let ready_client = FrontendClient::from_grpc_handler(
            Arc::downgrade(&second_handler),
            QueryOptions::default(),
        );
        let ready = timeout(short, ready_client.wait_initialized()).await;
        assert!(ready.is_ok());

        // 3. Distributed client: never waits.
        let meta_client = Arc::new(MetaClient::new(0, api::v1::meta::Role::Frontend));
        let dist_client = FrontendClient::from_meta_client(
            meta_client,
            None,
            QueryOptions::default(),
            BatchingModeOptions::default(),
        )
        .unwrap();
        let dist_ready = timeout(short, dist_client.wait_initialized()).await;
        assert!(dist_ready.is_ok());
    }
}