1use std::collections::HashMap;
18use std::sync::{Arc, Mutex, Weak};
19
20use api::v1::greptime_request::Request;
21use api::v1::query_request::Query;
22use api::v1::{CreateTableExpr, QueryRequest};
23use client::{Client, Database};
24use common_error::ext::BoxedError;
25use common_grpc::channel_manager::{ChannelConfig, ChannelManager, load_client_tls_config};
26use common_meta::peer::{Peer, PeerDiscovery};
27use common_query::Output;
28use common_telemetry::warn;
29use meta_client::client::MetaClient;
30use query::datafusion::QUERY_PARALLELISM_HINT;
31use query::options::QueryOptions;
32use rand::rng;
33use rand::seq::SliceRandom;
34use servers::query_handler::grpc::GrpcQueryHandler;
35use session::context::{QueryContextBuilder, QueryContextRef};
36use session::hints::READ_PREFERENCE_HINT;
37use snafu::{OptionExt, ResultExt};
38use tokio::sync::SetOnce;
39
40use crate::batching_mode::BatchingModeOptions;
41use crate::error::{
42 CreateSinkTableSnafu, ExternalSnafu, InvalidClientConfigSnafu, InvalidRequestSnafu,
43 NoAvailableFrontendSnafu, UnexpectedSnafu,
44};
45use crate::{Error, FlowAuthHeader};
46
47#[async_trait::async_trait]
51pub trait GrpcQueryHandlerWithBoxedError: Send + Sync + 'static {
52 async fn do_query(
53 &self,
54 query: Request,
55 ctx: QueryContextRef,
56 ) -> std::result::Result<Output, BoxedError>;
57}
58
59#[async_trait::async_trait]
61impl<T: GrpcQueryHandler + Send + Sync + 'static> GrpcQueryHandlerWithBoxedError for T {
62 async fn do_query(
63 &self,
64 query: Request,
65 ctx: QueryContextRef,
66 ) -> std::result::Result<Output, BoxedError> {
67 self.do_query(query, ctx).await.map_err(BoxedError::new)
68 }
69}
70
71#[derive(Debug, Clone)]
72pub struct HandlerMutable {
73 handler: Arc<Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>,
74 is_initialized: Arc<SetOnce<()>>,
75}
76
77impl HandlerMutable {
78 pub async fn set_handler(&self, handler: Weak<dyn GrpcQueryHandlerWithBoxedError>) {
79 *self.handler.lock().unwrap() = Some(handler);
80 let _ = self.is_initialized.set(());
82 }
83}
84
85#[derive(Debug, Clone)]
89pub enum FrontendClient {
90 Distributed {
91 meta_client: Arc<MetaClient>,
92 chnl_mgr: ChannelManager,
93 auth: Option<FlowAuthHeader>,
94 query: QueryOptions,
95 batch_opts: BatchingModeOptions,
96 },
97 Standalone {
98 database_client: HandlerMutable,
101 query: QueryOptions,
102 },
103}
104
105impl FrontendClient {
106 pub fn from_empty_grpc_handler(query: QueryOptions) -> (Self, HandlerMutable) {
108 let is_initialized = Arc::new(SetOnce::new());
109 let handler = HandlerMutable {
110 handler: Arc::new(Mutex::new(None)),
111 is_initialized,
112 };
113 (
114 Self::Standalone {
115 database_client: handler.clone(),
116 query,
117 },
118 handler,
119 )
120 }
121
122 pub async fn wait_initialized(&self) {
124 if let FrontendClient::Standalone {
125 database_client, ..
126 } = self
127 {
128 database_client.is_initialized.wait().await;
129 }
130 }
131
132 pub fn from_meta_client(
133 meta_client: Arc<MetaClient>,
134 auth: Option<FlowAuthHeader>,
135 query: QueryOptions,
136 batch_opts: BatchingModeOptions,
137 ) -> Result<Self, Error> {
138 common_telemetry::info!("Frontend client build with auth={:?}", auth);
139 Ok(Self::Distributed {
140 meta_client,
141 chnl_mgr: {
142 let cfg = ChannelConfig::new()
143 .connect_timeout(batch_opts.grpc_conn_timeout)
144 .timeout(Some(batch_opts.query_timeout));
145
146 let tls_config = load_client_tls_config(batch_opts.frontend_tls.clone())
147 .context(InvalidClientConfigSnafu)?;
148 ChannelManager::with_config(cfg, tls_config)
149 },
150 auth,
151 query,
152 batch_opts,
153 })
154 }
155
156 pub fn from_grpc_handler(
157 grpc_handler: Weak<dyn GrpcQueryHandlerWithBoxedError>,
158 query: QueryOptions,
159 ) -> Self {
160 let is_initialized = Arc::new(SetOnce::new_with(Some(())));
161 let handler = HandlerMutable {
162 handler: Arc::new(Mutex::new(Some(grpc_handler))),
163 is_initialized: is_initialized.clone(),
164 };
165
166 Self::Standalone {
167 database_client: handler,
168 query,
169 }
170 }
171}
172
173#[derive(Debug, Clone)]
174pub struct DatabaseWithPeer {
175 pub database: Database,
176 pub peer: Peer,
177}
178
179impl DatabaseWithPeer {
180 fn new(database: Database, peer: Peer) -> Self {
181 Self { database, peer }
182 }
183
184 async fn try_select_one(&self) -> Result<(), Error> {
186 let _ = self
188 .database
189 .sql("SELECT 1")
190 .await
191 .with_context(|_| InvalidRequestSnafu {
192 context: format!("Failed to handle `SELECT 1` request at {:?}", self.peer),
193 })?;
194 Ok(())
195 }
196}
197
198impl FrontendClient {
199 pub(crate) async fn scan_for_frontend(&self) -> Result<Vec<Peer>, Error> {
204 let Self::Distributed { meta_client, .. } = self else {
205 return Ok(vec![]);
206 };
207
208 meta_client
209 .active_frontends()
210 .await
211 .map(|nodes| nodes.into_iter().map(|node| node.peer).collect())
212 .map_err(BoxedError::new)
213 .context(ExternalSnafu)
214 }
215
216 async fn get_random_active_frontend(
218 &self,
219 catalog: &str,
220 schema: &str,
221 ) -> Result<DatabaseWithPeer, Error> {
222 let Self::Distributed {
223 meta_client: _,
224 chnl_mgr,
225 auth,
226 query: _,
227 batch_opts,
228 } = self
229 else {
230 return UnexpectedSnafu {
231 reason: "Expect distributed mode",
232 }
233 .fail();
234 };
235
236 let mut interval = tokio::time::interval(batch_opts.grpc_conn_timeout);
237 interval.tick().await;
238 for retry in 0..batch_opts.experimental_grpc_max_retries {
239 let mut frontends = self.scan_for_frontend().await?;
240 frontends.shuffle(&mut rng());
242
243 for peer in frontends {
244 let addr = peer.addr.clone();
245 let client = Client::with_manager_and_urls(chnl_mgr.clone(), vec![addr.clone()]);
246 let database = {
247 let mut db = Database::new(catalog, schema, client);
248 if let Some(auth) = auth {
249 db.set_auth(auth.auth().clone());
250 }
251 db
252 };
253 let db = DatabaseWithPeer::new(database, peer);
254 match db.try_select_one().await {
255 Ok(_) => return Ok(db),
256 Err(e) => {
257 warn!(
258 "Failed to connect to frontend {} on retry={}: \n{e:?}",
259 addr, retry
260 );
261 }
262 }
263 }
264 interval.tick().await;
267 }
268
269 NoAvailableFrontendSnafu {
270 timeout: batch_opts.grpc_conn_timeout,
271 context: "No available frontend found that is able to process query",
272 }
273 .fail()
274 }
275
276 pub async fn create(
277 &self,
278 create: CreateTableExpr,
279 catalog: &str,
280 schema: &str,
281 ) -> Result<u32, Error> {
282 self.handle(
283 Request::Ddl(api::v1::DdlRequest {
284 expr: Some(api::v1::ddl_request::Expr::CreateTable(create.clone())),
285 }),
286 catalog,
287 schema,
288 &mut None,
289 )
290 .await
291 .map_err(BoxedError::new)
292 .with_context(|_| CreateSinkTableSnafu {
293 create: create.clone(),
294 })
295 }
296
297 pub async fn sql(&self, catalog: &str, schema: &str, sql: &str) -> Result<Output, Error> {
299 match self {
300 FrontendClient::Distributed { .. } => {
301 let db = self.get_random_active_frontend(catalog, schema).await?;
302 db.database
303 .sql(sql)
304 .await
305 .map_err(BoxedError::new)
306 .context(ExternalSnafu)
307 }
308 FrontendClient::Standalone {
309 database_client, ..
310 } => {
311 let ctx = QueryContextBuilder::default()
312 .current_catalog(catalog.to_string())
313 .current_schema(schema.to_string())
314 .build();
315 let ctx = Arc::new(ctx);
316 {
317 let database_client = {
318 database_client
319 .handler
320 .lock()
321 .unwrap()
322 .as_ref()
323 .context(UnexpectedSnafu {
324 reason: "Standalone's frontend instance is not set",
325 })?
326 .upgrade()
327 .context(UnexpectedSnafu {
328 reason: "Failed to upgrade database client",
329 })?
330 };
331 let req = Request::Query(QueryRequest {
332 query: Some(Query::Sql(sql.to_string())),
333 });
334 database_client
335 .do_query(req, ctx)
336 .await
337 .map_err(BoxedError::new)
338 .context(ExternalSnafu)
339 }
340 }
341 }
342 }
343
344 pub(crate) async fn handle(
346 &self,
347 req: api::v1::greptime_request::Request,
348 catalog: &str,
349 schema: &str,
350 peer_desc: &mut Option<PeerDesc>,
351 ) -> Result<u32, Error> {
352 match self {
353 FrontendClient::Distributed {
354 query, batch_opts, ..
355 } => {
356 let db = self.get_random_active_frontend(catalog, schema).await?;
357
358 *peer_desc = Some(PeerDesc::Dist {
359 peer: db.peer.clone(),
360 });
361
362 db.database
363 .handle_with_retry(
364 req.clone(),
365 batch_opts.experimental_grpc_max_retries,
366 &[
367 (QUERY_PARALLELISM_HINT, &query.parallelism.to_string()),
368 (READ_PREFERENCE_HINT, batch_opts.read_preference.as_ref()),
369 ],
370 )
371 .await
372 .with_context(|_| InvalidRequestSnafu {
373 context: format!("Failed to handle request at {:?}: {:?}", db.peer, req),
374 })
375 }
376 FrontendClient::Standalone {
377 database_client,
378 query,
379 } => {
380 let ctx = QueryContextBuilder::default()
381 .current_catalog(catalog.to_string())
382 .current_schema(schema.to_string())
383 .extensions(HashMap::from([(
384 QUERY_PARALLELISM_HINT.to_string(),
385 query.parallelism.to_string(),
386 )]))
387 .build();
388 let ctx = Arc::new(ctx);
389 {
390 let database_client = {
391 database_client
392 .handler
393 .lock()
394 .unwrap()
395 .as_ref()
396 .context(UnexpectedSnafu {
397 reason: "Standalone's frontend instance is not set",
398 })?
399 .upgrade()
400 .context(UnexpectedSnafu {
401 reason: "Failed to upgrade database client",
402 })?
403 };
404 let resp: common_query::Output = database_client
405 .do_query(req, ctx)
406 .await
407 .map_err(BoxedError::new)
408 .context(ExternalSnafu)?;
409 match resp.data {
410 common_query::OutputData::AffectedRows(rows) => {
411 Ok(rows.try_into().map_err(|_| {
412 UnexpectedSnafu {
413 reason: format!("Failed to convert rows to u32: {}", rows),
414 }
415 .build()
416 })?)
417 }
418 _ => UnexpectedSnafu {
419 reason: "Unexpected output data",
420 }
421 .fail(),
422 }
423 }
424 }
425 }
426 }
427}
428
429#[derive(Debug, Default)]
431pub(crate) enum PeerDesc {
432 Dist {
434 peer: Peer,
436 },
437 #[default]
439 Standalone,
440}
441
442impl std::fmt::Display for PeerDesc {
443 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444 match self {
445 PeerDesc::Dist { peer } => write!(f, "{}", peer.addr),
446 PeerDesc::Standalone => write!(f, "standalone"),
447 }
448 }
449}
450
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use common_query::Output;
    use tokio::time::timeout;

    use super::*;

    /// Handler stub that reports zero affected rows for every request.
    #[derive(Debug)]
    struct NoopHandler;

    #[async_trait::async_trait]
    impl GrpcQueryHandlerWithBoxedError for NoopHandler {
        async fn do_query(
            &self,
            _query: Request,
            _ctx: QueryContextRef,
        ) -> Result<Output, BoxedError> {
            Ok(Output::new_with_affected_rows(0))
        }
    }

    /// Reports whether `client.wait_initialized()` completes within `limit`.
    async fn ready_within(client: &FrontendClient, limit: Duration) -> bool {
        timeout(limit, client.wait_initialized()).await.is_ok()
    }

    #[tokio::test]
    async fn wait_initialized() {
        // Standalone client without a handler: waiting must not complete.
        let (client, handler_mut) =
            FrontendClient::from_empty_grpc_handler(QueryOptions::default());
        assert!(!ready_within(&client, Duration::from_millis(50)).await);

        // Installing a handler releases the waiters.
        let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
        handler_mut.set_handler(Arc::downgrade(&handler)).await;

        timeout(Duration::from_secs(1), client.wait_initialized())
            .await
            .expect("wait_initialized should complete after handler is set");

        timeout(Duration::from_millis(10), client.wait_initialized())
            .await
            .expect("wait_initialized should be a no-op once initialized");

        // Standalone client built around an existing handler starts out initialized.
        let handler: Arc<dyn GrpcQueryHandlerWithBoxedError> = Arc::new(NoopHandler);
        let client =
            FrontendClient::from_grpc_handler(Arc::downgrade(&handler), QueryOptions::default());
        assert!(ready_within(&client, Duration::from_millis(10)).await);

        // Distributed client: there is nothing to wait for.
        let meta_client = Arc::new(MetaClient::new(0, api::v1::meta::Role::Frontend));
        let client = FrontendClient::from_meta_client(
            meta_client,
            None,
            QueryOptions::default(),
            BatchingModeOptions::default(),
        )
        .unwrap();
        assert!(ready_within(&client, Duration::from_millis(10)).await);
    }
}