Skip to main content

datanode/
region_server.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15mod catalog;
16
17use std::collections::HashMap;
18use std::fmt::Debug;
19use std::ops::Deref;
20use std::pin::Pin;
21use std::sync::atomic::{AtomicBool, Ordering};
22use std::sync::{Arc, RwLock};
23use std::task::{Context, Poll};
24use std::time::Duration;
25
26use api::region::RegionResponse;
27use api::v1::meta::TopicStat;
28use api::v1::region::remote_dyn_filter_request::Action;
29use api::v1::region::sync_request::ManifestInfo;
30use api::v1::region::{
31    ListMetadataRequest, RegionResponse as RegionResponseV1, RemoteDynFilterRequest, SyncRequest,
32    region_request,
33};
34use api::v1::{ResponseHeader, Status};
35use arrow_flight::{FlightData, Ticket};
36use async_trait::async_trait;
37use bytes::Bytes;
38use common_error::ext::{BoxedError, ErrorExt};
39use common_error::status_code::StatusCode;
40use common_meta::datanode::TopicStatsReporter;
41use common_query::OutputData;
42use common_query::request::QueryRequest;
43use common_recordbatch::adapter::RecordBatchMetrics;
44use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
45use common_runtime::Runtime;
46use common_telemetry::tracing::{self, info_span};
47use common_telemetry::tracing_context::{FutureExt, TracingContext};
48use common_telemetry::{debug, error, info, warn};
49use dashmap::DashMap;
50use datafusion::datasource::TableProvider;
51use datafusion_common::tree_node::TreeNode;
52use either::Either;
53use futures_util::Stream;
54use futures_util::future::try_join_all;
55use metric_engine::engine::MetricEngine;
56use mito2::engine::{MITO_ENGINE_NAME, MitoEngine};
57use prost::Message;
58use query::QueryEngineRef;
59pub use query::dummy_catalog::{
60    DummyCatalogList, DummyTableProviderFactory, TableProviderFactoryRef,
61};
62use query::options::should_collect_region_watermark_from_extensions;
63use serde_json;
64use servers::error::{
65    self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult, SuspendedSnafu,
66};
67use servers::grpc::FlightCompression;
68use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
69use servers::grpc::region_server::RegionServerHandler;
70use session::context::{QueryContext, QueryContextBuilder, QueryContextRef};
71use snafu::{OptionExt, ResultExt, ensure};
72use store_api::metric_engine_consts::{
73    FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,
74};
75use store_api::region_engine::{
76    RegionEngineRef, RegionManifestInfo, RegionRole, RegionStatistic, RemapManifestsRequest,
77    RemapManifestsResponse, SetRegionRoleStateResponse, SettableRegionRoleState,
78    SyncRegionFromRequest,
79};
80use store_api::region_request::{
81    AffectedRows, BatchRegionDdlRequest, RegionCatchupRequest, RegionCloseRequest,
82    RegionOpenRequest, RegionRequest,
83};
84use store_api::storage::RegionId;
85use tokio::sync::{Semaphore, SemaphorePermit};
86use tokio::time::timeout;
87use tonic::{Request, Response, Result as TonicResult};
88
89use crate::error::{
90    self, BuildRegionRequestsSnafu, ConcurrentQueryLimiterClosedSnafu,
91    ConcurrentQueryLimiterTimeoutSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
92    ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, GetRegionMetadataSnafu,
93    HandleBatchDdlRequestSnafu, HandleBatchOpenRequestSnafu, HandleRegionRequestSnafu,
94    NewPlanDecoderSnafu, NotYetImplementedSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu,
95    RegionNotReadySnafu, Result, SerializeJsonSnafu, StopRegionEngineSnafu, UnexpectedSnafu,
96    UnsupportedOutputSnafu,
97};
98use crate::event_listener::RegionServerEventListenerRef;
99use crate::region_server::catalog::{NameAwareCatalogList, NameAwareDataSourceInjectorBuilder};
100
/// Handle to the region server.
///
/// The handle derives `Clone`; the inner state and the suspend flag are both
/// held behind [`Arc`]s, so clones share them.
#[derive(Clone)]
pub struct RegionServer {
    /// Shared server state that most methods delegate to.
    inner: Arc<RegionServerInner>,
    /// Flight compression setting supplied at construction time.
    flight_compression: FlightCompression,
    /// Shared suspend flag; the constructors initialise it to `false`.
    suspend: Arc<AtomicBool>,
}
107
/// Per-region summary, as produced by `RegionServer::reportable_regions`.
pub struct RegionStat {
    /// Id of the region.
    pub region_id: RegionId,
    /// Name of the engine the region is registered with.
    pub engine: String,
    /// Role reported for the region.
    pub role: RegionRole,
}
113
114impl RegionServer {
115    pub fn new(
116        query_engine: QueryEngineRef,
117        runtime: Runtime,
118        event_listener: RegionServerEventListenerRef,
119        flight_compression: FlightCompression,
120    ) -> Self {
121        Self::with_table_provider(
122            query_engine,
123            runtime,
124            event_listener,
125            Arc::new(DummyTableProviderFactory),
126            0,
127            Duration::from_millis(0),
128            flight_compression,
129        )
130    }
131
132    pub fn with_table_provider(
133        query_engine: QueryEngineRef,
134        runtime: Runtime,
135        event_listener: RegionServerEventListenerRef,
136        table_provider_factory: TableProviderFactoryRef,
137        max_concurrent_queries: usize,
138        concurrent_query_limiter_timeout: Duration,
139        flight_compression: FlightCompression,
140    ) -> Self {
141        Self {
142            inner: Arc::new(RegionServerInner::new(
143                query_engine,
144                runtime,
145                event_listener,
146                table_provider_factory,
147                RegionServerParallelism::from_opts(
148                    max_concurrent_queries,
149                    concurrent_query_limiter_timeout,
150                ),
151            )),
152            flight_compression,
153            suspend: Arc::new(AtomicBool::new(false)),
154        }
155    }
156
157    /// Registers an engine.
158    pub fn register_engine(&mut self, engine: RegionEngineRef) {
159        self.inner.register_engine(engine);
160    }
161
162    /// Sets the topic stats.
163    pub fn set_topic_stats_reporter(&mut self, topic_stats_reporter: Box<dyn TopicStatsReporter>) {
164        self.inner.set_topic_stats_reporter(topic_stats_reporter);
165    }
166
167    /// Finds the region's engine by its id. If the region is not ready, returns `None`.
168    pub fn find_engine(&self, region_id: RegionId) -> Result<Option<RegionEngineRef>> {
169        match self.inner.get_engine(region_id, &RegionChange::None) {
170            Ok(CurrentEngine::Engine(engine)) => Ok(Some(engine)),
171            Ok(CurrentEngine::EarlyReturn(_)) => Ok(None),
172            Err(error::Error::RegionNotFound { .. }) => Ok(None),
173            Err(err) => Err(err),
174        }
175    }
176
177    /// Gets the MitoEngine if it's registered.
178    pub fn mito_engine(&self) -> Option<MitoEngine> {
179        if let Some(mito) = self.inner.mito_engine.read().unwrap().clone() {
180            Some(mito)
181        } else {
182            self.inner
183                .engines
184                .read()
185                .unwrap()
186                .get(MITO_ENGINE_NAME)
187                .cloned()
188                .and_then(|e| {
189                    let mito = e.as_any().downcast_ref::<MitoEngine>().cloned();
190                    if mito.is_none() {
191                        warn!("Mito engine not found in region server engines");
192                    }
193                    mito
194                })
195        }
196    }
197
198    #[tracing::instrument(skip_all)]
199    pub async fn handle_batch_open_requests(
200        &self,
201        parallelism: usize,
202        requests: Vec<(RegionId, RegionOpenRequest)>,
203        ignore_nonexistent_region: bool,
204    ) -> Result<Vec<RegionId>> {
205        self.inner
206            .handle_batch_open_requests(parallelism, requests, ignore_nonexistent_region)
207            .await
208    }
209
210    #[tracing::instrument(skip_all)]
211    pub async fn handle_batch_catchup_requests(
212        &self,
213        parallelism: usize,
214        requests: Vec<(RegionId, RegionCatchupRequest)>,
215    ) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
216        self.inner
217            .handle_batch_catchup_requests(parallelism, requests)
218            .await
219    }
220
221    #[tracing::instrument(skip_all, fields(request_type = request.request_type()))]
222    pub async fn handle_request(
223        &self,
224        region_id: RegionId,
225        request: RegionRequest,
226    ) -> Result<RegionResponse> {
227        self.inner.handle_request(region_id, request).await
228    }
229
230    /// Returns a table provider for the region. Will set snapshot sequence if available in the context.
231    async fn table_provider(
232        &self,
233        region_id: RegionId,
234        ctx: Option<QueryContextRef>,
235    ) -> Result<Arc<dyn TableProvider>> {
236        let status = self
237            .inner
238            .region_map
239            .get(&region_id)
240            .context(RegionNotFoundSnafu { region_id })?
241            .clone();
242        ensure!(
243            matches!(status, RegionEngineWithStatus::Ready(_)),
244            RegionNotReadySnafu { region_id }
245        );
246
247        self.inner
248            .table_provider_factory
249            .create(region_id, status.into_engine(), ctx)
250            .await
251            .context(ExecuteLogicalPlanSnafu)
252    }
253
254    /// Handle reads from remote. They're often query requests received by our Arrow Flight service.
255    pub async fn handle_remote_read(
256        &self,
257        request: api::v1::region::QueryRequest,
258        query_ctx: QueryContextRef,
259    ) -> Result<SendableRecordBatchStream> {
260        let _permit = if let Some(p) = &self.inner.parallelism {
261            Some(p.acquire().await?)
262        } else {
263            None
264        };
265
266        let region_id = RegionId::from_u64(request.region_id);
267        let catalog_list = Arc::new(NameAwareCatalogList::new(
268            self.clone(),
269            region_id,
270            query_ctx.clone(),
271        ));
272
273        if query_ctx.explain_verbose() {
274            common_telemetry::info!("Handle remote read for region: {}", region_id);
275        }
276
277        let decoder = self
278            .inner
279            .query_engine
280            .engine_context(query_ctx.clone())
281            .new_plan_decoder()
282            .context(NewPlanDecoderSnafu)?;
283
284        let plan = decoder
285            .decode(Bytes::from(request.plan), catalog_list, false)
286            .await
287            .context(DecodeLogicalPlanSnafu)?;
288
289        let stream = self
290            .inner
291            .handle_read(
292                QueryRequest {
293                    header: request.header,
294                    region_id,
295                    plan,
296                },
297                query_ctx.clone(),
298            )
299            .await?;
300
301        Ok(wrap_flow_region_watermark_stream(
302            stream, region_id, &query_ctx,
303        ))
304    }
305
306    #[tracing::instrument(skip_all)]
307    pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
308        let _permit = if let Some(p) = &self.inner.parallelism {
309            Some(p.acquire().await?)
310        } else {
311            None
312        };
313
314        let ctx = request.header.as_ref().map(|h| h.into());
315        let query_ctx = Arc::new(ctx.unwrap_or_else(|| QueryContextBuilder::default().build()));
316
317        let region_id = request.region_id;
318        let injector_builder = NameAwareDataSourceInjectorBuilder::from_plan(&request.plan)
319            .context(DataFusionSnafu)?;
320        let mut injector = injector_builder
321            .build(self, request.region_id, query_ctx.clone())
322            .await?;
323
324        let plan = request
325            .plan
326            .rewrite(&mut injector)
327            .context(DataFusionSnafu)?
328            .data;
329
330        let stream = self
331            .inner
332            .handle_read(QueryRequest { plan, ..request }, query_ctx.clone())
333            .await?;
334
335        Ok(wrap_flow_region_watermark_stream(
336            stream, region_id, &query_ctx,
337        ))
338    }
339
340    /// Returns all opened and reportable regions.
341    ///
342    /// Notes: except all metrics regions.
343    pub fn reportable_regions(&self) -> Vec<RegionStat> {
344        self.inner
345            .region_map
346            .iter()
347            .filter_map(|e| {
348                let region_id = *e.key();
349                // Filters out any regions whose role equals None.
350                e.role(region_id).map(|role| RegionStat {
351                    region_id,
352                    engine: e.value().name().to_string(),
353                    role,
354                })
355            })
356            .collect()
357    }
358
359    /// Returns the reportable topics.
360    pub fn topic_stats(&self) -> Vec<TopicStat> {
361        let mut reporter = self.inner.topic_stats_reporter.write().unwrap();
362        let Some(reporter) = reporter.as_mut() else {
363            return vec![];
364        };
365        reporter
366            .reportable_topics()
367            .into_iter()
368            .map(|stat| TopicStat {
369                topic_name: stat.topic,
370                record_size: stat.record_size,
371                record_num: stat.record_num,
372                latest_entry_id: stat.latest_entry_id,
373            })
374            .collect()
375    }
376
377    pub fn is_region_leader(&self, region_id: RegionId) -> Option<bool> {
378        self.inner.region_map.get(&region_id).and_then(|engine| {
379            engine.role(region_id).map(|role| match role {
380                RegionRole::Follower => false,
381                RegionRole::Leader => true,
382                RegionRole::StagingLeader => true,
383                RegionRole::DowngradingLeader => true,
384            })
385        })
386    }
387
388    pub fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<()> {
389        let engine = self
390            .inner
391            .region_map
392            .get(&region_id)
393            .with_context(|| RegionNotFoundSnafu { region_id })?;
394        engine
395            .set_region_role(region_id, role)
396            .with_context(|_| HandleRegionRequestSnafu { region_id })
397    }
398
399    /// Set region role state gracefully.
400    ///
401    /// For [SettableRegionRoleState::Follower]:
402    /// After the call returns, the engine ensures that
403    /// no **further** write or flush operations will succeed in this region.
404    ///
405    /// For [SettableRegionRoleState::DowngradingLeader]:
406    /// After the call returns, the engine ensures that
407    /// no **further** write operations will succeed in this region.
408    pub async fn set_region_role_state_gracefully(
409        &self,
410        region_id: RegionId,
411        state: SettableRegionRoleState,
412    ) -> Result<SetRegionRoleStateResponse> {
413        match self.inner.region_map.get(&region_id) {
414            Some(engine) => Ok(engine
415                .set_region_role_state_gracefully(region_id, state)
416                .await
417                .with_context(|_| HandleRegionRequestSnafu { region_id })?),
418            None => Ok(SetRegionRoleStateResponse::NotFound),
419        }
420    }
421
422    pub fn runtime(&self) -> Runtime {
423        self.inner.runtime.clone()
424    }
425
426    pub fn region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
427        match self.inner.region_map.get(&region_id) {
428            Some(e) => e.region_statistic(region_id),
429            None => None,
430        }
431    }
432
433    /// Stop the region server.
434    pub async fn stop(&self) -> Result<()> {
435        self.inner.stop().await
436    }
437
438    #[cfg(test)]
439    /// Registers a region for test purpose.
440    pub(crate) fn register_test_region(&self, region_id: RegionId, engine: RegionEngineRef) {
441        {
442            let mut engines = self.inner.engines.write().unwrap();
443            if !engines.contains_key(engine.name()) {
444                debug!("Registering test engine: {}", engine.name());
445                engines.insert(engine.name().to_string(), engine.clone());
446            }
447        }
448
449        self.inner
450            .region_map
451            .insert(region_id, RegionEngineWithStatus::Ready(engine));
452    }
453
454    async fn handle_batch_ddl_requests(
455        &self,
456        request: region_request::Body,
457    ) -> Result<RegionResponse> {
458        // Safety: we have already checked the request type in `RegionServer::handle()`.
459        let batch_request = BatchRegionDdlRequest::try_from_request_body(request)
460            .context(BuildRegionRequestsSnafu)?
461            .unwrap();
462        let tracing_context = TracingContext::from_current_span();
463
464        let span = tracing_context.attach(info_span!("RegionServer::handle_batch_ddl_requests"));
465        self.inner
466            .handle_batch_request(batch_request)
467            .trace(span)
468            .await
469    }
470
471    async fn handle_requests_in_parallel(
472        &self,
473        request: region_request::Body,
474    ) -> Result<RegionResponse> {
475        let requests =
476            RegionRequest::try_from_request_body(request).context(BuildRegionRequestsSnafu)?;
477
478        // Try to optimize batch Put requests for metric engine
479        // Returns either Some(response) or None(requests_back)
480        match self.try_handle_metric_batch_puts(requests).await? {
481            Either::Left(response) => Ok(response),
482            Either::Right(requests) => {
483                // Fallback: original parallel processing
484                let tracing_context = TracingContext::from_current_span();
485                let join_tasks =
486                    requests
487                        .into_iter()
488                        .map(|(region_id, req): (RegionId, RegionRequest)| {
489                            let self_to_move = self;
490                            let span = tracing_context.attach(info_span!(
491                                "RegionServer::handle_region_request",
492                                region_id = region_id.to_string()
493                            ));
494                            async move {
495                                self_to_move
496                                    .handle_request(region_id, req)
497                                    .trace(span)
498                                    .await
499                            }
500                        });
501
502                let results = try_join_all(join_tasks).await?;
503                let mut affected_rows = 0;
504                let mut extensions = HashMap::new();
505                for result in results {
506                    affected_rows += result.affected_rows;
507                    extensions.extend(result.extensions);
508                }
509
510                Ok(RegionResponse {
511                    affected_rows,
512                    extensions,
513                    metadata: Vec::new(),
514                })
515            }
516        }
517    }
518
519    async fn handle_requests_in_serial(
520        &self,
521        request: region_request::Body,
522    ) -> Result<RegionResponse> {
523        let requests =
524            RegionRequest::try_from_request_body(request).context(BuildRegionRequestsSnafu)?;
525        let tracing_context = TracingContext::from_current_span();
526
527        let mut affected_rows = 0;
528        let mut extensions = HashMap::new();
529        for (region_id, req) in requests {
530            let span = tracing_context.attach(info_span!(
531                "RegionServer::handle_region_request",
532                region_id = region_id.to_string()
533            ));
534            let result = self.handle_request(region_id, req).trace(span).await?;
535
536            affected_rows += result.affected_rows;
537            extensions.extend(result.extensions);
538        }
539
540        Ok(RegionResponse {
541            affected_rows,
542            extensions,
543            metadata: Vec::new(),
544        })
545    }
546
547    /// Attempts to optimize batch Put requests for metric engine.
548    ///
549    /// Returns Either::Left(response) if optimization succeeded,
550    /// or Either::Right(original_requests) to fall back to parallel processing.
551    ///
552    /// This avoids cloning requests when optimization cannot be applied.
553    async fn try_handle_metric_batch_puts(
554        &self,
555        requests: Vec<(RegionId, RegionRequest)>,
556    ) -> Result<Either<RegionResponse, Vec<(RegionId, RegionRequest)>>> {
557        if requests.is_empty() {
558            return Ok(Either::Right(requests));
559        }
560
561        // Quick check: verify first request is Put and is metric engine
562        if !matches!(requests[0].1, RegionRequest::Put(_)) {
563            return Ok(Either::Right(requests));
564        }
565        let first_region_id = requests[0].0;
566        let request_type = requests[0].1.request_type();
567
568        // SAFETY: If the first request belongs to metric engine, then ALL requests
569        // in this batch are guaranteed to belong to metric engine. This invariant
570        // is maintained by the request batching logic upstream.
571        let engine = match self
572            .inner
573            .get_engine(first_region_id, &RegionChange::None)?
574        {
575            CurrentEngine::Engine(e) => e,
576            _ => return Ok(Either::Right(requests)),
577        };
578
579        if engine.name() != METRIC_ENGINE_NAME {
580            return Ok(Either::Right(requests));
581        }
582
583        // Check if ALL requests are Put (now we know it's worth checking)
584        let mut all_puts = true;
585        for (_, req) in &requests {
586            if !matches!(req, RegionRequest::Put(_)) {
587                all_puts = false;
588                break;
589            }
590        }
591
592        if !all_puts {
593            return Ok(Either::Right(requests));
594        }
595
596        // Now extract Put requests by consuming ownership (zero clone!)
597        let put_requests = requests.into_iter().map(|(region_id, req)| {
598            if let RegionRequest::Put(put) = req {
599                (region_id, put)
600            } else {
601                unreachable!("Already checked all are Put")
602            }
603        });
604
605        // Downcast to MetricEngine and call batch API
606        let metric_engine =
607            engine
608                .as_any()
609                .downcast_ref::<MetricEngine>()
610                .context(UnexpectedSnafu {
611                    violated: "Failed to downcast to MetricEngine",
612                })?;
613
614        let tracing_context = TracingContext::from_current_span();
615        let batch_size = put_requests.len();
616        let span = tracing_context.attach(info_span!(
617            "RegionServer::handle_metric_batch_puts",
618            batch_size = batch_size,
619        ));
620        let result = metric_engine
621            .put_regions_batch(put_requests)
622            .trace(span)
623            .await
624            .map_err(BoxedError::new)
625            .context(HandleRegionRequestSnafu {
626                region_id: first_region_id,
627            });
628
629        match result {
630            Ok(total_affected) => {
631                crate::metrics::REGION_CHANGED_ROW_COUNT
632                    .with_label_values(&[request_type])
633                    .inc_by(total_affected as u64);
634                Ok(Either::Left(RegionResponse::new(total_affected)))
635            }
636            Err(err) => {
637                crate::metrics::REGION_SERVER_INSERT_FAIL_COUNT
638                    .with_label_values(&[request_type])
639                    .inc_by(batch_size as u64);
640                Err(err)
641            }
642        }
643    }
644
645    async fn handle_sync_region_request(&self, request: &SyncRequest) -> Result<RegionResponse> {
646        let region_id = RegionId::from_u64(request.region_id);
647        let manifest_info = request
648            .manifest_info
649            .context(error::MissingRequiredFieldSnafu {
650                name: "manifest_info",
651            })?;
652
653        let manifest_info = match manifest_info {
654            ManifestInfo::MitoManifestInfo(info) => {
655                RegionManifestInfo::mito(info.data_manifest_version, 0, 0)
656            }
657            ManifestInfo::MetricManifestInfo(info) => RegionManifestInfo::metric(
658                info.data_manifest_version,
659                0,
660                info.metadata_manifest_version,
661                0,
662            ),
663        };
664
665        let tracing_context = TracingContext::from_current_span();
666        let span = tracing_context.attach(info_span!("RegionServer::handle_sync_region_request"));
667
668        self.sync_region(
669            region_id,
670            SyncRegionFromRequest::from_manifest(manifest_info),
671        )
672        .trace(span)
673        .await
674        .map(|_| RegionResponse::new(AffectedRows::default()))
675    }
676
677    /// Handles the ListMetadata request and retrieves metadata for specified regions.
678    ///
679    /// Returns the results as a JSON-serialized list in the [RegionResponse]. It serializes
680    /// non-existing regions as `null`.
681    #[tracing::instrument(skip_all)]
682    async fn handle_list_metadata_request(
683        &self,
684        request: &ListMetadataRequest,
685    ) -> Result<RegionResponse> {
686        let mut region_metadatas = Vec::new();
687        // Collect metadata for each region
688        for region_id in &request.region_ids {
689            let region_id = RegionId::from_u64(*region_id);
690            // Get the engine.
691            let Some(engine) = self.find_engine(region_id)? else {
692                region_metadatas.push(None);
693                continue;
694            };
695
696            match engine.get_metadata(region_id).await {
697                Ok(metadata) => region_metadatas.push(Some(metadata)),
698                Err(err) => {
699                    if err.status_code() == StatusCode::RegionNotFound {
700                        region_metadatas.push(None);
701                    } else {
702                        Err(err).with_context(|_| GetRegionMetadataSnafu {
703                            engine: engine.name(),
704                            region_id,
705                        })?;
706                    }
707                }
708            }
709        }
710
711        // Serialize metadata to JSON
712        let json_result = serde_json::to_vec(&region_metadatas).context(SerializeJsonSnafu)?;
713
714        let response = RegionResponse::from_metadata(json_result);
715
716        Ok(response)
717    }
718
719    async fn handle_remote_dyn_filter_request(
720        &self,
721        request: &RemoteDynFilterRequest,
722    ) -> Result<RegionResponse> {
723        if request.query_id.is_empty() {
724            return error::MissingRequiredFieldSnafu { name: "query_id" }.fail();
725        }
726
727        match request
728            .action
729            .as_ref()
730            .context(error::MissingRequiredFieldSnafu { name: "action" })?
731        {
732            Action::Update(update) => {
733                self.handle_remote_dyn_filter_update(&request.query_id, update)
734                    .await
735            }
736            Action::Unregister(unregister) => {
737                self.handle_remote_dyn_filter_unregister(&request.query_id, unregister)
738                    .await
739            }
740        }
741    }
742
743    async fn handle_remote_dyn_filter_update(
744        &self,
745        query_id: &str,
746        request: &api::v1::region::RemoteDynFilterUpdate,
747    ) -> Result<RegionResponse> {
748        if request.filter_id.is_empty() {
749            return error::MissingRequiredFieldSnafu { name: "filter_id" }.fail();
750        }
751
752        if request.payload.is_empty() {
753            return error::MissingRequiredFieldSnafu { name: "payload" }.fail();
754        }
755
756        NotYetImplementedSnafu {
757            what: format!(
758                "remote dyn filter update unary RPC placeholder for query_id {query_id}, filter_id {}",
759                request.filter_id
760            ),
761        }
762        .fail()
763    }
764
765    async fn handle_remote_dyn_filter_unregister(
766        &self,
767        query_id: &str,
768        request: &api::v1::region::RemoteDynFilterUnregister,
769    ) -> Result<RegionResponse> {
770        if request.filter_id.is_empty() {
771            return error::MissingRequiredFieldSnafu { name: "filter_id" }.fail();
772        }
773
774        NotYetImplementedSnafu {
775            what: format!(
776                "remote dyn filter unregister unary RPC placeholder for query_id {query_id}, filter_id {}",
777                request.filter_id
778            ),
779        }
780        .fail()
781    }
782
783    /// Sync region manifest and registers new opened logical regions.
784    pub async fn sync_region(
785        &self,
786        region_id: RegionId,
787        request: SyncRegionFromRequest,
788    ) -> Result<()> {
789        let engine = match self.inner.get_engine(region_id, &RegionChange::None)? {
790            CurrentEngine::Engine(engine) => engine,
791            _ => {
792                return UnexpectedSnafu {
793                    violated: "unexpected EarlyReturn engine status for a ready region",
794                }
795                .fail();
796            }
797        };
798
799        self.inner
800            .handle_sync_region(&engine, region_id, request)
801            .await
802    }
803
804    /// Remaps manifests from old regions to new regions.
805    pub async fn remap_manifests(
806        &self,
807        request: RemapManifestsRequest,
808    ) -> Result<RemapManifestsResponse> {
809        let region_id = request.region_id;
810        let engine = match self.inner.get_engine(region_id, &RegionChange::None)? {
811            CurrentEngine::Engine(engine) => engine,
812            _ => {
813                return UnexpectedSnafu {
814                    violated: "unexpected EarlyReturn engine status for a ready region",
815                }
816                .fail();
817            }
818        };
819
820        engine
821            .remap_manifests(request)
822            .await
823            .with_context(|_| HandleRegionRequestSnafu { region_id })
824    }
825
826    fn is_suspended(&self) -> bool {
827        self.suspend.load(Ordering::Relaxed)
828    }
829
830    pub(crate) fn suspend_state(&self) -> Arc<AtomicBool> {
831        self.suspend.clone()
832    }
833}
834
835fn wrap_flow_region_watermark_stream(
836    stream: SendableRecordBatchStream,
837    region_id: RegionId,
838    query_ctx: &QueryContextRef,
839) -> SendableRecordBatchStream {
840    if should_collect_region_watermark_from_extensions(&query_ctx.extensions())
841        && let Some(seq) = query_ctx.get_snapshot(region_id.as_u64())
842    {
843        Box::pin(RegionWatermarkStream::new(stream, region_id, seq)) as SendableRecordBatchStream
844    } else {
845        stream
846    }
847}
848
/// Wraps a region read stream so terminal metrics can carry the scan-open watermark.
struct RegionWatermarkStream {
    /// The wrapped stream; polling and metadata calls are delegated to it.
    stream: SendableRecordBatchStream,
    /// The region id in its `u64` form.
    region_id: u64,
    /// The snapshot sequence reported as this region's watermark.
    snapshot_seq: u64,
    /// Set once the wrapped stream has yielded `Poll::Ready(None)`.
    finished: bool,
}
856
857impl RegionWatermarkStream {
858    fn new(stream: SendableRecordBatchStream, region_id: RegionId, snapshot_seq: u64) -> Self {
859        Self {
860            stream,
861            region_id: region_id.as_u64(),
862            snapshot_seq,
863            finished: false,
864        }
865    }
866
867    fn merged_metrics(&self, mut metrics: RecordBatchMetrics) -> RecordBatchMetrics {
868        if metrics
869            .region_watermarks
870            .iter()
871            .any(|entry| entry.region_id == self.region_id)
872        {
873            return metrics;
874        }
875
876        metrics
877            .region_watermarks
878            .push(common_recordbatch::adapter::RegionWatermarkEntry {
879                region_id: self.region_id,
880                watermark: Some(self.snapshot_seq),
881            });
882        metrics
883    }
884}
885
886impl RecordBatchStream for RegionWatermarkStream {
887    fn name(&self) -> &str {
888        self.stream.name()
889    }
890
891    fn schema(&self) -> datatypes::schema::SchemaRef {
892        self.stream.schema()
893    }
894
895    fn output_ordering(&self) -> Option<&[OrderOption]> {
896        self.stream.output_ordering()
897    }
898
899    fn metrics(&self) -> Option<RecordBatchMetrics> {
900        let base = self.stream.metrics();
901        if !self.finished {
902            return base;
903        }
904
905        Some(self.merged_metrics(base.unwrap_or_default()))
906    }
907}
908
909impl Stream for RegionWatermarkStream {
910    type Item = common_recordbatch::error::Result<RecordBatch>;
911
912    fn size_hint(&self) -> (usize, Option<usize>) {
913        self.stream.size_hint()
914    }
915
916    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
917        match Pin::new(&mut self.stream).poll_next(cx) {
918            Poll::Ready(None) => {
919                self.finished = true;
920                Poll::Ready(None)
921            }
922            other => other,
923        }
924    }
925}
926
927#[async_trait]
928impl RegionServerHandler for RegionServer {
929    async fn handle(&self, request: region_request::Body) -> ServerResult<RegionResponseV1> {
930        let failed_requests_cnt = crate::metrics::REGION_SERVER_REQUEST_FAILURE_COUNT
931            .with_label_values(&[request.as_ref()]);
932        let response = match &request {
933            region_request::Body::Creates(_)
934            | region_request::Body::Drops(_)
935            | region_request::Body::Alters(_) => self.handle_batch_ddl_requests(request).await,
936            region_request::Body::Inserts(_) | region_request::Body::Deletes(_) => {
937                self.handle_requests_in_parallel(request).await
938            }
939            region_request::Body::Sync(sync_request) => {
940                self.handle_sync_region_request(sync_request).await
941            }
942            region_request::Body::ListMetadata(list_metadata_request) => {
943                self.handle_list_metadata_request(list_metadata_request)
944                    .await
945            }
946            region_request::Body::RemoteDynFilter(remote_dyn_filter_request) => {
947                self.handle_remote_dyn_filter_request(remote_dyn_filter_request)
948                    .await
949            }
950            _ => self.handle_requests_in_serial(request).await,
951        }
952        .map_err(BoxedError::new)
953        .inspect_err(|_| {
954            failed_requests_cnt.inc();
955        })
956        .context(ExecuteGrpcRequestSnafu)?;
957
958        Ok(RegionResponseV1 {
959            header: Some(ResponseHeader {
960                status: Some(Status {
961                    status_code: StatusCode::Success as _,
962                    ..Default::default()
963                }),
964            }),
965            affected_rows: response.affected_rows as _,
966            extensions: response.extensions,
967            metadata: response.metadata,
968        })
969    }
970}
971
972#[async_trait]
973impl FlightCraft for RegionServer {
974    async fn do_get(
975        &self,
976        request: Request<Ticket>,
977    ) -> TonicResult<Response<TonicStream<FlightData>>> {
978        ensure!(!self.is_suspended(), SuspendedSnafu);
979
980        let ticket = request.into_inner().ticket;
981        let request = api::v1::region::QueryRequest::decode(ticket.as_ref())
982            .context(servers_error::InvalidFlightTicketSnafu)?;
983        let tracing_context = request
984            .header
985            .as_ref()
986            .map(|h| TracingContext::from_w3c(&h.tracing_context))
987            .unwrap_or_default();
988        let query_ctx = request
989            .header
990            .as_ref()
991            .map(|h| Arc::new(QueryContext::from(h)))
992            .unwrap_or(QueryContext::arc());
993
994        let result = self
995            .handle_remote_read(request, query_ctx.clone())
996            .trace(tracing_context.attach(info_span!("RegionServer::handle_read")))
997            .await?;
998
999        let stream = Box::pin(FlightRecordBatchStream::new(
1000            result,
1001            tracing_context,
1002            self.flight_compression,
1003            query_ctx,
1004        ));
1005        Ok(Response::new(stream))
1006    }
1007}
1008
/// A region's engine paired with the region's registration status.
#[derive(Clone)]
enum RegionEngineWithStatus {
    /// An opening, or creating region.
    Registering(RegionEngineRef),
    /// A closing, or dropping region.
    Deregistering(RegionEngineRef),
    /// A ready region.
    Ready(RegionEngineRef),
}
1018
1019impl RegionEngineWithStatus {
1020    /// Returns [RegionEngineRef].
1021    pub fn into_engine(self) -> RegionEngineRef {
1022        match self {
1023            RegionEngineWithStatus::Registering(engine) => engine,
1024            RegionEngineWithStatus::Deregistering(engine) => engine,
1025            RegionEngineWithStatus::Ready(engine) => engine,
1026        }
1027    }
1028}
1029
1030impl Deref for RegionEngineWithStatus {
1031    type Target = RegionEngineRef;
1032
1033    fn deref(&self) -> &Self::Target {
1034        match self {
1035            RegionEngineWithStatus::Registering(engine) => engine,
1036            RegionEngineWithStatus::Deregistering(engine) => engine,
1037            RegionEngineWithStatus::Ready(engine) => engine,
1038        }
1039    }
1040}
1041
/// State held by the region server: the registered engines, the status of
/// every known region, and the collaborators supplied at construction.
struct RegionServerInner {
    /// Registered region engines, keyed by engine name.
    engines: RwLock<HashMap<String, RegionEngineRef>>,
    /// The engine and registration status of each known region.
    region_map: DashMap<RegionId, RegionEngineWithStatus>,
    query_engine: QueryEngineRef,
    runtime: Runtime,
    /// Listener notified of region events such as `on_region_registered`.
    event_listener: RegionServerEventListenerRef,
    table_provider_factory: TableProviderFactoryRef,
    /// The number of queries allowed to be executed at the same time.
    /// Acts as the last line of defense on the datanode to prevent query overloading.
    parallelism: Option<RegionServerParallelism>,
    /// The topic stats reporter.
    topic_stats_reporter: RwLock<Option<Box<dyn TopicStatsReporter>>>,
    /// HACK(zhongzc): Direct MitoEngine handle for diagnostics. This couples the
    /// server with a concrete engine; acceptable for now to fetch Mito-specific
    /// info (e.g., list SSTs). Consider a diagnostics trait later.
    mito_engine: RwLock<Option<MitoEngine>>,
}
1059
/// Limits the number of concurrently running queries with a semaphore.
struct RegionServerParallelism {
    /// Semaphore whose permit count is the maximum number of concurrent queries.
    semaphore: Semaphore,
    /// Maximum time to wait for a permit before giving up.
    timeout: Duration,
}
1064
1065impl RegionServerParallelism {
1066    pub fn from_opts(
1067        max_concurrent_queries: usize,
1068        concurrent_query_limiter_timeout: Duration,
1069    ) -> Option<Self> {
1070        if max_concurrent_queries == 0 {
1071            return None;
1072        }
1073        Some(RegionServerParallelism {
1074            semaphore: Semaphore::new(max_concurrent_queries),
1075            timeout: concurrent_query_limiter_timeout,
1076        })
1077    }
1078
1079    pub async fn acquire(&self) -> Result<SemaphorePermit<'_>> {
1080        timeout(self.timeout, self.semaphore.acquire())
1081            .await
1082            .context(ConcurrentQueryLimiterTimeoutSnafu)?
1083            .context(ConcurrentQueryLimiterClosedSnafu)
1084    }
1085}
1086
/// The outcome of resolving the engine for a region request.
enum CurrentEngine {
    /// The engine that should handle the request.
    Engine(RegionEngineRef),
    /// The request needs no engine work; callers return immediately with this
    /// number of affected rows.
    EarlyReturn(AffectedRows),
}
1091
1092impl Debug for CurrentEngine {
1093    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1094        match self {
1095            CurrentEngine::Engine(engine) => f
1096                .debug_struct("CurrentEngine")
1097                .field("engine", &engine.name())
1098                .finish(),
1099            CurrentEngine::EarlyReturn(rows) => f
1100                .debug_struct("CurrentEngine")
1101                .field("return", rows)
1102                .finish(),
1103        }
1104    }
1105}
1106
1107impl RegionServerInner {
1108    pub fn new(
1109        query_engine: QueryEngineRef,
1110        runtime: Runtime,
1111        event_listener: RegionServerEventListenerRef,
1112        table_provider_factory: TableProviderFactoryRef,
1113        parallelism: Option<RegionServerParallelism>,
1114    ) -> Self {
1115        Self {
1116            engines: RwLock::new(HashMap::new()),
1117            region_map: DashMap::new(),
1118            query_engine,
1119            runtime,
1120            event_listener,
1121            table_provider_factory,
1122            parallelism,
1123            topic_stats_reporter: RwLock::new(None),
1124            mito_engine: RwLock::new(None),
1125        }
1126    }
1127
1128    pub fn register_engine(&self, engine: RegionEngineRef) {
1129        let engine_name = engine.name();
1130        if engine_name == MITO_ENGINE_NAME
1131            && let Some(mito_engine) = engine.as_any().downcast_ref::<MitoEngine>()
1132        {
1133            *self.mito_engine.write().unwrap() = Some(mito_engine.clone());
1134        }
1135
1136        info!("Region Engine {engine_name} is registered");
1137        self.engines
1138            .write()
1139            .unwrap()
1140            .insert(engine_name.to_string(), engine);
1141    }
1142
1143    pub fn set_topic_stats_reporter(&self, topic_stats_reporter: Box<dyn TopicStatsReporter>) {
1144        info!("Set topic stats reporter");
1145        *self.topic_stats_reporter.write().unwrap() = Some(topic_stats_reporter);
1146    }
1147
1148    fn get_engine(
1149        &self,
1150        region_id: RegionId,
1151        region_change: &RegionChange,
1152    ) -> Result<CurrentEngine> {
1153        let current_region_status = self.region_map.get(&region_id);
1154
1155        let engine = match region_change {
1156            RegionChange::Register(attribute) => match current_region_status {
1157                Some(status) => match status.clone() {
1158                    RegionEngineWithStatus::Registering(engine) => engine,
1159                    RegionEngineWithStatus::Deregistering(_) => {
1160                        return error::RegionBusySnafu { region_id }.fail();
1161                    }
1162                    RegionEngineWithStatus::Ready(_) => status.clone().into_engine(),
1163                },
1164                _ => self
1165                    .engines
1166                    .read()
1167                    .unwrap()
1168                    .get(attribute.engine())
1169                    .with_context(|| RegionEngineNotFoundSnafu {
1170                        name: attribute.engine(),
1171                    })?
1172                    .clone(),
1173            },
1174            RegionChange::Deregisters => match current_region_status {
1175                Some(status) => match status.clone() {
1176                    RegionEngineWithStatus::Registering(_) => {
1177                        return error::RegionBusySnafu { region_id }.fail();
1178                    }
1179                    RegionEngineWithStatus::Deregistering(_) => {
1180                        return Ok(CurrentEngine::EarlyReturn(0));
1181                    }
1182                    RegionEngineWithStatus::Ready(_) => status.clone().into_engine(),
1183                },
1184                None => return Ok(CurrentEngine::EarlyReturn(0)),
1185            },
1186            RegionChange::None | RegionChange::Catchup | RegionChange::Ingest => {
1187                match current_region_status {
1188                    Some(status) => match status.clone() {
1189                        RegionEngineWithStatus::Registering(_) => {
1190                            return error::RegionNotReadySnafu { region_id }.fail();
1191                        }
1192                        RegionEngineWithStatus::Deregistering(_) => {
1193                            return error::RegionNotFoundSnafu { region_id }.fail();
1194                        }
1195                        RegionEngineWithStatus::Ready(engine) => engine,
1196                    },
1197                    None => return error::RegionNotFoundSnafu { region_id }.fail(),
1198                }
1199            }
1200        };
1201
1202        Ok(CurrentEngine::Engine(engine))
1203    }
1204
1205    async fn handle_batch_open_requests_inner(
1206        &self,
1207        engine: RegionEngineRef,
1208        parallelism: usize,
1209        requests: Vec<(RegionId, RegionOpenRequest)>,
1210        ignore_nonexistent_region: bool,
1211    ) -> Result<Vec<RegionId>> {
1212        let region_changes = requests
1213            .iter()
1214            .map(|(region_id, open)| {
1215                let attribute = parse_region_attribute(&open.engine, &open.options)?;
1216                Ok((*region_id, RegionChange::Register(attribute)))
1217            })
1218            .collect::<Result<HashMap<_, _>>>()?;
1219
1220        for (&region_id, region_change) in &region_changes {
1221            self.set_region_status_not_ready(region_id, &engine, region_change)
1222        }
1223
1224        let mut open_regions = Vec::with_capacity(requests.len());
1225        let mut errors = vec![];
1226        match engine
1227            .handle_batch_open_requests(parallelism, requests)
1228            .await
1229            .with_context(|_| HandleBatchOpenRequestSnafu)
1230        {
1231            Ok(results) => {
1232                for (region_id, result) in results {
1233                    let region_change = &region_changes[&region_id];
1234                    match result {
1235                        Ok(_) => {
1236                            if let Err(e) = self
1237                                .set_region_status_ready(region_id, engine.clone(), *region_change)
1238                                .await
1239                            {
1240                                error!(e; "Failed to set region to ready: {}", region_id);
1241                                errors.push(BoxedError::new(e));
1242                            } else {
1243                                open_regions.push(region_id)
1244                            }
1245                        }
1246                        Err(e) => {
1247                            self.unset_region_status(region_id, &engine, *region_change);
1248                            if e.status_code() == StatusCode::RegionNotFound
1249                                && ignore_nonexistent_region
1250                            {
1251                                warn!("Region {} not found, ignore it, source: {:?}", region_id, e);
1252                            } else {
1253                                error!(e; "Failed to open region: {}", region_id);
1254                                errors.push(e);
1255                            }
1256                        }
1257                    }
1258                }
1259            }
1260            Err(e) => {
1261                for (&region_id, region_change) in &region_changes {
1262                    self.unset_region_status(region_id, &engine, *region_change);
1263                }
1264                error!(e; "Failed to open batch regions");
1265                errors.push(BoxedError::new(e));
1266            }
1267        }
1268
1269        if !errors.is_empty() {
1270            return error::UnexpectedSnafu {
1271                // Returns the first error.
1272                violated: format!("Failed to open batch regions: {:?}", errors[0]),
1273            }
1274            .fail();
1275        }
1276
1277        Ok(open_regions)
1278    }
1279
1280    pub async fn handle_batch_open_requests(
1281        &self,
1282        parallelism: usize,
1283        requests: Vec<(RegionId, RegionOpenRequest)>,
1284        ignore_nonexistent_region: bool,
1285    ) -> Result<Vec<RegionId>> {
1286        let mut engine_grouped_requests: HashMap<String, Vec<_>> =
1287            HashMap::with_capacity(requests.len());
1288        for (region_id, request) in requests {
1289            if let Some(requests) = engine_grouped_requests.get_mut(&request.engine) {
1290                requests.push((region_id, request));
1291            } else {
1292                engine_grouped_requests.insert(request.engine.clone(), vec![(region_id, request)]);
1293            }
1294        }
1295
1296        let mut results = Vec::with_capacity(engine_grouped_requests.keys().len());
1297        for (engine, requests) in engine_grouped_requests {
1298            let engine = self
1299                .engines
1300                .read()
1301                .unwrap()
1302                .get(&engine)
1303                .with_context(|| RegionEngineNotFoundSnafu { name: &engine })?
1304                .clone();
1305            results.push(
1306                self.handle_batch_open_requests_inner(
1307                    engine,
1308                    parallelism,
1309                    requests,
1310                    ignore_nonexistent_region,
1311                )
1312                .await,
1313            )
1314        }
1315
1316        Ok(results
1317            .into_iter()
1318            .collect::<Result<Vec<_>>>()?
1319            .into_iter()
1320            .flatten()
1321            .collect::<Vec<_>>())
1322    }
1323
1324    pub async fn handle_batch_catchup_requests_inner(
1325        &self,
1326        engine: RegionEngineRef,
1327        parallelism: usize,
1328        requests: Vec<(RegionId, RegionCatchupRequest)>,
1329    ) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
1330        for (region_id, _) in &requests {
1331            self.set_region_status_not_ready(*region_id, &engine, &RegionChange::Catchup);
1332        }
1333        let region_ids = requests
1334            .iter()
1335            .map(|(region_id, _)| *region_id)
1336            .collect::<Vec<_>>();
1337        let mut responses = Vec::with_capacity(requests.len());
1338        match engine
1339            .handle_batch_catchup_requests(parallelism, requests)
1340            .await
1341        {
1342            Ok(results) => {
1343                for (region_id, result) in results {
1344                    match result {
1345                        Ok(_) => {
1346                            if let Err(e) = self
1347                                .set_region_status_ready(
1348                                    region_id,
1349                                    engine.clone(),
1350                                    RegionChange::Catchup,
1351                                )
1352                                .await
1353                            {
1354                                error!(e; "Failed to set region to ready: {}", region_id);
1355                                responses.push((region_id, Err(BoxedError::new(e))));
1356                            } else {
1357                                responses.push((region_id, Ok(())));
1358                            }
1359                        }
1360                        Err(e) => {
1361                            self.unset_region_status(region_id, &engine, RegionChange::Catchup);
1362                            error!(e; "Failed to catchup region: {}", region_id);
1363                            responses.push((region_id, Err(e)));
1364                        }
1365                    }
1366                }
1367            }
1368            Err(e) => {
1369                for region_id in region_ids {
1370                    self.unset_region_status(region_id, &engine, RegionChange::Catchup);
1371                }
1372                error!(e; "Failed to catchup batch regions");
1373                return error::UnexpectedSnafu {
1374                    violated: format!("Failed to catchup batch regions: {:?}", e),
1375                }
1376                .fail();
1377            }
1378        }
1379
1380        Ok(responses)
1381    }
1382
1383    pub async fn handle_batch_catchup_requests(
1384        &self,
1385        parallelism: usize,
1386        requests: Vec<(RegionId, RegionCatchupRequest)>,
1387    ) -> Result<Vec<(RegionId, std::result::Result<(), BoxedError>)>> {
1388        let mut engine_grouped_requests: HashMap<String, Vec<_>> = HashMap::new();
1389
1390        let mut responses = Vec::with_capacity(requests.len());
1391        for (region_id, request) in requests {
1392            if let Ok(engine) = self.get_engine(region_id, &RegionChange::Catchup) {
1393                match engine {
1394                    CurrentEngine::Engine(engine) => {
1395                        engine_grouped_requests
1396                            .entry(engine.name().to_string())
1397                            .or_default()
1398                            .push((region_id, request));
1399                    }
1400                    CurrentEngine::EarlyReturn(_) => {
1401                        return error::UnexpectedSnafu {
1402                            violated: format!("Unexpected engine type for region {}", region_id),
1403                        }
1404                        .fail();
1405                    }
1406                }
1407            } else {
1408                responses.push((
1409                    region_id,
1410                    Err(BoxedError::new(
1411                        error::RegionNotFoundSnafu { region_id }.build(),
1412                    )),
1413                ));
1414            }
1415        }
1416
1417        for (engine, requests) in engine_grouped_requests {
1418            let engine = self
1419                .engines
1420                .read()
1421                .unwrap()
1422                .get(&engine)
1423                .with_context(|| RegionEngineNotFoundSnafu { name: &engine })?
1424                .clone();
1425            responses.extend(
1426                self.handle_batch_catchup_requests_inner(engine, parallelism, requests)
1427                    .await?,
1428            );
1429        }
1430
1431        Ok(responses)
1432    }
1433
1434    // Handle requests in batch.
1435    //
1436    // limitation: all create requests must be in the same engine.
1437    pub async fn handle_batch_request(
1438        &self,
1439        batch_request: BatchRegionDdlRequest,
1440    ) -> Result<RegionResponse> {
1441        let region_changes = match &batch_request {
1442            BatchRegionDdlRequest::Create(requests) => requests
1443                .iter()
1444                .map(|(region_id, create)| {
1445                    let attribute = parse_region_attribute(&create.engine, &create.options)?;
1446                    Ok((*region_id, RegionChange::Register(attribute)))
1447                })
1448                .collect::<Result<Vec<_>>>()?,
1449            BatchRegionDdlRequest::Drop(requests) => requests
1450                .iter()
1451                .map(|(region_id, _)| (*region_id, RegionChange::Deregisters))
1452                .collect::<Vec<_>>(),
1453            BatchRegionDdlRequest::Alter(requests) => requests
1454                .iter()
1455                .map(|(region_id, _)| (*region_id, RegionChange::None))
1456                .collect::<Vec<_>>(),
1457        };
1458
1459        // The ddl procedure will ensure all requests are in the same engine.
1460        // Therefore, we can get the engine from the first request.
1461        let (first_region_id, first_region_change) = region_changes.first().unwrap();
1462        let engine = match self.get_engine(*first_region_id, first_region_change)? {
1463            CurrentEngine::Engine(engine) => engine,
1464            CurrentEngine::EarlyReturn(rows) => return Ok(RegionResponse::new(rows)),
1465        };
1466
1467        for (region_id, region_change) in region_changes.iter() {
1468            self.set_region_status_not_ready(*region_id, &engine, region_change);
1469        }
1470
1471        let ddl_type = batch_request.request_type();
1472        let result = engine
1473            .handle_batch_ddl_requests(batch_request)
1474            .await
1475            .context(HandleBatchDdlRequestSnafu { ddl_type });
1476
1477        match result {
1478            Ok(result) => {
1479                for (region_id, region_change) in &region_changes {
1480                    self.set_region_status_ready(*region_id, engine.clone(), *region_change)
1481                        .await?;
1482                }
1483
1484                Ok(RegionResponse {
1485                    affected_rows: result.affected_rows,
1486                    extensions: result.extensions,
1487                    metadata: Vec::new(),
1488                })
1489            }
1490            Err(err) => {
1491                for (region_id, region_change) in region_changes {
1492                    self.unset_region_status(region_id, &engine, region_change);
1493                }
1494
1495                Err(err)
1496            }
1497        }
1498    }
1499
1500    pub async fn handle_request(
1501        &self,
1502        region_id: RegionId,
1503        request: RegionRequest,
1504    ) -> Result<RegionResponse> {
1505        let request_type = request.request_type();
1506        let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
1507            .with_label_values(&[request_type])
1508            .start_timer();
1509
1510        let region_change = match &request {
1511            RegionRequest::Create(create) => {
1512                let attribute = parse_region_attribute(&create.engine, &create.options)?;
1513                RegionChange::Register(attribute)
1514            }
1515            RegionRequest::Open(open) => {
1516                let attribute = parse_region_attribute(&open.engine, &open.options)?;
1517                RegionChange::Register(attribute)
1518            }
1519            RegionRequest::Close(_) | RegionRequest::Drop(_) => RegionChange::Deregisters,
1520            RegionRequest::Put(_) | RegionRequest::Delete(_) | RegionRequest::BulkInserts(_) => {
1521                RegionChange::Ingest
1522            }
1523            RegionRequest::Alter(_)
1524            | RegionRequest::Flush(_)
1525            | RegionRequest::Compact(_)
1526            | RegionRequest::Truncate(_)
1527            | RegionRequest::BuildIndex(_)
1528            | RegionRequest::EnterStaging(_)
1529            | RegionRequest::ApplyStagingManifest(_) => RegionChange::None,
1530            RegionRequest::Catchup(_) => RegionChange::Catchup,
1531        };
1532
1533        let engine = match self.get_engine(region_id, &region_change)? {
1534            CurrentEngine::Engine(engine) => engine,
1535            CurrentEngine::EarlyReturn(rows) => return Ok(RegionResponse::new(rows)),
1536        };
1537
1538        // Sets corresponding region status to registering/deregistering before the operation.
1539        self.set_region_status_not_ready(region_id, &engine, &region_change);
1540
1541        match engine
1542            .handle_request(region_id, request)
1543            .await
1544            .with_context(|_| HandleRegionRequestSnafu { region_id })
1545        {
1546            Ok(result) => {
1547                // Update metrics
1548                if matches!(region_change, RegionChange::Ingest) {
1549                    crate::metrics::REGION_CHANGED_ROW_COUNT
1550                        .with_label_values(&[request_type])
1551                        .inc_by(result.affected_rows as u64);
1552                }
1553                // Sets corresponding region status to ready.
1554                self.set_region_status_ready(region_id, engine.clone(), region_change)
1555                    .await?;
1556
1557                Ok(RegionResponse {
1558                    affected_rows: result.affected_rows,
1559                    extensions: result.extensions,
1560                    metadata: Vec::new(),
1561                })
1562            }
1563            Err(err) => {
1564                if matches!(region_change, RegionChange::Ingest) {
1565                    crate::metrics::REGION_SERVER_INSERT_FAIL_COUNT
1566                        .with_label_values(&[request_type])
1567                        .inc();
1568                }
1569                // Removes the region status if the operation fails.
1570                self.unset_region_status(region_id, &engine, region_change);
1571                Err(err)
1572            }
1573        }
1574    }
1575
1576    /// Handles the sync region request.
1577    pub async fn handle_sync_region(
1578        &self,
1579        engine: &RegionEngineRef,
1580        region_id: RegionId,
1581        request: SyncRegionFromRequest,
1582    ) -> Result<()> {
1583        let Some(new_opened_regions) = engine
1584            .sync_region(region_id, request)
1585            .await
1586            .with_context(|_| HandleRegionRequestSnafu { region_id })?
1587            .new_opened_logical_region_ids()
1588        else {
1589            return Ok(());
1590        };
1591
1592        for region in &new_opened_regions {
1593            self.region_map
1594                .insert(*region, RegionEngineWithStatus::Ready(engine.clone()));
1595        }
1596        if !new_opened_regions.is_empty() {
1597            info!(
1598                region_id = %region_id,
1599                logical_region_count = new_opened_regions.len(),
1600                logical_regions = ?new_opened_regions,
1601                "Logical regions are registered"
1602            );
1603        }
1604
1605        Ok(())
1606    }
1607
1608    fn set_region_status_not_ready(
1609        &self,
1610        region_id: RegionId,
1611        engine: &RegionEngineRef,
1612        region_change: &RegionChange,
1613    ) {
1614        match region_change {
1615            RegionChange::Register(_) => {
1616                self.region_map.insert(
1617                    region_id,
1618                    RegionEngineWithStatus::Registering(engine.clone()),
1619                );
1620            }
1621            RegionChange::Deregisters => {
1622                self.region_map.insert(
1623                    region_id,
1624                    RegionEngineWithStatus::Deregistering(engine.clone()),
1625                );
1626            }
1627            _ => {}
1628        }
1629    }
1630
1631    fn unset_region_status(
1632        &self,
1633        region_id: RegionId,
1634        engine: &RegionEngineRef,
1635        region_change: RegionChange,
1636    ) {
1637        match region_change {
1638            RegionChange::None | RegionChange::Ingest => {}
1639            RegionChange::Register(_) => {
1640                self.region_map.remove(&region_id);
1641            }
1642            RegionChange::Deregisters => {
1643                self.region_map
1644                    .insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
1645            }
1646            RegionChange::Catchup => {}
1647        }
1648    }
1649
1650    async fn set_region_status_ready(
1651        &self,
1652        region_id: RegionId,
1653        engine: RegionEngineRef,
1654        region_change: RegionChange,
1655    ) -> Result<()> {
1656        let engine_type = engine.name();
1657        match region_change {
1658            RegionChange::None | RegionChange::Ingest => {}
1659            RegionChange::Register(attribute) => {
1660                info!(
1661                    "Region {region_id} is registered to engine {}",
1662                    attribute.engine()
1663                );
1664                self.region_map
1665                    .insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
1666
1667                match attribute {
1668                    RegionAttribute::Metric { physical } => {
1669                        if physical {
1670                            // Registers the logical regions belong to the physical region (`region_id`).
1671                            self.register_logical_regions(&engine, region_id).await?;
1672                            // We only send the `on_region_registered` event of the physical region.
1673                            self.event_listener.on_region_registered(region_id);
1674                        }
1675                    }
1676                    RegionAttribute::Mito => self.event_listener.on_region_registered(region_id),
1677                    RegionAttribute::File => {
1678                        // do nothing
1679                    }
1680                }
1681            }
1682            RegionChange::Deregisters => {
1683                info!("Region {region_id} is deregistered from engine {engine_type}");
1684                self.region_map
1685                    .remove(&region_id)
1686                    .map(|(id, engine)| engine.set_region_role(id, RegionRole::Follower));
1687                self.event_listener.on_region_deregistered(region_id);
1688            }
1689            RegionChange::Catchup => {
1690                if is_metric_engine(engine.name()) {
1691                    // Registers the logical regions belong to the physical region (`region_id`).
1692                    self.register_logical_regions(&engine, region_id).await?;
1693                }
1694            }
1695        }
1696        Ok(())
1697    }
1698
1699    async fn register_logical_regions(
1700        &self,
1701        engine: &RegionEngineRef,
1702        physical_region_id: RegionId,
1703    ) -> Result<()> {
1704        let metric_engine =
1705            engine
1706                .as_any()
1707                .downcast_ref::<MetricEngine>()
1708                .context(UnexpectedSnafu {
1709                    violated: format!(
1710                        "expecting engine type '{}', actual '{}'",
1711                        METRIC_ENGINE_NAME,
1712                        engine.name(),
1713                    ),
1714                })?;
1715
1716        let logical_regions = metric_engine
1717            .logical_regions(physical_region_id)
1718            .await
1719            .context(FindLogicalRegionsSnafu { physical_region_id })?;
1720
1721        for region in &logical_regions {
1722            self.region_map
1723                .insert(*region, RegionEngineWithStatus::Ready(engine.clone()));
1724        }
1725        if !logical_regions.is_empty() {
1726            info!(
1727                physical_region_id = %physical_region_id,
1728                logical_region_count = logical_regions.len(),
1729                logical_regions = ?logical_regions,
1730                "Logical regions are registered"
1731            );
1732        }
1733        Ok(())
1734    }
1735
1736    pub async fn handle_read(
1737        &self,
1738        request: QueryRequest,
1739        query_ctx: QueryContextRef,
1740    ) -> Result<SendableRecordBatchStream> {
1741        // TODO(ruihang): add metrics and set trace id
1742
1743        let result = self
1744            .query_engine
1745            .execute(request.plan, query_ctx)
1746            .await
1747            .context(ExecuteLogicalPlanSnafu)?;
1748
1749        match result.data {
1750            OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => {
1751                UnsupportedOutputSnafu { expected: "stream" }.fail()
1752            }
1753            OutputData::Stream(stream) => Ok(stream),
1754        }
1755    }
1756
1757    async fn stop(&self) -> Result<()> {
1758        // Calling async functions while iterating inside the Dashmap could easily cause the Rust
1759        // complains "higher-ranked lifetime error". Rust can't prove some future is legit.
1760        // Possible related issue: https://github.com/rust-lang/rust/issues/102211
1761        //
1762        // The workaround is to put the async functions in the `common_runtime::spawn_global`. Or like
1763        // it here, collect the values first then use later separately.
1764
1765        let regions = self
1766            .region_map
1767            .iter()
1768            .map(|x| (*x.key(), x.value().clone()))
1769            .collect::<Vec<_>>();
1770        let num_regions = regions.len();
1771
1772        for (region_id, engine) in regions {
1773            let closed = engine
1774                .handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
1775                .await;
1776            match closed {
1777                Ok(_) => debug!("Region {region_id} is closed"),
1778                Err(e) => warn!("Failed to close region {region_id}, err: {e}"),
1779            }
1780        }
1781        self.region_map.clear();
1782        info!("closed {num_regions} regions");
1783
1784        drop(self.mito_engine.write().unwrap().take());
1785        let engines = self.engines.write().unwrap().drain().collect::<Vec<_>>();
1786        for (engine_name, engine) in engines {
1787            engine
1788                .stop()
1789                .await
1790                .context(StopRegionEngineSnafu { name: &engine_name })?;
1791            info!("Region engine {engine_name} is stopped");
1792        }
1793
1794        Ok(())
1795    }
1796}
1797
/// How a region request affects the region's entry in the region map.
#[derive(Debug, Clone, Copy)]
enum RegionChange {
    /// The request does not touch the region's registration status.
    None,
    /// The region is being registered; carries the attribute of the target engine.
    /// The entry is `Registering` while in flight and `Ready` once done.
    Register(RegionAttribute),
    /// The region is being deregistered. The entry is `Deregistering` while in flight and
    /// is removed once done.
    Deregisters,
    /// The region caught up; for the metric engine the logical regions of the physical
    /// region are registered again once done.
    Catchup,
    /// Handled like `None` by the status helpers in this file.
    Ingest,
}
1806
/// Returns `true` when `engine` is exactly the metric engine's name.
fn is_metric_engine(engine: &str) -> bool {
    matches!(engine, METRIC_ENGINE_NAME)
}
1810
1811fn parse_region_attribute(
1812    engine: &str,
1813    options: &HashMap<String, String>,
1814) -> Result<RegionAttribute> {
1815    match engine {
1816        MITO_ENGINE_NAME => Ok(RegionAttribute::Mito),
1817        METRIC_ENGINE_NAME => {
1818            let physical = !options.contains_key(LOGICAL_TABLE_METADATA_KEY);
1819
1820            Ok(RegionAttribute::Metric { physical })
1821        }
1822        FILE_ENGINE_NAME => Ok(RegionAttribute::File),
1823        _ => error::UnexpectedSnafu {
1824            violated: format!("Unknown engine: {}", engine),
1825        }
1826        .fail(),
1827    }
1828}
1829
/// The engine-specific kind of a region that is being registered.
#[derive(Debug, Clone, Copy)]
enum RegionAttribute {
    /// A region of the mito engine.
    Mito,
    /// A region of the metric engine; `physical` is `false` when the region's options
    /// carry `LOGICAL_TABLE_METADATA_KEY`.
    Metric { physical: bool },
    /// A region of the file engine.
    File,
}
1836
1837impl RegionAttribute {
1838    fn engine(&self) -> &'static str {
1839        match self {
1840            RegionAttribute::Mito => MITO_ENGINE_NAME,
1841            RegionAttribute::Metric { .. } => METRIC_ENGINE_NAME,
1842            RegionAttribute::File => FILE_ENGINE_NAME,
1843        }
1844    }
1845}
1846
1847#[cfg(test)]
1848mod tests {
1849
1850    use std::assert_matches;
1851    use std::collections::HashMap;
1852    use std::sync::Arc;
1853
1854    use api::v1::SemanticType;
1855    use api::v1::region::{
1856        RemoteDynFilterRequest, RemoteDynFilterUnregister, RemoteDynFilterUpdate,
1857        remote_dyn_filter_request,
1858    };
1859    use common_error::ext::ErrorExt;
1860    use common_recordbatch::RecordBatches;
1861    use common_recordbatch::adapter::{RecordBatchMetrics, RegionWatermarkEntry};
1862    use datatypes::prelude::{ConcreteDataType, VectorRef};
1863    use datatypes::schema::{ColumnSchema, Schema};
1864    use datatypes::vectors::Int32Vector;
1865    use futures_util::StreamExt;
1866    use mito2::test_util::CreateRequestBuilder;
1867    use query::options::FLOW_RETURN_REGION_SEQ;
1868    use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
1869    use store_api::region_engine::RegionEngine;
1870    use store_api::region_request::{
1871        PathType, RegionDropRequest, RegionOpenRequest, RegionTruncateRequest,
1872    };
1873    use store_api::storage::RegionId;
1874
1875    use super::*;
1876    use crate::error::Result;
1877    use crate::tests::{MockRegionEngine, mock_region_server};
1878
1879    fn single_value_stream() -> SendableRecordBatchStream {
1880        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
1881            "v",
1882            ConcreteDataType::int32_datatype(),
1883            false,
1884        )]));
1885        let values: VectorRef = Arc::new(Int32Vector::from_slice([1]));
1886        let batch = RecordBatch::new(schema.clone(), vec![values]).unwrap();
1887        RecordBatches::try_new(schema, vec![batch])
1888            .unwrap()
1889            .as_stream()
1890    }
1891
1892    #[tokio::test]
1893    async fn test_region_watermark_stream_only_sets_terminal_metrics() {
1894        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
1895            "v",
1896            ConcreteDataType::int32_datatype(),
1897            false,
1898        )]));
1899        let values: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
1900        let batch = RecordBatch::new(schema.clone(), vec![values]).unwrap();
1901        let stream = RecordBatches::try_new(schema, vec![batch])
1902            .unwrap()
1903            .as_stream();
1904
1905        let region_id = RegionId::new(42, 7);
1906        let wrapped = RegionWatermarkStream::new(stream, region_id, 99);
1907        let mut pinned = Box::pin(wrapped);
1908
1909        assert!(pinned.as_ref().get_ref().metrics().is_none());
1910        while pinned.next().await.is_some() {}
1911
1912        let metrics = pinned.as_ref().get_ref().metrics().unwrap();
1913        assert_eq!(
1914            metrics.region_watermarks,
1915            vec![RegionWatermarkEntry {
1916                region_id: region_id.as_u64(),
1917                watermark: Some(99),
1918            }]
1919        );
1920    }
1921
1922    #[test]
1923    fn test_region_watermark_stream_preserves_unproved_watermark() {
1924        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
1925            "v",
1926            ConcreteDataType::int32_datatype(),
1927            false,
1928        )]));
1929        let values: VectorRef = Arc::new(Int32Vector::from_slice([1]));
1930        let batch = RecordBatch::new(schema.clone(), vec![values]).unwrap();
1931        let stream = RecordBatches::try_new(schema, vec![batch])
1932            .unwrap()
1933            .as_stream();
1934
1935        let region_id = RegionId::new(42, 7);
1936        let wrapped = RegionWatermarkStream::new(stream, region_id, 99);
1937        let metrics = RecordBatchMetrics {
1938            region_watermarks: vec![RegionWatermarkEntry {
1939                region_id: region_id.as_u64(),
1940                watermark: None,
1941            }],
1942            ..Default::default()
1943        };
1944
1945        let merged = wrapped.merged_metrics(metrics);
1946        assert_eq!(
1947            merged.region_watermarks,
1948            vec![RegionWatermarkEntry {
1949                region_id: region_id.as_u64(),
1950                watermark: None,
1951            }]
1952        );
1953    }
1954
1955    #[tokio::test]
1956    async fn test_wrap_flow_region_watermark_stream_adds_terminal_metrics() {
1957        let region_id = RegionId::new(42, 7);
1958        let query_ctx = Arc::new(
1959            QueryContextBuilder::default()
1960                .extensions(HashMap::from([(
1961                    FLOW_RETURN_REGION_SEQ.to_string(),
1962                    "true".to_string(),
1963                )]))
1964                .build(),
1965        );
1966        query_ctx.set_snapshot(region_id.as_u64(), 99);
1967
1968        let wrapped =
1969            wrap_flow_region_watermark_stream(single_value_stream(), region_id, &query_ctx);
1970        let mut pinned = Box::pin(wrapped);
1971        while pinned.next().await.is_some() {}
1972
1973        let metrics = pinned.as_ref().get_ref().metrics().unwrap();
1974        assert_eq!(
1975            metrics.region_watermarks,
1976            vec![RegionWatermarkEntry {
1977                region_id: region_id.as_u64(),
1978                watermark: Some(99),
1979            }]
1980        );
1981    }
1982
1983    #[tokio::test]
1984    async fn test_wrap_flow_region_watermark_stream_skips_without_extension() {
1985        let region_id = RegionId::new(42, 7);
1986        let query_ctx = Arc::new(QueryContextBuilder::default().build());
1987        query_ctx.set_snapshot(region_id.as_u64(), 99);
1988
1989        let wrapped =
1990            wrap_flow_region_watermark_stream(single_value_stream(), region_id, &query_ctx);
1991        let mut pinned = Box::pin(wrapped);
1992        while pinned.next().await.is_some() {}
1993
1994        assert!(pinned.as_ref().get_ref().metrics().is_none());
1995    }
1996
1997    #[tokio::test]
1998    async fn test_wrap_flow_region_watermark_stream_skips_without_snapshot() {
1999        let region_id = RegionId::new(42, 7);
2000        let query_ctx = Arc::new(
2001            QueryContextBuilder::default()
2002                .extensions(HashMap::from([(
2003                    FLOW_RETURN_REGION_SEQ.to_string(),
2004                    "true".to_string(),
2005                )]))
2006                .build(),
2007        );
2008
2009        let wrapped =
2010            wrap_flow_region_watermark_stream(single_value_stream(), region_id, &query_ctx);
2011        let mut pinned = Box::pin(wrapped);
2012        while pinned.next().await.is_some() {}
2013
2014        assert!(pinned.as_ref().get_ref().metrics().is_none());
2015    }
2016
2017    #[tokio::test]
2018    async fn test_region_registering() {
2019        common_telemetry::init_default_ut_logging();
2020
2021        let mut mock_region_server = mock_region_server();
2022        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
2023        let engine_name = engine.name();
2024        mock_region_server.register_engine(engine.clone());
2025        let region_id = RegionId::new(1, 1);
2026        let builder = CreateRequestBuilder::new();
2027        let create_req = builder.build();
2028        // Tries to create/open a registering region.
2029        mock_region_server.inner.region_map.insert(
2030            region_id,
2031            RegionEngineWithStatus::Registering(engine.clone()),
2032        );
2033        let response = mock_region_server
2034            .handle_request(region_id, RegionRequest::Create(create_req))
2035            .await
2036            .unwrap();
2037        assert_eq!(response.affected_rows, 0);
2038        let status = mock_region_server
2039            .inner
2040            .region_map
2041            .get(&region_id)
2042            .unwrap()
2043            .clone();
2044        assert!(matches!(status, RegionEngineWithStatus::Ready(_)));
2045
2046        mock_region_server.inner.region_map.insert(
2047            region_id,
2048            RegionEngineWithStatus::Registering(engine.clone()),
2049        );
2050        let response = mock_region_server
2051            .handle_request(
2052                region_id,
2053                RegionRequest::Open(RegionOpenRequest {
2054                    engine: engine_name.to_string(),
2055                    table_dir: String::new(),
2056                    path_type: PathType::Bare,
2057                    options: Default::default(),
2058                    skip_wal_replay: false,
2059                    checkpoint: None,
2060                }),
2061            )
2062            .await
2063            .unwrap();
2064        assert_eq!(response.affected_rows, 0);
2065        let status = mock_region_server
2066            .inner
2067            .region_map
2068            .get(&region_id)
2069            .unwrap()
2070            .clone();
2071        assert!(matches!(status, RegionEngineWithStatus::Ready(_)));
2072    }
2073
2074    #[tokio::test]
2075    async fn test_region_deregistering() {
2076        common_telemetry::init_default_ut_logging();
2077
2078        let mut mock_region_server = mock_region_server();
2079        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
2080
2081        mock_region_server.register_engine(engine.clone());
2082
2083        let region_id = RegionId::new(1, 1);
2084
2085        // Tries to drop/close a registering region.
2086        mock_region_server.inner.region_map.insert(
2087            region_id,
2088            RegionEngineWithStatus::Deregistering(engine.clone()),
2089        );
2090
2091        let response = mock_region_server
2092            .handle_request(
2093                region_id,
2094                RegionRequest::Drop(RegionDropRequest {
2095                    fast_path: false,
2096                    force: false,
2097                    partial_drop: false,
2098                }),
2099            )
2100            .await
2101            .unwrap();
2102        assert_eq!(response.affected_rows, 0);
2103
2104        let status = mock_region_server
2105            .inner
2106            .region_map
2107            .get(&region_id)
2108            .unwrap()
2109            .clone();
2110        assert!(matches!(status, RegionEngineWithStatus::Deregistering(_)));
2111
2112        mock_region_server.inner.region_map.insert(
2113            region_id,
2114            RegionEngineWithStatus::Deregistering(engine.clone()),
2115        );
2116
2117        let response = mock_region_server
2118            .handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
2119            .await
2120            .unwrap();
2121        assert_eq!(response.affected_rows, 0);
2122
2123        let status = mock_region_server
2124            .inner
2125            .region_map
2126            .get(&region_id)
2127            .unwrap()
2128            .clone();
2129        assert!(matches!(status, RegionEngineWithStatus::Deregistering(_)));
2130    }
2131
2132    #[tokio::test]
2133    async fn test_region_not_ready() {
2134        common_telemetry::init_default_ut_logging();
2135
2136        let mut mock_region_server = mock_region_server();
2137        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
2138
2139        mock_region_server.register_engine(engine.clone());
2140
2141        let region_id = RegionId::new(1, 1);
2142
2143        // Tries to drop/close a registering region.
2144        mock_region_server.inner.region_map.insert(
2145            region_id,
2146            RegionEngineWithStatus::Registering(engine.clone()),
2147        );
2148
2149        let err = mock_region_server
2150            .handle_request(
2151                region_id,
2152                RegionRequest::Truncate(RegionTruncateRequest::All),
2153            )
2154            .await
2155            .unwrap_err();
2156
2157        assert_eq!(err.status_code(), StatusCode::RegionNotReady);
2158    }
2159
2160    #[tokio::test]
2161    async fn test_region_request_failed() {
2162        common_telemetry::init_default_ut_logging();
2163
2164        let mut mock_region_server = mock_region_server();
2165        let (engine, _receiver) = MockRegionEngine::with_mock_fn(
2166            MITO_ENGINE_NAME,
2167            Box::new(|_region_id, _request| {
2168                error::UnexpectedSnafu {
2169                    violated: "test".to_string(),
2170                }
2171                .fail()
2172            }),
2173        );
2174
2175        mock_region_server.register_engine(engine.clone());
2176
2177        let region_id = RegionId::new(1, 1);
2178        let builder = CreateRequestBuilder::new();
2179        let create_req = builder.build();
2180        mock_region_server
2181            .handle_request(region_id, RegionRequest::Create(create_req))
2182            .await
2183            .unwrap_err();
2184
2185        let status = mock_region_server.inner.region_map.get(&region_id);
2186        assert!(status.is_none());
2187
2188        mock_region_server
2189            .inner
2190            .region_map
2191            .insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
2192
2193        mock_region_server
2194            .handle_request(
2195                region_id,
2196                RegionRequest::Drop(RegionDropRequest {
2197                    fast_path: false,
2198                    force: false,
2199                    partial_drop: false,
2200                }),
2201            )
2202            .await
2203            .unwrap_err();
2204
2205        let status = mock_region_server.inner.region_map.get(&region_id);
2206        assert!(status.is_some());
2207    }
2208
2209    #[tokio::test]
2210    async fn test_batch_open_region_ignore_nonexistent_regions() {
2211        common_telemetry::init_default_ut_logging();
2212        let mut mock_region_server = mock_region_server();
2213        let (engine, _receiver) = MockRegionEngine::with_mock_fn(
2214            MITO_ENGINE_NAME,
2215            Box::new(|region_id, _request| {
2216                if region_id == RegionId::new(1, 1) {
2217                    error::RegionNotFoundSnafu { region_id }.fail()
2218                } else {
2219                    Ok(0)
2220                }
2221            }),
2222        );
2223        mock_region_server.register_engine(engine.clone());
2224
2225        let region_ids = mock_region_server
2226            .handle_batch_open_requests(
2227                8,
2228                vec![
2229                    (
2230                        RegionId::new(1, 1),
2231                        RegionOpenRequest {
2232                            engine: MITO_ENGINE_NAME.to_string(),
2233                            table_dir: String::new(),
2234                            path_type: PathType::Bare,
2235                            options: Default::default(),
2236                            skip_wal_replay: false,
2237                            checkpoint: None,
2238                        },
2239                    ),
2240                    (
2241                        RegionId::new(1, 2),
2242                        RegionOpenRequest {
2243                            engine: MITO_ENGINE_NAME.to_string(),
2244                            table_dir: String::new(),
2245                            path_type: PathType::Bare,
2246                            options: Default::default(),
2247                            skip_wal_replay: false,
2248                            checkpoint: None,
2249                        },
2250                    ),
2251                ],
2252                true,
2253            )
2254            .await
2255            .unwrap();
2256        assert_eq!(region_ids, vec![RegionId::new(1, 2)]);
2257
2258        let err = mock_region_server
2259            .handle_batch_open_requests(
2260                8,
2261                vec![
2262                    (
2263                        RegionId::new(1, 1),
2264                        RegionOpenRequest {
2265                            engine: MITO_ENGINE_NAME.to_string(),
2266                            table_dir: String::new(),
2267                            path_type: PathType::Bare,
2268                            options: Default::default(),
2269                            skip_wal_replay: false,
2270                            checkpoint: None,
2271                        },
2272                    ),
2273                    (
2274                        RegionId::new(1, 2),
2275                        RegionOpenRequest {
2276                            engine: MITO_ENGINE_NAME.to_string(),
2277                            table_dir: String::new(),
2278                            path_type: PathType::Bare,
2279                            options: Default::default(),
2280                            skip_wal_replay: false,
2281                            checkpoint: None,
2282                        },
2283                    ),
2284                ],
2285                false,
2286            )
2287            .await
2288            .unwrap_err();
2289        assert_eq!(err.status_code(), StatusCode::Unexpected);
2290    }
2291
    /// One table-driven case of `test_current_engine`.
    struct CurrentEngineTest {
        /// Region whose map entry is prepared and then looked up.
        region_id: RegionId,
        /// Status put into the region map before the lookup; `None` removes any entry.
        current_region_status: Option<RegionEngineWithStatus>,
        /// Change passed to `get_engine`.
        region_change: RegionChange,
        /// Checks the result returned by `get_engine`.
        assert: Box<dyn FnOnce(Result<CurrentEngine>)>,
    }
2298
2299    #[tokio::test]
2300    async fn test_current_engine() {
2301        common_telemetry::init_default_ut_logging();
2302
2303        let mut mock_region_server = mock_region_server();
2304        let (engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
2305        mock_region_server.register_engine(engine.clone());
2306
2307        let region_id = RegionId::new(1024, 1);
2308        let tests = vec![
2309            // RegionChange::None
2310            CurrentEngineTest {
2311                region_id,
2312                current_region_status: None,
2313                region_change: RegionChange::None,
2314                assert: Box::new(|result| {
2315                    let err = result.unwrap_err();
2316                    assert_eq!(err.status_code(), StatusCode::RegionNotFound);
2317                }),
2318            },
2319            CurrentEngineTest {
2320                region_id,
2321                current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
2322                region_change: RegionChange::None,
2323                assert: Box::new(|result| {
2324                    let current_engine = result.unwrap();
2325                    assert_matches!(current_engine, CurrentEngine::Engine(_));
2326                }),
2327            },
2328            CurrentEngineTest {
2329                region_id,
2330                current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
2331                region_change: RegionChange::None,
2332                assert: Box::new(|result| {
2333                    let err = result.unwrap_err();
2334                    assert_eq!(err.status_code(), StatusCode::RegionNotReady);
2335                }),
2336            },
2337            CurrentEngineTest {
2338                region_id,
2339                current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
2340                region_change: RegionChange::None,
2341                assert: Box::new(|result| {
2342                    let err = result.unwrap_err();
2343                    assert_eq!(err.status_code(), StatusCode::RegionNotFound);
2344                }),
2345            },
2346            // RegionChange::Register
2347            CurrentEngineTest {
2348                region_id,
2349                current_region_status: None,
2350                region_change: RegionChange::Register(RegionAttribute::Mito),
2351                assert: Box::new(|result| {
2352                    let current_engine = result.unwrap();
2353                    assert_matches!(current_engine, CurrentEngine::Engine(_));
2354                }),
2355            },
2356            CurrentEngineTest {
2357                region_id,
2358                current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
2359                region_change: RegionChange::Register(RegionAttribute::Mito),
2360                assert: Box::new(|result| {
2361                    let current_engine = result.unwrap();
2362                    assert_matches!(current_engine, CurrentEngine::Engine(_));
2363                }),
2364            },
2365            CurrentEngineTest {
2366                region_id,
2367                current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
2368                region_change: RegionChange::Register(RegionAttribute::Mito),
2369                assert: Box::new(|result| {
2370                    let err = result.unwrap_err();
2371                    assert_eq!(err.status_code(), StatusCode::RegionBusy);
2372                }),
2373            },
2374            CurrentEngineTest {
2375                region_id,
2376                current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
2377                region_change: RegionChange::Register(RegionAttribute::Mito),
2378                assert: Box::new(|result| {
2379                    let current_engine = result.unwrap();
2380                    assert_matches!(current_engine, CurrentEngine::Engine(_));
2381                }),
2382            },
2383            // RegionChange::Deregister
2384            CurrentEngineTest {
2385                region_id,
2386                current_region_status: None,
2387                region_change: RegionChange::Deregisters,
2388                assert: Box::new(|result| {
2389                    let current_engine = result.unwrap();
2390                    assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));
2391                }),
2392            },
2393            CurrentEngineTest {
2394                region_id,
2395                current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
2396                region_change: RegionChange::Deregisters,
2397                assert: Box::new(|result| {
2398                    let err = result.unwrap_err();
2399                    assert_eq!(err.status_code(), StatusCode::RegionBusy);
2400                }),
2401            },
2402            CurrentEngineTest {
2403                region_id,
2404                current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
2405                region_change: RegionChange::Deregisters,
2406                assert: Box::new(|result| {
2407                    let current_engine = result.unwrap();
2408                    assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));
2409                }),
2410            },
2411            CurrentEngineTest {
2412                region_id,
2413                current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
2414                region_change: RegionChange::Deregisters,
2415                assert: Box::new(|result| {
2416                    let current_engine = result.unwrap();
2417                    assert_matches!(current_engine, CurrentEngine::Engine(_));
2418                }),
2419            },
2420        ];
2421
2422        for test in tests {
2423            let CurrentEngineTest {
2424                region_id,
2425                current_region_status,
2426                region_change,
2427                assert,
2428            } = test;
2429
2430            // Sets up
2431            if let Some(status) = current_region_status {
2432                mock_region_server
2433                    .inner
2434                    .region_map
2435                    .insert(region_id, status);
2436            } else {
2437                mock_region_server.inner.region_map.remove(&region_id);
2438            }
2439
2440            let result = mock_region_server
2441                .inner
2442                .get_engine(region_id, &region_change);
2443
2444            assert(result);
2445        }
2446    }
2447
2448    #[tokio::test]
2449    async fn test_region_server_parallelism() {
2450        let p = RegionServerParallelism::from_opts(2, Duration::from_millis(1)).unwrap();
2451        let first_query = p.acquire().await;
2452        assert!(first_query.is_ok());
2453        let second_query = p.acquire().await;
2454        assert!(second_query.is_ok());
2455        let third_query = p.acquire().await;
2456        assert!(third_query.is_err());
2457        let err = third_query.unwrap_err();
2458        assert_eq!(
2459            err.output_msg(),
2460            "Failed to acquire permit under timeouts: deadline has elapsed".to_string()
2461        );
2462        drop(first_query);
2463        let forth_query = p.acquire().await;
2464        assert!(forth_query.is_ok());
2465    }
2466
2467    fn mock_region_metadata(region_id: RegionId) -> RegionMetadata {
2468        let mut metadata_builder = RegionMetadataBuilder::new(region_id);
2469        metadata_builder.push_column_metadata(ColumnMetadata {
2470            column_schema: datatypes::schema::ColumnSchema::new(
2471                "timestamp",
2472                ConcreteDataType::timestamp_nanosecond_datatype(),
2473                false,
2474            ),
2475            semantic_type: SemanticType::Timestamp,
2476            column_id: 0,
2477        });
2478        metadata_builder.push_column_metadata(ColumnMetadata {
2479            column_schema: datatypes::schema::ColumnSchema::new(
2480                "file",
2481                ConcreteDataType::string_datatype(),
2482                true,
2483            ),
2484            semantic_type: SemanticType::Tag,
2485            column_id: 1,
2486        });
2487        metadata_builder.push_column_metadata(ColumnMetadata {
2488            column_schema: datatypes::schema::ColumnSchema::new(
2489                "message",
2490                ConcreteDataType::string_datatype(),
2491                true,
2492            ),
2493            semantic_type: SemanticType::Field,
2494            column_id: 2,
2495        });
2496        metadata_builder.primary_key(vec![1]);
2497        metadata_builder.build().unwrap()
2498    }
2499
2500    #[tokio::test]
2501    async fn test_handle_list_metadata_request() {
2502        common_telemetry::init_default_ut_logging();
2503
2504        let mut mock_region_server = mock_region_server();
2505        let region_id_1 = RegionId::new(1, 0);
2506        let region_id_2 = RegionId::new(2, 0);
2507
2508        let metadata_1 = mock_region_metadata(region_id_1);
2509        let metadata_2 = mock_region_metadata(region_id_2);
2510        let metadatas = vec![Some(metadata_1.clone()), Some(metadata_2.clone())];
2511
2512        let metadata_1 = Arc::new(metadata_1);
2513        let metadata_2 = Arc::new(metadata_2);
2514        let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
2515            MITO_ENGINE_NAME,
2516            Box::new(move |region_id| {
2517                if region_id == region_id_1 {
2518                    Ok(metadata_1.clone())
2519                } else if region_id == region_id_2 {
2520                    Ok(metadata_2.clone())
2521                } else {
2522                    error::RegionNotFoundSnafu { region_id }.fail()
2523                }
2524            }),
2525        );
2526
2527        mock_region_server.register_engine(engine.clone());
2528        mock_region_server
2529            .inner
2530            .region_map
2531            .insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
2532        mock_region_server
2533            .inner
2534            .region_map
2535            .insert(region_id_2, RegionEngineWithStatus::Ready(engine.clone()));
2536
2537        // All regions exist.
2538        let list_metadata_request = ListMetadataRequest {
2539            region_ids: vec![region_id_1.as_u64(), region_id_2.as_u64()],
2540        };
2541        let response = mock_region_server
2542            .handle_list_metadata_request(&list_metadata_request)
2543            .await
2544            .unwrap();
2545        let decoded_metadata: Vec<Option<RegionMetadata>> =
2546            serde_json::from_slice(&response.metadata).unwrap();
2547        assert_eq!(metadatas, decoded_metadata);
2548    }
2549
2550    #[tokio::test]
2551    async fn test_handle_list_metadata_not_found() {
2552        common_telemetry::init_default_ut_logging();
2553
2554        let mut mock_region_server = mock_region_server();
2555        let region_id_1 = RegionId::new(1, 0);
2556        let region_id_2 = RegionId::new(2, 0);
2557
2558        let metadata_1 = mock_region_metadata(region_id_1);
2559        let metadatas = vec![Some(metadata_1.clone()), None];
2560
2561        let metadata_1 = Arc::new(metadata_1);
2562        let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
2563            MITO_ENGINE_NAME,
2564            Box::new(move |region_id| {
2565                if region_id == region_id_1 {
2566                    Ok(metadata_1.clone())
2567                } else {
2568                    error::RegionNotFoundSnafu { region_id }.fail()
2569                }
2570            }),
2571        );
2572
2573        mock_region_server.register_engine(engine.clone());
2574        mock_region_server
2575            .inner
2576            .region_map
2577            .insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
2578
2579        // Not in region map.
2580        let list_metadata_request = ListMetadataRequest {
2581            region_ids: vec![region_id_1.as_u64(), region_id_2.as_u64()],
2582        };
2583        let response = mock_region_server
2584            .handle_list_metadata_request(&list_metadata_request)
2585            .await
2586            .unwrap();
2587        let decoded_metadata: Vec<Option<RegionMetadata>> =
2588            serde_json::from_slice(&response.metadata).unwrap();
2589        assert_eq!(metadatas, decoded_metadata);
2590
2591        // Not in region engine.
2592        mock_region_server
2593            .inner
2594            .region_map
2595            .insert(region_id_2, RegionEngineWithStatus::Ready(engine.clone()));
2596        let response = mock_region_server
2597            .handle_list_metadata_request(&list_metadata_request)
2598            .await
2599            .unwrap();
2600        let decoded_metadata: Vec<Option<RegionMetadata>> =
2601            serde_json::from_slice(&response.metadata).unwrap();
2602        assert_eq!(metadatas, decoded_metadata);
2603    }
2604
2605    #[tokio::test]
2606    async fn test_handle_list_metadata_failed() {
2607        common_telemetry::init_default_ut_logging();
2608
2609        let mut mock_region_server = mock_region_server();
2610        let region_id_1 = RegionId::new(1, 0);
2611
2612        let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
2613            MITO_ENGINE_NAME,
2614            Box::new(move |region_id| {
2615                error::UnexpectedSnafu {
2616                    violated: format!("Failed to get region {region_id}"),
2617                }
2618                .fail()
2619            }),
2620        );
2621
2622        mock_region_server.register_engine(engine.clone());
2623        mock_region_server
2624            .inner
2625            .region_map
2626            .insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
2627
2628        // Failed to get.
2629        let list_metadata_request = ListMetadataRequest {
2630            region_ids: vec![region_id_1.as_u64()],
2631        };
2632        mock_region_server
2633            .handle_list_metadata_request(&list_metadata_request)
2634            .await
2635            .unwrap_err();
2636    }
2637
2638    #[tokio::test]
2639    async fn test_handle_remote_dyn_filter_request_requires_query_id() {
2640        let mock_region_server = mock_region_server();
2641
2642        let err = mock_region_server
2643            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2644                query_id: String::new(),
2645                action: Some(remote_dyn_filter_request::Action::Unregister(
2646                    RemoteDynFilterUnregister {
2647                        filter_id: "filter-1".to_string(),
2648                    },
2649                )),
2650            })
2651            .await
2652            .unwrap_err();
2653
2654        assert_matches!(
2655            err,
2656            crate::error::Error::MissingRequiredField { ref name, .. } if name == "query_id"
2657        );
2658    }
2659
2660    #[tokio::test]
2661    async fn test_handle_remote_dyn_filter_request_requires_action() {
2662        let mock_region_server = mock_region_server();
2663
2664        let err = mock_region_server
2665            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2666                query_id: "query-1".to_string(),
2667                action: None,
2668            })
2669            .await
2670            .unwrap_err();
2671
2672        assert_matches!(
2673            err,
2674            crate::error::Error::MissingRequiredField { ref name, .. } if name == "action"
2675        );
2676    }
2677
2678    #[tokio::test]
2679    async fn test_handle_remote_dyn_filter_update_requires_filter_id() {
2680        let mock_region_server = mock_region_server();
2681
2682        let err = mock_region_server
2683            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2684                query_id: "query-1".to_string(),
2685                action: Some(remote_dyn_filter_request::Action::Update(
2686                    RemoteDynFilterUpdate {
2687                        filter_id: String::new(),
2688                        payload: vec![1],
2689                        generation: 1,
2690                        is_complete: false,
2691                    },
2692                )),
2693            })
2694            .await
2695            .unwrap_err();
2696
2697        assert_matches!(
2698            err,
2699            crate::error::Error::MissingRequiredField { ref name, .. } if name == "filter_id"
2700        );
2701    }
2702
2703    #[tokio::test]
2704    async fn test_handle_remote_dyn_filter_update_requires_payload() {
2705        let mock_region_server = mock_region_server();
2706
2707        let err = mock_region_server
2708            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2709                query_id: "query-1".to_string(),
2710                action: Some(remote_dyn_filter_request::Action::Update(
2711                    RemoteDynFilterUpdate {
2712                        filter_id: "filter-1".to_string(),
2713                        payload: Vec::new(),
2714                        generation: 1,
2715                        is_complete: false,
2716                    },
2717                )),
2718            })
2719            .await
2720            .unwrap_err();
2721
2722        assert_matches!(
2723            err,
2724            crate::error::Error::MissingRequiredField { ref name, .. } if name == "payload"
2725        );
2726    }
2727
2728    #[tokio::test]
2729    async fn test_handle_remote_dyn_filter_update_placeholder() {
2730        let mock_region_server = mock_region_server();
2731
2732        let err = mock_region_server
2733            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2734                query_id: "query-1".to_string(),
2735                action: Some(remote_dyn_filter_request::Action::Update(
2736                    RemoteDynFilterUpdate {
2737                        filter_id: "filter-1".to_string(),
2738                        payload: vec![1],
2739                        generation: 1,
2740                        is_complete: false,
2741                    },
2742                )),
2743            })
2744            .await
2745            .unwrap_err();
2746
2747        assert_matches!(err, crate::error::Error::NotYetImplemented { .. });
2748    }
2749
2750    #[tokio::test]
2751    async fn test_handle_remote_dyn_filter_unregister_placeholder() {
2752        let mock_region_server = mock_region_server();
2753
2754        let err = mock_region_server
2755            .handle_remote_dyn_filter_request(&RemoteDynFilterRequest {
2756                query_id: "query-1".to_string(),
2757                action: Some(remote_dyn_filter_request::Action::Unregister(
2758                    RemoteDynFilterUnregister {
2759                        filter_id: "filter-1".to_string(),
2760                    },
2761                )),
2762            })
2763            .await
2764            .unwrap_err();
2765
2766        assert_matches!(err, crate::error::Error::NotYetImplemented { .. });
2767    }
2768}