Skip to main content

datanode/region_server/
catalog.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::sync::Arc;
17
18use datafusion::catalog::{
19    CatalogProvider, CatalogProviderList, MemTable, SchemaProvider, TableProvider,
20};
21use datafusion::datasource::provider_as_source;
22use datafusion::error as df_error;
23use datafusion::error::Result as DfResult;
24use datafusion_common::DataFusionError;
25use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
26use datafusion_expr::{LogicalPlan, TableSource};
27use futures::TryStreamExt;
28use session::context::QueryContextRef;
29use snafu::{OptionExt, ResultExt};
30use store_api::region_info::RegionInfoEntry;
31use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
32use store_api::storage::RegionId;
33
34use crate::error::{DataFusionSnafu, ListStorageSstsSnafu, Result, UnexpectedSnafu};
35use crate::region_server::RegionServer;
36
/// Kinds of reserved internal tables exposed for inspection.
///
/// A kind is recognized from a reserved table name and then mapped to the
/// provider that serves the corresponding data.
// Every variant intentionally shares the `Inspect` prefix, so the clippy lint is silenced.
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum InternalTableKind {
    /// Inspection table backed by `ManifestSstEntry`.
    InspectSstManifest,
    /// Inspection table backed by `StorageSstEntry`.
    InspectSstStorage,
    /// Inspection table backed by `PuffinIndexMetaEntry`.
    InspectSstIndexMeta,
    /// Inspection table backed by `RegionInfoEntry`.
    InspectRegionInfo,
}
47
48impl InternalTableKind {
49    /// Determine if the name is a reserved internal table (case-insensitive).
50    pub fn from_table_name(name: &str) -> Option<Self> {
51        if name.eq_ignore_ascii_case(ManifestSstEntry::reserved_table_name_for_inspection()) {
52            return Some(Self::InspectSstManifest);
53        }
54        if name.eq_ignore_ascii_case(StorageSstEntry::reserved_table_name_for_inspection()) {
55            return Some(Self::InspectSstStorage);
56        }
57        if name.eq_ignore_ascii_case(PuffinIndexMetaEntry::reserved_table_name_for_inspection()) {
58            return Some(Self::InspectSstIndexMeta);
59        }
60        if name.eq_ignore_ascii_case(RegionInfoEntry::reserved_table_name_for_inspection()) {
61            return Some(Self::InspectRegionInfo);
62        }
63        None
64    }
65
66    /// Return the `TableProvider` for the internal table.
67    pub async fn table_provider(&self, server: &RegionServer) -> Result<Arc<dyn TableProvider>> {
68        match self {
69            Self::InspectSstManifest => server.inspect_sst_manifest_provider().await,
70            Self::InspectSstStorage => server.inspect_sst_storage_provider().await,
71            Self::InspectSstIndexMeta => server.inspect_sst_index_meta_provider().await,
72            Self::InspectRegionInfo => server.inspect_region_info_provider().await,
73        }
74    }
75}
76
77impl RegionServer {
78    /// Expose SSTs listed in Manifest as an in-memory table for inspection.
79    pub async fn inspect_sst_manifest_provider(&self) -> Result<Arc<dyn TableProvider>> {
80        let mito = {
81            let guard = self.inner.mito_engine.read().unwrap();
82            guard.as_ref().cloned().context(UnexpectedSnafu {
83                violated: "mito engine not available",
84            })?
85        };
86
87        let entries = mito.all_ssts_from_manifest().await;
88        let schema = ManifestSstEntry::schema().arrow_schema().clone();
89        let batch = ManifestSstEntry::to_record_batch(&entries)
90            .map_err(DataFusionError::from)
91            .context(DataFusionSnafu)?;
92
93        let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
94        Ok(Arc::new(table))
95    }
96
97    /// Expose SSTs found in storage as an in-memory table for inspection.
98    pub async fn inspect_sst_storage_provider(&self) -> Result<Arc<dyn TableProvider>> {
99        let mito = {
100            let guard = self.inner.mito_engine.read().unwrap();
101            guard.as_ref().cloned().context(UnexpectedSnafu {
102                violated: "mito engine not available",
103            })?
104        };
105        let entries = mito
106            .all_ssts_from_storage()
107            .try_collect::<Vec<_>>()
108            .await
109            .context(ListStorageSstsSnafu)?;
110        let schema = StorageSstEntry::schema().arrow_schema().clone();
111        let batch = StorageSstEntry::to_record_batch(&entries)
112            .map_err(DataFusionError::from)
113            .context(DataFusionSnafu)?;
114
115        let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
116        Ok(Arc::new(table))
117    }
118
119    /// Expose index metadata across the engine as an in-memory table.
120    pub async fn inspect_sst_index_meta_provider(&self) -> Result<Arc<dyn TableProvider>> {
121        let mito = {
122            let guard = self.inner.mito_engine.read().unwrap();
123            guard.as_ref().cloned().context(UnexpectedSnafu {
124                violated: "mito engine not available",
125            })?
126        };
127
128        let entries = mito.all_index_metas().await;
129        let schema = PuffinIndexMetaEntry::schema().arrow_schema().clone();
130        let batch = PuffinIndexMetaEntry::to_record_batch(&entries)
131            .map_err(DataFusionError::from)
132            .context(DataFusionSnafu)?;
133
134        let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
135        Ok(Arc::new(table))
136    }
137
138    /// Expose region info across the engine as an in-memory table.
139    pub async fn inspect_region_info_provider(&self) -> Result<Arc<dyn TableProvider>> {
140        let mito = {
141            let guard = self.inner.mito_engine.read().unwrap();
142            guard.as_ref().cloned().context(UnexpectedSnafu {
143                violated: "mito engine not available",
144            })?
145        };
146
147        let entries = mito.all_region_infos().await;
148        let schema = RegionInfoEntry::schema().arrow_schema().clone();
149        let batch = RegionInfoEntry::to_record_batch(&entries)
150            .map_err(DataFusionError::from)
151            .context(DataFusionSnafu)?;
152
153        let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
154        Ok(Arc::new(table))
155    }
156}
157
158/// A catalog list that resolves `TableProvider` by table name:
159/// - For reserved internal names, return inspection providers;
160/// - Otherwise, fall back to the Region provider.
161#[derive(Clone, Debug)]
162pub(crate) struct NameAwareCatalogList {
163    catalog: NameAwareCatalogProvider,
164}
165
166impl NameAwareCatalogList {
167    /// Creates the catalog list.
168    pub fn new(server: RegionServer, region_id: RegionId, query_ctx: QueryContextRef) -> Self {
169        let schema_provider = NameAwareSchemaProvider {
170            server,
171            region_id,
172            query_ctx,
173        };
174        let catalog = NameAwareCatalogProvider {
175            schema: schema_provider,
176        };
177        Self { catalog }
178    }
179}
180
181impl CatalogProviderList for NameAwareCatalogList {
182    fn as_any(&self) -> &dyn std::any::Any {
183        self
184    }
185    fn register_catalog(
186        &self,
187        _name: String,
188        _catalog: Arc<dyn CatalogProvider>,
189    ) -> Option<Arc<dyn CatalogProvider>> {
190        None
191    }
192    fn catalog_names(&self) -> Vec<String> {
193        vec![]
194    }
195    fn catalog(&self, _name: &str) -> Option<Arc<dyn CatalogProvider>> {
196        Some(Arc::new(self.catalog.clone()))
197    }
198}
199
200#[derive(Clone, Debug)]
201struct NameAwareCatalogProvider {
202    schema: NameAwareSchemaProvider,
203}
204
205impl CatalogProvider for NameAwareCatalogProvider {
206    fn as_any(&self) -> &dyn std::any::Any {
207        self
208    }
209    fn schema_names(&self) -> Vec<String> {
210        vec![]
211    }
212    fn schema(&self, _name: &str) -> Option<Arc<dyn SchemaProvider>> {
213        Some(Arc::new(self.schema.clone()))
214    }
215}
216
217#[derive(Clone)]
218struct NameAwareSchemaProvider {
219    server: RegionServer,
220    region_id: RegionId,
221    query_ctx: QueryContextRef,
222}
223
224impl std::fmt::Debug for NameAwareSchemaProvider {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        write!(f, "NameAwareSchemaProvider")
227    }
228}
229
230#[async_trait::async_trait]
231impl SchemaProvider for NameAwareSchemaProvider {
232    fn as_any(&self) -> &dyn std::any::Any {
233        self
234    }
235    fn table_names(&self) -> Vec<String> {
236        vec![]
237    }
238
239    async fn table(&self, name: &str) -> DfResult<Option<Arc<dyn TableProvider>>> {
240        // Resolve inspect providers by reserved names.
241        if let Some(kind) = InternalTableKind::from_table_name(name) {
242            return kind
243                .table_provider(&self.server)
244                .await
245                .map(Some)
246                .map_err(|e| df_error::DataFusionError::External(Box::new(e)));
247        }
248
249        // Fallback to region provider for any other table name.
250        let provider = self
251            .server
252            .table_provider(self.region_id, Some(self.query_ctx.clone()))
253            .await
254            .map_err(|e| df_error::DataFusionError::External(Box::new(e)))?;
255        Ok(Some(provider))
256    }
257
258    fn table_exist(&self, _name: &str) -> bool {
259        true
260    }
261}
262/// Builds a `NameAwareDataSourceInjector` from a logical plan.
263///
264/// It scans the plan to determine:
265/// - whether a Region `TableSource` is required, and
266/// - which internal inspection sources are referenced.
267pub(crate) struct NameAwareDataSourceInjectorBuilder {
268    /// Whether the plan requires a Region `TableSource`.
269    need_region_provider: bool,
270    /// Internal table kinds referenced by the plan.
271    reserved_table_needed: Vec<InternalTableKind>,
272}
273
274impl NameAwareDataSourceInjectorBuilder {
275    /// Walk the `LogicalPlan` to determine whether a Region source is needed,
276    /// and collect the kinds of internal sources required.
277    pub fn from_plan(plan: &LogicalPlan) -> DfResult<Self> {
278        let mut need_region_provider = false;
279        let mut reserved_table_needed = Vec::new();
280        plan.apply(|node| {
281            if let LogicalPlan::TableScan(ts) = node {
282                let name = ts.table_name.to_string();
283                if let Some(kind) = InternalTableKind::from_table_name(&name) {
284                    if !reserved_table_needed.contains(&kind) {
285                        reserved_table_needed.push(kind);
286                    }
287                } else {
288                    // Any normal table scan implies a Region source is needed.
289                    need_region_provider = true;
290                }
291            }
292            Ok(TreeNodeRecursion::Continue)
293        })?;
294
295        Ok(Self {
296            need_region_provider,
297            reserved_table_needed,
298        })
299    }
300
301    pub async fn build(
302        self,
303        server: &RegionServer,
304        region_id: RegionId,
305        query_ctx: QueryContextRef,
306    ) -> Result<NameAwareDataSourceInjector> {
307        let region = if self.need_region_provider {
308            let provider = server.table_provider(region_id, Some(query_ctx)).await?;
309            Some(provider_as_source(provider))
310        } else {
311            None
312        };
313
314        let mut reserved_sources = HashMap::new();
315        for kind in &self.reserved_table_needed {
316            let provider = kind.table_provider(server).await?;
317            reserved_sources.insert(*kind, provider_as_source(provider));
318        }
319
320        Ok(NameAwareDataSourceInjector {
321            reserved_sources,
322            region_source: region,
323        })
324    }
325}
326
327/// Rewrites `LogicalPlan` to inject proper data sources for `TableScan`.
328/// Uses internal sources for reserved tables; otherwise uses the Region source.
329pub(crate) struct NameAwareDataSourceInjector {
330    /// Sources for reserved internal tables, keyed by kind.
331    reserved_sources: HashMap<InternalTableKind, Arc<dyn TableSource>>,
332    /// Optional Region-level source used for normal tables.
333    region_source: Option<Arc<dyn TableSource>>,
334}
335
336impl TreeNodeRewriter for NameAwareDataSourceInjector {
337    type Node = LogicalPlan;
338
339    fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
340        Ok(match node {
341            LogicalPlan::TableScan(mut scan) => {
342                let name = scan.table_name.to_string();
343                if let Some(kind) = InternalTableKind::from_table_name(&name)
344                    && let Some(source) = self.reserved_sources.get(&kind)
345                {
346                    // Matched a reserved internal table: rewrite to its dedicated source.
347                    scan.source = source.clone();
348                } else {
349                    let Some(region) = &self.region_source else {
350                        // Region source required but not constructed; this is unexpected.
351                        return Err(datafusion::error::DataFusionError::Plan(
352                            "region provider not available".to_string(),
353                        ));
354                    };
355                    // Normal table: rewrite to the Region source.
356                    scan.source = region.clone();
357                }
358                Transformed::yes(LogicalPlan::TableScan(scan))
359            }
360            _ => Transformed::no(node),
361        })
362    }
363}
364
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datafusion::catalog::MemTable as DfMemTable;
    use datafusion_common::tree_node::TreeNode;
    use datafusion_expr::{LogicalPlanBuilder, table_scan};
    use datatypes::arrow::array::Int32Array;
    use datatypes::arrow::datatypes::{DataType, Field, Schema};
    use datatypes::arrow::record_batch::RecordBatch;
    use store_api::region_info::RegionInfoEntry;

    // Items under test; `rewrite()` itself is provided by the `TreeNode` trait above.
    use super::*;

    /// Single nullable `Int32` column named `a`.
    fn test_schema() -> Schema {
        Schema::new(vec![Field::new("a", DataType::Int32, true)])
    }

    /// In-memory table over `test_schema()` holding one empty batch.
    fn empty_mem_table() -> Arc<DfMemTable> {
        let schema = Arc::new(test_schema());
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(Int32Array::from(Vec::<i32>::new()))],
        )
        .unwrap();
        Arc::new(DfMemTable::try_new(schema, vec![vec![batch]]).unwrap())
    }

    /// Builds a plan consisting of a single `TableScan` over `table`.
    fn scan_plan(table: &str) -> LogicalPlan {
        table_scan(Some(table), &test_schema(), None)
            .unwrap()
            .build()
            .unwrap()
    }

    /// Rewrites `plan` with `injector` and asserts that the resulting `TableScan` holds
    /// the very same allocation as `expected`.
    ///
    /// `Arc::ptr_eq` is used rather than `std::ptr::eq` on `dyn` pointers, because the
    /// latter also compares vtable metadata, which is not a reliable identity check.
    fn assert_scan_source(
        plan: LogicalPlan,
        injector: &mut NameAwareDataSourceInjector,
        expected: &Arc<dyn TableSource>,
    ) {
        let rewritten = plan.rewrite(injector).unwrap().data;
        let LogicalPlan::TableScan(scan) = rewritten else {
            panic!("expected TableScan after rewrite");
        };
        assert!(Arc::ptr_eq(&scan.source, expected));
    }

    #[test]
    fn test_injector_builder_from_plan_flags() {
        let reserved = ManifestSstEntry::reserved_table_name_for_inspection();

        // plan1: reserved table scan only
        let plan1 = scan_plan(reserved);
        let b1 = NameAwareDataSourceInjectorBuilder::from_plan(&plan1).unwrap();
        assert!(!b1.need_region_provider);
        assert_eq!(
            b1.reserved_table_needed,
            vec![InternalTableKind::InspectSstManifest]
        );

        // plan2: normal table scan only
        let plan2 = scan_plan("normal_table");
        let b2 = NameAwareDataSourceInjectorBuilder::from_plan(&plan2).unwrap();
        assert!(b2.need_region_provider);
        assert!(b2.reserved_table_needed.is_empty());

        // plan3: both reserved and normal (via UNION)
        let plan3 = LogicalPlanBuilder::from(scan_plan(reserved))
            .union(scan_plan("normal_table"))
            .unwrap()
            .build()
            .unwrap();
        let b3 = NameAwareDataSourceInjectorBuilder::from_plan(&plan3).unwrap();
        assert!(b3.need_region_provider);
        assert_eq!(
            b3.reserved_table_needed,
            vec![InternalTableKind::InspectSstManifest]
        );

        // plan4: region-info reserved table scan only
        let region_info = RegionInfoEntry::reserved_table_name_for_inspection();
        let plan4 = scan_plan(region_info);
        let b4 = NameAwareDataSourceInjectorBuilder::from_plan(&plan4).unwrap();
        assert!(!b4.need_region_provider);
        assert_eq!(
            b4.reserved_table_needed,
            vec![InternalTableKind::InspectRegionInfo]
        );
    }

    #[test]
    fn test_rewriter_replaces_with_reserved_source() {
        let plan = scan_plan(ManifestSstEntry::reserved_table_name_for_inspection());
        let source = provider_as_source(empty_mem_table());

        let mut injector = NameAwareDataSourceInjector {
            reserved_sources: HashMap::from([(
                InternalTableKind::InspectSstManifest,
                source.clone(),
            )]),
            region_source: None,
        };

        assert_scan_source(plan, &mut injector, &source);
    }

    #[test]
    fn test_rewriter_replaces_with_region_info_reserved_source() {
        let plan = scan_plan(RegionInfoEntry::reserved_table_name_for_inspection());
        let source = provider_as_source(empty_mem_table());

        let mut injector = NameAwareDataSourceInjector {
            reserved_sources: HashMap::from([(
                InternalTableKind::InspectRegionInfo,
                source.clone(),
            )]),
            region_source: None,
        };

        assert_scan_source(plan, &mut injector, &source);
    }

    #[test]
    fn test_rewriter_replaces_with_region_source_for_normal() {
        let plan = scan_plan("normal_table");
        let region_source = provider_as_source(empty_mem_table());

        let mut injector = NameAwareDataSourceInjector {
            reserved_sources: HashMap::new(),
            region_source: Some(region_source.clone()),
        };

        assert_scan_source(plan, &mut injector, &region_source);
    }
}
546}