table/table/
adapter.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16use std::sync::{Arc, Mutex};
17
18use common_query::stream::StreamScanAdapter;
19use common_recordbatch::OrderOption;
20use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
21use datafusion::catalog::Session;
22use datafusion::datasource::{TableProvider, TableType as DfTableType};
23use datafusion::error::Result as DfResult;
24use datafusion::physical_plan::ExecutionPlan;
25use datafusion_expr::TableProviderFilterPushDown as DfTableProviderFilterPushDown;
26use datafusion_expr::expr::Expr;
27use datafusion_physical_expr::PhysicalSortExpr;
28use datafusion_physical_expr::expressions::Column;
29use store_api::storage::{ScanRequest, VectorSearchRequest};
30
31use crate::table::{TableRef, TableType};
32
/// Adapt greptime's [TableRef] to DataFusion's [TableProvider].
///
/// Besides the wrapped table, the adapter keeps a [ScanRequest] that the
/// hint setters on this type (`with_ordering_hint`, `with_vector_search_hint`)
/// write into and that `scan` completes and clones for every scan.
pub struct DfTableProviderAdapter {
    /// The greptime table that all provider calls are forwarded to.
    table: TableRef,
    /// Scan request guarded by a mutex so it can be updated through `&self`.
    scan_req: Arc<Mutex<ScanRequest>>,
}
38
39impl DfTableProviderAdapter {
40    pub fn new(table: TableRef) -> Self {
41        Self {
42            table,
43            scan_req: Arc::default(),
44        }
45    }
46
47    pub fn table(&self) -> TableRef {
48        self.table.clone()
49    }
50
51    pub fn with_ordering_hint(&self, order_opts: &[OrderOption]) {
52        self.scan_req.lock().unwrap().output_ordering = Some(order_opts.to_vec());
53    }
54
55    pub fn with_vector_search_hint(&self, hint: VectorSearchRequest) {
56        self.scan_req.lock().unwrap().vector_search = Some(hint);
57    }
58
59    pub fn get_vector_search_hint(&self) -> Option<VectorSearchRequest> {
60        self.scan_req.lock().unwrap().vector_search.clone()
61    }
62
63    #[cfg(feature = "testing")]
64    pub fn get_scan_req(&self) -> ScanRequest {
65        self.scan_req.lock().unwrap().clone()
66    }
67}
68
69impl std::fmt::Debug for DfTableProviderAdapter {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        f.debug_struct("DfTableProviderAdapter")
72            .field("table", &self.table.table_info.full_table_name())
73            .finish()
74    }
75}
76
77#[async_trait::async_trait]
78impl TableProvider for DfTableProviderAdapter {
79    fn as_any(&self) -> &dyn Any {
80        self
81    }
82
83    fn schema(&self) -> DfSchemaRef {
84        self.table.schema().arrow_schema().clone()
85    }
86
87    fn table_type(&self) -> DfTableType {
88        match self.table.table_type() {
89            TableType::Base => DfTableType::Base,
90            TableType::View => DfTableType::View,
91            TableType::Temporary => DfTableType::Temporary,
92        }
93    }
94
95    fn get_column_default(&self, column: &str) -> Option<&Expr> {
96        self.table.get_column_default(column)
97    }
98
99    async fn scan(
100        &self,
101        _state: &dyn Session,
102        projection: Option<&Vec<usize>>,
103        filters: &[Expr],
104        limit: Option<usize>,
105    ) -> DfResult<Arc<dyn ExecutionPlan>> {
106        let filters: Vec<Expr> = filters.iter().map(Clone::clone).collect();
107        let request = {
108            let mut request = self.scan_req.lock().unwrap();
109            request.filters = filters;
110            request.projection = projection.cloned();
111            request.limit = limit;
112            request.clone()
113        };
114        let stream = self.table.scan_to_stream(request).await?;
115
116        // build sort physical expr
117        let schema = stream.schema();
118        let sort_expr = stream.output_ordering().map(|order_opts| {
119            order_opts
120                .iter()
121                .map(|order_opt| {
122                    let col_index = schema.column_index_by_name(&order_opt.name).unwrap();
123                    let col_expr = Arc::new(Column::new(&order_opt.name, col_index));
124                    PhysicalSortExpr {
125                        expr: col_expr,
126                        options: order_opt.options,
127                    }
128                })
129                .collect::<Vec<_>>()
130        });
131
132        Ok(Arc::new(
133            StreamScanAdapter::new(stream).with_output_ordering(sort_expr),
134        ))
135    }
136
137    fn supports_filters_pushdown(
138        &self,
139        filters: &[&Expr],
140    ) -> DfResult<Vec<DfTableProviderFilterPushDown>> {
141        let filters = filters.iter().map(|&x| x.clone()).collect::<Vec<_>>();
142        Ok(self
143            .table
144            .supports_filters_pushdown(&filters.iter().collect::<Vec<_>>())
145            .map(|v| v.into_iter().map(Into::into).collect::<Vec<_>>())?)
146    }
147}