Skip to main content

metric_engine/engine/
read.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use api::v1::SemanticType;
18use common_telemetry::{debug, error, tracing};
19use datafusion::logical_expr::{self, Expr};
20use snafu::{OptionExt, ResultExt};
21use store_api::metadata::{RegionMetadataBuilder, RegionMetadataRef};
22use store_api::metric_engine_consts::DATA_SCHEMA_TABLE_ID_COLUMN_NAME;
23use store_api::region_engine::{RegionEngine, RegionScannerRef};
24use store_api::storage::{RegionId, ScanRequest, SequenceNumber};
25
26use crate::engine::MetricEngineInner;
27use crate::error::{
28    InvalidMetadataSnafu, LogicalRegionNotFoundSnafu, MitoReadOperationSnafu, Result,
29};
30use crate::metrics::MITO_OPERATION_ELAPSED;
31use crate::utils;
32
33impl MetricEngineInner {
34    #[tracing::instrument(skip_all)]
35    pub async fn read_region(
36        &self,
37        region_id: RegionId,
38        request: ScanRequest,
39    ) -> Result<RegionScannerRef> {
40        let is_reading_physical_region = self.is_physical_region(region_id);
41
42        if is_reading_physical_region {
43            debug!(
44                "Metric region received read request {request:?} on physical region {region_id:?}"
45            );
46            self.read_physical_region(region_id, request).await
47        } else {
48            self.read_logical_region(region_id, request).await
49        }
50    }
51
52    /// Proxy the read request to underlying physical region (mito engine).
53    async fn read_physical_region(
54        &self,
55        region_id: RegionId,
56        request: ScanRequest,
57    ) -> Result<RegionScannerRef> {
58        let _timer = MITO_OPERATION_ELAPSED
59            .with_label_values(&["read_physical"])
60            .start_timer();
61
62        self.mito
63            .handle_query(region_id, request)
64            .await
65            .context(MitoReadOperationSnafu)
66    }
67
68    async fn read_logical_region(
69        &self,
70        logical_region_id: RegionId,
71        request: ScanRequest,
72    ) -> Result<RegionScannerRef> {
73        let _timer = MITO_OPERATION_ELAPSED
74            .with_label_values(&["read"])
75            .start_timer();
76
77        let physical_region_id = self.get_physical_region_id(logical_region_id).await?;
78        let data_region_id = utils::to_data_region_id(physical_region_id);
79        let request = self
80            .transform_request(physical_region_id, logical_region_id, request)
81            .await?;
82        let mut scanner = self
83            .mito
84            .handle_query(data_region_id, request)
85            .await
86            .context(MitoReadOperationSnafu)?;
87        scanner.set_logical_region(true);
88        scanner.set_query_load_region_id(data_region_id);
89
90        Ok(scanner)
91    }
92
93    pub async fn get_last_seq_num(&self, region_id: RegionId) -> Result<SequenceNumber> {
94        let region_id = if self.is_physical_region(region_id) {
95            region_id
96        } else {
97            let physical_region_id = self.get_physical_region_id(region_id).await?;
98            utils::to_data_region_id(physical_region_id)
99        };
100        self.mito
101            .get_committed_sequence(region_id)
102            .await
103            .context(MitoReadOperationSnafu)
104    }
105
106    pub async fn load_region_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef> {
107        let is_reading_physical_region =
108            self.state.read().unwrap().exist_physical_region(region_id);
109
110        if is_reading_physical_region {
111            self.mito
112                .get_metadata(region_id)
113                .await
114                .context(MitoReadOperationSnafu)
115        } else {
116            let physical_region_id = self.get_physical_region_id(region_id).await?;
117            self.logical_region_metadata(physical_region_id, region_id)
118                .await
119        }
120    }
121
122    /// Returns true if it's a physical region.
123    pub fn is_physical_region(&self, region_id: RegionId) -> bool {
124        self.state.read().unwrap().exist_physical_region(region_id)
125    }
126
127    async fn get_physical_region_id(&self, logical_region_id: RegionId) -> Result<RegionId> {
128        let state = &self.state.read().unwrap();
129        state
130            .get_physical_region_id(logical_region_id)
131            .with_context(|| {
132                error!("Trying to read an nonexistent region {logical_region_id}");
133                LogicalRegionNotFoundSnafu {
134                    region_id: logical_region_id,
135                }
136            })
137    }
138
139    /// Transform the [ScanRequest] from logical region to physical data region.
140    async fn transform_request(
141        &self,
142        physical_region_id: RegionId,
143        logical_region_id: RegionId,
144        mut request: ScanRequest,
145    ) -> Result<ScanRequest> {
146        // transform projection
147        let physical_projection = match request.projection_input.as_ref() {
148            Some(projection_input) => {
149                self.transform_projection(
150                    physical_region_id,
151                    logical_region_id,
152                    &projection_input.projection,
153                )
154                .await?
155            }
156            None => {
157                self.default_projection(physical_region_id, logical_region_id)
158                    .await?
159            }
160        };
161
162        // Rewrite the top-level projection from logical-region schema indices to
163        // physical-region schema indices. `nested_paths` are left unchanged because
164        // they are expressed by column name rather than schema index.
165        request.projection_input.get_or_insert_default().projection = physical_projection;
166
167        request
168            .filters
169            .push(self.table_id_filter(logical_region_id));
170
171        Ok(request)
172    }
173
174    /// Generate a filter on the table id column.
175    fn table_id_filter(&self, logical_region_id: RegionId) -> Expr {
176        logical_expr::col(DATA_SCHEMA_TABLE_ID_COLUMN_NAME)
177            .eq(logical_expr::lit(logical_region_id.table_id()))
178    }
179
180    /// Transform the projection from logical region to physical region.
181    ///
182    /// This method will not preserve internal columns.
183    pub async fn transform_projection(
184        &self,
185        physical_region_id: RegionId,
186        logical_region_id: RegionId,
187        origin_projection: &[usize],
188    ) -> Result<Vec<usize>> {
189        // project on logical columns
190        let all_logical_columns = self
191            .load_logical_column_names(physical_region_id, logical_region_id)
192            .await?;
193        let projected_logical_names = origin_projection
194            .iter()
195            .map(|i| all_logical_columns[*i].clone())
196            .collect::<Vec<_>>();
197
198        // generate physical projection
199        let mut physical_projection = Vec::with_capacity(origin_projection.len());
200        let data_region_id = utils::to_data_region_id(physical_region_id);
201        let physical_metadata = self
202            .mito
203            .get_metadata(data_region_id)
204            .await
205            .context(MitoReadOperationSnafu)?;
206
207        for name in projected_logical_names {
208            // Safety: logical columns is a strict subset of physical columns
209            physical_projection.push(physical_metadata.column_index_by_name(&name).unwrap());
210        }
211
212        Ok(physical_projection)
213    }
214
215    /// Default projection for a logical region. Includes non-internal columns
216    pub async fn default_projection(
217        &self,
218        physical_region_id: RegionId,
219        logical_region_id: RegionId,
220    ) -> Result<Vec<usize>> {
221        let logical_columns = self
222            .load_logical_column_names(physical_region_id, logical_region_id)
223            .await?;
224        let mut projection = Vec::with_capacity(logical_columns.len());
225        let data_region_id = utils::to_data_region_id(physical_region_id);
226        let physical_metadata = self
227            .mito
228            .get_metadata(data_region_id)
229            .await
230            .context(MitoReadOperationSnafu)?;
231        for name in logical_columns {
232            // Safety: logical columns is a strict subset of physical columns
233            projection.push(physical_metadata.column_index_by_name(&name).unwrap());
234        }
235
236        Ok(projection)
237    }
238
239    pub async fn logical_region_metadata(
240        &self,
241        physical_region_id: RegionId,
242        logical_region_id: RegionId,
243    ) -> Result<RegionMetadataRef> {
244        let logical_columns = self
245            .load_logical_columns(physical_region_id, logical_region_id)
246            .await?;
247
248        let primary_keys = logical_columns
249            .iter()
250            .filter_map(|col| {
251                if col.semantic_type == SemanticType::Tag {
252                    Some(col.column_id)
253                } else {
254                    None
255                }
256            })
257            .collect::<Vec<_>>();
258
259        let mut logical_metadata_builder = RegionMetadataBuilder::new(logical_region_id);
260        for col in logical_columns {
261            logical_metadata_builder.push_column_metadata(col);
262        }
263        logical_metadata_builder.primary_key(primary_keys);
264        let logical_metadata = logical_metadata_builder
265            .build()
266            .context(InvalidMetadataSnafu)?;
267
268        Ok(Arc::new(logical_metadata))
269    }
270}
271
#[cfg(test)]
impl MetricEngineInner {
    /// Test-only helper that scans a region and returns the record batch stream.
    ///
    /// A physical region is scanned directly with the given request. For any
    /// other region id the request is first rewritten with `transform_request`
    /// and then executed on the resolved physical region. Every error is
    /// converted into a `BoxedError`.
    ///
    /// NOTE(review): the logical path scans `physical_region_id` itself, while
    /// `read_logical_region` scans `utils::to_data_region_id(physical_region_id)`
    /// — confirm the two ids are interchangeable here.
    pub async fn scan_to_stream(
        &self,
        region_id: RegionId,
        request: ScanRequest,
    ) -> Result<common_recordbatch::SendableRecordBatchStream, common_error::ext::BoxedError> {
        use common_error::ext::BoxedError;

        if self.is_physical_region(region_id) {
            return self
                .mito
                .scan_to_stream(region_id, request)
                .await
                .map_err(BoxedError::new);
        }

        let physical_region_id = self
            .get_physical_region_id(region_id)
            .await
            .map_err(BoxedError::new)?;
        let physical_request = self
            .transform_request(physical_region_id, region_id, request)
            .await
            .map_err(BoxedError::new)?;

        self.mito
            .scan_to_stream(physical_region_id, physical_request)
            .await
            .map_err(BoxedError::new)
    }
}
302
#[cfg(test)]
mod test {
    use store_api::region_request::RegionRequest;

    use super::*;
    use crate::test_util::{
        TestEnv, alter_logical_region_add_tag_columns, create_logical_region_request,
    };

    /// Checks that `transform_request` rewrites the projection to physical
    /// indices and appends the table-id filter, both for an explicit
    /// projection and for the default one.
    #[tokio::test]
    async fn test_transform_scan_req() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;

        let physical_region_id = env.default_physical_region_id();
        let logical_region_id = env.default_logical_region_id();

        // A second logical region on the same physical region.
        let second_logical_region_id = RegionId::new(1112345678, 999);
        let create_second_region =
            create_logical_region_request(&["123", "456", "789"], physical_region_id, "blabla");
        env.metric()
            .handle_request(
                second_logical_region_id,
                RegionRequest::Create(create_second_region),
            )
            .await
            .unwrap();

        // Extra tag columns for the first logical region.
        let add_tag_columns =
            alter_logical_region_add_tag_columns(123456, &["987", "798", "654", "321"]);
        env.metric()
            .handle_request(logical_region_id, RegionRequest::Alter(add_tag_columns))
            .await
            .unwrap();

        // Case 1: explicit projection.
        let explicit_request = ScanRequest {
            projection_input: Some(vec![0, 1, 2, 3, 4, 5, 6].into()),
            filters: vec![],
            ..Default::default()
        };
        let transformed = env
            .metric()
            .inner
            .transform_request(physical_region_id, logical_region_id, explicit_request)
            .await
            .unwrap();

        assert_eq!(
            transformed.projection_indices().unwrap(),
            &[11, 10, 9, 8, 0, 1, 4]
        );
        assert_eq!(transformed.filters.len(), 1);
        let expected_filter = logical_expr::col(DATA_SCHEMA_TABLE_ID_COLUMN_NAME)
            .eq(logical_expr::lit(logical_region_id.table_id()));
        assert_eq!(transformed.filters[0], expected_filter);

        // Case 2: no projection given, so the default projection is used.
        let transformed = env
            .metric()
            .inner
            .transform_request(physical_region_id, logical_region_id, ScanRequest::default())
            .await
            .unwrap();
        assert_eq!(
            transformed.projection_indices().unwrap(),
            &[11, 10, 9, 8, 0, 1, 4]
        );
    }
}