1#![allow(dead_code)]
17
18use std::collections::{BTreeMap, HashSet};
19use std::mem;
20
21use datafusion_common::HashMap;
22use datafusion_expr::utils::expr_to_columns;
23use snafu::OptionExt;
24use store_api::metadata::RegionMetadataRef;
25use store_api::storage::{ColumnId, NestedPath, ProjectionInput};
26
27use crate::error::{InvalidRequestSnafu, Result};
28use crate::read::scan_region::PredicateGroup;
29
30#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
67pub struct ReadColumns {
68 cols: Vec<ReadColumn>,
69}
70
71impl ReadColumns {
72 pub fn from_deduped_column_ids<I>(column_ids: I) -> Self
73 where
74 I: IntoIterator<Item = ColumnId>,
75 {
76 let cols = column_ids
77 .into_iter()
78 .map(|col_id| ReadColumn::new(col_id, vec![]))
79 .collect();
80 ReadColumns { cols }
81 }
82
83 pub fn is_empty(&self) -> bool {
84 self.cols.is_empty()
85 }
86
87 pub fn column_ids_iter(&self) -> impl Iterator<Item = ColumnId> + '_ {
88 self.cols.iter().map(|column| column.column_id())
89 }
90
91 pub fn column_ids(&self) -> Vec<ColumnId> {
92 self.column_ids_iter().collect()
93 }
94
95 pub fn columns(&self) -> &[ReadColumn] {
96 &self.cols
97 }
98
99 pub fn estimated_size(&self) -> usize {
100 self.cols.capacity() * mem::size_of::<ReadColumn>()
101 + self
102 .cols
103 .iter()
104 .map(ReadColumn::estimated_size)
105 .sum::<usize>()
106 }
107}
108
109#[derive(Debug, Clone, PartialEq, Eq, Hash)]
110pub struct ReadColumn {
111 column_id: ColumnId,
112 nested_paths: Vec<NestedPath>,
115}
116
117impl ReadColumn {
118 pub fn new(column_id: ColumnId, nested_paths: Vec<NestedPath>) -> Self {
119 Self {
120 column_id,
121 nested_paths,
122 }
123 }
124
125 pub fn column_id(&self) -> ColumnId {
126 self.column_id
127 }
128
129 pub fn nested_paths(&self) -> &[NestedPath] {
130 &self.nested_paths
131 }
132
133 pub fn estimated_size(&self) -> usize {
134 mem::size_of::<ColumnId>()
135 + self.nested_paths.capacity() * mem::size_of::<NestedPath>()
136 + self
137 .nested_paths
138 .iter()
139 .map(|path| {
140 path.capacity() * mem::size_of::<String>()
141 + path.iter().map(|node| node.capacity()).sum::<usize>()
142 })
143 .sum::<usize>()
144 }
145}
146
147pub fn merge(a: ReadColumns, b: ReadColumns) -> ReadColumns {
148 let mut merged = BTreeMap::<ColumnId, Vec<NestedPath>>::new();
149
150 for col in a.cols.into_iter().chain(b.cols) {
151 if let Some(nested_paths) = merged.get_mut(&col.column_id) {
152 if nested_paths.is_empty() || col.nested_paths.is_empty() {
153 *nested_paths = vec![];
154 } else {
155 merge_nested_paths(nested_paths, col.nested_paths);
156 }
157 continue;
158 }
159
160 merged.insert(col.column_id, normalize_nested_paths(col.nested_paths));
161 }
162
163 ReadColumns {
164 cols: merged
165 .into_iter()
166 .map(|(column_id, nested_paths)| ReadColumn {
167 column_id,
168 nested_paths,
169 })
170 .collect(),
171 }
172}
173
174fn normalize_nested_paths(nested_paths: Vec<NestedPath>) -> Vec<NestedPath> {
175 let mut normalized = Vec::with_capacity(nested_paths.len());
176 merge_nested_paths(&mut normalized, nested_paths);
177 normalized
178}
179
180fn merge_nested_paths(merged: &mut Vec<NestedPath>, incoming: Vec<NestedPath>) {
181 for path in incoming {
182 if merged
183 .iter()
184 .any(|existing| path.starts_with(existing.as_slice()))
185 {
186 continue;
187 }
188
189 merged.retain(|existing| !existing.starts_with(path.as_slice()));
190 merged.push(path);
191 }
192}
193
194pub fn read_columns_from_projection(
206 projection: ProjectionInput,
207 metadata: &RegionMetadataRef,
208) -> Result<ReadColumns> {
209 let root_indices = if projection.projection.is_empty() {
210 vec![metadata.time_index_column_pos()]
211 } else {
212 projection.projection
213 };
214
215 let mut paths_by_col: HashMap<String, Vec<NestedPath>> =
216 HashMap::with_capacity(projection.nested_paths.len());
217 for path in projection.nested_paths {
218 let Some((root_name, _)) = path.split_first() else {
219 continue;
220 };
221 paths_by_col
222 .entry(root_name.clone())
223 .or_default()
224 .push(path);
225 }
226
227 let mut read_cols = Vec::with_capacity(root_indices.len());
228 let mut seen = HashSet::with_capacity(root_indices.len());
229 for root_idx in root_indices {
230 if !seen.insert(root_idx) {
231 continue;
232 }
233
234 let col = metadata
235 .column_metadatas
236 .get(root_idx)
237 .with_context(|| InvalidRequestSnafu {
238 region_id: metadata.region_id,
239 reason: format!("projection index {} is out of bounds", root_idx),
240 })?;
241 let col_id = col.column_id;
242
243 let nested_paths = paths_by_col
244 .remove(&col.column_schema.name)
245 .unwrap_or_default();
246
247 read_cols.push(ReadColumn {
248 column_id: col_id,
249 nested_paths,
250 });
251 }
252
253 Ok(ReadColumns { cols: read_cols })
254}
255
256pub fn read_columns_from_predicate(
264 predicate: &PredicateGroup,
265 metadata: &RegionMetadataRef,
266) -> ReadColumns {
267 let mut root_names = HashSet::new();
268 let mut columns = HashSet::new();
269
270 if let Some(p) = predicate.predicate_without_region() {
271 for expr in p.exprs() {
272 columns.clear();
273 if expr_to_columns(expr, &mut columns).is_err() {
274 continue;
275 }
276 root_names.extend(columns.drain().map(|column| column.name));
277 }
278 }
279
280 if let Some(expr) = predicate.region_partition_expr() {
281 expr.collect_column_names(&mut root_names);
282 }
283
284 let mut cols = Vec::with_capacity(root_names.len());
287 for column in &metadata.column_metadatas {
288 if root_names.contains(&column.column_schema.name) {
289 cols.push(ReadColumn::new(column.column_id, vec![]));
290 }
291 }
292
293 ReadColumns { cols }
294}
295
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::SemanticType;
    use datafusion_expr::{col, lit};
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
    use store_api::storage::RegionId;

    use super::*;

    /// An empty projection reads only the time index column, and nested paths
    /// that match no projected column are dropped.
    #[test]
    fn test_read_columns_from_empty_projection() {
        let region_meta = new_test_metadata();
        // `ts` is the time index of the test region and has column id 2.
        let time_index_only = read_columns(vec![root(2)]);

        let actual =
            read_columns_from_projection(ProjectionInput::default(), &region_meta).unwrap();
        assert_eq!(time_index_only, actual);

        let input = ProjectionInput::new(vec![]).with_nested_paths(vec![vec!["1".to_string()]]);
        let actual = read_columns_from_projection(input, &region_meta).unwrap();
        assert_eq!(time_index_only, actual);
    }

    /// Nested paths are attached to the column named by their first segment,
    /// and the output follows the projection order.
    #[test]
    fn test_read_columns_from_projection_with_nested_paths() {
        let region_meta = new_test_metadata();
        let input = ProjectionInput::new(vec![1, 0]).with_nested_paths(field_0_paths());

        let actual = read_columns_from_projection(input, &region_meta).unwrap();

        let expected = read_columns(vec![ReadColumn::new(3, field_0_paths()), root(0)]);
        assert_eq!(expected, actual);
    }

    /// A projection position listed twice yields a single column.
    #[test]
    fn test_read_columns_from_projection_dedups_duplicate_indices() {
        let region_meta = new_test_metadata();
        let input = ProjectionInput::new(vec![1, 1, 0]).with_nested_paths(field_0_paths());

        let actual = read_columns_from_projection(input, &region_meta).unwrap();

        let expected = read_columns(vec![ReadColumn::new(3, field_0_paths()), root(0)]);
        assert_eq!(expected, actual);
    }

    /// A position past the last column is rejected with a descriptive error.
    #[test]
    fn test_read_columns_from_projection_out_of_bound() {
        let region_meta = new_test_metadata();
        // The test region has three columns, so position 3 does not exist.
        let input = ProjectionInput::new(vec![3]);

        let message = read_columns_from_projection(input, &region_meta)
            .unwrap_err()
            .to_string();

        assert!(message.contains("projection index 3 is out of bound"));
    }

    /// Predicate columns come back in metadata order with no nested paths.
    #[test]
    fn test_read_columns_from_predicate_reads_root_columns_only() {
        let region_meta = new_test_metadata();
        let filters = [col("field_0").gt(lit(1)), col("tag_0").eq(lit("a"))];
        let predicate = PredicateGroup::new(region_meta.as_ref(), &filters).unwrap();

        let actual = read_columns_from_predicate(&predicate, &region_meta);

        let expected = read_columns(vec![root(0), root(3)]);
        assert_eq!(expected, actual);
    }

    /// A predicate group without expressions references no column.
    #[test]
    fn test_read_columns_from_predicate_empty() {
        let region_meta = new_test_metadata();
        let predicate = PredicateGroup::new(region_meta.as_ref(), &[]).unwrap();

        let actual = read_columns_from_predicate(&predicate, &region_meta);

        assert!(actual.is_empty());
    }

    /// Merging root-only columns sorts them by column id.
    #[test]
    fn test_merge_read_cols_with_only_root() {
        let left = read_columns(vec![root(3), root(1)]);
        let right = read_columns(vec![root(2)]);

        let merged = merge(left, right);

        assert_eq!(merged, read_columns(vec![root(1), root(2), root(3)]));
    }

    /// Unrelated nested paths of the same column are all kept.
    #[test]
    fn test_merge_read_cols_with_nested_paths() {
        let left = read_columns(vec![ReadColumn::new(1, vec![nested_path(&["j", "a"])])]);
        let right = read_columns(vec![ReadColumn::new(
            1,
            vec![nested_path(&["j", "b"]), nested_path(&["j", "c"])],
        )]);

        let merged = merge(left, right);

        let expected = read_columns(vec![ReadColumn::new(
            1,
            vec![
                nested_path(&["j", "a"]),
                nested_path(&["j", "b"]),
                nested_path(&["j", "c"]),
            ],
        )]);
        assert_eq!(merged, expected);
    }

    /// An empty path list overrides nested paths, and a shorter path overrides
    /// a longer one that it prefixes.
    #[test]
    fn test_merge_read_cols_with_column_override() {
        let left = read_columns(vec![
            ReadColumn::new(1, vec![nested_path(&["j", "a"])]),
            ReadColumn::new(2, vec![nested_path(&["k", "b"])]),
        ]);
        let right = read_columns(vec![
            root(1),
            ReadColumn::new(2, vec![nested_path(&["k", "b", "c"])]),
        ]);

        let merged = merge(left, right);

        let expected = read_columns(vec![
            root(1),
            ReadColumn::new(2, vec![nested_path(&["k", "b"])]),
        ]);
        assert_eq!(merged, expected);
    }

    /// Redundant paths inside one input and across inputs collapse to the
    /// shortest prefix.
    #[test]
    fn test_merge_read_cols_dedups_redundant_nested_paths() {
        let left = read_columns(vec![ReadColumn::new(
            1,
            vec![
                nested_path(&["j", "a", "b"]),
                nested_path(&["j", "a"]),
                nested_path(&["j", "a", "b", "c"]),
            ],
        )]);
        let right = read_columns(vec![ReadColumn::new(1, vec![nested_path(&["j", "a"])])]);

        let merged = merge(left, right);

        let expected = read_columns(vec![ReadColumn::new(1, vec![nested_path(&["j", "a"])])]);
        assert_eq!(merged, expected);
    }

    /// Builds region metadata with three columns, in this order:
    /// `tag_0` (id 0, tag), `field_0` (id 3, field) and `ts` (id 2, time index).
    fn new_test_metadata() -> RegionMetadataRef {
        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
        builder
            .push_column_metadata(column_meta(
                "tag_0",
                ConcreteDataType::string_datatype(),
                true,
                SemanticType::Tag,
                0,
            ))
            .push_column_metadata(column_meta(
                "field_0",
                ConcreteDataType::string_datatype(),
                true,
                SemanticType::Field,
                3,
            ))
            .push_column_metadata(column_meta(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
                SemanticType::Timestamp,
                2,
            ));
        builder.primary_key(vec![0]);
        Arc::new(builder.build().unwrap())
    }

    /// Assembles one column's metadata from its schema parts.
    fn column_meta(
        name: &str,
        data_type: ConcreteDataType,
        is_nullable: bool,
        semantic_type: SemanticType,
        column_id: ColumnId,
    ) -> ColumnMetadata {
        ColumnMetadata {
            column_schema: ColumnSchema::new(name.to_string(), data_type, is_nullable),
            semantic_type,
            column_id,
        }
    }

    /// Wraps columns into a [`ReadColumns`] without any normalization.
    fn read_columns(cols: Vec<ReadColumn>) -> ReadColumns {
        ReadColumns { cols }
    }

    /// A column read with an empty nested path list.
    fn root(column_id: ColumnId) -> ReadColumn {
        ReadColumn::new(column_id, vec![])
    }

    /// The nested paths requested under `field_0` by the projection tests.
    fn field_0_paths() -> Vec<NestedPath> {
        vec![
            nested_path(&["field_0", "a"]),
            nested_path(&["field_0", "b", "c"]),
        ]
    }

    /// Builds a nested path from string segments.
    fn nested_path(parts: &[&str]) -> NestedPath {
        parts.iter().copied().map(String::from).collect()
    }
}