// store_api/sst_entry.rs
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15use std::sync::Arc;
16
17use common_recordbatch::DfRecordBatch;
18use common_time::Timestamp;
19use common_time::timestamp::TimeUnit;
20use datafusion_common::DataFusionError;
21use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, LogicalTableSource};
22use datatypes::arrow::array::{
23    ArrayRef, BooleanArray, TimestampMillisecondArray, TimestampNanosecondArray, UInt8Array,
24    UInt32Array, UInt64Array,
25};
26use datatypes::arrow::error::ArrowError;
27use datatypes::arrow_array::StringArray;
28use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
29use serde::{Deserialize, Serialize};
30
31use crate::storage::{RegionGroup, RegionId, RegionNumber, RegionSeq, ScanRequest, TableId};
32
/// An entry describing a SST file known by the engine's manifest.
///
/// Field order must stay in sync with [`ManifestSstEntry::schema`] and the
/// column order built in [`ManifestSstEntry::to_record_batch`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestSstEntry {
    /// The table directory this file belongs to.
    pub table_dir: String,
    /// The region id of region that refers to the file.
    pub region_id: RegionId,
    /// The table id this file belongs to.
    pub table_id: TableId,
    /// The region number this file belongs to.
    pub region_number: RegionNumber,
    /// The region group this file belongs to.
    pub region_group: RegionGroup,
    /// The region sequence this file belongs to.
    pub region_sequence: RegionSeq,
    /// Engine-specific file identifier (string form).
    pub file_id: String,
    /// Index version, increment when the index file is rebuilt.
    pub index_version: u64,
    /// SST level.
    pub level: u8,
    /// Full path of the SST file in object store.
    pub file_path: String,
    /// File size in bytes.
    pub file_size: u64,
    /// Full path of the index file in object store.
    /// `None` when the SST has no index file.
    pub index_file_path: Option<String>,
    /// File size of the index file in object store.
    pub index_file_size: Option<u64>,
    /// Number of rows in the SST.
    pub num_rows: u64,
    /// Number of row groups in the SST.
    pub num_row_groups: u64,
    /// Number of series in the SST.
    pub num_series: Option<u64>,
    /// Min timestamp. Kept in its native time unit; converted to nanoseconds
    /// when building a record batch.
    pub min_ts: Timestamp,
    /// Max timestamp. Kept in its native time unit; converted to nanoseconds
    /// when building a record batch.
    pub max_ts: Timestamp,
    /// The sequence number associated with this file.
    pub sequence: Option<u64>,
    /// The region id of region that creates the file.
    pub origin_region_id: RegionId,
    /// The node id fetched from the manifest.
    pub node_id: Option<u64>,
    /// Whether this file is visible in current version.
    pub visible: bool,
}
81
82impl ManifestSstEntry {
83    /// Returns the schema of the manifest sst entry.
84    pub fn schema() -> SchemaRef {
85        use datatypes::prelude::ConcreteDataType as Ty;
86        Arc::new(Schema::new(vec![
87            ColumnSchema::new("table_dir", Ty::string_datatype(), false),
88            ColumnSchema::new("region_id", Ty::uint64_datatype(), false),
89            ColumnSchema::new("table_id", Ty::uint32_datatype(), false),
90            ColumnSchema::new("region_number", Ty::uint32_datatype(), false),
91            ColumnSchema::new("region_group", Ty::uint8_datatype(), false),
92            ColumnSchema::new("region_sequence", Ty::uint32_datatype(), false),
93            ColumnSchema::new("file_id", Ty::string_datatype(), false),
94            ColumnSchema::new("index_version", Ty::uint64_datatype(), false),
95            ColumnSchema::new("level", Ty::uint8_datatype(), false),
96            ColumnSchema::new("file_path", Ty::string_datatype(), false),
97            ColumnSchema::new("file_size", Ty::uint64_datatype(), false),
98            ColumnSchema::new("index_file_path", Ty::string_datatype(), true),
99            ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true),
100            ColumnSchema::new("num_rows", Ty::uint64_datatype(), false),
101            ColumnSchema::new("num_row_groups", Ty::uint64_datatype(), false),
102            ColumnSchema::new("num_series", Ty::uint64_datatype(), true),
103            ColumnSchema::new("min_ts", Ty::timestamp_nanosecond_datatype(), true),
104            ColumnSchema::new("max_ts", Ty::timestamp_nanosecond_datatype(), true),
105            ColumnSchema::new("sequence", Ty::uint64_datatype(), true),
106            ColumnSchema::new("origin_region_id", Ty::uint64_datatype(), false),
107            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
108            ColumnSchema::new("visible", Ty::boolean_datatype(), false),
109        ]))
110    }
111
112    /// Converts a list of manifest sst entries to a record batch.
113    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
114        let schema = Self::schema();
115        let table_dirs = entries.iter().map(|e| e.table_dir.as_str());
116        let region_ids = entries.iter().map(|e| e.region_id.as_u64());
117        let table_ids = entries.iter().map(|e| e.table_id);
118        let region_numbers = entries.iter().map(|e| e.region_number);
119        let region_groups = entries.iter().map(|e| e.region_group);
120        let region_sequences = entries.iter().map(|e| e.region_sequence);
121        let file_ids = entries.iter().map(|e| e.file_id.as_str());
122        let index_versions = entries.iter().map(|e| e.index_version);
123        let levels = entries.iter().map(|e| e.level);
124        let file_paths = entries.iter().map(|e| e.file_path.as_str());
125        let file_sizes = entries.iter().map(|e| e.file_size);
126        let index_file_paths = entries.iter().map(|e| e.index_file_path.as_ref());
127        let index_file_sizes = entries.iter().map(|e| e.index_file_size);
128        let num_rows = entries.iter().map(|e| e.num_rows);
129        let num_row_groups = entries.iter().map(|e| e.num_row_groups);
130        let num_series = entries.iter().map(|e| e.num_series);
131        let min_ts = entries.iter().map(|e| {
132            e.min_ts
133                .convert_to(TimeUnit::Nanosecond)
134                .map(|ts| ts.value())
135        });
136        let max_ts = entries.iter().map(|e| {
137            e.max_ts
138                .convert_to(TimeUnit::Nanosecond)
139                .map(|ts| ts.value())
140        });
141        let sequences = entries.iter().map(|e| e.sequence);
142        let origin_region_ids = entries.iter().map(|e| e.origin_region_id.as_u64());
143        let node_ids = entries.iter().map(|e| e.node_id);
144        let visible_flags = entries.iter().map(|e| Some(e.visible));
145
146        let columns: Vec<ArrayRef> = vec![
147            Arc::new(StringArray::from_iter_values(table_dirs)),
148            Arc::new(UInt64Array::from_iter_values(region_ids)),
149            Arc::new(UInt32Array::from_iter_values(table_ids)),
150            Arc::new(UInt32Array::from_iter_values(region_numbers)),
151            Arc::new(UInt8Array::from_iter_values(region_groups)),
152            Arc::new(UInt32Array::from_iter_values(region_sequences)),
153            Arc::new(StringArray::from_iter_values(file_ids)),
154            Arc::new(UInt64Array::from_iter(index_versions)),
155            Arc::new(UInt8Array::from_iter_values(levels)),
156            Arc::new(StringArray::from_iter_values(file_paths)),
157            Arc::new(UInt64Array::from_iter_values(file_sizes)),
158            Arc::new(StringArray::from_iter(index_file_paths)),
159            Arc::new(UInt64Array::from_iter(index_file_sizes)),
160            Arc::new(UInt64Array::from_iter_values(num_rows)),
161            Arc::new(UInt64Array::from_iter_values(num_row_groups)),
162            Arc::new(UInt64Array::from_iter(num_series)),
163            Arc::new(TimestampNanosecondArray::from_iter(min_ts)),
164            Arc::new(TimestampNanosecondArray::from_iter(max_ts)),
165            Arc::new(UInt64Array::from_iter(sequences)),
166            Arc::new(UInt64Array::from_iter_values(origin_region_ids)),
167            Arc::new(UInt64Array::from_iter(node_ids)),
168            Arc::new(BooleanArray::from_iter(visible_flags)),
169        ];
170
171        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
172    }
173
174    /// Reserved internal inspect table name.
175    ///
176    /// This table name is used only for building logical plans on the
177    /// frontend -> datanode path. It is not user-visible and cannot be
178    /// referenced by user queries.
179    pub fn reserved_table_name_for_inspection() -> &'static str {
180        "__inspect/__mito/__sst_manifest"
181    }
182
183    /// Builds a logical plan for scanning the manifest sst entries.
184    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
185        build_plan_helper(
186            scan_request,
187            Self::reserved_table_name_for_inspection(),
188            Self::schema(),
189        )
190    }
191}
192
/// An entry describing a SST file listed from storage layer directly.
///
/// Unlike [`ManifestSstEntry`], most metadata is optional because object
/// listing may not return it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct StorageSstEntry {
    /// Full path of the SST file in object store.
    pub file_path: String,
    /// File size in bytes, if reported by the storage layer.
    pub file_size: Option<u64>,
    /// Last modified time in milliseconds since epoch, if available from storage.
    pub last_modified_ms: Option<Timestamp>,
    /// The node id fetched from the manifest.
    pub node_id: Option<u64>,
}
205
206impl StorageSstEntry {
207    /// Returns the schema of the storage sst entry.
208    pub fn schema() -> SchemaRef {
209        use datatypes::prelude::ConcreteDataType as Ty;
210        Arc::new(Schema::new(vec![
211            ColumnSchema::new("file_path", Ty::string_datatype(), false),
212            ColumnSchema::new("file_size", Ty::uint64_datatype(), true),
213            ColumnSchema::new(
214                "last_modified_ms",
215                Ty::timestamp_millisecond_datatype(),
216                true,
217            ),
218            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
219        ]))
220    }
221
222    /// Converts a list of storage sst entries to a record batch.
223    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
224        let schema = Self::schema();
225        let file_paths = entries.iter().map(|e| e.file_path.as_str());
226        let file_sizes = entries.iter().map(|e| e.file_size);
227        let last_modified_ms = entries.iter().map(|e| {
228            e.last_modified_ms
229                .and_then(|ts| ts.convert_to(TimeUnit::Millisecond).map(|ts| ts.value()))
230        });
231        let node_ids = entries.iter().map(|e| e.node_id);
232
233        let columns: Vec<ArrayRef> = vec![
234            Arc::new(StringArray::from_iter_values(file_paths)),
235            Arc::new(UInt64Array::from_iter(file_sizes)),
236            Arc::new(TimestampMillisecondArray::from_iter(last_modified_ms)),
237            Arc::new(UInt64Array::from_iter(node_ids)),
238        ];
239
240        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
241    }
242
243    /// Reserved internal inspect table name.
244    ///
245    /// This table name is used only for building logical plans on the
246    /// frontend -> datanode path. It is not user-visible and cannot be
247    /// referenced by user queries.
248    pub fn reserved_table_name_for_inspection() -> &'static str {
249        "__inspect/__mito/__sst_storage"
250    }
251
252    /// Builds a logical plan for scanning the storage sst entries.
253    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
254        build_plan_helper(
255            scan_request,
256            Self::reserved_table_name_for_inspection(),
257            Self::schema(),
258        )
259    }
260}
261
/// An entry describing puffin index metadata for inspection.
///
/// One entry corresponds to one index target (e.g. one indexed column)
/// stored as a blob inside a puffin index file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PuffinIndexMetaEntry {
    /// The table directory this index belongs to.
    pub table_dir: String,
    /// The full path of the index file in object store.
    pub index_file_path: String,
    /// The region id referencing the index file.
    pub region_id: RegionId,
    /// The table id referencing the index file.
    pub table_id: TableId,
    /// The region number referencing the index file.
    pub region_number: RegionNumber,
    /// The region group referencing the index file.
    pub region_group: RegionGroup,
    /// The region sequence referencing the index file.
    pub region_sequence: RegionSeq,
    /// Engine-specific file identifier (string form).
    pub file_id: String,
    /// Size of the index file in object store (if available).
    pub index_file_size: Option<u64>,
    /// Logical index type (`bloom_filter`, `fulltext_bloom`, `fulltext_tantivy`, `inverted`).
    pub index_type: String,
    /// Target type (`column`, ...).
    pub target_type: String,
    /// Encoded target key string.
    pub target_key: String,
    /// Structured JSON describing the target.
    pub target_json: String,
    /// Size of the blob storing this target.
    pub blob_size: u64,
    /// Structured JSON describing index-specific metadata (if available).
    pub meta_json: Option<String>,
    /// Node id associated with the index file (if known).
    pub node_id: Option<u64>,
}
298
299impl PuffinIndexMetaEntry {
300    /// Returns the schema describing puffin index metadata entries.
301    pub fn schema() -> SchemaRef {
302        use datatypes::prelude::ConcreteDataType as Ty;
303        Arc::new(Schema::new(vec![
304            ColumnSchema::new("table_dir", Ty::string_datatype(), false),
305            ColumnSchema::new("index_file_path", Ty::string_datatype(), false),
306            ColumnSchema::new("region_id", Ty::uint64_datatype(), false),
307            ColumnSchema::new("table_id", Ty::uint32_datatype(), false),
308            ColumnSchema::new("region_number", Ty::uint32_datatype(), false),
309            ColumnSchema::new("region_group", Ty::uint8_datatype(), false),
310            ColumnSchema::new("region_sequence", Ty::uint32_datatype(), false),
311            ColumnSchema::new("file_id", Ty::string_datatype(), false),
312            ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true),
313            ColumnSchema::new("index_type", Ty::string_datatype(), false),
314            ColumnSchema::new("target_type", Ty::string_datatype(), false),
315            ColumnSchema::new("target_key", Ty::string_datatype(), false),
316            ColumnSchema::new("target_json", Ty::string_datatype(), false),
317            ColumnSchema::new("blob_size", Ty::uint64_datatype(), false),
318            ColumnSchema::new("meta_json", Ty::string_datatype(), true),
319            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
320        ]))
321    }
322
323    /// Converts a list of puffin index metadata entries to a record batch.
324    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
325        let schema = Self::schema();
326        let table_dirs = entries.iter().map(|e| e.table_dir.as_str());
327        let index_file_paths = entries.iter().map(|e| e.index_file_path.as_str());
328        let region_ids = entries.iter().map(|e| e.region_id.as_u64());
329        let table_ids = entries.iter().map(|e| e.table_id);
330        let region_numbers = entries.iter().map(|e| e.region_number);
331        let region_groups = entries.iter().map(|e| e.region_group);
332        let region_sequences = entries.iter().map(|e| e.region_sequence);
333        let file_ids = entries.iter().map(|e| e.file_id.as_str());
334        let index_file_sizes = entries.iter().map(|e| e.index_file_size);
335        let index_types = entries.iter().map(|e| e.index_type.as_str());
336        let target_types = entries.iter().map(|e| e.target_type.as_str());
337        let target_keys = entries.iter().map(|e| e.target_key.as_str());
338        let target_jsons = entries.iter().map(|e| e.target_json.as_str());
339        let blob_sizes = entries.iter().map(|e| e.blob_size);
340        let meta_jsons = entries.iter().map(|e| e.meta_json.as_deref());
341        let node_ids = entries.iter().map(|e| e.node_id);
342
343        let columns: Vec<ArrayRef> = vec![
344            Arc::new(StringArray::from_iter_values(table_dirs)),
345            Arc::new(StringArray::from_iter_values(index_file_paths)),
346            Arc::new(UInt64Array::from_iter_values(region_ids)),
347            Arc::new(UInt32Array::from_iter_values(table_ids)),
348            Arc::new(UInt32Array::from_iter_values(region_numbers)),
349            Arc::new(UInt8Array::from_iter_values(region_groups)),
350            Arc::new(UInt32Array::from_iter_values(region_sequences)),
351            Arc::new(StringArray::from_iter_values(file_ids)),
352            Arc::new(UInt64Array::from_iter(index_file_sizes)),
353            Arc::new(StringArray::from_iter_values(index_types)),
354            Arc::new(StringArray::from_iter_values(target_types)),
355            Arc::new(StringArray::from_iter_values(target_keys)),
356            Arc::new(StringArray::from_iter_values(target_jsons)),
357            Arc::new(UInt64Array::from_iter_values(blob_sizes)),
358            Arc::new(StringArray::from_iter(meta_jsons)),
359            Arc::new(UInt64Array::from_iter(node_ids)),
360        ];
361
362        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
363    }
364
365    /// Reserved internal inspect table name for puffin index metadata.
366    pub fn reserved_table_name_for_inspection() -> &'static str {
367        "__inspect/__mito/__puffin_index_meta"
368    }
369
370    /// Builds a logical plan for scanning puffin index metadata entries.
371    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
372        build_plan_helper(
373            scan_request,
374            Self::reserved_table_name_for_inspection(),
375            Self::schema(),
376        )
377    }
378}
379
380fn build_plan_helper(
381    scan_request: ScanRequest,
382    table_name: &str,
383    schema: SchemaRef,
384) -> Result<LogicalPlan, DataFusionError> {
385    let table_source = LogicalTableSource::new(schema.arrow_schema().clone());
386
387    let projection = scan_request.projection_input.map(|input| input.projection);
388    let mut builder = LogicalPlanBuilder::scan(table_name, Arc::new(table_source), projection)?;
389
390    for filter in scan_request.filters {
391        builder = builder.filter(filter)?;
392    }
393
394    if let Some(limit) = scan_request.limit {
395        builder = builder.limit(0, Some(limit))?;
396    }
397
398    builder.build()
399}
400
401#[cfg(test)]
402mod tests {
403    use datafusion_common::TableReference;
404    use datafusion_expr::{LogicalPlan, Operator, binary_expr, col, lit};
405    use datatypes::arrow::array::{
406        Array, TimestampMillisecondArray, TimestampNanosecondArray, UInt8Array, UInt32Array,
407        UInt64Array,
408    };
409    use datatypes::arrow_array::StringArray;
410
411    use super::*;
412
    /// End-to-end check of `ManifestSstEntry::to_record_batch`: builds two
    /// entries that exercise both the `Some`/`None` branches of every
    /// nullable column, then verifies schema agreement and every column's
    /// values positionally.
    #[test]
    fn test_sst_entry_manifest_to_record_batch() {
        // Prepare entries
        // region_number packs the group into the top 8 bits and the
        // sequence into the low 24 bits, matching the ids built below.
        let table_id1: TableId = 1;
        let region_group1: RegionGroup = 2;
        let region_seq1: RegionSeq = 3;
        let region_number1: RegionNumber = ((region_group1 as u32) << 24) | region_seq1;
        let region_id1 = RegionId::with_group_and_seq(table_id1, region_group1, region_seq1);

        let table_id2: TableId = 5;
        let region_group2: RegionGroup = 1;
        let region_seq2: RegionSeq = 42;
        let region_number2: RegionNumber = ((region_group2 as u32) << 24) | region_seq2;
        let region_id2 = RegionId::with_group_and_seq(table_id2, region_group2, region_seq2);

        let entries = vec![
            // First entry: nullable fields mostly `None`, non-nanosecond
            // timestamps to exercise the unit conversion.
            ManifestSstEntry {
                table_dir: "tdir1".to_string(),
                region_id: region_id1,
                table_id: table_id1,
                region_number: region_number1,
                region_group: region_group1,
                region_sequence: region_seq1,
                file_id: "f1".to_string(),
                index_version: 0,
                level: 1,
                file_path: "/p1".to_string(),
                file_size: 100,
                index_file_path: None,
                index_file_size: None,
                num_rows: 10,
                num_row_groups: 2,
                num_series: Some(5),
                min_ts: Timestamp::new_millisecond(1000), // 1s -> 1_000_000_000ns
                max_ts: Timestamp::new_second(2),         // 2s -> 2_000_000_000ns
                sequence: None,
                origin_region_id: region_id1,
                node_id: Some(1),
                visible: false,
            },
            // Second entry: nullable fields mostly `Some`, complementing the
            // first entry's null coverage.
            ManifestSstEntry {
                table_dir: "tdir2".to_string(),
                region_id: region_id2,
                table_id: table_id2,
                region_number: region_number2,
                region_group: region_group2,
                region_sequence: region_seq2,
                file_id: "f2".to_string(),
                index_version: 1,
                level: 3,
                file_path: "/p2".to_string(),
                file_size: 200,
                index_file_path: Some("idx".to_string()),
                index_file_size: Some(11),
                num_rows: 20,
                num_row_groups: 4,
                num_series: None,
                min_ts: Timestamp::new_nanosecond(5),     // 5ns
                max_ts: Timestamp::new_microsecond(2000), // 2ms -> 2_000_000ns
                sequence: Some(9),
                origin_region_id: region_id2,
                node_id: None,
                visible: true,
            },
        ];

        let schema = ManifestSstEntry::schema();
        let batch = ManifestSstEntry::to_record_batch(&entries).unwrap();

        // Schema checks: the batch's schema must agree with the declared
        // schema field-by-field (name, nullability, data type).
        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
        assert_eq!(2, batch.num_rows());
        for (i, f) in schema.arrow_schema().fields().iter().enumerate() {
            assert_eq!(f.name(), batch.schema().field(i).name());
            assert_eq!(f.is_nullable(), batch.schema().field(i).is_nullable());
            assert_eq!(f.data_type(), batch.schema().field(i).data_type());
        }

        // Column asserts: checked positionally in schema order.
        let table_dirs = batch
            .column(0)
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert_eq!("tdir1", table_dirs.value(0));
        assert_eq!("tdir2", table_dirs.value(1));

        let region_ids = batch
            .column(1)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(region_id1.as_u64(), region_ids.value(0));
        assert_eq!(region_id2.as_u64(), region_ids.value(1));

        let table_ids = batch
            .column(2)
            .as_any()
            .downcast_ref::<UInt32Array>()
            .unwrap();
        assert_eq!(table_id1, table_ids.value(0));
        assert_eq!(table_id2, table_ids.value(1));

        let region_numbers = batch
            .column(3)
            .as_any()
            .downcast_ref::<UInt32Array>()
            .unwrap();
        assert_eq!(region_number1, region_numbers.value(0));
        assert_eq!(region_number2, region_numbers.value(1));

        let region_groups = batch
            .column(4)
            .as_any()
            .downcast_ref::<UInt8Array>()
            .unwrap();
        assert_eq!(region_group1, region_groups.value(0));
        assert_eq!(region_group2, region_groups.value(1));

        let region_sequences = batch
            .column(5)
            .as_any()
            .downcast_ref::<UInt32Array>()
            .unwrap();
        assert_eq!(region_seq1, region_sequences.value(0));
        assert_eq!(region_seq2, region_sequences.value(1));

        let file_ids = batch
            .column(6)
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert_eq!("f1", file_ids.value(0));
        assert_eq!("f2", file_ids.value(1));

        let index_versions = batch
            .column(7)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(0, index_versions.value(0));
        assert_eq!(1, index_versions.value(1));

        let levels = batch
            .column(8)
            .as_any()
            .downcast_ref::<UInt8Array>()
            .unwrap();
        assert_eq!(1, levels.value(0));
        assert_eq!(3, levels.value(1));

        let file_paths = batch
            .column(9)
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert_eq!("/p1", file_paths.value(0));
        assert_eq!("/p2", file_paths.value(1));

        let file_sizes = batch
            .column(10)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(100, file_sizes.value(0));
        assert_eq!(200, file_sizes.value(1));

        let index_file_paths = batch
            .column(11)
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert!(index_file_paths.is_null(0));
        assert_eq!("idx", index_file_paths.value(1));

        let index_file_sizes = batch
            .column(12)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert!(index_file_sizes.is_null(0));
        assert_eq!(11, index_file_sizes.value(1));

        let num_rows = batch
            .column(13)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(10, num_rows.value(0));
        assert_eq!(20, num_rows.value(1));

        let num_row_groups = batch
            .column(14)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(2, num_row_groups.value(0));
        assert_eq!(4, num_row_groups.value(1));

        let num_series = batch
            .column(15)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(5, num_series.value(0));
        assert!(num_series.is_null(1));

        // Timestamps must come back normalized to nanoseconds regardless of
        // the unit they were constructed with.
        let min_ts = batch
            .column(16)
            .as_any()
            .downcast_ref::<TimestampNanosecondArray>()
            .unwrap();
        assert_eq!(1_000_000_000, min_ts.value(0));
        assert_eq!(5, min_ts.value(1));

        let max_ts = batch
            .column(17)
            .as_any()
            .downcast_ref::<TimestampNanosecondArray>()
            .unwrap();
        assert_eq!(2_000_000_000, max_ts.value(0));
        assert_eq!(2_000_000, max_ts.value(1));

        let sequences = batch
            .column(18)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert!(sequences.is_null(0));
        assert_eq!(9, sequences.value(1));

        let origin_region_ids = batch
            .column(19)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(region_id1.as_u64(), origin_region_ids.value(0));
        assert_eq!(region_id2.as_u64(), origin_region_ids.value(1));

        let node_ids = batch
            .column(20)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .unwrap();
        assert_eq!(1, node_ids.value(0));
        assert!(node_ids.is_null(1));

        let visible = batch
            .column(21)
            .as_any()
            .downcast_ref::<BooleanArray>()
            .unwrap();
        assert!(!visible.value(0));
        assert!(visible.value(1));
    }
668
669    #[test]
670    fn test_sst_entry_storage_to_record_batch() {
671        let entries = vec![
672            StorageSstEntry {
673                file_path: "/s1".to_string(),
674                file_size: None,
675                last_modified_ms: None,
676                node_id: Some(1),
677            },
678            StorageSstEntry {
679                file_path: "/s2".to_string(),
680                file_size: Some(123),
681                last_modified_ms: Some(Timestamp::new_millisecond(456)),
682                node_id: None,
683            },
684        ];
685
686        let schema = StorageSstEntry::schema();
687        let batch = StorageSstEntry::to_record_batch(&entries).unwrap();
688
689        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
690        assert_eq!(2, batch.num_rows());
691
692        let file_paths = batch
693            .column(0)
694            .as_any()
695            .downcast_ref::<StringArray>()
696            .unwrap();
697        assert_eq!("/s1", file_paths.value(0));
698        assert_eq!("/s2", file_paths.value(1));
699
700        let file_sizes = batch
701            .column(1)
702            .as_any()
703            .downcast_ref::<UInt64Array>()
704            .unwrap();
705        assert!(file_sizes.is_null(0));
706        assert_eq!(123, file_sizes.value(1));
707
708        let last_modified = batch
709            .column(2)
710            .as_any()
711            .downcast_ref::<TimestampMillisecondArray>()
712            .unwrap();
713        assert!(last_modified.is_null(0));
714        assert_eq!(456, last_modified.value(1));
715
716        let node_ids = batch
717            .column(3)
718            .as_any()
719            .downcast_ref::<UInt64Array>()
720            .unwrap();
721        assert_eq!(1, node_ids.value(0));
722        assert!(node_ids.is_null(1));
723    }
724
725    #[test]
726    fn test_puffin_index_meta_to_record_batch() {
727        let entries = vec![
728            PuffinIndexMetaEntry {
729                table_dir: "table1".to_string(),
730                index_file_path: "index1".to_string(),
731                region_id: RegionId::with_group_and_seq(10, 0, 20),
732                table_id: 10,
733                region_number: 20,
734                region_group: 0,
735                region_sequence: 20,
736                file_id: "file1".to_string(),
737                index_file_size: Some(1024),
738                index_type: "bloom_filter".to_string(),
739                target_type: "column".to_string(),
740                target_key: "1".to_string(),
741                target_json: "{\"column\":1}".to_string(),
742                blob_size: 256,
743                meta_json: Some("{\"bloom\":{}}".to_string()),
744                node_id: Some(42),
745            },
746            PuffinIndexMetaEntry {
747                table_dir: "table2".to_string(),
748                index_file_path: "index2".to_string(),
749                region_id: RegionId::with_group_and_seq(11, 0, 21),
750                table_id: 11,
751                region_number: 21,
752                region_group: 0,
753                region_sequence: 21,
754                file_id: "file2".to_string(),
755                index_file_size: None,
756                index_type: "inverted".to_string(),
757                target_type: "unknown".to_string(),
758                target_key: "legacy".to_string(),
759                target_json: "{}".to_string(),
760                blob_size: 0,
761                meta_json: None,
762                node_id: None,
763            },
764        ];
765
766        let schema = PuffinIndexMetaEntry::schema();
767        let batch = PuffinIndexMetaEntry::to_record_batch(&entries).unwrap();
768
769        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
770        assert_eq!(2, batch.num_rows());
771
772        let table_dirs = batch
773            .column(0)
774            .as_any()
775            .downcast_ref::<StringArray>()
776            .unwrap();
777        assert_eq!("table1", table_dirs.value(0));
778        assert_eq!("table2", table_dirs.value(1));
779
780        let index_file_paths = batch
781            .column(1)
782            .as_any()
783            .downcast_ref::<StringArray>()
784            .unwrap();
785        assert_eq!("index1", index_file_paths.value(0));
786        assert_eq!("index2", index_file_paths.value(1));
787
788        let region_ids = batch
789            .column(2)
790            .as_any()
791            .downcast_ref::<UInt64Array>()
792            .unwrap();
793        assert_eq!(
794            RegionId::with_group_and_seq(10, 0, 20).as_u64(),
795            region_ids.value(0)
796        );
797        assert_eq!(
798            RegionId::with_group_and_seq(11, 0, 21).as_u64(),
799            region_ids.value(1)
800        );
801
802        let table_ids = batch
803            .column(3)
804            .as_any()
805            .downcast_ref::<UInt32Array>()
806            .unwrap();
807        assert_eq!(10, table_ids.value(0));
808        assert_eq!(11, table_ids.value(1));
809
810        let region_numbers = batch
811            .column(4)
812            .as_any()
813            .downcast_ref::<UInt32Array>()
814            .unwrap();
815        assert_eq!(20, region_numbers.value(0));
816        assert_eq!(21, region_numbers.value(1));
817
818        let region_groups = batch
819            .column(5)
820            .as_any()
821            .downcast_ref::<UInt8Array>()
822            .unwrap();
823        assert_eq!(0, region_groups.value(0));
824        assert_eq!(0, region_groups.value(1));
825
826        let region_sequences = batch
827            .column(6)
828            .as_any()
829            .downcast_ref::<UInt32Array>()
830            .unwrap();
831        assert_eq!(20, region_sequences.value(0));
832        assert_eq!(21, region_sequences.value(1));
833
834        let file_ids = batch
835            .column(7)
836            .as_any()
837            .downcast_ref::<StringArray>()
838            .unwrap();
839        assert_eq!("file1", file_ids.value(0));
840        assert_eq!("file2", file_ids.value(1));
841
842        let index_file_sizes = batch
843            .column(8)
844            .as_any()
845            .downcast_ref::<UInt64Array>()
846            .unwrap();
847        assert_eq!(1024, index_file_sizes.value(0));
848        assert!(index_file_sizes.is_null(1));
849
850        let index_types = batch
851            .column(9)
852            .as_any()
853            .downcast_ref::<StringArray>()
854            .unwrap();
855        assert_eq!("bloom_filter", index_types.value(0));
856        assert_eq!("inverted", index_types.value(1));
857
858        let target_types = batch
859            .column(10)
860            .as_any()
861            .downcast_ref::<StringArray>()
862            .unwrap();
863        assert_eq!("column", target_types.value(0));
864        assert_eq!("unknown", target_types.value(1));
865
866        let target_keys = batch
867            .column(11)
868            .as_any()
869            .downcast_ref::<StringArray>()
870            .unwrap();
871        assert_eq!("1", target_keys.value(0));
872        assert_eq!("legacy", target_keys.value(1));
873
874        let target_json = batch
875            .column(12)
876            .as_any()
877            .downcast_ref::<StringArray>()
878            .unwrap();
879        assert_eq!("{\"column\":1}", target_json.value(0));
880        assert_eq!("{}", target_json.value(1));
881
882        let blob_sizes = batch
883            .column(13)
884            .as_any()
885            .downcast_ref::<UInt64Array>()
886            .unwrap();
887        assert_eq!(256, blob_sizes.value(0));
888        assert_eq!(0, blob_sizes.value(1));
889
890        let meta_jsons = batch
891            .column(14)
892            .as_any()
893            .downcast_ref::<StringArray>()
894            .unwrap();
895        assert_eq!("{\"bloom\":{}}", meta_jsons.value(0));
896        assert!(meta_jsons.is_null(1));
897
898        let node_ids = batch
899            .column(15)
900            .as_any()
901            .downcast_ref::<UInt64Array>()
902            .unwrap();
903        assert_eq!(42, node_ids.value(0));
904        assert!(node_ids.is_null(1));
905    }
906
907    #[test]
908    fn test_manifest_build_plan() {
909        // Note: filter must reference a column in the projected schema
910        let projection_input = Some(vec![0, 1, 2].into());
911        let request = ScanRequest {
912            projection_input,
913            filters: vec![binary_expr(col("table_id"), Operator::Gt, lit(0))],
914            limit: Some(5),
915            ..Default::default()
916        };
917
918        let plan = ManifestSstEntry::build_plan(request).unwrap();
919
920        // Expect plan to be Filter -> Limit -> TableScan or Filter+Limit wrapped.
921        // We'll pattern match to reach TableScan and verify key fields.
922        let (scan, has_filter, has_limit) = extract_scan(&plan);
923
924        assert!(has_filter);
925        assert!(has_limit);
926        assert_eq!(
927            scan.table_name,
928            TableReference::bare(ManifestSstEntry::reserved_table_name_for_inspection())
929        );
930        assert_eq!(scan.projection, Some(vec![0, 1, 2]));
931
932        // projected schema should match projection
933        let fields = scan.projected_schema.fields();
934        assert_eq!(fields.len(), 3);
935        assert_eq!(fields[0].name(), "table_dir");
936        assert_eq!(fields[1].name(), "region_id");
937        assert_eq!(fields[2].name(), "table_id");
938    }
939
940    #[test]
941    fn test_storage_build_plan() {
942        let projection_input = Some(vec![0, 2].into());
943        let request = ScanRequest {
944            projection_input,
945            filters: vec![binary_expr(col("file_path"), Operator::Eq, lit("/a"))],
946            limit: Some(1),
947            ..Default::default()
948        };
949
950        let plan = StorageSstEntry::build_plan(request).unwrap();
951        let (scan, has_filter, has_limit) = extract_scan(&plan);
952        assert!(has_filter);
953        assert!(has_limit);
954        assert_eq!(
955            scan.table_name,
956            TableReference::bare(StorageSstEntry::reserved_table_name_for_inspection())
957        );
958        assert_eq!(scan.projection, Some(vec![0, 2]));
959
960        let fields = scan.projected_schema.fields();
961        assert_eq!(fields.len(), 2);
962        assert_eq!(fields[0].name(), "file_path");
963        assert_eq!(fields[1].name(), "last_modified_ms");
964    }
965
966    // Helper to reach TableScan and detect presence of Filter/Limit in plan
967    fn extract_scan(plan: &LogicalPlan) -> (&datafusion_expr::logical_plan::TableScan, bool, bool) {
968        use datafusion_expr::logical_plan::Limit;
969
970        match plan {
971            LogicalPlan::Filter(f) => {
972                let (scan, _, has_limit) = extract_scan(&f.input);
973                (scan, true, has_limit)
974            }
975            LogicalPlan::Limit(Limit { input, .. }) => {
976                let (scan, has_filter, _) = extract_scan(input);
977                (scan, has_filter, true)
978            }
979            LogicalPlan::TableScan(scan) => (scan, false, false),
980            other => panic!("unexpected plan: {other:?}"),
981        }
982    }
983}