Skip to main content

store_api/
sst_entry.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use bytes::Bytes;
18use common_recordbatch::DfRecordBatch;
19use common_time::Timestamp;
20use common_time::timestamp::TimeUnit;
21use datafusion_common::DataFusionError;
22use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, LogicalTableSource};
23use datatypes::arrow::array::{
24    ArrayRef, BinaryArray, BooleanArray, TimestampMillisecondArray, TimestampNanosecondArray,
25    UInt8Array, UInt32Array, UInt64Array,
26};
27use datatypes::arrow::error::ArrowError;
28use datatypes::arrow_array::StringArray;
29use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
30use serde::{Deserialize, Serialize};
31
32use crate::storage::{RegionGroup, RegionId, RegionNumber, RegionSeq, ScanRequest, TableId};
33
/// An entry describing a SST file known by the engine's manifest.
///
/// Fields are declared in the same order as the columns returned by
/// [`ManifestSstEntry::schema`]. `Option` fields map to nullable columns:
/// a `None` value is exported as a null cell by
/// [`ManifestSstEntry::to_record_batch`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestSstEntry {
    /// The table directory this file belongs to.
    pub table_dir: String,
    /// The id of the region that refers to the file.
    pub region_id: RegionId,
    /// The table id this file belongs to.
    pub table_id: TableId,
    /// The region number this file belongs to.
    pub region_number: RegionNumber,
    /// The region group this file belongs to.
    pub region_group: RegionGroup,
    /// The region sequence this file belongs to.
    pub region_sequence: RegionSeq,
    /// Engine-specific file identifier (string form).
    pub file_id: String,
    /// Index version, incremented when the index file is rebuilt.
    pub index_version: u64,
    /// SST level.
    pub level: u8,
    /// Full path of the SST file in object store.
    pub file_path: String,
    /// File size in bytes.
    pub file_size: u64,
    /// Full path of the index file in object store.
    pub index_file_path: Option<String>,
    /// File size of the index file in object store.
    pub index_file_size: Option<u64>,
    /// Number of rows in the SST.
    pub num_rows: u64,
    /// Number of row groups in the SST.
    pub num_row_groups: u64,
    /// Number of series in the SST.
    pub num_series: Option<u64>,
    /// Min timestamp.
    ///
    /// May be stored in any time unit; it is converted to nanoseconds when
    /// exported to a record batch.
    pub min_ts: Timestamp,
    /// Max timestamp.
    ///
    /// May be stored in any time unit; it is converted to nanoseconds when
    /// exported to a record batch.
    pub max_ts: Timestamp,
    /// The sequence number associated with this file.
    pub sequence: Option<u64>,
    /// The id of the region that created the file.
    pub origin_region_id: RegionId,
    /// The node id fetched from the manifest.
    pub node_id: Option<u64>,
    /// Whether this file is visible in current version.
    pub visible: bool,
    /// Minimum encoded primary key in the SST.
    pub primary_key_min: Option<Bytes>,
    /// Maximum encoded primary key in the SST.
    pub primary_key_max: Option<Bytes>,
}
86
87impl ManifestSstEntry {
88    /// Returns the schema of the manifest sst entry.
89    pub fn schema() -> SchemaRef {
90        use datatypes::prelude::ConcreteDataType as Ty;
91        Arc::new(Schema::new(vec![
92            ColumnSchema::new("table_dir", Ty::string_datatype(), false),
93            ColumnSchema::new("region_id", Ty::uint64_datatype(), false),
94            ColumnSchema::new("table_id", Ty::uint32_datatype(), false),
95            ColumnSchema::new("region_number", Ty::uint32_datatype(), false),
96            ColumnSchema::new("region_group", Ty::uint8_datatype(), false),
97            ColumnSchema::new("region_sequence", Ty::uint32_datatype(), false),
98            ColumnSchema::new("file_id", Ty::string_datatype(), false),
99            ColumnSchema::new("index_version", Ty::uint64_datatype(), false),
100            ColumnSchema::new("level", Ty::uint8_datatype(), false),
101            ColumnSchema::new("file_path", Ty::string_datatype(), false),
102            ColumnSchema::new("file_size", Ty::uint64_datatype(), false),
103            ColumnSchema::new("index_file_path", Ty::string_datatype(), true),
104            ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true),
105            ColumnSchema::new("num_rows", Ty::uint64_datatype(), false),
106            ColumnSchema::new("num_row_groups", Ty::uint64_datatype(), false),
107            ColumnSchema::new("num_series", Ty::uint64_datatype(), true),
108            ColumnSchema::new("min_ts", Ty::timestamp_nanosecond_datatype(), true),
109            ColumnSchema::new("max_ts", Ty::timestamp_nanosecond_datatype(), true),
110            ColumnSchema::new("sequence", Ty::uint64_datatype(), true),
111            ColumnSchema::new("origin_region_id", Ty::uint64_datatype(), false),
112            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
113            ColumnSchema::new("visible", Ty::boolean_datatype(), false),
114            ColumnSchema::new("primary_key_min", Ty::binary_datatype(), true),
115            ColumnSchema::new("primary_key_max", Ty::binary_datatype(), true),
116        ]))
117    }
118
119    /// Converts a list of manifest sst entries to a record batch.
120    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
121        let schema = Self::schema();
122        let table_dirs = entries.iter().map(|e| e.table_dir.as_str());
123        let region_ids = entries.iter().map(|e| e.region_id.as_u64());
124        let table_ids = entries.iter().map(|e| e.table_id);
125        let region_numbers = entries.iter().map(|e| e.region_number);
126        let region_groups = entries.iter().map(|e| e.region_group);
127        let region_sequences = entries.iter().map(|e| e.region_sequence);
128        let file_ids = entries.iter().map(|e| e.file_id.as_str());
129        let index_versions = entries.iter().map(|e| e.index_version);
130        let levels = entries.iter().map(|e| e.level);
131        let file_paths = entries.iter().map(|e| e.file_path.as_str());
132        let file_sizes = entries.iter().map(|e| e.file_size);
133        let index_file_paths = entries.iter().map(|e| e.index_file_path.as_ref());
134        let index_file_sizes = entries.iter().map(|e| e.index_file_size);
135        let num_rows = entries.iter().map(|e| e.num_rows);
136        let num_row_groups = entries.iter().map(|e| e.num_row_groups);
137        let num_series = entries.iter().map(|e| e.num_series);
138        let min_ts = entries.iter().map(|e| {
139            e.min_ts
140                .convert_to(TimeUnit::Nanosecond)
141                .map(|ts| ts.value())
142        });
143        let max_ts = entries.iter().map(|e| {
144            e.max_ts
145                .convert_to(TimeUnit::Nanosecond)
146                .map(|ts| ts.value())
147        });
148        let sequences = entries.iter().map(|e| e.sequence);
149        let origin_region_ids = entries.iter().map(|e| e.origin_region_id.as_u64());
150        let node_ids = entries.iter().map(|e| e.node_id);
151        let visible_flags = entries.iter().map(|e| Some(e.visible));
152        let primary_key_min = entries.iter().map(|e| e.primary_key_min.as_deref());
153        let primary_key_max = entries.iter().map(|e| e.primary_key_max.as_deref());
154
155        let columns: Vec<ArrayRef> = vec![
156            Arc::new(StringArray::from_iter_values(table_dirs)),
157            Arc::new(UInt64Array::from_iter_values(region_ids)),
158            Arc::new(UInt32Array::from_iter_values(table_ids)),
159            Arc::new(UInt32Array::from_iter_values(region_numbers)),
160            Arc::new(UInt8Array::from_iter_values(region_groups)),
161            Arc::new(UInt32Array::from_iter_values(region_sequences)),
162            Arc::new(StringArray::from_iter_values(file_ids)),
163            Arc::new(UInt64Array::from_iter(index_versions)),
164            Arc::new(UInt8Array::from_iter_values(levels)),
165            Arc::new(StringArray::from_iter_values(file_paths)),
166            Arc::new(UInt64Array::from_iter_values(file_sizes)),
167            Arc::new(StringArray::from_iter(index_file_paths)),
168            Arc::new(UInt64Array::from_iter(index_file_sizes)),
169            Arc::new(UInt64Array::from_iter_values(num_rows)),
170            Arc::new(UInt64Array::from_iter_values(num_row_groups)),
171            Arc::new(UInt64Array::from_iter(num_series)),
172            Arc::new(TimestampNanosecondArray::from_iter(min_ts)),
173            Arc::new(TimestampNanosecondArray::from_iter(max_ts)),
174            Arc::new(UInt64Array::from_iter(sequences)),
175            Arc::new(UInt64Array::from_iter_values(origin_region_ids)),
176            Arc::new(UInt64Array::from_iter(node_ids)),
177            Arc::new(BooleanArray::from_iter(visible_flags)),
178            Arc::new(BinaryArray::from_iter(primary_key_min)),
179            Arc::new(BinaryArray::from_iter(primary_key_max)),
180        ];
181
182        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
183    }
184
185    /// Reserved internal inspect table name.
186    ///
187    /// This table name is used only for building logical plans on the
188    /// frontend -> datanode path. It is not user-visible and cannot be
189    /// referenced by user queries.
190    pub fn reserved_table_name_for_inspection() -> &'static str {
191        "__inspect/__mito/__sst_manifest"
192    }
193
194    /// Builds a logical plan for scanning the manifest sst entries.
195    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
196        build_plan_helper(
197            scan_request,
198            Self::reserved_table_name_for_inspection(),
199            Self::schema(),
200        )
201    }
202}
203
/// An entry describing a SST file listed from storage layer directly.
///
/// Fields are declared in the same order as the columns returned by
/// [`StorageSstEntry::schema`]. `Option` fields map to nullable columns:
/// a `None` value is exported as a null cell by
/// [`StorageSstEntry::to_record_batch`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct StorageSstEntry {
    /// Full path of the SST file in object store.
    pub file_path: String,
    /// File size in bytes.
    pub file_size: Option<u64>,
    /// Last modified time in milliseconds since epoch, if available from storage.
    ///
    /// The value is converted to millisecond precision when exported to a
    /// record batch.
    pub last_modified_ms: Option<Timestamp>,
    /// The node id fetched from the manifest.
    pub node_id: Option<u64>,
}
216
217impl StorageSstEntry {
218    /// Returns the schema of the storage sst entry.
219    pub fn schema() -> SchemaRef {
220        use datatypes::prelude::ConcreteDataType as Ty;
221        Arc::new(Schema::new(vec![
222            ColumnSchema::new("file_path", Ty::string_datatype(), false),
223            ColumnSchema::new("file_size", Ty::uint64_datatype(), true),
224            ColumnSchema::new(
225                "last_modified_ms",
226                Ty::timestamp_millisecond_datatype(),
227                true,
228            ),
229            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
230        ]))
231    }
232
233    /// Converts a list of storage sst entries to a record batch.
234    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
235        let schema = Self::schema();
236        let file_paths = entries.iter().map(|e| e.file_path.as_str());
237        let file_sizes = entries.iter().map(|e| e.file_size);
238        let last_modified_ms = entries.iter().map(|e| {
239            e.last_modified_ms
240                .and_then(|ts| ts.convert_to(TimeUnit::Millisecond).map(|ts| ts.value()))
241        });
242        let node_ids = entries.iter().map(|e| e.node_id);
243
244        let columns: Vec<ArrayRef> = vec![
245            Arc::new(StringArray::from_iter_values(file_paths)),
246            Arc::new(UInt64Array::from_iter(file_sizes)),
247            Arc::new(TimestampMillisecondArray::from_iter(last_modified_ms)),
248            Arc::new(UInt64Array::from_iter(node_ids)),
249        ];
250
251        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
252    }
253
254    /// Reserved internal inspect table name.
255    ///
256    /// This table name is used only for building logical plans on the
257    /// frontend -> datanode path. It is not user-visible and cannot be
258    /// referenced by user queries.
259    pub fn reserved_table_name_for_inspection() -> &'static str {
260        "__inspect/__mito/__sst_storage"
261    }
262
263    /// Builds a logical plan for scanning the storage sst entries.
264    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
265        build_plan_helper(
266            scan_request,
267            Self::reserved_table_name_for_inspection(),
268            Self::schema(),
269        )
270    }
271}
272
/// An entry describing puffin index metadata for inspection.
///
/// Fields are declared in the same order as the columns returned by
/// [`PuffinIndexMetaEntry::schema`]. `Option` fields map to nullable columns:
/// a `None` value is exported as a null cell by
/// [`PuffinIndexMetaEntry::to_record_batch`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PuffinIndexMetaEntry {
    /// The table directory this index belongs to.
    pub table_dir: String,
    /// The full path of the index file in object store.
    pub index_file_path: String,
    /// The region id referencing the index file.
    pub region_id: RegionId,
    /// The table id referencing the index file.
    pub table_id: TableId,
    /// The region number referencing the index file.
    pub region_number: RegionNumber,
    /// The region group referencing the index file.
    pub region_group: RegionGroup,
    /// The region sequence referencing the index file.
    pub region_sequence: RegionSeq,
    /// Engine-specific file identifier (string form).
    pub file_id: String,
    /// Size of the index file in object store (if available).
    pub index_file_size: Option<u64>,
    /// Logical index type (`bloom_filter`, `fulltext_bloom`, `fulltext_tantivy`, `inverted`).
    pub index_type: String,
    /// Target type (`column`, ...).
    pub target_type: String,
    /// Encoded target key string.
    pub target_key: String,
    /// Structured JSON describing the target.
    pub target_json: String,
    /// Size of the blob storing this target.
    pub blob_size: u64,
    /// Structured JSON describing index-specific metadata (if available).
    pub meta_json: Option<String>,
    /// Node id associated with the index file (if known).
    pub node_id: Option<u64>,
}
309
310impl PuffinIndexMetaEntry {
311    /// Returns the schema describing puffin index metadata entries.
312    pub fn schema() -> SchemaRef {
313        use datatypes::prelude::ConcreteDataType as Ty;
314        Arc::new(Schema::new(vec![
315            ColumnSchema::new("table_dir", Ty::string_datatype(), false),
316            ColumnSchema::new("index_file_path", Ty::string_datatype(), false),
317            ColumnSchema::new("region_id", Ty::uint64_datatype(), false),
318            ColumnSchema::new("table_id", Ty::uint32_datatype(), false),
319            ColumnSchema::new("region_number", Ty::uint32_datatype(), false),
320            ColumnSchema::new("region_group", Ty::uint8_datatype(), false),
321            ColumnSchema::new("region_sequence", Ty::uint32_datatype(), false),
322            ColumnSchema::new("file_id", Ty::string_datatype(), false),
323            ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true),
324            ColumnSchema::new("index_type", Ty::string_datatype(), false),
325            ColumnSchema::new("target_type", Ty::string_datatype(), false),
326            ColumnSchema::new("target_key", Ty::string_datatype(), false),
327            ColumnSchema::new("target_json", Ty::string_datatype(), false),
328            ColumnSchema::new("blob_size", Ty::uint64_datatype(), false),
329            ColumnSchema::new("meta_json", Ty::string_datatype(), true),
330            ColumnSchema::new("node_id", Ty::uint64_datatype(), true),
331        ]))
332    }
333
334    /// Converts a list of puffin index metadata entries to a record batch.
335    pub fn to_record_batch(entries: &[Self]) -> std::result::Result<DfRecordBatch, ArrowError> {
336        let schema = Self::schema();
337        let table_dirs = entries.iter().map(|e| e.table_dir.as_str());
338        let index_file_paths = entries.iter().map(|e| e.index_file_path.as_str());
339        let region_ids = entries.iter().map(|e| e.region_id.as_u64());
340        let table_ids = entries.iter().map(|e| e.table_id);
341        let region_numbers = entries.iter().map(|e| e.region_number);
342        let region_groups = entries.iter().map(|e| e.region_group);
343        let region_sequences = entries.iter().map(|e| e.region_sequence);
344        let file_ids = entries.iter().map(|e| e.file_id.as_str());
345        let index_file_sizes = entries.iter().map(|e| e.index_file_size);
346        let index_types = entries.iter().map(|e| e.index_type.as_str());
347        let target_types = entries.iter().map(|e| e.target_type.as_str());
348        let target_keys = entries.iter().map(|e| e.target_key.as_str());
349        let target_jsons = entries.iter().map(|e| e.target_json.as_str());
350        let blob_sizes = entries.iter().map(|e| e.blob_size);
351        let meta_jsons = entries.iter().map(|e| e.meta_json.as_deref());
352        let node_ids = entries.iter().map(|e| e.node_id);
353
354        let columns: Vec<ArrayRef> = vec![
355            Arc::new(StringArray::from_iter_values(table_dirs)),
356            Arc::new(StringArray::from_iter_values(index_file_paths)),
357            Arc::new(UInt64Array::from_iter_values(region_ids)),
358            Arc::new(UInt32Array::from_iter_values(table_ids)),
359            Arc::new(UInt32Array::from_iter_values(region_numbers)),
360            Arc::new(UInt8Array::from_iter_values(region_groups)),
361            Arc::new(UInt32Array::from_iter_values(region_sequences)),
362            Arc::new(StringArray::from_iter_values(file_ids)),
363            Arc::new(UInt64Array::from_iter(index_file_sizes)),
364            Arc::new(StringArray::from_iter_values(index_types)),
365            Arc::new(StringArray::from_iter_values(target_types)),
366            Arc::new(StringArray::from_iter_values(target_keys)),
367            Arc::new(StringArray::from_iter_values(target_jsons)),
368            Arc::new(UInt64Array::from_iter_values(blob_sizes)),
369            Arc::new(StringArray::from_iter(meta_jsons)),
370            Arc::new(UInt64Array::from_iter(node_ids)),
371        ];
372
373        DfRecordBatch::try_new(schema.arrow_schema().clone(), columns)
374    }
375
376    /// Reserved internal inspect table name for puffin index metadata.
377    pub fn reserved_table_name_for_inspection() -> &'static str {
378        "__inspect/__mito/__puffin_index_meta"
379    }
380
381    /// Builds a logical plan for scanning puffin index metadata entries.
382    pub fn build_plan(scan_request: ScanRequest) -> Result<LogicalPlan, DataFusionError> {
383        build_plan_helper(
384            scan_request,
385            Self::reserved_table_name_for_inspection(),
386            Self::schema(),
387        )
388    }
389}
390
391fn build_plan_helper(
392    scan_request: ScanRequest,
393    table_name: &str,
394    schema: SchemaRef,
395) -> Result<LogicalPlan, DataFusionError> {
396    let table_source = LogicalTableSource::new(schema.arrow_schema().clone());
397
398    let projection = scan_request.projection_input.map(|input| input.projection);
399    let mut builder = LogicalPlanBuilder::scan(table_name, Arc::new(table_source), projection)?;
400
401    for filter in scan_request.filters {
402        builder = builder.filter(filter)?;
403    }
404
405    if let Some(limit) = scan_request.limit {
406        builder = builder.limit(0, Some(limit))?;
407    }
408
409    builder.build()
410}
411
412#[cfg(test)]
413mod tests {
414    use datafusion_common::TableReference;
415    use datafusion_expr::{LogicalPlan, Operator, binary_expr, col, lit};
416    use datatypes::arrow::array::{
417        Array, BinaryArray, TimestampMillisecondArray, TimestampNanosecondArray, UInt8Array,
418        UInt32Array, UInt64Array,
419    };
420    use datatypes::arrow_array::StringArray;
421
422    use super::*;
423
424    #[test]
425    fn test_sst_entry_manifest_to_record_batch() {
426        // Prepare entries
427        let table_id1: TableId = 1;
428        let region_group1: RegionGroup = 2;
429        let region_seq1: RegionSeq = 3;
430        let region_number1: RegionNumber = ((region_group1 as u32) << 24) | region_seq1;
431        let region_id1 = RegionId::with_group_and_seq(table_id1, region_group1, region_seq1);
432
433        let table_id2: TableId = 5;
434        let region_group2: RegionGroup = 1;
435        let region_seq2: RegionSeq = 42;
436        let region_number2: RegionNumber = ((region_group2 as u32) << 24) | region_seq2;
437        let region_id2 = RegionId::with_group_and_seq(table_id2, region_group2, region_seq2);
438
439        let entries = vec![
440            ManifestSstEntry {
441                table_dir: "tdir1".to_string(),
442                region_id: region_id1,
443                table_id: table_id1,
444                region_number: region_number1,
445                region_group: region_group1,
446                region_sequence: region_seq1,
447                file_id: "f1".to_string(),
448                index_version: 0,
449                level: 1,
450                file_path: "/p1".to_string(),
451                file_size: 100,
452                index_file_path: None,
453                index_file_size: None,
454                num_rows: 10,
455                num_row_groups: 2,
456                num_series: Some(5),
457                min_ts: Timestamp::new_millisecond(1000), // 1s -> 1_000_000_000ns
458                max_ts: Timestamp::new_second(2),         // 2s -> 2_000_000_000ns
459                sequence: None,
460                origin_region_id: region_id1,
461                node_id: Some(1),
462                visible: false,
463                primary_key_min: Some(Bytes::from_static(b"aaa")),
464                primary_key_max: Some(Bytes::from_static(b"zzz")),
465            },
466            ManifestSstEntry {
467                table_dir: "tdir2".to_string(),
468                region_id: region_id2,
469                table_id: table_id2,
470                region_number: region_number2,
471                region_group: region_group2,
472                region_sequence: region_seq2,
473                file_id: "f2".to_string(),
474                index_version: 1,
475                level: 3,
476                file_path: "/p2".to_string(),
477                file_size: 200,
478                index_file_path: Some("idx".to_string()),
479                index_file_size: Some(11),
480                num_rows: 20,
481                num_row_groups: 4,
482                num_series: None,
483                min_ts: Timestamp::new_nanosecond(5),     // 5ns
484                max_ts: Timestamp::new_microsecond(2000), // 2ms -> 2_000_000ns
485                sequence: Some(9),
486                origin_region_id: region_id2,
487                node_id: None,
488                visible: true,
489                primary_key_min: None,
490                primary_key_max: None,
491            },
492        ];
493
494        let schema = ManifestSstEntry::schema();
495        let batch = ManifestSstEntry::to_record_batch(&entries).unwrap();
496
497        // Schema checks
498        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
499        assert_eq!(2, batch.num_rows());
500        for (i, f) in schema.arrow_schema().fields().iter().enumerate() {
501            assert_eq!(f.name(), batch.schema().field(i).name());
502            assert_eq!(f.is_nullable(), batch.schema().field(i).is_nullable());
503            assert_eq!(f.data_type(), batch.schema().field(i).data_type());
504        }
505
506        // Column asserts
507        let table_dirs = batch
508            .column(0)
509            .as_any()
510            .downcast_ref::<StringArray>()
511            .unwrap();
512        assert_eq!("tdir1", table_dirs.value(0));
513        assert_eq!("tdir2", table_dirs.value(1));
514
515        let region_ids = batch
516            .column(1)
517            .as_any()
518            .downcast_ref::<UInt64Array>()
519            .unwrap();
520        assert_eq!(region_id1.as_u64(), region_ids.value(0));
521        assert_eq!(region_id2.as_u64(), region_ids.value(1));
522
523        let table_ids = batch
524            .column(2)
525            .as_any()
526            .downcast_ref::<UInt32Array>()
527            .unwrap();
528        assert_eq!(table_id1, table_ids.value(0));
529        assert_eq!(table_id2, table_ids.value(1));
530
531        let region_numbers = batch
532            .column(3)
533            .as_any()
534            .downcast_ref::<UInt32Array>()
535            .unwrap();
536        assert_eq!(region_number1, region_numbers.value(0));
537        assert_eq!(region_number2, region_numbers.value(1));
538
539        let region_groups = batch
540            .column(4)
541            .as_any()
542            .downcast_ref::<UInt8Array>()
543            .unwrap();
544        assert_eq!(region_group1, region_groups.value(0));
545        assert_eq!(region_group2, region_groups.value(1));
546
547        let region_sequences = batch
548            .column(5)
549            .as_any()
550            .downcast_ref::<UInt32Array>()
551            .unwrap();
552        assert_eq!(region_seq1, region_sequences.value(0));
553        assert_eq!(region_seq2, region_sequences.value(1));
554
555        let file_ids = batch
556            .column(6)
557            .as_any()
558            .downcast_ref::<StringArray>()
559            .unwrap();
560        assert_eq!("f1", file_ids.value(0));
561        assert_eq!("f2", file_ids.value(1));
562
563        let index_versions = batch
564            .column(7)
565            .as_any()
566            .downcast_ref::<UInt64Array>()
567            .unwrap();
568        assert_eq!(0, index_versions.value(0));
569        assert_eq!(1, index_versions.value(1));
570
571        let levels = batch
572            .column(8)
573            .as_any()
574            .downcast_ref::<UInt8Array>()
575            .unwrap();
576        assert_eq!(1, levels.value(0));
577        assert_eq!(3, levels.value(1));
578
579        let file_paths = batch
580            .column(9)
581            .as_any()
582            .downcast_ref::<StringArray>()
583            .unwrap();
584        assert_eq!("/p1", file_paths.value(0));
585        assert_eq!("/p2", file_paths.value(1));
586
587        let file_sizes = batch
588            .column(10)
589            .as_any()
590            .downcast_ref::<UInt64Array>()
591            .unwrap();
592        assert_eq!(100, file_sizes.value(0));
593        assert_eq!(200, file_sizes.value(1));
594
595        let index_file_paths = batch
596            .column(11)
597            .as_any()
598            .downcast_ref::<StringArray>()
599            .unwrap();
600        assert!(index_file_paths.is_null(0));
601        assert_eq!("idx", index_file_paths.value(1));
602
603        let index_file_sizes = batch
604            .column(12)
605            .as_any()
606            .downcast_ref::<UInt64Array>()
607            .unwrap();
608        assert!(index_file_sizes.is_null(0));
609        assert_eq!(11, index_file_sizes.value(1));
610
611        let num_rows = batch
612            .column(13)
613            .as_any()
614            .downcast_ref::<UInt64Array>()
615            .unwrap();
616        assert_eq!(10, num_rows.value(0));
617        assert_eq!(20, num_rows.value(1));
618
619        let num_row_groups = batch
620            .column(14)
621            .as_any()
622            .downcast_ref::<UInt64Array>()
623            .unwrap();
624        assert_eq!(2, num_row_groups.value(0));
625        assert_eq!(4, num_row_groups.value(1));
626
627        let num_series = batch
628            .column(15)
629            .as_any()
630            .downcast_ref::<UInt64Array>()
631            .unwrap();
632        assert_eq!(5, num_series.value(0));
633        assert!(num_series.is_null(1));
634
635        let min_ts = batch
636            .column(16)
637            .as_any()
638            .downcast_ref::<TimestampNanosecondArray>()
639            .unwrap();
640        assert_eq!(1_000_000_000, min_ts.value(0));
641        assert_eq!(5, min_ts.value(1));
642
643        let max_ts = batch
644            .column(17)
645            .as_any()
646            .downcast_ref::<TimestampNanosecondArray>()
647            .unwrap();
648        assert_eq!(2_000_000_000, max_ts.value(0));
649        assert_eq!(2_000_000, max_ts.value(1));
650
651        let sequences = batch
652            .column(18)
653            .as_any()
654            .downcast_ref::<UInt64Array>()
655            .unwrap();
656        assert!(sequences.is_null(0));
657        assert_eq!(9, sequences.value(1));
658
659        let origin_region_ids = batch
660            .column(19)
661            .as_any()
662            .downcast_ref::<UInt64Array>()
663            .unwrap();
664        assert_eq!(region_id1.as_u64(), origin_region_ids.value(0));
665        assert_eq!(region_id2.as_u64(), origin_region_ids.value(1));
666
667        let node_ids = batch
668            .column(20)
669            .as_any()
670            .downcast_ref::<UInt64Array>()
671            .unwrap();
672        assert_eq!(1, node_ids.value(0));
673        assert!(node_ids.is_null(1));
674
675        let visible = batch
676            .column(21)
677            .as_any()
678            .downcast_ref::<BooleanArray>()
679            .unwrap();
680        assert!(!visible.value(0));
681        assert!(visible.value(1));
682
683        let primary_key_min = batch
684            .column(22)
685            .as_any()
686            .downcast_ref::<BinaryArray>()
687            .unwrap();
688        assert_eq!(b"aaa", primary_key_min.value(0));
689        assert!(primary_key_min.is_null(1));
690
691        let primary_key_max = batch
692            .column(23)
693            .as_any()
694            .downcast_ref::<BinaryArray>()
695            .unwrap();
696        assert_eq!(b"zzz", primary_key_max.value(0));
697        assert!(primary_key_max.is_null(1));
698    }
699
700    #[test]
701    fn test_sst_entry_storage_to_record_batch() {
702        let entries = vec![
703            StorageSstEntry {
704                file_path: "/s1".to_string(),
705                file_size: None,
706                last_modified_ms: None,
707                node_id: Some(1),
708            },
709            StorageSstEntry {
710                file_path: "/s2".to_string(),
711                file_size: Some(123),
712                last_modified_ms: Some(Timestamp::new_millisecond(456)),
713                node_id: None,
714            },
715        ];
716
717        let schema = StorageSstEntry::schema();
718        let batch = StorageSstEntry::to_record_batch(&entries).unwrap();
719
720        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
721        assert_eq!(2, batch.num_rows());
722
723        let file_paths = batch
724            .column(0)
725            .as_any()
726            .downcast_ref::<StringArray>()
727            .unwrap();
728        assert_eq!("/s1", file_paths.value(0));
729        assert_eq!("/s2", file_paths.value(1));
730
731        let file_sizes = batch
732            .column(1)
733            .as_any()
734            .downcast_ref::<UInt64Array>()
735            .unwrap();
736        assert!(file_sizes.is_null(0));
737        assert_eq!(123, file_sizes.value(1));
738
739        let last_modified = batch
740            .column(2)
741            .as_any()
742            .downcast_ref::<TimestampMillisecondArray>()
743            .unwrap();
744        assert!(last_modified.is_null(0));
745        assert_eq!(456, last_modified.value(1));
746
747        let node_ids = batch
748            .column(3)
749            .as_any()
750            .downcast_ref::<UInt64Array>()
751            .unwrap();
752        assert_eq!(1, node_ids.value(0));
753        assert!(node_ids.is_null(1));
754    }
755
756    #[test]
757    fn test_puffin_index_meta_to_record_batch() {
758        let entries = vec![
759            PuffinIndexMetaEntry {
760                table_dir: "table1".to_string(),
761                index_file_path: "index1".to_string(),
762                region_id: RegionId::with_group_and_seq(10, 0, 20),
763                table_id: 10,
764                region_number: 20,
765                region_group: 0,
766                region_sequence: 20,
767                file_id: "file1".to_string(),
768                index_file_size: Some(1024),
769                index_type: "bloom_filter".to_string(),
770                target_type: "column".to_string(),
771                target_key: "1".to_string(),
772                target_json: "{\"column\":1}".to_string(),
773                blob_size: 256,
774                meta_json: Some("{\"bloom\":{}}".to_string()),
775                node_id: Some(42),
776            },
777            PuffinIndexMetaEntry {
778                table_dir: "table2".to_string(),
779                index_file_path: "index2".to_string(),
780                region_id: RegionId::with_group_and_seq(11, 0, 21),
781                table_id: 11,
782                region_number: 21,
783                region_group: 0,
784                region_sequence: 21,
785                file_id: "file2".to_string(),
786                index_file_size: None,
787                index_type: "inverted".to_string(),
788                target_type: "unknown".to_string(),
789                target_key: "legacy".to_string(),
790                target_json: "{}".to_string(),
791                blob_size: 0,
792                meta_json: None,
793                node_id: None,
794            },
795        ];
796
797        let schema = PuffinIndexMetaEntry::schema();
798        let batch = PuffinIndexMetaEntry::to_record_batch(&entries).unwrap();
799
800        assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns());
801        assert_eq!(2, batch.num_rows());
802
803        let table_dirs = batch
804            .column(0)
805            .as_any()
806            .downcast_ref::<StringArray>()
807            .unwrap();
808        assert_eq!("table1", table_dirs.value(0));
809        assert_eq!("table2", table_dirs.value(1));
810
811        let index_file_paths = batch
812            .column(1)
813            .as_any()
814            .downcast_ref::<StringArray>()
815            .unwrap();
816        assert_eq!("index1", index_file_paths.value(0));
817        assert_eq!("index2", index_file_paths.value(1));
818
819        let region_ids = batch
820            .column(2)
821            .as_any()
822            .downcast_ref::<UInt64Array>()
823            .unwrap();
824        assert_eq!(
825            RegionId::with_group_and_seq(10, 0, 20).as_u64(),
826            region_ids.value(0)
827        );
828        assert_eq!(
829            RegionId::with_group_and_seq(11, 0, 21).as_u64(),
830            region_ids.value(1)
831        );
832
833        let table_ids = batch
834            .column(3)
835            .as_any()
836            .downcast_ref::<UInt32Array>()
837            .unwrap();
838        assert_eq!(10, table_ids.value(0));
839        assert_eq!(11, table_ids.value(1));
840
841        let region_numbers = batch
842            .column(4)
843            .as_any()
844            .downcast_ref::<UInt32Array>()
845            .unwrap();
846        assert_eq!(20, region_numbers.value(0));
847        assert_eq!(21, region_numbers.value(1));
848
849        let region_groups = batch
850            .column(5)
851            .as_any()
852            .downcast_ref::<UInt8Array>()
853            .unwrap();
854        assert_eq!(0, region_groups.value(0));
855        assert_eq!(0, region_groups.value(1));
856
857        let region_sequences = batch
858            .column(6)
859            .as_any()
860            .downcast_ref::<UInt32Array>()
861            .unwrap();
862        assert_eq!(20, region_sequences.value(0));
863        assert_eq!(21, region_sequences.value(1));
864
865        let file_ids = batch
866            .column(7)
867            .as_any()
868            .downcast_ref::<StringArray>()
869            .unwrap();
870        assert_eq!("file1", file_ids.value(0));
871        assert_eq!("file2", file_ids.value(1));
872
873        let index_file_sizes = batch
874            .column(8)
875            .as_any()
876            .downcast_ref::<UInt64Array>()
877            .unwrap();
878        assert_eq!(1024, index_file_sizes.value(0));
879        assert!(index_file_sizes.is_null(1));
880
881        let index_types = batch
882            .column(9)
883            .as_any()
884            .downcast_ref::<StringArray>()
885            .unwrap();
886        assert_eq!("bloom_filter", index_types.value(0));
887        assert_eq!("inverted", index_types.value(1));
888
889        let target_types = batch
890            .column(10)
891            .as_any()
892            .downcast_ref::<StringArray>()
893            .unwrap();
894        assert_eq!("column", target_types.value(0));
895        assert_eq!("unknown", target_types.value(1));
896
897        let target_keys = batch
898            .column(11)
899            .as_any()
900            .downcast_ref::<StringArray>()
901            .unwrap();
902        assert_eq!("1", target_keys.value(0));
903        assert_eq!("legacy", target_keys.value(1));
904
905        let target_json = batch
906            .column(12)
907            .as_any()
908            .downcast_ref::<StringArray>()
909            .unwrap();
910        assert_eq!("{\"column\":1}", target_json.value(0));
911        assert_eq!("{}", target_json.value(1));
912
913        let blob_sizes = batch
914            .column(13)
915            .as_any()
916            .downcast_ref::<UInt64Array>()
917            .unwrap();
918        assert_eq!(256, blob_sizes.value(0));
919        assert_eq!(0, blob_sizes.value(1));
920
921        let meta_jsons = batch
922            .column(14)
923            .as_any()
924            .downcast_ref::<StringArray>()
925            .unwrap();
926        assert_eq!("{\"bloom\":{}}", meta_jsons.value(0));
927        assert!(meta_jsons.is_null(1));
928
929        let node_ids = batch
930            .column(15)
931            .as_any()
932            .downcast_ref::<UInt64Array>()
933            .unwrap();
934        assert_eq!(42, node_ids.value(0));
935        assert!(node_ids.is_null(1));
936    }
937
938    #[test]
939    fn test_manifest_build_plan() {
940        // Note: filter must reference a column in the projected schema
941        let projection_input = Some(vec![0, 1, 2].into());
942        let request = ScanRequest {
943            projection_input,
944            filters: vec![binary_expr(col("table_id"), Operator::Gt, lit(0))],
945            limit: Some(5),
946            ..Default::default()
947        };
948
949        let plan = ManifestSstEntry::build_plan(request).unwrap();
950
951        // Expect plan to be Filter -> Limit -> TableScan or Filter+Limit wrapped.
952        // We'll pattern match to reach TableScan and verify key fields.
953        let (scan, has_filter, has_limit) = extract_scan(&plan);
954
955        assert!(has_filter);
956        assert!(has_limit);
957        assert_eq!(
958            scan.table_name,
959            TableReference::bare(ManifestSstEntry::reserved_table_name_for_inspection())
960        );
961        assert_eq!(scan.projection, Some(vec![0, 1, 2]));
962
963        // projected schema should match projection
964        let fields = scan.projected_schema.fields();
965        assert_eq!(fields.len(), 3);
966        assert_eq!(fields[0].name(), "table_dir");
967        assert_eq!(fields[1].name(), "region_id");
968        assert_eq!(fields[2].name(), "table_id");
969    }
970
971    #[test]
972    fn test_storage_build_plan() {
973        let projection_input = Some(vec![0, 2].into());
974        let request = ScanRequest {
975            projection_input,
976            filters: vec![binary_expr(col("file_path"), Operator::Eq, lit("/a"))],
977            limit: Some(1),
978            ..Default::default()
979        };
980
981        let plan = StorageSstEntry::build_plan(request).unwrap();
982        let (scan, has_filter, has_limit) = extract_scan(&plan);
983        assert!(has_filter);
984        assert!(has_limit);
985        assert_eq!(
986            scan.table_name,
987            TableReference::bare(StorageSstEntry::reserved_table_name_for_inspection())
988        );
989        assert_eq!(scan.projection, Some(vec![0, 2]));
990
991        let fields = scan.projected_schema.fields();
992        assert_eq!(fields.len(), 2);
993        assert_eq!(fields[0].name(), "file_path");
994        assert_eq!(fields[1].name(), "last_modified_ms");
995    }
996
997    // Helper to reach TableScan and detect presence of Filter/Limit in plan
998    fn extract_scan(plan: &LogicalPlan) -> (&datafusion_expr::logical_plan::TableScan, bool, bool) {
999        use datafusion_expr::logical_plan::Limit;
1000
1001        match plan {
1002            LogicalPlan::Filter(f) => {
1003                let (scan, _, has_limit) = extract_scan(&f.input);
1004                (scan, true, has_limit)
1005            }
1006            LogicalPlan::Limit(Limit { input, .. }) => {
1007                let (scan, has_filter, _) = extract_scan(input);
1008                (scan, has_filter, true)
1009            }
1010            LogicalPlan::TableScan(scan) => (scan, false, false),
1011            other => panic!("unexpected plan: {other:?}"),
1012        }
1013    }
1014}