feat: add num_rows and num_row_groups to manifest (#4183)

* feat: add num_rows and num_row_groups to manifest

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add document

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2024-06-21 15:15:13 +08:00
committed by GitHub
parent 1e52ba325f
commit ac574b66ab
10 changed files with 35 additions and 0 deletions

View File

@@ -313,6 +313,8 @@ impl Compactor for DefaultCompactor {
.then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
.unwrap_or_default(),
index_file_size: sst_info.index_file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
});
Ok(file_meta_opt)
});

View File

@@ -37,6 +37,8 @@ pub fn new_file_handle(
file_size: 0,
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
},
file_purger,
)

View File

@@ -367,6 +367,8 @@ impl RegionFlushTask {
.then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
.unwrap_or_default(),
index_file_size: sst_info.index_file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
};
file_metas.push(file_meta);
}

View File

@@ -216,6 +216,8 @@ async fn checkpoint_with_different_compression_types() {
file_size: 1024000,
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
};
let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![file_meta],

View File

@@ -109,6 +109,18 @@ pub struct FileMeta {
pub available_indexes: SmallVec<[IndexType; 4]>,
/// Size of the index file.
pub index_file_size: u64,
/// Number of rows in the file.
///
/// For historical reasons, this field might be missing in old files. Thus
/// the default value `0` doesn't mean the file doesn't contain any rows,
/// but instead means the number of rows is unknown.
pub num_rows: u64,
/// Number of row groups in the file.
///
/// For historical reasons, this field might be missing in old files. Thus
/// the default value `0` doesn't mean the file doesn't contain any row
/// groups, but instead means the number of row groups is unknown.
pub num_row_groups: u64,
}
/// Type of index.
@@ -265,6 +277,8 @@ mod tests {
file_size: 0,
available_indexes: SmallVec::from_iter([IndexType::InvertedIndex]),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
}
}

View File

@@ -141,6 +141,8 @@ mod tests {
file_size: 4096,
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
},
file_purger,
);
@@ -192,6 +194,8 @@ mod tests {
file_size: 4096,
available_indexes: SmallVec::from_iter([IndexType::InvertedIndex]),
index_file_size: 4096,
num_rows: 1024,
num_row_groups: 1,
},
file_purger,
);

View File

@@ -67,6 +67,8 @@ pub struct SstInfo {
pub file_size: u64,
/// Number of rows.
pub num_rows: usize,
/// Number of row groups.
pub num_row_groups: u64,
/// File Meta Data
pub file_metadata: Option<Arc<ParquetMetaData>>,
/// Whether inverted index is available.

View File

@@ -163,6 +163,7 @@ where
time_range,
file_size,
num_rows: stats.num_rows,
num_row_groups: parquet_metadata.num_row_groups() as u64,
file_metadata: Some(Arc::new(parquet_metadata)),
inverted_index_available,
index_file_size,

View File

@@ -114,6 +114,8 @@ pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
file_size: 0,
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
},
file_purger,
)

View File

@@ -101,6 +101,8 @@ impl VersionControlBuilder {
file_size: 0, // We don't care file size.
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
},
);
self
@@ -190,6 +192,8 @@ pub(crate) fn apply_edit(
file_size: 0, // We don't care file size.
available_indexes: Default::default(),
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
}
})
.collect();