feat: add sst file num in region stat (#6537)

* feat/add-sst-file-num-in-region-stat:
 ### Add SST File Count to Region Statistics

 - **Enhancements**:
   - Added `sst_num` to track the number of SST files in region statistics across multiple modules.
   - Updated `RegionStat` and `RegionStatistic` structs in `datanode.rs` and `region_engine.rs` to include `sst_num`.
   - Modified `MitoRegion` and `SstVersion` in `region.rs` and `version.rs` to compute and return the number of SST files.
   - Adjusted test cases in `collect_leader_region_handler.rs`, `failure_handler.rs`, `region_lease_handler.rs`, and `weight_compute.rs` to initialize `sst_num`.
   - Updated `get_region_statistic` in `utils.rs` to sum `sst_num` from metadata and data statistics.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

* feat/add-sst-file-num-in-region-stat:
 Add `sst_num` to `region_statistics`

 - Updated `region_statistics.rs` to include a new constant `SST_NUM` and added it to the schema and builder structures.
 - Modified `information_schema.result` to reflect the addition of `sst_num` in the `region_statistics` table.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>

---------

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
Lei, HUANG
2025-07-18 01:36:20 +08:00
committed by GitHub
parent c23b26461c
commit 0cf25f7b05
12 changed files with 36 additions and 5 deletions

View File

@@ -44,6 +44,7 @@ const DISK_SIZE: &str = "disk_size";
const MEMTABLE_SIZE: &str = "memtable_size";
const MANIFEST_SIZE: &str = "manifest_size";
const SST_SIZE: &str = "sst_size";
const SST_NUM: &str = "sst_num";
const INDEX_SIZE: &str = "index_size";
const ENGINE: &str = "engine";
const REGION_ROLE: &str = "region_role";
@@ -87,6 +88,7 @@ impl InformationSchemaRegionStatistics {
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_NUM, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
@@ -149,6 +151,7 @@ struct InformationSchemaRegionStatisticsBuilder {
memtable_sizes: UInt64VectorBuilder,
manifest_sizes: UInt64VectorBuilder,
sst_sizes: UInt64VectorBuilder,
sst_nums: UInt64VectorBuilder,
index_sizes: UInt64VectorBuilder,
engines: StringVectorBuilder,
region_roles: StringVectorBuilder,
@@ -167,6 +170,7 @@ impl InformationSchemaRegionStatisticsBuilder {
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_nums: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -197,6 +201,7 @@ impl InformationSchemaRegionStatisticsBuilder {
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
(SST_SIZE, &Value::from(region_stat.sst_size)),
(SST_NUM, &Value::from(region_stat.sst_num)),
(INDEX_SIZE, &Value::from(region_stat.index_size)),
(ENGINE, &Value::from(region_stat.engine.as_str())),
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
@@ -215,6 +220,7 @@ impl InformationSchemaRegionStatisticsBuilder {
self.memtable_sizes.push(Some(region_stat.memtable_size));
self.manifest_sizes.push(Some(region_stat.manifest_size));
self.sst_sizes.push(Some(region_stat.sst_size));
self.sst_nums.push(Some(region_stat.sst_num));
self.index_sizes.push(Some(region_stat.index_size));
self.engines.push(Some(&region_stat.engine));
self.region_roles.push(Some(&region_stat.role.to_string()));
@@ -230,6 +236,7 @@ impl InformationSchemaRegionStatisticsBuilder {
Arc::new(self.memtable_sizes.finish()),
Arc::new(self.manifest_sizes.finish()),
Arc::new(self.sst_sizes.finish()),
Arc::new(self.sst_nums.finish()),
Arc::new(self.index_sizes.finish()),
Arc::new(self.engines.finish()),
Arc::new(self.region_roles.finish()),

View File

@@ -821,6 +821,7 @@ impl InformationExtension for StandaloneInformationExtension {
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
sst_num: region_stat.sst_num,
index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(),
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,

View File

@@ -93,6 +93,8 @@ pub struct RegionStat {
pub manifest_size: u64,
/// The size of the SST data files in bytes.
pub sst_size: u64,
/// The number of SST data files.
pub sst_num: u64,
/// The size of the SST index files in bytes.
pub index_size: u64,
/// The manifest info of the region.
@@ -173,8 +175,8 @@ impl RegionStat {
std::mem::size_of::<RegionId>() +
// rcus, wcus, approximate_bytes, num_rows
std::mem::size_of::<i64>() * 4 +
// memtable_size, manifest_size, sst_size, index_size
std::mem::size_of::<u64>() * 4 +
// memtable_size, manifest_size, sst_size, sst_num, index_size
std::mem::size_of::<u64>() * 5 +
// engine
std::mem::size_of::<String>() + self.engine.capacity() +
// region_manifest
@@ -275,6 +277,7 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
sst_num: region_stat.sst_num,
index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(),
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,

View File

@@ -121,6 +121,7 @@ mod tests {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
data_topic_latest_entry_id: 0,
metadata_topic_latest_entry_id: 0,

View File

@@ -97,6 +97,7 @@ mod tests {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,

View File

@@ -159,6 +159,7 @@ mod test {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,

View File

@@ -190,6 +190,7 @@ mod tests {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,
@@ -217,6 +218,7 @@ mod tests {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,
@@ -244,6 +246,7 @@ mod tests {
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
sst_num: 0,
index_size: 0,
region_manifest: RegionManifestInfo::Mito {
manifest_version: 0,

View File

@@ -54,6 +54,7 @@ pub fn get_region_statistic(mito: &MitoEngine, region_id: RegionId) -> Option<Re
wal_size: metadata_stat.wal_size + data_stat.wal_size,
manifest_size: metadata_stat.manifest_size + data_stat.manifest_size,
sst_size: metadata_stat.sst_size + data_stat.sst_size,
sst_num: metadata_stat.sst_num + data_stat.sst_num,
index_size: metadata_stat.index_size + data_stat.index_size,
manifest: RegionManifestInfo::Metric {
data_flushed_entry_id: data_stat.manifest.data_flushed_entry_id(),

View File

@@ -310,6 +310,7 @@ impl MitoRegion {
let wal_usage = self.estimated_wal_usage(memtable_usage);
let manifest_usage = self.stats.total_manifest_size();
let num_rows = version.ssts.num_rows() + version.memtables.num_rows();
let num_files = version.ssts.num_files();
let manifest_version = self.stats.manifest_version();
let topic_latest_entry_id = self.topic_latest_entry_id.load(Ordering::Relaxed);
@@ -320,6 +321,7 @@ impl MitoRegion {
wal_size: wal_usage,
manifest_size: manifest_usage,
sst_size: sst_usage,
sst_num: num_files,
index_size: index_usage,
manifest: RegionManifestInfo::Mito {
manifest_version,

View File

@@ -102,6 +102,14 @@ impl SstVersion {
.sum()
}
/// Counts the SST files in this version.
///
/// Walks every entry of `self.levels` and adds up the length of each
/// level's `files` collection, returning the total as a `u64`.
pub(crate) fn num_files(&self) -> u64 {
    let mut total: u64 = 0;
    for level_meta in self.levels.iter() {
        total += level_meta.files.len() as u64;
    }
    total
}
/// Returns the space occupied by SST data files in the current version.
pub(crate) fn sst_usage(&self) -> u64 {
self.levels

View File

@@ -444,6 +444,8 @@ pub struct RegionStatistic {
pub manifest_size: u64,
/// The size of SST data files in bytes.
pub sst_size: u64,
/// The number of SST files.
pub sst_num: u64,
/// The size of SST index files in bytes.
#[serde(default)]
pub index_size: u64,

View File

@@ -318,14 +318,15 @@ select * from information_schema.columns order by table_schema, table_name, colu
| greptime | information_schema | region_peers | table_name | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | information_schema | region_peers | table_schema | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | information_schema | region_statistics | disk_size | 5 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | engine | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | |
| greptime | information_schema | region_statistics | index_size | 9 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | engine | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | |
| greptime | information_schema | region_statistics | index_size | 10 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | manifest_size | 7 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | memtable_size | 6 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | region_id | 1 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | |
| greptime | information_schema | region_statistics | region_number | 3 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | information_schema | region_statistics | region_role | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | |
| greptime | information_schema | region_statistics | region_role | 12 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | |
| greptime | information_schema | region_statistics | region_rows | 4 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | sst_num | 9 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | sst_size | 8 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | |
| greptime | information_schema | region_statistics | table_id | 2 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | information_schema | routines | character_maximum_length | 7 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | |