fix(mito): count owned SSTs in region stats (#8191)

* fix(mito): count owned SSTs in region stats

Signed-off-by: WenyXu <wenymedia@gmail.com>

* fix(mito): use origin region for index metadata

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: apply suggestions

Signed-off-by: WenyXu <wenymedia@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
This commit is contained in:
Weny Xu
2026-05-28 18:04:39 +08:00
committed by GitHub
parent 9a4e5e8457
commit 1a2d046351
4 changed files with 93 additions and 24 deletions

View File

@@ -613,8 +613,10 @@ impl MitoEngine {
return Vec::new();
}
};
// The index file path is derived from the physical file owner. After
// repartition, `entry.region_id` is only the referring region.
let region_index_id = RegionIndexId::new(
RegionFileId::new(entry.region_id, file_id),
RegionFileId::new(entry.origin_region_id, file_id),
index_version,
);
let context = IndexEntryContext {

View File

@@ -601,14 +601,14 @@ impl MitoRegion {
let memtables = &version.memtables;
let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64;
let sst_usage = version.ssts.sst_usage();
let index_usage = version.ssts.index_usage();
let sst_usage = version.ssts.owned_sst_usage(self.region_id);
let index_usage = version.ssts.owned_index_usage(self.region_id);
let flushed_entry_id = version.flushed_entry_id;
let wal_usage = self.estimated_wal_usage(memtable_usage);
let manifest_usage = self.stats.total_manifest_size();
let num_rows = version.ssts.num_rows() + version.memtables.num_rows();
let num_files = version.ssts.num_files();
let num_rows = version.ssts.owned_num_rows(self.region_id) + version.memtables.num_rows();
let num_files = version.ssts.owned_num_files(self.region_id);
let manifest_version = self.stats.manifest_version();
let file_removed_cnt = self.stats.file_removed_cnt();

View File

@@ -18,7 +18,7 @@ use std::fmt;
use std::sync::Arc;
use common_time::{TimeToLive, Timestamp};
use store_api::storage::FileId;
use store_api::storage::{FileId, RegionId};
use crate::sst::file::{FileHandle, FileMeta, Level, MAX_LEVEL};
use crate::sst::file_purger::FilePurgerRef;
@@ -106,15 +106,19 @@ impl SstVersion {
}
}
/// Returns the number of rows in SST files.
/// Returns the number of rows in SST files owned by `region_id`.
///
/// Rows from SST files referenced from other regions, for example after
/// repartition, are not counted.
/// For historical reasons, the result is not precise for old SST files.
pub(crate) fn num_rows(&self) -> u64 {
pub(crate) fn owned_num_rows(&self, region_id: RegionId) -> u64 {
self.levels
.iter()
.map(|level_meta| {
level_meta
.files
.values()
.filter(|file_handle| file_handle.region_id() == region_id)
.map(|file_handle| {
let meta = file_handle.meta_ref();
meta.num_rows
@@ -124,22 +128,29 @@ impl SstVersion {
.sum()
}
/// Counts the SST files held by this version, summed over every level.
pub(crate) fn num_files(&self) -> u64 {
    let mut total = 0u64;
    for level_meta in self.levels.iter() {
        // Each level keeps its own file map; its length is that level's file count.
        total += level_meta.files.len() as u64;
    }
    total
}
/// Returns SST data files' space occupied in current version.
pub(crate) fn sst_usage(&self) -> u64 {
/// Counts the SST files in this version whose handle reports `region_id` as its region.
///
/// Files whose handle reports a different region are left out of the count.
pub(crate) fn owned_num_files(&self, region_id: RegionId) -> u64 {
    // Walk the files of all levels as one stream instead of summing per-level counts.
    let owned = self
        .levels
        .iter()
        .flat_map(|level_meta| level_meta.files.values())
        .filter(|file_handle| file_handle.region_id() == region_id)
        .count();
    owned as u64
}
/// Returns the space occupied by SST data files owned by `region_id`.
pub(crate) fn owned_sst_usage(&self, region_id: RegionId) -> u64 {
self.levels
.iter()
.map(|level_meta| {
level_meta
.files
.values()
.filter(|file_handle| file_handle.region_id() == region_id)
.map(|file_handle| {
let meta = file_handle.meta_ref();
meta.file_size
@@ -149,14 +160,15 @@ impl SstVersion {
.sum()
}
/// Returns SST index files' space occupied in current version.
pub(crate) fn index_usage(&self) -> u64 {
/// Returns the space occupied by SST index files owned by `region_id`.
pub(crate) fn owned_index_usage(&self, region_id: RegionId) -> u64 {
self.levels
.iter()
.map(|level_meta| {
level_meta
.files
.values()
.filter(|file_handle| file_handle.region_id() == region_id)
.map(|file_handle| {
let meta = file_handle.meta_ref();
meta.index_file_size
@@ -257,4 +269,50 @@ mod tests {
assert!(added_files.contains_key(&f.file_id));
});
}
/// Verifies that the `owned_*` statistics only include files whose `region_id` matches.
///
/// The file of the other region deliberately uses values that differ from the
/// totals of the first region (300 bytes / 30 index bytes / 3 rows), so every
/// assertion fails if a file is attributed to the wrong region.
#[test]
fn test_usage_only_counts_owned_files() {
    let purger = new_noop_file_purger();
    let region_id = RegionId::new(1, 1);
    let other_region_id = RegionId::new(1, 2);
    // A region that owns none of the files below.
    let unrelated_region_id = RegionId::new(1, 3);
    let files = [
        FileMeta {
            region_id,
            file_id: FileId::random(),
            file_size: 100,
            index_file_size: 10,
            num_rows: 1,
            ..Default::default()
        },
        FileMeta {
            region_id,
            file_id: FileId::random(),
            file_size: 200,
            index_file_size: 20,
            num_rows: 2,
            ..Default::default()
        },
        FileMeta {
            region_id: other_region_id,
            file_id: FileId::random(),
            file_size: 400,
            index_file_size: 40,
            num_rows: 4,
            ..Default::default()
        },
    ];
    let mut version = SstVersion::new();
    version.add_files(purger, files.iter().cloned());

    // Two files owned by `region_id`: 1 + 2 rows, 100 + 200 bytes, 10 + 20 index bytes.
    assert_eq!(3, version.owned_num_rows(region_id));
    assert_eq!(2, version.owned_num_files(region_id));
    assert_eq!(300, version.owned_sst_usage(region_id));
    assert_eq!(30, version.owned_index_usage(region_id));

    // One file owned by `other_region_id`.
    assert_eq!(4, version.owned_num_rows(other_region_id));
    assert_eq!(1, version.owned_num_files(other_region_id));
    assert_eq!(400, version.owned_sst_usage(other_region_id));
    assert_eq!(40, version.owned_index_usage(other_region_id));

    // A region without any file in this version contributes nothing.
    assert_eq!(0, version.owned_num_rows(unrelated_region_id));
    assert_eq!(0, version.owned_num_files(unrelated_region_id));
    assert_eq!(0, version.owned_sst_usage(unrelated_region_id));
    assert_eq!(0, version.owned_index_usage(unrelated_region_id));
}
}

View File

@@ -483,7 +483,10 @@ pub type BatchResponses = Vec<(RegionId, Result<RegionResponse, BoxedError>)>;
/// Represents the statistics of a region.
#[derive(Debug, Deserialize, Serialize, Default)]
pub struct RegionStatistic {
/// The number of rows
/// The number of rows stored in SST files owned by this region plus rows in memtables.
///
/// Rows from SST files referenced from other regions, for example after repartition,
/// are not counted to avoid table-level double counting when summing region statistics.
#[serde(default)]
pub num_rows: u64,
/// The size of memtable in bytes.
@@ -492,11 +495,17 @@ pub struct RegionStatistic {
pub wal_size: u64,
/// The size of manifest in bytes.
pub manifest_size: u64,
/// The size of SST data files in bytes.
/// The size of SST data files owned by this region in bytes.
///
/// SST files referenced from other regions, for example after repartition, are not counted.
pub sst_size: u64,
/// The num of SST files.
/// The number of SST files owned by this region.
///
/// SST files referenced from other regions, for example after repartition, are not counted.
pub sst_num: u64,
/// The size of SST index files in bytes.
/// The size of SST index files owned by this region in bytes.
///
/// SST index files referenced from other regions, for example after repartition, are not counted.
#[serde(default)]
pub index_size: u64,
/// The details of the region.