diff --git a/src/mito2/src/engine.rs b/src/mito2/src/engine.rs index 64f7576139..782c82b54a 100644 --- a/src/mito2/src/engine.rs +++ b/src/mito2/src/engine.rs @@ -613,8 +613,10 @@ impl MitoEngine { return Vec::new(); } }; + // The index file path is derived from the physical file owner. After + // repartition, `entry.region_id` is only the referring region. let region_index_id = RegionIndexId::new( - RegionFileId::new(entry.region_id, file_id), + RegionFileId::new(entry.origin_region_id, file_id), index_version, ); let context = IndexEntryContext { diff --git a/src/mito2/src/region.rs b/src/mito2/src/region.rs index f6d2a17bba..9d214caed3 100644 --- a/src/mito2/src/region.rs +++ b/src/mito2/src/region.rs @@ -601,14 +601,14 @@ impl MitoRegion { let memtables = &version.memtables; let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64; - let sst_usage = version.ssts.sst_usage(); - let index_usage = version.ssts.index_usage(); + let sst_usage = version.ssts.owned_sst_usage(self.region_id); + let index_usage = version.ssts.owned_index_usage(self.region_id); let flushed_entry_id = version.flushed_entry_id; let wal_usage = self.estimated_wal_usage(memtable_usage); let manifest_usage = self.stats.total_manifest_size(); - let num_rows = version.ssts.num_rows() + version.memtables.num_rows(); - let num_files = version.ssts.num_files(); + let num_rows = version.ssts.owned_num_rows(self.region_id) + version.memtables.num_rows(); + let num_files = version.ssts.owned_num_files(self.region_id); let manifest_version = self.stats.manifest_version(); let file_removed_cnt = self.stats.file_removed_cnt(); diff --git a/src/mito2/src/sst/version.rs b/src/mito2/src/sst/version.rs index 5958cf7513..67d41a3b82 100644 --- a/src/mito2/src/sst/version.rs +++ b/src/mito2/src/sst/version.rs @@ -18,7 +18,7 @@ use std::fmt; use std::sync::Arc; use common_time::{TimeToLive, Timestamp}; -use store_api::storage::FileId; +use store_api::storage::{FileId, RegionId}; use crate::sst::file::{FileHandle, FileMeta, Level, MAX_LEVEL}; use crate::sst::file_purger::FilePurgerRef; @@ -106,15 +106,19 @@ impl SstVersion { } } - /// Returns the number of rows in SST files. + /// Returns the number of rows in SST files owned by `region_id`. + /// + /// Rows from SST files referenced from other regions, for example after + /// repartition, are not counted. /// For historical reasons, the result is not precise for old SST files. - pub(crate) fn num_rows(&self) -> u64 { + pub(crate) fn owned_num_rows(&self, region_id: RegionId) -> u64 { self.levels .iter() .map(|level_meta| { level_meta .files .values() + .filter(|file_handle| file_handle.region_id() == region_id) .map(|file_handle| { let meta = file_handle.meta_ref(); meta.num_rows @@ -124,22 +128,29 @@ impl SstVersion { .sum() } - /// Returns the number of SST files. - pub(crate) fn num_files(&self) -> u64 { - self.levels - .iter() - .map(|level_meta| level_meta.files.len() as u64) - .sum() - } - - /// Returns SST data files'space occupied in current version. - pub(crate) fn sst_usage(&self) -> u64 { + /// Returns the number of SST files owned by `region_id`. + pub(crate) fn owned_num_files(&self, region_id: RegionId) -> u64 { self.levels .iter() .map(|level_meta| { level_meta .files .values() + .filter(|file_handle| file_handle.region_id() == region_id) + .count() as u64 + }) + .sum() + } + + /// Returns the space occupied by SST data files owned by `region_id`. + pub(crate) fn owned_sst_usage(&self, region_id: RegionId) -> u64 { + self.levels + .iter() + .map(|level_meta| { + level_meta + .files + .values() + .filter(|file_handle| file_handle.region_id() == region_id) .map(|file_handle| { let meta = file_handle.meta_ref(); meta.file_size @@ -149,14 +160,15 @@ impl SstVersion { .sum() } - /// Returns SST index files'space occupied in current version. - pub(crate) fn index_usage(&self) -> u64 { + /// Returns the space occupied by SST index files owned by `region_id`. + pub(crate) fn owned_index_usage(&self, region_id: RegionId) -> u64 { self.levels .iter() .map(|level_meta| { level_meta .files .values() + .filter(|file_handle| file_handle.region_id() == region_id) .map(|file_handle| { let meta = file_handle.meta_ref(); meta.index_file_size @@ -257,4 +269,50 @@ mod tests { assert!(added_files.contains_key(&f.file_id)); }); } + + #[test] + fn test_usage_only_counts_owned_files() { + let purger = new_noop_file_purger(); + let region_id = RegionId::new(1, 1); + let other_region_id = RegionId::new(1, 2); + + let files = [ + FileMeta { + region_id, + file_id: FileId::random(), + file_size: 100, + index_file_size: 10, + num_rows: 1, + ..Default::default() + }, + FileMeta { + region_id, + file_id: FileId::random(), + file_size: 200, + index_file_size: 20, + num_rows: 2, + ..Default::default() + }, + FileMeta { + region_id: other_region_id, + file_id: FileId::random(), + file_size: 300, + index_file_size: 30, + num_rows: 3, + ..Default::default() + }, + ]; + + let mut version = SstVersion::new(); + version.add_files(purger, files.iter().cloned()); + + assert_eq!(3, version.owned_num_rows(region_id)); + assert_eq!(2, version.owned_num_files(region_id)); + assert_eq!(300, version.owned_sst_usage(region_id)); + assert_eq!(30, version.owned_index_usage(region_id)); + assert_eq!(3, version.owned_num_rows(other_region_id)); + assert_eq!(1, version.owned_num_files(other_region_id)); + assert_eq!(300, version.owned_sst_usage(other_region_id)); + assert_eq!(30, version.owned_index_usage(other_region_id)); + } } diff --git a/src/store-api/src/region_engine.rs b/src/store-api/src/region_engine.rs index b235fcffc7..e5ab05e5e7 100644 --- a/src/store-api/src/region_engine.rs +++ b/src/store-api/src/region_engine.rs @@ -483,7 +483,10 @@ pub type BatchResponses = Vec<(RegionId, Result)>; /// Represents the statistics of a region. #[derive(Debug, Deserialize, Serialize, Default)] pub struct RegionStatistic { - /// The number of rows + /// The number of rows stored in SST files owned by this region plus rows in memtables. + /// + /// Rows from SST files referenced from other regions, for example after repartition, + /// are not counted to avoid table-level double counting when summing region statistics. #[serde(default)] pub num_rows: u64, /// The size of memtable in bytes. @@ -492,11 +495,17 @@ pub struct RegionStatistic { pub wal_size: u64, /// The size of manifest in bytes. pub manifest_size: u64, - /// The size of SST data files in bytes. + /// The size of SST data files owned by this region in bytes. + /// + /// SST files referenced from other regions, for example after repartition, are not counted. pub sst_size: u64, - /// The num of SST files. + /// The number of SST files owned by this region. + /// + /// SST files referenced from other regions, for example after repartition, are not counted. pub sst_num: u64, - /// The size of SST index files in bytes. + /// The size of SST index files owned by this region in bytes. + /// + /// SST index files referenced from other regions, for example after repartition, are not counted. #[serde(default)] pub index_size: u64, /// The details of the region.