diff --git a/src/catalog/src/system_schema/information_schema/region_statistics.rs b/src/catalog/src/system_schema/information_schema/region_statistics.rs index e92558acd0..a98a70cb23 100644 --- a/src/catalog/src/system_schema/information_schema/region_statistics.rs +++ b/src/catalog/src/system_schema/information_schema/region_statistics.rs @@ -39,9 +39,12 @@ use crate::CatalogManager; const REGION_ID: &str = "region_id"; const TABLE_ID: &str = "table_id"; const REGION_NUMBER: &str = "region_number"; +const REGION_ROWS: &str = "region_rows"; +const DISK_SIZE: &str = "disk_size"; const MEMTABLE_SIZE: &str = "memtable_size"; const MANIFEST_SIZE: &str = "manifest_size"; const SST_SIZE: &str = "sst_size"; +const INDEX_SIZE: &str = "index_size"; const ENGINE: &str = "engine"; const REGION_ROLE: &str = "region_role"; @@ -52,9 +55,12 @@ const INIT_CAPACITY: usize = 42; /// - `region_id`: The region id. /// - `table_id`: The table id. /// - `region_number`: The region number. +/// - `region_rows`: The number of rows in region. /// - `memtable_size`: The memtable size in bytes. +/// - `disk_size`: The approximate disk size in bytes. /// - `manifest_size`: The manifest size in bytes. -/// - `sst_size`: The sst size in bytes. +/// - `sst_size`: The sst data files size in bytes. +/// - `index_size`: The sst index files size in bytes. /// - `engine`: The engine type. /// - `region_role`: The region role. 
/// @@ -76,9 +82,12 @@ impl InformationSchemaRegionStatistics { ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false), ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false), ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false), + ColumnSchema::new(REGION_ROWS, ConcreteDataType::uint64_datatype(), true), + ColumnSchema::new(DISK_SIZE, ConcreteDataType::uint64_datatype(), true), ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true), ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true), ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true), + ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true), ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true), ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true), ])) @@ -135,9 +144,12 @@ struct InformationSchemaRegionStatisticsBuilder { region_ids: UInt64VectorBuilder, table_ids: UInt32VectorBuilder, region_numbers: UInt32VectorBuilder, + region_rows: UInt64VectorBuilder, + disk_sizes: UInt64VectorBuilder, memtable_sizes: UInt64VectorBuilder, manifest_sizes: UInt64VectorBuilder, sst_sizes: UInt64VectorBuilder, + index_sizes: UInt64VectorBuilder, engines: StringVectorBuilder, region_roles: StringVectorBuilder, } @@ -150,9 +162,12 @@ impl InformationSchemaRegionStatisticsBuilder { region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), + region_rows: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), + disk_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), + index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), 
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY), region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY), } @@ -177,9 +192,12 @@ impl InformationSchemaRegionStatisticsBuilder { (REGION_ID, &Value::from(region_stat.id.as_u64())), (TABLE_ID, &Value::from(region_stat.id.table_id())), (REGION_NUMBER, &Value::from(region_stat.id.region_number())), + (REGION_ROWS, &Value::from(region_stat.num_rows)), + (DISK_SIZE, &Value::from(region_stat.approximate_bytes)), (MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)), (MANIFEST_SIZE, &Value::from(region_stat.manifest_size)), (SST_SIZE, &Value::from(region_stat.sst_size)), + (INDEX_SIZE, &Value::from(region_stat.index_size)), (ENGINE, &Value::from(region_stat.engine.as_str())), (REGION_ROLE, &Value::from(region_stat.role.to_string())), ]; @@ -192,9 +210,12 @@ impl InformationSchemaRegionStatisticsBuilder { self.table_ids.push(Some(region_stat.id.table_id())); self.region_numbers .push(Some(region_stat.id.region_number())); + self.region_rows.push(Some(region_stat.num_rows)); + self.disk_sizes.push(Some(region_stat.approximate_bytes)); self.memtable_sizes.push(Some(region_stat.memtable_size)); self.manifest_sizes.push(Some(region_stat.manifest_size)); self.sst_sizes.push(Some(region_stat.sst_size)); + self.index_sizes.push(Some(region_stat.index_size)); self.engines.push(Some(&region_stat.engine)); self.region_roles.push(Some(&region_stat.role.to_string())); } @@ -204,9 +225,12 @@ impl InformationSchemaRegionStatisticsBuilder { Arc::new(self.region_ids.finish()), Arc::new(self.table_ids.finish()), Arc::new(self.region_numbers.finish()), + Arc::new(self.region_rows.finish()), + Arc::new(self.disk_sizes.finish()), Arc::new(self.memtable_sizes.finish()), Arc::new(self.manifest_sizes.finish()), Arc::new(self.sst_sizes.finish()), + Arc::new(self.index_sizes.finish()), Arc::new(self.engines.finish()), Arc::new(self.region_roles.finish()), ]; diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index 
54d6e4d72c..251957dd28 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -736,12 +736,14 @@ impl InformationExtension for StandaloneInformationExtension { id: stat.region_id, rcus: 0, wcus: 0, - approximate_bytes: region_stat.estimated_disk_size() as i64, + approximate_bytes: region_stat.estimated_disk_size(), engine: stat.engine, role: RegionRole::from(stat.role).into(), + num_rows: region_stat.num_rows, memtable_size: region_stat.memtable_size, manifest_size: region_stat.manifest_size, sst_size: region_stat.sst_size, + index_size: region_stat.index_size, } }) .collect::>(); diff --git a/src/common/meta/src/datanode.rs b/src/common/meta/src/datanode.rs index 4551b8de2f..869af96a28 100644 --- a/src/common/meta/src/datanode.rs +++ b/src/common/meta/src/datanode.rs @@ -78,17 +78,21 @@ pub struct RegionStat { /// The write capacity units during this period pub wcus: i64, /// Approximate bytes of this region - pub approximate_bytes: i64, + pub approximate_bytes: u64, /// The engine name. pub engine: String, /// The region role. pub role: RegionRole, + /// The number of rows + pub num_rows: u64, /// The size of the memtable in bytes. pub memtable_size: u64, /// The size of the manifest in bytes. pub manifest_size: u64, - /// The size of the SST files in bytes. + /// The size of the SST data files in bytes. pub sst_size: u64, + /// The size of the SST index files in bytes. 
+ pub index_size: u64, } impl Stat { @@ -178,12 +182,14 @@ impl From<&api::v1::meta::RegionStat> for RegionStat { id: RegionId::from_u64(value.region_id), rcus: value.rcus, wcus: value.wcus, - approximate_bytes: value.approximate_bytes, + approximate_bytes: value.approximate_bytes as u64, engine: value.engine.to_string(), role: RegionRole::from(value.role()), + num_rows: region_stat.num_rows, memtable_size: region_stat.memtable_size, manifest_size: region_stat.manifest_size, sst_size: region_stat.sst_size, + index_size: region_stat.index_size, } } } diff --git a/src/meta-srv/src/handler/failure_handler.rs b/src/meta-srv/src/handler/failure_handler.rs index 02f423c4b4..ae38f887f4 100644 --- a/src/meta-srv/src/handler/failure_handler.rs +++ b/src/meta-srv/src/handler/failure_handler.rs @@ -93,9 +93,11 @@ mod tests { approximate_bytes: 0, engine: default_engine().to_string(), role: RegionRole::Follower, + num_rows: 0, memtable_size: 0, manifest_size: 0, sst_size: 0, + index_size: 0, } } acc.stat = Some(Stat { diff --git a/src/meta-srv/src/handler/region_lease_handler.rs b/src/meta-srv/src/handler/region_lease_handler.rs index de491da371..98a74f67bb 100644 --- a/src/meta-srv/src/handler/region_lease_handler.rs +++ b/src/meta-srv/src/handler/region_lease_handler.rs @@ -135,9 +135,11 @@ mod test { wcus: 0, approximate_bytes: 0, engine: String::new(), + num_rows: 0, memtable_size: 0, manifest_size: 0, sst_size: 0, + index_size: 0, } } diff --git a/src/meta-srv/src/selector/weight_compute.rs b/src/meta-srv/src/selector/weight_compute.rs index 09d8833e2e..7f3b28a364 100644 --- a/src/meta-srv/src/selector/weight_compute.rs +++ b/src/meta-srv/src/selector/weight_compute.rs @@ -198,9 +198,11 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: RegionRole::Leader, + num_rows: 0, memtable_size: 0, manifest_size: 0, sst_size: 0, + index_size: 0, }], ..Default::default() } @@ -217,9 +219,11 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: 
RegionRole::Leader, + num_rows: 0, memtable_size: 0, manifest_size: 0, sst_size: 0, + index_size: 0, }], ..Default::default() } @@ -236,9 +240,11 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: RegionRole::Leader, + num_rows: 0, memtable_size: 0, manifest_size: 0, sst_size: 0, + index_size: 0, }], ..Default::default() } diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index 533b6a2ea1..785c914e3e 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -580,7 +580,8 @@ async fn test_region_usage() { flush_region(&engine, region_id, None).await; let region_stat = region.region_statistic(); - assert_eq!(region_stat.sst_size, 3010); + assert_eq!(region_stat.sst_size, 2790); + assert_eq!(region_stat.num_rows, 10); // region total usage // Some memtables may share items. diff --git a/src/mito2/src/memtable/time_partition.rs b/src/mito2/src/memtable/time_partition.rs index 6d92488a7b..7fa03ae1be 100644 --- a/src/mito2/src/memtable/time_partition.rs +++ b/src/mito2/src/memtable/time_partition.rs @@ -216,6 +216,16 @@ impl TimePartitions { .sum() } + /// Returns the number of rows. + pub(crate) fn num_rows(&self) -> u64 { + let inner = self.inner.lock().unwrap(); + inner + .parts + .iter() + .map(|part| part.memtable.stats().num_rows as u64) + .sum() + } + /// Append memtables in partitions to small vec. pub(crate) fn list_memtables_to_small_vec(&self, memtables: &mut SmallMemtableVec) { let inner = self.inner.lock().unwrap(); diff --git a/src/mito2/src/memtable/version.rs b/src/mito2/src/memtable/version.rs index 9e18edc673..1c7f2b7d4a 100644 --- a/src/mito2/src/memtable/version.rs +++ b/src/mito2/src/memtable/version.rs @@ -115,6 +115,15 @@ impl MemtableVersion { .sum() } + /// Returns the number of rows in memtables. 
+ pub(crate) fn num_rows(&self) -> u64 { + self.immutables + .iter() + .map(|mem| mem.stats().num_rows as u64) + .sum::<u64>() + + self.mutable.num_rows() + } + /// Returns true if the memtable version is empty. /// /// The version is empty when mutable memtable is empty and there is no diff --git a/src/mito2/src/region.rs b/src/mito2/src/region.rs index b05daf3da0..4ce633e6a6 100644 --- a/src/mito2/src/region.rs +++ b/src/mito2/src/region.rs @@ -277,15 +277,19 @@ impl MitoRegion { let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64; let sst_usage = version.ssts.sst_usage(); + let index_usage = version.ssts.index_usage(); let wal_usage = self.estimated_wal_usage(memtable_usage); let manifest_usage = self.stats.total_manifest_size(); + let num_rows = version.ssts.num_rows() + version.memtables.num_rows(); RegionStatistic { + num_rows, memtable_size: memtable_usage, wal_size: wal_usage, manifest_size: manifest_usage, sst_size: sst_usage, + index_size: index_usage, } } @@ -422,15 +426,15 @@ impl ManifestContext { /// Sets the [`RegionRole`]. /// /// ``` - /// +------------------------------------------+ - /// | +-----------------+ | - /// | | | | + /// +------------------------------------------+ + /// | +-----------------+ | + /// | | | | /// +---+------+ +-------+-----+ +--v-v---+ /// | Follower | | Downgrading | | Leader | /// +---^-^----+ +-----+-^-----+ +--+-+---+ - /// | | | | | | - /// | +------------------+ +-----------------+ | - /// +------------------------------------------+ + /// | | | | | | + /// | +------------------+ +-----------------+ | + /// +------------------------------------------+ /// /// Transition: /// - Follower -> Leader diff --git a/src/mito2/src/sst/version.rs b/src/mito2/src/sst/version.rs index 07d6bee9d9..c677a95413 100644 --- a/src/mito2/src/sst/version.rs +++ b/src/mito2/src/sst/version.rs @@ -84,7 +84,25 @@ impl SstVersion { } } - /// Returns SST files'space occupied in current version. 
+ /// Returns the number of rows in SST files. + /// For historical reasons, the result is not precise for old SST files. + pub(crate) fn num_rows(&self) -> u64 { + self.levels + .iter() + .map(|level_meta| { + level_meta + .files + .values() + .map(|file_handle| { + let meta = file_handle.meta_ref(); + meta.num_rows + }) + .sum::<u64>() + }) + .sum() + } + + /// Returns SST data files'space occupied in current version. pub(crate) fn sst_usage(&self) -> u64 { self.levels .iter() @@ -94,7 +112,24 @@ impl SstVersion { .values() .map(|file_handle| { let meta = file_handle.meta_ref(); - meta.file_size + meta.index_file_size + meta.file_size }) .sum::<u64>() }) .sum() } + + /// Returns SST index files'space occupied in current version. + pub(crate) fn index_usage(&self) -> u64 { + self.levels + .iter() + .map(|level_meta| { + level_meta + .files + .values() + .map(|file_handle| { + let meta = file_handle.meta_ref(); + meta.index_file_size }) .sum::<u64>() }) diff --git a/src/store-api/src/region_engine.rs b/src/store-api/src/region_engine.rs index 785e66d37f..0832385c93 100644 --- a/src/store-api/src/region_engine.rs +++ b/src/store-api/src/region_engine.rs @@ -291,14 +291,20 @@ pub type BatchResponses = Vec<(RegionId, Result)>; /// Represents the statistics of a region. #[derive(Debug, Deserialize, Serialize, Default)] pub struct RegionStatistic { + /// The number of rows + #[serde(default)] + pub num_rows: u64, /// The size of memtable in bytes. pub memtable_size: u64, /// The size of WAL in bytes. pub wal_size: u64, /// The size of manifest in bytes. pub manifest_size: u64, - /// The size of SST files in bytes. + /// The size of SST data files in bytes. pub sst_size: u64, + /// The size of SST index files in bytes. + #[serde(default)] + pub index_size: u64, } impl RegionStatistic { @@ -320,7 +326,7 @@ impl RegionStatistic { impl RegionStatistic { /// Returns the estimated disk size of the region. 
pub fn estimated_disk_size(&self) -> u64 { - self.wal_size + self.sst_size + self.manifest_size + self.wal_size + self.sst_size + self.manifest_size + self.index_size } } diff --git a/tests/cases/standalone/common/information_schema/region_statistics.result b/tests/cases/standalone/common/information_schema/region_statistics.result new file mode 100644 index 0000000000..0c62b4ad6c --- /dev/null +++ b/tests/cases/standalone/common/information_schema/region_statistics.result @@ -0,0 +1,39 @@ +USE public; + +Affected Rows: 0 + +CREATE TABLE test ( + a int primary key, + b string, + ts timestamp time index, +) PARTITION ON COLUMNS (a) ( + a < 10, + a >= 10 AND a < 20, + a >= 20, +); + +Affected Rows: 0 + +INSERT INTO test VALUES + (1, 'a', 1), + (11, 'b', 11), + (21, 'c', 21); + +Affected Rows: 3 + +-- SQLNESS SLEEP 11s +-- FIXME(dennis): we need to wait the datanode reporting stats info to metasrv. +SELECT SUM(region_rows), SUM(disk_size), SUM(sst_size), SUM(index_size) + FROM INFORMATION_SCHEMA.REGION_STATISTICS WHERE table_id + IN (SELECT TABLE_ID FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'test' and table_schema = 'public'); + ++-------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------+------------------------------------------------------+ +| SUM(information_schema.region_statistics.region_rows) | SUM(information_schema.region_statistics.disk_size) | SUM(information_schema.region_statistics.sst_size) | SUM(information_schema.region_statistics.index_size) | ++-------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------+------------------------------------------------------+ +| 3 | 2145 | 0 | 0 | 
++-------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------+------------------------------------------------------+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/information_schema/region_statistics.sql b/tests/cases/standalone/common/information_schema/region_statistics.sql new file mode 100644 index 0000000000..cbc4424683 --- /dev/null +++ b/tests/cases/standalone/common/information_schema/region_statistics.sql @@ -0,0 +1,25 @@ +USE public; + +CREATE TABLE test ( + a int primary key, + b string, + ts timestamp time index, +) PARTITION ON COLUMNS (a) ( + a < 10, + a >= 10 AND a < 20, + a >= 20, +); + + +INSERT INTO test VALUES + (1, 'a', 1), + (11, 'b', 11), + (21, 'c', 21); + +-- SQLNESS SLEEP 11s +-- FIXME(dennis): we need to wait the datanode reporting stats info to metasrv. +SELECT SUM(region_rows), SUM(disk_size), SUM(sst_size), SUM(index_size) + FROM INFORMATION_SCHEMA.REGION_STATISTICS WHERE table_id + IN (SELECT TABLE_ID FROM INFORMATION_SCHEMA.TABLES WHERE table_name = 'test' and table_schema = 'public'); + +DROP TABLE test; diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index 4264bd8df6..b1c8c93295 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -299,13 +299,16 @@ select * from information_schema.columns order by table_schema, table_name, colu | greptime | information_schema | region_peers | peer_id | 2 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | region_peers | region_id | 1 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | | greptime | information_schema | region_peers 
| status | 5 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | region_statistics | engine | 7 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | region_statistics | manifest_size | 5 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | -| greptime | information_schema | region_statistics | memtable_size | 4 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | disk_size | 5 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | engine | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | region_statistics | index_size | 9 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | manifest_size | 7 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | memtable_size | 6 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | region_statistics | region_id | 1 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | | greptime | information_schema | region_statistics | region_number | 3 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | information_schema | region_statistics | 
region_role | 8 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | region_statistics | sst_size | 6 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | region_role | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | region_statistics | region_rows | 4 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | region_statistics | sst_size | 8 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | | greptime | information_schema | region_statistics | table_id | 2 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | | greptime | information_schema | routines | character_maximum_length | 7 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | | greptime | information_schema | routines | character_octet_length | 8 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | diff --git a/tests/conf/datanode-test.toml.template b/tests/conf/datanode-test.toml.template index 5ed5352124..3c999635d9 100644 --- a/tests/conf/datanode-test.toml.template +++ b/tests/conf/datanode-test.toml.template @@ -32,3 +32,6 @@ tcp_nodelay = false [procedure] max_retry_times = 3 retry_delay = "500ms" + +[heartbeat] +interval = '1s'