feat: adds the number of rows and index files size to region_statistics table (#4909)

* feat: adds index size to region statistics

* feat: adds the number of rows for region statistics

* test: adds sqlness test for region_statistics

* fix: test
This commit is contained in:
dennis zhuang
2024-10-30 19:12:58 +08:00
committed by GitHub
parent a34035a1f2
commit dcc08f6b3e
16 changed files with 198 additions and 21 deletions

View File

@@ -39,9 +39,12 @@ use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const TABLE_ID: &str = "table_id";
const REGION_NUMBER: &str = "region_number";
const REGION_ROWS: &str = "region_rows";
const DISK_SIZE: &str = "disk_size";
const MEMTABLE_SIZE: &str = "memtable_size";
const MANIFEST_SIZE: &str = "manifest_size";
const SST_SIZE: &str = "sst_size";
const INDEX_SIZE: &str = "index_size";
const ENGINE: &str = "engine";
const REGION_ROLE: &str = "region_role";
@@ -52,9 +55,12 @@ const INIT_CAPACITY: usize = 42;
/// - `region_id`: The region id.
/// - `table_id`: The table id.
/// - `region_number`: The region number.
/// - `region_rows`: The number of rows in region.
/// - `memtable_size`: The memtable size in bytes.
/// - `disk_size`: The approximate disk size in bytes.
/// - `manifest_size`: The manifest size in bytes.
/// - `sst_size`: The sst data files size in bytes.
/// - `index_size`: The sst index files size in bytes.
/// - `engine`: The engine type.
/// - `region_role`: The region role.
///
@@ -76,9 +82,12 @@ impl InformationSchemaRegionStatistics {
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_ROWS, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(DISK_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
]))
@@ -135,9 +144,12 @@ struct InformationSchemaRegionStatisticsBuilder {
region_ids: UInt64VectorBuilder,
table_ids: UInt32VectorBuilder,
region_numbers: UInt32VectorBuilder,
region_rows: UInt64VectorBuilder,
disk_sizes: UInt64VectorBuilder,
memtable_sizes: UInt64VectorBuilder,
manifest_sizes: UInt64VectorBuilder,
sst_sizes: UInt64VectorBuilder,
index_sizes: UInt64VectorBuilder,
engines: StringVectorBuilder,
region_roles: StringVectorBuilder,
}
@@ -150,9 +162,12 @@ impl InformationSchemaRegionStatisticsBuilder {
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_rows: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
disk_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
@@ -177,9 +192,12 @@ impl InformationSchemaRegionStatisticsBuilder {
(REGION_ID, &Value::from(region_stat.id.as_u64())),
(TABLE_ID, &Value::from(region_stat.id.table_id())),
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
(REGION_ROWS, &Value::from(region_stat.num_rows)),
(DISK_SIZE, &Value::from(region_stat.approximate_bytes)),
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
(SST_SIZE, &Value::from(region_stat.sst_size)),
(INDEX_SIZE, &Value::from(region_stat.index_size)),
(ENGINE, &Value::from(region_stat.engine.as_str())),
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
];
@@ -192,9 +210,12 @@ impl InformationSchemaRegionStatisticsBuilder {
self.table_ids.push(Some(region_stat.id.table_id()));
self.region_numbers
.push(Some(region_stat.id.region_number()));
self.region_rows.push(Some(region_stat.num_rows));
self.disk_sizes.push(Some(region_stat.approximate_bytes));
self.memtable_sizes.push(Some(region_stat.memtable_size));
self.manifest_sizes.push(Some(region_stat.manifest_size));
self.sst_sizes.push(Some(region_stat.sst_size));
self.index_sizes.push(Some(region_stat.index_size));
self.engines.push(Some(&region_stat.engine));
self.region_roles.push(Some(&region_stat.role.to_string()));
}
@@ -204,9 +225,12 @@ impl InformationSchemaRegionStatisticsBuilder {
Arc::new(self.region_ids.finish()),
Arc::new(self.table_ids.finish()),
Arc::new(self.region_numbers.finish()),
Arc::new(self.region_rows.finish()),
Arc::new(self.disk_sizes.finish()),
Arc::new(self.memtable_sizes.finish()),
Arc::new(self.manifest_sizes.finish()),
Arc::new(self.sst_sizes.finish()),
Arc::new(self.index_sizes.finish()),
Arc::new(self.engines.finish()),
Arc::new(self.region_roles.finish()),
];

View File

@@ -736,12 +736,14 @@ impl InformationExtension for StandaloneInformationExtension {
id: stat.region_id,
rcus: 0,
wcus: 0,
approximate_bytes: region_stat.estimated_disk_size() as i64,
approximate_bytes: region_stat.estimated_disk_size(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
})
.collect::<Vec<_>>();

View File

@@ -78,17 +78,21 @@ pub struct RegionStat {
/// The write capacity units during this period
pub wcus: i64,
/// Approximate bytes of this region
pub approximate_bytes: i64,
pub approximate_bytes: u64,
/// The engine name.
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The number of rows
pub num_rows: u64,
/// The size of the memtable in bytes.
pub memtable_size: u64,
/// The size of the manifest in bytes.
pub manifest_size: u64,
/// The size of the SST data files in bytes.
pub sst_size: u64,
/// The size of the SST index files in bytes.
pub index_size: u64,
}
impl Stat {
@@ -178,12 +182,14 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
id: RegionId::from_u64(value.region_id),
rcus: value.rcus,
wcus: value.wcus,
approximate_bytes: value.approximate_bytes,
approximate_bytes: value.approximate_bytes as u64,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
}
}

View File

@@ -93,9 +93,11 @@ mod tests {
approximate_bytes: 0,
engine: default_engine().to_string(),
role: RegionRole::Follower,
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
}
}
acc.stat = Some(Stat {

View File

@@ -135,9 +135,11 @@ mod test {
wcus: 0,
approximate_bytes: 0,
engine: String::new(),
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
}
}

View File

@@ -198,9 +198,11 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
}],
..Default::default()
}
@@ -217,9 +219,11 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
}],
..Default::default()
}
@@ -236,9 +240,11 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
num_rows: 0,
memtable_size: 0,
manifest_size: 0,
sst_size: 0,
index_size: 0,
}],
..Default::default()
}

View File

@@ -580,7 +580,8 @@ async fn test_region_usage() {
flush_region(&engine, region_id, None).await;
let region_stat = region.region_statistic();
assert_eq!(region_stat.sst_size, 3010);
assert_eq!(region_stat.sst_size, 2790);
assert_eq!(region_stat.num_rows, 10);
// region total usage
// Some memtables may share items.

View File

@@ -216,6 +216,16 @@ impl TimePartitions {
.sum()
}
/// Returns the total number of rows across all time partitions.
///
/// Takes the `inner` lock, then adds up `stats().num_rows` of every
/// partition's memtable. An empty partition list yields `0`.
///
/// # Panics
///
/// Panics if acquiring the `inner` lock returns an error.
pub(crate) fn num_rows(&self) -> u64 {
    let guard = self.inner.lock().unwrap();
    guard.parts.iter().fold(0u64, |total, part| {
        total + part.memtable.stats().num_rows as u64
    })
}
/// Append memtables in partitions to small vec.
pub(crate) fn list_memtables_to_small_vec(&self, memtables: &mut SmallMemtableVec) {
let inner = self.inner.lock().unwrap();

View File

@@ -115,6 +115,15 @@ impl MemtableVersion {
.sum()
}
/// Returns the number of rows in memtables.
///
/// The result is the sum of `stats().num_rows` over every immutable
/// memtable plus the row count reported by the mutable part.
pub(crate) fn num_rows(&self) -> u64 {
    let mut immutable_rows = 0u64;
    for memtable in self.immutables.iter() {
        immutable_rows += memtable.stats().num_rows as u64;
    }
    immutable_rows + self.mutable.num_rows()
}
/// Returns true if the memtable version is empty.
///
/// The version is empty when mutable memtable is empty and there is no

View File

@@ -277,15 +277,19 @@ impl MitoRegion {
let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64;
let sst_usage = version.ssts.sst_usage();
let index_usage = version.ssts.index_usage();
let wal_usage = self.estimated_wal_usage(memtable_usage);
let manifest_usage = self.stats.total_manifest_size();
let num_rows = version.ssts.num_rows() + version.memtables.num_rows();
RegionStatistic {
num_rows,
memtable_size: memtable_usage,
wal_size: wal_usage,
manifest_size: manifest_usage,
sst_size: sst_usage,
index_size: index_usage,
}
}
@@ -422,15 +426,15 @@ impl ManifestContext {
/// Sets the [`RegionRole`].
///
/// ```
///     +------------------------------------------+
///     |                      +-----------------+ |
///     |                      |                 | |
/// +---+------+       +-------+-----+        +--v-v---+
/// | Follower |       | Downgrading |        | Leader |
/// +---^-^----+       +-----+-^-----+        +--+-+---+
///     | |                  | |                 | |
///     | +------------------+ +-----------------+ |
///     +------------------------------------------+
///
/// Transition:
/// - Follower -> Leader

View File

@@ -84,7 +84,25 @@ impl SstVersion {
}
}
/// Returns the number of rows in SST files.
///
/// Adds up `meta_ref().num_rows` of every file handle in every level.
/// For historical reasons, the result is not precise for old SST files.
pub(crate) fn num_rows(&self) -> u64 {
    self.levels
        .iter()
        .flat_map(|level_meta| level_meta.files.values())
        .map(|file_handle| file_handle.meta_ref().num_rows)
        .sum()
}
/// Returns the space occupied by SST data files in the current version.
pub(crate) fn sst_usage(&self) -> u64 {
self.levels
.iter()
@@ -94,7 +112,24 @@ impl SstVersion {
.values()
.map(|file_handle| {
let meta = file_handle.meta_ref();
meta.file_size + meta.index_file_size
meta.file_size
})
.sum::<u64>()
})
.sum()
}
/// Returns the space occupied by SST index files in the current version.
pub(crate) fn index_usage(&self) -> u64 {
self.levels
.iter()
.map(|level_meta| {
level_meta
.files
.values()
.map(|file_handle| {
let meta = file_handle.meta_ref();
meta.index_file_size
})
.sum::<u64>()
})

View File

@@ -291,14 +291,20 @@ pub type BatchResponses = Vec<(RegionId, Result<RegionResponse, BoxedError>)>;
/// Represents the statistics of a region.
///
/// Every `*_size` field is measured in bytes. Fields annotated with
/// `#[serde(default)]` take the `u64` default (`0`) when they are missing
/// from the deserialized input.
// NOTE(review): presumably `#[serde(default)]` is there so that payloads
// serialized before `num_rows` / `index_size` existed still deserialize —
// confirm against the producers of this struct.
#[derive(Debug, Deserialize, Serialize, Default)]
pub struct RegionStatistic {
/// The number of rows.
#[serde(default)]
pub num_rows: u64,
/// The size of memtable in bytes.
pub memtable_size: u64,
/// The size of WAL in bytes.
pub wal_size: u64,
/// The size of manifest in bytes.
pub manifest_size: u64,
/// The size of SST data files in bytes.
pub sst_size: u64,
/// The size of SST index files in bytes.
#[serde(default)]
pub index_size: u64,
}
impl RegionStatistic {
@@ -320,7 +326,7 @@ impl RegionStatistic {
impl RegionStatistic {
/// Returns the estimated disk size of the region.
pub fn estimated_disk_size(&self) -> u64 {
self.wal_size + self.sst_size + self.manifest_size
self.wal_size + self.sst_size + self.manifest_size + self.index_size
}
}