From 4b3bd7317b3ffca07a01e780d6364fb58e6ef5be Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 13 Jan 2026 17:17:09 +0800 Subject: [PATCH] feat: add per-partition convert, result cache metrics (#7539) * fix: show convert cost in explain analyze verbose Signed-off-by: evenyag * fix: increase puffin metadata cache metric Signed-off-by: evenyag * feat: add result cache hit/miss to filter metrics Signed-off-by: evenyag * feat: print flat format in debug Signed-off-by: evenyag * test: update sqlness test Signed-off-by: evenyag * feat: make scan cost contains part/reader build cost Signed-off-by: evenyag * feat: collect divider cost Signed-off-by: evenyag * refactor: remove unused field in ScannerMetrics Signed-off-by: evenyag * feat: collect metadata read bytes Signed-off-by: evenyag * chore: collect read metrics in get_parquet_meta_data Signed-off-by: evenyag --------- Signed-off-by: evenyag --- src/mito2/src/cache.rs | 5 +- src/mito2/src/cache/file_cache.rs | 9 +- src/mito2/src/read.rs | 8 -- src/mito2/src/read/scan_region.rs | 1 + src/mito2/src/read/scan_util.rs | 118 +++++++++++++++--- src/mito2/src/read/seq_scan.rs | 26 +++- src/mito2/src/read/series_scan.rs | 18 ++- src/mito2/src/sst/parquet/metadata.rs | 22 +++- src/mito2/src/sst/parquet/reader.rs | 44 ++++++- src/mito2/src/worker/handle_manifest.rs | 3 +- src/puffin/src/puffin_manager/cache.rs | 8 +- .../tql-explain-analyze/analyze.result | 1 + .../tql-explain-analyze/analyze.sql | 1 + .../standalone/common/promql/regex.result | 4 + .../cases/standalone/common/promql/regex.sql | 4 + .../tql-explain-analyze/analyze.result | 2 + .../tql-explain-analyze/analyze.sql | 2 + 17 files changed, 234 insertions(+), 42 deletions(-) diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 2c82fb1f33..89064393d1 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -350,7 +350,10 @@ impl CacheManager { // Try to get metadata from write cache let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet); if let Some(write_cache) = &self.write_cache - && let Some(metadata) = write_cache.file_cache().get_parquet_meta_data(key).await + && let Some(metadata) = write_cache + .file_cache() + .get_parquet_meta_data(key, metrics) + .await { metrics.file_cache_hit += 1; let metadata = Arc::new(metadata); diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index 58a2dac588..aeea238fdc 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -43,6 +43,7 @@ use crate::metrics::{ use crate::region::opener::RegionLoadCacheTask; use crate::sst::parquet::helper::fetch_byte_ranges; use crate::sst::parquet::metadata::MetadataLoader; +use crate::sst::parquet::reader::MetadataCacheMetrics; /// Subdirectory of cached files for write. /// @@ -566,7 +567,11 @@ impl FileCache { /// Get the parquet metadata in file cache. /// If the file is not in the cache or fail to load metadata, return None. - pub(crate) async fn get_parquet_meta_data(&self, key: IndexKey) -> Option { + pub(crate) async fn get_parquet_meta_data( + &self, + key: IndexKey, + cache_metrics: &mut MetadataCacheMetrics, + ) -> Option { // Check if file cache contains the key if let Some(index_value) = self.inner.parquet_index.get(&key).await { // Load metadata from file cache @@ -575,7 +580,7 @@ impl FileCache { let file_size = index_value.file_size as u64; let metadata_loader = MetadataLoader::new(local_store, &file_path, file_size); - match metadata_loader.load().await { + match metadata_loader.load(cache_metrics).await { Ok(metadata) => { CACHE_HIT .with_label_values(&[key.file_type.metric_label()]) diff --git a/src/mito2/src/read.rs b/src/mito2/src/read.rs index 06d615452a..16eda08390 100644 --- a/src/mito2/src/read.rs +++ b/src/mito2/src/read.rs @@ -1058,10 +1058,6 @@ impl BatchReader for Box { /// Local metrics for scanners. #[derive(Debug, Default)] pub(crate) struct ScannerMetrics { - /// Duration to prepare the scan task. - prepare_scan_cost: Duration, - /// Duration to build the (merge) reader. - build_reader_cost: Duration, /// Duration to scan data. scan_cost: Duration, /// Duration while waiting for `yield`. @@ -1070,10 +1066,6 @@ pub(crate) struct ScannerMetrics { num_batches: usize, /// Number of rows returned. num_rows: usize, - /// Number of mem ranges scanned. - num_mem_ranges: usize, - /// Number of file ranges scanned. - num_file_ranges: usize, } #[cfg(test)] diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index d36a57c8b6..2037301794 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -1498,6 +1498,7 @@ impl StreamContext { .entries(self.input.files.iter().map(|file| FileWrapper { file })) .finish()?; } + write!(f, ", \"flat_format\": {}", self.input.flat_format)?; #[cfg(feature = "enterprise")] self.format_extension_ranges(f)?; diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index 721c5562df..3c1a78c7dd 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -115,6 +115,8 @@ pub(crate) struct ScanMetricsSet { scan_cost: Duration, /// Duration while waiting for `yield`. yield_cost: Duration, + /// Duration to convert [`Batch`]es. + convert_cost: Option