From ee2a7a5f10f552bde9817da4096e021ae1c13090 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Mon, 28 Apr 2025 09:57:17 +0800 Subject: [PATCH] fix: check if memtable is empty by stats (#5989) fix/checking-memtable-empty-and-stats: - **Refactor timestamp updates**: Simplified timestamp range updates in `PartitionTreeMemtable` and `TimeSeriesMemtable` by replacing `update_timestamp_range` with `fetch_max` and `fetch_min` methods for `max_timestamp` and `min_timestamp`. - Affected files: `partition_tree.rs`, `time_series.rs` - **Remove unused code**: Deleted the `update_timestamp_range` method from `WriteMetrics` and removed unnecessary imports. - Affected file: `stats.rs` - **Optimize memtable filtering**: Streamlined the check for empty memtables in `ScanRegion` by directly using `time_range`. - Affected file: `scan_region.rs` (cherry picked from commit 1a517ec8acab61e06296c0fda4a9cc74611384bf) --- src/mito2/src/memtable/partition_tree.rs | 5 ++- src/mito2/src/memtable/stats.rs | 47 ------------------------ src/mito2/src/memtable/time_series.rs | 3 +- src/mito2/src/read/scan_region.rs | 9 ++--- 4 files changed, 9 insertions(+), 55 deletions(-) diff --git a/src/mito2/src/memtable/partition_tree.rs b/src/mito2/src/memtable/partition_tree.rs index d790b65f6b..13ae05cbfe 100644 --- a/src/mito2/src/memtable/partition_tree.rs +++ b/src/mito2/src/memtable/partition_tree.rs @@ -301,7 +301,10 @@ impl PartitionTreeMemtable { fn update_stats(&self, metrics: &WriteMetrics) { // Only let the tracker tracks value bytes. self.alloc_tracker.on_allocation(metrics.value_bytes); - metrics.update_timestamp_range(&self.max_timestamp, &self.min_timestamp); + self.max_timestamp + .fetch_max(metrics.max_ts, Ordering::SeqCst); + self.min_timestamp + .fetch_min(metrics.min_ts, Ordering::SeqCst); } } diff --git a/src/mito2/src/memtable/stats.rs b/src/mito2/src/memtable/stats.rs index cec31d6385..d203da3194 100644 --- a/src/mito2/src/memtable/stats.rs +++ b/src/mito2/src/memtable/stats.rs @@ -14,8 +14,6 @@ //! Internal metrics of the memtable. -use std::sync::atomic::{AtomicI64, Ordering}; - /// Metrics of writing memtables. pub(crate) struct WriteMetrics { /// Size allocated by keys. @@ -28,51 +26,6 @@ pub(crate) struct WriteMetrics { pub(crate) max_ts: i64, } -impl WriteMetrics { - /// Update the min/max timestamp range according to current write metric. - pub(crate) fn update_timestamp_range(&self, prev_max_ts: &AtomicI64, prev_min_ts: &AtomicI64) { - loop { - let current_min = prev_min_ts.load(Ordering::Relaxed); - if self.min_ts >= current_min { - break; - } - - let Err(updated) = prev_min_ts.compare_exchange( - current_min, - self.min_ts, - Ordering::Relaxed, - Ordering::Relaxed, - ) else { - break; - }; - - if updated == self.min_ts { - break; - } - } - - loop { - let current_max = prev_max_ts.load(Ordering::Relaxed); - if self.max_ts <= current_max { - break; - } - - let Err(updated) = prev_max_ts.compare_exchange( - current_max, - self.max_ts, - Ordering::Relaxed, - Ordering::Relaxed, - ) else { - break; - }; - - if updated == self.max_ts { - break; - } - } - } -} - impl Default for WriteMetrics { fn default() -> Self { Self { diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 75a009f945..39f6d28e7a 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -146,7 +146,8 @@ impl TimeSeriesMemtable { fn update_stats(&self, stats: WriteMetrics) { self.alloc_tracker .on_allocation(stats.key_bytes + stats.value_bytes); - stats.update_timestamp_range(&self.max_timestamp, &self.min_timestamp); + self.max_timestamp.fetch_max(stats.max_ts, Ordering::SeqCst); + self.min_timestamp.fetch_min(stats.min_ts, Ordering::SeqCst); } fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) -> Result<()> { diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 193e3c3e17..f12019555f 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -311,13 +311,10 @@ impl ScanRegion { let memtables: Vec<_> = memtables .into_iter() .filter(|mem| { - if mem.is_empty() { + // check if memtable is empty by reading stats. + let Some((start, end)) = mem.stats().time_range() else { return false; - } - let stats = mem.stats(); - // Safety: the memtable is not empty. - let (start, end) = stats.time_range().unwrap(); - + }; // The time range of the memtable is inclusive. let memtable_range = TimestampRange::new_inclusive(Some(start), Some(end)); memtable_range.intersects(&time_range)