From 6eccadbf7352ecea6a606a1f13828b930bb73731 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 7 Feb 2025 16:39:04 +0800 Subject: [PATCH] fix: force recycle region dir after gc duration (#5485) --- src/mito2/src/worker/handle_drop.rs | 30 +++++++++++++++++++---------- src/mito2/src/worker/handle_open.rs | 2 +- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/mito2/src/worker/handle_drop.rs b/src/mito2/src/worker/handle_drop.rs index d0b8dc3175..434faf85ec 100644 --- a/src/mito2/src/worker/handle_drop.rs +++ b/src/mito2/src/worker/handle_drop.rs @@ -32,7 +32,7 @@ use crate::region::{RegionLeaderState, RegionMapRef}; use crate::worker::{RegionWorkerLoop, DROPPING_MARKER_FILE}; const GC_TASK_INTERVAL_SEC: u64 = 5 * 60; // 5 minutes -const MAX_RETRY_TIMES: u64 = 288; // 24 hours (5m * 288) +const MAX_RETRY_TIMES: u64 = 12; // 1 hours (5m * 12) impl RegionWorkerLoop where @@ -118,12 +118,16 @@ where } } -/// Background GC task to remove the entire region path once it find there is no -/// parquet file left. Returns whether the path is removed. +/// Background GC task to remove the entire region path once one of the following +/// conditions is true: +/// - It finds there is no parquet file left. +/// - After `gc_duration`. /// -/// This task will keep running until finished. Any resource captured by it will -/// not be released before then. Be sure to only pass weak reference if something -/// is depended on ref-count mechanism. +/// Returns whether the path is removed. +/// +/// This task will retry on failure and keep running until finished. Any resource +/// captured by it will not be released before then. Be sure to only pass weak reference +/// if something is depended on ref-count mechanism. async fn later_drop_task( region_id: RegionId, region_path: String, @@ -131,9 +135,9 @@ async fn later_drop_task( dropping_regions: RegionMapRef, gc_duration: Duration, ) -> bool { + let mut force = false; for _ in 0..MAX_RETRY_TIMES { - sleep(gc_duration).await; - let result = remove_region_dir_once(®ion_path, &object_store).await; + let result = remove_region_dir_once(®ion_path, &object_store, force).await; match result { Err(err) => { warn!( @@ -143,11 +147,14 @@ async fn later_drop_task( } Ok(true) => { dropping_regions.remove_region(region_id); - info!("Region {} is dropped", region_path); + info!("Region {} is dropped, force: {}", region_path, force); return true; } Ok(false) => (), } + sleep(gc_duration).await; + // Force recycle after gc duration. + force = true; } warn!( @@ -160,9 +167,11 @@ async fn later_drop_task( // TODO(ruihang): place the marker in a separate dir /// Removes region dir if there is no parquet files, returns whether the directory is removed. +/// If `force = true`, always removes the dir. pub(crate) async fn remove_region_dir_once( region_path: &str, object_store: &ObjectStore, + force: bool, ) -> Result { // list all files under the given region path to check if there are un-deleted parquet files let mut has_parquet_file = false; @@ -173,7 +182,8 @@ pub(crate) async fn remove_region_dir_once( .await .context(OpenDalSnafu)?; while let Some(file) = files.try_next().await.context(OpenDalSnafu)? { - if file.path().ends_with(".parquet") { + if !force && file.path().ends_with(".parquet") { + // If not in force mode, we only remove the region dir if there is no parquet file has_parquet_file = true; break; } else if !file.path().ends_with(DROPPING_MARKER_FILE) { diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index 01eaf17652..ea6f011a90 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -55,7 +55,7 @@ impl RegionWorkerLoop { .await .context(OpenDalSnafu)? { - let result = remove_region_dir_once(&request.region_dir, object_store).await; + let result = remove_region_dir_once(&request.region_dir, object_store, true).await; info!( "Region {} is dropped, worker: {}, result: {:?}", region_id, self.id, result