mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-17 21:40:37 +00:00
@@ -102,17 +102,10 @@ impl InstructionHandler for GcRegionsHandler {
|
||||
reports.push(report);
|
||||
}
|
||||
|
||||
// Merge reports
|
||||
let mut merged_report = GcReport::default();
|
||||
for report in reports {
|
||||
merged_report
|
||||
.deleted_files
|
||||
.extend(report.deleted_files.into_iter());
|
||||
merged_report
|
||||
.deleted_indexes
|
||||
.extend(report.deleted_indexes.into_iter());
|
||||
}
|
||||
Ok(merged_report)
|
||||
// Merge reports
|
||||
let mut merged_report = GcReport::default();
|
||||
for report in reports {
|
||||
merged_report.merge(report);
|
||||
}
|
||||
.instrument(common_telemetry::tracing::info_span!("gc_worker_run")),
|
||||
),
|
||||
|
||||
@@ -50,13 +50,16 @@ pub const TEST_REGION_SIZE_200MB: u64 = 200_000_000;
|
||||
/// Helper function to create an empty GcReport for the given region IDs
|
||||
pub fn new_empty_report_with(region_ids: impl IntoIterator<Item = RegionId>) -> GcReport {
|
||||
let mut deleted_files = HashMap::new();
|
||||
let mut processed_regions = HashSet::new();
|
||||
for region_id in region_ids {
|
||||
deleted_files.insert(region_id, vec![]);
|
||||
processed_regions.insert(region_id);
|
||||
}
|
||||
GcReport {
|
||||
deleted_files,
|
||||
deleted_indexes: HashMap::new(),
|
||||
need_retry_regions: HashSet::new(),
|
||||
processed_regions,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -197,76 +197,73 @@ impl GcScheduler {
|
||||
) -> Result<GcJobReport> {
|
||||
info!("Start to handle manual gc request");
|
||||
|
||||
let report = if let Some(regions) = region_ids {
|
||||
let full_listing = full_file_listing.unwrap_or(false);
|
||||
let gc_timeout = timeout.unwrap_or(self.config.mailbox_timeout);
|
||||
let mut dropped_regions = Vec::new();
|
||||
let mut active_regions = Vec::new();
|
||||
let mut dropped_routes_override = Region2Peers::new();
|
||||
// No specific regions, use default tick behavior
|
||||
let Some(regions) = region_ids else {
|
||||
let report = self.trigger_gc().await?;
|
||||
info!("Finished manual gc request");
|
||||
return Ok(report);
|
||||
};
|
||||
|
||||
if !regions.is_empty() {
|
||||
let region_set: HashSet<RegionId> = regions.iter().copied().collect();
|
||||
let table_reparts = self.ctx.get_table_reparts().await?;
|
||||
let dropped_collector = DroppedRegionCollector::new(
|
||||
self.ctx.as_ref(),
|
||||
&self.config,
|
||||
&self.region_gc_tracker,
|
||||
);
|
||||
let dropped_assignment = dropped_collector
|
||||
.collect_and_assign_with_cooldown(&table_reparts, false)
|
||||
.await?;
|
||||
// Empty regions list, return empty report
|
||||
if regions.is_empty() {
|
||||
info!("Finished manual gc request");
|
||||
return Ok(GcJobReport::default());
|
||||
}
|
||||
|
||||
let mut dropped_region_set = HashSet::new();
|
||||
for (_peer, overrides) in dropped_assignment.region_routes_override {
|
||||
for (region_id, route) in overrides {
|
||||
if region_set.contains(&region_id) {
|
||||
dropped_region_set.insert(region_id);
|
||||
dropped_routes_override.insert(region_id, route);
|
||||
}
|
||||
}
|
||||
}
|
||||
let full_listing = full_file_listing.unwrap_or(false);
|
||||
let gc_timeout = timeout.unwrap_or(self.config.mailbox_timeout);
|
||||
|
||||
for region_id in regions {
|
||||
if dropped_region_set.contains(&region_id) {
|
||||
dropped_regions.push(region_id);
|
||||
} else {
|
||||
active_regions.push(region_id);
|
||||
}
|
||||
let region_set: HashSet<RegionId> = regions.iter().copied().collect();
|
||||
let table_reparts = self.ctx.get_table_reparts().await?;
|
||||
let dropped_collector =
|
||||
DroppedRegionCollector::new(self.ctx.as_ref(), &self.config, &self.region_gc_tracker);
|
||||
let dropped_assignment = dropped_collector
|
||||
.collect_and_assign_with_cooldown(&table_reparts, false)
|
||||
.await?;
|
||||
|
||||
let mut dropped_region_set = HashSet::new();
|
||||
let mut dropped_routes_override = Region2Peers::new();
|
||||
for overrides in dropped_assignment.region_routes_override.into_values() {
|
||||
for (region_id, route) in overrides {
|
||||
if region_set.contains(&region_id) {
|
||||
dropped_region_set.insert(region_id);
|
||||
dropped_routes_override.insert(region_id, route);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut combined_report = GcReport::default();
|
||||
let (dropped_regions, active_regions): (Vec<_>, Vec<_>) = regions
|
||||
.into_iter()
|
||||
.partition(|region_id| dropped_region_set.contains(region_id));
|
||||
|
||||
if !active_regions.is_empty() {
|
||||
let report = self
|
||||
.ctx
|
||||
.gc_regions(
|
||||
&active_regions,
|
||||
full_listing,
|
||||
gc_timeout,
|
||||
Region2Peers::new(),
|
||||
)
|
||||
.await?;
|
||||
combined_report.merge(report);
|
||||
}
|
||||
let mut combined_report = GcReport::default();
|
||||
|
||||
if !dropped_regions.is_empty() {
|
||||
let report = self
|
||||
.ctx
|
||||
.gc_regions(&dropped_regions, true, gc_timeout, dropped_routes_override)
|
||||
.await?;
|
||||
combined_report.merge(report);
|
||||
}
|
||||
if !active_regions.is_empty() {
|
||||
let report = self
|
||||
.ctx
|
||||
.gc_regions(
|
||||
&active_regions,
|
||||
full_listing,
|
||||
gc_timeout,
|
||||
Region2Peers::new(),
|
||||
)
|
||||
.await?;
|
||||
combined_report.merge(report);
|
||||
}
|
||||
|
||||
let mut per_datanode_reports = HashMap::new();
|
||||
per_datanode_reports.insert(0, combined_report);
|
||||
GcJobReport {
|
||||
per_datanode_reports,
|
||||
failed_datanodes: HashMap::new(),
|
||||
}
|
||||
} else {
|
||||
// No specific regions, use default tick behavior
|
||||
self.trigger_gc().await?
|
||||
if !dropped_regions.is_empty() {
|
||||
let report = self
|
||||
.ctx
|
||||
.gc_regions(&dropped_regions, true, gc_timeout, dropped_routes_override)
|
||||
.await?;
|
||||
combined_report.merge(report);
|
||||
}
|
||||
|
||||
let mut per_datanode_reports = HashMap::new();
|
||||
per_datanode_reports.insert(0, combined_report);
|
||||
let report = GcJobReport {
|
||||
per_datanode_reports,
|
||||
failed_datanodes: HashMap::new(),
|
||||
};
|
||||
|
||||
info!("Finished manual gc request");
|
||||
|
||||
@@ -310,39 +310,8 @@ impl Metasrv {
|
||||
.into_iter()
|
||||
.map(RegionId::from_u64)
|
||||
.collect();
|
||||
|
||||
// Use GcTickerRef to trigger manual GC
|
||||
let gc_ticker = self.gc_ticker().context(error::UnexpectedSnafu {
|
||||
violated: "GC ticker not available".to_string(),
|
||||
})?;
|
||||
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
gc_ticker
|
||||
.sender
|
||||
.send(gc::Event::Manually {
|
||||
sender: tx,
|
||||
region_ids: Some(region_ids.clone()),
|
||||
full_file_listing: Some(request.full_file_listing),
|
||||
timeout: Some(request.timeout),
|
||||
})
|
||||
self.trigger_gc_for_regions(region_ids, request.full_file_listing, request.timeout)
|
||||
.await
|
||||
.map_err(|_| {
|
||||
error::UnexpectedSnafu {
|
||||
violated: "Failed to send GC event".to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let job_report = rx.await.map_err(|_| {
|
||||
error::UnexpectedSnafu {
|
||||
violated: "GC job channel closed unexpectedly".to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let report = gc_job_report_to_gc_report(job_report);
|
||||
|
||||
Ok(gc_report_to_response(&report, region_ids.len() as u64))
|
||||
}
|
||||
|
||||
async fn handle_gc_table(&self, request: MetaGcTableRequest) -> error::Result<GcResponse> {
|
||||
@@ -370,8 +339,17 @@ impl Metasrv {
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let region_ids: Vec<RegionId> = route.region_routes.iter().map(|r| r.region.id).collect();
|
||||
self.trigger_gc_for_regions(region_ids, request.full_file_listing, request.timeout)
|
||||
.await
|
||||
}
|
||||
|
||||
// Use GcTickerRef to trigger manual GC
|
||||
/// Triggers manual GC for specified regions and returns the GC response.
|
||||
async fn trigger_gc_for_regions(
|
||||
&self,
|
||||
region_ids: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
timeout: Duration,
|
||||
) -> error::Result<GcResponse> {
|
||||
let gc_ticker = self.gc_ticker().context(error::UnexpectedSnafu {
|
||||
violated: "GC ticker not available".to_string(),
|
||||
})?;
|
||||
@@ -381,9 +359,9 @@ impl Metasrv {
|
||||
.sender
|
||||
.send(gc::Event::Manually {
|
||||
sender: tx,
|
||||
region_ids: Some(region_ids.clone()),
|
||||
full_file_listing: Some(request.full_file_listing),
|
||||
timeout: Some(request.timeout),
|
||||
region_ids: Some(region_ids),
|
||||
full_file_listing: Some(full_file_listing),
|
||||
timeout: Some(timeout),
|
||||
})
|
||||
.await
|
||||
.map_err(|_| {
|
||||
@@ -402,7 +380,7 @@ impl Metasrv {
|
||||
|
||||
let report = gc_job_report_to_gc_report(job_report);
|
||||
|
||||
Ok(gc_report_to_response(&report, region_ids.len() as u64))
|
||||
Ok(gc_report_to_response(&report))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -415,10 +393,7 @@ fn gc_job_report_to_gc_report(job_report: crate::gc::GcJobReport) -> store_api::
|
||||
gc_report
|
||||
}
|
||||
|
||||
fn gc_report_to_response(
|
||||
report: &store_api::storage::GcReport,
|
||||
processed_regions: u64,
|
||||
) -> GcResponse {
|
||||
fn gc_report_to_response(report: &store_api::storage::GcReport) -> GcResponse {
|
||||
let deleted_files = report.deleted_files.values().map(|v| v.len() as u64).sum();
|
||||
let deleted_indexes = report
|
||||
.deleted_indexes
|
||||
@@ -426,7 +401,7 @@ fn gc_report_to_response(
|
||||
.map(|v| v.len() as u64)
|
||||
.sum();
|
||||
GcResponse {
|
||||
processed_regions,
|
||||
processed_regions: report.processed_regions.len() as u64,
|
||||
need_retry_regions: report
|
||||
.need_retry_regions
|
||||
.iter()
|
||||
|
||||
@@ -282,6 +282,7 @@ impl LocalGcWorker {
|
||||
|
||||
let mut deleted_files = HashMap::new();
|
||||
let mut deleted_indexes = HashMap::new();
|
||||
let mut processed_regions = HashSet::new();
|
||||
let tmp_ref_files = self.read_tmp_ref_files().await?;
|
||||
for (region_id, region) in &self.regions {
|
||||
let per_region_time = std::time::Instant::now();
|
||||
@@ -309,6 +310,7 @@ impl LocalGcWorker {
|
||||
.collect_vec();
|
||||
deleted_files.insert(*region_id, files.into_iter().map(|f| f.file_id()).collect());
|
||||
deleted_indexes.insert(*region_id, index_files);
|
||||
processed_regions.insert(*region_id);
|
||||
debug!(
|
||||
"GC for region {} took {} secs.",
|
||||
region_id,
|
||||
@@ -323,6 +325,7 @@ impl LocalGcWorker {
|
||||
deleted_files,
|
||||
deleted_indexes,
|
||||
need_retry_regions: HashSet::new(),
|
||||
processed_regions,
|
||||
};
|
||||
Ok(report)
|
||||
}
|
||||
|
||||
@@ -114,6 +114,8 @@ pub struct GcReport {
|
||||
pub deleted_indexes: HashMap<RegionId, Vec<(FileId, IndexVersion)>>,
|
||||
/// Regions that need retry in next gc round, usually because their tmp ref files are outdated
|
||||
pub need_retry_regions: HashSet<RegionId>,
|
||||
/// Regions successfully processed in this GC run
|
||||
pub processed_regions: HashSet<RegionId>,
|
||||
}
|
||||
|
||||
impl GcReport {
|
||||
@@ -126,6 +128,7 @@ impl GcReport {
|
||||
deleted_files,
|
||||
deleted_indexes,
|
||||
need_retry_regions,
|
||||
processed_regions: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,7 +142,17 @@ impl GcReport {
|
||||
);
|
||||
*self_files = dedup.into_iter().collect();
|
||||
}
|
||||
for (region, files) in other.deleted_indexes {
|
||||
let self_files = self.deleted_indexes.entry(region).or_default();
|
||||
let dedup: HashSet<(FileId, IndexVersion)> = HashSet::from_iter(
|
||||
std::mem::take(self_files)
|
||||
.into_iter()
|
||||
.chain(files.iter().cloned()),
|
||||
);
|
||||
*self_files = dedup.into_iter().collect();
|
||||
}
|
||||
self.need_retry_regions.extend(other.need_retry_regions);
|
||||
self.processed_regions.extend(other.processed_regions);
|
||||
// Remove regions that have succeeded from need_retry_regions
|
||||
self.need_retry_regions
|
||||
.retain(|region| !self.deleted_files.contains_key(region));
|
||||
|
||||
Reference in New Issue
Block a user