mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-27 18:30:38 +00:00
feat: unify FlushRegions instructions (#6819)
* feat: add FlushRegionsV2 instruction with unified semantics - Add FlushRegionsV2 struct supporting both single and batch operations - Preserve original FlushRegion(RegionId) API for backward compatibility - Support configurable FlushStrategy (Sync/Async) and FlushErrorStrategy (FailFast/TryAll) - Add detailed per-region error reporting in FlushRegionReply - Update datanode handlers to support both legacy and enhanced flush instructions - Maintain zero breaking changes through automatic conversion of legacy formats Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * chore: run make fmt Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * Apply suggestions from code review Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com> Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * refactor: extract shared perform_region_flush fn Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * refactor: use consistent error type across similar methods see gh copilot suggestion: https://github.com/GreptimeTeam/greptimedb/pull/6819#discussion_r2299603698 Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * chore: make fmt Signed-off-by: Alex Araujo <alexaraujo@gmail.com> * refactor: consolidate FlushRegion instructions Signed-off-by: Alex Araujo <alexaraujo@gmail.com> --------- Signed-off-by: Alex Araujo <alexaraujo@gmail.com>
This commit is contained in:
@@ -102,9 +102,6 @@ impl RegionHeartbeatResponseHandler {
|
||||
Instruction::FlushRegions(flush_regions) => Ok(Box::new(move |handler_context| {
|
||||
handler_context.handle_flush_regions_instruction(flush_regions)
|
||||
})),
|
||||
Instruction::FlushRegion(flush_region) => Ok(Box::new(move |handler_context| {
|
||||
handler_context.handle_flush_region_instruction(flush_region)
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -118,7 +115,6 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
|
||||
| Some((_, Instruction::CloseRegion { .. }))
|
||||
| Some((_, Instruction::DowngradeRegion { .. }))
|
||||
| Some((_, Instruction::UpgradeRegion { .. }))
|
||||
| Some((_, Instruction::FlushRegion { .. }))
|
||||
| Some((_, Instruction::FlushRegions { .. }))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -14,99 +14,156 @@
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::instruction::{FlushRegions, InstructionReply, SimpleReply};
|
||||
use common_meta::instruction::{
|
||||
FlushErrorStrategy, FlushRegionReply, FlushRegions, FlushStrategy, InstructionReply,
|
||||
};
|
||||
use common_telemetry::{debug, warn};
|
||||
use futures_util::future::BoxFuture;
|
||||
use store_api::region_request::{RegionFlushRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{self, RegionNotFoundSnafu, RegionNotReadySnafu, UnexpectedSnafu};
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
|
||||
impl HandlerContext {
|
||||
/// Performs the actual region flush operation.
|
||||
async fn perform_region_flush(&self, region_id: RegionId) -> Result<(), error::Error> {
|
||||
let request = RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
});
|
||||
self.region_server
|
||||
.handle_request(region_id, request)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles asynchronous flush hints (fire-and-forget).
|
||||
async fn handle_flush_hint(&self, region_ids: Vec<RegionId>) {
|
||||
let start_time = Instant::now();
|
||||
for region_id in ®ion_ids {
|
||||
let result = self.perform_region_flush(*region_id).await;
|
||||
match result {
|
||||
Ok(_) => {}
|
||||
Err(error::Error::RegionNotFound { .. }) => {
|
||||
warn!(
|
||||
"Received a flush region hint from meta, but target region: {} is not found.",
|
||||
region_id
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to flush region: {}, error: {}", region_id, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
let elapsed = start_time.elapsed();
|
||||
debug!(
|
||||
"Flush regions hint: {:?}, elapsed: {:?}",
|
||||
region_ids, elapsed
|
||||
);
|
||||
}
|
||||
|
||||
/// Handles synchronous flush operations with proper error handling and replies.
|
||||
async fn handle_flush_sync(
|
||||
&self,
|
||||
region_ids: Vec<RegionId>,
|
||||
error_strategy: FlushErrorStrategy,
|
||||
) -> FlushRegionReply {
|
||||
let mut results = Vec::with_capacity(region_ids.len());
|
||||
|
||||
for region_id in region_ids {
|
||||
let result = self.flush_single_region_sync(region_id).await;
|
||||
|
||||
match &result {
|
||||
Ok(_) => results.push((region_id, Ok(()))),
|
||||
Err(err) => {
|
||||
// Convert error::Error to String for FlushRegionReply compatibility
|
||||
let error_string = err.to_string();
|
||||
results.push((region_id, Err(error_string)));
|
||||
|
||||
// For fail-fast strategy, abort on first error
|
||||
if matches!(error_strategy, FlushErrorStrategy::FailFast) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FlushRegionReply::from_results(results)
|
||||
}
|
||||
|
||||
/// Flushes a single region synchronously with proper error handling.
|
||||
async fn flush_single_region_sync(&self, region_id: RegionId) -> Result<(), error::Error> {
|
||||
// Check if region is leader and writable
|
||||
let Some(writable) = self.region_server.is_region_leader(region_id) else {
|
||||
return Err(RegionNotFoundSnafu { region_id }.build());
|
||||
};
|
||||
|
||||
if !writable {
|
||||
return Err(RegionNotReadySnafu { region_id }.build());
|
||||
}
|
||||
|
||||
// Register and execute the flush task
|
||||
let region_server_moved = self.region_server.clone();
|
||||
let register_result = self
|
||||
.flush_tasks
|
||||
.try_register(
|
||||
region_id,
|
||||
Box::pin(async move {
|
||||
region_server_moved
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
if register_result.is_busy() {
|
||||
warn!("Another flush task is running for the region: {region_id}");
|
||||
}
|
||||
|
||||
let mut watcher = register_result.into_watcher();
|
||||
match self.flush_tasks.wait_until_finish(&mut watcher).await {
|
||||
Ok(()) => Ok(()),
|
||||
Err(err) => Err(UnexpectedSnafu {
|
||||
violated: format!("Flush task failed: {err:?}"),
|
||||
}
|
||||
.build()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Unified handler for FlushRegions with all flush semantics.
|
||||
pub(crate) fn handle_flush_regions_instruction(
|
||||
self,
|
||||
flush_regions: FlushRegions,
|
||||
) -> BoxFuture<'static, Option<InstructionReply>> {
|
||||
Box::pin(async move {
|
||||
let start_time = Instant::now();
|
||||
for region_id in &flush_regions.region_ids {
|
||||
let request = RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
});
|
||||
let result = self.region_server.handle_request(*region_id, request).await;
|
||||
match result {
|
||||
Ok(_) => {}
|
||||
Err(error::Error::RegionNotFound { .. }) => {
|
||||
warn!(
|
||||
"Received a flush region instruction from meta, but target region: {} is not found.",
|
||||
region_id
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to flush region: {}, error: {}", region_id, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
let elapsed = start_time.elapsed();
|
||||
debug!(
|
||||
"Flush regions: {:?}, elapsed: {:?}",
|
||||
flush_regions.region_ids, elapsed
|
||||
);
|
||||
None
|
||||
})
|
||||
}
|
||||
let strategy = flush_regions.strategy;
|
||||
let region_ids = flush_regions.region_ids;
|
||||
let error_strategy = flush_regions.error_strategy;
|
||||
|
||||
pub(crate) fn handle_flush_region_instruction(
|
||||
self,
|
||||
region_id: RegionId,
|
||||
) -> BoxFuture<'static, Option<InstructionReply>> {
|
||||
Box::pin(async move {
|
||||
let Some(writable) = self.region_server.is_region_leader(region_id) else {
|
||||
return Some(InstructionReply::FlushRegion(SimpleReply {
|
||||
result: false,
|
||||
error: Some("Region is not leader".to_string()),
|
||||
}));
|
||||
let reply = if matches!(strategy, FlushStrategy::Async) {
|
||||
// Asynchronous hint mode: fire-and-forget, no reply expected
|
||||
self.handle_flush_hint(region_ids).await;
|
||||
None
|
||||
} else {
|
||||
// Synchronous mode: return reply with results
|
||||
let reply = self.handle_flush_sync(region_ids, error_strategy).await;
|
||||
Some(InstructionReply::FlushRegions(reply))
|
||||
};
|
||||
|
||||
if !writable {
|
||||
return Some(InstructionReply::FlushRegion(SimpleReply {
|
||||
result: false,
|
||||
error: Some("Region is not writable".to_string()),
|
||||
}));
|
||||
}
|
||||
let elapsed = start_time.elapsed();
|
||||
debug!(
|
||||
"FlushRegions strategy: {:?}, elapsed: {:?}, reply: {:?}",
|
||||
strategy, elapsed, reply
|
||||
);
|
||||
|
||||
let region_server_moved = self.region_server.clone();
|
||||
let register_result = self
|
||||
.flush_tasks
|
||||
.try_register(
|
||||
region_id,
|
||||
Box::pin(async move {
|
||||
let request = RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
});
|
||||
region_server_moved
|
||||
.handle_request(region_id, request)
|
||||
.await?;
|
||||
Ok(())
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
if register_result.is_busy() {
|
||||
warn!("Another flush task is running for the region: {region_id}");
|
||||
}
|
||||
let mut watcher = register_result.into_watcher();
|
||||
let result = self.flush_tasks.wait_until_finish(&mut watcher).await;
|
||||
match result {
|
||||
Ok(()) => Some(InstructionReply::FlushRegion(SimpleReply {
|
||||
result: true,
|
||||
error: None,
|
||||
})),
|
||||
Err(err) => Some(InstructionReply::FlushRegion(SimpleReply {
|
||||
result: false,
|
||||
error: Some(format!("{err:?}")),
|
||||
})),
|
||||
}
|
||||
reply
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -115,7 +172,7 @@ impl HandlerContext {
|
||||
mod tests {
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use common_meta::instruction::FlushRegions;
|
||||
use common_meta::instruction::{FlushErrorStrategy, FlushRegions};
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -123,7 +180,7 @@ mod tests {
|
||||
use crate::tests::{mock_region_server, MockRegionEngine};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_flush_region_instruction() {
|
||||
async fn test_handle_flush_region_hint() {
|
||||
let flushed_region_ids: Arc<RwLock<Vec<RegionId>>> = Arc::new(RwLock::new(Vec::new()));
|
||||
|
||||
let mock_region_server = mock_region_server();
|
||||
@@ -142,23 +199,138 @@ mod tests {
|
||||
}
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
// Async hint mode
|
||||
let flush_instruction = FlushRegions::async_batch(region_ids.clone());
|
||||
let reply = handler_context
|
||||
.clone()
|
||||
.handle_flush_regions_instruction(FlushRegions {
|
||||
region_ids: region_ids.clone(),
|
||||
})
|
||||
.handle_flush_regions_instruction(flush_instruction)
|
||||
.await;
|
||||
assert!(reply.is_none());
|
||||
assert!(reply.is_none()); // Hint mode returns no reply
|
||||
assert_eq!(*flushed_region_ids.read().unwrap(), region_ids);
|
||||
|
||||
// Non-existent regions
|
||||
flushed_region_ids.write().unwrap().clear();
|
||||
let not_found_region_ids = (0..2).map(|i| RegionId::new(2048, i)).collect::<Vec<_>>();
|
||||
let flush_instruction = FlushRegions::async_batch(not_found_region_ids);
|
||||
let reply = handler_context
|
||||
.handle_flush_regions_instruction(FlushRegions {
|
||||
region_ids: not_found_region_ids.clone(),
|
||||
})
|
||||
.handle_flush_regions_instruction(flush_instruction)
|
||||
.await;
|
||||
assert!(reply.is_none());
|
||||
assert!(flushed_region_ids.read().unwrap().is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_flush_region_sync_single() {
|
||||
let flushed_region_ids: Arc<RwLock<Vec<RegionId>>> = Arc::new(RwLock::new(Vec::new()));
|
||||
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
|
||||
let flushed_region_ids_ref = flushed_region_ids.clone();
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, move |region_engine| {
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(move |region_id, _request| {
|
||||
flushed_region_ids_ref.write().unwrap().push(region_id);
|
||||
Ok(0)
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_id, mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
let flush_instruction = FlushRegions::sync_single(region_id);
|
||||
let reply = handler_context
|
||||
.handle_flush_regions_instruction(flush_instruction)
|
||||
.await;
|
||||
|
||||
assert!(reply.is_some());
|
||||
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
|
||||
assert!(flush_reply.overall_success);
|
||||
assert_eq!(flush_reply.results.len(), 1);
|
||||
assert_eq!(flush_reply.results[0].0, region_id);
|
||||
assert!(flush_reply.results[0].1.is_ok());
|
||||
} else {
|
||||
panic!("Expected FlushRegions reply");
|
||||
}
|
||||
|
||||
assert_eq!(*flushed_region_ids.read().unwrap(), vec![region_id]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_flush_region_sync_batch_fail_fast() {
|
||||
let flushed_region_ids: Arc<RwLock<Vec<RegionId>>> = Arc::new(RwLock::new(Vec::new()));
|
||||
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_ids = vec![
|
||||
RegionId::new(1024, 1),
|
||||
RegionId::new(1024, 2),
|
||||
RegionId::new(1024, 3),
|
||||
];
|
||||
|
||||
// Register only the first region, others will fail
|
||||
let flushed_region_ids_ref = flushed_region_ids.clone();
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, move |region_engine| {
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(move |region_id, _request| {
|
||||
flushed_region_ids_ref.write().unwrap().push(region_id);
|
||||
Ok(0)
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_ids[0], mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
// Sync batch with fail-fast strategy
|
||||
let flush_instruction =
|
||||
FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::FailFast);
|
||||
let reply = handler_context
|
||||
.handle_flush_regions_instruction(flush_instruction)
|
||||
.await;
|
||||
|
||||
assert!(reply.is_some());
|
||||
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
|
||||
assert!(!flush_reply.overall_success); // Should fail due to non-existent regions
|
||||
// With fail-fast, only process regions until first failure
|
||||
assert!(flush_reply.results.len() <= region_ids.len());
|
||||
} else {
|
||||
panic!("Expected FlushRegions reply");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_flush_region_sync_batch_try_all() {
|
||||
let flushed_region_ids: Arc<RwLock<Vec<RegionId>>> = Arc::new(RwLock::new(Vec::new()));
|
||||
|
||||
let mock_region_server = mock_region_server();
|
||||
let region_ids = vec![RegionId::new(1024, 1), RegionId::new(1024, 2)];
|
||||
|
||||
// Register only the first region
|
||||
let flushed_region_ids_ref = flushed_region_ids.clone();
|
||||
let (mock_engine, _) =
|
||||
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, move |region_engine| {
|
||||
region_engine.handle_request_mock_fn = Some(Box::new(move |region_id, _request| {
|
||||
flushed_region_ids_ref.write().unwrap().push(region_id);
|
||||
Ok(0)
|
||||
}))
|
||||
});
|
||||
mock_region_server.register_test_region(region_ids[0], mock_engine);
|
||||
let handler_context = HandlerContext::new_for_test(mock_region_server);
|
||||
|
||||
// Sync batch with try-all strategy
|
||||
let flush_instruction =
|
||||
FlushRegions::sync_batch(region_ids.clone(), FlushErrorStrategy::TryAll);
|
||||
let reply = handler_context
|
||||
.handle_flush_regions_instruction(flush_instruction)
|
||||
.await;
|
||||
|
||||
assert!(reply.is_some());
|
||||
if let Some(InstructionReply::FlushRegions(flush_reply)) = reply {
|
||||
assert!(!flush_reply.overall_success); // Should fail due to one non-existent region
|
||||
// With try-all, should process all regions
|
||||
assert_eq!(flush_reply.results.len(), region_ids.len());
|
||||
// First should succeed, second should fail
|
||||
assert!(flush_reply.results[0].1.is_ok());
|
||||
assert!(flush_reply.results[1].1.is_err());
|
||||
} else {
|
||||
panic!("Expected FlushRegions reply");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user