feat: adapt region keep aliver for region server (#2333)

* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor, collapse one layer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove old heartbeat handler impls

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove old region alive keeper

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove remote catalog manager

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* global replace

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test countdown task

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2023-09-06 04:52:46 -05:00
parent dac6b2e80a
commit f287a5db9f
12 changed files with 549 additions and 2518 deletions

View File

@@ -0,0 +1,467 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::future::Future;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_meta::error::InvalidProtoMsgSnafu;
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_telemetry::{debug, error, info, trace, warn};
use snafu::OptionExt;
use store_api::region_request::{RegionCloseRequest, RegionRequest};
use store_api::storage::RegionId;
#[cfg(test)]
use tokio::sync::oneshot;
use tokio::sync::{mpsc, Mutex};
use tokio::task::JoinHandle;
use tokio::time::{Duration, Instant};
use crate::region_server::RegionServer;
const MAX_CLOSE_RETRY_TIMES: usize = 10;
/// [RegionAliveKeeper] manages all [CountdownTaskHandle]s.
///
/// [RegionAliveKeeper] starts a [CountdownTask] for each region. When deadline is reached,
/// the region will be closed.
/// The deadline is controlled by Metasrv. It works like "lease" for regions: a Datanode submits its
/// opened regions to Metasrv, in heartbeats. If Metasrv decides some region could be resided in this
/// Datanode, it will "extend" the region's "lease", with a deadline for [RegionAliveKeeper] to
/// countdown.
pub struct RegionAliveKeeper {
region_server: RegionServer,
tasks: Arc<Mutex<HashMap<RegionId, Arc<CountdownTaskHandle>>>>,
heartbeat_interval_millis: u64,
started: AtomicBool,
/// The epoch when [RegionAliveKeepers] is created. It's used to get a monotonically non-decreasing
/// elapsed time when submitting heartbeats to Metasrv (because [Instant] is monotonically
/// non-decreasing). The heartbeat request will carry the duration since this epoch, and the
/// duration acts like an "invariant point" for region's keep alive lease.
epoch: Instant,
}
impl RegionAliveKeeper {
pub fn new(region_server: RegionServer, heartbeat_interval_millis: u64) -> Self {
Self {
region_server,
tasks: Arc::new(Mutex::new(HashMap::new())),
heartbeat_interval_millis,
started: AtomicBool::new(false),
epoch: Instant::now(),
}
}
async fn find_handle(&self, region_id: RegionId) -> Option<Arc<CountdownTaskHandle>> {
self.tasks.lock().await.get(&region_id).cloned()
}
pub async fn register_region(&self, region_id: RegionId) {
if self.find_handle(region_id).await.is_some() {
return;
}
let tasks = Arc::downgrade(&self.tasks);
let on_task_finished = async move {
if let Some(x) = tasks.upgrade() {
let _ = x.lock().await.remove(&region_id);
} // Else the countdown task handles map could be dropped because the keeper is dropped.
};
let handle = Arc::new(CountdownTaskHandle::new(
self.region_server.clone(),
region_id,
move |result: Option<bool>| {
info!(
"Deregister region: {region_id} after countdown task finished, result: {result:?}",
);
on_task_finished
},
));
let mut handles = self.tasks.lock().await;
let _ = handles.insert(region_id, handle.clone());
if self.started.load(Ordering::Relaxed) {
handle.start(self.heartbeat_interval_millis).await;
info!("Region alive countdown for region {region_id} is started!",);
} else {
info!(
"Region alive countdown for region {region_id} is registered but not started yet!",
);
}
}
pub async fn deregister_region(&self, region_id: RegionId) {
if self.tasks.lock().await.remove(&region_id).is_some() {
info!("Deregister alive countdown for region {region_id}")
}
}
async fn keep_lived(&self, designated_regions: Vec<RegionId>, deadline: Instant) {
for region_id in designated_regions {
if let Some(handle) = self.find_handle(region_id).await {
handle.reset_deadline(deadline).await;
}
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
}
#[cfg(test)]
async fn deadline(&self, region_id: RegionId) -> Option<Instant> {
let mut deadline = None;
if let Some(handle) = self.find_handle(region_id).await {
let (s, r) = oneshot::channel();
if handle.tx.send(CountdownCommand::Deadline(s)).await.is_ok() {
deadline = r.await.ok()
}
}
deadline
}
pub async fn start(&self) {
let tasks = self.tasks.lock().await;
for task in tasks.values() {
task.start(self.heartbeat_interval_millis).await;
}
self.started.store(true, Ordering::Relaxed);
info!(
"RegionAliveKeeper is started with region {:?}",
tasks.keys().map(|x| x.to_string()).collect::<Vec<_>>(),
);
}
pub fn epoch(&self) -> Instant {
self.epoch
}
}
#[async_trait]
impl HeartbeatResponseHandler for RegionAliveKeeper {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
ctx.response.region_lease.is_some()
}
async fn handle(
&self,
ctx: &mut HeartbeatResponseHandlerContext,
) -> common_meta::error::Result<HandleControl> {
let region_lease = ctx
.response
.region_lease
.as_ref()
.context(InvalidProtoMsgSnafu {
err_msg: "'region_lease' is missing in heartbeat response",
})?;
let start_instant = self.epoch + Duration::from_millis(region_lease.duration_since_epoch);
let deadline = start_instant + Duration::from_secs(region_lease.lease_seconds);
let region_ids = region_lease
.region_ids
.iter()
.map(|id| RegionId::from_u64(*id))
.collect();
self.keep_lived(region_ids, deadline).await;
Ok(HandleControl::Continue)
}
}
#[derive(Debug)]
enum CountdownCommand {
/// Start this countdown task. The first deadline will be set to
/// 4 * `heartbeat_interval_millis`
Start(u64),
/// Reset countdown deadline to the given instance.
Reset(Instant),
/// Returns the current deadline of the countdown task.
#[cfg(test)]
Deadline(oneshot::Sender<Instant>),
}
struct CountdownTaskHandle {
tx: mpsc::Sender<CountdownCommand>,
handler: JoinHandle<()>,
region_id: RegionId,
}
impl CountdownTaskHandle {
/// Creates a new [CountdownTaskHandle] and starts the countdown task.
/// # Params
/// - `on_task_finished`: a callback to be invoked when the task is finished. Note that it will not
/// be invoked if the task is cancelled (by dropping the handle). This is because we want something
/// meaningful to be done when the task is finished, e.g. deregister the handle from the map.
/// While dropping the handle does not necessarily mean the task is finished.
fn new<Fut>(
region_server: RegionServer,
region_id: RegionId,
on_task_finished: impl FnOnce(Option<bool>) -> Fut + Send + 'static,
) -> Self
where
Fut: Future<Output = ()> + Send,
{
let (tx, rx) = mpsc::channel(1024);
let mut countdown_task = CountdownTask {
region_server,
region_id,
rx,
};
let handler = common_runtime::spawn_bg(async move {
let result = countdown_task.run().await;
on_task_finished(result).await;
});
Self {
tx,
handler,
region_id,
}
}
async fn start(&self, heartbeat_interval_millis: u64) {
if let Err(e) = self
.tx
.send(CountdownCommand::Start(heartbeat_interval_millis))
.await
{
warn!(
"Failed to start region alive keeper countdown: {e}. \
Maybe the task is stopped due to region been closed."
);
}
}
#[cfg(test)]
async fn deadline(&self) -> Option<Instant> {
let (tx, rx) = oneshot::channel();
if self.tx.send(CountdownCommand::Deadline(tx)).await.is_ok() {
return rx.await.ok();
}
None
}
async fn reset_deadline(&self, deadline: Instant) {
if let Err(e) = self.tx.send(CountdownCommand::Reset(deadline)).await {
warn!(
"Failed to reset region alive keeper deadline: {e}. \
Maybe the task is stopped due to region been closed."
);
}
}
}
impl Drop for CountdownTaskHandle {
fn drop(&mut self) {
debug!(
"Aborting region alive countdown task for region {}",
self.region_id
);
self.handler.abort();
}
}
struct CountdownTask {
region_server: RegionServer,
region_id: RegionId,
rx: mpsc::Receiver<CountdownCommand>,
}
impl CountdownTask {
// returns true if region closed successfully
async fn run(&mut self) -> Option<bool> {
// 30 years. See `Instant::far_future`.
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
// Make sure the alive countdown is not gonna happen before heartbeat task is started (the
// "start countdown" command will be sent from heartbeat task).
let countdown = tokio::time::sleep_until(far_future);
tokio::pin!(countdown);
let region_id = self.region_id;
loop {
tokio::select! {
command = self.rx.recv() => {
match command {
Some(CountdownCommand::Start(heartbeat_interval_millis)) => {
// Set first deadline in 4 heartbeats (roughly after 20 seconds from now if heartbeat
// interval is set to default 5 seconds), to make Datanode and Metasrv more tolerable to
// network or other jitters during startup.
let first_deadline = Instant::now() + Duration::from_millis(heartbeat_interval_millis) * 4;
countdown.set(tokio::time::sleep_until(first_deadline));
},
Some(CountdownCommand::Reset(deadline)) => {
if countdown.deadline() < deadline {
trace!(
"Reset deadline of region {region_id} to approximately {} seconds later",
(deadline - Instant::now()).as_secs_f32(),
);
countdown.set(tokio::time::sleep_until(deadline));
}
// Else the countdown could be either:
// - not started yet;
// - during startup protection;
// - received a lagging heartbeat message.
// All can be safely ignored.
},
None => {
info!(
"The handle of countdown task for region {region_id}\
is dropped, RegionAliveKeeper out."
);
break;
},
#[cfg(test)]
Some(CountdownCommand::Deadline(tx)) => {
let _ = tx.send(countdown.deadline());
}
}
}
() = &mut countdown => {
let result = self.close_region().await;
info!(
"Region {region_id} is closed, result: {result:?}. \
RegionAliveKeeper out.",
);
return Some(result);
}
}
}
None
}
/// Returns if the region is closed successfully.
async fn close_region(&self) -> bool {
for retry in 0..MAX_CLOSE_RETRY_TIMES {
let request = RegionRequest::Close(RegionCloseRequest {});
match self
.region_server
.handle_request(self.region_id, request)
.await
{
Ok(_) => return true,
Err(e) if e.status_code() == StatusCode::RegionNotFound => return true,
// If region is failed to close, immediately retry. Maybe we should panic instead?
Err(e) => error!(e;
"Retry {retry}, failed to close region {}. \
For the integrity of data, retry closing and retry without wait.",
self.region_id,
),
}
}
false
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::tests::mock_region_server;
#[tokio::test(flavor = "multi_thread")]
async fn region_alive_keeper() {
let region_server = mock_region_server();
let alive_keeper = RegionAliveKeeper::new(region_server, 300);
let region_id = RegionId::new(1, 2);
// register a region before starting
alive_keeper.register_region(region_id).await;
assert!(alive_keeper.find_handle(region_id).await.is_some());
alive_keeper.start().await;
// started alive keeper should assign deadline to this region
let deadline = alive_keeper.deadline(region_id).await.unwrap();
assert!(deadline >= Instant::now());
// extend lease then sleep
alive_keeper
.keep_lived(vec![region_id], Instant::now() + Duration::from_millis(500))
.await;
tokio::time::sleep(Duration::from_millis(500)).await;
assert!(alive_keeper.find_handle(region_id).await.is_some());
let deadline = alive_keeper.deadline(region_id).await.unwrap();
assert!(deadline >= Instant::now());
// sleep to wait lease expired
tokio::time::sleep(Duration::from_millis(1000)).await;
assert!(alive_keeper.find_handle(region_id).await.is_none());
}
#[tokio::test(flavor = "multi_thread")]
async fn countdown_task() {
let region_server = mock_region_server();
let (tx, rx) = oneshot::channel();
let countdown_handle = CountdownTaskHandle::new(
region_server,
RegionId::new(9999, 2),
|result: Option<bool>| async move {
tx.send((Instant::now(), result)).unwrap();
},
);
// if countdown task is not started, its deadline is set to far future
assert!(
countdown_handle.deadline().await.unwrap()
> Instant::now() + Duration::from_secs(86400 * 365 * 29)
);
// the first deadline should be set to 4 * heartbeat_interval_millis
// we assert it to be greater than 3 * heartbeat_interval_millis to avoid flaky test
let heartbeat_interval_millis = 100;
countdown_handle.start(heartbeat_interval_millis).await;
assert!(
countdown_handle.deadline().await.unwrap()
> Instant::now() + Duration::from_millis(heartbeat_interval_millis * 3)
);
// reset deadline
// a nearer deadline will be ignored
countdown_handle
.reset_deadline(Instant::now() + Duration::from_millis(heartbeat_interval_millis))
.await;
assert!(
countdown_handle.deadline().await.unwrap()
> Instant::now() + Duration::from_millis(heartbeat_interval_millis * 3)
);
// only a farther deadline will be accepted
countdown_handle
.reset_deadline(Instant::now() + Duration::from_millis(heartbeat_interval_millis * 5))
.await;
assert!(
countdown_handle.deadline().await.unwrap()
> Instant::now() + Duration::from_millis(heartbeat_interval_millis * 4)
);
// wait for countdown task to finish
let before_await = Instant::now();
let (finish_instant, result) = rx.await.unwrap();
// the mock region server cannot close the region
assert_eq!(result, Some(false));
// this task should be finished after 5 * heartbeat_interval_millis
// we assert 4 times here
assert!(
finish_instant > before_await + Duration::from_millis(heartbeat_interval_millis * 4)
);
}
}

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::{HeartbeatRequest, Peer, RegionStat};
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::{
HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
@@ -27,11 +26,11 @@ use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_telemetry::{debug, error, info, trace, warn};
use meta_client::client::{HeartbeatSender, MetaClient};
use snafu::{OptionExt, ResultExt};
use table::engine::manager::MemoryTableEngineManager;
use tokio::sync::mpsc;
use tokio::time::Instant;
use self::handler::RegionHeartbeatResponseHandler;
use crate::alive_keeper::RegionAliveKeeper;
use crate::datanode::DatanodeOptions;
use crate::error::{
self, MetaClientInitSnafu, MissingMetasrvOptsSnafu, MissingNodeIdSnafu, Result,
@@ -51,7 +50,7 @@ pub struct HeartbeatTask {
region_server: RegionServer,
interval: u64,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
region_alive_keeper: Arc<RegionAliveKeeper>,
}
impl Drop for HeartbeatTask {
@@ -77,14 +76,14 @@ impl HeartbeatTask {
let region_server = region_server.unwrap();
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(
Arc::new(MemoryTableEngineManager::new_empty()),
let region_alive_keeper = Arc::new(RegionAliveKeeper::new(
region_server.clone(),
opts.heartbeat.interval_millis,
));
let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(RegionHeartbeatResponseHandler::new(region_server.clone())),
region_alive_keepers.clone(),
region_alive_keeper.clone(),
]));
Ok(Self {
@@ -98,7 +97,7 @@ impl HeartbeatTask {
region_server,
interval: opts.heartbeat.interval_millis,
resp_handler_executor,
region_alive_keepers,
region_alive_keeper,
})
}
@@ -163,7 +162,7 @@ impl HeartbeatTask {
let addr = resolve_addr(&self.server_addr, &self.server_hostname);
info!("Starting heartbeat to Metasrv with interval {interval}. My node id is {node_id}, address is {addr}.");
self.region_alive_keepers.start().await;
self.region_alive_keeper.start().await;
let meta_client = self.meta_client.clone();
let region_server_clone = self.region_server.clone();
@@ -181,7 +180,7 @@ impl HeartbeatTask {
)
.await?;
let epoch = self.region_alive_keepers.epoch();
let epoch = self.region_alive_keeper.epoch();
common_runtime::spawn_bg(async move {
let sleep = tokio::time::sleep(Duration::from_millis(0));
tokio::pin!(sleep);

View File

@@ -30,9 +30,6 @@ use store_api::storage::RegionId;
use crate::error::Result;
use crate::region_server::RegionServer;
pub mod close_region;
pub mod open_region;
/// Handler for [Instruction::OpenRegion] and [Instruction::CloseRegion].
#[derive(Clone)]
pub struct RegionHeartbeatResponseHandler {

View File

@@ -1,235 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use async_trait::async_trait;
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::{CatalogManagerRef, DeregisterTableRequest};
use common_catalog::format_full_table_name;
use common_meta::error::Result as MetaResult;
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_meta::RegionIdent;
use common_telemetry::{error, info, warn};
use snafu::ResultExt;
use store_api::storage::RegionNumber;
use table::engine::manager::TableEngineManagerRef;
use table::engine::{CloseTableResult, EngineContext, TableReference};
use table::requests::CloseTableRequest;
use crate::error::{self, Result};
#[derive(Clone)]
pub struct CloseRegionHandler {
catalog_manager: CatalogManagerRef,
table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
}
#[async_trait]
impl HeartbeatResponseHandler for CloseRegionHandler {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
matches!(
ctx.incoming_message.as_ref(),
Some((_, Instruction::CloseRegion { .. }))
)
}
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
let Some((meta, Instruction::CloseRegion(region_ident))) = ctx.incoming_message.take()
else {
unreachable!("CloseRegionHandler: should be guarded by 'is_acceptable'");
};
let mailbox = ctx.mailbox.clone();
let self_ref = Arc::new(self.clone());
let _handle = common_runtime::spawn_bg(async move {
let result = self_ref.close_region_inner(region_ident).await;
if let Err(e) = mailbox
.send((meta, CloseRegionHandler::map_result(result)))
.await
{
error!(e; "Failed to send reply to mailbox");
}
});
Ok(HandleControl::Done)
}
}
impl CloseRegionHandler {
pub fn new(
catalog_manager: CatalogManagerRef,
table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
) -> Self {
Self {
catalog_manager,
table_engine_manager,
region_alive_keepers,
}
}
fn map_result(result: Result<bool>) -> InstructionReply {
result.map_or_else(
|error| {
InstructionReply::CloseRegion(SimpleReply {
result: false,
error: Some(error.to_string()),
})
},
|result| {
InstructionReply::CloseRegion(SimpleReply {
result,
error: None,
})
},
)
}
/// Returns true if a table or target regions have been closed.
async fn regions_closed(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
region_numbers: &[RegionNumber],
) -> Result<bool> {
if let Some(table) = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.await
.context(error::AccessCatalogSnafu)?
{
for r in region_numbers {
let region_exist =
table
.contains_region(*r)
.with_context(|_| error::CheckRegionSnafu {
table_name: format_full_table_name(
catalog_name,
schema_name,
table_name,
),
region_number: *r,
})?;
if region_exist {
return Ok(false);
}
}
}
// Returns true if table not exist
Ok(true)
}
async fn close_region_inner(&self, region_ident: RegionIdent) -> Result<bool> {
let table_ident = &region_ident.table_ident;
let engine_name = &table_ident.engine;
let engine = self
.table_engine_manager
.engine(engine_name)
.context(error::TableEngineNotFoundSnafu { engine_name })?;
let ctx = EngineContext::default();
let table_ref = &TableReference::full(
&table_ident.catalog,
&table_ident.schema,
&table_ident.table,
);
let region_numbers = vec![region_ident.region_number];
if self
.regions_closed(
table_ref.catalog,
table_ref.schema,
table_ref.table,
&region_numbers,
)
.await?
{
return Ok(true);
}
if engine
.get_table(&ctx, region_ident.table_ident.table_id)
.with_context(|_| error::GetTableSnafu {
table_name: table_ref.to_string(),
})?
.is_some()
{
return match engine
.close_table(
&ctx,
CloseTableRequest {
catalog_name: table_ref.catalog.to_string(),
schema_name: table_ref.schema.to_string(),
table_name: table_ref.table.to_string(),
region_numbers: region_numbers.clone(),
table_id: region_ident.table_ident.table_id,
flush: true,
},
)
.await
.with_context(|_| error::CloseTableSnafu {
table_name: table_ref.to_string(),
region_numbers: region_numbers.clone(),
})? {
CloseTableResult::NotFound | CloseTableResult::Released(_) => {
// Deregister table if The table released.
self.deregister_table(table_ref).await?;
let _ = self
.region_alive_keepers
.deregister_table(table_ident)
.await;
Ok(true)
}
CloseTableResult::PartialClosed(regions) => {
// Requires caller to update the region_numbers
info!(
"Close partial regions: {:?} in table: {}",
regions, table_ref
);
self.region_alive_keepers
.deregister_region(&region_ident)
.await;
Ok(true)
}
};
}
warn!("Trying to close a non-existing table: {}", table_ref);
// Table doesn't exist
Ok(true)
}
async fn deregister_table(&self, table_ref: &TableReference<'_>) -> Result<()> {
self.catalog_manager
.deregister_table(DeregisterTableRequest {
catalog: table_ref.catalog.to_string(),
schema: table_ref.schema.to_string(),
table_name: table_ref.table.to_string(),
})
.await
.with_context(|_| error::DeregisterTableSnafu {
table_name: table_ref.to_string(),
})
}
}

View File

@@ -1,250 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use async_trait::async_trait;
use catalog::error::{Error as CatalogError, Result as CatalogResult};
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::{CatalogManagerRef, RegisterSchemaRequest, RegisterTableRequest};
use common_catalog::format_full_table_name;
use common_meta::error::Result as MetaResult;
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_telemetry::{error, warn};
use snafu::ResultExt;
use store_api::storage::RegionNumber;
use table::engine::manager::TableEngineManagerRef;
use table::engine::EngineContext;
use table::requests::OpenTableRequest;
use table::Table;
use crate::error::{self, Result};
#[derive(Clone)]
pub struct OpenRegionHandler {
catalog_manager: CatalogManagerRef,
table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
}
#[async_trait]
impl HeartbeatResponseHandler for OpenRegionHandler {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
matches!(
ctx.incoming_message,
Some((_, Instruction::OpenRegion { .. }))
)
}
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
let Some((meta, Instruction::OpenRegion(region_ident))) = ctx.incoming_message.take()
else {
unreachable!("OpenRegionHandler: should be guarded by 'is_acceptable'");
};
let mailbox = ctx.mailbox.clone();
let self_ref = Arc::new(self.clone());
let region_alive_keepers = self.region_alive_keepers.clone();
let _handle = common_runtime::spawn_bg(async move {
let table_ident = &region_ident.table_ident;
let request = OpenTableRequest {
catalog_name: table_ident.catalog.clone(),
schema_name: table_ident.schema.clone(),
table_name: table_ident.table.clone(),
table_id: table_ident.table_id,
region_numbers: vec![region_ident.region_number],
};
let result = self_ref
.open_region_inner(table_ident.engine.clone(), request)
.await;
if matches!(result, Ok(true)) {
region_alive_keepers.register_region(&region_ident).await;
}
if let Err(e) = mailbox
.send((meta, OpenRegionHandler::map_result(result)))
.await
{
error!(e; "Failed to send reply to mailbox");
}
});
Ok(HandleControl::Done)
}
}
impl OpenRegionHandler {
pub fn new(
catalog_manager: CatalogManagerRef,
table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
) -> Self {
Self {
catalog_manager,
table_engine_manager,
region_alive_keepers,
}
}
fn map_result(result: Result<bool>) -> InstructionReply {
result.map_or_else(
|error| {
InstructionReply::OpenRegion(SimpleReply {
result: false,
error: Some(error.to_string()),
})
},
|result| {
InstructionReply::OpenRegion(SimpleReply {
result,
error: None,
})
},
)
}
/// Returns true if a table or target regions have been opened.
async fn regions_opened(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
region_numbers: &[RegionNumber],
) -> Result<bool> {
if let Some(table) = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.await
.context(error::AccessCatalogSnafu)?
{
for r in region_numbers {
let region_exist =
table
.contains_region(*r)
.with_context(|_| error::CheckRegionSnafu {
table_name: format_full_table_name(
catalog_name,
schema_name,
table_name,
),
region_number: *r,
})?;
if !region_exist {
warn!(
"Failed to check table: {}, region: {} does not exist",
format_full_table_name(catalog_name, schema_name, table_name,),
r
);
return Ok(false);
}
}
return Ok(true);
}
Ok(false)
}
async fn register_table(
&self,
request: &OpenTableRequest,
table: Arc<dyn Table>,
) -> CatalogResult<bool> {
if !self
.catalog_manager
.catalog_exist(&request.catalog_name)
.await?
{
self.catalog_manager
.clone()
.register_catalog(request.catalog_name.to_string())
.await?;
}
if !self
.catalog_manager
.schema_exist(&request.catalog_name, &request.schema_name)
.await?
{
self.catalog_manager
.register_schema(RegisterSchemaRequest {
catalog: request.catalog_name.to_string(),
schema: request.schema_name.to_string(),
})
.await?;
}
let request = RegisterTableRequest {
catalog: request.catalog_name.to_string(),
schema: request.schema_name.to_string(),
table_name: request.table_name.to_string(),
table_id: request.table_id,
table,
};
self.catalog_manager.register_table(request).await
}
async fn open_region_inner(&self, engine: String, request: OpenTableRequest) -> Result<bool> {
let OpenTableRequest {
catalog_name,
schema_name,
table_name,
region_numbers,
..
} = &request;
let engine =
self.table_engine_manager
.engine(&engine)
.context(error::TableEngineNotFoundSnafu {
engine_name: &engine,
})?;
let ctx = EngineContext::default();
if self
.regions_opened(catalog_name, schema_name, table_name, region_numbers)
.await?
{
return Ok(true);
}
if let Some(table) = engine
.open_table(&ctx, request.clone())
.await
.with_context(|_| error::OpenTableSnafu {
table_name: format_full_table_name(catalog_name, schema_name, table_name),
})?
{
let result = self.register_table(&request, table).await;
match result {
Ok(_) | Err(CatalogError::TableExists { .. }) => Ok(true),
e => e.with_context(|_| error::RegisterTableSnafu {
table_name: format_full_table_name(catalog_name, schema_name, table_name),
}),
}
} else {
// Case 1:
// TODO(weny): Fix/Cleanup the broken table manifest
// The manifest writing operation should be atomic.
// Therefore, we won't meet this case, in theory.
// Case 2: The target region was not found in table meta
// Case 3: The table not exist
Ok(false)
}
}
}

View File

@@ -18,8 +18,7 @@ use std::time::Duration;
use std::{fs, path};
use api::v1::meta::Role;
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager};
use catalog::local::MemoryCatalogManager;
use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
@@ -29,7 +28,6 @@ use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::key::TableMetadataManager;
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::store::state_store::ObjectStateStore;
use common_procedure::ProcedureManagerRef;
@@ -51,7 +49,7 @@ use storage::scheduler::{LocalScheduler, SchedulerConfig};
use storage::EngineImpl;
use store_api::logstore::LogStore;
use store_api::path_utils::{CLUSTER_DIR, WAL_DIR};
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
use table::engine::manager::MemoryTableEngineManager;
use table::engine::{TableEngine, TableEngineProcedureRef};
use table::requests::FlushTableRequest;
use table::table::TableIdProviderRef;
@@ -63,8 +61,6 @@ use crate::error::{
ShutdownInstanceSnafu, StartProcedureManagerSnafu, StopProcedureManagerSnafu,
};
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::handler::close_region::CloseRegionHandler;
use crate::heartbeat::handler::open_region::OpenRegionHandler;
use crate::heartbeat::HeartbeatTask;
use crate::row_inserter::RowInserter;
use crate::sql::{SqlHandler, SqlRequest};
@@ -115,9 +111,6 @@ impl Instance {
fn build_heartbeat_task(
opts: &DatanodeOptions,
meta_client: Option<Arc<MetaClient>>,
catalog_manager: CatalogManagerRef,
engine_manager: TableEngineManagerRef,
region_alive_keepers: Option<Arc<RegionAliveKeepers>>,
) -> Result<Option<HeartbeatTask>> {
Ok(match opts.mode {
Mode::Standalone => None,
@@ -126,24 +119,8 @@ impl Instance {
let _meta_client = meta_client.context(IncorrectInternalStateSnafu {
state: "meta client is not provided when building heartbeat task",
})?;
let region_alive_keepers =
region_alive_keepers.context(IncorrectInternalStateSnafu {
state: "region_alive_keepers is not provided when building heartbeat task",
})?;
let _handlers_executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(OpenRegionHandler::new(
catalog_manager.clone(),
engine_manager.clone(),
region_alive_keepers.clone(),
)),
Arc::new(CloseRegionHandler::new(
catalog_manager.clone(),
engine_manager,
region_alive_keepers.clone(),
)),
region_alive_keepers.clone(),
]);
let _handlers_executor =
HandlerGroupExecutor::new(vec![Arc::new(ParseMailboxMessageHandler)]);
todo!("remove this method")
}
@@ -199,7 +176,7 @@ impl Instance {
);
// create remote catalog manager
let (catalog_manager, table_id_provider, region_alive_keepers) = match opts.mode {
let (catalog_manager, table_id_provider) = match opts.mode {
Mode::Standalone => {
let catalog = Arc::new(
catalog::local::LocalCatalogManager::try_new(engine_manager.clone())
@@ -210,35 +187,13 @@ impl Instance {
(
catalog.clone() as CatalogManagerRef,
Some(catalog as TableIdProviderRef),
None,
)
}
Mode::Distributed => {
let meta_client = meta_client.clone().context(IncorrectInternalStateSnafu {
state: "meta client is not provided when creating distributed Datanode",
})?;
let kv_backend = Arc::new(CachedMetaKvBackend::new(meta_client));
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(
engine_manager.clone(),
opts.heartbeat.interval_millis,
));
let catalog_manager = Arc::new(RemoteCatalogManager::new(
engine_manager.clone(),
opts.node_id.context(MissingNodeIdSnafu)?,
region_alive_keepers.clone(),
Arc::new(TableMetadataManager::new(kv_backend)),
));
(
catalog_manager as CatalogManagerRef,
None,
Some(region_alive_keepers),
)
}
Mode::Distributed => (
MemoryCatalogManager::with_default_setup() as CatalogManagerRef,
None,
),
};
let factory =
@@ -285,13 +240,7 @@ impl Instance {
greptimedb_telemetry_task,
});
let heartbeat_task = Instance::build_heartbeat_task(
opts,
meta_client,
catalog_manager,
engine_manager,
region_alive_keepers,
)?;
let heartbeat_task = Instance::build_heartbeat_task(opts, meta_client)?;
Ok((instance, heartbeat_task))
}

View File

@@ -15,6 +15,7 @@
#![feature(assert_matches)]
#![feature(trait_upcasting)]
pub mod alive_keeper;
pub mod datanode;
pub mod error;
mod greptimedb_telemetry;
@@ -29,4 +30,5 @@ pub mod server;
pub mod sql;
mod store;
#[cfg(test)]
#[allow(dead_code)]
mod tests;

View File

@@ -12,36 +12,40 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::assert_matches::assert_matches;
use std::any::Any;
use std::sync::Arc;
use std::time::Duration;
use api::v1::greptime_request::Request as GrpcRequest;
use api::v1::meta::HeartbeatResponse;
use api::v1::query_request::Query;
use api::v1::QueryRequest;
use catalog::local::MemoryCatalogManager;
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::CatalogManagerRef;
use async_trait::async_trait;
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
use common_function::scalars::FunctionRef;
use common_meta::heartbeat::handler::{
HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
use common_meta::ident::TableIdent;
use common_meta::instruction::{Instruction, InstructionReply, RegionIdent, SimpleReply};
use common_meta::instruction::{Instruction, InstructionReply, RegionIdent};
use common_query::prelude::ScalarUdf;
use common_query::Output;
use common_runtime::Runtime;
use datatypes::prelude::ConcreteDataType;
use query::dataframe::DataFrame;
use query::plan::LogicalPlan;
use query::planner::LogicalPlanner;
use query::query_engine::DescribeResult;
use query::QueryEngine;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::QueryContext;
use session::context::{QueryContext, QueryContextRef};
use table::engine::manager::TableEngineManagerRef;
use table::TableRef;
use test_util::MockInstance;
use tokio::sync::mpsc::{self, Receiver};
use tokio::time::Instant;
use crate::heartbeat::handler::close_region::CloseRegionHandler;
use crate::heartbeat::handler::open_region::OpenRegionHandler;
use crate::instance::Instance;
use crate::region_server::RegionServer;
pub(crate) mod test_util;
@@ -50,203 +54,12 @@ struct HandlerTestGuard {
mailbox: Arc<HeartbeatMailbox>,
rx: Receiver<(MessageMeta, InstructionReply)>,
engine_manager_ref: TableEngineManagerRef,
catalog_manager_ref: CatalogManagerRef,
}
#[tokio::test]
async fn test_close_region_handler() {
let HandlerTestGuard {
instance,
mailbox,
mut rx,
engine_manager_ref,
catalog_manager_ref,
..
} = prepare_handler_test("test_close_region_handler").await;
let executor = Arc::new(HandlerGroupExecutor::new(vec![Arc::new(
CloseRegionHandler::new(
catalog_manager_ref.clone(),
engine_manager_ref.clone(),
Arc::new(RegionAliveKeepers::new(engine_manager_ref.clone(), 5000)),
),
)]));
let _ = prepare_table(instance.inner()).await;
// Closes demo table
handle_instruction(
executor.clone(),
mailbox.clone(),
close_region_instruction(),
)
.await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::CloseRegion(SimpleReply { result: true, .. })
);
assert_test_table_not_found(instance.inner()).await;
// Closes demo table again
handle_instruction(
executor.clone(),
mailbox.clone(),
close_region_instruction(),
)
.await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::CloseRegion(SimpleReply { result: true, .. })
);
// Closes non-exist table
handle_instruction(
executor.clone(),
mailbox.clone(),
Instruction::CloseRegion(RegionIdent {
table_ident: TableIdent {
catalog: "greptime".to_string(),
schema: "public".to_string(),
table: "non-exist".to_string(),
table_id: 1025,
engine: "mito".to_string(),
},
region_number: 0,
cluster_id: 1,
datanode_id: 2,
}),
)
.await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::CloseRegion(SimpleReply { result: true, .. })
);
}
#[tokio::test]
async fn test_open_region_handler() {
let HandlerTestGuard {
instance,
mailbox,
mut rx,
engine_manager_ref,
catalog_manager_ref,
..
} = prepare_handler_test("test_open_region_handler").await;
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(engine_manager_ref.clone(), 5000));
region_alive_keepers.start().await;
let executor = Arc::new(HandlerGroupExecutor::new(vec![
Arc::new(OpenRegionHandler::new(
catalog_manager_ref.clone(),
engine_manager_ref.clone(),
region_alive_keepers.clone(),
)),
Arc::new(CloseRegionHandler::new(
catalog_manager_ref.clone(),
engine_manager_ref.clone(),
region_alive_keepers.clone(),
)),
]));
let instruction = open_region_instruction();
let Instruction::OpenRegion(region_ident) = instruction.clone() else {
unreachable!()
};
let table_ident = &region_ident.table_ident;
let table = prepare_table(instance.inner()).await;
let dummy_catalog_manager = MemoryCatalogManager::with_default_setup();
region_alive_keepers
.register_table(table_ident.clone(), table, dummy_catalog_manager)
.await
.unwrap();
// Opens a opened table
handle_instruction(executor.clone(), mailbox.clone(), instruction.clone()).await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::OpenRegion(SimpleReply { result: true, .. })
);
let keeper = region_alive_keepers
.find_keeper(table_ident.table_id)
.await
.unwrap();
let deadline = keeper.deadline(0).await.unwrap();
assert!(deadline <= Instant::now() + Duration::from_secs(20));
// Opens a non-exist table
let non_exist_table_ident = TableIdent {
catalog: "foo".to_string(),
schema: "non-exist".to_string(),
table: "non-exist".to_string(),
table_id: 2024,
engine: "mito".to_string(),
};
handle_instruction(
executor.clone(),
mailbox.clone(),
Instruction::OpenRegion(RegionIdent {
table_ident: non_exist_table_ident.clone(),
region_number: 0,
cluster_id: 1,
datanode_id: 2,
}),
)
.await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::OpenRegion(SimpleReply { result: false, .. })
);
assert!(region_alive_keepers
.find_keeper(non_exist_table_ident.table_id)
.await
.is_none());
// Closes demo table
handle_instruction(
executor.clone(),
mailbox.clone(),
close_region_instruction(),
)
.await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::CloseRegion(SimpleReply { result: true, .. })
);
assert_test_table_not_found(instance.inner()).await;
assert!(region_alive_keepers
.find_keeper(table_ident.table_id)
.await
.is_none());
// Opens demo table
handle_instruction(executor.clone(), mailbox.clone(), instruction).await;
let (_, reply) = rx.recv().await.unwrap();
assert_matches!(
reply,
InstructionReply::OpenRegion(SimpleReply { result: true, .. })
);
assert_test_table_found(instance.inner()).await;
}
async fn prepare_handler_test(name: &str) -> HandlerTestGuard {
let mock_instance = MockInstance::new(name).await;
let instance = mock_instance.inner();
let engine_manager = instance.sql_handler().table_engine_manager().clone();
let catalog_manager = instance.sql_handler().catalog_manager().clone();
let (tx, rx) = mpsc::channel(8);
let mailbox = Arc::new(HeartbeatMailbox::new(tx));
@@ -255,7 +68,6 @@ async fn prepare_handler_test(name: &str) -> HandlerTestGuard {
mailbox,
rx,
engine_manager_ref: engine_manager,
catalog_manager_ref: catalog_manager,
}
}
@@ -346,3 +158,50 @@ async fn assert_test_table_found(instance: &Instance) {
assert!(matches!(output, Output::AffectedRows(2)));
}
pub struct MockQueryEngine;
#[async_trait]
impl QueryEngine for MockQueryEngine {
fn as_any(&self) -> &dyn Any {
self as _
}
fn planner(&self) -> Arc<dyn LogicalPlanner> {
unimplemented!()
}
fn name(&self) -> &str {
"MockQueryEngine"
}
async fn describe(&self, _plan: LogicalPlan) -> query::error::Result<DescribeResult> {
unimplemented!()
}
async fn execute(
&self,
_plan: LogicalPlan,
_query_ctx: QueryContextRef,
) -> query::error::Result<Output> {
unimplemented!()
}
fn register_udf(&self, _udf: ScalarUdf) {}
fn register_aggregate_function(&self, _func: AggregateFunctionMetaRef) {}
fn register_function(&self, _func: FunctionRef) {}
fn read_table(&self, _table: TableRef) -> query::error::Result<DataFrame> {
unimplemented!()
}
}
/// Create a region server without any engine
pub fn mock_region_server() -> RegionServer {
RegionServer::new(
Arc::new(MockQueryEngine),
Arc::new(Runtime::builder().build().unwrap()),
)
}