Compare commits

...

11 Commits

Author SHA1 Message Date
Weny Xu
c26f2f94c0 chore: add logs and metrics (#2858)
* chore: add logs and metrics

* feat: add the timer to track heartbeat interval

* feat: add the gauge to track region leases

* refactor: use gauge instead of the timer

* chore: apply suggestions from CR

* feat: add hit rate and etcd txn metrics
2023-12-04 02:51:30 +00:00
Weny Xu
781f2422b3 feat: add update metadata step for rollbacking downgraded region (#2812)
* feat: add update metadata step for rollbacking downgraded region

* feat: invalidate table cache after updating metadata

* feat: add migration abort step
2023-12-01 11:36:05 +00:00
Yingwen
7e68ecc498 feat: do not concat batches in MergeReader (#2833) 2023-12-01 06:52:43 +00:00
LFC
9ce9421850 refactor: add builder for Frontend (#2849) 2023-12-01 04:39:47 +00:00
zyy17
c0df2b9086 ci: set 'omitBody' true when releasing (#2845)
ci: set 'omitBody'
2023-11-30 10:53:07 +00:00
Yiran
29d344ccd2 docs: update getting-started document link (#2843) 2023-11-30 10:03:09 +00:00
Wei
fe2fc723bc refactor: DataType name function (#2836)
* refactor: DataType name function

* chore: test case
2023-11-30 03:49:09 +00:00
Wei
2332305b90 refactor: replace usage of ArrayData by clone (#2827)
* refactor: use array clone()

* refactor: slice

* chore: clippy
2023-11-30 03:27:29 +00:00
Ruihang Xia
9ccd182109 feat: implement PromQL set op AND/UNLESS (#2839)
* initial impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* disable OR for now

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-11-30 03:17:57 +00:00
Weny Xu
ae8153515b feat: add update metadata step for upgrading candidate region (#2811) 2023-11-29 11:10:38 +00:00
Weny Xu
cce5edc88e feat: add downgrade leader region step (#2792)
* feat: add downgrade leader region step

* chore: apply suggestions from CR

* chore: rename exist to exists

* chore: apply suggestions from CR
2023-11-29 09:17:28 +00:00
76 changed files with 3199 additions and 1535 deletions

View File

@@ -31,10 +31,12 @@ runs:
echo "prerelease=false" >> $GITHUB_ENV
echo "makeLatest=true" >> $GITHUB_ENV
echo "generateReleaseNotes=false" >> $GITHUB_ENV
echo "omitBody=true" >> $GITHUB_ENV
else
echo "prerelease=true" >> $GITHUB_ENV
echo "makeLatest=false" >> $GITHUB_ENV
echo "generateReleaseNotes=true" >> $GITHUB_ENV
echo "omitBody=false" >> $GITHUB_ENV
fi
- name: Publish release
@@ -45,6 +47,7 @@ runs:
makeLatest: ${{ env.makeLatest }}
tag: ${{ inputs.version }}
generateReleaseNotes: ${{ env.generateReleaseNotes }}
omitBody: ${{ env.omitBody }} # omitBody is true when the release is an official release.
allowUpdates: true
artifacts: |
**/greptime-*/*

View File

@@ -100,7 +100,7 @@ Please see the online document site for more installation options and [operation
### Get started
Read the [complete getting started guide](https://docs.greptime.com/getting-started/try-out-greptimedb) on our [official document site](https://docs.greptime.com/).
Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview) on our [official document site](https://docs.greptime.com/).
To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview).

View File

@@ -202,7 +202,7 @@ impl InformationSchemaColumnsBuilder {
&schema_name,
&table_name,
&column.name,
column.data_type.name(),
&column.data_type.name(),
semantic_type,
);
}

View File

@@ -19,7 +19,6 @@ use std::sync::{Arc, Weak};
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::error::Result as MetaResult;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::schema_name::SchemaNameKey;
@@ -55,7 +54,6 @@ pub struct KvBackendCatalogManager {
cache_invalidator: CacheInvalidatorRef,
partition_manager: PartitionRuleManagerRef,
table_metadata_manager: TableMetadataManagerRef,
datanode_manager: DatanodeManagerRef,
/// A sub-CatalogManager that handles system tables
system_catalog: SystemCatalog,
}
@@ -76,16 +74,11 @@ impl CacheInvalidator for KvBackendCatalogManager {
}
impl KvBackendCatalogManager {
pub fn new(
backend: KvBackendRef,
cache_invalidator: CacheInvalidatorRef,
datanode_manager: DatanodeManagerRef,
) -> Arc<Self> {
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
cache_invalidator,
datanode_manager,
system_catalog: SystemCatalog {
catalog_manager: me.clone(),
},
@@ -99,10 +92,6 @@ impl KvBackendCatalogManager {
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
&self.table_metadata_manager
}
pub fn datanode_manager(&self) -> DatanodeManagerRef {
self.datanode_manager.clone()
}
}
#[async_trait::async_trait]

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use std::time::Instant;
use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use client::client_manager::DatanodeClients;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins;
use common_error::ext::ErrorExt;
@@ -250,13 +249,8 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let datanode_clients = Arc::new(DatanodeClients::default());
let catalog_list = KvBackendCatalogManager::new(
cached_meta_backend.clone(),
cached_meta_backend.clone(),
datanode_clients,
);
let catalog_list =
KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(
catalog_list,

View File

@@ -37,6 +37,12 @@ pub enum Error {
source: common_meta::error::Error,
},
#[snafu(display("Failed to init DDL manager"))]
InitDdlManager {
location: Location,
source: common_meta::error::Error,
},
#[snafu(display("Failed to start procedure manager"))]
StartProcedureManager {
location: Location,
@@ -225,13 +231,6 @@ pub enum Error {
#[snafu(source)]
error: std::io::Error,
},
#[snafu(display("Failed to parse address {}", addr))]
ParseAddr {
addr: String,
#[snafu(source)]
error: std::net::AddrParseError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -247,9 +246,11 @@ impl ErrorExt for Error {
Error::ShutdownMetaServer { source, .. } => source.status_code(),
Error::BuildMetaServer { source, .. } => source.status_code(),
Error::UnsupportedSelectorType { source, .. } => source.status_code(),
Error::IterStream { source, .. } | Error::InitMetadata { source, .. } => {
source.status_code()
}
Error::IterStream { source, .. }
| Error::InitMetadata { source, .. }
| Error::InitDdlManager { source, .. } => source.status_code(),
Error::ConnectServer { source, .. } => source.status_code(),
Error::MissingConfig { .. }
| Error::LoadLayeredConfig { .. }
@@ -259,8 +260,7 @@ impl ErrorExt for Error {
| Error::NotDataFromOutput { .. }
| Error::CreateDir { .. }
| Error::EmptyResult { .. }
| Error::InvalidDatabaseName { .. }
| Error::ParseAddr { .. } => StatusCode::InvalidArguments,
| Error::InvalidDatabaseName { .. } => StatusCode::InvalidArguments,
Error::StartProcedureManager { source, .. }
| Error::StopProcedureManager { source, .. } => source.status_code(),

View File

@@ -12,18 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::time::Duration;
use catalog::kvbackend::CachedMetaKvBackend;
use clap::Parser;
use client::client_manager::DatanodeClients;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_telemetry::logging;
use frontend::frontend::FrontendOptions;
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use meta_client::MetaClientOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use crate::error::{self, Result, StartFrontendSnafu};
use crate::error::{self, MissingConfigSnafu, Result, StartFrontendSnafu};
use crate::options::{Options, TopLevelOptions};
pub struct Instance {
@@ -196,10 +204,38 @@ impl StartCommand {
logging::info!("Frontend start command: {:#?}", self);
logging::info!("Frontend options: {:#?}", opts);
let mut instance = FeInstance::try_new_distributed(&opts, plugins.clone())
let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
msg: "'meta_client'",
})?;
let meta_client = FeInstance::create_meta_client(meta_client_options)
.await
.context(StartFrontendSnafu)?;
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(InvalidateTableCacheHandler::new(meta_backend.clone())),
]);
let heartbeat_task = HeartbeatTask::new(
meta_client.clone(),
opts.heartbeat.clone(),
Arc::new(executor),
);
let mut instance = FrontendBuilder::new(
meta_backend.clone(),
Arc::new(DatanodeClients::default()),
meta_client,
)
.with_cache_invalidator(meta_backend)
.with_plugin(plugins)
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.context(StartFrontendSnafu)?;
instance
.build_servers(opts)
.await

View File

@@ -173,7 +173,12 @@ impl StartCommand {
logging::info!("MetaSrv start command: {:#?}", self);
logging::info!("MetaSrv options: {:#?}", opts);
let instance = MetaSrvInstance::new(opts, plugins)
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins.clone(), None)
.await
.context(error::BuildMetaServerSnafu)?;
let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
let instance = MetaSrvInstance::new(opts, plugins, metasrv)
.await
.context(error::BuildMetaServerSnafu)?;

View File

@@ -15,21 +15,23 @@
use std::sync::Arc;
use std::{fs, path};
use catalog::kvbackend::KvBackendCatalogManager;
use catalog::CatalogManagerRef;
use clap::Parser;
use common_base::Plugins;
use common_config::{metadata_store_dir, KvBackendConfig, WalConfig};
use common_meta::cache_invalidator::DummyKvCacheInvalidator;
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_procedure::ProcedureManagerRef;
use common_telemetry::info;
use common_telemetry::logging::LoggingOptions;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataCreator;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::service_config::{
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
@@ -42,9 +44,9 @@ use servers::Mode;
use snafu::ResultExt;
use crate::error::{
CreateDirSnafu, IllegalConfigSnafu, InitMetadataSnafu, Result, ShutdownDatanodeSnafu,
ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
StopProcedureManagerSnafu,
CreateDirSnafu, IllegalConfigSnafu, InitDdlManagerSnafu, InitMetadataSnafu, Result,
ShutdownDatanodeSnafu, ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu,
StartProcedureManagerSnafu, StopProcedureManagerSnafu,
};
use crate::options::{MixOptions, Options, TopLevelOptions};
@@ -156,6 +158,7 @@ impl StandaloneOptions {
wal: self.wal,
storage: self.storage,
region_engine: self.region_engine,
rpc_addr: self.grpc.addr,
..Default::default()
}
}
@@ -347,36 +350,25 @@ impl StartCommand {
.await
.context(StartFrontendSnafu)?;
let datanode = DatanodeBuilder::new(dn_opts, fe_plugins.clone())
.with_kv_backend(kv_backend.clone())
.build()
.await
.context(StartDatanodeSnafu)?;
let builder =
DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
let datanode = builder.build().await.context(StartDatanodeSnafu)?;
let region_server = datanode.region_server();
let datanode_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
let catalog_manager = KvBackendCatalogManager::new(
let ddl_task_executor = Self::create_ddl_task_executor(
kv_backend.clone(),
Arc::new(DummyKvCacheInvalidator),
Arc::new(StandaloneDatanodeManager(region_server.clone())),
);
catalog_manager
.table_metadata_manager_ref()
.init()
.await
.context(InitMetadataSnafu)?;
// TODO: build frontend instance like in distributed mode
let mut frontend = build_frontend(
fe_plugins,
kv_backend,
procedure_manager.clone(),
catalog_manager,
region_server,
datanode_manager.clone(),
)
.await?;
let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
.with_plugin(fe_plugins)
.try_build()
.await
.context(StartFrontendSnafu)?;
frontend
.build_servers(opts)
.await
@@ -388,26 +380,41 @@ impl StartCommand {
procedure_manager,
})
}
}
/// Build frontend instance in standalone mode
async fn build_frontend(
plugins: Plugins,
kv_backend: KvBackendRef,
procedure_manager: ProcedureManagerRef,
catalog_manager: CatalogManagerRef,
region_server: RegionServer,
) -> Result<FeInstance> {
let frontend_instance = FeInstance::try_new_standalone(
kv_backend,
procedure_manager,
catalog_manager,
plugins,
region_server,
)
.await
.context(StartFrontendSnafu)?;
Ok(frontend_instance)
/// Builds the DDL task executor.
///
/// The table metadata manager is created (and initialized) first via
/// `Self::create_table_metadata_manager`; it is then handed to
/// `DdlManager::try_new` together with the given procedure manager and
/// datanode manager, a `DummyCacheInvalidator`, and a
/// `StandaloneTableMetadataCreator` built over the same `kv_backend`.
///
/// # Errors
///
/// Propagates the error of `create_table_metadata_manager`, and wraps a
/// `DdlManager::try_new` failure with the `InitDdlManagerSnafu` context.
async fn create_ddl_task_executor(
    kv_backend: KvBackendRef,
    procedure_manager: ProcedureManagerRef,
    datanode_manager: DatanodeManagerRef,
) -> Result<DdlTaskExecutorRef> {
    // The metadata manager needs its own handle to the backend; the original
    // handle is consumed below by the table metadata creator.
    let metadata_manager = Self::create_table_metadata_manager(kv_backend.clone()).await?;
    let cache_invalidator = Arc::new(DummyCacheInvalidator);
    let metadata_creator = Arc::new(StandaloneTableMetadataCreator::new(kv_backend));

    let ddl_manager = DdlManager::try_new(
        procedure_manager,
        datanode_manager,
        cache_invalidator,
        metadata_manager,
        metadata_creator,
    )
    .context(InitDdlManagerSnafu)?;

    let executor: DdlTaskExecutorRef = Arc::new(ddl_manager);
    Ok(executor)
}
/// Creates a `TableMetadataManager` over `kv_backend`, runs its `init()`,
/// and returns the shared handle.
///
/// # Errors
///
/// A failure of `init()` is returned with the `InitMetadataSnafu` context.
async fn create_table_metadata_manager(
    kv_backend: KvBackendRef,
) -> Result<TableMetadataManagerRef> {
    let manager: TableMetadataManagerRef = Arc::new(TableMetadataManager::new(kv_backend));
    // Initialize before handing the manager out to callers.
    manager.init().await.context(InitMetadataSnafu)?;
    Ok(manager)
}
}
#[cfg(test)]

View File

@@ -48,6 +48,27 @@ impl Display for RegionIdent {
}
}
/// Reply describing the outcome of downgrading a leader region.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct DowngradeRegionReply {
    /// The `last_entry_id`, when one is available.
    pub last_entry_id: Option<u64>,
    /// Whether the region exists.
    pub exists: bool,
    /// The error raised during the operation, if any.
    pub error: Option<String>,
}

impl Display for DowngradeRegionReply {
    /// Renders the reply as `(last_entry_id=…, exists=…, error=…)`; the two
    /// optional fields are printed with their `Debug` representation.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            last_entry_id,
            exists,
            error,
        } = self;
        write!(
            f,
            "(last_entry_id={:?}, exists={}, error={:?})",
            last_entry_id, exists, error
        )
    }
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
pub struct SimpleReply {
pub result: bool,
@@ -87,10 +108,23 @@ impl OpenRegion {
}
}
/// Instruction requesting that a leader region be downgraded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DowngradeRegion {
    /// Id of the region to downgrade.
    pub region_id: RegionId,
}

impl Display for DowngradeRegion {
    /// Renders the instruction as `DowngradeRegion(region_id=…)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { region_id } = self;
        write!(f, "DowngradeRegion(region_id={})", region_id)
    }
}
// An instruction, with one variant per requested action. Each variant wraps
// the payload that identifies what the action applies to.
#[derive(Debug, Clone, Serialize, Deserialize, Display)]
pub enum Instruction {
// Carries an `OpenRegion` payload.
OpenRegion(OpenRegion),
// Carries the `RegionIdent` of the region concerned.
CloseRegion(RegionIdent),
// Carries a `DowngradeRegion` payload (the instruction of downgrading a leader region).
DowngradeRegion(DowngradeRegion),
// Carries a `TableId`; presumably invalidates the cache entry keyed by that id (confirm against the handler).
InvalidateTableIdCache(TableId),
// Carries a `TableName`; presumably invalidates the cache entry keyed by that name (confirm against the handler).
InvalidateTableNameCache(TableName),
}
@@ -101,6 +135,7 @@ pub enum InstructionReply {
OpenRegion(SimpleReply),
CloseRegion(SimpleReply),
InvalidateTableCache(SimpleReply),
DowngradeRegion(DowngradeRegionReply),
}
impl Display for InstructionReply {
@@ -111,6 +146,9 @@ impl Display for InstructionReply {
Self::InvalidateTableCache(reply) => {
write!(f, "InstructionReply::Invalidate({})", reply)
}
Self::DowngradeRegion(reply) => {
write!(f, "InstructionReply::DowngradeRegion({})", reply)
}
}
}
}

View File

@@ -584,7 +584,7 @@ impl TableMetadataManager {
&self,
table_id: TableId,
region_info: RegionInfo,
current_table_route_value: DeserializedValueWithBytes<TableRouteValue>,
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
new_region_routes: Vec<RegionRoute>,
new_region_options: &HashMap<String, String>,
) -> Result<()> {
@@ -606,7 +606,7 @@ impl TableMetadataManager {
let (update_table_route_txn, on_update_table_route_failure) = self
.table_route_manager()
.build_update_txn(table_id, &current_table_route_value, &new_table_route_value)?;
.build_update_txn(table_id, current_table_route_value, &new_table_route_value)?;
let txn = Txn::merge_all(vec![update_datanode_table_txn, update_table_route_txn]);
@@ -1173,7 +1173,7 @@ mod tests {
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
&current_table_route_value,
new_region_routes.clone(),
&HashMap::new(),
)
@@ -1190,7 +1190,7 @@ mod tests {
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
&current_table_route_value,
new_region_routes.clone(),
&HashMap::new(),
)
@@ -1212,7 +1212,7 @@ mod tests {
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
current_table_route_value.clone(),
&current_table_route_value,
new_region_routes.clone(),
&HashMap::new(),
)
@@ -1237,7 +1237,7 @@ mod tests {
region_storage_path: region_storage_path.to_string(),
region_options: HashMap::new(),
},
wrong_table_route_value,
&wrong_table_route_value,
new_region_routes,
&HashMap::new(),
)

View File

@@ -68,6 +68,9 @@ impl EtcdStore {
async fn do_multi_txn(&self, txn_ops: Vec<TxnOp>) -> Result<Vec<TxnResponse>> {
if txn_ops.len() < MAX_TXN_SIZE {
// fast path
let _timer = METRIC_META_TXN_REQUEST
.with_label_values(&["etcd", "txn"])
.start_timer();
let txn = Txn::new().and_then(txn_ops);
let txn_res = self
.client
@@ -81,6 +84,9 @@ impl EtcdStore {
let txns = txn_ops
.chunks(MAX_TXN_SIZE)
.map(|part| async move {
let _timer = METRIC_META_TXN_REQUEST
.with_label_values(&["etcd", "txn"])
.start_timer();
let txn = Txn::new().and_then(part);
self.client.kv_client().txn(txn).await
})

View File

@@ -129,8 +129,12 @@ impl RegionAliveKeeper {
let (role, region_id) = (region.role().into(), RegionId::from(region.region_id));
if let Some(handle) = self.find_handle(region_id).await {
handle.reset_deadline(role, deadline).await;
} else {
warn!(
"Trying to renew the lease for region {region_id}, the keeper handler is not found!"
);
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
}

View File

@@ -37,6 +37,7 @@ use crate::alive_keeper::RegionAliveKeeper;
use crate::config::DatanodeOptions;
use crate::error::{self, MetaClientInitSnafu, Result};
use crate::event_listener::RegionServerEventReceiver;
use crate::metrics;
use crate::region_server::RegionServer;
pub(crate) mod handler;
@@ -72,9 +73,9 @@ impl HeartbeatTask {
opts.heartbeat.interval.as_millis() as u64,
));
let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![
region_alive_keeper.clone(),
Arc::new(ParseMailboxMessageHandler),
Arc::new(RegionHeartbeatResponseHandler::new(region_server.clone())),
region_alive_keeper.clone(),
]));
Ok(Self {
@@ -101,8 +102,10 @@ impl HeartbeatTask {
quit_signal: Arc<Notify>,
) -> Result<HeartbeatSender> {
let client_id = meta_client.id();
let (tx, mut rx) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
let mut last_received_lease = Instant::now();
let _handle = common_runtime::spawn_bg(async move {
while let Some(res) = match rx.message().await {
Ok(m) => m,
@@ -114,6 +117,28 @@ impl HeartbeatTask {
if let Some(msg) = res.mailbox_message.as_ref() {
info!("Received mailbox message: {msg:?}, meta_client id: {client_id:?}");
}
if let Some(lease) = res.region_lease.as_ref() {
metrics::LAST_RECEIVED_HEARTBEAT_ELAPSED
.set(last_received_lease.elapsed().as_millis() as i64);
// Resets the timer.
last_received_lease = Instant::now();
let mut leader_region_lease_count = 0;
let mut follower_region_lease_count = 0;
for lease in &lease.regions {
match lease.role() {
RegionRole::Leader => leader_region_lease_count += 1,
RegionRole::Follower => follower_region_lease_count += 1,
}
}
metrics::HEARTBEAT_REGION_LEASES
.with_label_values(&["leader"])
.set(leader_region_lease_count);
metrics::HEARTBEAT_REGION_LEASES
.with_label_values(&["follower"])
.set(follower_region_lease_count);
}
let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), res);
if let Err(e) = Self::handle_response(ctx, handler_executor.clone()).await {
error!(e; "Error while handling heartbeat response");

View File

@@ -65,6 +65,10 @@ impl RegionHeartbeatResponseHandler {
Instruction::InvalidateTableIdCache(_) | Instruction::InvalidateTableNameCache(_) => {
InvalidHeartbeatResponseSnafu.fail()
}
Instruction::DowngradeRegion(_) => {
// TODO(weny): add it later.
todo!()
}
}
}
@@ -88,6 +92,10 @@ impl RegionHeartbeatResponseHandler {
error: None,
})
}
Instruction::DowngradeRegion(_) => {
// TODO(weny): add it later.
todo!()
}
}
}
@@ -114,6 +122,10 @@ impl RegionHeartbeatResponseHandler {
reply.result = success;
reply.error = error;
}
InstructionReply::DowngradeRegion(_) => {
// TODO(weny): add it later.
todo!()
}
}
template

View File

@@ -18,6 +18,8 @@ use prometheus::*;
/// Region request type label.
pub const REGION_REQUEST_TYPE: &str = "datanode_region_request_type";
pub const REGION_ROLE: &str = "region_role";
lazy_static! {
/// The elapsed time of handling a request in the region_server.
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
@@ -26,4 +28,17 @@ lazy_static! {
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.
pub static ref LAST_RECEIVED_HEARTBEAT_ELAPSED: IntGauge = register_int_gauge!(
"last_received_heartbeat_lease_elapsed",
"last received heartbeat lease elapsed",
)
.unwrap();
/// The received region leases via heartbeat.
pub static ref HEARTBEAT_REGION_LEASES: IntGaugeVec = register_int_gauge_vec!(
"heartbeat_region_leases",
"received region leases via heartbeat",
&[REGION_ROLE]
)
.unwrap();
}

View File

@@ -85,31 +85,48 @@ pub enum ConcreteDataType {
impl fmt::Display for ConcreteDataType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ConcreteDataType::Null(_) => write!(f, "Null"),
ConcreteDataType::Boolean(_) => write!(f, "Boolean"),
ConcreteDataType::Int8(_) => write!(f, "Int8"),
ConcreteDataType::Int16(_) => write!(f, "Int16"),
ConcreteDataType::Int32(_) => write!(f, "Int32"),
ConcreteDataType::Int64(_) => write!(f, "Int64"),
ConcreteDataType::UInt8(_) => write!(f, "UInt8"),
ConcreteDataType::UInt16(_) => write!(f, "UInt16"),
ConcreteDataType::UInt32(_) => write!(f, "UInt32"),
ConcreteDataType::UInt64(_) => write!(f, "UInt64"),
ConcreteDataType::Float32(_) => write!(f, "Float32"),
ConcreteDataType::Float64(_) => write!(f, "Float64"),
ConcreteDataType::Binary(_) => write!(f, "Binary"),
ConcreteDataType::String(_) => write!(f, "String"),
ConcreteDataType::Date(_) => write!(f, "Date"),
ConcreteDataType::DateTime(_) => write!(f, "DateTime"),
ConcreteDataType::Timestamp(_) => write!(f, "Timestamp"),
ConcreteDataType::Time(_) => write!(f, "Time"),
ConcreteDataType::List(_) => write!(f, "List"),
ConcreteDataType::Dictionary(_) => write!(f, "Dictionary"),
ConcreteDataType::Interval(_) => write!(f, "Interval"),
ConcreteDataType::Duration(_) => write!(f, "Duration"),
ConcreteDataType::Decimal128(d) => {
write!(f, "Decimal128({},{})", d.precision(), d.scale())
}
ConcreteDataType::Null(v) => write!(f, "{}", v.name()),
ConcreteDataType::Boolean(v) => write!(f, "{}", v.name()),
ConcreteDataType::Int8(v) => write!(f, "{}", v.name()),
ConcreteDataType::Int16(v) => write!(f, "{}", v.name()),
ConcreteDataType::Int32(v) => write!(f, "{}", v.name()),
ConcreteDataType::Int64(v) => write!(f, "{}", v.name()),
ConcreteDataType::UInt8(v) => write!(f, "{}", v.name()),
ConcreteDataType::UInt16(v) => write!(f, "{}", v.name()),
ConcreteDataType::UInt32(v) => write!(f, "{}", v.name()),
ConcreteDataType::UInt64(v) => write!(f, "{}", v.name()),
ConcreteDataType::Float32(v) => write!(f, "{}", v.name()),
ConcreteDataType::Float64(v) => write!(f, "{}", v.name()),
ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
ConcreteDataType::String(v) => write!(f, "{}", v.name()),
ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
ConcreteDataType::DateTime(v) => write!(f, "{}", v.name()),
ConcreteDataType::Timestamp(t) => match t {
TimestampType::Second(v) => write!(f, "{}", v.name()),
TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
TimestampType::Microsecond(v) => write!(f, "{}", v.name()),
TimestampType::Nanosecond(v) => write!(f, "{}", v.name()),
},
ConcreteDataType::Time(t) => match t {
TimeType::Second(v) => write!(f, "{}", v.name()),
TimeType::Millisecond(v) => write!(f, "{}", v.name()),
TimeType::Microsecond(v) => write!(f, "{}", v.name()),
TimeType::Nanosecond(v) => write!(f, "{}", v.name()),
},
ConcreteDataType::Interval(i) => match i {
IntervalType::YearMonth(v) => write!(f, "{}", v.name()),
IntervalType::DayTime(v) => write!(f, "{}", v.name()),
IntervalType::MonthDayNano(v) => write!(f, "{}", v.name()),
},
ConcreteDataType::Duration(d) => match d {
DurationType::Second(v) => write!(f, "{}", v.name()),
DurationType::Millisecond(v) => write!(f, "{}", v.name()),
DurationType::Microsecond(v) => write!(f, "{}", v.name()),
DurationType::Nanosecond(v) => write!(f, "{}", v.name()),
},
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
}
}
}
@@ -492,7 +509,7 @@ impl ConcreteDataType {
#[enum_dispatch::enum_dispatch]
pub trait DataType: std::fmt::Debug + Send + Sync {
/// Name of this data type.
fn name(&self) -> &str;
fn name(&self) -> String;
/// Returns id of the Logical data type.
fn logical_type_id(&self) -> LogicalTypeId;
@@ -523,7 +540,7 @@ mod tests {
fn test_concrete_type_as_datatype_trait() {
let concrete_type = ConcreteDataType::boolean_datatype();
assert_eq!("Boolean", concrete_type.name());
assert_eq!("Boolean", concrete_type.to_string());
assert_eq!(Value::Boolean(false), concrete_type.default_value());
assert_eq!(LogicalTypeId::Boolean, concrete_type.logical_type_id());
assert_eq!(ArrowDataType::Boolean, concrete_type.as_arrow_type());
@@ -767,94 +784,68 @@ mod tests {
#[test]
fn test_display_concrete_data_type() {
assert_eq!(ConcreteDataType::null_datatype().to_string(), "Null");
assert_eq!(ConcreteDataType::boolean_datatype().to_string(), "Boolean");
assert_eq!(ConcreteDataType::binary_datatype().to_string(), "Binary");
assert_eq!(ConcreteDataType::int8_datatype().to_string(), "Int8");
assert_eq!(ConcreteDataType::int16_datatype().to_string(), "Int16");
assert_eq!(ConcreteDataType::int32_datatype().to_string(), "Int32");
assert_eq!(ConcreteDataType::int64_datatype().to_string(), "Int64");
assert_eq!(ConcreteDataType::uint8_datatype().to_string(), "UInt8");
assert_eq!(ConcreteDataType::uint16_datatype().to_string(), "UInt16");
assert_eq!(ConcreteDataType::uint32_datatype().to_string(), "UInt32");
assert_eq!(ConcreteDataType::uint64_datatype().to_string(), "UInt64");
assert_eq!(ConcreteDataType::float32_datatype().to_string(), "Float32");
assert_eq!(ConcreteDataType::float64_datatype().to_string(), "Float64");
assert_eq!(ConcreteDataType::string_datatype().to_string(), "String");
assert_eq!(ConcreteDataType::date_datatype().to_string(), "Date");
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Null).to_string(),
"Null"
ConcreteDataType::timestamp_millisecond_datatype().to_string(),
"TimestampMillisecond"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Boolean).to_string(),
"Boolean"
ConcreteDataType::time_millisecond_datatype().to_string(),
"TimeMillisecond"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Binary).to_string(),
"Binary"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::LargeBinary).to_string(),
"Binary"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Int8).to_string(),
"Int8"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Int16).to_string(),
"Int16"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Int32).to_string(),
"Int32"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Int64).to_string(),
"Int64"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::UInt8).to_string(),
"UInt8"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::UInt16).to_string(),
"UInt16"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::UInt32).to_string(),
"UInt32"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::UInt64).to_string(),
"UInt64"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Float32).to_string(),
"Float32"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Float64).to_string(),
"Float64"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8).to_string(),
"String"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
"item",
ArrowDataType::Int32,
true,
))))
.to_string(),
"List"
);
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Date32).to_string(),
"Date"
);
assert_eq!(ConcreteDataType::time_second_datatype().to_string(), "Time");
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Interval(
arrow_schema::IntervalUnit::MonthDayNano,
))
.to_string(),
"Interval"
ConcreteDataType::interval_month_day_nano_datatype().to_string(),
"IntervalMonthDayNano"
);
assert_eq!(
ConcreteDataType::duration_second_datatype().to_string(),
"Duration"
"DurationSecond"
);
assert_eq!(
ConcreteDataType::decimal128_datatype(10, 2).to_string(),
"Decimal128(10,2)"
"Decimal(10, 2)"
);
// Nested types
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
"List<Int32>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
)))
.to_string(),
"List<Dictionary<Int32, String>>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
))
.to_string(),
"List<List<List<Int32>>>"
);
assert_eq!(
ConcreteDataType::dictionary_datatype(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
)
.to_string(),
"Dictionary<Int32, String>"
);
}
}

View File

@@ -34,8 +34,8 @@ impl BinaryType {
}
impl DataType for BinaryType {
fn name(&self) -> &str {
"Binary"
fn name(&self) -> String {
"Binary".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -34,8 +34,8 @@ impl BooleanType {
}
impl DataType for BooleanType {
fn name(&self) -> &str {
"Boolean"
fn name(&self) -> String {
"Boolean".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -248,7 +248,7 @@ mod tests {
assert!(res.is_err());
assert_eq!(
res.unwrap_err().to_string(),
"Type Timestamp with value 1970-01-01 08:00:10+0800 can't be cast to the destination type Int8"
"Type TimestampSecond with value 1970-01-01 08:00:10+0800 can't be cast to the destination type Int8"
);
}

View File

@@ -32,8 +32,8 @@ use crate::vectors::{DateVector, DateVectorBuilder, MutableVector, Vector};
pub struct DateType;
impl DataType for DateType {
fn name(&self) -> &str {
"Date"
fn name(&self) -> String {
"Date".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -30,8 +30,8 @@ use crate::vectors::{DateTimeVector, DateTimeVectorBuilder, PrimitiveVector};
pub struct DateTimeType;
impl DataType for DateTimeType {
fn name(&self) -> &str {
"DateTime"
fn name(&self) -> String {
"DateTime".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -56,9 +56,8 @@ impl Decimal128Type {
}
impl DataType for Decimal128Type {
fn name(&self) -> &str {
// TODO(QuenKar): support precision and scale information in name
"decimal"
fn name(&self) -> String {
format!("Decimal({}, {})", self.precision, self.scale)
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -62,8 +62,12 @@ impl DictionaryType {
}
impl DataType for DictionaryType {
fn name(&self) -> &str {
"Dictionary"
fn name(&self) -> String {
format!(
"Dictionary<{}, {}>",
self.key_type.name(),
self.value_type.name()
)
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -78,8 +78,8 @@ macro_rules! impl_data_type_for_duration {
pub struct [<Duration $unit Type>];
impl DataType for [<Duration $unit Type>] {
fn name(&self) -> &str {
stringify!([<Duration $unit>])
fn name(&self) -> String {
stringify!([<Duration $unit>]).to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -66,8 +66,8 @@ macro_rules! impl_data_type_for_interval {
pub struct [<Interval $unit Type>];
impl DataType for [<Interval $unit Type>] {
fn name(&self) -> &str {
stringify!([<Interval $unit>])
fn name(&self) -> String {
stringify!([<Interval $unit>]).to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -52,8 +52,8 @@ impl ListType {
}
impl DataType for ListType {
fn name(&self) -> &str {
"List"
fn name(&self) -> String {
format!("List<{}>", self.item_type.name())
}
fn logical_type_id(&self) -> LogicalTypeId {
@@ -92,7 +92,7 @@ mod tests {
#[test]
fn test_list_type() {
let t = ListType::new(ConcreteDataType::boolean_datatype());
assert_eq!("List", t.name());
assert_eq!("List<Boolean>", t.name());
assert_eq!(LogicalTypeId::List, t.logical_type_id());
assert_eq!(
Value::List(ListValue::new(None, ConcreteDataType::boolean_datatype())),

View File

@@ -32,8 +32,8 @@ impl NullType {
}
impl DataType for NullType {
fn name(&self) -> &str {
"Null"
fn name(&self) -> String {
"Null".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -251,8 +251,8 @@ macro_rules! define_non_timestamp_primitive {
define_logical_primitive_type!($Native, $TypeId, $DataType, $Largest);
impl DataType for $DataType {
fn name(&self) -> &str {
stringify!($TypeId)
fn name(&self) -> String {
stringify!($TypeId).to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {
@@ -350,8 +350,8 @@ define_logical_primitive_type!(i64, Int64, Int64Type, Int64Type);
define_logical_primitive_type!(i32, Int32, Int32Type, Int64Type);
impl DataType for Int64Type {
fn name(&self) -> &str {
"Int64"
fn name(&self) -> String {
"Int64".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {
@@ -397,8 +397,8 @@ impl DataType for Int64Type {
}
impl DataType for Int32Type {
fn name(&self) -> &str {
"Int32"
fn name(&self) -> String {
"Int32".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -34,8 +34,8 @@ impl StringType {
}
impl DataType for StringType {
fn name(&self) -> &str {
"String"
fn name(&self) -> String {
"String".to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -92,8 +92,8 @@ macro_rules! impl_data_type_for_time {
pub struct [<Time $unit Type>];
impl DataType for [<Time $unit Type>] {
fn name(&self) -> &str {
stringify!([<Time $unit>])
fn name(&self) -> String {
stringify!([<Time $unit>]).to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -109,8 +109,8 @@ macro_rules! impl_data_type_for_timestamp {
pub struct [<Timestamp $unit Type>];
impl DataType for [<Timestamp $unit Type>] {
fn name(&self) -> &str {
stringify!([<Timestamp $unit>])
fn name(&self) -> String {
stringify!([<Timestamp $unit>]).to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {

View File

@@ -229,17 +229,16 @@ macro_rules! impl_try_from_arrow_array_for_vector {
) -> crate::error::Result<$Vector> {
use snafu::OptionExt;
let data = array
let arrow_array = array
.as_ref()
.as_any()
.downcast_ref::<$Array>()
.with_context(|| crate::error::ConversionSnafu {
from: std::format!("{:?}", array.as_ref().data_type()),
})?
.to_data();
.clone();
let concrete_array = $Array::from(data);
Ok($Vector::from(concrete_array))
Ok($Vector::from(arrow_array))
}
}
};

View File

@@ -15,7 +15,7 @@
use std::any::Any;
use std::sync::Arc;
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
use snafu::ResultExt;
use crate::arrow_array::{BinaryArray, MutableBinaryArray};
@@ -36,10 +36,6 @@ impl BinaryVector {
/// Borrows the wrapped arrow array as a `&dyn Array` trait object.
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}
/// Returns the `ArrayData` obtained by calling `to_data()` on the wrapped array.
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
}
impl From<BinaryArray> for BinaryVector {
@@ -74,13 +70,11 @@ impl Vector for BinaryVector {
}
fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(BinaryArray::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(BinaryArray::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {

View File

@@ -16,9 +16,7 @@ use std::any::Any;
use std::borrow::Borrow;
use std::sync::Arc;
use arrow::array::{
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder,
};
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder};
use snafu::ResultExt;
use crate::data_type::ConcreteDataType;
@@ -44,16 +42,6 @@ impl BooleanVector {
&self.array
}
/// Returns the `ArrayData` obtained by calling `to_data()` on the wrapped array.
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
/// Builds a `BooleanVector` whose array is a `BooleanArray` converted from `data`.
fn from_array_data(data: ArrayData) -> BooleanVector {
BooleanVector {
array: BooleanArray::from(data),
}
}
/// Returns the result of `false_count()` on the wrapped array.
pub(crate) fn false_count(&self) -> usize {
self.array.false_count()
}
@@ -107,13 +95,11 @@ impl Vector for BooleanVector {
}
fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(BooleanArray::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(BooleanArray::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {
@@ -133,8 +119,7 @@ impl Vector for BooleanVector {
}
fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data(data))
Arc::new(Self::from(self.array.slice(offset, length)))
}
fn get(&self, index: usize) -> Value {

View File

@@ -196,8 +196,7 @@ impl Vector for Decimal128Vector {
}
fn slice(&self, offset: usize, length: usize) -> VectorRef {
let array = self.array.slice(offset, length);
Arc::new(Self { array })
Arc::new(self.get_slice(offset, length))
}
fn get(&self, index: usize) -> Value {
@@ -535,23 +534,23 @@ pub mod tests {
// because 100 is out of Decimal(3, 1) range, so it will be null
assert!(array.is_null(4));
}
}
#[test]
fn test_decimal28_vector_iter_data() {
let vector = Decimal128Vector::from_values(vec![1, 2, 3, 4])
.with_precision_and_scale(3, 1)
.unwrap();
let mut iter = vector.iter_data();
assert_eq!(iter.next(), Some(Some(Decimal128::new(1, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(2, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(3, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(4, 3, 1))));
assert_eq!(iter.next(), None);
#[test]
fn test_decimal28_vector_iter_data() {
let vector = Decimal128Vector::from_values(vec![1, 2, 3, 4])
.with_precision_and_scale(3, 1)
.unwrap();
let mut iter = vector.iter_data();
assert_eq!(iter.next(), Some(Some(Decimal128::new(1, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(2, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(3, 3, 1))));
assert_eq!(iter.next(), Some(Some(Decimal128::new(4, 3, 1))));
assert_eq!(iter.next(), None);
let values = vector
.iter_data()
.filter_map(|v| v.map(|x| x.val() * 2))
.collect::<Vec<_>>();
assert_eq!(values, vec![2, 4, 6, 8]);
let values = vector
.iter_data()
.filter_map(|v| v.map(|x| x.val() * 2))
.collect::<Vec<_>>();
assert_eq!(values, vec![2, 4, 6, 8]);
}
}

View File

@@ -284,23 +284,21 @@ impl Helper {
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
ArrowDataType::Timestamp(unit, _) => match unit {
TimeUnit::Second => Arc::new(
TimestampSecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Millisecond => Arc::new(
TimestampMillisecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Microsecond => Arc::new(
TimestampMicrosecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Nanosecond => Arc::new(
TimestampNanosecondVector::try_from_arrow_timestamp_array(array)?,
),
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Microsecond => {
Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
}
},
ArrowDataType::Time32(unit) => match unit {
TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_time_array(array)?),
TimeUnit::Second => Arc::new(TimeSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(TimeMillisecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeMillisecondVector::try_from_arrow_array(array)?)
}
// Arrow uses time32 for second/millisecond.
_ => unreachable!(
@@ -310,10 +308,10 @@ impl Helper {
},
ArrowDataType::Time64(unit) => match unit {
TimeUnit::Microsecond => {
Arc::new(TimeMicrosecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(TimeNanosecondVector::try_from_arrow_time_array(array)?)
Arc::new(TimeNanosecondVector::try_from_arrow_array(array)?)
}
// Arrow uses time64 for microsecond/nanosecond.
_ => unreachable!(
@@ -322,29 +320,27 @@ impl Helper {
),
},
ArrowDataType::Interval(unit) => match unit {
IntervalUnit::YearMonth => Arc::new(
IntervalYearMonthVector::try_from_arrow_interval_array(array)?,
),
IntervalUnit::DayTime => {
Arc::new(IntervalDayTimeVector::try_from_arrow_interval_array(array)?)
IntervalUnit::YearMonth => {
Arc::new(IntervalYearMonthVector::try_from_arrow_array(array)?)
}
IntervalUnit::DayTime => {
Arc::new(IntervalDayTimeVector::try_from_arrow_array(array)?)
}
IntervalUnit::MonthDayNano => {
Arc::new(IntervalMonthDayNanoVector::try_from_arrow_array(array)?)
}
IntervalUnit::MonthDayNano => Arc::new(
IntervalMonthDayNanoVector::try_from_arrow_interval_array(array)?,
),
},
ArrowDataType::Duration(unit) => match unit {
TimeUnit::Second => {
Arc::new(DurationSecondVector::try_from_arrow_duration_array(array)?)
TimeUnit::Second => Arc::new(DurationSecondVector::try_from_arrow_array(array)?),
TimeUnit::Millisecond => {
Arc::new(DurationMillisecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Microsecond => {
Arc::new(DurationMicrosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Nanosecond => {
Arc::new(DurationNanosecondVector::try_from_arrow_array(array)?)
}
TimeUnit::Millisecond => Arc::new(
DurationMillisecondVector::try_from_arrow_duration_array(array)?,
),
TimeUnit::Microsecond => Arc::new(
DurationMicrosecondVector::try_from_arrow_duration_array(array)?,
),
TimeUnit::Nanosecond => Arc::new(
DurationNanosecondVector::try_from_arrow_duration_array(array)?,
),
},
ArrowDataType::Decimal128(_, _) => {
Arc::new(Decimal128Vector::try_from_arrow_array(array)?)

View File

@@ -46,17 +46,6 @@ impl ListVector {
.map(|value_opt| value_opt.map(Helper::try_into_vector).transpose())
}
/// Returns the `ArrayData` obtained by calling `to_data()` on the wrapped array.
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
/// Builds a vector from raw array data and an explicit item type.
///
/// `data` is converted into a `ListArray`; `item_type` is stored as given.
fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
Self {
array: ListArray::from(data),
item_type,
}
}
/// Borrows the wrapped arrow array as a `&dyn Array` trait object.
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}
@@ -80,13 +69,11 @@ impl Vector for ListVector {
}
fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(ListArray::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(ListArray::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {
@@ -106,8 +93,10 @@ impl Vector for ListVector {
}
fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
Arc::new(Self {
array: self.array.slice(offset, length),
item_type: self.item_type.clone(),
})
}
fn get(&self, index: usize) -> Value {

View File

@@ -16,23 +16,12 @@ use std::any::Any;
use std::fmt;
use std::sync::Arc;
use arrow::array::{
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, PrimitiveArray, PrimitiveBuilder,
Time32MillisecondArray as TimeMillisecondArray, Time32SecondArray as TimeSecondArray,
Time64MicrosecondArray as TimeMicrosecondArray, Time64NanosecondArray as TimeNanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray,
};
use arrow_array::{
DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
DurationSecondArray, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
};
use arrow_schema::DataType;
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef, PrimitiveArray, PrimitiveBuilder};
use serde_json::Value as JsonValue;
use snafu::OptionExt;
use crate::data_type::ConcreteDataType;
use crate::error::{self, CastTypeSnafu, Result};
use crate::error::{self, Result};
use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
use crate::serialize::Serializable;
use crate::types::{
@@ -66,178 +55,15 @@ impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
}
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
let data = array
let arrow_array = array
.as_ref()
.as_any()
.downcast_ref::<PrimitiveArray<T::ArrowPrimitive>>()
.with_context(|| error::ConversionSnafu {
from: format!("{:?}", array.as_ref().data_type()),
})?
.to_data();
let concrete_array = PrimitiveArray::<T::ArrowPrimitive>::from(data);
Ok(Self::new(concrete_array))
}
})?;
/// Builds a vector from an arrow timestamp array, dropping any time zone carried by the array.
///
/// # Errors
/// Fails with the error built from `CastTypeSnafu` when the array's data type is not
/// `DataType::Timestamp`.
///
/// # Panics
/// Panics if the array cannot be downcast to the timestamp array type that matches the
/// unit reported by its data type.
pub fn try_from_arrow_timestamp_array(array: impl AsRef<dyn Array>) -> Result<Self> {
    let source = array.as_ref();
    let erased = source.as_any();
    // The concrete array type is selected from the unit found at runtime; every arm clones
    // the array, clears its time zone and then extracts the underlying array data.
    let tz_free_data = match source.data_type() {
        DataType::Timestamp(arrow_schema::TimeUnit::Second, _) => erased
            .downcast_ref::<TimestampSecondArray>()
            .unwrap()
            .clone()
            .with_timezone_opt(None::<String>)
            .to_data(),
        DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, _) => erased
            .downcast_ref::<TimestampMillisecondArray>()
            .unwrap()
            .clone()
            .with_timezone_opt(None::<String>)
            .to_data(),
        DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, _) => erased
            .downcast_ref::<TimestampMicrosecondArray>()
            .unwrap()
            .clone()
            .with_timezone_opt(None::<String>)
            .to_data(),
        DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _) => erased
            .downcast_ref::<TimestampNanosecondArray>()
            .unwrap()
            .clone()
            .with_timezone_opt(None::<String>)
            .to_data(),
        unsupported => {
            return CastTypeSnafu {
                msg: format!(
                    "Failed to cast arrow array {:?} to timestamp vector",
                    unsupported,
                ),
            }
            .fail();
        }
    };

    Ok(Self::new(PrimitiveArray::<T::ArrowPrimitive>::from(
        tz_free_data,
    )))
}
/// Builds a vector from an arrow `Time32` or `Time64` array.
///
/// # Errors
/// Fails with the error built from `CastTypeSnafu` when the array's data type is neither
/// `DataType::Time32` nor `DataType::Time64`.
///
/// # Panics
/// Panics if the downcast to the array type matching the reported unit fails, or if a
/// `Time32`/`Time64` data type carries a unit other than the ones handled below.
pub fn try_from_arrow_time_array(array: impl AsRef<dyn Array>) -> Result<Self> {
    let source = array.as_ref();
    let erased = source.as_any();
    let raw = match source.data_type() {
        DataType::Time32(arrow_schema::TimeUnit::Second) => {
            erased.downcast_ref::<TimeSecondArray>().unwrap().to_data()
        }
        DataType::Time32(arrow_schema::TimeUnit::Millisecond) => erased
            .downcast_ref::<TimeMillisecondArray>()
            .unwrap()
            .to_data(),
        DataType::Time64(arrow_schema::TimeUnit::Microsecond) => erased
            .downcast_ref::<TimeMicrosecondArray>()
            .unwrap()
            .to_data(),
        DataType::Time64(arrow_schema::TimeUnit::Nanosecond) => erased
            .downcast_ref::<TimeNanosecondArray>()
            .unwrap()
            .to_data(),
        // Remaining unit combinations under Time32/Time64 are treated as impossible.
        DataType::Time32(_) | DataType::Time64(_) => unreachable!(),
        unsupported => {
            return CastTypeSnafu {
                msg: format!(
                    "Failed to cast arrow array {:?} to time vector",
                    unsupported,
                ),
            }
            .fail();
        }
    };

    Ok(Self::new(PrimitiveArray::<T::ArrowPrimitive>::from(raw)))
}
/// Builds a vector from an arrow interval array.
///
/// # Errors
/// Fails with the error built from `CastTypeSnafu` when the array's data type is not
/// `DataType::Interval`.
///
/// # Panics
/// Panics if the array cannot be downcast to the interval array type that matches the
/// unit reported by its data type.
pub fn try_from_arrow_interval_array(array: impl AsRef<dyn Array>) -> Result<Self> {
    let source = array.as_ref();
    let erased = source.as_any();
    let raw = match source.data_type() {
        DataType::Interval(arrow_schema::IntervalUnit::YearMonth) => erased
            .downcast_ref::<IntervalYearMonthArray>()
            .unwrap()
            .to_data(),
        DataType::Interval(arrow_schema::IntervalUnit::DayTime) => erased
            .downcast_ref::<IntervalDayTimeArray>()
            .unwrap()
            .to_data(),
        DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano) => erased
            .downcast_ref::<IntervalMonthDayNanoArray>()
            .unwrap()
            .to_data(),
        unsupported => {
            return CastTypeSnafu {
                msg: format!(
                    "Failed to cast arrow array {:?} to interval vector",
                    unsupported,
                ),
            }
            .fail();
        }
    };

    Ok(Self::new(PrimitiveArray::<T::ArrowPrimitive>::from(raw)))
}
pub fn try_from_arrow_duration_array(array: impl AsRef<dyn Array>) -> Result<Self> {
let array = array.as_ref();
let array_data = match array.data_type() {
DataType::Duration(unit) => match unit {
arrow_schema::TimeUnit::Second => array
.as_any()
.downcast_ref::<DurationSecondArray>()
.unwrap()
.to_data(),
arrow_schema::TimeUnit::Millisecond => array
.as_any()
.downcast_ref::<DurationMillisecondArray>()
.unwrap()
.to_data(),
arrow_schema::TimeUnit::Microsecond => array
.as_any()
.downcast_ref::<DurationMicrosecondArray>()
.unwrap()
.to_data(),
arrow_schema::TimeUnit::Nanosecond => array
.as_any()
.downcast_ref::<DurationNanosecondArray>()
.unwrap()
.to_data(),
},
arrow_type => {
return CastTypeSnafu {
msg: format!(
"Failed to cast arrow array {:?} to interval vector",
arrow_type,
),
}
.fail()?;
}
};
let concrete_array = PrimitiveArray::<T::ArrowPrimitive>::from(array_data);
Ok(Self::new(concrete_array))
Ok(Self::new(arrow_array.clone()))
}
pub fn from_slice<P: AsRef<[T::Native]>>(slice: P) -> Self {
@@ -277,24 +103,15 @@ impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
&self.array
}
/// Returns the `ArrayData` obtained by calling `to_data()` on the wrapped array.
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
/// Builds a vector whose array is a `PrimitiveArray` converted from `data`.
fn from_array_data(data: ArrayData) -> Self {
Self {
array: PrimitiveArray::from(data),
}
}
// To distinguish with `Vector::slice()`.
/// Slice the vector, returning a new vector.
///
/// # Panics
/// This function panics if `offset + length > self.len()`.
pub fn get_slice(&self, offset: usize, length: usize) -> Self {
let data = self.array.to_data().slice(offset, length);
Self::from_array_data(data)
Self {
array: self.array.slice(offset, length),
}
}
}
@@ -316,13 +133,11 @@ impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
}
fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {
@@ -580,7 +395,12 @@ mod tests {
Time64NanosecondArray,
};
use arrow::datatypes::DataType as ArrowDataType;
use arrow_array::{DurationSecondArray, IntervalDayTimeArray, IntervalYearMonthArray};
use arrow_array::{
DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
DurationSecondArray, IntervalDayTimeArray, IntervalYearMonthArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray,
};
use serde_json;
use super::*;
@@ -703,6 +523,14 @@ mod tests {
assert_eq!(128, v.memory_size());
}
/// Checks that `get_slice` returns the requested window and leaves the source unchanged.
#[test]
fn test_get_slice() {
    let source = Int32Vector::from_slice(vec![1, 2, 3, 4, 5]);
    let window = source.get_slice(1, 3);

    // The source vector must still hold all of its original values.
    let expected_source = Int32Vector::from_slice(vec![1, 2, 3, 4, 5]);
    assert_eq!(source, expected_source);

    // Offset 1 with length 3 selects the three middle elements.
    let expected_window = Int32Vector::from_slice(vec![2, 3, 4]);
    assert_eq!(window, expected_window);
}
#[test]
fn test_primitive_vector_builder() {
let mut builder = Int64Type::default().create_mutable_vector(3);
@@ -748,48 +576,48 @@ mod tests {
#[test]
fn test_try_from_arrow_time_array() {
let array: ArrayRef = Arc::new(Time32SecondArray::from(vec![1i32, 2, 3]));
let vector = TimeSecondVector::try_from_arrow_time_array(array).unwrap();
let vector = TimeSecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(TimeSecondVector::from_values(vec![1, 2, 3]), vector);
let array: ArrayRef = Arc::new(Time32MillisecondArray::from(vec![1i32, 2, 3]));
let vector = TimeMillisecondVector::try_from_arrow_time_array(array).unwrap();
let vector = TimeMillisecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(TimeMillisecondVector::from_values(vec![1, 2, 3]), vector);
let array: ArrayRef = Arc::new(Time64MicrosecondArray::from(vec![1i64, 2, 3]));
let vector = TimeMicrosecondVector::try_from_arrow_time_array(array).unwrap();
let vector = TimeMicrosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(TimeMicrosecondVector::from_values(vec![1, 2, 3]), vector);
let array: ArrayRef = Arc::new(Time64NanosecondArray::from(vec![1i64, 2, 3]));
let vector = TimeNanosecondVector::try_from_arrow_time_array(array).unwrap();
let vector = TimeNanosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(TimeNanosecondVector::from_values(vec![1, 2, 3]), vector);
// Test convert error
let array: ArrayRef = Arc::new(Int32Array::from(vec![1i32, 2, 3]));
assert!(TimeSecondVector::try_from_arrow_time_array(array).is_err());
assert!(TimeSecondVector::try_from_arrow_array(array).is_err());
}
#[test]
fn test_try_from_arrow_timestamp_array() {
let array: ArrayRef = Arc::new(TimestampSecondArray::from(vec![1i64, 2, 3]));
let vector = TimestampSecondVector::try_from_arrow_timestamp_array(array).unwrap();
let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(TimestampSecondVector::from_values(vec![1, 2, 3]), vector);
let array: ArrayRef = Arc::new(TimestampMillisecondArray::from(vec![1i64, 2, 3]));
let vector = TimestampMillisecondVector::try_from_arrow_timestamp_array(array).unwrap();
let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
TimestampMillisecondVector::from_values(vec![1, 2, 3]),
vector
);
let array: ArrayRef = Arc::new(TimestampMicrosecondArray::from(vec![1i64, 2, 3]));
let vector = TimestampMicrosecondVector::try_from_arrow_timestamp_array(array).unwrap();
let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
TimestampMicrosecondVector::from_values(vec![1, 2, 3]),
vector
);
let array: ArrayRef = Arc::new(TimestampNanosecondArray::from(vec![1i64, 2, 3]));
let vector = TimestampNanosecondVector::try_from_arrow_timestamp_array(array).unwrap();
let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
TimestampNanosecondVector::from_values(vec![1, 2, 3]),
vector
@@ -797,27 +625,27 @@ mod tests {
// Test convert error
let array: ArrayRef = Arc::new(Int32Array::from(vec![1i32, 2, 3]));
assert!(TimestampSecondVector::try_from_arrow_timestamp_array(array).is_err());
assert!(TimestampSecondVector::try_from_arrow_array(array).is_err());
}
#[test]
fn test_try_from_arrow_interval_array() {
let array: ArrayRef = Arc::new(IntervalYearMonthArray::from(vec![1000, 2000, 3000]));
let vector = IntervalYearMonthVector::try_from_arrow_interval_array(array).unwrap();
let vector = IntervalYearMonthVector::try_from_arrow_array(array).unwrap();
assert_eq!(
IntervalYearMonthVector::from_values(vec![1000, 2000, 3000]),
vector
);
let array: ArrayRef = Arc::new(IntervalDayTimeArray::from(vec![1000, 2000, 3000]));
let vector = IntervalDayTimeVector::try_from_arrow_interval_array(array).unwrap();
let vector = IntervalDayTimeVector::try_from_arrow_array(array).unwrap();
assert_eq!(
IntervalDayTimeVector::from_values(vec![1000, 2000, 3000]),
vector
);
let array: ArrayRef = Arc::new(IntervalYearMonthArray::from(vec![1000, 2000, 3000]));
let vector = IntervalYearMonthVector::try_from_arrow_interval_array(array).unwrap();
let vector = IntervalYearMonthVector::try_from_arrow_array(array).unwrap();
assert_eq!(
IntervalYearMonthVector::from_values(vec![1000, 2000, 3000]),
vector
@@ -827,28 +655,28 @@ mod tests {
#[test]
fn test_try_from_arrow_duration_array() {
let array: ArrayRef = Arc::new(DurationSecondArray::from(vec![1000, 2000, 3000]));
let vector = DurationSecondVector::try_from_arrow_duration_array(array).unwrap();
let vector = DurationSecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
DurationSecondVector::from_values(vec![1000, 2000, 3000]),
vector
);
let array: ArrayRef = Arc::new(DurationMillisecondArray::from(vec![1000, 2000, 3000]));
let vector = DurationMillisecondVector::try_from_arrow_duration_array(array).unwrap();
let vector = DurationMillisecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
DurationMillisecondVector::from_values(vec![1000, 2000, 3000]),
vector
);
let array: ArrayRef = Arc::new(DurationMicrosecondArray::from(vec![1000, 2000, 3000]));
let vector = DurationMicrosecondVector::try_from_arrow_duration_array(array).unwrap();
let vector = DurationMicrosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
DurationMicrosecondVector::from_values(vec![1000, 2000, 3000]),
vector
);
let array: ArrayRef = Arc::new(DurationNanosecondArray::from(vec![1000, 2000, 3000]));
let vector = DurationNanosecondVector::try_from_arrow_duration_array(array).unwrap();
let vector = DurationNanosecondVector::try_from_arrow_array(array).unwrap();
assert_eq!(
DurationNanosecondVector::from_values(vec![1000, 2000, 3000]),
vector

View File

@@ -15,7 +15,7 @@
use std::any::Any;
use std::sync::Arc;
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
use snafu::ResultExt;
use crate::arrow_array::{MutableStringArray, StringArray};
@@ -36,16 +36,6 @@ impl StringVector {
/// Borrows the wrapped arrow array as a `&dyn Array` trait object.
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
}
/// Returns the `ArrayData` obtained by calling `to_data()` on the wrapped array.
fn to_array_data(&self) -> ArrayData {
self.array.to_data()
}
/// Builds a vector whose array is a `StringArray` converted from `data`.
fn from_array_data(data: ArrayData) -> Self {
Self {
array: StringArray::from(data),
}
}
}
impl From<StringArray> for StringVector {
@@ -120,13 +110,11 @@ impl Vector for StringVector {
}
fn to_arrow_array(&self) -> ArrayRef {
let data = self.to_array_data();
Arc::new(StringArray::from(data))
Arc::new(self.array.clone())
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
let data = self.to_array_data();
Box::new(StringArray::from(data))
Box::new(self.array.clone())
}
fn validity(&self) -> Validity {
@@ -146,8 +134,7 @@ impl Vector for StringVector {
}
fn slice(&self, offset: usize, length: usize) -> VectorRef {
let data = self.array.to_data().slice(offset, length);
Arc::new(Self::from_array_data(data))
Arc::new(Self::from(self.array.slice(offset, length)))
}
fn get(&self, index: usize) -> Value {
@@ -256,6 +243,7 @@ vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType;
use super::*;

View File

@@ -182,9 +182,6 @@ pub enum Error {
source: servers::error::Error,
},
#[snafu(display("Missing meta_client_options section in config"))]
MissingMetasrvOpts { location: Location },
#[snafu(display("Failed to find leaders when altering table, table: {}", table))]
LeaderNotFound { table: String, location: Location },
@@ -299,7 +296,6 @@ impl ErrorExt for Error {
| Error::IllegalPrimaryKeysDef { .. }
| Error::SchemaExists { .. }
| Error::ColumnNotFound { .. }
| Error::MissingMetasrvOpts { .. }
| Error::UnsupportedFormat { .. }
| Error::IllegalAuthConfig { .. }
| Error::EmptyData { .. }

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod builder;
mod grpc;
mod influxdb;
mod opentsdb;
@@ -21,24 +22,16 @@ mod region_query;
mod script;
pub mod standalone;
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::meta::Role;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use catalog::CatalogManagerRef;
use client::client_manager::DatanodeClients;
use common_base::Plugins;
use common_config::KvBackendConfig;
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::ddl_manager::DdlManager;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::KvBackendRef;
use common_meta::state_store::KvStateStore;
use common_procedure::local::{LocalManager, ManagerConfig};
@@ -47,19 +40,18 @@ use common_procedure::ProcedureManagerRef;
use common_query::Output;
use common_telemetry::error;
use common_telemetry::logging::info;
use datanode::region_server::RegionServer;
use log_store::raft_engine::RaftEngineBackend;
use meta_client::client::{MetaClient, MetaClientBuilder};
use operator::delete::{Deleter, DeleterRef};
use operator::insert::{Inserter, InserterRef};
use meta_client::MetaClientOptions;
use operator::delete::DeleterRef;
use operator::insert::InserterRef;
use operator::statement::StatementExecutor;
use operator::table::{table_idents_to_full_name, TableMutationOperator};
use partition::manager::PartitionRuleManager;
use operator::table::table_idents_to_full_name;
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
use query::plan::LogicalPlan;
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
use query::query_engine::DescribeResult;
use query::{QueryEngineFactory, QueryEngineRef};
use query::QueryEngineRef;
use raft_engine::{Config, ReadableSize, RecoveryMode};
use servers::error as server_error;
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
@@ -83,15 +75,11 @@ use sql::statements::statement::Statement;
use sqlparser::ast::ObjectName;
pub use standalone::StandaloneDatanodeManager;
use self::region_query::FrontendRegionQueryHandler;
use self::standalone::StandaloneTableMetadataCreator;
use crate::error::{
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu, MissingMetasrvOptsSnafu,
ParseSqlSnafu, PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
TableOperationSnafu,
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu, ParseSqlSnafu,
PermissionSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu, TableOperationSnafu,
};
use crate::frontend::{FrontendOptions, TomlSerializable};
use crate::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use crate::heartbeat::HeartbeatTask;
use crate::metrics;
use crate::script::ScriptExecutor;
@@ -131,99 +119,9 @@ pub struct Instance {
}
impl Instance {
pub async fn try_new_distributed(opts: &FrontendOptions, plugins: Plugins) -> Result<Self> {
let meta_client = Self::create_meta_client(opts).await?;
let datanode_clients = Arc::new(DatanodeClients::default());
Self::try_new_distributed_with(meta_client, datanode_clients, plugins, opts).await
}
pub async fn try_new_distributed_with(
meta_client: Arc<MetaClient>,
datanode_clients: Arc<DatanodeClients>,
plugins: Plugins,
opts: &FrontendOptions,
) -> Result<Self> {
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
meta_backend.clone(),
meta_backend.clone(),
datanode_clients.clone(),
);
let partition_manager = Arc::new(PartitionRuleManager::new(meta_backend.clone()));
let region_query_handler = FrontendRegionQueryHandler::arc(
partition_manager.clone(),
catalog_manager.datanode_manager().clone(),
);
let inserter = Arc::new(Inserter::new(
catalog_manager.clone(),
partition_manager.clone(),
datanode_clients.clone(),
));
let deleter = Arc::new(Deleter::new(
catalog_manager.clone(),
partition_manager,
datanode_clients,
));
let table_mutation_handler = Arc::new(TableMutationOperator::new(
inserter.clone(),
deleter.clone(),
));
let query_engine = QueryEngineFactory::new_with_plugins(
catalog_manager.clone(),
Some(region_query_handler.clone()),
Some(table_mutation_handler),
true,
plugins.clone(),
)
.query_engine();
let statement_executor = Arc::new(StatementExecutor::new(
catalog_manager.clone(),
query_engine.clone(),
meta_client.clone(),
meta_backend.clone(),
catalog_manager.clone(),
inserter.clone(),
));
plugins.insert::<StatementExecutorRef>(statement_executor.clone());
let script_executor =
Arc::new(ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?);
let handlers_executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(InvalidateTableCacheHandler::new(meta_backend)),
]);
let heartbeat_task = Some(HeartbeatTask::new(
meta_client.clone(),
opts.heartbeat.clone(),
Arc::new(handlers_executor),
));
Ok(Instance {
catalog_manager,
script_executor,
statement_executor,
query_engine,
plugins: plugins.clone(),
servers: Arc::new(HashMap::new()),
heartbeat_task,
inserter,
deleter,
})
}
async fn create_meta_client(opts: &FrontendOptions) -> Result<Arc<MetaClient>> {
let meta_client_options = opts.meta_client.as_ref().context(MissingMetasrvOptsSnafu)?;
pub async fn create_meta_client(
meta_client_options: &MetaClientOptions,
) -> Result<Arc<MetaClient>> {
info!(
"Creating Frontend instance in distributed mode with Meta server addr {:?}",
meta_client_options.metasrv_addrs
@@ -285,82 +183,6 @@ impl Instance {
Ok((kv_backend, procedure_manager))
}
pub async fn try_new_standalone(
kv_backend: KvBackendRef,
procedure_manager: ProcedureManagerRef,
catalog_manager: CatalogManagerRef,
plugins: Plugins,
region_server: RegionServer,
) -> Result<Self> {
let partition_manager = Arc::new(PartitionRuleManager::new(kv_backend.clone()));
let datanode_manager = Arc::new(StandaloneDatanodeManager(region_server));
let region_query_handler =
FrontendRegionQueryHandler::arc(partition_manager.clone(), datanode_manager.clone());
let inserter = Arc::new(Inserter::new(
catalog_manager.clone(),
partition_manager.clone(),
datanode_manager.clone(),
));
let deleter = Arc::new(Deleter::new(
catalog_manager.clone(),
partition_manager,
datanode_manager.clone(),
));
let table_mutation_handler = Arc::new(TableMutationOperator::new(
inserter.clone(),
deleter.clone(),
));
let query_engine = QueryEngineFactory::new_with_plugins(
catalog_manager.clone(),
Some(region_query_handler),
Some(table_mutation_handler),
true,
plugins.clone(),
)
.query_engine();
let script_executor =
Arc::new(ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?);
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
let cache_invalidator = Arc::new(DummyCacheInvalidator);
let ddl_executor = Arc::new(
DdlManager::try_new(
procedure_manager,
datanode_manager,
cache_invalidator.clone(),
table_metadata_manager.clone(),
Arc::new(StandaloneTableMetadataCreator::new(kv_backend.clone())),
)
.context(error::InitDdlManagerSnafu)?,
);
let statement_executor = Arc::new(StatementExecutor::new(
catalog_manager.clone(),
query_engine.clone(),
ddl_executor,
kv_backend.clone(),
cache_invalidator,
inserter.clone(),
));
Ok(Instance {
catalog_manager: catalog_manager.clone(),
script_executor,
statement_executor,
query_engine,
plugins,
servers: Arc::new(HashMap::new()),
heartbeat_task: None,
inserter,
deleter,
})
}
pub async fn build_servers(
&mut self,
opts: impl Into<FrontendOptions> + TomlSerializable,
@@ -400,10 +222,13 @@ impl FrontendInstance for Instance {
self.script_executor.start(self)?;
futures::future::try_join_all(self.servers.values().map(start_server))
.await
.context(error::StartServerSnafu)
.map(|_| ())
futures::future::try_join_all(self.servers.iter().map(|(name, handler)| async move {
info!("Starting service: {name}");
start_server(handler).await
}))
.await
.context(error::StartServerSnafu)
.map(|_| ())
}
}

View File

@@ -0,0 +1,149 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use catalog::kvbackend::KvBackendCatalogManager;
use common_base::Plugins;
use common_meta::cache_invalidator::{CacheInvalidatorRef, DummyCacheInvalidator};
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::DdlTaskExecutorRef;
use common_meta::kv_backend::KvBackendRef;
use operator::delete::Deleter;
use operator::insert::Inserter;
use operator::statement::StatementExecutor;
use operator::table::TableMutationOperator;
use partition::manager::PartitionRuleManager;
use query::QueryEngineFactory;
use crate::error::Result;
use crate::heartbeat::HeartbeatTask;
use crate::instance::region_query::FrontendRegionQueryHandler;
use crate::instance::{Instance, StatementExecutorRef};
use crate::script::ScriptExecutor;
/// Builder that assembles a frontend [`Instance`] from its collaborators.
///
/// The mandatory parts are supplied to `new`; the optional ones are set with
/// the `with_*` methods and fall back to defaults inside `try_build`.
pub struct FrontendBuilder {
// Handed to the catalog manager, the partition rule manager and the
// statement executor in `try_build`.
kv_backend: KvBackendRef,
// Optional; `try_build` substitutes a `DummyCacheInvalidator` when unset.
cache_invalidator: Option<CacheInvalidatorRef>,
// Shared by the region query handler, the inserter and the deleter.
datanode_manager: DatanodeManagerRef,
// Optional; `try_build` uses `Plugins::default()` when unset.
plugins: Option<Plugins>,
// Passed to the `StatementExecutor` built in `try_build`.
ddl_task_executor: DdlTaskExecutorRef,
// Optional; moved into the built `Instance` unchanged.
heartbeat_task: Option<HeartbeatTask>,
}
impl FrontendBuilder {
    /// Creates a builder from the components every frontend needs.
    ///
    /// The cache invalidator, plugins and heartbeat task start out unset and
    /// can be supplied through the `with_*` methods.
    pub fn new(
        kv_backend: KvBackendRef,
        datanode_manager: DatanodeManagerRef,
        ddl_task_executor: DdlTaskExecutorRef,
    ) -> Self {
        Self {
            kv_backend,
            cache_invalidator: None,
            datanode_manager,
            plugins: None,
            ddl_task_executor,
            heartbeat_task: None,
        }
    }

    /// Sets the cache invalidator handed to the catalog manager.
    ///
    /// When never called, [`try_build`](Self::try_build) falls back to a
    /// [`DummyCacheInvalidator`].
    pub fn with_cache_invalidator(self, cache_invalidator: CacheInvalidatorRef) -> Self {
        Self {
            cache_invalidator: Some(cache_invalidator),
            ..self
        }
    }

    /// Sets the [`Plugins`] used by the query engine and stored in the instance.
    ///
    /// When never called, [`try_build`](Self::try_build) uses `Plugins::default()`.
    pub fn with_plugin(self, plugins: Plugins) -> Self {
        Self {
            plugins: Some(plugins),
            ..self
        }
    }

    /// Sets the heartbeat task that is moved into the built [`Instance`].
    pub fn with_heartbeat_task(self, heartbeat_task: HeartbeatTask) -> Self {
        Self {
            heartbeat_task: Some(heartbeat_task),
            ..self
        }
    }

    /// Consumes the builder and wires up the frontend [`Instance`].
    ///
    /// # Errors
    ///
    /// Propagates the error returned by [`ScriptExecutor::new`].
    pub async fn try_build(self) -> Result<Instance> {
        let kv_backend = self.kv_backend;
        let datanode_manager = self.datanode_manager;
        let plugins = self.plugins.unwrap_or_default();

        let catalog_manager = KvBackendCatalogManager::new(
            kv_backend.clone(),
            self.cache_invalidator
                .unwrap_or_else(|| Arc::new(DummyCacheInvalidator)),
        );

        let partition_manager = Arc::new(PartitionRuleManager::new(kv_backend.clone()));

        let region_query_handler =
            FrontendRegionQueryHandler::arc(partition_manager.clone(), datanode_manager.clone());

        let inserter = Arc::new(Inserter::new(
            catalog_manager.clone(),
            partition_manager.clone(),
            datanode_manager.clone(),
        ));
        // Last users of `partition_manager` and `datanode_manager`: move them
        // instead of cloning.
        let deleter = Arc::new(Deleter::new(
            catalog_manager.clone(),
            partition_manager,
            datanode_manager,
        ));
        let table_mutation_handler = Arc::new(TableMutationOperator::new(
            inserter.clone(),
            deleter.clone(),
        ));

        // `region_query_handler` is not needed afterwards, so it is moved in.
        let query_engine = QueryEngineFactory::new_with_plugins(
            catalog_manager.clone(),
            Some(region_query_handler),
            Some(table_mutation_handler),
            true,
            plugins.clone(),
        )
        .query_engine();

        let script_executor =
            Arc::new(ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?);

        let statement_executor = Arc::new(StatementExecutor::new(
            catalog_manager.clone(),
            query_engine.clone(),
            self.ddl_task_executor,
            kv_backend,
            catalog_manager.clone(),
            inserter.clone(),
        ));

        // Register the executor in the plugins under `StatementExecutorRef`.
        plugins.insert::<StatementExecutorRef>(statement_executor.clone());

        Ok(Instance {
            catalog_manager,
            script_executor,
            statement_executor,
            query_engine,
            plugins,
            servers: Arc::new(HashMap::new()),
            heartbeat_task: self.heartbeat_task,
            inserter,
            deleter,
        })
    }
}

View File

@@ -107,7 +107,7 @@ impl Datanode for RegionInvoker {
}
}
pub(crate) struct StandaloneTableMetadataCreator {
pub struct StandaloneTableMetadataCreator {
table_id_sequence: SequenceRef,
}

View File

@@ -22,7 +22,7 @@ use api::v1::meta::store_server::StoreServer;
use common_base::Plugins;
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
use common_telemetry::info;
use etcd_client::Client;
use servers::configurator::ConfiguratorRef;
@@ -60,8 +60,11 @@ pub struct MetaSrvInstance {
}
impl MetaSrvInstance {
pub async fn new(opts: MetaSrvOptions, plugins: Plugins) -> Result<MetaSrvInstance> {
let meta_srv = build_meta_srv(&opts, plugins.clone()).await?;
pub async fn new(
opts: MetaSrvOptions,
plugins: Plugins,
meta_srv: MetaSrv,
) -> Result<MetaSrvInstance> {
let http_srv = Arc::new(
HttpServerBuilder::new(opts.http.clone())
.with_metrics_handler(MetricsHandler)
@@ -161,28 +164,26 @@ pub fn router(meta_srv: MetaSrv) -> Router {
.add_service(admin::make_admin_service(meta_srv))
}
pub async fn build_meta_srv(opts: &MetaSrvOptions, plugins: Plugins) -> Result<MetaSrv> {
let (kv_backend, election, lock) = if opts.use_memory_store {
(
pub async fn metasrv_builder(
opts: &MetaSrvOptions,
plugins: Plugins,
kv_backend: Option<KvBackendRef>,
) -> Result<MetaSrvBuilder> {
let (kv_backend, election, lock) = match (kv_backend, opts.use_memory_store) {
(Some(kv_backend), _) => (kv_backend, None, Some(Arc::new(MemLock::default()) as _)),
(None, true) => (
Arc::new(MemoryKvBackend::new()) as _,
None,
Some(Arc::new(MemLock::default()) as _),
)
} else {
let etcd_endpoints = opts
.store_addr
.split(',')
.map(|x| x.trim())
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();
let etcd_client = Client::connect(&etcd_endpoints, None)
.await
.context(error::ConnectEtcdSnafu)?;
(
EtcdStore::with_etcd_client(etcd_client.clone()),
Some(EtcdElection::with_etcd_client(&opts.server_addr, etcd_client.clone()).await?),
Some(EtcdLock::with_etcd_client(etcd_client)?),
)
),
(None, false) => {
let etcd_client = create_etcd_client(opts).await?;
(
EtcdStore::with_etcd_client(etcd_client.clone()),
Some(EtcdElection::with_etcd_client(&opts.server_addr, etcd_client.clone()).await?),
Some(EtcdLock::with_etcd_client(etcd_client)?),
)
}
};
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
@@ -192,14 +193,24 @@ pub async fn build_meta_srv(opts: &MetaSrvOptions, plugins: Plugins) -> Result<M
SelectorType::LeaseBased => Arc::new(LeaseBasedSelector) as SelectorRef,
};
MetaSrvBuilder::new()
Ok(MetaSrvBuilder::new()
.options(opts.clone())
.kv_backend(kv_backend)
.in_memory(in_memory)
.selector(selector)
.election(election)
.lock(lock)
.plugins(plugins)
.build()
.await
.plugins(plugins))
}
async fn create_etcd_client(opts: &MetaSrvOptions) -> Result<Client> {
let etcd_endpoints = opts
.store_addr
.split(',')
.map(|x| x.trim())
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();
Client::connect(&etcd_endpoints, None)
.await
.context(error::ConnectEtcdSnafu)
}

View File

@@ -32,6 +32,9 @@ use crate::pubsub::Message;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("The region migration procedure aborted, reason: {}", reason))]
MigrationAbort { location: Location, reason: String },
#[snafu(display(
"Another procedure is opening the region: {} on peer: {}",
region_id,
@@ -298,6 +301,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to find table route for {region_id}"))]
RegionRouteNotFound {
region_id: RegionId,
location: Location,
},
#[snafu(display("Table info not found: {}", table_id))]
TableInfoNotFound {
table_id: TableId,
@@ -658,7 +667,9 @@ impl ErrorExt for Error {
| Error::Unexpected { .. }
| Error::Txn { .. }
| Error::TableIdChanged { .. }
| Error::RegionOpeningRace { .. } => StatusCode::Unexpected,
| Error::RegionOpeningRace { .. }
| Error::RegionRouteNotFound { .. }
| Error::MigrationAbort { .. } => StatusCode::Unexpected,
Error::TableNotFound { .. } => StatusCode::TableNotFound,
Error::InvalidateTableCache { source, .. } => source.status_code(),
Error::RequestDatanode { source, .. } => source.status_code(),

View File

@@ -298,6 +298,19 @@ impl HeartbeatMailbox {
serde_json::from_str(payload).context(DeserializeFromJsonSnafu { input: payload })
}
/// Decodes the JSON payload of a [MailboxMessage] into an [Instruction].
///
/// Fails with `UnexpectedInstructionReply` when the message carries no
/// payload, and with `DeserializeFromJson` when the payload cannot be parsed.
#[cfg(test)]
pub(crate) fn json_instruction(msg: &MailboxMessage) -> Result<Instruction> {
    let maybe_payload = msg.payload.as_ref();
    let Payload::Json(json) =
        maybe_payload.with_context(|| UnexpectedInstructionReplySnafu {
            mailbox_message: msg.to_string(),
            reason: format!("empty payload, msg: {msg:?}"),
        })?;

    serde_json::from_str(json).context(DeserializeFromJsonSnafu { input: json })
}
pub fn create(pushers: Pushers, sequence: Sequence) -> MailboxRef {
let mailbox = Arc::new(Self::new(pushers, sequence));

View File

@@ -18,6 +18,7 @@ use std::sync::Arc;
use api::v1::meta::{HeartbeatRequest, RegionLease, Role};
use async_trait::async_trait;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::info;
use store_api::region_engine::{GrantedRegion, RegionRole};
use store_api::storage::RegionId;
@@ -123,6 +124,12 @@ impl HeartbeatHandler for RegionLeaseHandler {
&leaders,
RegionRole::Leader,
);
if !closable.is_empty() {
info!(
"Granting region lease, found closable leader regions: {:?} on datanode {}",
closable, datanode_id
);
}
inactive_regions.extend(closable);
let followers = followers.into_iter().flatten().collect::<Vec<_>>();
@@ -144,6 +151,12 @@ impl HeartbeatHandler for RegionLeaseHandler {
&followers,
RegionRole::Follower,
);
if !closable.is_empty() {
info!(
"Granting region lease, found closable follower regions {:?} on datanode {}",
closable, datanode_id
);
}
inactive_regions.extend(closable);
acc.inactive_region_ids = inactive_regions;

View File

@@ -15,6 +15,8 @@
#![feature(async_closure)]
#![feature(result_flattening)]
#![feature(assert_matches)]
#![feature(option_take_if)]
#![feature(extract_if)]
pub mod bootstrap;
mod cache_invalidator;

View File

@@ -19,12 +19,14 @@ use std::time::Duration;
use client::client_manager::DatanodeClients;
use common_base::Plugins;
use common_grpc::channel_manager::ChannelConfig;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::TableMetadataAllocatorRef;
use common_meta::ddl_manager::{DdlManager, DdlManagerRef};
use common_meta::distributed_time_constants;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
use common_meta::sequence::{Sequence, SequenceRef};
use common_meta::sequence::Sequence;
use common_meta::state_store::KvStateStore;
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::ProcedureManagerRef;
@@ -70,8 +72,9 @@ pub struct MetaSrvBuilder {
election: Option<ElectionRef>,
meta_peer_client: Option<MetaPeerClientRef>,
lock: Option<DistLockRef>,
datanode_clients: Option<Arc<DatanodeClients>>,
datanode_manager: Option<DatanodeManagerRef>,
plugins: Option<Plugins>,
table_metadata_allocator: Option<TableMetadataAllocatorRef>,
}
impl MetaSrvBuilder {
@@ -85,8 +88,9 @@ impl MetaSrvBuilder {
election: None,
options: None,
lock: None,
datanode_clients: None,
datanode_manager: None,
plugins: None,
table_metadata_allocator: None,
}
}
@@ -130,8 +134,8 @@ impl MetaSrvBuilder {
self
}
pub fn datanode_clients(mut self, clients: Arc<DatanodeClients>) -> Self {
self.datanode_clients = Some(clients);
pub fn datanode_manager(mut self, datanode_manager: DatanodeManagerRef) -> Self {
self.datanode_manager = Some(datanode_manager);
self
}
@@ -140,6 +144,14 @@ impl MetaSrvBuilder {
self
}
pub fn table_metadata_allocator(
mut self,
table_metadata_allocator: TableMetadataAllocatorRef,
) -> Self {
self.table_metadata_allocator = Some(table_metadata_allocator);
self
}
pub async fn build(self) -> Result<MetaSrv> {
let started = Arc::new(AtomicBool::new(false));
@@ -152,8 +164,9 @@ impl MetaSrvBuilder {
selector,
handler_group,
lock,
datanode_clients,
datanode_manager,
plugins,
table_metadata_allocator,
} = self;
let options = options.unwrap_or_default();
@@ -189,14 +202,22 @@ impl MetaSrvBuilder {
meta_peer_client: meta_peer_client.clone(),
table_id: None,
};
let table_metadata_allocator = table_metadata_allocator.unwrap_or_else(|| {
Arc::new(MetaSrvTableMetadataAllocator::new(
selector_ctx.clone(),
selector.clone(),
table_id_sequence.clone(),
))
});
let ddl_manager = build_ddl_manager(
&options,
datanode_clients,
datanode_manager,
&procedure_manager,
&mailbox,
&table_metadata_manager,
(&selector, &selector_ctx),
&table_id_sequence,
table_metadata_allocator,
)?;
let opening_region_keeper = Arc::new(OpeningRegionKeeper::default());
@@ -324,12 +345,11 @@ fn build_procedure_manager(
fn build_ddl_manager(
options: &MetaSrvOptions,
datanode_clients: Option<Arc<DatanodeClients>>,
datanode_clients: Option<DatanodeManagerRef>,
procedure_manager: &ProcedureManagerRef,
mailbox: &MailboxRef,
table_metadata_manager: &TableMetadataManagerRef,
(selector, selector_ctx): (&SelectorRef, &SelectorContext),
table_id_sequence: &SequenceRef,
table_metadata_allocator: TableMetadataAllocatorRef,
) -> Result<DdlManagerRef> {
let datanode_clients = datanode_clients.unwrap_or_else(|| {
let datanode_client_channel_config = ChannelConfig::new()
@@ -349,19 +369,13 @@ fn build_ddl_manager(
},
));
let table_meta_allocator = Arc::new(MetaSrvTableMetadataAllocator::new(
selector_ctx.clone(),
selector.clone(),
table_id_sequence.clone(),
));
Ok(Arc::new(
DdlManager::try_new(
procedure_manager.clone(),
datanode_clients,
cache_invalidator,
table_metadata_manager.clone(),
table_meta_allocator,
table_metadata_allocator,
)
.context(error::InitDdlManagerSnafu)?,
))

View File

@@ -34,4 +34,19 @@ lazy_static! {
pub static ref METRIC_META_LEADER_CACHED_KV_LOAD: HistogramVec =
register_histogram_vec!("meta_leader_cache_kv_load", "meta load cache", &["prefix"])
.unwrap();
pub static ref METRIC_META_LOAD_FOLLOWER_METADATA: Histogram = register_histogram!(
"meta_load_follower_metadata",
"meta load follower regions metadata elapsed"
)
.unwrap();
pub static ref METRIC_META_LOAD_LEADER_METADATA: Histogram = register_histogram!(
"meta_load_leader_metadata",
"meta load leader regions metadata elapsed"
)
.unwrap();
pub static ref METRIC_META_KV_CACHE_BATCH_GET_HIT_RATE: Gauge = register_gauge!(
"meta_kv_cache_batch_get_hit_rate",
"meta kv cache batch get hit rate"
)
.unwrap();
}

View File

@@ -70,7 +70,7 @@ pub async fn mock(
};
let builder = match datanode_clients {
Some(clients) => builder.datanode_clients(clients),
Some(clients) => builder.datanode_manager(clients),
None => builder,
};

View File

@@ -105,7 +105,7 @@ impl UpdateRegionMetadata {
region_storage_path: self.region_storage_path.to_string(),
region_options: self.region_options.clone(),
},
table_route_value,
&table_route_value,
new_region_routes,
&self.region_options,
)

View File

@@ -13,16 +13,22 @@
// limitations under the License.
pub(crate) mod downgrade_leader_region;
pub(crate) mod migration_abort;
pub(crate) mod migration_end;
pub(crate) mod migration_start;
pub(crate) mod open_candidate_region;
#[cfg(test)]
pub(crate) mod test_util;
pub(crate) mod update_metadata;
pub(crate) mod upgrade_candidate_region;
use std::any::Any;
use std::fmt::Debug;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::instruction::Instruction;
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use common_meta::peer::Peer;
@@ -34,12 +40,13 @@ use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
use serde::{Deserialize, Serialize};
use snafu::{location, Location, OptionExt, ResultExt};
use store_api::storage::RegionId;
use tokio::time::Instant;
use self::migration_start::RegionMigrationStart;
use crate::error::{self, Error, Result};
use crate::procedure::utils::region_lock_key;
use crate::region::lease_keeper::{OpeningRegionGuard, OpeningRegionKeeperRef};
use crate::service::mailbox::MailboxRef;
use crate::service::mailbox::{BroadcastChannel, MailboxRef};
/// It's shared in each step and available even after recovering.
///
@@ -78,8 +85,36 @@ pub struct VolatileContext {
/// the corresponding [RegionRoute](common_meta::rpc::router::RegionRoute) of the opening region
/// was written into [TableRouteValue](common_meta::key::table_route::TableRouteValue).
opening_region_guard: Option<OpeningRegionGuard>,
/// `table_route_info` is stored via previous steps for future use.
table_route_info: Option<DeserializedValueWithBytes<TableRouteValue>>,
/// `table_route` is stored via previous steps for future use.
table_route: Option<DeserializedValueWithBytes<TableRouteValue>>,
/// `table_info` is stored via previous steps for future use.
///
/// `table_info` should remain unchanged during the procedure;
/// no other DDL procedure executed concurrently for the current table.
table_info: Option<DeserializedValueWithBytes<TableInfoValue>>,
/// The deadline of leader region lease.
leader_region_lease_deadline: Option<Instant>,
/// The last_entry_id of leader region.
leader_region_last_entry_id: Option<u64>,
}
impl VolatileContext {
    /// Initializes `leader_region_lease_deadline` to `now + lease_timeout`.
    ///
    /// An already present deadline is left untouched.
    pub fn set_leader_region_lease_deadline(&mut self, lease_timeout: Duration) {
        let _ = self
            .leader_region_lease_deadline
            .get_or_insert_with(|| Instant::now() + lease_timeout);
    }

    /// Clears `leader_region_lease_deadline`.
    pub fn reset_leader_region_lease_deadline(&mut self) {
        self.leader_region_lease_deadline = None;
    }

    /// Records the last entry id of the leader region.
    pub fn set_last_entry_id(&mut self, last_entry_id: u64) {
        self.leader_region_last_entry_id = Some(last_entry_id);
    }
}
/// Used to generate new [Context].
@@ -127,7 +162,7 @@ impl Context {
&self.server_addr
}
/// Returns the `table_route_value` of [VolatileContext] if any.
/// Returns the `table_route` of [VolatileContext] if any.
/// Otherwise, returns the value retrieved from remote.
///
/// Retry:
@@ -135,7 +170,7 @@ impl Context {
pub async fn get_table_route_value(
&mut self,
) -> Result<&DeserializedValueWithBytes<TableRouteValue>> {
let table_route_value = &mut self.volatile_ctx.table_route_info;
let table_route_value = &mut self.volatile_ctx.table_route;
if table_route_value.is_none() {
let table_id = self.persistent_ctx.region_id.table_id();
@@ -157,9 +192,45 @@ impl Context {
Ok(table_route_value.as_ref().unwrap())
}
/// Removes the `table_route_value` of [VolatileContext], returns true if any.
/// Removes the `table_route` of [VolatileContext], returns true if any.
pub fn remove_table_route_value(&mut self) -> bool {
let value = self.volatile_ctx.table_route_info.take();
let value = self.volatile_ctx.table_route.take();
value.is_some()
}
/// Returns the `table_info` of [VolatileContext] if any.
/// Otherwise, returns the value retrieved from remote.
///
/// Retry:
/// - Failed to retrieve the metadata of table.
pub async fn get_table_info_value(
&mut self,
) -> Result<&DeserializedValueWithBytes<TableInfoValue>> {
let table_info_value = &mut self.volatile_ctx.table_info;
if table_info_value.is_none() {
let table_id = self.persistent_ctx.region_id.table_id();
let table_info = self
.table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
})?
.context(error::TableInfoNotFoundSnafu { table_id })?;
*table_info_value = Some(table_info);
}
Ok(table_info_value.as_ref().unwrap())
}
/// Clears the cached `table_info` of [VolatileContext].
///
/// Returns `true` when a value was cached before the call.
pub fn remove_table_info_value(&mut self) -> bool {
    self.volatile_ctx.table_info.take().is_some()
}
@@ -167,6 +238,27 @@ impl Context {
pub fn region_id(&self) -> RegionId {
self.persistent_ctx.region_id
}
/// Broadcasts an `InvalidateTableIdCache` instruction for the table of the
/// migrating region on the frontend broadcast channel.
///
/// Fails with `SerializeToJson` when the instruction cannot be encoded;
/// otherwise returns the result of the mailbox broadcast.
pub async fn invalidate_table_cache(&self) -> Result<()> {
    let instruction = Instruction::InvalidateTableIdCache(self.region_id().table_id());

    let sender = format!("Metasrv@{}", self.server_addr());
    let timestamp_millis = common_time::util::current_time_millis();
    let message = MailboxMessage::json_message(
        "Invalidate Table Cache",
        &sender,
        "Frontend broadcast",
        timestamp_millis,
        &instruction,
    )
    .with_context(|_| error::SerializeToJsonSnafu {
        input: instruction.to_string(),
    })?;

    self.mailbox
        .broadcast(&BroadcastChannel::Frontend, &message)
        .await
}
}
#[async_trait::async_trait]
@@ -278,7 +370,9 @@ mod tests {
use super::migration_end::RegionMigrationEnd;
use super::*;
use crate::handler::HeartbeatMailbox;
use crate::procedure::region_migration::test_util::TestingEnv;
use crate::service::mailbox::Channel;
fn new_persistent_context() -> PersistentContext {
PersistentContext {
@@ -378,4 +472,29 @@ mod tests {
assert_eq!(procedure.context.persistent_ctx.cluster_id, 2);
assert_matches!(status.unwrap(), Status::Done);
}
/// Broadcasting succeeds without receivers, and a registered frontend
/// receiver gets the `InvalidateTableIdCache` instruction for table 1024.
#[tokio::test]
async fn test_broadcast_invalidate_table_cache() {
    let mut env = TestingEnv::new();
    let region_id = RegionId::new(1024, 1);
    let ctx = env
        .context_factory()
        .new_context(test_util::new_persistent_context(1, 2, region_id));
    let mailbox_ctx = env.mailbox_context();

    // No receivers.
    ctx.invalidate_table_cache().await.unwrap();

    // Register a frontend receiver and broadcast again.
    let (sender, mut receiver) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Frontend(1), sender)
        .await;
    ctx.invalidate_table_cache().await.unwrap();

    let response = receiver.recv().await.unwrap().unwrap();
    let message = response.mailbox_message.unwrap();
    let instruction = HeartbeatMailbox::json_instruction(&message).unwrap();
    assert_matches!(instruction, Instruction::InvalidateTableIdCache(1024));
}
}

View File

@@ -13,23 +13,506 @@
// limitations under the License.
use std::any::Any;
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
use common_meta::instruction::{
DowngradeRegion, DowngradeRegionReply, Instruction, InstructionReply,
};
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use tokio::time::sleep;
use crate::error::Result;
use super::upgrade_candidate_region::UpgradeCandidateRegion;
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
use crate::procedure::region_migration::{Context, State};
use crate::service::mailbox::Channel;
const DOWNGRADE_LEADER_REGION_TIMEOUT: Duration = Duration::from_secs(1);
/// Region migration state that asks the current leader Datanode to downgrade
/// the region; its `next` hands over to `UpgradeCandidateRegion`.
#[derive(Debug, Serialize, Deserialize)]
pub struct DowngradeLeaderRegion;
pub struct DowngradeLeaderRegion {
// The optimistic retry times: `downgrade_region_with_retry` stops sending
// the downgrade instruction once this many attempts have failed.
optimistic_retry: usize,
// The retry initial interval: slept after each retryable failure. The same
// interval is used for every retry (no backoff is applied).
retry_initial_interval: Duration,
}
impl Default for DowngradeLeaderRegion {
    /// Defaults to 3 optimistic retries with a 500 ms retry interval.
    fn default() -> Self {
        let optimistic_retry = 3;
        let retry_initial_interval = Duration::from_millis(500);

        Self {
            optimistic_retry,
            retry_initial_interval,
        }
    }
}
#[async_trait::async_trait]
#[typetag::serde]
impl State for DowngradeLeaderRegion {
async fn next(&mut self, _ctx: &mut Context) -> Result<Box<dyn State>> {
todo!()
/// Downgrades the leader region, then moves on to `UpgradeCandidateRegion`.
///
/// A lease deadline of `REGION_LEASE_SECS` is armed before the downgrade is
/// attempted. When the downgrade does not succeed, the deadline is waited
/// out before the next state is returned.
async fn next(&mut self, ctx: &mut Context) -> Result<Box<dyn State>> {
// Ensures the `leader_region_lease_deadline` must exist after recovering.
ctx.volatile_ctx
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
self.downgrade_region_with_retry(ctx).await;
// The deadline is still set only when the downgrade did not succeed
// (`downgrade_region_with_retry` resets it on success); in that case wait
// until the leader region lease deadline has passed.
if let Some(deadline) = ctx.volatile_ctx.leader_region_lease_deadline.as_ref() {
tokio::time::sleep_until(*deadline).await;
}
Ok(Box::new(UpgradeCandidateRegion))
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl DowngradeLeaderRegion {
    /// Builds the downgrade region instruction for the migrating region.
    fn build_downgrade_region_instruction(&self, ctx: &Context) -> Instruction {
        let pc = &ctx.persistent_ctx;
        let region_id = pc.region_id;
        Instruction::DowngradeRegion(DowngradeRegion { region_id })
    }

    /// Tries to downgrade a leader region.
    ///
    /// Sends `downgrade_instruction` to the Datanode of `from_peer` and waits
    /// for its reply. On success the reported `last_entry_id`, if any, is
    /// stored in the volatile context.
    ///
    /// Retry:
    /// - [MailboxTimeout](error::Error::MailboxTimeout), Timeout.
    /// - Failed to downgrade region on the Datanode.
    ///
    /// Abort:
    /// - [PusherNotFound](error::Error::PusherNotFound), The datanode is unreachable.
    /// - [PushMessage](error::Error::PushMessage), The receiver is dropped.
    /// - [MailboxReceiver](error::Error::MailboxReceiver), The sender is dropped without sending (impossible).
    /// - [UnexpectedInstructionReply](error::Error::UnexpectedInstructionReply).
    /// - Invalid JSON.
    async fn downgrade_region(
        &self,
        ctx: &mut Context,
        downgrade_instruction: &Instruction,
    ) -> Result<()> {
        let pc = &ctx.persistent_ctx;
        let region_id = pc.region_id;
        let leader = &pc.from_peer;

        let msg = MailboxMessage::json_message(
            &format!("Downgrade leader region: {}", region_id),
            &format!("Meta@{}", ctx.server_addr()),
            &format!("Datanode-{}@{}", leader.id, leader.addr),
            common_time::util::current_time_millis(),
            downgrade_instruction,
        )
        .with_context(|_| error::SerializeToJsonSnafu {
            input: downgrade_instruction.to_string(),
        })?;

        let ch = Channel::Datanode(leader.id);
        let receiver = ctx
            .mailbox
            .send(&ch, msg, DOWNGRADE_LEADER_REGION_TIMEOUT)
            .await?;

        match receiver.await? {
            Ok(msg) => {
                let reply = HeartbeatMailbox::json_reply(&msg)?;
                let InstructionReply::DowngradeRegion(DowngradeRegionReply {
                    last_entry_id,
                    exists,
                    error,
                }) = reply
                else {
                    return error::UnexpectedInstructionReplySnafu {
                        mailbox_message: msg.to_string(),
                        reason: "expect downgrade region reply",
                    }
                    .fail();
                };

                // An error reported by the Datanode is treated as retryable.
                if error.is_some() {
                    return error::RetryLaterSnafu {
                        reason: format!(
                            "Failed to downgrade the region {} on Datanode {:?}, error: {:?}",
                            region_id, leader, error
                        ),
                    }
                    .fail();
                }

                // A missing region is only logged; the downgrade still counts
                // as done.
                if !exists {
                    warn!(
                        "Trying to downgrade the region {} on Datanode {}, but region doesn't exist!",
                        region_id, leader
                    );
                }

                if let Some(last_entry_id) = last_entry_id {
                    ctx.volatile_ctx.set_last_entry_id(last_entry_id);
                }

                Ok(())
            }
            Err(error::Error::MailboxTimeout { .. }) => {
                let reason = format!(
                    "Mailbox received timeout for downgrade leader region {region_id} on Datanode {:?}",
                    leader,
                );
                error::RetryLaterSnafu { reason }.fail()
            }
            Err(err) => Err(err),
        }
    }

    /// Downgrades a leader region.
    ///
    /// Fast path:
    /// - Waits for the reply of downgrade instruction.
    ///
    /// Slow path:
    /// - Waits for the lease of the leader region expired.
    ///
    /// The lease deadline in the volatile context is reset only when the
    /// downgrade succeeds. When it fails with a non-retryable error, or after
    /// `optimistic_retry` failed attempts, the deadline is left in place and
    /// the final error is logged rather than returned.
    async fn downgrade_region_with_retry(&self, ctx: &mut Context) {
        let instruction = self.build_downgrade_region_instruction(ctx);

        let mut retry = 0;

        loop {
            if let Err(err) = self.downgrade_region(ctx, &instruction).await {
                retry += 1;
                if err.is_retryable() && retry < self.optimistic_retry {
                    warn!("Failed to downgrade region, error: {err:?}, retry later");
                    sleep(self.retry_initial_interval).await;
                } else {
                    // Do not drop the final error silently: record why the
                    // fast path was abandoned.
                    warn!(
                        "Failed to downgrade region, error: {err:?}, giving up after {retry} attempt(s) and falling back to waiting for the leader region lease to expire"
                    );
                    break;
                }
            } else {
                // Resets the deadline.
                ctx.volatile_ctx.reset_leader_region_lease_deadline();
                break;
            }
        }
    }
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use api::v1::meta::mailbox_message::Payload;
use common_meta::peer::Peer;
use common_time::util::current_time_millis;
use store_api::storage::RegionId;
use tokio::time::Instant;
use super::*;
use crate::error::Error;
use crate::procedure::region_migration::test_util::{
new_close_region_reply, send_mock_reply, TestingEnv,
};
use crate::procedure::region_migration::{ContextFactory, PersistentContext};
/// Builds the [PersistentContext] shared by the tests in this module:
/// region `(1024, 1)` moving from peer 1 to peer 2 in cluster 0.
fn new_persistent_context() -> PersistentContext {
    let from_peer = Peer::empty(1);
    let to_peer = Peer::empty(2);
    let region_id = RegionId::new(1024, 1);

    PersistentContext {
        from_peer,
        to_peer,
        region_id,
        cluster_id: 0,
    }
}
/// Builds a mocked [MailboxMessage] whose JSON payload is an
/// [InstructionReply::DowngradeRegion] reply made of the given fields.
fn new_downgrade_region_reply(
    id: u64,
    last_entry_id: Option<u64>,
    exist: bool,
    error: Option<String>,
) -> MailboxMessage {
    let reply = InstructionReply::DowngradeRegion(DowngradeRegionReply {
        last_entry_id,
        exists: exist,
        error,
    });
    let json = serde_json::to_string(&reply).unwrap();

    MailboxMessage {
        id,
        subject: "mock".to_string(),
        from: "datanode".to_string(),
        to: "meta".to_string(),
        timestamp_millis: current_time_millis(),
        payload: Some(Payload::Json(json)),
    }
}
/// No pusher is registered for the leader datanode, so sending the downgrade
/// instruction must fail with a non-retryable `PusherNotFound` error.
#[tokio::test]
async fn test_datanode_is_unreachable() {
    let step = DowngradeLeaderRegion::default();
    let env = TestingEnv::new();
    let mut ctx = env
        .context_factory()
        .new_context(new_persistent_context());
    let instruction = step.build_downgrade_region_instruction(&ctx);

    let outcome = step.downgrade_region(&mut ctx, &instruction).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::PusherNotFound { .. });
    assert!(!err.is_retryable());
}
/// A pusher is registered for the leader datanode, but its receiving half is
/// dropped before the instruction is sent; the expected outcome is a
/// non-retryable `PushMessage` error.
#[tokio::test]
async fn test_pusher_dropped() {
    let step = DowngradeLeaderRegion::default();
    let pc = new_persistent_context();
    let leader_id = pc.from_peer.id;

    let mut env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let mailbox_ctx = env.mailbox_context();
    let (pusher_tx, pusher_rx) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Datanode(leader_id), pusher_tx)
        .await;
    // Drop the receiver so that pushing the instruction fails.
    drop(pusher_rx);

    let instruction = step.build_downgrade_region_instruction(&ctx);
    let outcome = step.downgrade_region(&mut ctx, &instruction).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::PushMessage { .. });
    assert!(!err.is_retryable());
}
/// The datanode answers with a close-region reply instead of a downgrade-region
/// reply; the expected outcome is a non-retryable `UnexpectedInstructionReply`.
#[tokio::test]
async fn test_unexpected_instruction_reply() {
    let step = DowngradeLeaderRegion::default();
    let pc = new_persistent_context();
    let leader_id = pc.from_peer.id;

    let mut env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let mailbox_ctx = env.mailbox_context();
    let mailbox = mailbox_ctx.mailbox().clone();
    let (pusher_tx, pusher_rx) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Datanode(leader_id), pusher_tx)
        .await;

    // Sends an incorrect reply.
    send_mock_reply(mailbox, pusher_rx, |id| Ok(new_close_region_reply(id)));

    let instruction = step.build_downgrade_region_instruction(&ctx);
    let outcome = step.downgrade_region(&mut ctx, &instruction).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::UnexpectedInstructionReply { .. });
    assert!(!err.is_retryable());
}
/// The mailbox reports a timeout for the instruction; `downgrade_region` must
/// translate it into a retryable `RetryLater` error.
#[tokio::test]
async fn test_instruction_exceeded_deadline() {
    let step = DowngradeLeaderRegion::default();
    let pc = new_persistent_context();
    let leader_id = pc.from_peer.id;

    let mut env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let mailbox_ctx = env.mailbox_context();
    let mailbox = mailbox_ctx.mailbox().clone();
    let (pusher_tx, pusher_rx) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Datanode(leader_id), pusher_tx)
        .await;

    // Replies with a mailbox timeout error.
    send_mock_reply(mailbox, pusher_rx, |id| {
        Err(error::MailboxTimeoutSnafu { id }.build())
    });

    let instruction = step.build_downgrade_region_instruction(&ctx);
    let outcome = step.downgrade_region(&mut ctx, &instruction).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::RetryLater { .. });
    assert!(err.is_retryable());
}
/// The datanode replies with an error in the downgrade-region reply; the
/// expected outcome is a retryable `RetryLater` error that carries the
/// datanode's error text.
#[tokio::test]
async fn test_downgrade_region_failed() {
    let step = DowngradeLeaderRegion::default();
    let pc = new_persistent_context();
    let leader_id = pc.from_peer.id;

    let mut env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let mailbox_ctx = env.mailbox_context();
    let mailbox = mailbox_ctx.mailbox().clone();
    let (pusher_tx, pusher_rx) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Datanode(leader_id), pusher_tx)
        .await;

    // Replies with a downgrade reply that reports a failure.
    send_mock_reply(mailbox, pusher_rx, |id| {
        let failure = Some("test mocked".to_string());
        Ok(new_downgrade_region_reply(id, None, false, failure))
    });

    let instruction = step.build_downgrade_region_instruction(&ctx);
    let outcome = step.downgrade_region(&mut ctx, &instruction).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::RetryLater { .. });
    assert!(err.is_retryable());
    assert!(err.to_string().contains("test mocked"));
}
#[tokio::test]
async fn test_downgrade_region_with_retry_fast_path() {
let state = DowngradeLeaderRegion::default();
let persistent_context = new_persistent_context();
let from_peer_id = persistent_context.from_peer.id;
let mut env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(from_peer_id), tx)
.await;
common_runtime::spawn_bg(async move {
// retry: 0.
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(
reply_id,
Err(error::MailboxTimeoutSnafu { id: reply_id }.build()),
)
.await
.unwrap();
// retry: 1.
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(
reply_id,
Ok(new_downgrade_region_reply(reply_id, Some(1), true, None)),
)
.await
.unwrap();
});
state.downgrade_region_with_retry(&mut ctx).await;
assert_eq!(ctx.volatile_ctx.leader_region_last_entry_id, Some(1));
assert!(ctx.volatile_ctx.leader_region_lease_deadline.is_none());
}
#[tokio::test]
async fn test_downgrade_region_with_retry_slow_path() {
let state = DowngradeLeaderRegion {
optimistic_retry: 3,
retry_initial_interval: Duration::from_millis(100),
};
let persistent_context = new_persistent_context();
let from_peer_id = persistent_context.from_peer.id;
let mut env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(Channel::Datanode(from_peer_id), tx)
.await;
common_runtime::spawn_bg(async move {
for _ in 0..3 {
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(
reply_id,
Err(error::MailboxTimeoutSnafu { id: reply_id }.build()),
)
.await
.unwrap();
}
});
ctx.volatile_ctx
.set_leader_region_lease_deadline(Duration::from_secs(5));
let expected_deadline = ctx.volatile_ctx.leader_region_lease_deadline.unwrap();
state.downgrade_region_with_retry(&mut ctx).await;
assert_eq!(ctx.volatile_ctx.leader_region_last_entry_id, None);
// Should remain no change.
assert_eq!(
ctx.volatile_ctx.leader_region_lease_deadline.unwrap(),
expected_deadline
)
}
/// A successful downgrade reply makes `next` return quickly (well below half
/// of the region lease), record the last entry id, leave the lease deadline
/// unset and hand over to [UpgradeCandidateRegion].
#[tokio::test]
async fn test_next_upgrade_candidate_state() {
    let mut step = Box::<DowngradeLeaderRegion>::default();
    let pc = new_persistent_context();
    let leader_id = pc.from_peer.id;

    let mut env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let mailbox_ctx = env.mailbox_context();
    let mailbox = mailbox_ctx.mailbox().clone();
    let (pusher_tx, pusher_rx) = tokio::sync::mpsc::channel(1);
    mailbox_ctx
        .insert_heartbeat_response_receiver(Channel::Datanode(leader_id), pusher_tx)
        .await;

    send_mock_reply(mailbox, pusher_rx, |id| {
        Ok(new_downgrade_region_reply(id, Some(1), true, None))
    });

    let started = Instant::now();
    let next = step.next(&mut ctx).await.unwrap();
    let elapsed_secs = started.elapsed().as_secs();

    assert!(elapsed_secs < REGION_LEASE_SECS / 2);
    assert_eq!(ctx.volatile_ctx.leader_region_last_entry_id, Some(1));
    assert!(ctx.volatile_ctx.leader_region_lease_deadline.is_none());

    // The next state must be the upgrade-candidate step.
    let _upgrade: &UpgradeCandidateRegion = next.as_any().downcast_ref().unwrap();
}
}

View File

@@ -0,0 +1,54 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_procedure::Status;
use serde::{Deserialize, Serialize};
use crate::error::{self, Result};
use crate::procedure::region_migration::{Context, State};
/// State that aborts a region migration: its `next` always fails with a
/// `MigrationAbort` error carrying `reason`.
#[derive(Debug, Serialize, Deserialize)]
pub struct RegionMigrationAbort {
// Human-readable explanation of the abort, copied into the `MigrationAbort` error.
reason: String,
}
impl RegionMigrationAbort {
    /// Creates a [RegionMigrationAbort] that owns a copy of `reason`.
    pub fn new(reason: &str) -> Self {
        let reason = String::from(reason);
        Self { reason }
    }
}
// `State` implementation of the abort step: it never yields a next state.
#[async_trait::async_trait]
#[typetag::serde]
impl State for RegionMigrationAbort {
/// Always returns a `MigrationAbort` error built from `self.reason`;
/// the context is not used.
async fn next(&mut self, _: &mut Context) -> Result<Box<dyn State>> {
error::MigrationAbortSnafu {
reason: &self.reason,
}
.fail()
}
/// Reports [Status::Done] for this state.
fn status(&self) -> Status {
Status::Done
}
/// Exposes the state as [Any] so that callers can downcast it.
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -47,7 +47,7 @@ impl State for RegionMigrationStart {
if self.check_leader_region_on_peer(&region_route, to_peer)? {
Ok(Box::new(RegionMigrationEnd))
} else if self.check_candidate_region_on_peer(&region_route, to_peer) {
Ok(Box::new(DowngradeLeaderRegion))
Ok(Box::<DowngradeLeaderRegion>::default())
} else {
Ok(Box::new(OpenCandidateRegion))
}
@@ -137,16 +137,11 @@ mod tests {
use super::*;
use crate::error::Error;
use crate::procedure::region_migration::test_util::TestingEnv;
use crate::procedure::region_migration::test_util::{self, TestingEnv};
use crate::procedure::region_migration::{ContextFactory, PersistentContext};
fn new_persistent_context() -> PersistentContext {
PersistentContext {
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
}
test_util::new_persistent_context(1, 2, RegionId::new(1024, 1))
}
#[tokio::test]

View File

@@ -21,7 +21,7 @@ use common_meta::ddl::utils::region_storage_path;
use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
use common_meta::RegionIdent;
use serde::{Deserialize, Serialize};
use snafu::{location, Location, OptionExt, ResultExt};
use snafu::{OptionExt, ResultExt};
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
@@ -41,7 +41,7 @@ impl State for OpenCandidateRegion {
let instruction = self.build_open_region_instruction(ctx).await?;
self.open_candidate_region(ctx, instruction).await?;
Ok(Box::new(DowngradeLeaderRegion))
Ok(Box::<DowngradeLeaderRegion>::default())
}
fn as_any(&self) -> &dyn Any {
@@ -54,38 +54,28 @@ impl OpenCandidateRegion {
///
/// Abort(non-retry):
/// - Table Info is not found.
async fn build_open_region_instruction(&self, ctx: &Context) -> Result<Instruction> {
async fn build_open_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> {
let pc = &ctx.persistent_ctx;
let cluster_id = pc.cluster_id;
let table_id = pc.region_id.table_id();
let region_number = pc.region_id.region_number();
let candidate = &pc.to_peer;
let table_info = ctx
.table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(error::TableMetadataManagerSnafu)
.map_err(|e| error::Error::RetryLater {
reason: e.to_string(),
location: location!(),
})?
.context(error::TableInfoNotFoundSnafu { table_id })?
.into_inner()
.table_info;
let candidate_id = pc.to_peer.id;
let table_info_value = ctx.get_table_info_value().await?;
let table_info = &table_info_value.table_info;
// The region storage path is immutable after the region is created.
// Therefore, it's safe to store it in `VolatileContext` for future use.
let region_storage_path =
region_storage_path(&table_info.catalog_name, &table_info.schema_name);
let engine = table_info.meta.engine;
let engine = table_info.meta.engine.clone();
let region_options: HashMap<String, String> = (&table_info.meta.options).into();
let open_instruction = Instruction::OpenRegion(OpenRegion::new(
RegionIdent {
cluster_id,
datanode_id: candidate.id,
datanode_id: candidate_id,
table_id,
region_number,
engine,
@@ -197,16 +187,13 @@ mod tests {
use super::*;
use crate::error::Error;
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
use crate::procedure::region_migration::test_util::TestingEnv;
use crate::procedure::region_migration::test_util::{
self, new_close_region_reply, send_mock_reply, TestingEnv,
};
use crate::procedure::region_migration::{ContextFactory, PersistentContext};
fn new_persistent_context() -> PersistentContext {
PersistentContext {
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
}
test_util::new_persistent_context(1, 2, RegionId::new(1024, 1))
}
fn new_mock_open_instruction(datanode_id: DatanodeId, region_id: RegionId) -> Instruction {
@@ -223,23 +210,6 @@ mod tests {
})
}
fn new_close_region_reply(id: u64) -> MailboxMessage {
MailboxMessage {
id,
subject: "mock".to_string(),
from: "datanode".to_string(),
to: "meta".to_string(),
timestamp_millis: current_time_millis(),
payload: Some(Payload::Json(
serde_json::to_string(&InstructionReply::CloseRegion(SimpleReply {
result: false,
error: None,
}))
.unwrap(),
)),
}
}
fn new_open_region_reply(id: u64, result: bool, error: Option<String>) -> MailboxMessage {
MailboxMessage {
id,
@@ -259,9 +229,12 @@ mod tests {
let state = OpenCandidateRegion;
let persistent_context = new_persistent_context();
let env = TestingEnv::new();
let ctx = env.context_factory().new_context(persistent_context);
let mut ctx = env.context_factory().new_context(persistent_context);
let err = state.build_open_region_instruction(&ctx).await.unwrap_err();
let err = state
.build_open_region_instruction(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::TableInfoNotFound { .. });
assert!(!err.is_retryable());
@@ -328,21 +301,14 @@ mod tests {
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(to_peer_id, tx)
.insert_heartbeat_response_receiver(Channel::Datanode(to_peer_id), tx)
.await;
// Sends an incorrect reply.
common_runtime::spawn_bg(async move {
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(reply_id, Ok(new_close_region_reply(reply_id)))
.await
.unwrap();
});
send_mock_reply(mailbox, rx, |id| Ok(new_close_region_reply(id)));
let open_instruction = new_mock_open_instruction(to_peer_id, region_id);
let err = state
@@ -368,23 +334,15 @@ mod tests {
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(to_peer_id, tx)
.insert_heartbeat_response_receiver(Channel::Datanode(to_peer_id), tx)
.await;
// Sends an timeout error.
common_runtime::spawn_bg(async move {
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(
reply_id,
Err(error::MailboxTimeoutSnafu { id: reply_id }.build()),
)
.await
.unwrap();
send_mock_reply(mailbox, rx, |id| {
Err(error::MailboxTimeoutSnafu { id }.build())
});
let open_instruction = new_mock_open_instruction(to_peer_id, region_id);
@@ -411,26 +369,18 @@ mod tests {
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(to_peer_id, tx)
.insert_heartbeat_response_receiver(Channel::Datanode(to_peer_id), tx)
.await;
common_runtime::spawn_bg(async move {
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(
reply_id,
Ok(new_open_region_reply(
reply_id,
false,
Some("test mocked".to_string()),
)),
)
.await
.unwrap();
send_mock_reply(mailbox, rx, |id| {
Ok(new_open_region_reply(
id,
false,
Some("test mocked".to_string()),
))
});
let open_instruction = new_mock_open_instruction(to_peer_id, region_id);
@@ -471,20 +421,13 @@ mod tests {
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx
.insert_heartbeat_response_receiver(to_peer_id, tx)
.insert_heartbeat_response_receiver(Channel::Datanode(to_peer_id), tx)
.await;
common_runtime::spawn_bg(async move {
let resp = rx.recv().await.unwrap().unwrap();
let reply_id = resp.mailbox_message.unwrap().id;
mailbox
.on_recv(reply_id, Ok(new_open_region_reply(reply_id, true, None)))
.await
.unwrap();
});
send_mock_reply(mailbox, rx, |id| Ok(new_open_region_reply(id, true, None)));
let next = state.next(&mut ctx).await.unwrap();
let vc = ctx.volatile_ctx;

View File

@@ -14,20 +14,28 @@
use std::sync::Arc;
use api::v1::meta::{HeartbeatResponse, RequestHeader};
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{HeartbeatResponse, MailboxMessage, RequestHeader};
use common_meta::instruction::{InstructionReply, SimpleReply};
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::peer::Peer;
use common_meta::sequence::Sequence;
use common_meta::DatanodeId;
use common_procedure::{Context as ProcedureContext, ProcedureId};
use common_procedure_test::MockContextProvider;
use tokio::sync::mpsc::Sender;
use common_time::util::current_time_millis;
use store_api::storage::RegionId;
use tokio::sync::mpsc::{Receiver, Sender};
use super::ContextFactoryImpl;
use crate::error::Result;
use crate::handler::{HeartbeatMailbox, Pusher, Pushers};
use crate::procedure::region_migration::PersistentContext;
use crate::region::lease_keeper::{OpeningRegionKeeper, OpeningRegionKeeperRef};
use crate::service::mailbox::{Channel, MailboxRef};
/// Receiving half of a mocked heartbeat response channel; each item is either a
/// [HeartbeatResponse] or a [tonic::Status] error.
pub type MockHeartbeatReceiver = Receiver<std::result::Result<HeartbeatResponse, tonic::Status>>;
/// The context of mailbox.
pub struct MailboxContext {
mailbox: MailboxRef,
@@ -46,10 +54,10 @@ impl MailboxContext {
/// Inserts a pusher for `channel`.
pub async fn insert_heartbeat_response_receiver(
&mut self,
datanode_id: DatanodeId,
channel: Channel,
tx: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>,
) {
let pusher_id = Channel::Datanode(datanode_id).pusher_id();
let pusher_id = channel.pusher_id();
let pusher = Pusher::new(tx, &RequestHeader::default());
let _ = self.pushers.insert(pusher_id, pusher).await;
}
@@ -120,3 +128,44 @@ impl TestingEnv {
}
}
}
/// Generates a mocked [MailboxMessage] whose JSON payload is an
/// [InstructionReply::CloseRegion] reply with `result: false` and no error.
pub fn new_close_region_reply(id: u64) -> MailboxMessage {
    let reply = InstructionReply::CloseRegion(SimpleReply {
        result: false,
        error: None,
    });
    let json = serde_json::to_string(&reply).unwrap();

    MailboxMessage {
        id,
        subject: "mock".to_string(),
        from: "datanode".to_string(),
        to: "meta".to_string(),
        timestamp_millis: current_time_millis(),
        payload: Some(Payload::Json(json)),
    }
}
/// Sends a mock reply.
///
/// Spawns a background task that takes the next heartbeat response from `rx`,
/// reads the id of the mailbox message it carries and hands `msg(id)` to
/// `mailbox` as the reply for that id.
///
/// The task panics if nothing can be received, the response is an error, the
/// response carries no mailbox message, or `on_recv` fails.
pub fn send_mock_reply(
    mailbox: MailboxRef,
    mut rx: MockHeartbeatReceiver,
    msg: impl FnOnce(u64) -> Result<MailboxMessage> + Send + 'static,
) {
    let reply_once = async move {
        let response = rx.recv().await.unwrap().unwrap();
        let id = response.mailbox_message.unwrap().id;
        let reply = msg(id);
        mailbox.on_recv(id, reply).await.unwrap();
    };
    common_runtime::spawn_bg(reply_once);
}
/// Generates a [PersistentContext] for migrating `region_id` from the empty
/// peer `from` to the empty peer `to`, with `cluster_id` fixed to 0.
pub fn new_persistent_context(from: u64, to: u64, region_id: RegionId) -> PersistentContext {
    let from_peer = Peer::empty(from);
    let to_peer = Peer::empty(to);

    PersistentContext {
        from_peer,
        to_peer,
        region_id,
        cluster_id: 0,
    }
}

View File

@@ -12,20 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod downgrade_leader_region;
pub(crate) mod rollback_downgraded_region;
pub(crate) mod upgrade_candidate_region;
use std::any::Any;
use common_meta::rpc::router::RegionStatus;
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{self, Result};
use super::migration_abort::RegionMigrationAbort;
use super::migration_end::RegionMigrationEnd;
use crate::error::Result;
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
use crate::procedure::region_migration::{Context, State};
/// The metadata update step of a region migration; the variant selects which
/// metadata change `next` applies and which state follows.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "UpdateMetadata")]
pub enum UpdateMetadata {
/// Downgrades the leader region; `next` then moves on to `DowngradeLeaderRegion`.
Downgrade,
/// Upgrades the candidate region; `next` then moves on to `RegionMigrationEnd`.
Upgrade,
/// Rolls back the downgraded leader region; `next` then moves on to `RegionMigrationAbort`.
Rollback,
}
#[async_trait::async_trait]
@@ -36,7 +46,25 @@ impl State for UpdateMetadata {
UpdateMetadata::Downgrade => {
self.downgrade_leader_region(ctx).await?;
Ok(Box::new(DowngradeLeaderRegion))
Ok(Box::<DowngradeLeaderRegion>::default())
}
UpdateMetadata::Upgrade => {
self.upgrade_candidate_region(ctx).await?;
if let Err(err) = ctx.invalidate_table_cache().await {
warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}");
};
Ok(Box::new(RegionMigrationEnd))
}
UpdateMetadata::Rollback => {
self.rollback_downgraded_region(ctx).await?;
if let Err(err) = ctx.invalidate_table_cache().await {
warn!("Failed to broadcast the invalidate table cache message during the rollback, error: {err:?}");
};
Ok(Box::new(RegionMigrationAbort::new(
"Failed to upgrade the candidate region.",
)))
}
}
}
@@ -45,195 +73,3 @@ impl State for UpdateMetadata {
self
}
}
impl UpdateMetadata {
/// Downgrades the leader region.
///
/// Abort(non-retry):
/// - TableRoute is not found.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).
/// - Failed to retrieve the metadata of table.
///
/// About the failure of updating the [TableRouteValue](common_meta::key::table_region::TableRegionValue):
///
/// - There may be another [RegionMigrationProcedure](crate::procedure::region_migration::RegionMigrationProcedure)
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
///
/// - There is no other DDL procedure executed concurrently for the current table.
async fn downgrade_leader_region(&self, ctx: &mut Context) -> Result<()> {
let table_metadata_manager = ctx.table_metadata_manager.clone();
let region_id = ctx.region_id();
let table_id = region_id.table_id();
let current_table_route_value = ctx.get_table_route_value().await?;
if let Err(err) = table_metadata_manager
.update_leader_region_status(table_id, current_table_route_value, |route| {
if route.region.id == region_id {
Some(Some(RegionStatus::Downgraded))
} else {
None
}
})
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the downgrading leader region, error: {err}")
}.fail();
}
debug_assert!(ctx.remove_table_route_value());
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use store_api::storage::RegionId;
use super::*;
use crate::error::Error;
use crate::procedure::region_migration::test_util::TestingEnv;
use crate::procedure::region_migration::{ContextFactory, PersistentContext};
fn new_persistent_context() -> PersistentContext {
PersistentContext {
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
}
}
#[test]
fn test_state_serialization() {
let state = UpdateMetadata::Downgrade;
let expected = r#"{"UpdateMetadata":"Downgrade"}"#;
assert_eq!(expected, serde_json::to_string(&state).unwrap());
}
#[tokio::test]
async fn test_table_route_is_not_found_error() {
let state = UpdateMetadata::Downgrade;
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let err = state.downgrade_leader_region(&mut ctx).await.unwrap_err();
assert_matches!(err, Error::TableRouteNotFound { .. });
assert!(!err.is_retryable());
}
#[tokio::test]
async fn test_failed_to_update_table_route_error() {
let state = UpdateMetadata::Downgrade;
let persistent_context = new_persistent_context();
let from_peer = persistent_context.from_peer.clone();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_id = ctx.region_id().table_id();
let table_info = new_test_table_info(1024, vec![1, 2]).into();
let region_routes = vec![
RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(from_peer.clone()),
..Default::default()
},
RegionRoute {
region: Region::new_test(RegionId::new(1024, 2)),
leader_peer: Some(Peer::empty(4)),
..Default::default()
},
];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let original_table_route = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap();
// modifies the table route.
table_metadata_manager
.update_leader_region_status(table_id, &original_table_route, |route| {
if route.region.id == RegionId::new(1024, 2) {
Some(Some(RegionStatus::Downgraded))
} else {
None
}
})
.await
.unwrap();
// sets the old table route.
ctx.volatile_ctx.table_route_info = Some(original_table_route);
let err = state.downgrade_leader_region(&mut ctx).await.unwrap_err();
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
}
#[tokio::test]
async fn test_next_downgrade_leader_region_state() {
let mut state = Box::new(UpdateMetadata::Downgrade);
let persistent_context = new_persistent_context();
let from_peer = persistent_context.from_peer.clone();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_id = ctx.region_id().table_id();
let table_info = new_test_table_info(1024, vec![1, 2]).into();
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(from_peer.clone()),
..Default::default()
}];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let next = state.next(&mut ctx).await.unwrap();
let _ = next
.as_any()
.downcast_ref::<DowngradeLeaderRegion>()
.unwrap();
let latest_table_route = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap();
assert!(latest_table_route.region_routes[0].is_leader_downgraded());
assert!(ctx.volatile_ctx.table_route_info.is_none());
}
}

View File

@@ -0,0 +1,210 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::rpc::router::RegionStatus;
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::Context;
impl UpdateMetadata {
/// Downgrades the leader region.
///
/// Abort(non-retry):
/// - TableRoute is not found.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).
/// - Failed to retrieve the metadata of table.
///
/// About the failure of updating the [TableRouteValue](common_meta::key::table_region::TableRegionValue):
///
/// - There may be another [RegionMigrationProcedure](crate::procedure::region_migration::RegionMigrationProcedure)
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
///
/// - There is no other DDL procedure executed concurrently for the current table.
pub async fn downgrade_leader_region(&self, ctx: &mut Context) -> Result<()> {
let table_metadata_manager = ctx.table_metadata_manager.clone();
let region_id = ctx.region_id();
let table_id = region_id.table_id();
let current_table_route_value = ctx.get_table_route_value().await?;
if let Err(err) = table_metadata_manager
.update_leader_region_status(table_id, current_table_route_value, |route| {
if route.region.id == region_id {
Some(Some(RegionStatus::Downgraded))
} else {
None
}
})
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the downgrading leader region, error: {err}")
}.fail();
}
debug_assert!(ctx.remove_table_route_value());
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute, RegionStatus};
use store_api::storage::RegionId;
use crate::error::Error;
use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion;
use crate::procedure::region_migration::test_util::{self, TestingEnv};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{ContextFactory, PersistentContext, State};
/// Builds the [PersistentContext] used by these tests: region `(1024, 1)`
/// moving from peer 1 to peer 2.
fn new_persistent_context() -> PersistentContext {
    let region_id = RegionId::new(1024, 1);
    test_util::new_persistent_context(1, 2, region_id)
}
/// Pins the JSON representation of [UpdateMetadata::Downgrade].
#[test]
fn test_state_serialization() {
    let serialized = serde_json::to_string(&UpdateMetadata::Downgrade).unwrap();
    assert_eq!(r#"{"UpdateMetadata":"Downgrade"}"#, serialized);
}
/// No table metadata has been created, so downgrading must fail with a
/// non-retryable `TableRouteNotFound` error.
#[tokio::test]
async fn test_table_route_is_not_found_error() {
    let env = TestingEnv::new();
    let mut ctx = env
        .context_factory()
        .new_context(new_persistent_context());
    let step = UpdateMetadata::Downgrade;

    let outcome = step.downgrade_leader_region(&mut ctx).await;

    let err = outcome.unwrap_err();
    assert_matches!(err, Error::TableRouteNotFound { .. });
    assert!(!err.is_retryable());
}
/// The context holds an outdated table route (the stored one was modified
/// afterwards), so the update must fail with a retryable `RetryLater` error
/// and the cached table route must be cleared.
#[tokio::test]
async fn test_failed_to_update_table_route_error() {
    let step = UpdateMetadata::Downgrade;
    let pc = new_persistent_context();
    let leader = pc.from_peer.clone();
    let env = TestingEnv::new();
    let mut ctx = env.context_factory().new_context(pc);
    let table_id = ctx.region_id().table_id();

    let table_info = new_test_table_info(1024, vec![1, 2]).into();
    let migrating_region = RegionRoute {
        region: Region::new_test(RegionId::new(1024, 1)),
        leader_peer: Some(leader.clone()),
        ..Default::default()
    };
    let other_region = RegionRoute {
        region: Region::new_test(RegionId::new(1024, 2)),
        leader_peer: Some(Peer::empty(4)),
        ..Default::default()
    };

    let manager = env.table_metadata_manager();
    manager
        .create_table_metadata(table_info, vec![migrating_region, other_region])
        .await
        .unwrap();

    let stale_route = manager
        .table_route_manager()
        .get(table_id)
        .await
        .unwrap()
        .unwrap();

    // modifies the table route.
    manager
        .update_leader_region_status(table_id, &stale_route, |route| {
            let is_other_region = route.region.id == RegionId::new(1024, 2);
            is_other_region.then_some(Some(RegionStatus::Downgraded))
        })
        .await
        .unwrap();

    // sets the old table route.
    ctx.volatile_ctx.table_route = Some(stale_route);

    let outcome = step.downgrade_leader_region(&mut ctx).await;
    let err = outcome.unwrap_err();

    assert!(ctx.volatile_ctx.table_route.is_none());
    assert_matches!(err, Error::RetryLater { .. });
    assert!(err.is_retryable());
    assert!(err.to_string().contains("Failed to update the table route"));
}
/// Drives `UpdateMetadata::Downgrade` through `State::next` and checks that the
/// next state is `DowngradeLeaderRegion`, that the stored region route is marked
/// as leader-downgraded, and that the cached table route is cleared.
#[tokio::test]
async fn test_next_downgrade_leader_region_state() {
let mut state = Box::new(UpdateMetadata::Downgrade);
let persistent_context = new_persistent_context();
let from_peer = persistent_context.from_peer.clone();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_id = ctx.region_id().table_id();
let table_info = new_test_table_info(1024, vec![1, 2]).into();
// A single route for region 1, led by the migration's `from_peer`.
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(from_peer.clone()),
..Default::default()
}];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let next = state.next(&mut ctx).await.unwrap();
// The downcast succeeds only if the next state is `DowngradeLeaderRegion`.
let _ = next
.as_any()
.downcast_ref::<DowngradeLeaderRegion>()
.unwrap();
// Reads the table route back from the store to verify the persisted status.
let latest_table_route = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap();
assert!(latest_table_route.region_routes[0].is_leader_downgraded());
assert!(ctx.volatile_ctx.table_route.is_none());
}
}

View File

@@ -0,0 +1,241 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::Context;
impl UpdateMetadata {
/// Rollbacks the downgraded leader region if the candidate region is unreachable.
///
/// Abort(non-retry):
/// - TableRoute is not found.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).
/// - Failed to retrieve the metadata of table.
pub async fn rollback_downgraded_region(&self, ctx: &mut Context) -> Result<()> {
let table_metadata_manager = ctx.table_metadata_manager.clone();
let region_id = ctx.region_id();
let table_id = region_id.table_id();
let current_table_route_value = ctx.get_table_route_value().await?;
if let Err(err) = table_metadata_manager
.update_leader_region_status(table_id, current_table_route_value, |route| {
if route.region.id == region_id {
Some(None)
} else {
None
}
})
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the rollback downgraded leader region, error: {err}")
}.fail();
}
debug_assert!(ctx.remove_table_route_value());
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute, RegionStatus};
use store_api::storage::RegionId;
use crate::error::Error;
use crate::procedure::region_migration::migration_abort::RegionMigrationAbort;
use crate::procedure::region_migration::test_util::{self, TestingEnv};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{ContextFactory, PersistentContext, State};
/// Builds the [PersistentContext] shared by the tests in this module for
/// region `(1024, 1)`.
///
/// NOTE(review): the `1` and `2` arguments are presumably the source and
/// destination peer ids — confirm against `test_util::new_persistent_context`.
fn new_persistent_context() -> PersistentContext {
    let region_id = RegionId::new(1024, 1);
    test_util::new_persistent_context(1, 2, region_id)
}
/// Without any table metadata created, the rollback step must fail with a
/// non-retryable `TableRouteNotFound` error.
#[tokio::test]
async fn test_table_route_is_not_found_error() {
    let state = UpdateMetadata::Rollback;
    let env = TestingEnv::new();
    let persistent_context = new_persistent_context();
    let mut ctx = env.context_factory().new_context(persistent_context);

    // Calls the rollback step (not `downgrade_leader_region`) so that this test
    // covers the method this module is responsible for.
    let err = state
        .rollback_downgraded_region(&mut ctx)
        .await
        .unwrap_err();

    assert_matches!(err, Error::TableRouteNotFound { .. });
    assert!(!err.is_retryable());
}
/// Checks the retry path of the rollback step: the first attempt runs with an
/// outdated cached table route and fails with a retryable error, the second
/// attempt runs after the cache was cleared and succeeds.
#[tokio::test]
async fn test_update_table_route_with_retry() {
let state = UpdateMetadata::Rollback;
let persistent_context = new_persistent_context();
let from_peer = persistent_context.from_peer.clone();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_id = ctx.region_id().table_id();
let table_info = new_test_table_info(1024, vec![1, 2, 3]).into();
// Regions 1 and 2 start as downgraded; region 3 has no leader status.
let region_routes = vec![
RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(from_peer.clone()),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
},
RegionRoute {
region: Region::new_test(RegionId::new(1024, 2)),
leader_peer: Some(Peer::empty(4)),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
},
RegionRoute {
region: Region::new_test(RegionId::new(1024, 3)),
leader_peer: Some(Peer::empty(5)),
..Default::default()
},
];
// Final expectation: region 2 is reset by the direct update below and region 1
// by the successful rollback.
let expected_region_routes = {
let mut region_routes = region_routes.clone();
region_routes[0].leader_status = None;
region_routes[1].leader_status = None;
region_routes
};
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
// Snapshot of the table route taken before the modification below.
let old_table_route = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap();
// Modifies the stored table route (clears the status of region 2) so that the
// snapshot above no longer matches it.
table_metadata_manager
.update_leader_region_status(table_id, &old_table_route, |route| {
if route.region.id == RegionId::new(1024, 2) {
Some(None)
} else {
None
}
})
.await
.unwrap();
// Puts the outdated snapshot into the context as the cached table route.
ctx.volatile_ctx.table_route = Some(old_table_route);
let err = state
.rollback_downgraded_region(&mut ctx)
.await
.unwrap_err();
// The cached table route must be dropped after the failure.
assert!(ctx.volatile_ctx.table_route.is_none());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
// Second attempt: no cached table route is left in the context, and it succeeds.
state.rollback_downgraded_region(&mut ctx).await.unwrap();
let region_routes = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap()
.into_inner()
.region_routes;
assert_eq!(expected_region_routes, region_routes);
}
/// Drives `UpdateMetadata::Rollback` through `State::next` and checks that only
/// the migrating region (region 1) has its leader status cleared.
///
/// NOTE(review): despite the test name, the asserted next state is
/// `RegionMigrationAbort`, not a migration-end state.
#[tokio::test]
async fn test_next_migration_end_state() {
let mut state = Box::new(UpdateMetadata::Rollback);
let persistent_context = new_persistent_context();
let from_peer = persistent_context.from_peer.clone();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_id = ctx.region_id().table_id();
let table_info = new_test_table_info(1024, vec![1, 2, 3]).into();
// Regions 1 and 2 start as downgraded; region 3 has no leader status.
let region_routes = vec![
RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(from_peer.clone()),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
},
RegionRoute {
region: Region::new_test(RegionId::new(1024, 2)),
leader_peer: Some(Peer::empty(4)),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
},
RegionRoute {
region: Region::new_test(RegionId::new(1024, 3)),
leader_peer: Some(Peer::empty(5)),
..Default::default()
},
];
// Only region 1 is expected to change; region 2 stays downgraded.
let expected_region_routes = {
let mut region_routes = region_routes.clone();
region_routes[0].leader_status = None;
region_routes
};
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let next = state.next(&mut ctx).await.unwrap();
// The downcast succeeds only if the next state is `RegionMigrationAbort`.
let _ = next
.as_any()
.downcast_ref::<RegionMigrationAbort>()
.unwrap();
assert!(ctx.volatile_ctx.table_route.is_none());
// Reads the region routes back from the store to verify what was persisted.
let region_routes = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap()
.into_inner()
.region_routes;
assert_eq!(expected_region_routes, region_routes);
}
}

View File

@@ -0,0 +1,376 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use common_meta::ddl::utils::region_storage_path;
use common_meta::key::datanode_table::RegionInfo;
use common_meta::rpc::router::RegionRoute;
use common_telemetry::{info, warn};
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{self, Result};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::Context;
impl UpdateMetadata {
/// Returns new [Vec<RegionRoute>].
async fn build_upgrade_candidate_region_metadata(
&self,
ctx: &mut Context,
) -> Result<Vec<RegionRoute>> {
let region_id = ctx.region_id();
let table_route_value = ctx.get_table_route_value().await?.clone();
let mut region_routes = table_route_value.region_routes.clone();
let region_route = region_routes
.iter_mut()
.find(|route| route.region.id == region_id)
.context(error::RegionRouteNotFoundSnafu { region_id })?;
// Removes downgraded status.
region_route.set_leader_status(None);
let candidate = &ctx.persistent_ctx.to_peer;
let expected_old_leader = &ctx.persistent_ctx.from_peer;
// Upgrades candidate to leader.
ensure!(region_route
.leader_peer
.take_if(|old_leader| old_leader.id == expected_old_leader.id)
.is_some(),
error::UnexpectedSnafu{
violated: format!("Unexpected region leader: {:?} during the upgrading candidate metadata, expected: {:?}", region_route.leader_peer, expected_old_leader),
}
);
region_route.leader_peer = Some(candidate.clone());
info!(
"Upgrading candidate region to leader region: {:?} for region: {}",
candidate, region_id
);
// Removes the candidate region in followers.
let removed = region_route
.follower_peers
.extract_if(|peer| peer.id == candidate.id)
.collect::<Vec<_>>();
if removed.len() > 1 {
warn!(
"Removes duplicated regions: {removed:?} during the upgrading candidate metadata for region: {region_id}"
);
}
Ok(region_routes)
}
/// Upgrades the candidate region.
///
/// Abort(non-retry):
/// - TableRoute or RegionRoute is not found.
/// Typically, it's impossible, there is no other DDL procedure executed concurrently for the current table.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).
/// - Failed to retrieve the metadata of table.
pub async fn upgrade_candidate_region(&self, ctx: &mut Context) -> Result<()> {
let region_id = ctx.region_id();
let table_metadata_manager = ctx.table_metadata_manager.clone();
let region_routes = self.build_upgrade_candidate_region_metadata(ctx).await?;
let table_info_value = ctx.get_table_info_value().await?;
let table_info = &table_info_value.table_info;
let region_storage_path =
region_storage_path(&table_info.catalog_name, &table_info.schema_name);
let engine = table_info.meta.engine.clone();
let region_options: HashMap<String, String> = (&table_info.meta.options).into();
// No remote fetch.
let table_route_value = ctx.get_table_route_value().await?;
if let Err(err) = table_metadata_manager
.update_table_route(
region_id.table_id(),
RegionInfo {
engine: engine.to_string(),
region_storage_path: region_storage_path.to_string(),
region_options: region_options.clone(),
},
table_route_value,
region_routes,
&region_options,
)
.await
.context(error::TableMetadataManagerSnafu)
{
debug_assert!(ctx.remove_table_route_value());
return error::RetryLaterSnafu {
reason: format!("Failed to update the table route during the upgrading candidate region, error: {err}")
}.fail();
};
debug_assert!(ctx.remove_table_route_value());
// Consumes the guard.
ctx.volatile_ctx.opening_region_guard.take();
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_meta::key::test_utils::new_test_table_info;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute, RegionStatus};
use store_api::storage::RegionId;
use crate::error::Error;
use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
use crate::procedure::region_migration::test_util::{self, TestingEnv};
use crate::procedure::region_migration::update_metadata::UpdateMetadata;
use crate::procedure::region_migration::{ContextFactory, PersistentContext, State};
use crate::region::lease_keeper::OpeningRegionKeeper;
/// Builds the [PersistentContext] shared by the tests in this module for
/// region `(1024, 1)`.
///
/// NOTE(review): the `1` and `2` arguments are presumably the source and
/// destination peer ids — confirm against `test_util::new_persistent_context`.
fn new_persistent_context() -> PersistentContext {
    let region_id = RegionId::new(1024, 1);
    test_util::new_persistent_context(1, 2, region_id)
}
/// Without any table metadata created, building the upgrade metadata must fail
/// with a non-retryable `TableRouteNotFound` error.
#[tokio::test]
async fn test_table_route_is_not_found_error() {
    let env = TestingEnv::new();
    let mut ctx = env
        .context_factory()
        .new_context(new_persistent_context());

    let result = UpdateMetadata::Upgrade
        .build_upgrade_candidate_region_metadata(&mut ctx)
        .await;
    let err = result.unwrap_err();

    assert_matches!(err, Error::TableRouteNotFound { .. });
    assert!(!err.is_retryable());
}
/// Checks that building the upgrade metadata fails with a non-retryable
/// `RegionRouteNotFound` error when the table route exists but has no route for
/// the migrating region.
#[tokio::test]
async fn test_region_route_is_not_found() {
let state = UpdateMetadata::Upgrade;
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_info = new_test_table_info(1024, vec![2]).into();
// Only region 2 has a route; the context targets region 1.
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(1024, 2)),
leader_peer: Some(Peer::empty(4)),
..Default::default()
}];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let err = state
.build_upgrade_candidate_region_metadata(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::RegionRouteNotFound { .. });
assert!(!err.is_retryable());
}
/// Checks that building the upgrade metadata fails with a non-retryable
/// `Unexpected` error when the stored leader of the migrating region is a peer
/// other than the one the migration expects.
#[tokio::test]
async fn test_region_route_expected_leader() {
let state = UpdateMetadata::Upgrade;
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_info = new_test_table_info(1024, vec![1]).into();
// The leader is peer 3, which the upgrade step rejects as an unexpected leader.
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(Peer::empty(3)),
..Default::default()
}];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let err = state
.build_upgrade_candidate_region_metadata(&mut ctx)
.await
.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(!err.is_retryable());
assert!(err.to_string().contains("Unexpected region leader"));
}
/// Checks the happy path of building the upgrade metadata: the downgraded status
/// is cleared, peer 2 becomes the leader and is removed from the followers.
#[tokio::test]
async fn test_build_upgrade_candidate_region_metadata() {
let state = UpdateMetadata::Upgrade;
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let table_info = new_test_table_info(1024, vec![1]).into();
// Region 1 is led by peer 1 (downgraded) with peers 2 and 3 as followers.
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(1024, 1)),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![Peer::empty(2), Peer::empty(3)],
leader_status: Some(RegionStatus::Downgraded),
}];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let new_region_routes = state
.build_upgrade_candidate_region_metadata(&mut ctx)
.await
.unwrap();
assert!(!new_region_routes[0].is_leader_downgraded());
// Peer 2 was promoted, so only peer 3 remains as a follower.
assert_eq!(new_region_routes[0].follower_peers, vec![Peer::empty(3)]);
assert_eq!(new_region_routes[0].leader_peer.as_ref().unwrap().id, 2);
}
/// Checks that the upgrade step fails with a retryable `RetryLater` error when
/// the table route cached in the context is older than the stored one. After the
/// failure the cached table route is cleared while the opening region guard is
/// still held.
#[tokio::test]
async fn test_failed_to_update_table_route_error() {
let state = UpdateMetadata::Upgrade;
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let opening_keeper = OpeningRegionKeeper::default();
let table_id = 1024;
let table_info = new_test_table_info(table_id, vec![1]).into();
// Both regions start as downgraded; region 1 is the migrating region.
let region_routes = vec![
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 1)),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![Peer::empty(5), Peer::empty(3)],
leader_status: Some(RegionStatus::Downgraded),
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 2)),
leader_peer: Some(Peer::empty(4)),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
},
];
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
// Snapshot of the table route taken before the modification below.
let original_table_route = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap();
// Modifies the stored table route so that the snapshot above no longer matches it.
table_metadata_manager
.update_leader_region_status(table_id, &original_table_route, |route| {
if route.region.id == RegionId::new(1024, 2) {
// Removes the status.
Some(None)
} else {
None
}
})
.await
.unwrap();
// Puts the outdated snapshot into the context as the cached table route.
ctx.volatile_ctx.table_route = Some(original_table_route);
let guard = opening_keeper
.register(2, RegionId::new(table_id, 1))
.unwrap();
ctx.volatile_ctx.opening_region_guard = Some(guard);
let err = state.upgrade_candidate_region(&mut ctx).await.unwrap_err();
// The cached table route is dropped, but the guard must survive a failed attempt.
assert!(ctx.volatile_ctx.table_route.is_none());
assert!(ctx.volatile_ctx.opening_region_guard.is_some());
assert_matches!(err, Error::RetryLater { .. });
assert!(err.is_retryable());
assert!(err.to_string().contains("Failed to update the table route"));
}
/// Drives `UpdateMetadata::Upgrade` through `State::next` and checks that the
/// next state is `RegionMigrationEnd`, that the stored route now has peer 2 as a
/// non-downgraded leader without followers, and that both the cached table route
/// and the opening region guard are gone from the context.
#[tokio::test]
async fn test_next_migration_end_state() {
let mut state = Box::new(UpdateMetadata::Upgrade);
let env = TestingEnv::new();
let persistent_context = new_persistent_context();
let mut ctx = env.context_factory().new_context(persistent_context);
let opening_keeper = OpeningRegionKeeper::default();
let table_id = 1024;
let table_info = new_test_table_info(table_id, vec![1]).into();
// Region 1 is led by peer 1 and marked as downgraded.
let region_routes = vec![RegionRoute {
region: Region::new_test(RegionId::new(table_id, 1)),
leader_peer: Some(Peer::empty(1)),
leader_status: Some(RegionStatus::Downgraded),
..Default::default()
}];
// Registers a guard for region 1 on datanode 2 and hands it to the context.
let guard = opening_keeper
.register(2, RegionId::new(table_id, 1))
.unwrap();
ctx.volatile_ctx.opening_region_guard = Some(guard);
let table_metadata_manager = env.table_metadata_manager();
table_metadata_manager
.create_table_metadata(table_info, region_routes)
.await
.unwrap();
let next = state.next(&mut ctx).await.unwrap();
// The downcast succeeds only if the next state is `RegionMigrationEnd`.
let _ = next.as_any().downcast_ref::<RegionMigrationEnd>().unwrap();
// Reads the region routes back from the store to verify what was persisted.
let region_routes = table_metadata_manager
.table_route_manager()
.get(table_id)
.await
.unwrap()
.unwrap()
.into_inner()
.region_routes;
assert!(ctx.volatile_ctx.table_route.is_none());
assert!(ctx.volatile_ctx.opening_region_guard.is_none());
assert_eq!(region_routes.len(), 1);
assert!(!region_routes[0].is_leader_downgraded());
assert!(region_routes[0].follower_peers.is_empty());
assert_eq!(region_routes[0].leader_peer.as_ref().unwrap().id, 2);
}
}

View File

@@ -0,0 +1,36 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::procedure::region_migration::{Context, State};
/// A [State] of the region migration procedure; presumably the step that
/// upgrades the candidate region (inferred from the name — TODO confirm).
///
/// NOTE(review): this is a placeholder, `next` is not implemented yet.
#[derive(Debug, Serialize, Deserialize)]
pub struct UpgradeCandidateRegion;
#[async_trait::async_trait]
#[typetag::serde]
impl State for UpgradeCandidateRegion {
/// Not implemented yet: calling this panics via `todo!()`.
async fn next(&mut self, _ctx: &mut Context) -> Result<Box<dyn State>> {
todo!();
}
/// Returns `self` as [Any].
fn as_any(&self) -> &dyn Any {
self
}
}
// No inherent methods yet.
impl UpgradeCandidateRegion {}

View File

@@ -21,11 +21,13 @@ use std::sync::{Arc, RwLock};
use common_meta::key::table_route::TableRouteValue;
use common_meta::key::TableMetadataManagerRef;
use common_meta::DatanodeId;
use common_telemetry::warn;
use snafu::ResultExt;
use store_api::storage::{RegionId, TableId};
use self::mito::find_staled_leader_regions;
use crate::error::{self, Result};
use crate::metrics;
use crate::region::lease_keeper::utils::find_staled_follower_regions;
pub type RegionLeaseKeeperRef = Arc<RegionLeaseKeeper>;
@@ -89,7 +91,11 @@ impl RegionLeaseKeeper {
) -> Result<(HashSet<RegionId>, HashSet<RegionId>)> {
let tables = self.collect_tables(datanode_regions);
let table_ids = tables.keys().copied().collect::<Vec<_>>();
let metadata_subset = self.collect_tables_metadata(&table_ids).await?;
let metadata_subset = {
let _timer = metrics::METRIC_META_LOAD_LEADER_METADATA.start_timer();
self.collect_tables_metadata(&table_ids).await?
};
let mut closable_set = HashSet::new();
let mut downgradable_set = HashSet::new();
@@ -104,6 +110,10 @@ impl RegionLeaseKeeper {
downgradable_set.extend(downgradable);
closable_set.extend(closable);
} else {
warn!(
"The table {} metadata is not found, appends closable leader regions: {:?}",
table_id, regions
);
// If table metadata is not found.
closable_set.extend(regions);
}
@@ -128,7 +138,11 @@ impl RegionLeaseKeeper {
) -> Result<(HashSet<RegionId>, HashSet<RegionId>)> {
let tables = self.collect_tables(datanode_regions);
let table_ids = tables.keys().copied().collect::<Vec<_>>();
let metadata_subset = self.collect_tables_metadata(&table_ids).await?;
let metadata_subset = {
let _timer = metrics::METRIC_META_LOAD_FOLLOWER_METADATA.start_timer();
self.collect_tables_metadata(&table_ids).await?
};
let mut upgradable_set = HashSet::new();
let mut closable_set = HashSet::new();
@@ -143,6 +157,10 @@ impl RegionLeaseKeeper {
upgradable_set.extend(upgradable);
closable_set.extend(closable);
} else {
warn!(
"The table {} metadata is not found, appends closable followers regions: {:?}",
table_id, regions
);
// If table metadata is not found.
closable_set.extend(regions);
}

View File

@@ -260,6 +260,10 @@ impl KvBackend for LeaderCachedKvBackend {
.iter()
.map(|kv| kv.key.clone())
.collect::<HashSet<_>>();
let hit_rate = hit_keys.len() as f64 / req.keys.len() as f64;
metrics::METRIC_META_KV_CACHE_BATCH_GET_HIT_RATE.set(hit_rate);
let missed_keys = req
.keys
.iter()

View File

@@ -28,9 +28,6 @@ use crate::memtable::BoxedBatchIterator;
use crate::metrics::{MERGE_FILTER_ROWS_TOTAL, READ_STAGE_ELAPSED};
use crate::read::{Batch, BatchReader, BoxedBatchReader, Source};
/// Minimum batch size to output.
const MIN_BATCH_SIZE: usize = 64;
/// Reader to merge sorted batches.
///
/// The merge reader merges [Batch]es from multiple sources that yield sorted batches.
@@ -49,11 +46,8 @@ pub struct MergeReader {
///
/// `Node` in this heap **must** not be empty.
cold: BinaryHeap<Node>,
/// Batches to output.
batch_merger: BatchMerger,
/// Suggested size of each batch. The batch returned by the reader can have more rows than the
/// batch size.
batch_size: usize,
/// Batch to output.
output_batch: Option<Batch>,
/// Local metrics.
metrics: Metrics,
}
@@ -62,15 +56,7 @@ pub struct MergeReader {
impl BatchReader for MergeReader {
async fn next_batch(&mut self) -> Result<Option<Batch>> {
let start = Instant::now();
while !self.hot.is_empty() && self.batch_merger.num_rows() < self.batch_size {
if let Some(current_key) = self.batch_merger.primary_key() {
// If the hottest node has a different key, we have finished collecting the current key.
// Safety: hot is not empty.
if self.hot.peek().unwrap().primary_key() != current_key {
break;
}
}
while !self.hot.is_empty() && self.output_batch.is_none() {
if self.hot.len() == 1 {
// No need to do merge sort if only one batch in the hot heap.
self.fetch_batch_from_hottest().await?;
@@ -82,17 +68,14 @@ impl BatchReader for MergeReader {
}
}
if self.batch_merger.is_empty() {
if let Some(batch) = self.output_batch.take() {
self.metrics.scan_cost += start.elapsed();
self.metrics.num_output_rows += batch.num_rows();
Ok(Some(batch))
} else {
// Nothing fetched.
self.metrics.scan_cost += start.elapsed();
// Update deleted rows num.
self.metrics.num_deleted_rows = self.batch_merger.num_deleted_rows();
Ok(None)
} else {
let batch = self.batch_merger.merge_batches()?;
self.metrics.scan_cost += start.elapsed();
self.metrics.num_output_rows += batch.as_ref().map(|b| b.num_rows()).unwrap_or(0);
Ok(batch)
}
}
}
@@ -115,7 +98,7 @@ impl Drop for MergeReader {
impl MergeReader {
/// Creates and initializes a new [MergeReader].
pub async fn new(sources: Vec<Source>, batch_size: usize) -> Result<MergeReader> {
pub async fn new(sources: Vec<Source>) -> Result<MergeReader> {
let start = Instant::now();
let mut metrics = Metrics::default();
@@ -132,8 +115,7 @@ impl MergeReader {
let mut reader = MergeReader {
hot,
cold,
batch_merger: BatchMerger::new(),
batch_size,
output_batch: None,
metrics,
};
// Initializes the reader.
@@ -168,7 +150,7 @@ impl MergeReader {
let mut hottest = self.hot.pop().unwrap();
let batch = hottest.fetch_batch(&mut self.metrics).await?;
self.batch_merger.push(batch)?;
Self::maybe_output_batch(batch, &mut self.output_batch, &mut self.metrics)?;
self.reheap(hottest)
}
@@ -199,7 +181,11 @@ impl MergeReader {
// They have duplicate timestamps. Outputs timestamps before the duplicated timestamp.
// Batch itself doesn't contain duplicate timestamps so timestamps before `pos`
// must be less than `next_min_ts`.
self.batch_merger.push(top.slice(0, pos))?;
Self::maybe_output_batch(
top.slice(0, pos),
&mut self.output_batch,
&mut self.metrics,
)?;
// This keeps the duplicate timestamp in the node.
top_node.skip_rows(pos, &mut self.metrics).await?;
// The merge window should contain this timestamp so only nodes in the hot heap
@@ -209,7 +195,11 @@ impl MergeReader {
}
Err(pos) => {
// No duplicate timestamp. Outputs timestamp before `pos`.
self.batch_merger.push(top.slice(0, pos))?;
Self::maybe_output_batch(
top.slice(0, pos),
&mut self.output_batch,
&mut self.metrics,
)?;
top_node.skip_rows(pos, &mut self.metrics).await?;
self.reheap(top_node)?;
}
@@ -300,16 +290,37 @@ impl MergeReader {
Ok(())
}
/// Filters deleted entries out of the `batch` and stores it into `output_batch`.
///
/// The number of rows dropped by the filter is added to `metrics.num_deleted_rows`.
/// Leaves `output_batch` untouched if the `batch` is empty after filtering.
fn maybe_output_batch(
    mut batch: Batch,
    output_batch: &mut Option<Batch>,
    metrics: &mut Metrics,
) -> Result<()> {
    // In debug builds, checks that there is no pending output to overwrite.
    debug_assert!(output_batch.is_none());

    let rows_before = batch.num_rows();
    batch.filter_deleted()?;
    let rows_after = batch.num_rows();
    // Update deleted rows metrics.
    metrics.num_deleted_rows += rows_before - rows_after;

    if !batch.is_empty() {
        *output_batch = Some(batch);
    }

    Ok(())
}
}
/// Builder to build and initialize a [MergeReader].
#[derive(Default)]
pub struct MergeReaderBuilder {
/// Input sources.
///
/// All source must yield batches with the same schema.
sources: Vec<Source>,
/// Batch size of the reader.
batch_size: usize,
}
impl MergeReaderBuilder {
@@ -330,25 +341,10 @@ impl MergeReaderBuilder {
self
}
/// Sets the batch size of the reader.
pub fn batch_size(&mut self, size: usize) -> &mut Self {
self.batch_size = if size == 0 { MIN_BATCH_SIZE } else { size };
self
}
/// Builds and initializes the reader, then resets the builder.
pub async fn build(&mut self) -> Result<MergeReader> {
let sources = mem::take(&mut self.sources);
MergeReader::new(sources, self.batch_size).await
}
}
impl Default for MergeReaderBuilder {
fn default() -> Self {
MergeReaderBuilder {
sources: Vec::new(),
batch_size: MIN_BATCH_SIZE,
}
MergeReader::new(sources).await
}
}
@@ -371,89 +367,6 @@ struct Metrics {
num_deleted_rows: usize,
}
/// Helper to collect and merge small batches for same primary key.
struct BatchMerger {
/// Buffered non-empty batches to merge.
batches: Vec<Batch>,
/// Total number of rows in the buffered batches.
num_rows: usize,
/// Number of rows removed by `filter_deleted()` across all pushed batches.
/// It is not reset by `merge_batches()`.
num_deleted_rows: usize,
}
impl BatchMerger {
/// Returns an empty merger.
fn new() -> BatchMerger {
BatchMerger {
batches: Vec::new(),
num_rows: 0,
num_deleted_rows: 0,
}
}
/// Returns the number of rows currently buffered.
fn num_rows(&self) -> usize {
self.num_rows
}
/// Returns the number of rows deleted.
fn num_deleted_rows(&self) -> usize {
self.num_deleted_rows
}
/// Returns true if the merger is empty, i.e. it buffers no rows.
fn is_empty(&self) -> bool {
self.num_rows() == 0
}
/// Returns the primary key of current merger and `None` if the merger is empty.
fn primary_key(&self) -> Option<&[u8]> {
self.batches.first().map(|batch| batch.primary_key())
}
/// Removes deleted entries and pushes a `batch` into the merger.
///
/// Ignores the `batch` if it is empty after removing the deleted entries.
///
/// # Panics
/// Panics if the `batch` has another primary key. The check is a `debug_assert!`,
/// so it only runs when debug assertions are enabled.
fn push(&mut self, mut batch: Batch) -> Result<()> {
debug_assert!(self
.batches
.last()
.map(|b| b.primary_key() == batch.primary_key())
.unwrap_or(true));
let num_rows = batch.num_rows();
batch.filter_deleted()?;
// Rows dropped by the filter count as deleted.
self.num_deleted_rows += num_rows - batch.num_rows();
if batch.is_empty() {
return Ok(());
}
self.num_rows += batch.num_rows();
self.batches.push(batch);
Ok(())
}
/// Merge all buffered batches and returns the merged batch. Then
/// reset the buffer.
///
/// Returns `Ok(None)` if nothing is buffered.
fn merge_batches(&mut self) -> Result<Option<Batch>> {
if self.batches.is_empty() {
return Ok(None);
}
// Reset number of rows.
self.num_rows = 0;
// A single buffered batch is returned as is, without concatenation.
if self.batches.len() == 1 {
return Ok(self.batches.pop());
}
let batches = mem::take(&mut self.batches);
Batch::concat(batches).map(Some)
}
}
/// A `Node` represent an individual input data source to be merged.
struct Node {
/// Data source of this `Node`.
@@ -669,17 +582,19 @@ mod tests {
&[
new_batch(
b"k1",
&[1, 2, 4, 5, 7],
&[11, 12, 14, 15, 17],
&[
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
],
&[21, 22, 24, 25, 27],
&[1, 2],
&[11, 12],
&[OpType::Put, OpType::Put],
&[21, 22],
),
new_batch(
b"k1",
&[4, 5],
&[14, 15],
&[OpType::Put, OpType::Put],
&[24, 25],
),
new_batch(b"k1", &[7], &[17], &[OpType::Put], &[27]),
new_batch(b"k2", &[3], &[13], &[OpType::Put], &[23]),
],
)
@@ -718,13 +633,10 @@ mod tests {
check_reader_result(
&mut reader,
&[
new_batch(
b"k1",
&[1, 2, 3, 4],
&[10, 11, 10, 11],
&[OpType::Put, OpType::Put, OpType::Put, OpType::Put],
&[21, 32, 23, 34],
),
new_batch(b"k1", &[1], &[10], &[OpType::Put], &[21]),
new_batch(b"k1", &[2], &[11], &[OpType::Put], &[32]),
new_batch(b"k1", &[3], &[10], &[OpType::Put], &[23]),
new_batch(b"k1", &[4], &[11], &[OpType::Put], &[34]),
new_batch(b"k2", &[3], &[10], &[OpType::Put], &[23]),
],
)
@@ -785,18 +697,16 @@ mod tests {
&[
new_batch(
b"k1",
&[1, 2, 3, 4],
&[11, 12, 10, 14],
&[OpType::Put, OpType::Put, OpType::Put, OpType::Put],
&[21, 22, 33, 24],
),
new_batch(
b"k2",
&[1, 3, 10],
&[11, 13, 20],
&[OpType::Put, OpType::Put, OpType::Put],
&[21, 23, 30],
&[1, 2],
&[11, 12],
&[OpType::Put, OpType::Put],
&[21, 22],
),
new_batch(b"k1", &[3], &[10], &[OpType::Put], &[33]),
new_batch(b"k1", &[4], &[14], &[OpType::Put], &[24]),
new_batch(b"k2", &[1], &[11], &[OpType::Put], &[21]),
new_batch(b"k2", &[3], &[13], &[OpType::Put], &[23]),
new_batch(b"k2", &[10], &[20], &[OpType::Put], &[30]),
],
)
.await;
@@ -900,13 +810,16 @@ mod tests {
.unwrap();
check_reader_result(
&mut reader,
&[new_batch(
b"k1",
&[1, 2, 3],
&[10, 11, 11],
&[OpType::Put, OpType::Put, OpType::Put],
&[21, 32, 33],
)],
&[
new_batch(b"k1", &[1], &[10], &[OpType::Put], &[21]),
new_batch(
b"k1",
&[2, 3],
&[11, 11],
&[OpType::Put, OpType::Put],
&[32, 33],
),
],
)
.await;
}
@@ -945,19 +858,18 @@ mod tests {
.unwrap();
check_reader_result(
&mut reader,
&[new_batch(
b"k1",
&[1, 6, 8, 10, 20],
&[11, 11, 11, 10, 11],
&[
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
OpType::Put,
],
&[31, 36, 38, 30, 40],
)],
&[
new_batch(b"k1", &[1], &[11], &[OpType::Put], &[31]),
new_batch(
b"k1",
&[6, 8],
&[11, 11],
&[OpType::Put, OpType::Put],
&[36, 38],
),
new_batch(b"k1", &[10], &[10], &[OpType::Put], &[30]),
new_batch(b"k1", &[20], &[11], &[OpType::Put], &[40]),
],
)
.await;
}
@@ -965,7 +877,6 @@ mod tests {
#[tokio::test]
async fn test_merge_many_duplicates() {
let mut builder = MergeReaderBuilder::new();
builder.batch_size(3);
for i in 0..10 {
let batches: Vec<_> = (0..8)
.map(|ts| new_batch(b"k1", &[ts], &[i], &[OpType::Put], &[100]))
@@ -974,184 +885,9 @@ mod tests {
builder.push_batch_reader(Box::new(reader));
}
let mut reader = builder.build().await.unwrap();
check_reader_result(
&mut reader,
&[
new_batch(
b"k1",
&[0, 1, 2],
&[9, 9, 9],
&[OpType::Put, OpType::Put, OpType::Put],
&[100, 100, 100],
),
new_batch(
b"k1",
&[3, 4, 5],
&[9, 9, 9],
&[OpType::Put, OpType::Put, OpType::Put],
&[100, 100, 100],
),
new_batch(
b"k1",
&[6, 7],
&[9, 9],
&[OpType::Put, OpType::Put],
&[100, 100],
),
],
)
.await;
}
#[tokio::test]
async fn test_merge_more_than_batch_size() {
let batches: Vec<_> = (0..MIN_BATCH_SIZE as i64 * 2)
.map(|ts| new_batch(b"k1", &[ts], &[10], &[OpType::Put], &[100]))
let expect: Vec<_> = (0..8)
.map(|ts| new_batch(b"k1", &[ts], &[9], &[OpType::Put], &[100]))
.collect();
let reader = VecBatchReader::new(&batches);
let mut reader = MergeReaderBuilder::new()
.push_batch_reader(Box::new(reader))
// Still use the default batch size.
.batch_size(0)
.build()
.await
.unwrap();
let ts1: Vec<_> = (0..MIN_BATCH_SIZE as i64).collect();
let ts2: Vec<_> = (MIN_BATCH_SIZE as i64..MIN_BATCH_SIZE as i64 * 2).collect();
let seqs = vec![10; MIN_BATCH_SIZE];
let op_types = vec![OpType::Put; MIN_BATCH_SIZE];
let fields = vec![100; MIN_BATCH_SIZE];
check_reader_result(
&mut reader,
&[
new_batch(b"k1", &ts1, &seqs, &op_types, &fields),
new_batch(b"k1", &ts2, &seqs, &op_types, &fields),
],
)
.await;
}
/// Merges two sources that both cover timestamps 1..=9 of key `k1` with
/// alternating sequences (10/11) and a batch size of 3.
///
/// Expects three batches of three rows each, where every row carries
/// sequence 11 and the field value of the source that supplied it.
#[tokio::test]
async fn test_merge_more_than_batch_size_overlapping() {
    let nine_puts = vec![OpType::Put; 9];
    let three_puts = vec![OpType::Put; 3];

    // Sequence 11 on odd timestamps.
    let odd_newer = VecBatchReader::new(&[new_batch(
        b"k1",
        &[1, 2, 3, 4, 5, 6, 7, 8, 9],
        &[11, 10, 11, 10, 11, 10, 11, 10, 11],
        &nine_puts,
        &[21, 22, 23, 24, 25, 26, 27, 28, 29],
    )]);
    // Sequence 11 on even timestamps.
    let even_newer = VecBatchReader::new(&[new_batch(
        b"k1",
        &[1, 2, 3, 4, 5, 6, 7, 8, 9],
        &[10, 11, 10, 11, 10, 11, 10, 11, 10],
        &nine_puts,
        &[31, 32, 33, 34, 35, 36, 37, 38, 39],
    )]);

    let mut builder = MergeReaderBuilder::new();
    builder
        .push_batch_iter(Box::new(odd_newer))
        .push_batch_reader(Box::new(even_newer))
        .batch_size(3);
    let mut reader = builder.build().await.unwrap();

    let expect = [
        new_batch(b"k1", &[1, 2, 3], &[11, 11, 11], &three_puts, &[21, 32, 23]),
        new_batch(b"k1", &[4, 5, 6], &[11, 11, 11], &three_puts, &[34, 25, 36]),
        new_batch(b"k1", &[7, 8, 9], &[11, 11, 11], &three_puts, &[27, 38, 29]),
    ];
    check_reader_result(&mut reader, &expect).await;
}
/// A freshly created merger reports itself as empty, yields no merged batch
/// and has no primary key.
#[test]
fn test_batch_merger_empty() {
    let mut merger = BatchMerger::new();
    assert!(merger.is_empty());

    let merged = merger.merge_batches().unwrap();
    assert!(merged.is_none());

    let key = merger.primary_key();
    assert!(key.is_none());
}
/// Pushing a single one-row batch and merging returns that same batch and
/// leaves the merger empty.
#[test]
fn test_merge_one_batch() {
    let expect = new_batch(b"k1", &[1], &[10], &[OpType::Put], &[21]);

    let mut merger = BatchMerger::new();
    merger.push(expect.clone()).unwrap();

    let merged = merger.merge_batches().unwrap().unwrap();
    assert_eq!(1, merged.num_rows());
    assert_eq!(expect, merged);
    assert!(merger.is_empty());
}
#[test]
fn test_merge_batches() {
let mut merger = BatchMerger::new();
merger
.push(new_batch(b"k1", &[1], &[10], &[OpType::Put], &[21]))
.unwrap();
assert_eq!(1, merger.num_rows());
assert!(!merger.is_empty());
merger
.push(new_batch(b"k1", &[2], &[10], &[OpType::Put], &[22]))
.unwrap();
assert_eq!(2, merger.num_rows());
merger
.push(new_batch(b"k1", &[3], &[10], &[OpType::Delete], &[23]))
.unwrap();
assert_eq!(2, merger.num_rows());
let batch = merger.merge_batches().unwrap().unwrap();
assert_eq!(2, batch.num_rows());
assert_eq!(
batch,
new_batch(
b"k1",
&[1, 2],
&[10, 10],
&[OpType::Put, OpType::Put,],
&[21, 22]
)
);
assert!(merger.is_empty());
assert_eq!(1, merger.num_deleted_rows());
check_reader_result(&mut reader, &expect).await;
}
}

View File

@@ -18,7 +18,7 @@ use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use datafusion::error::DataFusionError;
use promql_parser::parser::{Expr as PromExpr, TokenType};
use promql_parser::parser::{Expr as PromExpr, TokenType, VectorMatchCardinality};
use snafu::{Location, Snafu};
#[derive(Snafu)]
@@ -28,6 +28,12 @@ pub enum Error {
#[snafu(display("Unsupported expr type: {}", name))]
UnsupportedExpr { name: String, location: Location },
#[snafu(display("Unsupported vector matches: {:?}", name))]
UnsupportedVectorMatch {
name: VectorMatchCardinality,
location: Location,
},
#[snafu(display("Unexpected token: {:?}", token))]
UnexpectedToken {
token: TokenType,
@@ -112,6 +118,17 @@ pub enum Error {
#[snafu(display("Invalid function argument for {}", fn_name))]
FunctionInvalidArgument { fn_name: String, location: Location },
#[snafu(display(
"Attempt to combine two tables with different column sets, left: {:?}, right: {:?}",
left,
right
))]
CombineTableColumnMismatch {
left: Vec<String>,
right: Vec<String>,
location: Location,
},
}
impl ErrorExt for Error {
@@ -128,7 +145,9 @@ impl ErrorExt for Error {
| ZeroRangeSelector { .. }
| ColumnNotFound { .. }
| Deserialize { .. }
| FunctionInvalidArgument { .. } => StatusCode::InvalidArguments,
| FunctionInvalidArgument { .. }
| UnsupportedVectorMatch { .. }
| CombineTableColumnMismatch { .. } => StatusCode::InvalidArguments,
UnknownTable { .. }
| DataFusionPlanning { .. }

View File

@@ -35,19 +35,20 @@ use datafusion::sql::TableReference;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME};
use promql_parser::parser::{
token, AggregateExpr, BinaryExpr as PromBinaryExpr, Call, EvalStmt, Expr as PromExpr, Function,
LabelModifier, MatrixSelector, NumberLiteral, Offset, ParenExpr, StringLiteral, SubqueryExpr,
TokenType, UnaryExpr, VectorSelector,
token, AggregateExpr, BinModifier, BinaryExpr as PromBinaryExpr, Call, EvalStmt,
Expr as PromExpr, Function, LabelModifier, MatrixSelector, NumberLiteral, Offset, ParenExpr,
StringLiteral, SubqueryExpr, TokenType, UnaryExpr, VectorMatchCardinality, VectorSelector,
};
use snafu::{ensure, OptionExt, ResultExt};
use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
CatalogSnafu, ColumnNotFoundSnafu, DataFusionPlanningSnafu, ExpectExprSnafu,
ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, MultipleMetricMatchersSnafu,
MultipleVectorSnafu, NoMetricMatcherSnafu, Result, TableNameNotFoundSnafu,
TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
UnsupportedExprSnafu, ValueNotFoundSnafu, ZeroRangeSelectorSnafu,
CatalogSnafu, ColumnNotFoundSnafu, CombineTableColumnMismatchSnafu, DataFusionPlanningSnafu,
ExpectExprSnafu, ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu,
MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, Result,
TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu,
UnknownTableSnafu, UnsupportedExprSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu,
ZeroRangeSelectorSnafu,
};
use crate::extension_plan::{
build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond,
@@ -268,14 +269,29 @@ impl PromPlanner {
let left_field_columns = self.ctx.field_columns.clone();
let left_table_ref: OwnedTableReference =
self.ctx.table_name.clone().unwrap_or_default().into();
let left_tag_cols = self.ctx.tag_columns.clone();
let right_input = self.prom_expr_to_plan(*rhs.clone()).await?;
let right_field_columns = self.ctx.field_columns.clone();
let right_table_ref: OwnedTableReference =
self.ctx.table_name.clone().unwrap_or_default().into();
let right_tag_cols = self.ctx.tag_columns.clone();
// TODO(ruihang): avoid join if left and right are the same table
// set op has "special" join semantics
if Self::is_token_a_set_op(*op) {
return self.set_op_on_non_field_columns(
left_input,
right_input,
left_tag_cols,
right_tag_cols,
*op,
modifier,
);
}
// normal join
let mut field_columns =
left_field_columns.iter().zip(right_field_columns.iter());
let join_plan = self.join_on_non_field_columns(
@@ -1310,6 +1326,16 @@ impl PromPlanner {
)
}
/// Returns `true` if `token` is one of the PromQL set operators.
///
/// The SQL counterparts are: `T_LAND` - INTERSECT, `T_LOR` - UNION,
/// `T_LUNLESS` - EXCEPT.
fn is_token_a_set_op(token: TokenType) -> bool {
    let id = token.id();
    id == token::T_LAND || id == token::T_LOR || id == token::T_LUNLESS
}
/// Build an inner join on time index column and tag columns to concat two logical plans.
fn join_on_non_field_columns(
&self,
@@ -1351,6 +1377,107 @@ impl PromPlanner {
.context(DataFusionPlanningSnafu)
}
/// Build the plan of a PromQL set operation by joining `left` and `right` on
/// their tag columns plus the time index column.
///
/// `AND` is planned as a left-semi join and `UNLESS` as a left-anti join, both
/// on top of a `distinct` of the left input. `OR` is rejected as unsupported.
///
/// The optional `modifier` narrows the tag columns used for matching:
/// - `on(...)` keeps only the listed labels;
/// - `ignoring(...)` drops the listed labels (unknown labels are skipped silently).
///
/// Join keys are the remaining tag columns in sorted order followed by the
/// time index column, so the generated plan does not depend on hash-set
/// iteration order.
///
/// # Errors
/// - `UnsupportedVectorMatch` if the modifier's cardinality is neither
///   one-to-one nor many-to-many.
/// - `CombineTableColumnMismatch` if, for an operator other than `OR`, the two
///   sides end up with different tag column sets.
/// - `UnsupportedExpr` for `OR`, `UnexpectedToken` for any other operator that
///   is not `AND` / `UNLESS`.
/// - `DataFusionPlanning` if building the plan fails.
///
/// # Panics
/// Panics if `self.ctx.time_index_column` is `None` when planning `AND` / `UNLESS`.
fn set_op_on_non_field_columns(
    &self,
    left: LogicalPlan,
    right: LogicalPlan,
    left_tag_cols: Vec<String>,
    right_tag_cols: Vec<String>,
    op: TokenType,
    modifier: &Option<BinModifier>,
) -> Result<LogicalPlan> {
    /// Turns a column set into a sorted list, so that everything derived from
    /// it (join keys, error messages) is deterministic.
    fn sorted_columns(columns: HashSet<String>) -> Vec<String> {
        let mut columns = columns.into_iter().collect::<Vec<_>>();
        columns.sort_unstable();
        columns
    }

    let mut left_tag_col_set = left_tag_cols.into_iter().collect::<HashSet<_>>();
    let mut right_tag_col_set = right_tag_cols.into_iter().collect::<HashSet<_>>();

    // apply modifier
    if let Some(modifier) = modifier {
        // one-to-many and many-to-one are not supported
        ensure!(
            matches!(
                modifier.card,
                VectorMatchCardinality::OneToOne | VectorMatchCardinality::ManyToMany
            ),
            UnsupportedVectorMatchSnafu {
                name: modifier.card.clone(),
            },
        );

        // apply label modifier
        match &modifier.matching {
            // keeps columns mentioned in `on`
            Some(LabelModifier::Include(on)) => {
                let mask = on.labels.iter().cloned().collect::<HashSet<_>>();
                left_tag_col_set.retain(|col| mask.contains(col));
                right_tag_col_set.retain(|col| mask.contains(col));
            }
            // removes columns mentioned in `ignoring`
            Some(LabelModifier::Exclude(ignoring)) => {
                // doesn't check existence of label
                for label in &ignoring.labels {
                    let _ = left_tag_col_set.remove(label);
                    let _ = right_tag_col_set.remove(label);
                }
            }
            None => {}
        }
    }

    // ensure two sides have the same tag columns
    if !matches!(op.id(), token::T_LOR) {
        ensure!(
            left_tag_col_set == right_tag_col_set,
            CombineTableColumnMismatchSnafu {
                left: sorted_columns(left_tag_col_set),
                right: sorted_columns(right_tag_col_set),
            }
        );
    }

    // `AND` and `UNLESS` only differ in the join type. Resolve it before touching
    // the time index so unsupported operators fail with a proper error.
    let join_type = match op.id() {
        token::T_LAND => JoinType::LeftSemi,
        token::T_LUNLESS => JoinType::LeftAnti,
        token::T_LOR => {
            // `OR` can not be expressed by `UNION` precisely.
            // it will generate unexpected result when schemas don't match
            return UnsupportedExprSnafu {
                name: "set operation `OR`",
            }
            .fail();
        }
        _ => return UnexpectedTokenSnafu { token: op }.fail(),
    };

    // NOTE(review): presumably the time index is always set once both sides have
    // been planned; this still panics otherwise, as before — confirm.
    let mut join_keys = sorted_columns(left_tag_col_set);
    join_keys.push(self.ctx.time_index_column.clone().unwrap());

    // Generate join plan.
    // All set operations in PromQL are "distinct"
    LogicalPlanBuilder::from(left)
        .distinct()
        .context(DataFusionPlanningSnafu)?
        .join_detailed(
            right,
            join_type,
            (join_keys.clone(), join_keys),
            None,
            true,
        )
        .context(DataFusionPlanningSnafu)?
        .build()
        .context(DataFusionPlanningSnafu)
}
/// Build a projection that project and perform operation expr for every value columns.
/// Non-value columns (tag and timestamp) will be preserved in the projection.
///

View File

@@ -286,8 +286,11 @@ fn describe_column_names(columns_schemas: &[ColumnSchema]) -> VectorRef {
}
fn describe_column_types(columns_schemas: &[ColumnSchema]) -> VectorRef {
Arc::new(StringVector::from_iterator(
columns_schemas.iter().map(|cs| cs.data_type.name()),
Arc::new(StringVector::from(
columns_schemas
.iter()
.map(|cs| cs.data_type.name())
.collect::<Vec<_>>(),
))
}

View File

@@ -207,7 +207,7 @@ impl TryFrom<Vec<RecordBatch>> for HttpRecordsOutput {
.iter()
.map(|cs| ColumnSchema {
name: cs.name.clone(),
data_type: cs.data_type.name().to_owned(),
data_type: cs.data_type.name(),
})
.collect(),
};

View File

@@ -1141,6 +1141,6 @@ mod test {
fn test_debug_for_column_metadata() {
let region_metadata = build_test_region_metadata();
let formatted = format!("{:?}", region_metadata);
assert_eq!(formatted, "RegionMetadata { column_metadatas: [[a Int64 not null Tag 1], [b Float64 not null Field 2], [c Timestamp not null Timestamp 3]], time_index: 3, primary_key: [1], region_id: 5299989648942(1234, 5678), schema_version: 0 }");
assert_eq!(formatted, "RegionMetadata { column_metadatas: [[a Int64 not null Tag 1], [b Float64 not null Field 2], [c TimestampMillisecond not null Timestamp 3]], time_index: 3, primary_key: [1], region_id: 5299989648942(1234, 5678), schema_version: 0 }");
}
}

View File

@@ -18,11 +18,13 @@ use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::Role;
use catalog::kvbackend::MetaKvBackend;
use catalog::kvbackend::{CachedMetaKvBackend, MetaKvBackend};
use client::client_manager::DatanodeClients;
use client::Client;
use common_base::Plugins;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::kv_backend::chroot::ChrootKvBackend;
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
@@ -33,13 +35,16 @@ use common_runtime::Builder as RuntimeBuilder;
use common_test_util::temp_dir::create_temp_dir;
use datanode::config::{DatanodeOptions, ObjectStoreConfig};
use datanode::datanode::{Datanode, DatanodeBuilder, ProcedureConfig};
use frontend::frontend::FrontendOptions;
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use meta_client::client::MetaClientBuilder;
use meta_srv::cluster::MetaPeerClientRef;
use meta_srv::metasrv::{MetaSrv, MetaSrvOptions};
use meta_srv::mocks::MockInfo;
use servers::grpc::GrpcServer;
use servers::heartbeat_options::HeartbeatOptions;
use servers::Mode;
use tonic::transport::Server;
use tower::service_fn;
@@ -252,18 +257,26 @@ impl GreptimeDbClusterBuilder {
meta_client.start(&[&meta_srv.server_addr]).await.unwrap();
let meta_client = Arc::new(meta_client);
let frontend_opts = FrontendOptions::default();
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
Arc::new(
FeInstance::try_new_distributed_with(
meta_client,
datanode_clients,
Plugins::default(),
&frontend_opts,
)
let handlers_executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(InvalidateTableCacheHandler::new(meta_backend.clone())),
]);
let heartbeat_task = HeartbeatTask::new(
meta_client.clone(),
HeartbeatOptions::default(),
Arc::new(handlers_executor),
);
let instance = FrontendBuilder::new(meta_backend, datanode_clients, meta_client)
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.unwrap(),
)
.unwrap();
Arc::new(instance)
}
}

View File

@@ -14,16 +14,19 @@
use std::sync::Arc;
use catalog::kvbackend::KvBackendCatalogManager;
use cmd::options::MixOptions;
use common_base::Plugins;
use common_config::KvBackendConfig;
use common_meta::cache_invalidator::DummyKvCacheInvalidator;
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::TableMetadataManager;
use common_procedure::options::ProcedureConfig;
use common_telemetry::logging::LoggingOptions;
use datanode::config::DatanodeOptions;
use datanode::datanode::DatanodeBuilder;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataCreator;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use crate::test_util::{self, create_tmp_dir_and_datanode_opts, StorageType, TestGuard};
@@ -88,29 +91,28 @@ impl GreptimeDbStandaloneBuilder {
.await
.unwrap();
let catalog_manager = KvBackendCatalogManager::new(
kv_backend.clone(),
Arc::new(DummyKvCacheInvalidator),
Arc::new(StandaloneDatanodeManager(datanode.region_server())),
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
table_metadata_manager.init().await.unwrap();
let datanode_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
let ddl_task_executor = Arc::new(
DdlManager::try_new(
procedure_manager.clone(),
datanode_manager.clone(),
Arc::new(DummyCacheInvalidator),
table_metadata_manager,
Arc::new(StandaloneTableMetadataCreator::new(kv_backend.clone())),
)
.unwrap(),
);
catalog_manager
.table_metadata_manager_ref()
.init()
let instance = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
.with_plugin(plugins)
.try_build()
.await
.unwrap();
let instance = Instance::try_new_standalone(
kv_backend,
procedure_manager.clone(),
catalog_manager,
plugins,
datanode.region_server(),
)
.await
.unwrap();
// Ensures all loaders are registered.
procedure_manager.start().await.unwrap();
test_util::prepare_another_catalog_and_schema(&instance).await;

View File

@@ -0,0 +1,270 @@
-- from promql/testdata/operators.test
-- cases related to AND/OR/UNLESS
-- group_left() and group_right() are not included
create table http_requests (
ts timestamp time index,
job string,
instance string,
g string, -- for `group`
val double,
primary key (job, instance, g)
);
Affected Rows: 0
insert into http_requests values
(3000000, "api", "0", "production", 100),
(3000000, "api", "1", "production", 200),
(3000000, "api", "0", "canary", 300),
(3000000, "api", "1", "canary", 400),
(3000000, "app", "0", "production", 500),
(3000000, "app", "1", "production", 600),
(3000000, "app", "0", "canary", 700),
(3000000, "app", "1", "canary", 800);
Affected Rows: 8
-- empty metric
create table cpu_count(ts timestamp time index);
Affected Rows: 0
create table vector_matching_a(
ts timestamp time index,
l string primary key,
val double,
);
Affected Rows: 0
insert into vector_matching_a values
(3000000, "x", 10),
(3000000, "y", 20);
Affected Rows: 2
-- eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"}
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} and http_requests{instance="0"};
+---------------------+-----+----------+--------+-------+
| ts | job | instance | g | val |
+---------------------+-----+----------+--------+-------+
| 1970-01-01T00:50:00 | api | 0 | canary | 300.0 |
| 1970-01-01T00:50:00 | app | 0 | canary | 700.0 |
+---------------------+-----+----------+--------+-------+
-- eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and http_requests{instance="0"};
+-----+----------+--------+---------------------+------------------+
| job | instance | g | ts | val + Float64(1) |
+-----+----------+--------+---------------------+------------------+
| api | 0 | canary | 1970-01-01T00:50:00 | 301.0 |
| app | 0 | canary | 1970-01-01T00:50:00 | 701.0 |
+-----+----------+--------+---------------------+------------------+
-- eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and on(instance, job) http_requests{instance="0", g="production"};
+-----+----------+--------+---------------------+------------------+
| job | instance | g | ts | val + Float64(1) |
+-----+----------+--------+---------------------+------------------+
| api | 0 | canary | 1970-01-01T00:50:00 | 301.0 |
| app | 0 | canary | 1970-01-01T00:50:00 | 701.0 |
+-----+----------+--------+---------------------+------------------+
-- eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and on(instance) http_requests{instance="0", g="production"};
+-----+----------+--------+---------------------+------------------+
| job | instance | g | ts | val + Float64(1) |
+-----+----------+--------+---------------------+------------------+
| api | 0 | canary | 1970-01-01T00:50:00 | 301.0 |
| app | 0 | canary | 1970-01-01T00:50:00 | 701.0 |
+-----+----------+--------+---------------------+------------------+
-- eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g) http_requests{instance="0", g="production"};
+-----+----------+--------+---------------------+------------------+
| job | instance | g | ts | val + Float64(1) |
+-----+----------+--------+---------------------+------------------+
| api | 0 | canary | 1970-01-01T00:50:00 | 301.0 |
| app | 0 | canary | 1970-01-01T00:50:00 | 701.0 |
+-----+----------+--------+---------------------+------------------+
-- eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g, job) http_requests{instance="0", g="production"};
+-----+----------+--------+---------------------+------------------+
| job | instance | g | ts | val + Float64(1) |
+-----+----------+--------+---------------------+------------------+
| api | 0 | canary | 1970-01-01T00:50:00 | 301.0 |
| app | 0 | canary | 1970-01-01T00:50:00 | 701.0 |
+-----+----------+--------+---------------------+------------------+
-- eval instant at 50m http_requests{group="canary"} or http_requests{group="production"}
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') http_requests{g="canary"} or http_requests{g="production"};
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- # On overlap the rhs samples must be dropped.
-- eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or http_requests{instance="1"};
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- # Matching only on instance excludes everything that has instance=0/1 but includes
-- # entries without the instance label.
-- eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a)
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- vector_matching_a{l="x"} 10
-- vector_matching_a{l="y"} 20
-- NOT SUPPORTED: union on different schemas
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a);
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a)
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- vector_matching_a{l="x"} 10
-- vector_matching_a{l="y"} 20
-- NOT SUPPORTED: union on different schemas
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or ignoring(l, g, job) (http_requests or cpu_count or vector_matching_a);
Error: 1004(InvalidArguments), Unsupported expr type: set operation `OR`
-- eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless http_requests{instance="0"};
+---------------------+-----+----------+--------+-------+
| ts | job | instance | g | val |
+---------------------+-----+----------+--------+-------+
| 1970-01-01T00:50:00 | api | 1 | canary | 400.0 |
| 1970-01-01T00:50:00 | app | 1 | canary | 800.0 |
+---------------------+-----+----------+--------+-------+
-- eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"}
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless on(job) http_requests{instance="0"};
++
++
-- eval instant at 50m http_requests{group="canary"} unless on(job, instance) http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless on(job, instance) http_requests{instance="0"};
+---------------------+-----+----------+--------+-------+
| ts | job | instance | g | val |
+---------------------+-----+----------+--------+-------+
| 1970-01-01T00:50:00 | api | 1 | canary | 400.0 |
| 1970-01-01T00:50:00 | app | 1 | canary | 800.0 |
+---------------------+-----+----------+--------+-------+
-- eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"}
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless ignoring(g, instance) http_requests{instance="0"};
++
++
-- eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless ignoring(g) http_requests{instance="0"};
+---------------------+-----+----------+--------+-------+
| ts | job | instance | g | val |
+---------------------+-----+----------+--------+-------+
| 1970-01-01T00:50:00 | api | 1 | canary | 400.0 |
| 1970-01-01T00:50:00 | app | 1 | canary | 800.0 |
+---------------------+-----+----------+--------+-------+
-- # https://github.com/prometheus/prometheus/issues/1489
-- eval instant at 50m http_requests AND ON (dummy) vector(1)
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `vector()`
tql eval (3000, 3000, '1s') http_requests AND ON (dummy) vector(1);
Error: 1004(InvalidArguments), Expect a PromQL expr but not found, input expr: Call(Call { func: Function { name: "vector", arg_types: [Scalar], variadic: false, return_type: Vector }, args: FunctionArgs { args: [NumberLiteral(NumberLiteral { val: 1.0 })] } })
-- eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1)
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `vector()`
tql eval (3000, 3000, '1s') http_requests AND IGNORING (g, instance, job) vector(1);
Error: 1004(InvalidArguments), Expect a PromQL expr but not found, input expr: Call(Call { func: Function { name: "vector", arg_types: [Scalar], variadic: false, return_type: Vector }, args: FunctionArgs { args: [NumberLiteral(NumberLiteral { val: 1.0 })] } })
drop table http_requests;
Affected Rows: 0
drop table cpu_count;
Affected Rows: 0
drop table vector_matching_a;
Affected Rows: 0

View File

@@ -0,0 +1,175 @@
-- from promql/testdata/operators.test
-- cases related to AND/OR/UNLESS
-- group_left() and group_right() are not included
-- Fixture table for the `http_requests` series queried by the cases in this file.
-- Column `g` stands in for the upstream `group` label.
CREATE TABLE http_requests (
    ts TIMESTAMP TIME INDEX,
    job STRING,
    instance STRING,
    g STRING,
    val DOUBLE,
    PRIMARY KEY (job, instance, g)
);
-- Eight series: every combination of job (api/app), instance (0/1) and g
-- (production/canary), each sampled once at ts = 3000000.
-- NOTE(review): ts is presumably in milliseconds, which would line up with the
-- 3000 s evaluation time used by the `tql eval` statements -- confirm.
-- Columns are listed explicitly so the rows do not depend on declaration order, and
-- string values use single quotes because double quotes delimit identifiers in
-- standard SQL.
insert into http_requests (ts, job, instance, g, val) values
    (3000000, 'api', '0', 'production', 100),
    (3000000, 'api', '1', 'production', 200),
    (3000000, 'api', '0', 'canary', 300),
    (3000000, 'api', '1', 'canary', 400),
    (3000000, 'app', '0', 'production', 500),
    (3000000, 'app', '1', 'production', 600),
    (3000000, 'app', '0', 'canary', 700),
    (3000000, 'app', '1', 'canary', 800);
-- empty metric: the table has only a time index column and this file never inserts
-- rows into it; it appears on the right-hand side of the `or` cases.
create table cpu_count(ts timestamp time index);
-- Second metric with a single label column `l`; it appears on the right-hand side
-- of the `or` cases.
-- The column list ends without a trailing comma so the statement is valid standard
-- SQL and does not rely on parser leniency.
create table vector_matching_a(
    ts timestamp time index,
    l string primary key,
    val double
);
-- Two series (l = x and l = y), sampled at the same timestamp as the
-- `http_requests` rows.
-- Columns are listed explicitly and string values use single quotes because double
-- quotes delimit identifiers in standard SQL.
insert into vector_matching_a (ts, l, val) values
    (3000000, 'x', 10),
    (3000000, 'y', 20);
-- AND cases.
-- Each case quotes the upstream test (`eval instant at 50m ...` followed by its expected
-- samples) and then runs the equivalent `tql eval` at 3000 s (= 50 min), with the label
-- `group` spelled `g`.
-- The quoted samples keep the upstream job values (`api-server`/`app-server`); the rows
-- inserted into `http_requests` in this file use `api`/`app` instead.
-- eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"}
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} and http_requests{instance="0"};
-- eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and http_requests{instance="0"};
-- eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and on(instance, job) http_requests{instance="0", g="production"};
-- eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and on(instance) http_requests{instance="0", g="production"};
-- eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g) http_requests{instance="0", g="production"};
-- eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) and ignoring(g, job) http_requests{instance="0", g="production"};
-- OR cases.
-- Every statement in this group is tagged `NOT SUPPORTED`. The quoted samples describe
-- the upstream Prometheus expectation, not necessarily what `tql eval` returns today.
-- eval instant at 50m http_requests{group="canary"} or http_requests{group="production"}
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') http_requests{g="canary"} or http_requests{g="production"};
-- # On overlap the rhs samples must be dropped.
-- eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"}
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or http_requests{instance="1"};
-- # Matching only on instance excludes everything that has instance=0/1 but includes
-- # entries without the instance label.
-- eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a)
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- vector_matching_a{l="x"} 10
-- vector_matching_a{l="y"} 20
-- NOT SUPPORTED: union on different schemas
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a);
-- eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a)
-- {group="canary", instance="0", job="api-server"} 301
-- {group="canary", instance="0", job="app-server"} 701
-- {group="canary", instance="1", job="api-server"} 401
-- {group="canary", instance="1", job="app-server"} 801
-- vector_matching_a{l="x"} 10
-- vector_matching_a{l="y"} 20
-- NOT SUPPORTED: union on different schemas
-- NOT SUPPORTED: `or`
tql eval (3000, 3000, '1s') (http_requests{g="canary"} + 1) or ignoring(l, g, job) (http_requests or cpu_count or vector_matching_a);
-- UNLESS cases.
-- Cases that quote samples sort their output with `SQLNESS SORT_RESULT`; the two cases
-- that quote no samples have no sort directive.
-- eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless http_requests{instance="0"};
-- eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"}
-- (no samples quoted: both job values of the canary series also occur on the
-- instance="0" side, so matching on `job` alone removes every left-hand series)
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless on(job) http_requests{instance="0"};
-- eval instant at 50m http_requests{group="canary"} unless on(job, instance) http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless on(job, instance) http_requests{instance="0"};
-- eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"}
-- (no samples quoted: ignoring `g` and `instance` leaves only `job` among the fixture's
-- labels, so this case matches the same way as the on(job) case)
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless ignoring(g, instance) http_requests{instance="0"};
-- eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"}
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') http_requests{g="canary"} unless ignoring(g) http_requests{instance="0"};
-- vector() cases.
-- Both statements are tagged `NOT SUPPORTED`: the right-hand side calls `vector(1)`, and
-- the recorded output for each is an InvalidArguments error rather than the quoted samples.
-- # https://github.com/prometheus/prometheus/issues/1489
-- eval instant at 50m http_requests AND ON (dummy) vector(1)
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `vector()`
tql eval (3000, 3000, '1s') http_requests AND ON (dummy) vector(1);
-- eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1)
-- http_requests{group="canary", instance="0", job="api-server"} 300
-- http_requests{group="canary", instance="0", job="app-server"} 700
-- http_requests{group="canary", instance="1", job="api-server"} 400
-- http_requests{group="canary", instance="1", job="app-server"} 800
-- http_requests{group="production", instance="0", job="api-server"} 100
-- http_requests{group="production", instance="0", job="app-server"} 500
-- http_requests{group="production", instance="1", job="api-server"} 200
-- http_requests{group="production", instance="1", job="app-server"} 600
-- NOT SUPPORTED: `vector()`
tql eval (3000, 3000, '1s') http_requests AND IGNORING (g, instance, job) vector(1);
-- Clean up: remove the three fixture tables created at the top of this file.
DROP TABLE http_requests;
DROP TABLE cpu_count;
DROP TABLE vector_matching_a;