mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 15:40:02 +00:00
Compare commits
26 Commits
self-hoste
...
v0.3.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ed6e39673 | ||
|
|
e5c61ec290 | ||
|
|
b1ccc7ef5d | ||
|
|
d1b5ce0d35 | ||
|
|
a314993ab4 | ||
|
|
fa522bc579 | ||
|
|
5335203360 | ||
|
|
23bf55a265 | ||
|
|
3b91fc2c64 | ||
|
|
6205616301 | ||
|
|
e47ef1f0d2 | ||
|
|
16c1ee2618 | ||
|
|
323e2aed07 | ||
|
|
cbc2620a59 | ||
|
|
4fdee5ea3c | ||
|
|
30472cebae | ||
|
|
903f02bf10 | ||
|
|
1703e93e15 | ||
|
|
2dd86b686f | ||
|
|
128c6ec98c | ||
|
|
960b84262b | ||
|
|
69854c07c5 | ||
|
|
1eeb5b4330 | ||
|
|
9b3037fe97 | ||
|
|
09747ea206 | ||
|
|
fb35e09072 |
12
.github/workflows/release.yml
vendored
12
.github/workflows/release.yml
vendored
@@ -22,7 +22,7 @@ name: Release
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-05-03
|
||||
|
||||
SCHEDULED_BUILD_VERSION_PREFIX: v0.4.0
|
||||
SCHEDULED_BUILD_VERSION_PREFIX: v0.3.1
|
||||
|
||||
SCHEDULED_PERIOD: nightly
|
||||
|
||||
@@ -35,26 +35,27 @@ jobs:
|
||||
build-macos:
|
||||
name: Build macOS binary
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# The file format is greptime-<os>-<arch>
|
||||
include:
|
||||
- arch: aarch64-apple-darwin
|
||||
os: self-hosted
|
||||
os: macos-latest
|
||||
file: greptime-darwin-arm64
|
||||
continue-on-error: false
|
||||
opts: "-F servers/dashboard"
|
||||
- arch: x86_64-apple-darwin
|
||||
os: self-hosted
|
||||
os: macos-latest
|
||||
file: greptime-darwin-amd64
|
||||
continue-on-error: false
|
||||
opts: "-F servers/dashboard"
|
||||
- arch: aarch64-apple-darwin
|
||||
os: self-hosted
|
||||
os: macos-latest
|
||||
file: greptime-darwin-arm64-pyo3
|
||||
continue-on-error: false
|
||||
opts: "-F pyo3_backend,servers/dashboard"
|
||||
- arch: x86_64-apple-darwin
|
||||
os: self-hosted
|
||||
os: macos-latest
|
||||
file: greptime-darwin-amd64-pyo3
|
||||
continue-on-error: false
|
||||
opts: "-F pyo3_backend,servers/dashboard"
|
||||
@@ -129,6 +130,7 @@ jobs:
|
||||
build-linux:
|
||||
name: Build linux binary
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# The file format is greptime-<os>-<arch>
|
||||
include:
|
||||
|
||||
100
Cargo.lock
generated
100
Cargo.lock
generated
@@ -209,8 +209,8 @@ dependencies = [
|
||||
"greptime-proto",
|
||||
"prost",
|
||||
"snafu",
|
||||
"tonic 0.9.2",
|
||||
"tonic-build 0.9.2",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -382,7 +382,7 @@ dependencies = [
|
||||
"paste",
|
||||
"prost",
|
||||
"tokio",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1538,7 +1538,7 @@ dependencies = [
|
||||
"substrait 0.7.5",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
@@ -1760,7 +1760,7 @@ dependencies = [
|
||||
"rand",
|
||||
"snafu",
|
||||
"tokio",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
]
|
||||
|
||||
@@ -1801,6 +1801,7 @@ name = "common-meta"
|
||||
version = "0.4.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
@@ -2005,7 +2006,7 @@ checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e"
|
||||
dependencies = [
|
||||
"prost",
|
||||
"prost-types",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
@@ -2027,7 +2028,7 @@ dependencies = [
|
||||
"thread_local",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-subscriber",
|
||||
@@ -2647,7 +2648,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tower-http",
|
||||
"url",
|
||||
@@ -3025,16 +3026,16 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "etcd-client"
|
||||
version = "0.10.4"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4319dc0fb739a6e84cb8678b8cf50c9bcfa4712ae826b33ecf00cc0850550a58"
|
||||
checksum = "f4b0ea5ef6dc2388a4b1669fa32097249bc03a15417b97cb75e38afb309e4a89"
|
||||
dependencies = [
|
||||
"http",
|
||||
"prost",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.8.3",
|
||||
"tonic-build 0.8.4",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tower",
|
||||
"tower-service",
|
||||
]
|
||||
@@ -3221,6 +3222,7 @@ dependencies = [
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
@@ -3257,7 +3259,7 @@ dependencies = [
|
||||
"table",
|
||||
"tokio",
|
||||
"toml",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
"uuid",
|
||||
]
|
||||
@@ -4096,13 +4098,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=4398d20c56d5f7939cc2960789cb1fa7dd18e6fe#4398d20c56d5f7939cc2960789cb1fa7dd18e6fe"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b#7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b"
|
||||
dependencies = [
|
||||
"prost",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tonic 0.9.2",
|
||||
"tonic-build 0.9.2",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5141,7 +5143,7 @@ dependencies = [
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
@@ -5185,10 +5187,11 @@ dependencies = [
|
||||
"serde_json",
|
||||
"servers",
|
||||
"snafu",
|
||||
"store-api",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
@@ -8517,6 +8520,7 @@ dependencies = [
|
||||
"axum-macros",
|
||||
"axum-test-helper",
|
||||
"base64 0.13.1",
|
||||
"build-data",
|
||||
"bytes",
|
||||
"catalog",
|
||||
"chrono",
|
||||
@@ -8534,6 +8538,9 @@ dependencies = [
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"common-time",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datatypes",
|
||||
"derive_builder 0.12.0",
|
||||
"digest",
|
||||
@@ -8583,7 +8590,7 @@ dependencies = [
|
||||
"tokio-rustls 0.24.0",
|
||||
"tokio-stream",
|
||||
"tokio-test",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tonic-reflection",
|
||||
"tower",
|
||||
"tower-http",
|
||||
@@ -8970,6 +8977,7 @@ dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"crc",
|
||||
"crossbeam-queue",
|
||||
"digest",
|
||||
@@ -9137,8 +9145,8 @@ dependencies = [
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tonic 0.9.2",
|
||||
"tonic-build 0.9.2",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@@ -9550,6 +9558,7 @@ dependencies = [
|
||||
"axum",
|
||||
"axum-test-helper",
|
||||
"catalog",
|
||||
"chrono",
|
||||
"client",
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
@@ -9595,7 +9604,7 @@ dependencies = [
|
||||
"table",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
"tower",
|
||||
"uuid",
|
||||
]
|
||||
@@ -9970,38 +9979,6 @@ dependencies = [
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-timeout",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"prost-derive",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"tracing-futures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.9.2"
|
||||
@@ -10033,19 +10010,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic-build"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4"
|
||||
dependencies = [
|
||||
"prettyplease 0.1.25",
|
||||
"proc-macro2",
|
||||
"prost-build",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic-build"
|
||||
version = "0.9.2"
|
||||
@@ -10069,7 +10033,7 @@ dependencies = [
|
||||
"prost-types",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.9.2",
|
||||
"tonic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -50,7 +50,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "0.4.0"
|
||||
version = "0.3.1"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -72,7 +72,7 @@ datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev
|
||||
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "4398d20c56d5f7939cc2960789cb1fa7dd18e6fe" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b" }
|
||||
itertools = "0.10"
|
||||
parquet = "40.0"
|
||||
paste = "1.0"
|
||||
|
||||
7
Cross.toml
Normal file
7
Cross.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
[build]
|
||||
pre-build = [
|
||||
"dpkg --add-architecture $CROSS_DEB_ARCH",
|
||||
"apt update && apt install -y unzip zlib1g-dev:$CROSS_DEB_ARCH",
|
||||
"curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
|
||||
"chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
|
||||
]
|
||||
2
Makefile
2
Makefile
@@ -34,7 +34,7 @@ docker-image: ## Build docker image.
|
||||
##@ Test
|
||||
|
||||
test: nextest ## Run unit and integration tests.
|
||||
cargo nextest run
|
||||
cargo nextest run --retries 3
|
||||
|
||||
.PHONY: nextest ## Install nextest tools.
|
||||
nextest:
|
||||
|
||||
@@ -26,8 +26,8 @@ tcp_nodelay = true
|
||||
[wal]
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
file_size = "1GB"
|
||||
purge_threshold = "50GB"
|
||||
file_size = "256MB"
|
||||
purge_threshold = "4GB"
|
||||
purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
@@ -81,9 +81,9 @@ addr = "127.0.0.1:4004"
|
||||
# WAL data directory
|
||||
# dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
file_size = "1GB"
|
||||
# WAL purge threshold in bytes.
|
||||
purge_threshold = "50GB"
|
||||
file_size = "256MB"
|
||||
# WAL purge threshold.
|
||||
purge_threshold = "4GB"
|
||||
# WAL purge interval in seconds.
|
||||
purge_interval = "10m"
|
||||
# WAL read batch size.
|
||||
|
||||
@@ -91,7 +91,7 @@ pub fn build_table_regional_prefix(
|
||||
}
|
||||
|
||||
/// Table global info has only one key across all datanodes so it does not have `node_id` field.
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Hash, Eq, PartialEq)]
|
||||
pub struct TableGlobalKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
@@ -124,6 +124,14 @@ impl TableGlobalKey {
|
||||
table_name: captures[3].to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn to_raw_key(&self) -> Vec<u8> {
|
||||
self.to_string().into_bytes()
|
||||
}
|
||||
|
||||
pub fn try_from_raw_key(key: &[u8]) -> Result<Self, Error> {
|
||||
Self::parse(String::from_utf8_lossy(key))
|
||||
}
|
||||
}
|
||||
|
||||
/// Table global info contains necessary info for a datanode to create table regions, including
|
||||
@@ -141,6 +149,10 @@ impl TableGlobalValue {
|
||||
pub fn table_id(&self) -> TableId {
|
||||
self.table_info.ident.table_id
|
||||
}
|
||||
|
||||
pub fn engine(&self) -> &str {
|
||||
&self.table_info.meta.engine
|
||||
}
|
||||
}
|
||||
|
||||
/// Table regional info that varies between datanode, so it contains a `node_id` field.
|
||||
|
||||
@@ -467,10 +467,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
.ident
|
||||
.table_id;
|
||||
|
||||
if !self.system.deregister_table(&request, table_id).await? {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
self.system.deregister_table(&request, table_id).await?;
|
||||
self.catalogs.deregister_table(request).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ mod manager;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub mod mock;
|
||||
pub mod region_alive_keeper;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
|
||||
|
||||
@@ -20,13 +20,14 @@ use std::sync::Arc;
|
||||
use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{MAX_SYS_TABLE_ID, MITO_ENGINE};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use dashmap::DashMap;
|
||||
use futures::Stream;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use metrics::{decrement_gauge, increment_gauge};
|
||||
use parking_lot::RwLock;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::engine::manager::TableEngineManagerRef;
|
||||
use table::engine::{EngineContext, TableReference};
|
||||
use table::requests::{CreateTableRequest, OpenTableRequest};
|
||||
@@ -43,6 +44,7 @@ use crate::helper::{
|
||||
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue, CATALOG_KEY_PREFIX,
|
||||
};
|
||||
use crate::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
handle_system_table_request, CatalogManager, CatalogProvider, CatalogProviderRef,
|
||||
@@ -57,16 +59,23 @@ pub struct RemoteCatalogManager {
|
||||
catalogs: Arc<RwLock<DashMap<String, CatalogProviderRef>>>,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
impl RemoteCatalogManager {
|
||||
pub fn new(engine_manager: TableEngineManagerRef, node_id: u64, backend: KvBackendRef) -> Self {
|
||||
pub fn new(
|
||||
engine_manager: TableEngineManagerRef,
|
||||
node_id: u64,
|
||||
backend: KvBackendRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
engine_manager,
|
||||
node_id,
|
||||
backend,
|
||||
catalogs: Default::default(),
|
||||
system_table_requests: Default::default(),
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,6 +85,7 @@ impl RemoteCatalogManager {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
backend: self.backend.clone(),
|
||||
engine_manager: self.engine_manager.clone(),
|
||||
region_alive_keepers: self.region_alive_keepers.clone(),
|
||||
}) as _
|
||||
}
|
||||
|
||||
@@ -123,10 +133,17 @@ impl RemoteCatalogManager {
|
||||
|
||||
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
|
||||
|
||||
let region_alive_keepers = self.region_alive_keepers.clone();
|
||||
joins.push(common_runtime::spawn_bg(async move {
|
||||
let max_table_id =
|
||||
initiate_schemas(node_id, backend, engine_manager, &catalog_name, catalog)
|
||||
.await?;
|
||||
let max_table_id = initiate_schemas(
|
||||
node_id,
|
||||
backend,
|
||||
engine_manager,
|
||||
&catalog_name,
|
||||
catalog,
|
||||
region_alive_keepers,
|
||||
)
|
||||
.await?;
|
||||
info!(
|
||||
"Catalog name: {}, max table id allocated: {}",
|
||||
&catalog_name, max_table_id
|
||||
@@ -155,6 +172,7 @@ impl RemoteCatalogManager {
|
||||
self.engine_manager.clone(),
|
||||
catalog_name,
|
||||
schema_name,
|
||||
self.region_alive_keepers.clone(),
|
||||
);
|
||||
|
||||
let catalog_provider = self.new_catalog_provider(catalog_name);
|
||||
@@ -200,6 +218,7 @@ fn new_schema_provider(
|
||||
engine_manager: TableEngineManagerRef,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> SchemaProviderRef {
|
||||
Arc::new(RemoteSchemaProvider {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
@@ -207,6 +226,7 @@ fn new_schema_provider(
|
||||
node_id,
|
||||
backend,
|
||||
engine_manager,
|
||||
region_alive_keepers,
|
||||
}) as _
|
||||
}
|
||||
|
||||
@@ -240,6 +260,7 @@ async fn initiate_schemas(
|
||||
engine_manager: TableEngineManagerRef,
|
||||
catalog_name: &str,
|
||||
catalog: CatalogProviderRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Result<u32> {
|
||||
let mut schemas = iter_remote_schemas(&backend, catalog_name).await;
|
||||
let mut joins = Vec::new();
|
||||
@@ -259,6 +280,7 @@ async fn initiate_schemas(
|
||||
engine_manager.clone(),
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
region_alive_keepers.clone(),
|
||||
);
|
||||
catalog
|
||||
.register_schema(schema_name.clone(), schema.clone())
|
||||
@@ -576,34 +598,33 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let catalog_name = request.catalog;
|
||||
let schema_name = request.schema;
|
||||
let catalog = &request.catalog;
|
||||
let schema = &request.schema;
|
||||
let table_name = &request.table_name;
|
||||
|
||||
let schema_provider = self
|
||||
.catalog(&catalog_name)
|
||||
.catalog(catalog)
|
||||
.await?
|
||||
.context(CatalogNotFoundSnafu {
|
||||
catalog_name: &catalog_name,
|
||||
catalog_name: catalog,
|
||||
})?
|
||||
.schema(&schema_name)
|
||||
.schema(schema)
|
||||
.await?
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
catalog: &catalog_name,
|
||||
schema: &schema_name,
|
||||
})?;
|
||||
if schema_provider.table_exist(&request.table_name).await? {
|
||||
return TableExistsSnafu {
|
||||
table: format!("{}.{}.{}", &catalog_name, &schema_name, &request.table_name),
|
||||
.context(SchemaNotFoundSnafu { catalog, schema })?;
|
||||
ensure!(
|
||||
!schema_provider.table_exist(table_name).await?,
|
||||
TableExistsSnafu {
|
||||
table: common_catalog::format_full_table_name(catalog, schema, table_name),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
);
|
||||
|
||||
increment_gauge!(
|
||||
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
|
||||
1.0,
|
||||
&[crate::metrics::db_label(&catalog_name, &schema_name)],
|
||||
&[crate::metrics::db_label(catalog, schema)],
|
||||
);
|
||||
schema_provider
|
||||
.register_table(request.table_name, request.table)
|
||||
.register_table(table_name.to_string(), request.table)
|
||||
.await?;
|
||||
|
||||
Ok(true)
|
||||
@@ -626,7 +647,22 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
1.0,
|
||||
&[crate::metrics::db_label(catalog_name, schema_name)],
|
||||
);
|
||||
Ok(result.is_none())
|
||||
|
||||
if let Some(table) = result.as_ref() {
|
||||
let table_info = table.table_info();
|
||||
let table_ident = TableIdent {
|
||||
catalog: request.catalog,
|
||||
schema: request.schema,
|
||||
table: request.table_name,
|
||||
table_id: table_info.ident.table_id,
|
||||
engine: table_info.meta.engine.clone(),
|
||||
};
|
||||
self.region_alive_keepers
|
||||
.deregister_table(&table_ident)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
@@ -644,6 +680,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
self.engine_manager.clone(),
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
self.region_alive_keepers.clone(),
|
||||
);
|
||||
catalog_provider
|
||||
.register_schema(schema_name, schema_provider)
|
||||
@@ -779,6 +816,7 @@ pub struct RemoteCatalogProvider {
|
||||
catalog_name: String,
|
||||
backend: KvBackendRef,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
impl RemoteCatalogProvider {
|
||||
@@ -787,12 +825,14 @@ impl RemoteCatalogProvider {
|
||||
backend: KvBackendRef,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
node_id: u64,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
node_id,
|
||||
catalog_name,
|
||||
backend,
|
||||
engine_manager,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -810,6 +850,7 @@ impl RemoteCatalogProvider {
|
||||
node_id: self.node_id,
|
||||
backend: self.backend.clone(),
|
||||
engine_manager: self.engine_manager.clone(),
|
||||
region_alive_keepers: self.region_alive_keepers.clone(),
|
||||
};
|
||||
Arc::new(provider) as Arc<_>
|
||||
}
|
||||
@@ -872,6 +913,7 @@ pub struct RemoteSchemaProvider {
|
||||
node_id: u64,
|
||||
backend: KvBackendRef,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
impl RemoteSchemaProvider {
|
||||
@@ -881,6 +923,7 @@ impl RemoteSchemaProvider {
|
||||
node_id: u64,
|
||||
engine_manager: TableEngineManagerRef,
|
||||
backend: KvBackendRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_name,
|
||||
@@ -888,6 +931,7 @@ impl RemoteSchemaProvider {
|
||||
node_id,
|
||||
backend,
|
||||
engine_manager,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -910,15 +954,26 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
async fn table_names(&self) -> Result<Vec<String>> {
|
||||
let key_prefix = build_table_regional_prefix(&self.catalog_name, &self.schema_name);
|
||||
let iter = self.backend.range(key_prefix.as_bytes());
|
||||
let table_names = iter
|
||||
let regional_keys = iter
|
||||
.map(|kv| {
|
||||
let Kv(key, _) = kv?;
|
||||
let regional_key = TableRegionalKey::parse(String::from_utf8_lossy(&key))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
Ok(regional_key.table_name)
|
||||
Ok(regional_key)
|
||||
})
|
||||
.try_collect()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await?;
|
||||
|
||||
let table_names = regional_keys
|
||||
.into_iter()
|
||||
.filter_map(|x| {
|
||||
if x.node_id == self.node_id {
|
||||
Some(x.table_name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(table_names)
|
||||
}
|
||||
|
||||
@@ -970,6 +1025,18 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
&table_value.as_bytes().context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let table_ident = TableIdent {
|
||||
catalog: table_info.catalog_name.clone(),
|
||||
schema: table_info.schema_name.clone(),
|
||||
table: table_info.name.clone(),
|
||||
table_id: table_info.ident.table_id,
|
||||
engine: table_info.meta.engine.clone(),
|
||||
};
|
||||
self.region_alive_keepers
|
||||
.register_table(table_ident, table)
|
||||
.await?;
|
||||
|
||||
debug!(
|
||||
"Successfully set catalog table entry, key: {}, table value: {:?}",
|
||||
table_key, table_value
|
||||
|
||||
@@ -27,9 +27,11 @@ use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::StringVector;
|
||||
use serde::Serializer;
|
||||
use table::engine::{EngineContext, TableEngine, TableReference};
|
||||
use table::engine::{CloseTableResult, EngineContext, TableEngine, TableReference};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest};
|
||||
use table::requests::{
|
||||
AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
|
||||
};
|
||||
use table::test_util::MemTable;
|
||||
use table::TableRef;
|
||||
use tokio::sync::RwLock;
|
||||
@@ -183,6 +185,8 @@ impl TableEngine for MockTableEngine {
|
||||
let table_name = request.table_name.clone();
|
||||
let catalog_name = request.catalog_name.clone();
|
||||
let schema_name = request.schema_name.clone();
|
||||
let table_full_name =
|
||||
TableReference::full(&catalog_name, &schema_name, &table_name).to_string();
|
||||
|
||||
let default_table_id = "0".to_owned();
|
||||
let table_id = TableId::from_str(
|
||||
@@ -211,7 +215,7 @@ impl TableEngine for MockTableEngine {
|
||||
)) as Arc<_>;
|
||||
|
||||
let mut tables = self.tables.write().await;
|
||||
tables.insert(table_name, table.clone() as TableRef);
|
||||
tables.insert(table_full_name, table.clone() as TableRef);
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
@@ -263,6 +267,19 @@ impl TableEngine for MockTableEngine {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn close_table(
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
request: CloseTableRequest,
|
||||
) -> table::Result<CloseTableResult> {
|
||||
let _ = self
|
||||
.tables
|
||||
.write()
|
||||
.await
|
||||
.remove(&request.table_ref().to_string());
|
||||
Ok(CloseTableResult::Released(vec![]))
|
||||
}
|
||||
|
||||
async fn close(&self) -> table::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
821
src/catalog/src/remote/region_alive_keeper.rs
Normal file
821
src/catalog/src/remote/region_alive_keeper.rs
Normal file
@@ -0,0 +1,821 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::future::Future;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_meta::error::InvalidProtoMsgSnafu;
|
||||
use common_meta::heartbeat::handler::{
|
||||
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
|
||||
};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::RegionIdent;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::engine::manager::TableEngineManagerRef;
|
||||
use table::engine::{CloseTableResult, EngineContext, TableEngineRef};
|
||||
use table::requests::CloseTableRequest;
|
||||
use table::TableRef;
|
||||
use tokio::sync::{mpsc, oneshot, Mutex};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{Duration, Instant};
|
||||
|
||||
use crate::error::{Result, TableEngineNotFoundSnafu};
|
||||
|
||||
/// [RegionAliveKeepers] manages all [RegionAliveKeeper] in a scope of tables.
|
||||
pub struct RegionAliveKeepers {
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
keepers: Arc<Mutex<HashMap<TableIdent, Arc<RegionAliveKeeper>>>>,
|
||||
heartbeat_interval_millis: u64,
|
||||
started: AtomicBool,
|
||||
|
||||
/// The epoch when [RegionAliveKeepers] is created. It's used to get a monotonically non-decreasing
|
||||
/// elapsed time when submitting heartbeats to Metasrv (because [Instant] is monotonically
|
||||
/// non-decreasing). The heartbeat request will carry the duration since this epoch, and the
|
||||
/// duration acts like an "invariant point" for region's keep alive lease.
|
||||
epoch: Instant,
|
||||
}
|
||||
|
||||
impl RegionAliveKeepers {
|
||||
pub fn new(
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
heartbeat_interval_millis: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_engine_manager,
|
||||
keepers: Arc::new(Mutex::new(HashMap::new())),
|
||||
heartbeat_interval_millis,
|
||||
started: AtomicBool::new(false),
|
||||
epoch: Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn find_keeper(&self, table_ident: &TableIdent) -> Option<Arc<RegionAliveKeeper>> {
|
||||
self.keepers.lock().await.get(table_ident).cloned()
|
||||
}
|
||||
|
||||
pub async fn register_table(&self, table_ident: TableIdent, table: TableRef) -> Result<()> {
|
||||
let keeper = self.find_keeper(&table_ident).await;
|
||||
if keeper.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_engine = self
|
||||
.table_engine_manager
|
||||
.engine(&table_ident.engine)
|
||||
.context(TableEngineNotFoundSnafu {
|
||||
engine_name: &table_ident.engine,
|
||||
})?;
|
||||
|
||||
let keeper = Arc::new(RegionAliveKeeper::new(
|
||||
table_engine,
|
||||
table_ident.clone(),
|
||||
self.heartbeat_interval_millis,
|
||||
));
|
||||
for r in table.table_info().meta.region_numbers.iter() {
|
||||
keeper.register_region(*r).await;
|
||||
}
|
||||
|
||||
let mut keepers = self.keepers.lock().await;
|
||||
keepers.insert(table_ident.clone(), keeper.clone());
|
||||
|
||||
if self.started.load(Ordering::Relaxed) {
|
||||
keeper.start().await;
|
||||
|
||||
info!("RegionAliveKeeper for table {table_ident} is started!");
|
||||
} else {
|
||||
info!("RegionAliveKeeper for table {table_ident} is registered but not started yet!");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn deregister_table(
|
||||
&self,
|
||||
table_ident: &TableIdent,
|
||||
) -> Option<Arc<RegionAliveKeeper>> {
|
||||
self.keepers.lock().await.remove(table_ident).map(|x| {
|
||||
info!("Deregister RegionAliveKeeper for table {table_ident}");
|
||||
x
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn register_region(&self, region_ident: &RegionIdent) {
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
let Some(keeper) = self.find_keeper(table_ident).await else {
|
||||
// Alive keeper could be affected by lagging msg, just warn and ignore.
|
||||
warn!("Alive keeper for region {region_ident} is not found!");
|
||||
return;
|
||||
};
|
||||
keeper.register_region(region_ident.region_number).await
|
||||
}
|
||||
|
||||
pub async fn deregister_region(&self, region_ident: &RegionIdent) {
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
let Some(keeper) = self.find_keeper(table_ident).await else {
|
||||
// Alive keeper could be affected by lagging msg, just warn and ignore.
|
||||
warn!("Alive keeper for region {region_ident} is not found!");
|
||||
return;
|
||||
};
|
||||
let _ = keeper.deregister_region(region_ident.region_number).await;
|
||||
}
|
||||
|
||||
pub async fn start(&self) {
|
||||
let keepers = self.keepers.lock().await;
|
||||
for keeper in keepers.values() {
|
||||
keeper.start().await;
|
||||
}
|
||||
self.started.store(true, Ordering::Relaxed);
|
||||
|
||||
info!(
|
||||
"RegionAliveKeepers for tables {:?} are started!",
|
||||
keepers.keys().map(|x| x.to_string()).collect::<Vec<_>>(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn epoch(&self) -> Instant {
|
||||
self.epoch
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl HeartbeatResponseHandler for RegionAliveKeepers {
    // Only heartbeat responses that carry region leases are of interest here.
    fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
        !ctx.response.region_leases.is_empty()
    }

    /// Drains the region leases from the heartbeat response and extends the deadlines
    /// of the leased regions. Leases with a missing/malformed table ident, or for
    /// tables that have no keeper (possibly lagging messages), are logged and skipped.
    async fn handle(
        &self,
        ctx: &mut HeartbeatResponseHandlerContext,
    ) -> common_meta::error::Result<HandleControl> {
        let leases = ctx.response.region_leases.drain(..).collect::<Vec<_>>();
        for lease in leases {
            let table_ident: TableIdent = match lease
                .table_ident
                .context(InvalidProtoMsgSnafu {
                    err_msg: "'table_ident' is missing in RegionLease",
                })
                .and_then(|x| x.try_into())
            {
                Ok(x) => x,
                Err(e) => {
                    error!(e; "");
                    continue;
                }
            };

            let Some(keeper) = self.keepers.lock().await.get(&table_ident).cloned() else {
                // Alive keeper could be affected by lagging msg, just warn and ignore.
                warn!("Alive keeper for table {table_ident} is not found!");
                continue;
            };

            // Lease timestamps are durations relative to this keepers' epoch, so only
            // durations (not absolute instants) need to cross the wire.
            let start_instant = self.epoch + Duration::from_millis(lease.duration_since_epoch);
            let deadline = start_instant + Duration::from_secs(lease.lease_seconds);
            keeper.keep_lived(lease.regions, deadline).await;
        }
        Ok(HandleControl::Continue)
    }
}
|
||||
|
||||
/// [RegionAliveKeeper] starts a countdown for each region in a table. When deadline is reached,
/// the region will be closed.
/// The deadline is controlled by Metasrv. It works like "lease" for regions: a Datanode submits its
/// opened regions to Metasrv, in heartbeats. If Metasrv decides some region could be resided in this
/// Datanode, it will "extend" the region's "lease", with a deadline for [RegionAliveKeeper] to
/// countdown.
pub struct RegionAliveKeeper {
    /// Engine used to close a region's table when its lease deadline is reached.
    table_engine: TableEngineRef,
    /// The table whose regions this keeper watches.
    table_ident: TableIdent,
    /// One countdown task handle per registered region.
    countdown_task_handles: Arc<Mutex<HashMap<RegionNumber, Arc<CountdownTaskHandle>>>>,
    /// Heartbeat interval used to compute a countdown's first (startup-protection) deadline.
    heartbeat_interval_millis: u64,
    /// Whether `start` has been called; regions registered afterwards start their
    /// countdown immediately.
    started: AtomicBool,
}
|
||||
|
||||
impl RegionAliveKeeper {
|
||||
fn new(
|
||||
table_engine: TableEngineRef,
|
||||
table_ident: TableIdent,
|
||||
heartbeat_interval_millis: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_engine,
|
||||
table_ident,
|
||||
countdown_task_handles: Arc::new(Mutex::new(HashMap::new())),
|
||||
heartbeat_interval_millis,
|
||||
started: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
async fn find_handle(&self, region: &RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
|
||||
self.countdown_task_handles
|
||||
.lock()
|
||||
.await
|
||||
.get(region)
|
||||
.cloned()
|
||||
}
|
||||
|
||||
async fn register_region(&self, region: RegionNumber) {
|
||||
if self.find_handle(®ion).await.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
let countdown_task_handles = Arc::downgrade(&self.countdown_task_handles);
|
||||
let on_task_finished = async move {
|
||||
if let Some(x) = countdown_task_handles.upgrade() {
|
||||
x.lock().await.remove(®ion);
|
||||
} // Else the countdown task handles map could be dropped because the keeper is dropped.
|
||||
};
|
||||
let handle = Arc::new(CountdownTaskHandle::new(
|
||||
self.table_engine.clone(),
|
||||
self.table_ident.clone(),
|
||||
region,
|
||||
|| on_task_finished,
|
||||
));
|
||||
|
||||
let mut handles = self.countdown_task_handles.lock().await;
|
||||
handles.insert(region, handle.clone());
|
||||
|
||||
if self.started.load(Ordering::Relaxed) {
|
||||
handle.start(self.heartbeat_interval_millis).await;
|
||||
|
||||
info!(
|
||||
"Region alive countdown for region {region} in table {} is started!",
|
||||
self.table_ident
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"Region alive countdown for region {region} in table {} is registered but not started yet!",
|
||||
self.table_ident
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async fn deregister_region(&self, region: RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
|
||||
self.countdown_task_handles
|
||||
.lock()
|
||||
.await
|
||||
.remove(®ion)
|
||||
.map(|x| {
|
||||
info!(
|
||||
"Deregister alive countdown for region {region} in table {}",
|
||||
self.table_ident
|
||||
);
|
||||
x
|
||||
})
|
||||
}
|
||||
|
||||
async fn start(&self) {
|
||||
let handles = self.countdown_task_handles.lock().await;
|
||||
for handle in handles.values() {
|
||||
handle.start(self.heartbeat_interval_millis).await;
|
||||
}
|
||||
|
||||
self.started.store(true, Ordering::Relaxed);
|
||||
info!(
|
||||
"Region alive countdowns for regions {:?} in table {} are started!",
|
||||
handles.keys().copied().collect::<Vec<_>>(),
|
||||
self.table_ident
|
||||
);
|
||||
}
|
||||
|
||||
async fn keep_lived(&self, designated_regions: Vec<RegionNumber>, deadline: Instant) {
|
||||
for region in designated_regions {
|
||||
if let Some(handle) = self.find_handle(®ion).await {
|
||||
handle.reset_deadline(deadline).await;
|
||||
}
|
||||
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn deadline(&self, region: RegionNumber) -> Option<Instant> {
|
||||
let mut deadline = None;
|
||||
if let Some(handle) = self.find_handle(®ion).await {
|
||||
let (s, r) = oneshot::channel();
|
||||
if handle.tx.send(CountdownCommand::Deadline(s)).await.is_ok() {
|
||||
deadline = r.await.ok()
|
||||
}
|
||||
}
|
||||
deadline
|
||||
}
|
||||
}
|
||||
|
||||
/// Commands accepted by a region's countdown task.
#[derive(Debug)]
enum CountdownCommand {
    /// Starts the countdown; payload is the heartbeat interval in milliseconds, used to
    /// compute the first deadline.
    Start(u64),
    /// Resets the deadline to the given instant (applied only if it extends the current one).
    Reset(Instant),
    /// Requests the current deadline, replied through the oneshot sender.
    Deadline(oneshot::Sender<Instant>),
}
|
||||
|
||||
/// Handle to a spawned region countdown task. Dropping the handle aborts the task.
struct CountdownTaskHandle {
    /// Command channel to the countdown task.
    tx: mpsc::Sender<CountdownCommand>,
    /// Join handle of the spawned task; aborted on drop.
    handler: JoinHandle<()>,
    // Kept for logging in `Drop`.
    table_ident: TableIdent,
    region: RegionNumber,
}
|
||||
|
||||
impl CountdownTaskHandle {
    /// Creates a new [CountdownTaskHandle] and starts the countdown task.
    /// # Params
    /// - `on_task_finished`: a callback to be invoked when the task is finished. Note that it will not
    /// be invoked if the task is cancelled (by dropping the handle). This is because we want something
    /// meaningful to be done when the task is finished, e.g. deregister the handle from the map.
    /// While dropping the handle does not necessarily mean the task is finished.
    fn new<Fut>(
        table_engine: TableEngineRef,
        table_ident: TableIdent,
        region: RegionNumber,
        on_task_finished: impl FnOnce() -> Fut + Send + 'static,
    ) -> Self
    where
        Fut: Future<Output = ()> + Send,
    {
        let (tx, rx) = mpsc::channel(1024);

        let mut countdown_task = CountdownTask {
            table_engine,
            table_ident: table_ident.clone(),
            region,
            rx,
        };
        // The callback runs only after `run` returns normally; aborting the spawned task
        // (via Drop) skips it.
        let handler = common_runtime::spawn_bg(async move {
            countdown_task.run().await;
            on_task_finished().await;
        });

        Self {
            tx,
            handler,
            table_ident,
            region,
        }
    }

    /// Asks the countdown task to start counting down, with its first deadline derived
    /// from `heartbeat_interval_millis`. A send failure is only logged: it means the task
    /// has already stopped.
    async fn start(&self, heartbeat_interval_millis: u64) {
        if let Err(e) = self
            .tx
            .send(CountdownCommand::Start(heartbeat_interval_millis))
            .await
        {
            warn!(
                "Failed to start region alive keeper countdown: {e}. \
                Maybe the task is stopped due to region been closed."
            );
        }
    }

    /// Asks the countdown task to move its deadline to `deadline`. A send failure is only
    /// logged: it means the task has already stopped.
    async fn reset_deadline(&self, deadline: Instant) {
        if let Err(e) = self.tx.send(CountdownCommand::Reset(deadline)).await {
            warn!(
                "Failed to reset region alive keeper deadline: {e}. \
                Maybe the task is stopped due to region been closed."
            );
        }
    }
}
|
||||
|
||||
impl Drop for CountdownTaskHandle {
    fn drop(&mut self) {
        // Aborting here means the task's `on_task_finished` callback will NOT run for
        // cancelled tasks, matching the contract documented on `new`.
        debug!(
            "Aborting region alive countdown task for region {} in table {}",
            self.region, self.table_ident,
        );
        self.handler.abort();
    }
}
|
||||
|
||||
/// Worker state of a region's countdown: closes the region's table once the deadline passes.
struct CountdownTask {
    table_engine: TableEngineRef,
    table_ident: TableIdent,
    region: RegionNumber,
    /// Command channel; the task exits when all senders are dropped.
    rx: mpsc::Receiver<CountdownCommand>,
}
|
||||
|
||||
impl CountdownTask {
    /// Runs the countdown loop: serves commands from the handle and, once the deadline
    /// elapses, closes the region. Returns (ending the task) when either the region is
    /// closed or the command channel is closed.
    async fn run(&mut self) {
        // 30 years. See `Instant::far_future`.
        let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);

        // Make sure the alive countdown is not gonna happen before heartbeat task is started (the
        // "start countdown" command will be sent from heartbeat task).
        let countdown = tokio::time::sleep_until(far_future);
        tokio::pin!(countdown);

        let region = &self.region;
        let table_ident = &self.table_ident;
        loop {
            tokio::select! {
                command = self.rx.recv() => {
                    match command {
                        Some(CountdownCommand::Start(heartbeat_interval_millis)) => {
                            // Set first deadline in 4 heartbeats (roughly after 20 seconds from now if heartbeat
                            // interval is set to default 5 seconds), to make Datanode and Metasrv more tolerable to
                            // network or other jitters during startup.
                            let first_deadline = Instant::now() + Duration::from_millis(heartbeat_interval_millis) * 4;
                            countdown.set(tokio::time::sleep_until(first_deadline));
                        },
                        Some(CountdownCommand::Reset(deadline)) => {
                            // Only ever extend the deadline; a shorter one is ignored.
                            if countdown.deadline() < deadline {
                                debug!(
                                    "Reset deadline of region {region} of table {table_ident} to approximately {} seconds later",
                                    (deadline - Instant::now()).as_secs_f32(),
                                );
                                countdown.set(tokio::time::sleep_until(deadline));
                            }
                            // Else the countdown could be either:
                            // - not started yet;
                            // - during startup protection;
                            // - received a lagging heartbeat message.
                            // All can be safely ignored.
                        },
                        None => {
                            info!(
                                "The handle of countdown task for region {region} of table {table_ident} \
                                is dropped, RegionAliveKeeper out."
                            );
                            break;
                        },
                        Some(CountdownCommand::Deadline(tx)) => {
                            // Receiver may have been dropped; ignore the send result.
                            let _ = tx.send(countdown.deadline());
                        }
                    }
                }
                () = &mut countdown => {
                    let result = self.close_region().await;
                    warn!(
                        "Region {region} of table {table_ident} is closed, result: {result:?}. \
                        RegionAliveKeeper out.",
                    );
                    break;
                }
            }
        }
    }

    /// Closes this region's table via the table engine, with `flush` requested,
    /// retrying immediately (without delay) until the engine call succeeds.
    async fn close_region(&self) -> CloseTableResult {
        let ctx = EngineContext::default();
        let region = self.region;
        let table_ident = &self.table_ident;
        loop {
            let request = CloseTableRequest {
                catalog_name: table_ident.catalog.clone(),
                schema_name: table_ident.schema.clone(),
                table_name: table_ident.table.clone(),
                region_numbers: vec![region],
                flush: true,
            };
            match self.table_engine.close_table(&ctx, request).await {
                Ok(result) => return result,
                // If region is failed to close, immediately retry. Maybe we should panic instead?
                Err(e) => error!(e;
                    "Failed to close region {region} of table {table_ident}. \
                    For the integrity of data, retry closing and retry without wait.",
                ),
            }
        }
    }
}
|
||||
|
||||
// Tests for the region alive keepers and countdown tasks, using the mock table engine.
#[cfg(test)]
mod test {
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;

    use api::v1::meta::{HeartbeatResponse, RegionLease};
    use common_meta::heartbeat::mailbox::HeartbeatMailbox;
    use datatypes::schema::RawSchema;
    use table::engine::manager::MemoryTableEngineManager;
    use table::engine::{TableEngine, TableReference};
    use table::requests::{CreateTableRequest, TableOptions};
    use table::test_util::EmptyTable;

    use super::*;
    use crate::remote::mock::MockTableEngine;

    // Builds a `RegionAliveKeepers` with one registered table ("my_table", regions 1-3)
    // backed by a mock engine, returning the table's ident and the keepers.
    async fn prepare_keepers() -> (TableIdent, RegionAliveKeepers) {
        let table_engine = Arc::new(MockTableEngine::default());
        let table_engine_manager = Arc::new(MemoryTableEngineManager::new(table_engine));
        let keepers = RegionAliveKeepers::new(table_engine_manager, 5000);

        let catalog = "my_catalog";
        let schema = "my_schema";
        let table = "my_table";
        let table_ident = TableIdent {
            catalog: catalog.to_string(),
            schema: schema.to_string(),
            table: table.to_string(),
            table_id: 1,
            engine: "MockTableEngine".to_string(),
        };
        let table = Arc::new(EmptyTable::new(CreateTableRequest {
            id: 1,
            catalog_name: catalog.to_string(),
            schema_name: schema.to_string(),
            table_name: table.to_string(),
            desc: None,
            schema: RawSchema {
                column_schemas: vec![],
                timestamp_index: None,
                version: 0,
            },
            region_numbers: vec![1, 2, 3],
            primary_key_indices: vec![],
            create_if_not_exists: false,
            table_options: TableOptions::default(),
            engine: "MockTableEngine".to_string(),
        }));
        keepers
            .register_table(table_ident.clone(), table)
            .await
            .unwrap();
        assert!(keepers.keepers.lock().await.contains_key(&table_ident));

        (table_ident, keepers)
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_handle_heartbeat_response() {
        let (table_ident, keepers) = prepare_keepers().await;

        keepers.start().await;
        let startup_protection_until = Instant::now() + Duration::from_secs(21);

        let duration_since_epoch = (Instant::now() - keepers.epoch).as_millis() as _;
        let lease_seconds = 100;
        let response = HeartbeatResponse {
            region_leases: vec![RegionLease {
                table_ident: Some(table_ident.clone().into()),
                regions: vec![1, 3], // Not extending region 2's lease time.
                duration_since_epoch,
                lease_seconds,
            }],
            ..Default::default()
        };
        let keep_alive_until = keepers.epoch
            + Duration::from_millis(duration_since_epoch)
            + Duration::from_secs(lease_seconds);

        let (tx, _) = mpsc::channel(8);
        let mailbox = Arc::new(HeartbeatMailbox::new(tx));
        let mut ctx = HeartbeatResponseHandlerContext::new(mailbox, response);

        assert!(keepers.handle(&mut ctx).await.unwrap() == HandleControl::Continue);

        // sleep to wait for background task spawned in `handle`
        tokio::time::sleep(Duration::from_secs(1)).await;

        // Checks one region's deadline against the expected lease outcome.
        async fn test(
            keeper: &Arc<RegionAliveKeeper>,
            region_number: RegionNumber,
            startup_protection_until: Instant,
            keep_alive_until: Instant,
            is_kept_live: bool,
        ) {
            let deadline = keeper.deadline(region_number).await.unwrap();
            if is_kept_live {
                assert!(deadline > startup_protection_until && deadline == keep_alive_until);
            } else {
                assert!(deadline <= startup_protection_until);
            }
        }

        let keeper = &keepers
            .keepers
            .lock()
            .await
            .get(&table_ident)
            .cloned()
            .unwrap();

        // Test region 1 and 3 is kept lived. Their deadlines are updated to desired instant.
        test(keeper, 1, startup_protection_until, keep_alive_until, true).await;
        test(keeper, 3, startup_protection_until, keep_alive_until, true).await;

        // Test region 2 is not kept lived. It's deadline is not updated: still during startup protection period.
        test(keeper, 2, startup_protection_until, keep_alive_until, false).await;
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_region_alive_keepers() {
        let (table_ident, keepers) = prepare_keepers().await;

        keepers
            .register_region(&RegionIdent {
                cluster_id: 1,
                datanode_id: 1,
                table_ident: table_ident.clone(),
                region_number: 4,
            })
            .await;

        keepers.start().await;
        for keeper in keepers.keepers.lock().await.values() {
            let regions = {
                let handles = keeper.countdown_task_handles.lock().await;
                handles.keys().copied().collect::<Vec<_>>()
            };
            for region in regions {
                // assert countdown tasks are started
                let deadline = keeper.deadline(region).await.unwrap();
                assert!(deadline <= Instant::now() + Duration::from_secs(20));
            }
        }

        keepers
            .deregister_region(&RegionIdent {
                cluster_id: 1,
                datanode_id: 1,
                table_ident: table_ident.clone(),
                region_number: 1,
            })
            .await;
        let mut regions = keepers
            .find_keeper(&table_ident)
            .await
            .unwrap()
            .countdown_task_handles
            .lock()
            .await
            .keys()
            .copied()
            .collect::<Vec<_>>();
        regions.sort();
        assert_eq!(regions, vec![2, 3, 4]);

        let keeper = keepers.deregister_table(&table_ident).await.unwrap();
        assert!(Arc::try_unwrap(keeper).is_ok(), "keeper is not dropped");
        assert!(keepers.keepers.lock().await.is_empty());
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_region_alive_keeper() {
        let table_engine = Arc::new(MockTableEngine::default());
        let table_ident = TableIdent {
            catalog: "my_catalog".to_string(),
            schema: "my_schema".to_string(),
            table: "my_table".to_string(),
            table_id: 1024,
            engine: "mito".to_string(),
        };
        let keeper = RegionAliveKeeper::new(table_engine, table_ident, 1000);

        let region = 1;
        assert!(keeper.find_handle(&region).await.is_none());
        keeper.register_region(region).await;
        assert!(keeper.find_handle(&region).await.is_some());

        let ten_seconds_later = || Instant::now() + Duration::from_secs(10);

        keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
        // Regions 2 and 3 were never registered, so `keep_lived` must not create them.
        assert!(keeper.find_handle(&2).await.is_none());
        assert!(keeper.find_handle(&3).await.is_none());

        let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
        // assert if keeper is not started, keep_lived is of no use
        assert!(keeper.deadline(region).await.unwrap() > far_future);

        keeper.start().await;
        keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
        // assert keep_lived works if keeper is started
        assert!(keeper.deadline(region).await.unwrap() <= ten_seconds_later());

        let handle = keeper.deregister_region(region).await.unwrap();
        assert!(Arc::try_unwrap(handle).is_ok(), "handle is not dropped");
        assert!(keeper.find_handle(&region).await.is_none());
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_countdown_task_handle() {
        let table_engine = Arc::new(MockTableEngine::default());
        let table_ident = TableIdent {
            catalog: "my_catalog".to_string(),
            schema: "my_schema".to_string(),
            table: "my_table".to_string(),
            table_id: 1024,
            engine: "mito".to_string(),
        };
        let finished = Arc::new(AtomicBool::new(false));
        let finished_clone = finished.clone();
        let handle = CountdownTaskHandle::new(
            table_engine.clone(),
            table_ident.clone(),
            1,
            || async move { finished_clone.store(true, Ordering::Relaxed) },
        );
        let tx = handle.tx.clone();

        // assert countdown task is running
        assert!(tx.send(CountdownCommand::Start(5000)).await.is_ok());
        assert!(!finished.load(Ordering::Relaxed));

        drop(handle);
        tokio::time::sleep(Duration::from_secs(1)).await;

        // assert countdown task is stopped
        assert!(tx
            .try_send(CountdownCommand::Reset(
                Instant::now() + Duration::from_secs(10)
            ))
            .is_err());
        // assert `on_task_finished` is not called (because the task is aborted by the handle's drop)
        assert!(!finished.load(Ordering::Relaxed));

        let finished = Arc::new(AtomicBool::new(false));
        let finished_clone = finished.clone();
        let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, || async move {
            finished_clone.store(true, Ordering::Relaxed)
        });
        handle.tx.send(CountdownCommand::Start(100)).await.unwrap();
        tokio::time::sleep(Duration::from_secs(1)).await;
        // assert `on_task_finished` is called when task is finished normally
        assert!(finished.load(Ordering::Relaxed));
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_countdown_task_run() {
        let ctx = &EngineContext::default();
        let catalog = "my_catalog";
        let schema = "my_schema";
        let table = "my_table";
        let request = CreateTableRequest {
            id: 1,
            catalog_name: catalog.to_string(),
            schema_name: schema.to_string(),
            table_name: table.to_string(),
            desc: None,
            schema: RawSchema {
                column_schemas: vec![],
                timestamp_index: None,
                version: 0,
            },
            region_numbers: vec![],
            primary_key_indices: vec![],
            create_if_not_exists: false,
            table_options: TableOptions::default(),
            engine: "mito".to_string(),
        };
        let table_ref = TableReference::full(catalog, schema, table);

        let table_engine = Arc::new(MockTableEngine::default());
        table_engine.create_table(ctx, request).await.unwrap();

        let table_ident = TableIdent {
            catalog: catalog.to_string(),
            schema: schema.to_string(),
            table: table.to_string(),
            table_id: 1024,
            engine: "mito".to_string(),
        };
        let (tx, rx) = mpsc::channel(10);
        let mut task = CountdownTask {
            table_engine: table_engine.clone(),
            table_ident,
            region: 1,
            rx,
        };
        common_runtime::spawn_bg(async move {
            task.run().await;
        });

        // Queries the countdown task's current deadline via the Deadline command.
        async fn deadline(tx: &mpsc::Sender<CountdownCommand>) -> Instant {
            let (s, r) = oneshot::channel();
            tx.send(CountdownCommand::Deadline(s)).await.unwrap();
            r.await.unwrap()
        }

        // if countdown task is not started, its deadline is set to far future
        assert!(deadline(&tx).await > Instant::now() + Duration::from_secs(86400 * 365 * 29));

        // start countdown in 250ms * 4 = 1s
        tx.send(CountdownCommand::Start(250)).await.unwrap();
        // assert deadline is correctly set
        assert!(deadline(&tx).await <= Instant::now() + Duration::from_secs(1));

        // reset countdown in 1.5s
        tx.send(CountdownCommand::Reset(
            Instant::now() + Duration::from_millis(1500),
        ))
        .await
        .unwrap();

        // assert the table is closed after deadline is reached
        assert!(table_engine.table_exists(ctx, &table_ref));
        // spare 500ms for the task to close the table
        tokio::time::sleep(Duration::from_millis(2000)).await;
        assert!(!table_engine.table_exists(ctx, &table_ref));
    }
}
|
||||
@@ -19,6 +19,7 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
|
||||
use common_telemetry::logging;
|
||||
use snafu::ResultExt;
|
||||
use table::metadata::TableId;
|
||||
use table::{Table, TableRef};
|
||||
@@ -91,12 +92,21 @@ impl SystemCatalog {
|
||||
&self,
|
||||
request: &DeregisterTableRequest,
|
||||
table_id: TableId,
|
||||
) -> CatalogResult<bool> {
|
||||
) -> CatalogResult<()> {
|
||||
self.information_schema
|
||||
.system
|
||||
.delete(build_table_deletion_request(request, table_id))
|
||||
.await
|
||||
.map(|x| x == 1)
|
||||
.map(|x| {
|
||||
if x != 1 {
|
||||
let table = common_catalog::format_full_table_name(
|
||||
&request.catalog,
|
||||
&request.schema,
|
||||
&request.table_name
|
||||
);
|
||||
logging::warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
|
||||
}
|
||||
})
|
||||
.with_context(|_| error::DeregisterTableSnafu {
|
||||
request: request.clone(),
|
||||
})
|
||||
|
||||
@@ -19,20 +19,38 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use catalog::remote::mock::{MockKvBackend, MockTableEngine};
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::remote::{
|
||||
CachedMetaKvBackend, KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider,
|
||||
RemoteSchemaProvider,
|
||||
};
|
||||
use catalog::{CatalogManager, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
|
||||
use common_meta::ident::TableIdent;
|
||||
use datatypes::schema::RawSchema;
|
||||
use futures_util::StreamExt;
|
||||
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::requests::CreateTableRequest;
|
||||
use table::test_util::EmptyTable;
|
||||
use tokio::time::Instant;
|
||||
|
||||
struct TestingComponents {
|
||||
kv_backend: KvBackendRef,
|
||||
catalog_manager: Arc<RemoteCatalogManager>,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
impl TestingComponents {
|
||||
fn table_engine(&self) -> TableEngineRef {
|
||||
self.table_engine_manager.engine(MITO_ENGINE).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_backend() {
|
||||
@@ -120,14 +138,7 @@ mod tests {
|
||||
assert!(ret.is_none());
|
||||
}
|
||||
|
||||
async fn prepare_components(
|
||||
node_id: u64,
|
||||
) -> (
|
||||
KvBackendRef,
|
||||
TableEngineRef,
|
||||
Arc<RemoteCatalogManager>,
|
||||
TableEngineManagerRef,
|
||||
) {
|
||||
async fn prepare_components(node_id: u64) -> TestingComponents {
|
||||
let cached_backend = Arc::new(CachedMetaKvBackend::wrap(
|
||||
Arc::new(MockKvBackend::default()),
|
||||
));
|
||||
@@ -135,26 +146,34 @@ mod tests {
|
||||
let table_engine = Arc::new(MockTableEngine::default());
|
||||
let engine_manager = Arc::new(MemoryTableEngineManager::alias(
|
||||
MITO_ENGINE.to_string(),
|
||||
table_engine.clone(),
|
||||
table_engine,
|
||||
));
|
||||
|
||||
let catalog_manager =
|
||||
RemoteCatalogManager::new(engine_manager.clone(), node_id, cached_backend.clone());
|
||||
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(engine_manager.clone(), 5000));
|
||||
|
||||
let catalog_manager = RemoteCatalogManager::new(
|
||||
engine_manager.clone(),
|
||||
node_id,
|
||||
cached_backend.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
);
|
||||
catalog_manager.start().await.unwrap();
|
||||
|
||||
(
|
||||
cached_backend,
|
||||
table_engine,
|
||||
Arc::new(catalog_manager),
|
||||
engine_manager as Arc<_>,
|
||||
)
|
||||
TestingComponents {
|
||||
kv_backend: cached_backend,
|
||||
catalog_manager: Arc::new(catalog_manager),
|
||||
table_engine_manager: engine_manager,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_remote_catalog_default() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let node_id = 42;
|
||||
let (_, _, catalog_manager, _) = prepare_components(node_id).await;
|
||||
let TestingComponents {
|
||||
catalog_manager, ..
|
||||
} = prepare_components(node_id).await;
|
||||
assert_eq!(
|
||||
vec![DEFAULT_CATALOG_NAME.to_string()],
|
||||
catalog_manager.catalog_names().await.unwrap()
|
||||
@@ -175,14 +194,16 @@ mod tests {
|
||||
async fn test_remote_catalog_register_nonexistent() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let node_id = 42;
|
||||
let (_, table_engine, catalog_manager, _) = prepare_components(node_id).await;
|
||||
let components = prepare_components(node_id).await;
|
||||
|
||||
// register a new table with an nonexistent catalog
|
||||
let catalog_name = "nonexistent_catalog".to_string();
|
||||
let schema_name = "nonexistent_schema".to_string();
|
||||
let table_name = "fail_table".to_string();
|
||||
// this schema has no effect
|
||||
let table_schema = RawSchema::new(vec![]);
|
||||
let table = table_engine
|
||||
let table = components
|
||||
.table_engine()
|
||||
.create_table(
|
||||
&EngineContext {},
|
||||
CreateTableRequest {
|
||||
@@ -208,7 +229,7 @@ mod tests {
|
||||
table_id: 1,
|
||||
table,
|
||||
};
|
||||
let res = catalog_manager.register_table(reg_req).await;
|
||||
let res = components.catalog_manager.register_table(reg_req).await;
|
||||
|
||||
// because nonexistent_catalog does not exist yet.
|
||||
assert_matches!(
|
||||
@@ -220,7 +241,8 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_register_table() {
|
||||
let node_id = 42;
|
||||
let (_, table_engine, catalog_manager, _) = prepare_components(node_id).await;
|
||||
let components = prepare_components(node_id).await;
|
||||
let catalog_manager = &components.catalog_manager;
|
||||
let default_catalog = catalog_manager
|
||||
.catalog(DEFAULT_CATALOG_NAME)
|
||||
.await
|
||||
@@ -244,7 +266,8 @@ mod tests {
|
||||
let table_id = 1;
|
||||
// this schema has no effect
|
||||
let table_schema = RawSchema::new(vec![]);
|
||||
let table = table_engine
|
||||
let table = components
|
||||
.table_engine()
|
||||
.create_table(
|
||||
&EngineContext {},
|
||||
CreateTableRequest {
|
||||
@@ -280,8 +303,10 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_register_catalog_schema_table() {
|
||||
let node_id = 42;
|
||||
let (backend, table_engine, catalog_manager, engine_manager) =
|
||||
prepare_components(node_id).await;
|
||||
let components = prepare_components(node_id).await;
|
||||
let backend = &components.kv_backend;
|
||||
let catalog_manager = components.catalog_manager.clone();
|
||||
let engine_manager = components.table_engine_manager.clone();
|
||||
|
||||
let catalog_name = "test_catalog".to_string();
|
||||
let schema_name = "nonexistent_schema".to_string();
|
||||
@@ -290,6 +315,7 @@ mod tests {
|
||||
backend.clone(),
|
||||
engine_manager.clone(),
|
||||
node_id,
|
||||
components.region_alive_keepers.clone(),
|
||||
));
|
||||
|
||||
// register catalog to catalog manager
|
||||
@@ -303,7 +329,8 @@ mod tests {
|
||||
HashSet::from_iter(catalog_manager.catalog_names().await.unwrap().into_iter())
|
||||
);
|
||||
|
||||
let table_to_register = table_engine
|
||||
let table_to_register = components
|
||||
.table_engine()
|
||||
.create_table(
|
||||
&EngineContext {},
|
||||
CreateTableRequest {
|
||||
@@ -350,6 +377,7 @@ mod tests {
|
||||
node_id,
|
||||
engine_manager,
|
||||
backend.clone(),
|
||||
components.region_alive_keepers.clone(),
|
||||
));
|
||||
|
||||
let prev = new_catalog
|
||||
@@ -369,4 +397,94 @@ mod tests {
|
||||
.collect()
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_register_table_before_and_after_region_alive_keeper_started() {
|
||||
let components = prepare_components(42).await;
|
||||
let catalog_manager = &components.catalog_manager;
|
||||
let region_alive_keepers = &components.region_alive_keepers;
|
||||
|
||||
let table_before = TableIdent {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "table_before".to_string(),
|
||||
table_id: 1,
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
};
|
||||
let request = RegisterTableRequest {
|
||||
catalog: table_before.catalog.clone(),
|
||||
schema: table_before.schema.clone(),
|
||||
table_name: table_before.table.clone(),
|
||||
table_id: table_before.table_id,
|
||||
table: Arc::new(EmptyTable::new(CreateTableRequest {
|
||||
id: table_before.table_id,
|
||||
catalog_name: table_before.catalog.clone(),
|
||||
schema_name: table_before.schema.clone(),
|
||||
table_name: table_before.table.clone(),
|
||||
desc: None,
|
||||
schema: RawSchema::new(vec![]),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
})),
|
||||
};
|
||||
assert!(catalog_manager.register_table(request).await.unwrap());
|
||||
|
||||
let keeper = region_alive_keepers
|
||||
.find_keeper(&table_before)
|
||||
.await
|
||||
.unwrap();
|
||||
let deadline = keeper.deadline(0).await.unwrap();
|
||||
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
|
||||
// assert region alive countdown is not started
|
||||
assert!(deadline > far_future);
|
||||
|
||||
region_alive_keepers.start().await;
|
||||
|
||||
let table_after = TableIdent {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: "table_after".to_string(),
|
||||
table_id: 2,
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
};
|
||||
let request = RegisterTableRequest {
|
||||
catalog: table_after.catalog.clone(),
|
||||
schema: table_after.schema.clone(),
|
||||
table_name: table_after.table.clone(),
|
||||
table_id: table_after.table_id,
|
||||
table: Arc::new(EmptyTable::new(CreateTableRequest {
|
||||
id: table_after.table_id,
|
||||
catalog_name: table_after.catalog.clone(),
|
||||
schema_name: table_after.schema.clone(),
|
||||
table_name: table_after.table.clone(),
|
||||
desc: None,
|
||||
schema: RawSchema::new(vec![]),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
engine: MITO_ENGINE.to_string(),
|
||||
})),
|
||||
};
|
||||
assert!(catalog_manager.register_table(request).await.unwrap());
|
||||
|
||||
let keeper = region_alive_keepers
|
||||
.find_keeper(&table_after)
|
||||
.await
|
||||
.unwrap();
|
||||
let deadline = keeper.deadline(0).await.unwrap();
|
||||
// assert countdown is started for the table registered after [RegionAliveKeepers] started
|
||||
assert!(deadline <= Instant::now() + Duration::from_secs(20));
|
||||
|
||||
let keeper = region_alive_keepers
|
||||
.find_keeper(&table_before)
|
||||
.await
|
||||
.unwrap();
|
||||
let deadline = keeper.deadline(0).await.unwrap();
|
||||
// assert countdown is started for the table registered before [RegionAliveKeepers] started, too
|
||||
assert!(deadline <= Instant::now() + Duration::from_secs(20));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,4 +52,4 @@ serde.workspace = true
|
||||
toml = "0.5"
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.1.3"
|
||||
build-data = "0.1.4"
|
||||
|
||||
@@ -93,6 +93,8 @@ struct StartCommand {
|
||||
#[clap(long)]
|
||||
use_memory_store: bool,
|
||||
#[clap(long)]
|
||||
disable_region_failover: bool,
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
http_timeout: Option<u64>,
|
||||
@@ -134,9 +136,9 @@ impl StartCommand {
|
||||
.context(error::UnsupportedSelectorTypeSnafu { selector_type })?;
|
||||
}
|
||||
|
||||
if self.use_memory_store {
|
||||
opts.use_memory_store = true;
|
||||
}
|
||||
opts.use_memory_store = self.use_memory_store;
|
||||
|
||||
opts.disable_region_failover = self.disable_region_failover;
|
||||
|
||||
if let Some(http_addr) = &self.http_addr {
|
||||
opts.http_opts.addr = http_addr.clone();
|
||||
|
||||
@@ -12,24 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::future::Future;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use async_trait::async_trait;
|
||||
use datafusion::parquet::format::FileMetaData;
|
||||
use object_store::Writer;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tokio::io::{AsyncWrite, AsyncWriteExt};
|
||||
use tokio_util::compat::Compat;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::share_buffer::SharedBuffer;
|
||||
|
||||
pub struct BufferedWriter<T, U> {
|
||||
writer: T,
|
||||
/// None stands for [`BufferedWriter`] closed.
|
||||
pub struct LazyBufferedWriter<T, U, F> {
|
||||
path: String,
|
||||
writer_factory: F,
|
||||
writer: Option<T>,
|
||||
/// None stands for [`LazyBufferedWriter`] closed.
|
||||
encoder: Option<U>,
|
||||
buffer: SharedBuffer,
|
||||
rows_written: usize,
|
||||
bytes_written: u64,
|
||||
flushed: bool,
|
||||
threshold: usize,
|
||||
}
|
||||
|
||||
@@ -42,58 +44,79 @@ pub trait ArrowWriterCloser {
|
||||
async fn close(mut self) -> Result<FileMetaData>;
|
||||
}
|
||||
|
||||
pub type DefaultBufferedWriter<E> = BufferedWriter<Compat<Writer>, E>;
|
||||
|
||||
impl<T: AsyncWrite + Send + Unpin, U: DfRecordBatchEncoder + ArrowWriterCloser>
|
||||
BufferedWriter<T, U>
|
||||
impl<
|
||||
T: AsyncWrite + Send + Unpin,
|
||||
U: DfRecordBatchEncoder + ArrowWriterCloser,
|
||||
F: FnMut(String) -> Fut,
|
||||
Fut: Future<Output = Result<T>>,
|
||||
> LazyBufferedWriter<T, U, F>
|
||||
{
|
||||
/// Closes `LazyBufferedWriter` and optionally flushes all data to underlying storage
|
||||
/// if any row's been written.
|
||||
pub async fn close_with_arrow_writer(mut self) -> Result<(FileMetaData, u64)> {
|
||||
let encoder = self
|
||||
.encoder
|
||||
.take()
|
||||
.context(error::BufferedWriterClosedSnafu)?;
|
||||
let metadata = encoder.close().await?;
|
||||
let written = self.try_flush(true).await?;
|
||||
|
||||
// Use `rows_written` to keep a track of if any rows have been written.
|
||||
// If no row's been written, then we can simply close the underlying
|
||||
// writer without flush so that no file will be actually created.
|
||||
if self.rows_written != 0 {
|
||||
self.bytes_written += self.try_flush(true).await?;
|
||||
}
|
||||
// It's important to shut down! flushes all pending writes
|
||||
self.close().await?;
|
||||
Ok((metadata, written))
|
||||
self.close_inner_writer().await?;
|
||||
Ok((metadata, self.bytes_written))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsyncWrite + Send + Unpin, U: DfRecordBatchEncoder> BufferedWriter<T, U> {
|
||||
pub async fn close(&mut self) -> Result<()> {
|
||||
self.writer.shutdown().await.context(error::AsyncWriteSnafu)
|
||||
impl<
|
||||
T: AsyncWrite + Send + Unpin,
|
||||
U: DfRecordBatchEncoder,
|
||||
F: FnMut(String) -> Fut,
|
||||
Fut: Future<Output = Result<T>>,
|
||||
> LazyBufferedWriter<T, U, F>
|
||||
{
|
||||
/// Closes the writer without flushing the buffer data.
|
||||
pub async fn close_inner_writer(&mut self) -> Result<()> {
|
||||
if let Some(writer) = &mut self.writer {
|
||||
writer.shutdown().await.context(error::AsyncWriteSnafu)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(threshold: usize, buffer: SharedBuffer, encoder: U, writer: T) -> Self {
|
||||
pub fn new(
|
||||
threshold: usize,
|
||||
buffer: SharedBuffer,
|
||||
encoder: U,
|
||||
path: impl AsRef<str>,
|
||||
writer_factory: F,
|
||||
) -> Self {
|
||||
Self {
|
||||
path: path.as_ref().to_string(),
|
||||
threshold,
|
||||
writer,
|
||||
encoder: Some(encoder),
|
||||
buffer,
|
||||
rows_written: 0,
|
||||
bytes_written: 0,
|
||||
flushed: false,
|
||||
writer_factory,
|
||||
writer: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bytes_written(&self) -> u64 {
|
||||
self.bytes_written
|
||||
}
|
||||
|
||||
pub async fn write(&mut self, batch: &RecordBatch) -> Result<()> {
|
||||
let encoder = self
|
||||
.encoder
|
||||
.as_mut()
|
||||
.context(error::BufferedWriterClosedSnafu)?;
|
||||
encoder.write(batch)?;
|
||||
self.try_flush(false).await?;
|
||||
self.rows_written += batch.num_rows();
|
||||
self.bytes_written += self.try_flush(false).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flushed(&self) -> bool {
|
||||
self.flushed
|
||||
}
|
||||
|
||||
pub async fn try_flush(&mut self, all: bool) -> Result<u64> {
|
||||
let mut bytes_written: u64 = 0;
|
||||
|
||||
@@ -106,7 +129,8 @@ impl<T: AsyncWrite + Send + Unpin, U: DfRecordBatchEncoder> BufferedWriter<T, U>
|
||||
};
|
||||
let size = chunk.len();
|
||||
|
||||
self.writer
|
||||
self.maybe_init_writer()
|
||||
.await?
|
||||
.write_all(&chunk)
|
||||
.await
|
||||
.context(error::AsyncWriteSnafu)?;
|
||||
@@ -117,22 +141,27 @@ impl<T: AsyncWrite + Send + Unpin, U: DfRecordBatchEncoder> BufferedWriter<T, U>
|
||||
if all {
|
||||
bytes_written += self.try_flush_all().await?;
|
||||
}
|
||||
|
||||
self.flushed = bytes_written > 0;
|
||||
self.bytes_written += bytes_written;
|
||||
|
||||
Ok(bytes_written)
|
||||
}
|
||||
|
||||
/// Only initiates underlying file writer when rows have been written.
|
||||
async fn maybe_init_writer(&mut self) -> Result<&mut T> {
|
||||
if let Some(ref mut writer) = self.writer {
|
||||
Ok(writer)
|
||||
} else {
|
||||
let writer = (self.writer_factory)(self.path.clone()).await?;
|
||||
Ok(self.writer.insert(writer))
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_flush_all(&mut self) -> Result<u64> {
|
||||
let remain = self.buffer.buffer.lock().unwrap().split();
|
||||
let size = remain.len();
|
||||
|
||||
self.writer
|
||||
self.maybe_init_writer()
|
||||
.await?
|
||||
.write_all(&remain)
|
||||
.await
|
||||
.context(error::AsyncWriteSnafu)?;
|
||||
|
||||
Ok(size as u64)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,12 +35,11 @@ use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use futures::StreamExt;
|
||||
use object_store::ObjectStore;
|
||||
use snafu::ResultExt;
|
||||
use tokio_util::compat::FuturesAsyncWriteCompatExt;
|
||||
|
||||
use self::csv::CsvFormat;
|
||||
use self::json::JsonFormat;
|
||||
use self::parquet::ParquetFormat;
|
||||
use crate::buffered_writer::{BufferedWriter, DfRecordBatchEncoder};
|
||||
use crate::buffered_writer::{DfRecordBatchEncoder, LazyBufferedWriter};
|
||||
use crate::compression::CompressionType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::share_buffer::SharedBuffer;
|
||||
@@ -59,6 +58,16 @@ pub enum Format {
|
||||
Parquet(ParquetFormat),
|
||||
}
|
||||
|
||||
impl Format {
|
||||
pub fn suffix(&self) -> &'static str {
|
||||
match self {
|
||||
Format::Csv(_) => ".csv",
|
||||
Format::Json(_) => ".json",
|
||||
Format::Parquet(_) => ".parquet",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&HashMap<String, String>> for Format {
|
||||
type Error = error::Error;
|
||||
|
||||
@@ -181,15 +190,14 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
|
||||
threshold: usize,
|
||||
encoder_factory: U,
|
||||
) -> Result<usize> {
|
||||
let writer = store
|
||||
.writer(path)
|
||||
.await
|
||||
.context(error::WriteObjectSnafu { path })?
|
||||
.compat_write();
|
||||
|
||||
let buffer = SharedBuffer::with_capacity(threshold);
|
||||
let encoder = encoder_factory(buffer.clone());
|
||||
let mut writer = BufferedWriter::new(threshold, buffer, encoder, writer);
|
||||
let mut writer = LazyBufferedWriter::new(threshold, buffer, encoder, path, |path| async {
|
||||
store
|
||||
.writer(&path)
|
||||
.await
|
||||
.context(error::WriteObjectSnafu { path })
|
||||
});
|
||||
|
||||
let mut rows = 0;
|
||||
|
||||
@@ -201,8 +209,7 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
|
||||
|
||||
// Flushes all pending writes
|
||||
writer.try_flush(true).await?;
|
||||
|
||||
writer.close().await?;
|
||||
writer.close_inner_writer().await?;
|
||||
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
api = { path = "../../api" }
|
||||
async-trait.workspace = true
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-error = { path = "../error" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
|
||||
@@ -52,6 +52,9 @@ pub enum Error {
|
||||
err_msg: String,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid protobuf message, err: {}", err_msg))]
|
||||
InvalidProtoMsg { err_msg: String, location: Location },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -61,7 +64,10 @@ impl ErrorExt for Error {
|
||||
use Error::*;
|
||||
match self {
|
||||
IllegalServerState { .. } => StatusCode::Internal,
|
||||
SerdeJson { .. } | RouteInfoCorrupted { .. } => StatusCode::Unexpected,
|
||||
|
||||
SerdeJson { .. } | RouteInfoCorrupted { .. } | InvalidProtoMsg { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
|
||||
SendMessage { .. } => StatusCode::Internal,
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::HeartbeatResponse;
|
||||
use async_trait::async_trait;
|
||||
use common_telemetry::error;
|
||||
|
||||
use crate::error::Result;
|
||||
@@ -57,14 +58,16 @@ impl HeartbeatResponseHandlerContext {
|
||||
/// [`HeartbeatResponseHandler::is_acceptable`] returns true if handler can handle incoming [`HeartbeatResponseHandlerContext`].
|
||||
///
|
||||
/// [`HeartbeatResponseHandler::handle`] handles all or part of incoming [`HeartbeatResponseHandlerContext`].
|
||||
#[async_trait]
|
||||
pub trait HeartbeatResponseHandler: Send + Sync {
|
||||
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool;
|
||||
|
||||
fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl>;
|
||||
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait HeartbeatResponseHandlerExecutor: Send + Sync {
|
||||
fn handle(&self, ctx: HeartbeatResponseHandlerContext) -> Result<()>;
|
||||
async fn handle(&self, ctx: HeartbeatResponseHandlerContext) -> Result<()>;
|
||||
}
|
||||
|
||||
pub struct HandlerGroupExecutor {
|
||||
@@ -77,14 +80,15 @@ impl HandlerGroupExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatResponseHandlerExecutor for HandlerGroupExecutor {
|
||||
fn handle(&self, mut ctx: HeartbeatResponseHandlerContext) -> Result<()> {
|
||||
async fn handle(&self, mut ctx: HeartbeatResponseHandlerContext) -> Result<()> {
|
||||
for handler in &self.handlers {
|
||||
if !handler.is_acceptable(&ctx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
match handler.handle(&mut ctx) {
|
||||
match handler.handle(&mut ctx).await {
|
||||
Ok(HandleControl::Done) => break,
|
||||
Ok(HandleControl::Continue) => {}
|
||||
Err(e) => {
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::heartbeat::handler::{
|
||||
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
|
||||
@@ -21,12 +23,13 @@ use crate::heartbeat::utils::mailbox_message_to_incoming_message;
|
||||
#[derive(Default)]
|
||||
pub struct ParseMailboxMessageHandler;
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatResponseHandler for ParseMailboxMessageHandler {
|
||||
fn is_acceptable(&self, _ctx: &HeartbeatResponseHandlerContext) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl> {
|
||||
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl> {
|
||||
if let Some(message) = &ctx.response.mailbox_message {
|
||||
if message.payload.is_some() {
|
||||
// mailbox_message_to_incoming_message will raise an error if payload is none
|
||||
|
||||
71
src/common/meta/src/ident.rs
Normal file
71
src/common/meta/src/ident.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use api::v1::meta::{TableIdent as RawTableIdent, TableName};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{Error, InvalidProtoMsgSnafu};
|
||||
|
||||
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct TableIdent {
|
||||
pub catalog: String,
|
||||
pub schema: String,
|
||||
pub table: String,
|
||||
pub table_id: u32,
|
||||
pub engine: String,
|
||||
}
|
||||
|
||||
impl Display for TableIdent {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Table(id={}, name='{}.{}.{}', engine='{}')",
|
||||
self.table_id, self.catalog, self.schema, self.table, self.engine,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<RawTableIdent> for TableIdent {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: RawTableIdent) -> Result<Self, Self::Error> {
|
||||
let table_name = value.table_name.context(InvalidProtoMsgSnafu {
|
||||
err_msg: "'table_name' is missing in TableIdent",
|
||||
})?;
|
||||
Ok(Self {
|
||||
catalog: table_name.catalog_name,
|
||||
schema: table_name.schema_name,
|
||||
table: table_name.table_name,
|
||||
table_id: value.table_id,
|
||||
engine: value.engine,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TableIdent> for RawTableIdent {
|
||||
fn from(table_ident: TableIdent) -> Self {
|
||||
Self {
|
||||
table_id: table_ident.table_id,
|
||||
engine: table_ident.engine,
|
||||
table_name: Some(TableName {
|
||||
catalog_name: table_ident.catalog,
|
||||
schema_name: table_ident.schema,
|
||||
table_name: table_ident.table,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ use std::fmt::{Display, Formatter};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::ident::TableIdent;
|
||||
use crate::{ClusterId, DatanodeId};
|
||||
|
||||
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
|
||||
@@ -49,25 +50,6 @@ impl From<RegionIdent> for TableIdent {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct TableIdent {
|
||||
pub catalog: String,
|
||||
pub schema: String,
|
||||
pub table: String,
|
||||
pub table_id: u32,
|
||||
pub engine: String,
|
||||
}
|
||||
|
||||
impl Display for TableIdent {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"TableIdent(table_id='{}', table_name='{}.{}.{}', table_engine='{}')",
|
||||
self.table_id, self.catalog, self.schema, self.table, self.engine,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
pub struct SimpleReply {
|
||||
pub result: bool,
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
pub mod error;
|
||||
pub mod heartbeat;
|
||||
pub mod ident;
|
||||
pub mod instruction;
|
||||
pub mod key;
|
||||
pub mod peer;
|
||||
|
||||
@@ -20,6 +20,7 @@ mod udf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
pub use expr::build_filter_from_timestamp;
|
||||
|
||||
pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
|
||||
pub use self::expr::{DfExpr, Expr};
|
||||
@@ -28,7 +29,6 @@ pub use self::udf::ScalarUdf;
|
||||
use crate::function::{ReturnTypeFunction, ScalarFunctionImplementation};
|
||||
use crate::logical_plan::accumulator::*;
|
||||
use crate::signature::{Signature, Volatility};
|
||||
|
||||
/// Creates a new UDF with a specific signature and specific return type.
|
||||
/// This is a helper function to create a new UDF.
|
||||
/// The function `create_udf` returns a subset of all possible `ScalarFunction`:
|
||||
|
||||
@@ -12,7 +12,12 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_time::range::TimestampRange;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datafusion_common::{Column, ScalarValue};
|
||||
pub use datafusion_expr::expr::Expr as DfExpr;
|
||||
use datafusion_expr::{and, binary_expr, Operator};
|
||||
|
||||
/// Central struct of query API.
|
||||
/// Represent logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
|
||||
@@ -33,6 +38,54 @@ impl From<DfExpr> for Expr {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an `Expr` that filters timestamp column from given timestamp range.
|
||||
/// Returns [None] if time range is [None] or full time range.
|
||||
pub fn build_filter_from_timestamp(
|
||||
ts_col_name: &str,
|
||||
time_range: Option<&TimestampRange>,
|
||||
) -> Option<Expr> {
|
||||
let Some(time_range) = time_range else { return None; };
|
||||
let ts_col_expr = DfExpr::Column(Column {
|
||||
relation: None,
|
||||
name: ts_col_name.to_string(),
|
||||
});
|
||||
|
||||
let df_expr = match (time_range.start(), time_range.end()) {
|
||||
(None, None) => None,
|
||||
(Some(start), None) => Some(binary_expr(
|
||||
ts_col_expr,
|
||||
Operator::GtEq,
|
||||
timestamp_to_literal(start),
|
||||
)),
|
||||
(None, Some(end)) => Some(binary_expr(
|
||||
ts_col_expr,
|
||||
Operator::Lt,
|
||||
timestamp_to_literal(end),
|
||||
)),
|
||||
(Some(start), Some(end)) => Some(and(
|
||||
binary_expr(
|
||||
ts_col_expr.clone(),
|
||||
Operator::GtEq,
|
||||
timestamp_to_literal(start),
|
||||
),
|
||||
binary_expr(ts_col_expr, Operator::Lt, timestamp_to_literal(end)),
|
||||
)),
|
||||
};
|
||||
|
||||
df_expr.map(Expr::from)
|
||||
}
|
||||
|
||||
/// Converts a [Timestamp] to datafusion literal value.
|
||||
fn timestamp_to_literal(timestamp: &Timestamp) -> DfExpr {
|
||||
let scalar_value = match timestamp.unit() {
|
||||
TimeUnit::Second => ScalarValue::TimestampSecond(Some(timestamp.value()), None),
|
||||
TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(Some(timestamp.value()), None),
|
||||
TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(Some(timestamp.value()), None),
|
||||
TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(timestamp.value()), None),
|
||||
};
|
||||
DfExpr::Literal(scalar_value)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -52,6 +52,12 @@ impl From<i32> for Date {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NaiveDate> for Date {
|
||||
fn from(date: NaiveDate) -> Self {
|
||||
Self(date.num_days_from_ce() - UNIX_EPOCH_FROM_CE)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Date {
|
||||
/// [Date] is formatted according to ISO-8601 standard.
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
|
||||
use std::fmt::{Debug, Display, Formatter};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::timestamp::TimeUnit;
|
||||
use crate::timestamp_millis::TimestampMillis;
|
||||
use crate::Timestamp;
|
||||
@@ -23,7 +25,7 @@ use crate::Timestamp;
|
||||
/// The range contains values that `value >= start` and `val < end`.
|
||||
///
|
||||
/// The range is empty iff `start == end == "the default value of T"`
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct GenericRange<T> {
|
||||
start: Option<T>,
|
||||
end: Option<T>,
|
||||
@@ -522,4 +524,25 @@ mod tests {
|
||||
);
|
||||
assert!(range.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_timestamp_range() {
|
||||
macro_rules! test_serde_for_unit {
|
||||
($($unit: expr),*) => {
|
||||
$(
|
||||
let original_range = TimestampRange::with_unit(0, 10, $unit).unwrap();
|
||||
let string = serde_json::to_string(&original_range).unwrap();
|
||||
let deserialized: TimestampRange = serde_json::from_str(&string).unwrap();
|
||||
assert_eq!(original_range, deserialized);
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
test_serde_for_unit!(
|
||||
TimeUnit::Second,
|
||||
TimeUnit::Millisecond,
|
||||
TimeUnit::Microsecond,
|
||||
TimeUnit::Nanosecond
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,8 +192,8 @@ impl Default for WalConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
dir: None,
|
||||
file_size: ReadableSize::gb(1), // log file size 1G
|
||||
purge_threshold: ReadableSize::gb(50), // purge threshold 50G
|
||||
file_size: ReadableSize::mb(256), // log file size 256MB
|
||||
purge_threshold: ReadableSize::gb(4), // purge threshold 4GB
|
||||
purge_interval: Duration::from_secs(600),
|
||||
read_batch_size: 128,
|
||||
sync_write: false,
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, NodeStat, Peer};
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::{datanode_stat, CatalogManagerRef};
|
||||
use common_meta::heartbeat::handler::{
|
||||
HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
|
||||
@@ -29,12 +30,14 @@ use snafu::ResultExt;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::datanode::DatanodeOptions;
|
||||
use crate::error::{self, MetaClientInitSnafu, Result};
|
||||
|
||||
pub(crate) mod handler;
|
||||
|
||||
pub struct HeartbeatTask {
|
||||
node_id: u64,
|
||||
node_epoch: u64,
|
||||
server_addr: String,
|
||||
server_hostname: Option<String>,
|
||||
running: Arc<AtomicBool>,
|
||||
@@ -42,6 +45,7 @@ pub struct HeartbeatTask {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
interval: u64,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
impl Drop for HeartbeatTask {
|
||||
@@ -54,21 +58,25 @@ impl HeartbeatTask {
|
||||
/// Create a new heartbeat task instance.
|
||||
pub fn new(
|
||||
node_id: u64,
|
||||
server_addr: String,
|
||||
server_hostname: Option<String>,
|
||||
opts: &DatanodeOptions,
|
||||
meta_client: Arc<MetaClient>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
heartbeat_interval_millis: u64,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
node_id,
|
||||
server_addr,
|
||||
server_hostname,
|
||||
// We use datanode's start time millis as the node's epoch.
|
||||
node_epoch: common_time::util::current_time_millis() as u64,
|
||||
server_addr: opts.rpc_addr.clone(),
|
||||
server_hostname: opts.rpc_hostname.clone(),
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
meta_client,
|
||||
catalog_manager,
|
||||
interval: 5_000, // default interval is set to 5 secs
|
||||
interval: heartbeat_interval_millis,
|
||||
resp_handler_executor,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +102,7 @@ impl HeartbeatTask {
|
||||
}
|
||||
|
||||
let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), res);
|
||||
if let Err(e) = Self::handle_response(ctx, handler_executor.clone()) {
|
||||
if let Err(e) = Self::handle_response(ctx, handler_executor.clone()).await {
|
||||
error!(e; "Error while handling heartbeat response");
|
||||
}
|
||||
if !running.load(Ordering::Acquire) {
|
||||
@@ -106,13 +114,14 @@ impl HeartbeatTask {
|
||||
Ok(tx)
|
||||
}
|
||||
|
||||
fn handle_response(
|
||||
async fn handle_response(
|
||||
ctx: HeartbeatResponseHandlerContext,
|
||||
handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
) -> Result<()> {
|
||||
trace!("heartbeat response: {:?}", ctx.response);
|
||||
handler_executor
|
||||
.handle(ctx)
|
||||
.await
|
||||
.context(error::HandleHeartbeatResponseSnafu)
|
||||
}
|
||||
|
||||
@@ -128,9 +137,12 @@ impl HeartbeatTask {
|
||||
}
|
||||
let interval = self.interval;
|
||||
let node_id = self.node_id;
|
||||
let node_epoch = self.node_epoch;
|
||||
let addr = resolve_addr(&self.server_addr, &self.server_hostname);
|
||||
info!("Starting heartbeat to Metasrv with interval {interval}. My node id is {node_id}, address is {addr}.");
|
||||
|
||||
self.region_alive_keepers.start().await;
|
||||
|
||||
let meta_client = self.meta_client.clone();
|
||||
let catalog_manager_clone = self.catalog_manager.clone();
|
||||
|
||||
@@ -147,6 +159,7 @@ impl HeartbeatTask {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let epoch = self.region_alive_keepers.epoch();
|
||||
common_runtime::spawn_bg(async move {
|
||||
let sleep = tokio::time::sleep(Duration::from_millis(0));
|
||||
tokio::pin!(sleep);
|
||||
@@ -192,6 +205,8 @@ impl HeartbeatTask {
|
||||
..Default::default()
|
||||
}),
|
||||
region_stats,
|
||||
duration_since_epoch: (Instant::now() - epoch).as_millis() as u64,
|
||||
node_epoch,
|
||||
..Default::default()
|
||||
};
|
||||
sleep.as_mut().reset(Instant::now() + Duration::from_millis(interval));
|
||||
|
||||
@@ -14,15 +14,16 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::{CatalogManagerRef, DeregisterTableRequest};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::heartbeat::handler::{
|
||||
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
|
||||
};
|
||||
use common_meta::instruction::{
|
||||
Instruction, InstructionReply, RegionIdent, SimpleReply, TableIdent,
|
||||
};
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::RegionIdent;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionNumber;
|
||||
@@ -36,8 +37,10 @@ use crate::error::{self, Result};
|
||||
pub struct CloseRegionHandler {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatResponseHandler for CloseRegionHandler {
|
||||
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
|
||||
matches!(
|
||||
@@ -46,35 +49,15 @@ impl HeartbeatResponseHandler for CloseRegionHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
let Some((meta, Instruction::CloseRegion(region_ident))) = ctx.incoming_message.take() else {
|
||||
unreachable!("CloseRegionHandler: should be guarded by 'is_acceptable'");
|
||||
};
|
||||
|
||||
let mailbox = ctx.mailbox.clone();
|
||||
let self_ref = Arc::new(self.clone());
|
||||
|
||||
let RegionIdent {
|
||||
table_ident:
|
||||
TableIdent {
|
||||
engine,
|
||||
catalog,
|
||||
schema,
|
||||
table,
|
||||
..
|
||||
},
|
||||
region_number,
|
||||
..
|
||||
} = region_ident;
|
||||
|
||||
common_runtime::spawn_bg(async move {
|
||||
let result = self_ref
|
||||
.close_region_inner(
|
||||
engine,
|
||||
&TableReference::full(&catalog, &schema, &table),
|
||||
vec![region_number],
|
||||
)
|
||||
.await;
|
||||
let result = self_ref.close_region_inner(region_ident).await;
|
||||
|
||||
if let Err(e) = mailbox
|
||||
.send((meta, CloseRegionHandler::map_result(result)))
|
||||
@@ -92,10 +75,12 @@ impl CloseRegionHandler {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
table_engine_manager,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,20 +136,21 @@ impl CloseRegionHandler {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn close_region_inner(
|
||||
&self,
|
||||
engine: String,
|
||||
table_ref: &TableReference<'_>,
|
||||
region_numbers: Vec<RegionNumber>,
|
||||
) -> Result<bool> {
|
||||
let engine =
|
||||
self.table_engine_manager
|
||||
.engine(&engine)
|
||||
.context(error::TableEngineNotFoundSnafu {
|
||||
engine_name: &engine,
|
||||
})?;
|
||||
async fn close_region_inner(&self, region_ident: RegionIdent) -> Result<bool> {
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
let engine_name = &table_ident.engine;
|
||||
let engine = self
|
||||
.table_engine_manager
|
||||
.engine(engine_name)
|
||||
.context(error::TableEngineNotFoundSnafu { engine_name })?;
|
||||
let ctx = EngineContext::default();
|
||||
|
||||
let table_ref = &TableReference::full(
|
||||
&table_ident.catalog,
|
||||
&table_ident.schema,
|
||||
&table_ident.table,
|
||||
);
|
||||
let region_numbers = vec![region_ident.region_number];
|
||||
if self
|
||||
.regions_closed(
|
||||
table_ref.catalog,
|
||||
@@ -202,7 +188,15 @@ impl CloseRegionHandler {
|
||||
})? {
|
||||
CloseTableResult::NotFound | CloseTableResult::Released(_) => {
|
||||
// Deregister table if The table released.
|
||||
self.deregister_table(table_ref).await
|
||||
let deregistered = self.deregister_table(table_ref).await?;
|
||||
|
||||
if deregistered {
|
||||
self.region_alive_keepers
|
||||
.deregister_table(table_ident)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(deregistered)
|
||||
}
|
||||
CloseTableResult::PartialClosed(regions) => {
|
||||
// Requires caller to update the region_numbers
|
||||
@@ -210,6 +204,11 @@ impl CloseRegionHandler {
|
||||
"Close partial regions: {:?} in table: {}",
|
||||
regions, table_ref
|
||||
);
|
||||
|
||||
self.region_alive_keepers
|
||||
.deregister_region(®ion_ident)
|
||||
.await;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -14,16 +14,16 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::error::Error as CatalogError;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::{CatalogManagerRef, RegisterTableRequest};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::heartbeat::handler::{
|
||||
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
|
||||
};
|
||||
use common_meta::instruction::{
|
||||
Instruction, InstructionReply, RegionIdent, SimpleReply, TableIdent,
|
||||
};
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_telemetry::{error, warn};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionNumber;
|
||||
@@ -37,8 +37,10 @@ use crate::error::{self, Result};
|
||||
pub struct OpenRegionHandler {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatResponseHandler for OpenRegionHandler {
|
||||
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
|
||||
matches!(
|
||||
@@ -47,7 +49,7 @@ impl HeartbeatResponseHandler for OpenRegionHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
let Some((meta, Instruction::OpenRegion(region_ident))) = ctx.incoming_message.take() else {
|
||||
unreachable!("OpenRegionHandler: should be guarded by 'is_acceptable'");
|
||||
};
|
||||
@@ -55,9 +57,24 @@ impl HeartbeatResponseHandler for OpenRegionHandler {
|
||||
let mailbox = ctx.mailbox.clone();
|
||||
let self_ref = Arc::new(self.clone());
|
||||
|
||||
let region_alive_keepers = self.region_alive_keepers.clone();
|
||||
common_runtime::spawn_bg(async move {
|
||||
let (engine, request) = OpenRegionHandler::prepare_request(region_ident);
|
||||
let result = self_ref.open_region_inner(engine, request).await;
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
let request = OpenTableRequest {
|
||||
catalog_name: table_ident.catalog.clone(),
|
||||
schema_name: table_ident.schema.clone(),
|
||||
table_name: table_ident.table.clone(),
|
||||
table_id: table_ident.table_id,
|
||||
region_numbers: vec![region_ident.region_number],
|
||||
};
|
||||
let result = self_ref
|
||||
.open_region_inner(table_ident.engine.clone(), request)
|
||||
.await;
|
||||
|
||||
if matches!(result, Ok(true)) {
|
||||
region_alive_keepers.register_region(®ion_ident).await;
|
||||
}
|
||||
|
||||
if let Err(e) = mailbox
|
||||
.send((meta, OpenRegionHandler::map_result(result)))
|
||||
.await
|
||||
@@ -73,10 +90,12 @@ impl OpenRegionHandler {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_engine_manager: TableEngineManagerRef,
|
||||
region_alive_keepers: Arc<RegionAliveKeepers>,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
table_engine_manager,
|
||||
region_alive_keepers,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,32 +116,6 @@ impl OpenRegionHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn prepare_request(ident: RegionIdent) -> (String, OpenTableRequest) {
|
||||
let RegionIdent {
|
||||
table_ident:
|
||||
TableIdent {
|
||||
catalog,
|
||||
schema,
|
||||
table,
|
||||
table_id,
|
||||
engine,
|
||||
},
|
||||
region_number,
|
||||
..
|
||||
} = ident;
|
||||
|
||||
(
|
||||
engine,
|
||||
OpenTableRequest {
|
||||
catalog_name: catalog,
|
||||
schema_name: schema,
|
||||
table_name: table,
|
||||
table_id,
|
||||
region_numbers: vec![region_number],
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns true if a table or target regions have been opened.
|
||||
async fn regions_opened(
|
||||
&self,
|
||||
|
||||
@@ -18,7 +18,8 @@ use std::time::Duration;
|
||||
use std::{fs, path};
|
||||
|
||||
use api::v1::meta::Role;
|
||||
use catalog::remote::CachedMetaKvBackend;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager};
|
||||
use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
|
||||
use common_base::paths::{CLUSTER_DIR, WAL_DIR};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
@@ -56,9 +57,9 @@ use table::Table;
|
||||
|
||||
use crate::datanode::{DatanodeOptions, ObjectStoreConfig, ProcedureConfig, WalConfig};
|
||||
use crate::error::{
|
||||
self, CatalogSnafu, MetaClientInitSnafu, MissingMetasrvOptsSnafu, MissingNodeIdSnafu,
|
||||
NewCatalogSnafu, OpenLogStoreSnafu, RecoverProcedureSnafu, Result, ShutdownInstanceSnafu,
|
||||
StartProcedureManagerSnafu, StopProcedureManagerSnafu,
|
||||
self, CatalogSnafu, IncorrectInternalStateSnafu, MetaClientInitSnafu, MissingMetasrvOptsSnafu,
|
||||
MissingNodeIdSnafu, NewCatalogSnafu, OpenLogStoreSnafu, RecoverProcedureSnafu, Result,
|
||||
ShutdownInstanceSnafu, StartProcedureManagerSnafu, StopProcedureManagerSnafu,
|
||||
};
|
||||
use crate::heartbeat::handler::close_region::CloseRegionHandler;
|
||||
use crate::heartbeat::handler::open_region::OpenRegionHandler;
|
||||
@@ -150,7 +151,7 @@ impl Instance {
|
||||
);
|
||||
|
||||
// create remote catalog manager
|
||||
let (catalog_manager, table_id_provider) = match opts.mode {
|
||||
let (catalog_manager, table_id_provider, heartbeat_task) = match opts.mode {
|
||||
Mode::Standalone => {
|
||||
if opts.enable_memory_catalog {
|
||||
let catalog = Arc::new(catalog::local::MemoryCatalogManager::default());
|
||||
@@ -170,6 +171,7 @@ impl Instance {
|
||||
(
|
||||
catalog.clone() as CatalogManagerRef,
|
||||
Some(catalog as TableIdProviderRef),
|
||||
None,
|
||||
)
|
||||
} else {
|
||||
let catalog = Arc::new(
|
||||
@@ -181,51 +183,64 @@ impl Instance {
|
||||
(
|
||||
catalog.clone() as CatalogManagerRef,
|
||||
Some(catalog as TableIdProviderRef),
|
||||
None,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Mode::Distributed => {
|
||||
let kv_backend = Arc::new(CachedMetaKvBackend::new(
|
||||
meta_client.as_ref().unwrap().clone(),
|
||||
let meta_client = meta_client.context(IncorrectInternalStateSnafu {
|
||||
state: "meta client is not provided when creating distributed Datanode",
|
||||
})?;
|
||||
|
||||
let kv_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
|
||||
|
||||
let heartbeat_interval_millis = 5000;
|
||||
|
||||
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(
|
||||
engine_manager.clone(),
|
||||
heartbeat_interval_millis,
|
||||
));
|
||||
|
||||
let catalog = Arc::new(catalog::remote::RemoteCatalogManager::new(
|
||||
let catalog_manager = Arc::new(RemoteCatalogManager::new(
|
||||
engine_manager.clone(),
|
||||
opts.node_id.context(MissingNodeIdSnafu)?,
|
||||
kv_backend,
|
||||
region_alive_keepers.clone(),
|
||||
));
|
||||
(catalog as CatalogManagerRef, None)
|
||||
|
||||
let handlers_executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler::default()),
|
||||
Arc::new(OpenRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
Arc::new(CloseRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
region_alive_keepers.clone(),
|
||||
]);
|
||||
|
||||
let heartbeat_task = Some(HeartbeatTask::new(
|
||||
opts.node_id.context(MissingNodeIdSnafu)?,
|
||||
opts,
|
||||
meta_client,
|
||||
catalog_manager.clone(),
|
||||
Arc::new(handlers_executor),
|
||||
heartbeat_interval_millis,
|
||||
region_alive_keepers,
|
||||
));
|
||||
|
||||
(catalog_manager as CatalogManagerRef, None, heartbeat_task)
|
||||
}
|
||||
};
|
||||
|
||||
let factory = QueryEngineFactory::new(catalog_manager.clone(), false);
|
||||
let query_engine = factory.query_engine();
|
||||
|
||||
let handlers_executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler::default()),
|
||||
Arc::new(OpenRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
)),
|
||||
Arc::new(CloseRegionHandler::new(
|
||||
catalog_manager.clone(),
|
||||
engine_manager.clone(),
|
||||
)),
|
||||
]);
|
||||
|
||||
let heartbeat_task = match opts.mode {
|
||||
Mode::Standalone => None,
|
||||
Mode::Distributed => Some(HeartbeatTask::new(
|
||||
opts.node_id.context(MissingNodeIdSnafu)?,
|
||||
opts.rpc_addr.clone(),
|
||||
opts.rpc_hostname.clone(),
|
||||
meta_client.as_ref().unwrap().clone(),
|
||||
catalog_manager.clone(),
|
||||
Arc::new(handlers_executor),
|
||||
)),
|
||||
};
|
||||
|
||||
let procedure_manager =
|
||||
create_procedure_manager(opts.node_id.unwrap_or(0), &opts.procedure, object_store)
|
||||
.await?;
|
||||
@@ -354,7 +369,7 @@ impl Instance {
|
||||
fn create_compaction_scheduler<S: LogStore>(opts: &DatanodeOptions) -> CompactionSchedulerRef<S> {
|
||||
let picker = SimplePicker::default();
|
||||
let config = SchedulerConfig::from(opts);
|
||||
let handler = CompactionHandler::new(picker);
|
||||
let handler = CompactionHandler { picker };
|
||||
let scheduler = LocalScheduler::new(config, handler);
|
||||
Arc::new(scheduler)
|
||||
}
|
||||
|
||||
@@ -228,6 +228,22 @@ pub fn table_idents_to_full_name(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn idents_to_full_database_name(
|
||||
obj_name: &ObjectName,
|
||||
query_ctx: &QueryContextRef,
|
||||
) -> Result<(String, String)> {
|
||||
match &obj_name.0[..] {
|
||||
[database] => Ok((query_ctx.current_catalog(), database.value.clone())),
|
||||
[catalog, database] => Ok((catalog.value.clone(), database.value.clone())),
|
||||
_ => error::InvalidSqlSnafu {
|
||||
msg: format!(
|
||||
"expect database name to be <catalog>.<database>, <database>, found: {obj_name}",
|
||||
),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SqlStatementExecutor for Instance {
|
||||
async fn execute_sql(
|
||||
|
||||
@@ -14,26 +14,29 @@
|
||||
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::greptime_request::Request as GrpcRequest;
|
||||
use api::v1::meta::HeartbeatResponse;
|
||||
use api::v1::query_request::Query;
|
||||
use api::v1::QueryRequest;
|
||||
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_meta::heartbeat::handler::{
|
||||
HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
|
||||
};
|
||||
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
|
||||
use common_meta::instruction::{
|
||||
Instruction, InstructionReply, RegionIdent, SimpleReply, TableIdent,
|
||||
};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, RegionIdent, SimpleReply};
|
||||
use common_query::Output;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
use session::context::QueryContext;
|
||||
use table::engine::manager::TableEngineManagerRef;
|
||||
use table::TableRef;
|
||||
use test_util::MockInstance;
|
||||
use tokio::sync::mpsc::{self, Receiver};
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::heartbeat::handler::close_region::CloseRegionHandler;
|
||||
use crate::heartbeat::handler::open_region::OpenRegionHandler;
|
||||
@@ -61,7 +64,11 @@ async fn test_close_region_handler() {
|
||||
} = prepare_handler_test("test_close_region_handler").await;
|
||||
|
||||
let executor = Arc::new(HandlerGroupExecutor::new(vec![Arc::new(
|
||||
CloseRegionHandler::new(catalog_manager_ref.clone(), engine_manager_ref.clone()),
|
||||
CloseRegionHandler::new(
|
||||
catalog_manager_ref.clone(),
|
||||
engine_manager_ref.clone(),
|
||||
Arc::new(RegionAliveKeepers::new(engine_manager_ref.clone(), 5000)),
|
||||
),
|
||||
)]));
|
||||
|
||||
prepare_table(instance.inner()).await;
|
||||
@@ -71,7 +78,8 @@ async fn test_close_region_handler() {
|
||||
executor.clone(),
|
||||
mailbox.clone(),
|
||||
close_region_instruction(),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
@@ -85,7 +93,8 @@ async fn test_close_region_handler() {
|
||||
executor.clone(),
|
||||
mailbox.clone(),
|
||||
close_region_instruction(),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
@@ -108,7 +117,8 @@ async fn test_close_region_handler() {
|
||||
cluster_id: 1,
|
||||
datanode_id: 2,
|
||||
}),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
@@ -127,56 +137,81 @@ async fn test_open_region_handler() {
|
||||
..
|
||||
} = prepare_handler_test("test_open_region_handler").await;
|
||||
|
||||
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(engine_manager_ref.clone(), 5000));
|
||||
region_alive_keepers.start().await;
|
||||
|
||||
let executor = Arc::new(HandlerGroupExecutor::new(vec![
|
||||
Arc::new(OpenRegionHandler::new(
|
||||
catalog_manager_ref.clone(),
|
||||
engine_manager_ref.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
Arc::new(CloseRegionHandler::new(
|
||||
catalog_manager_ref.clone(),
|
||||
engine_manager_ref.clone(),
|
||||
region_alive_keepers.clone(),
|
||||
)),
|
||||
]));
|
||||
|
||||
prepare_table(instance.inner()).await;
|
||||
let instruction = open_region_instruction();
|
||||
let Instruction::OpenRegion(region_ident) = instruction.clone() else { unreachable!() };
|
||||
let table_ident = ®ion_ident.table_ident;
|
||||
|
||||
let table = prepare_table(instance.inner()).await;
|
||||
region_alive_keepers
|
||||
.register_table(table_ident.clone(), table)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Opens a opened table
|
||||
handle_instruction(executor.clone(), mailbox.clone(), open_region_instruction());
|
||||
handle_instruction(executor.clone(), mailbox.clone(), instruction.clone()).await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
InstructionReply::OpenRegion(SimpleReply { result: true, .. })
|
||||
);
|
||||
|
||||
let keeper = region_alive_keepers.find_keeper(table_ident).await.unwrap();
|
||||
let deadline = keeper.deadline(0).await.unwrap();
|
||||
assert!(deadline <= Instant::now() + Duration::from_secs(20));
|
||||
|
||||
// Opens a non-exist table
|
||||
let non_exist_table_ident = TableIdent {
|
||||
catalog: "greptime".to_string(),
|
||||
schema: "public".to_string(),
|
||||
table: "non-exist".to_string(),
|
||||
table_id: 2024,
|
||||
engine: "mito".to_string(),
|
||||
};
|
||||
handle_instruction(
|
||||
executor.clone(),
|
||||
mailbox.clone(),
|
||||
Instruction::OpenRegion(RegionIdent {
|
||||
table_ident: TableIdent {
|
||||
catalog: "greptime".to_string(),
|
||||
schema: "public".to_string(),
|
||||
table: "non-exist".to_string(),
|
||||
table_id: 2024,
|
||||
engine: "mito".to_string(),
|
||||
},
|
||||
table_ident: non_exist_table_ident.clone(),
|
||||
region_number: 0,
|
||||
cluster_id: 1,
|
||||
datanode_id: 2,
|
||||
}),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
InstructionReply::OpenRegion(SimpleReply { result: false, .. })
|
||||
);
|
||||
|
||||
assert!(region_alive_keepers
|
||||
.find_keeper(&non_exist_table_ident)
|
||||
.await
|
||||
.is_none());
|
||||
|
||||
// Closes demo table
|
||||
handle_instruction(
|
||||
executor.clone(),
|
||||
mailbox.clone(),
|
||||
close_region_instruction(),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
@@ -184,8 +219,13 @@ async fn test_open_region_handler() {
|
||||
);
|
||||
assert_test_table_not_found(instance.inner()).await;
|
||||
|
||||
assert!(region_alive_keepers
|
||||
.find_keeper(table_ident)
|
||||
.await
|
||||
.is_none());
|
||||
|
||||
// Opens demo table
|
||||
handle_instruction(executor.clone(), mailbox.clone(), open_region_instruction());
|
||||
handle_instruction(executor.clone(), mailbox.clone(), instruction).await;
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
reply,
|
||||
@@ -220,7 +260,7 @@ pub fn test_message_meta(id: u64, subject: &str, to: &str, from: &str) -> Messag
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_instruction(
|
||||
async fn handle_instruction(
|
||||
executor: Arc<dyn HeartbeatResponseHandlerExecutor>,
|
||||
mailbox: Arc<HeartbeatMailbox>,
|
||||
instruction: Instruction,
|
||||
@@ -229,7 +269,7 @@ fn handle_instruction(
|
||||
let mut ctx: HeartbeatResponseHandlerContext =
|
||||
HeartbeatResponseHandlerContext::new(mailbox, response);
|
||||
ctx.incoming_message = Some((test_message_meta(1, "hi", "foo", "bar"), instruction));
|
||||
executor.handle(ctx).unwrap();
|
||||
executor.handle(ctx).await.unwrap();
|
||||
}
|
||||
|
||||
fn close_region_instruction() -> Instruction {
|
||||
@@ -262,10 +302,10 @@ fn open_region_instruction() -> Instruction {
|
||||
})
|
||||
}
|
||||
|
||||
async fn prepare_table(instance: &Instance) {
|
||||
async fn prepare_table(instance: &Instance) -> TableRef {
|
||||
test_util::create_test_table(instance, ConcreteDataType::timestamp_millisecond_datatype())
|
||||
.await
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn assert_test_table_not_found(instance: &Instance) {
|
||||
|
||||
@@ -22,6 +22,7 @@ use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::requests::{CreateTableRequest, TableOptions};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::datanode::{
|
||||
DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, StorageConfig, WalConfig,
|
||||
@@ -84,7 +85,7 @@ fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard)
|
||||
pub(crate) async fn create_test_table(
|
||||
instance: &Instance,
|
||||
ts_type: ConcreteDataType,
|
||||
) -> Result<()> {
|
||||
) -> Result<TableRef> {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("host", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
|
||||
@@ -125,8 +126,8 @@ pub(crate) async fn create_test_table(
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
schema_provider
|
||||
.register_table(table_name.to_string(), table)
|
||||
.register_table(table_name.to_string(), table.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
Ok(())
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
@@ -183,6 +183,12 @@ impl ConcreteDataType {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ConcreteDataType> for ConcreteDataType {
|
||||
fn from(t: &ConcreteDataType) -> Self {
|
||||
t.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ArrowDataType> for ConcreteDataType {
|
||||
type Error = Error;
|
||||
|
||||
|
||||
@@ -248,7 +248,7 @@ impl Value {
|
||||
Value::Binary(v) => ScalarValue::LargeBinary(Some(v.to_vec())),
|
||||
Value::Date(v) => ScalarValue::Date32(Some(v.val())),
|
||||
Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
|
||||
Value::Null => to_null_value(output_type),
|
||||
Value::Null => to_null_scalar_value(output_type),
|
||||
Value::List(list) => {
|
||||
// Safety: The logical type of the value and output_type are the same.
|
||||
let list_type = output_type.as_list().unwrap();
|
||||
@@ -261,7 +261,7 @@ impl Value {
|
||||
}
|
||||
}
|
||||
|
||||
fn to_null_value(output_type: &ConcreteDataType) -> ScalarValue {
|
||||
pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> ScalarValue {
|
||||
match output_type {
|
||||
ConcreteDataType::Null(_) => ScalarValue::Null,
|
||||
ConcreteDataType::Boolean(_) => ScalarValue::Boolean(None),
|
||||
@@ -285,7 +285,7 @@ fn to_null_value(output_type: &ConcreteDataType) -> ScalarValue {
|
||||
}
|
||||
ConcreteDataType::Dictionary(dict) => ScalarValue::Dictionary(
|
||||
Box::new(dict.key_type().as_arrow_type()),
|
||||
Box::new(to_null_value(dict.value_type())),
|
||||
Box::new(to_null_scalar_value(dict.value_type())),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ common-meta = { path = "../common/meta" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
|
||||
@@ -279,6 +279,13 @@ pub enum Error {
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read table: {table_name}, source: {source}"))]
|
||||
ReadTable {
|
||||
table_name: String,
|
||||
#[snafu(backtrace)]
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute logical plan, source: {}", source))]
|
||||
ExecLogicalPlan {
|
||||
#[snafu(backtrace)]
|
||||
@@ -363,13 +370,22 @@ pub enum Error {
|
||||
},
|
||||
|
||||
// TODO(ruihang): merge all query execution error kinds
|
||||
#[snafu(display("failed to execute PromQL query {}, source: {}", query, source))]
|
||||
#[snafu(display("Failed to execute PromQL query {}, source: {}", query, source))]
|
||||
ExecutePromql {
|
||||
query: String,
|
||||
#[snafu(backtrace)]
|
||||
source: servers::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to create logical plan for prometheus query, source: {}",
|
||||
source
|
||||
))]
|
||||
PrometheusRemoteQueryPlan {
|
||||
#[snafu(backtrace)]
|
||||
source: servers::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to describe schema for given statement, source: {}", source))]
|
||||
DescribeStatement {
|
||||
#[snafu(backtrace)]
|
||||
@@ -532,6 +548,13 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid COPY parameter, key: {}, value: {}", key, value))]
|
||||
InvalidCopyParameter {
|
||||
key: String,
|
||||
value: String,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -559,7 +582,8 @@ impl ErrorExt for Error {
|
||||
Error::HandleHeartbeatResponse { source, .. } => source.status_code(),
|
||||
|
||||
Error::RuntimeResource { source, .. } => source.status_code(),
|
||||
Error::ExecutePromql { source, .. } => source.status_code(),
|
||||
Error::PrometheusRemoteQueryPlan { source, .. }
|
||||
| Error::ExecutePromql { source, .. } => source.status_code(),
|
||||
|
||||
Error::SqlExecIntercepted { source, .. } => source.status_code(),
|
||||
Error::StartServer { source, .. } => source.status_code(),
|
||||
@@ -621,6 +645,7 @@ impl ErrorExt for Error {
|
||||
Error::ExecuteStatement { source, .. }
|
||||
| Error::PlanStatement { source }
|
||||
| Error::ParseQuery { source }
|
||||
| Error::ReadTable { source, .. }
|
||||
| Error::ExecLogicalPlan { source }
|
||||
| Error::DescribeStatement { source } => source.status_code(),
|
||||
|
||||
@@ -649,6 +674,7 @@ impl ErrorExt for Error {
|
||||
| Error::BuildBackend { source } => source.status_code(),
|
||||
|
||||
Error::WriteParquet { source, .. } => source.status_code(),
|
||||
Error::InvalidCopyParameter { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ impl HeartbeatTask {
|
||||
Ok(Some(resp)) => {
|
||||
debug!("Receiving heartbeat response: {:?}", resp);
|
||||
let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), resp);
|
||||
if let Err(e) = capture_self.handle_response(ctx) {
|
||||
if let Err(e) = capture_self.handle_response(ctx).await {
|
||||
error!(e; "Error while handling heartbeat response");
|
||||
}
|
||||
}
|
||||
@@ -153,9 +153,10 @@ impl HeartbeatTask {
|
||||
});
|
||||
}
|
||||
|
||||
fn handle_response(&self, ctx: HeartbeatResponseHandlerContext) -> Result<()> {
|
||||
async fn handle_response(&self, ctx: HeartbeatResponseHandlerContext) -> Result<()> {
|
||||
self.resp_handler_executor
|
||||
.handle(ctx)
|
||||
.await
|
||||
.context(error::HandleHeartbeatResponseSnafu)
|
||||
}
|
||||
|
||||
|
||||
@@ -12,13 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::helper::TableGlobalKey;
|
||||
use catalog::remote::KvCacheInvalidatorRef;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::heartbeat::handler::{
|
||||
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
|
||||
};
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply, TableIdent};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::table_name::TableName;
|
||||
use common_telemetry::{error, info};
|
||||
use partition::manager::TableRouteCacheInvalidatorRef;
|
||||
@@ -29,6 +31,7 @@ pub struct InvalidateTableCacheHandler {
|
||||
table_route_cache_invalidator: TableRouteCacheInvalidatorRef,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
|
||||
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
|
||||
matches!(
|
||||
@@ -37,7 +40,7 @@ impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
|
||||
// TODO(weny): considers introducing a macro
|
||||
let Some((meta, Instruction::InvalidateTableCache(table_ident))) = ctx.incoming_message.take() else {
|
||||
unreachable!("InvalidateTableCacheHandler: should be guarded by 'is_acceptable'");
|
||||
|
||||
@@ -23,7 +23,8 @@ use common_meta::heartbeat::handler::{
|
||||
HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
|
||||
};
|
||||
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply, TableIdent};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::table_name::TableName;
|
||||
use partition::manager::TableRouteCacheInvalidator;
|
||||
use tokio::sync::mpsc;
|
||||
@@ -89,7 +90,8 @@ async fn test_invalidate_table_cache_handler() {
|
||||
table_id: 0,
|
||||
engine: "mito".to_string(),
|
||||
}),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
@@ -125,7 +127,8 @@ async fn test_invalidate_table_cache_handler() {
|
||||
table_id: 0,
|
||||
engine: "mito".to_string(),
|
||||
}),
|
||||
);
|
||||
)
|
||||
.await;
|
||||
|
||||
let (_, reply) = rx.recv().await.unwrap();
|
||||
assert_matches!(
|
||||
@@ -143,7 +146,7 @@ pub fn test_message_meta(id: u64, subject: &str, to: &str, from: &str) -> Messag
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_instruction(
|
||||
async fn handle_instruction(
|
||||
executor: Arc<dyn HeartbeatResponseHandlerExecutor>,
|
||||
mailbox: Arc<HeartbeatMailbox>,
|
||||
instruction: Instruction,
|
||||
@@ -152,5 +155,5 @@ fn handle_instruction(
|
||||
let mut ctx: HeartbeatResponseHandlerContext =
|
||||
HeartbeatResponseHandlerContext::new(mailbox, response);
|
||||
ctx.incoming_message = Some((test_message_meta(1, "hi", "foo", "bar"), instruction));
|
||||
executor.handle(ctx).unwrap();
|
||||
executor.handle(ctx).await.unwrap();
|
||||
}
|
||||
|
||||
@@ -53,7 +53,9 @@ use meta_client::MetaClientOptions;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use partition::route::TableRoutes;
|
||||
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::{QueryEngineFactory, QueryEngineRef};
|
||||
use servers::error as server_error;
|
||||
use servers::error::{ExecuteQuerySnafu, ParsePromQLSnafu};
|
||||
@@ -73,8 +75,9 @@ use sql::statements::statement::Statement;
|
||||
|
||||
use crate::catalog::FrontendCatalogManager;
|
||||
use crate::error::{
|
||||
self, Error, ExecutePromqlSnafu, ExternalSnafu, InvalidInsertRequestSnafu,
|
||||
MissingMetasrvOptsSnafu, ParseSqlSnafu, PlanStatementSnafu, Result, SqlExecInterceptedSnafu,
|
||||
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu,
|
||||
InvalidInsertRequestSnafu, MissingMetasrvOptsSnafu, ParseSqlSnafu, PlanStatementSnafu, Result,
|
||||
SqlExecInterceptedSnafu,
|
||||
};
|
||||
use crate::expr_factory::{CreateExprFactoryRef, DefaultCreateExprFactory};
|
||||
use crate::frontend::FrontendOptions;
|
||||
@@ -506,6 +509,14 @@ impl SqlQueryHandler for Instance {
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_exec_plan(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
let _timer = timer!(metrics::METRIC_EXEC_PLAN_ELAPSED);
|
||||
self.query_engine
|
||||
.execute(plan, query_ctx)
|
||||
.await
|
||||
.context(ExecLogicalPlanSnafu)
|
||||
}
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
query: &PromQuery,
|
||||
@@ -523,8 +534,11 @@ impl SqlQueryHandler for Instance {
|
||||
&self,
|
||||
stmt: Statement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
if let Statement::Query(_) = stmt {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
if matches!(
|
||||
stmt,
|
||||
Statement::Insert(_) | Statement::Query(_) | Statement::Delete(_)
|
||||
) {
|
||||
let plan = self
|
||||
.query_engine
|
||||
.planner()
|
||||
@@ -613,12 +627,15 @@ pub fn check_permission(
|
||||
Statement::DescribeTable(stmt) => {
|
||||
validate_param(stmt.name(), query_ctx)?;
|
||||
}
|
||||
Statement::Copy(stmd) => match stmd {
|
||||
Statement::Copy(sql::statements::copy::Copy::CopyTable(stmt)) => match stmt {
|
||||
CopyTable::To(copy_table_to) => validate_param(©_table_to.table_name, query_ctx)?,
|
||||
CopyTable::From(copy_table_from) => {
|
||||
validate_param(©_table_from.table_name, query_ctx)?
|
||||
}
|
||||
},
|
||||
Statement::Copy(sql::statements::copy::Copy::CopyDatabase(stmt)) => {
|
||||
validate_param(&stmt.database_name, query_ctx)?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
|
||||
use api::prometheus::remote::read_request::ResponseType;
|
||||
use api::prometheus::remote::{Query, QueryResult, ReadRequest, ReadResponse, WriteRequest};
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::{query_request, QueryRequest};
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
@@ -25,11 +24,14 @@ use metrics::counter;
|
||||
use prost::Message;
|
||||
use servers::error::{self, Result as ServerResult};
|
||||
use servers::prometheus::{self, Metrics};
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
use servers::query_handler::{PrometheusProtocolHandler, PrometheusResponse};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
CatalogSnafu, ExecLogicalPlanSnafu, PrometheusRemoteQueryPlanSnafu, ReadTableSnafu, Result,
|
||||
TableNotFoundSnafu,
|
||||
};
|
||||
use crate::instance::Instance;
|
||||
use crate::metrics::PROMETHEUS_REMOTE_WRITE_SAMPLES;
|
||||
|
||||
@@ -75,6 +77,45 @@ async fn to_query_result(table_name: &str, output: Output) -> ServerResult<Query
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
async fn handle_remote_query(
|
||||
&self,
|
||||
ctx: &QueryContextRef,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
query: &Query,
|
||||
) -> Result<Output> {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog_name, schema_name, table_name)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog_name, schema_name, table_name),
|
||||
})?;
|
||||
|
||||
let dataframe = self
|
||||
.query_engine
|
||||
.read_table(table)
|
||||
.with_context(|_| ReadTableSnafu {
|
||||
table_name: format_full_table_name(catalog_name, schema_name, table_name),
|
||||
})?;
|
||||
|
||||
let logical_plan =
|
||||
prometheus::query_to_plan(dataframe, query).context(PrometheusRemoteQueryPlanSnafu)?;
|
||||
|
||||
logging::debug!(
|
||||
"Prometheus remote read, table: {}, logical plan: {}",
|
||||
table_name,
|
||||
logical_plan.display_indent(),
|
||||
);
|
||||
|
||||
self.query_engine
|
||||
.execute(logical_plan, ctx.clone())
|
||||
.await
|
||||
.context(ExecLogicalPlanSnafu)
|
||||
}
|
||||
|
||||
async fn handle_remote_queries(
|
||||
&self,
|
||||
ctx: QueryContextRef,
|
||||
@@ -82,22 +123,19 @@ impl Instance {
|
||||
) -> ServerResult<Vec<(String, Output)>> {
|
||||
let mut results = Vec::with_capacity(queries.len());
|
||||
|
||||
for query in queries {
|
||||
let (table_name, sql) = prometheus::query_to_sql(query)?;
|
||||
logging::debug!(
|
||||
"prometheus remote read, table: {}, sql: {}",
|
||||
table_name,
|
||||
sql
|
||||
);
|
||||
let catalog_name = ctx.current_catalog();
|
||||
let schema_name = ctx.current_schema();
|
||||
|
||||
for query in queries {
|
||||
let table_name = prometheus::table_name(query)?;
|
||||
|
||||
let query = Request::Query(QueryRequest {
|
||||
query: Some(query_request::Query::Sql(sql.to_string())),
|
||||
});
|
||||
let output = self
|
||||
.do_query(query, ctx.clone())
|
||||
.handle_remote_query(&ctx, &catalog_name, &schema_name, &table_name, query)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteGrpcQuerySnafu)?;
|
||||
.with_context(|_| error::ExecuteQuerySnafu {
|
||||
query: format!("{query:#?}"),
|
||||
})?;
|
||||
|
||||
results.push((table_name, output));
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) const METRIC_HANDLE_SQL_ELAPSED: &str = "frontend.handle_sql_elapsed";
|
||||
pub(crate) const METRIC_EXEC_PLAN_ELAPSED: &str = "frontend.exec_plan_elapsed";
|
||||
pub(crate) const METRIC_HANDLE_SCRIPTS_ELAPSED: &str = "frontend.handle_scripts_elapsed";
|
||||
pub(crate) const METRIC_RUN_SCRIPT_ELAPSED: &str = "frontend.run_script_elapsed";
|
||||
|
||||
|
||||
@@ -12,32 +12,40 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod backup;
|
||||
mod copy_table_from;
|
||||
mod copy_table_to;
|
||||
mod describe;
|
||||
mod show;
|
||||
mod tql;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datanode::instance::sql::table_idents_to_full_name;
|
||||
use common_time::range::TimestampRange;
|
||||
use common_time::Timestamp;
|
||||
use datanode::instance::sql::{idents_to_full_database_name, table_idents_to_full_name};
|
||||
use query::parser::QueryStatement;
|
||||
use query::query_engine::SqlStatementExecutorRef;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::statements::copy::{CopyTable, CopyTableArgument};
|
||||
use sql::statements::copy::{CopyDatabaseArgument, CopyTable, CopyTableArgument};
|
||||
use sql::statements::statement::Statement;
|
||||
use table::engine::TableReference;
|
||||
use table::requests::{CopyDirection, CopyTableRequest};
|
||||
use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{
|
||||
CatalogSnafu, ExecLogicalPlanSnafu, ExecuteStatementSnafu, ExternalSnafu, PlanStatementSnafu,
|
||||
Result, SchemaNotFoundSnafu, TableNotFoundSnafu,
|
||||
};
|
||||
use crate::statement::backup::{COPY_DATABASE_TIME_END_KEY, COPY_DATABASE_TIME_START_KEY};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StatementExecutor {
|
||||
@@ -92,14 +100,23 @@ impl StatementExecutor {
|
||||
|
||||
Statement::ShowTables(stmt) => self.show_tables(stmt, query_ctx).await,
|
||||
|
||||
Statement::Copy(stmt) => {
|
||||
Statement::Copy(sql::statements::copy::Copy::CopyTable(stmt)) => {
|
||||
let req = to_copy_table_request(stmt, query_ctx)?;
|
||||
match req.direction {
|
||||
CopyDirection::Export => self.copy_table_to(req).await,
|
||||
CopyDirection::Import => self.copy_table_from(req).await,
|
||||
CopyDirection::Export => {
|
||||
self.copy_table_to(req).await.map(Output::AffectedRows)
|
||||
}
|
||||
CopyDirection::Import => {
|
||||
self.copy_table_from(req).await.map(Output::AffectedRows)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Copy(sql::statements::copy::Copy::CopyDatabase(arg)) => {
|
||||
self.copy_database(to_copy_database_request(arg, &query_ctx)?)
|
||||
.await
|
||||
}
|
||||
|
||||
Statement::CreateDatabase(_)
|
||||
| Statement::CreateTable(_)
|
||||
| Statement::CreateExternalTable(_)
|
||||
@@ -191,5 +208,47 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
|
||||
connection,
|
||||
pattern,
|
||||
direction,
|
||||
// we copy the whole table by default.
|
||||
timestamp_range: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Converts [CopyDatabaseArgument] to [CopyDatabaseRequest].
|
||||
/// This function extracts the necessary info including catalog/database name, time range, etc.
|
||||
fn to_copy_database_request(
|
||||
arg: CopyDatabaseArgument,
|
||||
query_ctx: &QueryContextRef,
|
||||
) -> Result<CopyDatabaseRequest> {
|
||||
let (catalog_name, database_name) = idents_to_full_database_name(&arg.database_name, query_ctx)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
let start_timestamp = extract_timestamp(&arg.with, COPY_DATABASE_TIME_START_KEY)?;
|
||||
let end_timestamp = extract_timestamp(&arg.with, COPY_DATABASE_TIME_END_KEY)?;
|
||||
|
||||
let time_range = match (start_timestamp, end_timestamp) {
|
||||
(Some(start), Some(end)) => TimestampRange::new(start, end),
|
||||
(Some(start), None) => Some(TimestampRange::from_start(start)),
|
||||
(None, Some(end)) => Some(TimestampRange::until_end(end, false)), // exclusive end
|
||||
(None, None) => None,
|
||||
};
|
||||
|
||||
Ok(CopyDatabaseRequest {
|
||||
catalog_name,
|
||||
schema_name: database_name,
|
||||
location: arg.location,
|
||||
with: arg.with,
|
||||
connection: arg.connection,
|
||||
time_range,
|
||||
})
|
||||
}
|
||||
|
||||
/// Extracts timestamp from a [HashMap<String, String>] with given key.
|
||||
fn extract_timestamp(map: &HashMap<String, String>, key: &str) -> Result<Option<Timestamp>> {
|
||||
map.get(key)
|
||||
.map(|v| {
|
||||
Timestamp::from_str(v)
|
||||
.map_err(|_| error::InvalidCopyParameterSnafu { key, value: v }.build())
|
||||
})
|
||||
.transpose()
|
||||
}
|
||||
|
||||
97
src/frontend/src/statement/backup.rs
Normal file
97
src/frontend/src/statement/backup.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_datasource::file_format::Format;
|
||||
use common_query::Output;
|
||||
use common_telemetry::info;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CatalogSnafu, InvalidCopyParameterSnafu, SchemaNotFoundSnafu,
|
||||
};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
pub(crate) const COPY_DATABASE_TIME_START_KEY: &str = "start_time";
|
||||
pub(crate) const COPY_DATABASE_TIME_END_KEY: &str = "end_time";
|
||||
|
||||
impl StatementExecutor {
|
||||
pub(crate) async fn copy_database(&self, req: CopyDatabaseRequest) -> error::Result<Output> {
|
||||
// location must end with / so that every table is exported to a file.
|
||||
ensure!(
|
||||
req.location.ends_with('/'),
|
||||
InvalidCopyParameterSnafu {
|
||||
key: "location",
|
||||
value: req.location,
|
||||
}
|
||||
);
|
||||
|
||||
info!(
|
||||
"Copy database {}.{}, dir: {},. time: {:?}",
|
||||
req.catalog_name, req.schema_name, req.location, req.time_range
|
||||
);
|
||||
let schema = self
|
||||
.catalog_manager
|
||||
.catalog(&req.catalog_name)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(CatalogNotFoundSnafu {
|
||||
catalog_name: &req.catalog_name,
|
||||
})?
|
||||
.schema(&req.schema_name)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.context(SchemaNotFoundSnafu {
|
||||
schema_info: &req.schema_name,
|
||||
})?;
|
||||
|
||||
let suffix = Format::try_from(&req.with)
|
||||
.context(error::ParseFileFormatSnafu)?
|
||||
.suffix();
|
||||
|
||||
let table_names = schema.table_names().await.context(CatalogSnafu)?;
|
||||
|
||||
let mut exported_rows = 0;
|
||||
for table_name in table_names {
|
||||
// TODO(hl): remove this hardcode once we've removed numbers table.
|
||||
if table_name == "numbers" {
|
||||
continue;
|
||||
}
|
||||
let mut table_file = req.location.clone();
|
||||
table_file.push_str(&table_name);
|
||||
table_file.push_str(suffix);
|
||||
info!(
|
||||
"Copy table: {}.{}.{} to {}",
|
||||
req.catalog_name, req.schema_name, table_name, table_file
|
||||
);
|
||||
|
||||
let exported = self
|
||||
.copy_table_to(CopyTableRequest {
|
||||
catalog_name: req.catalog_name.clone(),
|
||||
schema_name: req.schema_name.clone(),
|
||||
table_name,
|
||||
location: table_file,
|
||||
with: req.with.clone(),
|
||||
connection: req.connection.clone(),
|
||||
pattern: None,
|
||||
direction: CopyDirection::Export,
|
||||
timestamp_range: req.time_range,
|
||||
})
|
||||
.await?;
|
||||
exported_rows += exported;
|
||||
}
|
||||
Ok(Output::AffectedRows(exported_rows))
|
||||
}
|
||||
}
|
||||
@@ -24,7 +24,6 @@ use common_datasource::file_format::{FileFormat, Format};
|
||||
use common_datasource::lister::{Lister, Source};
|
||||
use common_datasource::object_store::{build_backend, parse_url};
|
||||
use common_datasource::util::find_dir_and_filename;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::adapter::ParquetRecordBatchStreamAdapter;
|
||||
use common_recordbatch::DfSendableRecordBatchStream;
|
||||
use datafusion::datasource::listing::PartitionedFile;
|
||||
@@ -205,7 +204,7 @@ impl StatementExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn copy_table_from(&self, req: CopyTableRequest) -> Result<Output> {
|
||||
pub async fn copy_table_from(&self, req: CopyTableRequest) -> Result<usize> {
|
||||
let table_ref = TableReference {
|
||||
catalog: &req.catalog_name,
|
||||
schema: &req.schema_name,
|
||||
@@ -313,7 +312,7 @@ impl StatementExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Output::AffectedRows(rows_inserted))
|
||||
Ok(rows_inserted)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ use common_datasource::file_format::json::stream_to_json;
|
||||
use common_datasource::file_format::Format;
|
||||
use common_datasource::object_store::{build_backend, parse_url};
|
||||
use common_query::physical_plan::SessionContext;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use object_store::ObjectStore;
|
||||
@@ -72,7 +71,7 @@ impl StatementExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn copy_table_to(&self, req: CopyTableRequest) -> Result<Output> {
|
||||
pub(crate) async fn copy_table_to(&self, req: CopyTableRequest) -> Result<usize> {
|
||||
let table_ref = TableReference {
|
||||
catalog: &req.catalog_name,
|
||||
schema: &req.schema_name,
|
||||
@@ -82,12 +81,25 @@ impl StatementExecutor {
|
||||
|
||||
let format = Format::try_from(&req.with).context(error::ParseFileFormatSnafu)?;
|
||||
|
||||
let stream = table
|
||||
.scan(None, &[], None)
|
||||
.await
|
||||
.with_context(|_| error::CopyTableSnafu {
|
||||
table_name: table_ref.to_string(),
|
||||
})?;
|
||||
let filters = table
|
||||
.schema()
|
||||
.timestamp_column()
|
||||
.and_then(|c| {
|
||||
common_query::logical_plan::build_filter_from_timestamp(
|
||||
&c.name,
|
||||
req.timestamp_range.as_ref(),
|
||||
)
|
||||
})
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let stream =
|
||||
table
|
||||
.scan(None, &filters, None)
|
||||
.await
|
||||
.with_context(|_| error::CopyTableSnafu {
|
||||
table_name: table_ref.to_string(),
|
||||
})?;
|
||||
|
||||
let stream = stream
|
||||
.execute(0, SessionContext::default().task_ctx())
|
||||
@@ -101,6 +113,6 @@ impl StatementExecutor {
|
||||
.stream_to_file(stream, &format, object_store, &path)
|
||||
.await?;
|
||||
|
||||
Ok(Output::AffectedRows(rows_copied))
|
||||
Ok(rows_copied)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-meta = { path = "../common/meta" }
|
||||
etcd-client = "0.10"
|
||||
etcd-client = "0.11"
|
||||
rand.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -755,16 +755,21 @@ mod tests {
|
||||
async fn test_batch_put() {
|
||||
let tc = new_client("test_batch_put").await;
|
||||
|
||||
let req = BatchPutRequest::new()
|
||||
.add_kv(tc.key("key"), b"value".to_vec())
|
||||
.add_kv(tc.key("key2"), b"value2".to_vec());
|
||||
let mut req = BatchPutRequest::new();
|
||||
for i in 0..275 {
|
||||
req = req.add_kv(
|
||||
tc.key(&format!("key-{}", i)),
|
||||
format!("value-{}", i).into_bytes(),
|
||||
);
|
||||
}
|
||||
|
||||
let res = tc.client.batch_put(req).await;
|
||||
assert_eq!(0, res.unwrap().take_prev_kvs().len());
|
||||
|
||||
let req = RangeRequest::new().with_range(tc.key("key"), tc.key("key3"));
|
||||
let req = RangeRequest::new().with_prefix(tc.key("key-"));
|
||||
let res = tc.client.range(req).await;
|
||||
let kvs = res.unwrap().take_kvs();
|
||||
assert_eq!(2, kvs.len());
|
||||
assert_eq!(275, kvs.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -772,16 +777,17 @@ mod tests {
|
||||
let tc = new_client("test_batch_get").await;
|
||||
tc.gen_data().await;
|
||||
|
||||
let req = BatchGetRequest::default()
|
||||
.add_key(tc.key("key-1"))
|
||||
.add_key(tc.key("key-2"));
|
||||
let mut req = BatchGetRequest::default();
|
||||
for i in 0..256 {
|
||||
req = req.add_key(tc.key(&format!("key-{}", i)));
|
||||
}
|
||||
let mut res = tc.client.batch_get(req).await.unwrap();
|
||||
|
||||
assert_eq!(2, res.take_kvs().len());
|
||||
assert_eq!(10, res.take_kvs().len());
|
||||
|
||||
let req = BatchGetRequest::default()
|
||||
.add_key(tc.key("key-1"))
|
||||
.add_key(tc.key("key-222"));
|
||||
.add_key(tc.key("key-999"));
|
||||
let mut res = tc.client.batch_get(req).await.unwrap();
|
||||
|
||||
assert_eq!(1, res.take_kvs().len());
|
||||
|
||||
@@ -24,7 +24,7 @@ common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
dashmap = "5.4"
|
||||
derive_builder = "0.12"
|
||||
etcd-client = "0.10"
|
||||
etcd-client = "0.11"
|
||||
futures.workspace = true
|
||||
h2 = "0.3"
|
||||
http-body = "0.4"
|
||||
@@ -38,6 +38,7 @@ regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
snafu.workspace = true
|
||||
store-api = { path = "../store-api" }
|
||||
table = { path = "../table" }
|
||||
tokio.workspace = true
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
|
||||
@@ -354,6 +354,12 @@ pub enum Error {
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert proto data, source: {}", source))]
|
||||
ConvertProtoData {
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
// this error is used for custom error mapping
|
||||
// please do not delete it
|
||||
#[snafu(display("Other error, source: {}", source))]
|
||||
@@ -442,7 +448,9 @@ impl ErrorExt for Error {
|
||||
Error::RegionFailoverCandidatesNotFound { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
|
||||
Error::RegisterProcedureLoader { source, .. } => source.status_code(),
|
||||
Error::TableRouteConversion { source, .. } => source.status_code(),
|
||||
Error::TableRouteConversion { source, .. } | Error::ConvertProtoData { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,18 +19,18 @@ use std::time::Duration;
|
||||
|
||||
use api::v1::meta::mailbox_message::Payload;
|
||||
use api::v1::meta::{
|
||||
HeartbeatRequest, HeartbeatResponse, MailboxMessage, RequestHeader, ResponseHeader, Role,
|
||||
PROTOCOL_VERSION,
|
||||
HeartbeatRequest, HeartbeatResponse, MailboxMessage, RegionLease, RequestHeader,
|
||||
ResponseHeader, Role, PROTOCOL_VERSION,
|
||||
};
|
||||
pub use check_leader_handler::CheckLeaderHandler;
|
||||
pub use collect_stats_handler::CollectStatsHandler;
|
||||
use common_meta::instruction::{Instruction, InstructionReply};
|
||||
use common_telemetry::{debug, info, warn};
|
||||
use common_telemetry::{debug, info, timer, warn};
|
||||
use dashmap::DashMap;
|
||||
pub use failure_handler::RegionFailureHandler;
|
||||
pub use keep_lease_handler::KeepLeaseHandler;
|
||||
use metrics::{decrement_gauge, increment_gauge};
|
||||
pub use on_leader_start::OnLeaderStartHandler;
|
||||
pub use on_leader_start_handler::OnLeaderStartHandler;
|
||||
pub use persist_stats_handler::PersistStatsHandler;
|
||||
pub use response_header_handler::ResponseHeaderHandler;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -40,7 +40,7 @@ use tokio::sync::{oneshot, Notify, RwLock};
|
||||
use self::node_stat::Stat;
|
||||
use crate::error::{self, DeserializeFromJsonSnafu, Result, UnexpectedInstructionReplySnafu};
|
||||
use crate::metasrv::Context;
|
||||
use crate::metrics::METRIC_META_HEARTBEAT_CONNECTION_NUM;
|
||||
use crate::metrics::{METRIC_META_HANDLER_EXECUTE, METRIC_META_HEARTBEAT_CONNECTION_NUM};
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::mailbox::{
|
||||
BroadcastChannel, Channel, Mailbox, MailboxReceiver, MailboxRef, MessageId,
|
||||
@@ -52,14 +52,21 @@ pub(crate) mod failure_handler;
|
||||
mod keep_lease_handler;
|
||||
pub mod mailbox_handler;
|
||||
pub mod node_stat;
|
||||
mod on_leader_start;
|
||||
mod on_leader_start_handler;
|
||||
mod persist_stats_handler;
|
||||
pub(crate) mod region_lease_handler;
|
||||
mod response_header_handler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait HeartbeatHandler: Send + Sync {
|
||||
fn is_acceptable(&self, role: Role) -> bool;
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
let type_name = std::any::type_name::<Self>();
|
||||
// short name
|
||||
type_name.split("::").last().unwrap_or(type_name)
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
req: &HeartbeatRequest,
|
||||
@@ -73,6 +80,7 @@ pub struct HeartbeatAccumulator {
|
||||
pub header: Option<ResponseHeader>,
|
||||
pub instructions: Vec<Instruction>,
|
||||
pub stat: Option<Stat>,
|
||||
pub region_leases: Vec<RegionLease>,
|
||||
}
|
||||
|
||||
impl HeartbeatAccumulator {
|
||||
@@ -130,6 +138,7 @@ impl Pushers {
|
||||
.push(HeartbeatResponse {
|
||||
header: Some(pusher.header()),
|
||||
mailbox_message: Some(mailbox_message),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
}
|
||||
@@ -151,6 +160,7 @@ impl Pushers {
|
||||
.push(HeartbeatResponse {
|
||||
header: Some(pusher.header()),
|
||||
mailbox_message: Some(mailbox_message),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
@@ -167,9 +177,22 @@ impl Pushers {
|
||||
}
|
||||
}
|
||||
|
||||
struct NameCachedHandler {
|
||||
name: &'static str,
|
||||
handler: Box<dyn HeartbeatHandler>,
|
||||
}
|
||||
|
||||
impl NameCachedHandler {
|
||||
fn new(handler: impl HeartbeatHandler + 'static) -> Self {
|
||||
let name = handler.name();
|
||||
let handler = Box::new(handler);
|
||||
Self { name, handler }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct HeartbeatHandlerGroup {
|
||||
handlers: Arc<RwLock<Vec<Box<dyn HeartbeatHandler>>>>,
|
||||
handlers: Arc<RwLock<Vec<NameCachedHandler>>>,
|
||||
pushers: Pushers,
|
||||
}
|
||||
|
||||
@@ -183,7 +206,7 @@ impl HeartbeatHandlerGroup {
|
||||
|
||||
pub async fn add_handler(&self, handler: impl HeartbeatHandler + 'static) {
|
||||
let mut handlers = self.handlers.write().await;
|
||||
handlers.push(Box::new(handler));
|
||||
handlers.push(NameCachedHandler::new(handler));
|
||||
}
|
||||
|
||||
pub async fn register(&self, key: impl AsRef<str>, pusher: Pusher) {
|
||||
@@ -219,19 +242,21 @@ impl HeartbeatHandlerGroup {
|
||||
err_msg: format!("invalid role: {:?}", req.header),
|
||||
})?;
|
||||
|
||||
for h in handlers.iter() {
|
||||
for NameCachedHandler { name, handler } in handlers.iter() {
|
||||
if ctx.is_skip_all() {
|
||||
break;
|
||||
}
|
||||
|
||||
if h.is_acceptable(role) {
|
||||
h.handle(&req, &mut ctx, &mut acc).await?;
|
||||
if handler.is_acceptable(role) {
|
||||
let _timer = timer!(METRIC_META_HANDLER_EXECUTE, &[("name", *name)]);
|
||||
handler.handle(&req, &mut ctx, &mut acc).await?;
|
||||
}
|
||||
}
|
||||
let header = std::mem::take(&mut acc.header);
|
||||
let res = HeartbeatResponse {
|
||||
header,
|
||||
mailbox_message: acc.into_mailbox_message(),
|
||||
region_leases: acc.region_leases,
|
||||
..Default::default()
|
||||
};
|
||||
Ok(res)
|
||||
}
|
||||
@@ -378,7 +403,11 @@ mod tests {
|
||||
use api::v1::meta::{MailboxMessage, RequestHeader, Role, PROTOCOL_VERSION};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::handler::{HeartbeatHandlerGroup, HeartbeatMailbox, Pusher};
|
||||
use crate::handler::mailbox_handler::MailboxHandler;
|
||||
use crate::handler::{
|
||||
CheckLeaderHandler, CollectStatsHandler, HeartbeatHandlerGroup, HeartbeatMailbox,
|
||||
OnLeaderStartHandler, PersistStatsHandler, Pusher, ResponseHeaderHandler,
|
||||
};
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::mailbox::{Channel, MailboxReceiver, MailboxRef};
|
||||
use crate::service::store::memory::MemStore;
|
||||
@@ -447,4 +476,25 @@ mod tests {
|
||||
|
||||
(mailbox, receiver)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handler_name() {
|
||||
let group = HeartbeatHandlerGroup::default();
|
||||
group.add_handler(ResponseHeaderHandler::default()).await;
|
||||
group.add_handler(CheckLeaderHandler::default()).await;
|
||||
group.add_handler(OnLeaderStartHandler::default()).await;
|
||||
group.add_handler(CollectStatsHandler::default()).await;
|
||||
group.add_handler(MailboxHandler::default()).await;
|
||||
group.add_handler(PersistStatsHandler::default()).await;
|
||||
|
||||
let handlers = group.handlers.read().await;
|
||||
|
||||
assert_eq!(6, handlers.len());
|
||||
assert_eq!("ResponseHeaderHandler", handlers[0].handler.name());
|
||||
assert_eq!("CheckLeaderHandler", handlers[1].handler.name());
|
||||
assert_eq!("OnLeaderStartHandler", handlers[2].handler.name());
|
||||
assert_eq!("CollectStatsHandler", handlers[3].handler.name());
|
||||
assert_eq!("MailboxHandler", handlers[4].handler.name());
|
||||
assert_eq!("PersistStatsHandler", handlers[5].handler.name());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ use crate::error::Result;
|
||||
use crate::handler::{HeartbeatAccumulator, HeartbeatHandler};
|
||||
use crate::metasrv::Context;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CollectStatsHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::sync::Arc;
|
||||
use api::v1::meta::{HeartbeatRequest, Role};
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use common_meta::instruction::TableIdent;
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::RegionIdent;
|
||||
use table::engine::table_id;
|
||||
|
||||
@@ -36,6 +36,7 @@ pub(crate) struct DatanodeHeartbeat {
|
||||
|
||||
pub struct RegionFailureHandler {
|
||||
failure_detect_runner: FailureDetectRunner,
|
||||
region_failover_manager: Arc<RegionFailoverManager>,
|
||||
}
|
||||
|
||||
impl RegionFailureHandler {
|
||||
@@ -45,13 +46,19 @@ impl RegionFailureHandler {
|
||||
) -> Result<Self> {
|
||||
region_failover_manager.try_start()?;
|
||||
|
||||
let mut failure_detect_runner = FailureDetectRunner::new(election, region_failover_manager);
|
||||
let mut failure_detect_runner =
|
||||
FailureDetectRunner::new(election, region_failover_manager.clone());
|
||||
failure_detect_runner.start().await;
|
||||
|
||||
Ok(Self {
|
||||
failure_detect_runner,
|
||||
region_failover_manager,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn region_failover_manager(&self) -> &Arc<RegionFailoverManager> {
|
||||
&self.region_failover_manager
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -246,7 +246,7 @@ impl FailureDetectorContainer {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_catalog::consts::MITO_ENGINE;
|
||||
use common_meta::instruction::TableIdent;
|
||||
use common_meta::ident::TableIdent;
|
||||
use rand::Rng;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -42,6 +42,8 @@ pub struct Stat {
|
||||
pub write_io_rate: f64,
|
||||
/// Region stats on this node
|
||||
pub region_stats: Vec<RegionStat>,
|
||||
// The node epoch is used to check whether the node has restarted or redeployed.
|
||||
pub node_epoch: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
@@ -79,6 +81,7 @@ impl TryFrom<HeartbeatRequest> for Stat {
|
||||
is_leader,
|
||||
node_stat,
|
||||
region_stats,
|
||||
node_epoch,
|
||||
..
|
||||
} = value;
|
||||
|
||||
@@ -104,6 +107,7 @@ impl TryFrom<HeartbeatRequest> for Stat {
|
||||
read_io_rate: node_stat.read_io_rate,
|
||||
write_io_rate: node_stat.write_io_rate,
|
||||
region_stats: region_stats.into_iter().map(RegionStat::from).collect(),
|
||||
node_epoch,
|
||||
})
|
||||
}
|
||||
_ => Err(()),
|
||||
|
||||
@@ -23,9 +23,47 @@ use crate::metasrv::Context;
|
||||
|
||||
const MAX_CACHED_STATS_PER_KEY: usize = 10;
|
||||
|
||||
#[derive(Default)]
|
||||
struct EpochStats {
|
||||
stats: Vec<Stat>,
|
||||
epoch: Option<u64>,
|
||||
}
|
||||
|
||||
impl EpochStats {
|
||||
#[inline]
|
||||
fn drain_all(&mut self) -> Vec<Stat> {
|
||||
self.stats.drain(..).collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn clear(&mut self) {
|
||||
self.stats.clear();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn push(&mut self, stat: Stat) {
|
||||
self.stats.push(stat);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
self.stats.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn epoch(&self) -> Option<u64> {
|
||||
self.epoch
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn set_epoch(&mut self, epoch: u64) {
|
||||
self.epoch = Some(epoch);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct PersistStatsHandler {
|
||||
stats_cache: DashMap<StatKey, Vec<Stat>>,
|
||||
stats_cache: DashMap<StatKey, EpochStats>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -40,26 +78,47 @@ impl HeartbeatHandler for PersistStatsHandler {
|
||||
ctx: &mut Context,
|
||||
acc: &mut HeartbeatAccumulator,
|
||||
) -> Result<()> {
|
||||
let Some(stat) = acc.stat.take() else { return Ok(()) };
|
||||
let Some(current_stat) = acc.stat.take() else { return Ok(()) };
|
||||
|
||||
let key = stat.stat_key();
|
||||
let key = current_stat.stat_key();
|
||||
let mut entry = self
|
||||
.stats_cache
|
||||
.entry(key)
|
||||
.or_insert_with(|| Vec::with_capacity(MAX_CACHED_STATS_PER_KEY));
|
||||
let stats = entry.value_mut();
|
||||
stats.push(stat);
|
||||
.or_insert_with(EpochStats::default);
|
||||
|
||||
if stats.len() < MAX_CACHED_STATS_PER_KEY {
|
||||
let key: Vec<u8> = key.into();
|
||||
let epoch_stats = entry.value_mut();
|
||||
|
||||
let refresh = if let Some(epoch) = epoch_stats.epoch() {
|
||||
// This node may have been redeployed.
|
||||
if current_stat.node_epoch > epoch {
|
||||
epoch_stats.set_epoch(current_stat.node_epoch);
|
||||
epoch_stats.clear();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
epoch_stats.set_epoch(current_stat.node_epoch);
|
||||
// If the epoch is empty, it indicates that the current node sending the heartbeat
|
||||
// for the first time to the current meta leader, so it is necessary to persist
|
||||
// the data to the KV store as soon as possible.
|
||||
true
|
||||
};
|
||||
|
||||
epoch_stats.push(current_stat);
|
||||
|
||||
if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let stats = stats.drain(..).collect();
|
||||
let val = StatValue { stats };
|
||||
|
||||
let value: Vec<u8> = StatValue {
|
||||
stats: epoch_stats.drain_all(),
|
||||
}
|
||||
.try_into()?;
|
||||
let put = PutRequest {
|
||||
key: key.into(),
|
||||
value: val.try_into()?,
|
||||
key,
|
||||
value,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -74,12 +133,11 @@ mod tests {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::RangeRequest;
|
||||
|
||||
use super::*;
|
||||
use crate::handler::{HeartbeatMailbox, Pushers};
|
||||
use crate::keys::StatKey;
|
||||
use crate::sequence::Sequence;
|
||||
use crate::service::store::ext::KvStoreExt;
|
||||
use crate::service::store::memory::MemStore;
|
||||
|
||||
#[tokio::test]
|
||||
@@ -88,7 +146,7 @@ mod tests {
|
||||
let kv_store = Arc::new(MemStore::new());
|
||||
let seq = Sequence::new("test_seq", 0, 10, kv_store.clone());
|
||||
let mailbox = HeartbeatMailbox::create(Pushers::default(), seq);
|
||||
let mut ctx = Context {
|
||||
let ctx = Context {
|
||||
server_addr: "127.0.0.1:0000".to_string(),
|
||||
in_memory,
|
||||
kv_store,
|
||||
@@ -98,9 +156,40 @@ mod tests {
|
||||
is_infancy: false,
|
||||
};
|
||||
|
||||
let req = HeartbeatRequest::default();
|
||||
let handler = PersistStatsHandler::default();
|
||||
for i in 1..=MAX_CACHED_STATS_PER_KEY {
|
||||
handle_request_many_times(ctx.clone(), &handler, 1).await;
|
||||
|
||||
let key = StatKey {
|
||||
cluster_id: 3,
|
||||
node_id: 101,
|
||||
};
|
||||
let res = ctx.in_memory.get(key.try_into().unwrap()).await.unwrap();
|
||||
assert!(res.is_some());
|
||||
let kv = res.unwrap();
|
||||
let key: StatKey = kv.key.clone().try_into().unwrap();
|
||||
assert_eq!(3, key.cluster_id);
|
||||
assert_eq!(101, key.node_id);
|
||||
let val: StatValue = kv.value.try_into().unwrap();
|
||||
// first new stat must be set in kv store immediately
|
||||
assert_eq!(1, val.stats.len());
|
||||
assert_eq!(Some(1), val.stats[0].region_num);
|
||||
|
||||
handle_request_many_times(ctx.clone(), &handler, 10).await;
|
||||
let res = ctx.in_memory.get(key.try_into().unwrap()).await.unwrap();
|
||||
assert!(res.is_some());
|
||||
let kv = res.unwrap();
|
||||
let val: StatValue = kv.value.try_into().unwrap();
|
||||
// refresh every 10 stats
|
||||
assert_eq!(10, val.stats.len());
|
||||
}
|
||||
|
||||
async fn handle_request_many_times(
|
||||
mut ctx: Context,
|
||||
handler: &PersistStatsHandler,
|
||||
loop_times: i32,
|
||||
) {
|
||||
let req = HeartbeatRequest::default();
|
||||
for i in 1..=loop_times {
|
||||
let mut acc = HeartbeatAccumulator {
|
||||
stat: Some(Stat {
|
||||
cluster_id: 3,
|
||||
@@ -112,30 +201,5 @@ mod tests {
|
||||
};
|
||||
handler.handle(&req, &mut ctx, &mut acc).await.unwrap();
|
||||
}
|
||||
|
||||
let key = StatKey {
|
||||
cluster_id: 3,
|
||||
node_id: 101,
|
||||
};
|
||||
|
||||
let req = RangeRequest {
|
||||
key: key.try_into().unwrap(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let res = ctx.in_memory.range(req).await.unwrap();
|
||||
|
||||
assert_eq!(1, res.kvs.len());
|
||||
|
||||
let kv = &res.kvs[0];
|
||||
|
||||
let key: StatKey = kv.key.clone().try_into().unwrap();
|
||||
assert_eq!(3, key.cluster_id);
|
||||
assert_eq!(101, key.node_id);
|
||||
|
||||
let val: StatValue = kv.value.clone().try_into().unwrap();
|
||||
|
||||
assert_eq!(10, val.stats.len());
|
||||
assert_eq!(Some(1), val.stats[0].region_num);
|
||||
}
|
||||
}
|
||||
|
||||
226
src/meta-srv/src/handler/region_lease_handler.rs
Normal file
226
src/meta-srv/src/handler/region_lease_handler.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, RegionLease, Role};
|
||||
use async_trait::async_trait;
|
||||
use catalog::helper::TableGlobalKey;
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::ClusterId;
|
||||
use store_api::storage::RegionNumber;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::handler::{HeartbeatAccumulator, HeartbeatHandler};
|
||||
use crate::metasrv::Context;
|
||||
use crate::procedure::region_failover::{RegionFailoverKey, RegionFailoverManager};
|
||||
use crate::service::store::kv::KvStoreRef;
|
||||
use crate::table_routes;
|
||||
|
||||
/// The lease seconds of a region. It's set by two default heartbeat intervals (5 second × 2) plus
|
||||
/// two roundtrip time (2 second × 2 × 2), plus some extra buffer (2 second).
|
||||
// TODO(LFC): Make region lease seconds calculated from Datanode heartbeat configuration.
|
||||
pub(crate) const REGION_LEASE_SECONDS: u64 = 20;
|
||||
|
||||
pub(crate) struct RegionLeaseHandler {
|
||||
kv_store: KvStoreRef,
|
||||
region_failover_manager: Option<Arc<RegionFailoverManager>>,
|
||||
}
|
||||
|
||||
impl RegionLeaseHandler {
|
||||
pub(crate) fn new(
|
||||
kv_store: KvStoreRef,
|
||||
region_failover_manager: Option<Arc<RegionFailoverManager>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
kv_store,
|
||||
region_failover_manager,
|
||||
}
|
||||
}
|
||||
|
||||
/// Filter out the regions that are currently in failover.
|
||||
/// It's meaningless to extend the lease of a region if it is in failover.
|
||||
fn filter_failover_regions(
|
||||
&self,
|
||||
cluster_id: ClusterId,
|
||||
table_ident: &TableIdent,
|
||||
regions: Vec<RegionNumber>,
|
||||
) -> Vec<RegionNumber> {
|
||||
if let Some(region_failover_manager) = &self.region_failover_manager {
|
||||
let mut region_failover_key = RegionFailoverKey {
|
||||
cluster_id,
|
||||
table_ident: table_ident.clone(),
|
||||
region_number: 0,
|
||||
};
|
||||
|
||||
regions
|
||||
.into_iter()
|
||||
.filter(|region| {
|
||||
region_failover_key.region_number = *region;
|
||||
!region_failover_manager.is_region_failover_running(®ion_failover_key)
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
regions
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl HeartbeatHandler for RegionLeaseHandler {
|
||||
fn is_acceptable(&self, role: Role) -> bool {
|
||||
role == Role::Datanode
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
req: &HeartbeatRequest,
|
||||
_: &mut Context,
|
||||
acc: &mut HeartbeatAccumulator,
|
||||
) -> Result<()> {
|
||||
let Some(stat) = acc.stat.as_ref() else { return Ok(()) };
|
||||
|
||||
let mut datanode_regions = HashMap::new();
|
||||
stat.region_stats.iter().for_each(|x| {
|
||||
let key = TableGlobalKey {
|
||||
catalog_name: x.catalog.to_string(),
|
||||
schema_name: x.schema.to_string(),
|
||||
table_name: x.table.to_string(),
|
||||
};
|
||||
datanode_regions
|
||||
.entry(key)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(table::engine::region_number(x.id));
|
||||
});
|
||||
|
||||
// TODO(LFC): Retrieve table global values from some cache here.
|
||||
let table_global_values = table_routes::batch_get_table_global_value(
|
||||
&self.kv_store,
|
||||
datanode_regions.keys().collect::<Vec<_>>(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut region_leases = Vec::with_capacity(datanode_regions.len());
|
||||
for (table_global_key, local_regions) in datanode_regions {
|
||||
let Some(Some(table_global_value)) = table_global_values.get(&table_global_key) else { continue };
|
||||
|
||||
let Some(global_regions) = table_global_value.regions_id_map.get(&stat.id) else { continue };
|
||||
|
||||
// Filter out the designated regions from table global metadata for the given table on the given Datanode.
|
||||
let designated_regions = local_regions
|
||||
.into_iter()
|
||||
.filter(|x| global_regions.contains(x))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let table_ident = TableIdent {
|
||||
catalog: table_global_key.catalog_name.to_string(),
|
||||
schema: table_global_key.schema_name.to_string(),
|
||||
table: table_global_key.table_name.to_string(),
|
||||
table_id: table_global_value.table_id(),
|
||||
engine: table_global_value.engine().to_string(),
|
||||
};
|
||||
let designated_regions =
|
||||
self.filter_failover_regions(stat.cluster_id, &table_ident, designated_regions);
|
||||
|
||||
region_leases.push(RegionLease {
|
||||
table_ident: Some(table_ident.into()),
|
||||
regions: designated_regions,
|
||||
duration_since_epoch: req.duration_since_epoch,
|
||||
lease_seconds: REGION_LEASE_SECONDS,
|
||||
});
|
||||
}
|
||||
acc.region_leases = region_leases;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
|
||||
use super::*;
|
||||
use crate::handler::node_stat::{RegionStat, Stat};
|
||||
use crate::metasrv::builder::MetaSrvBuilder;
|
||||
use crate::test_util;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_region_lease() {
|
||||
let region_failover_manager = test_util::create_region_failover_manager();
|
||||
let kv_store = region_failover_manager
|
||||
.create_context()
|
||||
.selector_ctx
|
||||
.kv_store
|
||||
.clone();
|
||||
|
||||
let table_name = "my_table";
|
||||
let _ = table_routes::tests::prepare_table_global_value(&kv_store, table_name).await;
|
||||
|
||||
let table_ident = TableIdent {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: table_name.to_string(),
|
||||
table_id: 1,
|
||||
engine: "mito".to_string(),
|
||||
};
|
||||
region_failover_manager
|
||||
.running_procedures()
|
||||
.write()
|
||||
.unwrap()
|
||||
.insert(RegionFailoverKey {
|
||||
cluster_id: 1,
|
||||
table_ident: table_ident.clone(),
|
||||
region_number: 1,
|
||||
});
|
||||
|
||||
let handler = RegionLeaseHandler::new(kv_store, Some(region_failover_manager));
|
||||
|
||||
let req = HeartbeatRequest {
|
||||
duration_since_epoch: 1234,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let builder = MetaSrvBuilder::new();
|
||||
let metasrv = builder.build().await.unwrap();
|
||||
let ctx = &mut metasrv.new_ctx();
|
||||
|
||||
let acc = &mut HeartbeatAccumulator::default();
|
||||
let new_region_stat = |region_id: u64| -> RegionStat {
|
||||
RegionStat {
|
||||
id: region_id,
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table: table_name.to_string(),
|
||||
..Default::default()
|
||||
}
|
||||
};
|
||||
acc.stat = Some(Stat {
|
||||
cluster_id: 1,
|
||||
id: 1,
|
||||
region_stats: vec![new_region_stat(1), new_region_stat(2), new_region_stat(3)],
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
handler.handle(&req, ctx, acc).await.unwrap();
|
||||
|
||||
// region 1 is during failover and region 3 is not in table global value,
|
||||
// so only region 2's lease is extended.
|
||||
assert_eq!(acc.region_leases.len(), 1);
|
||||
let lease = acc.region_leases.remove(0);
|
||||
assert_eq!(lease.table_ident.unwrap(), table_ident.into());
|
||||
assert_eq!(lease.regions, vec![2]);
|
||||
assert_eq!(lease.duration_since_epoch, 1234);
|
||||
assert_eq!(lease.lease_seconds, REGION_LEASE_SECONDS);
|
||||
}
|
||||
}
|
||||
@@ -88,6 +88,7 @@ mod tests {
|
||||
let res = HeartbeatResponse {
|
||||
header,
|
||||
mailbox_message: acc.into_mailbox_message(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(1, res.header.unwrap().cluster_id);
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ pub struct MetaSrvOptions {
|
||||
pub datanode_lease_secs: i64,
|
||||
pub selector: SelectorType,
|
||||
pub use_memory_store: bool,
|
||||
pub disable_region_failover: bool,
|
||||
pub http_opts: HttpOptions,
|
||||
pub logging: LoggingOptions,
|
||||
}
|
||||
@@ -62,6 +63,7 @@ impl Default for MetaSrvOptions {
|
||||
datanode_lease_secs: 15,
|
||||
selector: SelectorType::default(),
|
||||
use_memory_store: false,
|
||||
disable_region_failover: false,
|
||||
http_opts: HttpOptions::default(),
|
||||
logging: LoggingOptions::default(),
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ use common_procedure::local::{LocalManager, ManagerConfig};
|
||||
use crate::cluster::MetaPeerClient;
|
||||
use crate::error::Result;
|
||||
use crate::handler::mailbox_handler::MailboxHandler;
|
||||
use crate::handler::region_lease_handler::RegionLeaseHandler;
|
||||
use crate::handler::{
|
||||
CheckLeaderHandler, CollectStatsHandler, HeartbeatHandlerGroup, HeartbeatMailbox,
|
||||
KeepLeaseHandler, OnLeaderStartHandler, PersistStatsHandler, Pushers, RegionFailureHandler,
|
||||
@@ -146,24 +147,36 @@ impl MetaSrvBuilder {
|
||||
let handler_group = match handler_group {
|
||||
Some(handler_group) => handler_group,
|
||||
None => {
|
||||
let region_failover_manager = Arc::new(RegionFailoverManager::new(
|
||||
mailbox.clone(),
|
||||
procedure_manager.clone(),
|
||||
selector.clone(),
|
||||
SelectorContext {
|
||||
server_addr: options.server_addr.clone(),
|
||||
datanode_lease_secs: options.datanode_lease_secs,
|
||||
kv_store: kv_store.clone(),
|
||||
catalog: None,
|
||||
schema: None,
|
||||
table: None,
|
||||
},
|
||||
lock.clone(),
|
||||
));
|
||||
let region_failover_handler = if options.disable_region_failover {
|
||||
None
|
||||
} else {
|
||||
let region_failover_manager = Arc::new(RegionFailoverManager::new(
|
||||
mailbox.clone(),
|
||||
procedure_manager.clone(),
|
||||
selector.clone(),
|
||||
SelectorContext {
|
||||
server_addr: options.server_addr.clone(),
|
||||
datanode_lease_secs: options.datanode_lease_secs,
|
||||
kv_store: kv_store.clone(),
|
||||
catalog: None,
|
||||
schema: None,
|
||||
table: None,
|
||||
},
|
||||
lock.clone(),
|
||||
));
|
||||
|
||||
let region_failure_handler =
|
||||
RegionFailureHandler::try_new(election.clone(), region_failover_manager)
|
||||
.await?;
|
||||
Some(
|
||||
RegionFailureHandler::try_new(election.clone(), region_failover_manager)
|
||||
.await?,
|
||||
)
|
||||
};
|
||||
|
||||
let region_lease_handler = RegionLeaseHandler::new(
|
||||
kv_store.clone(),
|
||||
region_failover_handler
|
||||
.as_ref()
|
||||
.map(|x| x.region_failover_manager().clone()),
|
||||
);
|
||||
|
||||
let group = HeartbeatHandlerGroup::new(pushers);
|
||||
let keep_lease_handler = KeepLeaseHandler::new(kv_store.clone());
|
||||
@@ -174,9 +187,12 @@ impl MetaSrvBuilder {
|
||||
group.add_handler(keep_lease_handler).await;
|
||||
group.add_handler(CheckLeaderHandler::default()).await;
|
||||
group.add_handler(OnLeaderStartHandler::default()).await;
|
||||
group.add_handler(CollectStatsHandler).await;
|
||||
group.add_handler(MailboxHandler).await;
|
||||
group.add_handler(region_failure_handler).await;
|
||||
group.add_handler(CollectStatsHandler::default()).await;
|
||||
group.add_handler(MailboxHandler::default()).await;
|
||||
if let Some(region_failover_handler) = region_failover_handler {
|
||||
group.add_handler(region_failover_handler).await;
|
||||
}
|
||||
group.add_handler(region_lease_handler).await;
|
||||
group.add_handler(PersistStatsHandler::default()).await;
|
||||
group
|
||||
}
|
||||
|
||||
@@ -17,3 +17,4 @@ pub(crate) const METRIC_META_CREATE_SCHEMA: &str = "meta.create_schema";
|
||||
pub(crate) const METRIC_META_KV_REQUEST: &str = "meta.kv_request";
|
||||
pub(crate) const METRIC_META_ROUTE_REQUEST: &str = "meta.route_request";
|
||||
pub(crate) const METRIC_META_HEARTBEAT_CONNECTION_NUM: &str = "meta.heartbeat_connection_num";
|
||||
pub(crate) const METRIC_META_HANDLER_EXECUTE: &str = "meta.handler_execute";
|
||||
|
||||
@@ -21,12 +21,13 @@ mod update_metadata;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::helper::TableGlobalKey;
|
||||
use common_meta::RegionIdent;
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::{ClusterId, RegionIdent};
|
||||
use common_procedure::error::{
|
||||
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
};
|
||||
@@ -38,6 +39,7 @@ use common_telemetry::{error, info, warn};
|
||||
use failover_start::RegionFailoverStart;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionNumber;
|
||||
|
||||
use crate::error::{Error, RegisterProcedureLoaderSnafu, Result};
|
||||
use crate::lock::DistLockRef;
|
||||
@@ -48,26 +50,41 @@ use crate::service::store::ext::KvStoreExt;
|
||||
const OPEN_REGION_MESSAGE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const CLOSE_REGION_MESSAGE_TIMEOUT: Duration = Duration::from_secs(2);
|
||||
|
||||
/// A key for the preventing running multiple failover procedures for the same region.
|
||||
#[derive(PartialEq, Eq, Hash, Clone)]
|
||||
pub(crate) struct RegionFailoverKey {
|
||||
pub(crate) cluster_id: ClusterId,
|
||||
pub(crate) table_ident: TableIdent,
|
||||
pub(crate) region_number: RegionNumber,
|
||||
}
|
||||
|
||||
impl From<RegionIdent> for RegionFailoverKey {
|
||||
fn from(region_ident: RegionIdent) -> Self {
|
||||
Self {
|
||||
cluster_id: region_ident.cluster_id,
|
||||
table_ident: region_ident.table_ident,
|
||||
region_number: region_ident.region_number,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct RegionFailoverManager {
|
||||
mailbox: MailboxRef,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
selector: SelectorRef,
|
||||
selector_ctx: SelectorContext,
|
||||
dist_lock: DistLockRef,
|
||||
running_procedures: Arc<Mutex<HashSet<RegionIdent>>>,
|
||||
running_procedures: Arc<RwLock<HashSet<RegionFailoverKey>>>,
|
||||
}
|
||||
|
||||
struct FailoverProcedureGuard<'a> {
|
||||
running_procedures: Arc<Mutex<HashSet<RegionIdent>>>,
|
||||
failed_region: &'a RegionIdent,
|
||||
struct FailoverProcedureGuard {
|
||||
running_procedures: Arc<RwLock<HashSet<RegionFailoverKey>>>,
|
||||
key: RegionFailoverKey,
|
||||
}
|
||||
|
||||
impl Drop for FailoverProcedureGuard<'_> {
|
||||
impl Drop for FailoverProcedureGuard {
|
||||
fn drop(&mut self) {
|
||||
self.running_procedures
|
||||
.lock()
|
||||
.unwrap()
|
||||
.remove(self.failed_region);
|
||||
self.running_procedures.write().unwrap().remove(&self.key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,11 +102,11 @@ impl RegionFailoverManager {
|
||||
selector,
|
||||
selector_ctx,
|
||||
dist_lock,
|
||||
running_procedures: Arc::new(Mutex::new(HashSet::new())),
|
||||
running_procedures: Arc::new(RwLock::new(HashSet::new())),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_context(&self) -> RegionFailoverContext {
|
||||
pub(crate) fn create_context(&self) -> RegionFailoverContext {
|
||||
RegionFailoverContext {
|
||||
mailbox: self.mailbox.clone(),
|
||||
selector: self.selector.clone(),
|
||||
@@ -113,19 +130,36 @@ impl RegionFailoverManager {
|
||||
})
|
||||
}
|
||||
|
||||
fn insert_running_procedures(&self, failed_region: &RegionIdent) -> bool {
|
||||
let mut procedures = self.running_procedures.lock().unwrap();
|
||||
if procedures.contains(failed_region) {
|
||||
return false;
|
||||
pub(crate) fn is_region_failover_running(&self, key: &RegionFailoverKey) -> bool {
|
||||
self.running_procedures.read().unwrap().contains(key)
|
||||
}
|
||||
|
||||
fn insert_running_procedures(
|
||||
&self,
|
||||
failed_region: &RegionIdent,
|
||||
) -> Option<FailoverProcedureGuard> {
|
||||
let key = RegionFailoverKey::from(failed_region.clone());
|
||||
let mut procedures = self.running_procedures.write().unwrap();
|
||||
if procedures.insert(key.clone()) {
|
||||
Some(FailoverProcedureGuard {
|
||||
running_procedures: self.running_procedures.clone(),
|
||||
key,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
procedures.insert(failed_region.clone())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn running_procedures(&self) -> Arc<RwLock<HashSet<RegionFailoverKey>>> {
|
||||
self.running_procedures.clone()
|
||||
}
|
||||
|
||||
pub(crate) async fn do_region_failover(&self, failed_region: &RegionIdent) -> Result<()> {
|
||||
if !self.insert_running_procedures(failed_region) {
|
||||
let Some(guard) = self.insert_running_procedures(failed_region) else {
|
||||
warn!("Region failover procedure for region {failed_region} is already running!");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
if !self.table_exists(failed_region).await? {
|
||||
// The table could be dropped before the failure detector knows it. Then the region
|
||||
@@ -142,13 +176,9 @@ impl RegionFailoverManager {
|
||||
info!("Starting region failover procedure {procedure_id} for region {failed_region:?}");
|
||||
|
||||
let procedure_manager = self.procedure_manager.clone();
|
||||
let running_procedures = self.running_procedures.clone();
|
||||
let failed_region = failed_region.clone();
|
||||
common_runtime::spawn_bg(async move {
|
||||
let _guard = FailoverProcedureGuard {
|
||||
running_procedures,
|
||||
failed_region: &failed_region,
|
||||
};
|
||||
let _ = guard;
|
||||
|
||||
let watcher = &mut match procedure_manager.submit(procedure_with_id).await {
|
||||
Ok(watcher) => watcher,
|
||||
@@ -178,7 +208,7 @@ impl RegionFailoverManager {
|
||||
let table_global_value = self
|
||||
.selector_ctx
|
||||
.kv_store
|
||||
.get(table_global_key.to_string().into_bytes())
|
||||
.get(table_global_key.to_raw_key())
|
||||
.await?;
|
||||
Ok(table_global_value.is_some())
|
||||
}
|
||||
@@ -232,7 +262,8 @@ trait State: Sync + Send + Debug {
|
||||
/// │ │ │
|
||||
/// └─────────┘ │ Sends "Close Region" request
|
||||
/// │ to the failed Datanode, and
|
||||
/// ┌─────────┐ │ wait for 2 seconds
|
||||
/// | wait for the Region lease expiry
|
||||
/// ┌─────────┐ │ seconds
|
||||
/// │ │ │
|
||||
/// │ ┌──▼────▼──────┐
|
||||
/// Wait candidate │ │ActivateRegion◄───────────────────────┐
|
||||
@@ -260,7 +291,6 @@ trait State: Sync + Send + Debug {
|
||||
/// │ Broadcast Invalidate Table
|
||||
/// │ Cache
|
||||
/// │
|
||||
/// │
|
||||
/// ┌────────▼────────┐
|
||||
/// │RegionFailoverEnd│
|
||||
/// └─────────────────┘
|
||||
@@ -343,7 +373,8 @@ mod tests {
|
||||
use api::v1::meta::{HeartbeatResponse, MailboxMessage, Peer, RequestHeader};
|
||||
use catalog::helper::TableGlobalKey;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply, TableIdent};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
|
||||
use common_meta::DatanodeId;
|
||||
use common_procedure::BoxedProcedure;
|
||||
use rand::prelude::SliceRandom;
|
||||
|
||||
@@ -28,6 +28,7 @@ use super::{RegionFailoverContext, State};
|
||||
use crate::error::{
|
||||
Error, Result, RetryLaterSnafu, SerializeToJsonSnafu, UnexpectedInstructionReplySnafu,
|
||||
};
|
||||
use crate::handler::region_lease_handler::REGION_LEASE_SECONDS;
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::procedure::region_failover::CLOSE_REGION_MESSAGE_TIMEOUT;
|
||||
use crate::service::mailbox::{Channel, MailboxReceiver};
|
||||
@@ -35,11 +36,15 @@ use crate::service::mailbox::{Channel, MailboxReceiver};
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub(super) struct DeactivateRegion {
|
||||
candidate: Peer,
|
||||
region_lease_expiry_seconds: u64,
|
||||
}
|
||||
|
||||
impl DeactivateRegion {
|
||||
pub(super) fn new(candidate: Peer) -> Self {
|
||||
Self { candidate }
|
||||
Self {
|
||||
candidate,
|
||||
region_lease_expiry_seconds: REGION_LEASE_SECONDS * 2,
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_close_region_message(
|
||||
@@ -95,15 +100,21 @@ impl DeactivateRegion {
|
||||
}
|
||||
Err(e) if matches!(e, Error::MailboxTimeout { .. }) => {
|
||||
// Since we are in a region failover situation, the Datanode that the failed region
|
||||
// resides might be unreachable. So region deactivation is happened in a "try our
|
||||
// best" effort, do not retry if mailbox received timeout.
|
||||
// However, if the region failover procedure is also used in a planned maintenance
|
||||
// situation in the future, a proper retry is a must.
|
||||
// resides might be unreachable. So we wait for the region lease to expire. The
|
||||
// region would be closed by its own [RegionAliveKeeper].
|
||||
self.wait_for_region_lease_expiry().await;
|
||||
Ok(Box::new(ActivateRegion::new(self.candidate)))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sleep for `region_lease_expiry_seconds`, to make sure the region is closed (by its
|
||||
/// region alive keeper). This is critical for region not being opened in multiple Datanodes
|
||||
/// simultaneously.
|
||||
async fn wait_for_region_lease_expiry(&self) {
|
||||
tokio::time::sleep(Duration::from_secs(self.region_lease_expiry_seconds)).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -120,8 +131,8 @@ impl State for DeactivateRegion {
|
||||
let mailbox_receiver = match result {
|
||||
Ok(mailbox_receiver) => mailbox_receiver,
|
||||
Err(e) if matches!(e, Error::PusherNotFound { .. }) => {
|
||||
// The Datanode could be unreachable and deregistered from pushers,
|
||||
// so simply advancing to the next state here.
|
||||
// See the mailbox received timeout situation comments above.
|
||||
self.wait_for_region_lease_expiry().await;
|
||||
return Ok(Box::new(ActivateRegion::new(self.candidate)));
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
@@ -212,7 +223,10 @@ mod tests {
|
||||
let mut env = TestingEnvBuilder::new().build().await;
|
||||
let failed_region = env.failed_region(1).await;
|
||||
|
||||
let state = DeactivateRegion::new(Peer::new(2, ""));
|
||||
let state = DeactivateRegion {
|
||||
candidate: Peer::new(2, ""),
|
||||
region_lease_expiry_seconds: 2,
|
||||
};
|
||||
let mailbox_receiver = state
|
||||
.send_close_region_message(&env.context, &failed_region, Duration::from_millis(100))
|
||||
.await
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::prelude::{ErrorExt, StatusCode};
|
||||
use common_meta::instruction::TableIdent;
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::RegionIdent;
|
||||
use common_telemetry::info;
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use async_trait::async_trait;
|
||||
use common_meta::instruction::{Instruction, TableIdent};
|
||||
use common_meta::ident::TableIdent;
|
||||
use common_meta::instruction::Instruction;
|
||||
use common_meta::RegionIdent;
|
||||
use common_telemetry::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -24,6 +24,7 @@ use common_error::prelude::*;
|
||||
use common_telemetry::{timer, warn};
|
||||
use etcd_client::{
|
||||
Client, Compare, CompareOp, DeleteOptions, GetOptions, PutOptions, Txn, TxnOp, TxnOpResponse,
|
||||
TxnResponse,
|
||||
};
|
||||
|
||||
use crate::error;
|
||||
@@ -31,6 +32,12 @@ use crate::error::Result;
|
||||
use crate::metrics::METRIC_META_KV_REQUEST;
|
||||
use crate::service::store::kv::{KvStore, KvStoreRef};
|
||||
|
||||
// Maximum number of operations permitted in a transaction.
|
||||
// The etcd default configuration's `--max-txn-ops` is 128.
|
||||
//
|
||||
// For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
|
||||
const MAX_TXN_SIZE: usize = 128;
|
||||
|
||||
pub struct EtcdStore {
|
||||
client: Client,
|
||||
}
|
||||
@@ -51,6 +58,32 @@ impl EtcdStore {
|
||||
pub fn with_etcd_client(client: Client) -> Result<KvStoreRef> {
|
||||
Ok(Arc::new(Self { client }))
|
||||
}
|
||||
|
||||
async fn do_multi_txn(&self, txn_ops: Vec<TxnOp>) -> Result<Vec<TxnResponse>> {
|
||||
if txn_ops.len() < MAX_TXN_SIZE {
|
||||
// fast path
|
||||
let txn = Txn::new().and_then(txn_ops);
|
||||
let txn_res = self
|
||||
.client
|
||||
.kv_client()
|
||||
.txn(txn)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)?;
|
||||
return Ok(vec![txn_res]);
|
||||
}
|
||||
|
||||
let txns = txn_ops
|
||||
.chunks(MAX_TXN_SIZE)
|
||||
.map(|part| async move {
|
||||
let txn = Txn::new().and_then(part);
|
||||
self.client.kv_client().txn(txn).await
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
futures::future::try_join_all(txns)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -142,23 +175,19 @@ impl KvStore for EtcdStore {
|
||||
.into_iter()
|
||||
.map(|k| TxnOp::get(k, options.clone()))
|
||||
.collect();
|
||||
let txn = Txn::new().and_then(get_ops);
|
||||
|
||||
let txn_res = self
|
||||
.client
|
||||
.kv_client()
|
||||
.txn(txn)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)?;
|
||||
let txn_responses = self.do_multi_txn(get_ops).await?;
|
||||
|
||||
let mut kvs = vec![];
|
||||
for op_res in txn_res.op_responses() {
|
||||
let get_res = match op_res {
|
||||
TxnOpResponse::Get(get_res) => get_res,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
for txn_res in txn_responses {
|
||||
for op_res in txn_res.op_responses() {
|
||||
let get_res = match op_res {
|
||||
TxnOpResponse::Get(get_res) => get_res,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
kvs.extend(get_res.kvs().iter().map(KvPair::from_etcd_kv));
|
||||
kvs.extend(get_res.kvs().iter().map(KvPair::from_etcd_kv));
|
||||
}
|
||||
}
|
||||
|
||||
let header = Some(ResponseHeader::success(cluster_id));
|
||||
@@ -185,24 +214,20 @@ impl KvStore for EtcdStore {
|
||||
.into_iter()
|
||||
.map(|kv| (TxnOp::put(kv.key, kv.value, options.clone())))
|
||||
.collect::<Vec<_>>();
|
||||
let txn = Txn::new().and_then(put_ops);
|
||||
|
||||
let txn_res = self
|
||||
.client
|
||||
.kv_client()
|
||||
.txn(txn)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)?;
|
||||
let txn_responses = self.do_multi_txn(put_ops).await?;
|
||||
|
||||
let mut prev_kvs = vec![];
|
||||
for op_res in txn_res.op_responses() {
|
||||
match op_res {
|
||||
TxnOpResponse::Put(put_res) => {
|
||||
if let Some(prev_kv) = put_res.prev_key() {
|
||||
prev_kvs.push(KvPair::from_etcd_kv(prev_kv));
|
||||
for txn_res in txn_responses {
|
||||
for op_res in txn_res.op_responses() {
|
||||
match op_res {
|
||||
TxnOpResponse::Put(put_res) => {
|
||||
if let Some(prev_kv) = put_res.prev_key() {
|
||||
prev_kvs.push(KvPair::from_etcd_kv(prev_kv));
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
_ => unreachable!(), // never get here
|
||||
}
|
||||
}
|
||||
|
||||
@@ -232,28 +257,23 @@ impl KvStore for EtcdStore {
|
||||
.into_iter()
|
||||
.map(|k| TxnOp::delete(k, options.clone()))
|
||||
.collect::<Vec<_>>();
|
||||
let txn = Txn::new().and_then(delete_ops);
|
||||
|
||||
let txn_res = self
|
||||
.client
|
||||
.kv_client()
|
||||
.txn(txn)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)?;
|
||||
let txn_responses = self.do_multi_txn(delete_ops).await?;
|
||||
|
||||
for op_res in txn_res.op_responses() {
|
||||
match op_res {
|
||||
TxnOpResponse::Delete(delete_res) => {
|
||||
delete_res.prev_kvs().iter().for_each(|kv| {
|
||||
prev_kvs.push(KvPair::from_etcd_kv(kv));
|
||||
});
|
||||
for txn_res in txn_responses {
|
||||
for op_res in txn_res.op_responses() {
|
||||
match op_res {
|
||||
TxnOpResponse::Delete(delete_res) => {
|
||||
delete_res.prev_kvs().iter().for_each(|kv| {
|
||||
prev_kvs.push(KvPair::from_etcd_kv(kv));
|
||||
});
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
_ => unreachable!(), // never get here
|
||||
}
|
||||
}
|
||||
|
||||
let header = Some(ResponseHeader::success(cluster_id));
|
||||
|
||||
Ok(BatchDeleteResponse { header, prev_kvs })
|
||||
}
|
||||
|
||||
@@ -308,7 +328,7 @@ impl KvStore for EtcdStore {
|
||||
let prev_kv = match op_res {
|
||||
TxnOpResponse::Put(res) => res.prev_key().map(KvPair::from_etcd_kv),
|
||||
TxnOpResponse::Get(res) => res.kvs().first().map(KvPair::from_etcd_kv),
|
||||
_ => unreachable!(), // never get here
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let header = Some(ResponseHeader::success(cluster_id));
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::{KeyValue, RangeRequest};
|
||||
use api::v1::meta::{DeleteRangeRequest, KeyValue, RangeRequest};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::service::store::kv::KvStore;
|
||||
@@ -24,6 +24,10 @@ pub trait KvStoreExt {
|
||||
|
||||
/// Check if a key exists, it does not return the value.
|
||||
async fn exists(&self, key: Vec<u8>) -> Result<bool>;
|
||||
|
||||
/// Delete the value by the given key. If prev_kv is true,
|
||||
/// the previous key-value pairs will be returned.
|
||||
async fn delete(&self, key: Vec<u8>, prev_kv: bool) -> Result<Option<KeyValue>>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -53,6 +57,18 @@ where
|
||||
|
||||
Ok(!kvs.is_empty())
|
||||
}
|
||||
|
||||
async fn delete(&self, key: Vec<u8>, prev_kv: bool) -> Result<Option<KeyValue>> {
|
||||
let req = DeleteRangeRequest {
|
||||
key,
|
||||
prev_kv,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut prev_kvs = self.delete_range(req).await?.prev_kvs;
|
||||
|
||||
Ok(prev_kvs.pop())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -115,6 +131,31 @@ mod tests {
|
||||
assert!(!in_mem.exists("test_key".as_bytes().to_vec()).await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete() {
|
||||
let mut in_mem = Arc::new(MemStore::new()) as KvStoreRef;
|
||||
|
||||
let mut prev_kv = in_mem
|
||||
.delete("test_key1".as_bytes().to_vec(), true)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(prev_kv.is_none());
|
||||
|
||||
put_stats_to_store(&mut in_mem).await;
|
||||
|
||||
assert!(in_mem
|
||||
.exists("test_key1".as_bytes().to_vec())
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
prev_kv = in_mem
|
||||
.delete("test_key1".as_bytes().to_vec(), true)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(prev_kv.is_some());
|
||||
assert_eq!("test_key1".as_bytes(), prev_kv.unwrap().key);
|
||||
}
|
||||
|
||||
async fn put_stats_to_store(store: &mut KvStoreRef) {
|
||||
store
|
||||
.put(PutRequest {
|
||||
|
||||
@@ -12,13 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::meta::{PutRequest, TableRouteValue};
|
||||
use catalog::helper::{TableGlobalKey, TableGlobalValue};
|
||||
use common_meta::key::TableRouteKey;
|
||||
use common_meta::rpc::store::{BatchGetRequest, BatchGetResponse};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
DecodeTableRouteSnafu, InvalidCatalogValueSnafu, Result, TableRouteNotFoundSnafu,
|
||||
ConvertProtoDataSnafu, DecodeTableRouteSnafu, InvalidCatalogValueSnafu, Result,
|
||||
TableRouteNotFoundSnafu,
|
||||
};
|
||||
use crate::service::store::ext::KvStoreExt;
|
||||
use crate::service::store::kv::KvStoreRef;
|
||||
@@ -27,12 +31,40 @@ pub async fn get_table_global_value(
|
||||
kv_store: &KvStoreRef,
|
||||
key: &TableGlobalKey,
|
||||
) -> Result<Option<TableGlobalValue>> {
|
||||
let key = key.to_string().into_bytes();
|
||||
let kv = kv_store.get(key).await?;
|
||||
let kv = kv_store.get(key.to_raw_key()).await?;
|
||||
kv.map(|kv| TableGlobalValue::from_bytes(kv.value).context(InvalidCatalogValueSnafu))
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub(crate) async fn batch_get_table_global_value(
|
||||
kv_store: &KvStoreRef,
|
||||
keys: Vec<&TableGlobalKey>,
|
||||
) -> Result<HashMap<TableGlobalKey, Option<TableGlobalValue>>> {
|
||||
let req = BatchGetRequest {
|
||||
keys: keys.iter().map(|x| x.to_raw_key()).collect::<Vec<_>>(),
|
||||
};
|
||||
let mut resp: BatchGetResponse = kv_store
|
||||
.batch_get(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
.context(ConvertProtoDataSnafu)?;
|
||||
|
||||
let kvs = resp.take_kvs();
|
||||
let mut result = HashMap::with_capacity(kvs.len());
|
||||
for kv in kvs {
|
||||
let key = TableGlobalKey::try_from_raw_key(kv.key()).context(InvalidCatalogValueSnafu)?;
|
||||
let value = TableGlobalValue::from_bytes(kv.value()).context(InvalidCatalogValueSnafu)?;
|
||||
result.insert(key, Some(value));
|
||||
}
|
||||
|
||||
for key in keys {
|
||||
if !result.contains_key(key) {
|
||||
result.insert(key.clone(), None);
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub(crate) async fn put_table_global_value(
|
||||
kv_store: &KvStoreRef,
|
||||
key: &TableGlobalKey,
|
||||
@@ -40,7 +72,7 @@ pub(crate) async fn put_table_global_value(
|
||||
) -> Result<()> {
|
||||
let req = PutRequest {
|
||||
header: None,
|
||||
key: key.to_string().into_bytes(),
|
||||
key: key.to_raw_key(),
|
||||
value: value.as_bytes().context(InvalidCatalogValueSnafu)?,
|
||||
prev_kv: false,
|
||||
};
|
||||
@@ -228,12 +260,12 @@ pub(crate) mod tests {
|
||||
async fn test_put_and_get_table_global_value() {
|
||||
let kv_store = Arc::new(MemStore::new()) as _;
|
||||
|
||||
let key = TableGlobalKey {
|
||||
let not_exist_key = TableGlobalKey {
|
||||
catalog_name: "not_exist_catalog".to_string(),
|
||||
schema_name: "not_exist_schema".to_string(),
|
||||
table_name: "not_exist_table".to_string(),
|
||||
};
|
||||
assert!(get_table_global_value(&kv_store, &key)
|
||||
assert!(get_table_global_value(&kv_store, ¬_exist_key)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
@@ -244,6 +276,12 @@ pub(crate) mod tests {
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(actual, value);
|
||||
|
||||
let keys = vec![¬_exist_key, &key];
|
||||
let result = batch_get_table_global_value(&kv_store, keys).await.unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert!(result.get(¬_exist_key).unwrap().is_none());
|
||||
assert_eq!(result.get(&key).unwrap().as_ref().unwrap(), &value);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -541,7 +541,7 @@ impl PromPlanner {
|
||||
result_set.insert(matcher.value.clone());
|
||||
} else {
|
||||
return Err(ColumnNotFoundSnafu {
|
||||
col: self.ctx.table_name.clone().unwrap(),
|
||||
col: matcher.value.clone(),
|
||||
}
|
||||
.build());
|
||||
}
|
||||
@@ -550,8 +550,8 @@ impl PromPlanner {
|
||||
if col_set.contains(&matcher.value) {
|
||||
reverse_set.insert(matcher.value.clone());
|
||||
} else {
|
||||
return Err(ValueNotFoundSnafu {
|
||||
table: self.ctx.table_name.clone().unwrap(),
|
||||
return Err(ColumnNotFoundSnafu {
|
||||
col: matcher.value.clone(),
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
22
src/query/src/dataframe.rs
Normal file
22
src/query/src/dataframe.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use datafusion::dataframe::DataFrame as DfDataFrame;
|
||||
|
||||
/// DataFrame represents a logical set of rows with the same named columns.
|
||||
/// Similar to a Pandas DataFrame or Spark DataFrame
|
||||
#[derive(Clone)]
|
||||
pub enum DataFrame {
|
||||
DataFusion(DfDataFrame),
|
||||
}
|
||||
@@ -17,6 +17,7 @@
|
||||
mod error;
|
||||
mod planner;
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -36,13 +37,13 @@ use datafusion::physical_plan::ExecutionPlan;
|
||||
use datafusion_common::ResolvedTableReference;
|
||||
use datafusion_expr::{DmlStatement, LogicalPlan as DfLogicalPlan, WriteOp};
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::schema::Schema;
|
||||
use futures_util::StreamExt;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::requests::{DeleteRequest, InsertRequest};
|
||||
use table::TableRef;
|
||||
|
||||
use crate::dataframe::DataFrame;
|
||||
pub use crate::datafusion::planner::DfContextProviderAdapter;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CatalogSnafu, CreateRecordBatchSnafu, DataFusionSnafu,
|
||||
@@ -55,7 +56,7 @@ use crate::physical_optimizer::PhysicalOptimizer;
|
||||
use crate::physical_planner::PhysicalPlanner;
|
||||
use crate::plan::LogicalPlan;
|
||||
use crate::planner::{DfLogicalPlanner, LogicalPlanner};
|
||||
use crate::query_engine::{QueryEngineContext, QueryEngineState};
|
||||
use crate::query_engine::{DescribeResult, QueryEngineContext, QueryEngineState};
|
||||
use crate::{metrics, QueryEngine};
|
||||
|
||||
pub struct DatafusionQueryEngine {
|
||||
@@ -207,6 +208,10 @@ impl DatafusionQueryEngine {
|
||||
|
||||
#[async_trait]
|
||||
impl QueryEngine for DatafusionQueryEngine {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn planner(&self) -> Arc<dyn LogicalPlanner> {
|
||||
Arc::new(DfLogicalPlanner::new(self.state.clone()))
|
||||
}
|
||||
@@ -215,11 +220,12 @@ impl QueryEngine for DatafusionQueryEngine {
|
||||
"datafusion"
|
||||
}
|
||||
|
||||
async fn describe(&self, plan: LogicalPlan) -> Result<Schema> {
|
||||
// TODO(sunng87): consider cache optmised logical plan between describe
|
||||
// and execute
|
||||
async fn describe(&self, plan: LogicalPlan) -> Result<DescribeResult> {
|
||||
let optimised_plan = self.optimize(&plan)?;
|
||||
optimised_plan.schema()
|
||||
Ok(DescribeResult {
|
||||
schema: optimised_plan.schema()?,
|
||||
logical_plan: optimised_plan,
|
||||
})
|
||||
}
|
||||
|
||||
async fn execute(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
@@ -249,6 +255,18 @@ impl QueryEngine for DatafusionQueryEngine {
|
||||
fn register_function(&self, func: FunctionRef) {
|
||||
self.state.register_udf(create_udf(func));
|
||||
}
|
||||
|
||||
fn read_table(&self, table: TableRef) -> Result<DataFrame> {
|
||||
Ok(DataFrame::DataFusion(
|
||||
self.state
|
||||
.read_table(table)
|
||||
.context(error::DatafusionSnafu {
|
||||
msg: "Fail to create dataframe for table",
|
||||
})
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicalOptimizer for DatafusionQueryEngine {
|
||||
@@ -374,6 +392,7 @@ impl QueryExecutor for DatafusionQueryEngine {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow::Borrowed;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::local::{MemoryCatalogProvider, MemorySchemaProvider};
|
||||
@@ -381,12 +400,14 @@ mod tests {
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::util;
|
||||
use datafusion::prelude::{col, lit};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use datatypes::vectors::{UInt64Vector, VectorRef};
|
||||
use datatypes::vectors::{Helper, UInt32Vector, UInt64Vector, VectorRef};
|
||||
use session::context::QueryContext;
|
||||
use table::table::numbers::NumbersTable;
|
||||
|
||||
use super::*;
|
||||
use crate::parser::QueryLanguageParser;
|
||||
use crate::query_engine::{QueryEngineFactory, QueryEngineRef};
|
||||
|
||||
@@ -470,6 +491,42 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_table() {
|
||||
let engine = create_test_engine().await;
|
||||
|
||||
let engine = engine
|
||||
.as_any()
|
||||
.downcast_ref::<DatafusionQueryEngine>()
|
||||
.unwrap();
|
||||
let table = engine
|
||||
.find_table(&ResolvedTableReference {
|
||||
catalog: Borrowed("greptime"),
|
||||
schema: Borrowed("public"),
|
||||
table: Borrowed("numbers"),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let DataFrame::DataFusion(df) = engine.read_table(table).unwrap();
|
||||
let df = df
|
||||
.select_columns(&["number"])
|
||||
.unwrap()
|
||||
.filter(col("number").lt(lit(10)))
|
||||
.unwrap();
|
||||
let batches = df.collect().await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
let batch = &batches[0];
|
||||
|
||||
assert_eq!(1, batch.num_columns());
|
||||
assert_eq!(batch.column(0).len(), 10);
|
||||
|
||||
assert_eq!(
|
||||
Helper::try_into_vector(batch.column(0)).unwrap(),
|
||||
Arc::new(UInt32Vector::from_slice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) as VectorRef
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_describe() {
|
||||
let engine = create_test_engine().await;
|
||||
@@ -483,7 +540,10 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let schema = engine.describe(plan).await.unwrap();
|
||||
let DescribeResult {
|
||||
schema,
|
||||
logical_plan,
|
||||
} = engine.describe(plan).await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
schema.column_schemas()[0],
|
||||
@@ -493,5 +553,6 @@ mod tests {
|
||||
true
|
||||
)
|
||||
);
|
||||
assert_eq!("Limit: skip=0, fetch=20\n Aggregate: groupBy=[[]], aggr=[[SUM(numbers.number)]]\n TableScan: numbers projection=[number]", format!("{}", logical_plan.display_indent()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion_expr::{LogicalPlan, UserDefinedLogicalNode};
|
||||
use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
|
||||
use promql::extension_plan::{
|
||||
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
|
||||
};
|
||||
@@ -37,7 +37,8 @@ impl Categorizer {
|
||||
pub fn check_plan(plan: &LogicalPlan) -> Commutativity {
|
||||
match plan {
|
||||
LogicalPlan::Projection(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::Filter(_) => Commutativity::Commutative,
|
||||
// TODO(ruihang): Change this to Commutative once Like is supported in substrait
|
||||
LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate),
|
||||
LogicalPlan::Window(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::Aggregate(_) => {
|
||||
// check all children exprs and uses the strictest level
|
||||
@@ -85,6 +86,50 @@ impl Categorizer {
|
||||
_ => Commutativity::Unsupported,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_expr(expr: &Expr) -> Commutativity {
|
||||
match expr {
|
||||
Expr::Alias(_, _)
|
||||
| Expr::Column(_)
|
||||
| Expr::ScalarVariable(_, _)
|
||||
| Expr::Literal(_)
|
||||
| Expr::BinaryExpr(_)
|
||||
| Expr::Not(_)
|
||||
| Expr::IsNotNull(_)
|
||||
| Expr::IsNull(_)
|
||||
| Expr::IsTrue(_)
|
||||
| Expr::IsFalse(_)
|
||||
| Expr::IsNotTrue(_)
|
||||
| Expr::IsNotFalse(_)
|
||||
| Expr::Negative(_)
|
||||
| Expr::Between(_)
|
||||
| Expr::Sort(_)
|
||||
| Expr::Exists(_) => Commutativity::Commutative,
|
||||
|
||||
Expr::Like(_)
|
||||
| Expr::ILike(_)
|
||||
| Expr::SimilarTo(_)
|
||||
| Expr::IsUnknown(_)
|
||||
| Expr::IsNotUnknown(_)
|
||||
| Expr::GetIndexedField(_)
|
||||
| Expr::Case(_)
|
||||
| Expr::Cast(_)
|
||||
| Expr::TryCast(_)
|
||||
| Expr::ScalarFunction(_)
|
||||
| Expr::ScalarUDF(_)
|
||||
| Expr::AggregateFunction(_)
|
||||
| Expr::WindowFunction(_)
|
||||
| Expr::AggregateUDF(_)
|
||||
| Expr::InList(_)
|
||||
| Expr::InSubquery(_)
|
||||
| Expr::ScalarSubquery(_)
|
||||
| Expr::Wildcard => Commutativity::Unimplemented,
|
||||
Expr::QualifiedWildcard { .. }
|
||||
| Expr::GroupingSet(_)
|
||||
| Expr::Placeholder(_)
|
||||
| Expr::OuterReferenceColumn(_, _) => Commutativity::Unimplemented,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type Transformer = Arc<dyn Fn(&LogicalPlan) -> Option<LogicalPlan>>;
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#![feature(let_chains)]
|
||||
|
||||
pub mod dataframe;
|
||||
pub mod datafusion;
|
||||
pub mod dist_plan;
|
||||
pub mod error;
|
||||
|
||||
@@ -12,13 +12,16 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Debug, Display};
|
||||
|
||||
use common_query::prelude::ScalarValue;
|
||||
use datafusion_expr::LogicalPlan as DfLogicalPlan;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{ConvertDatafusionSchemaSnafu, Result};
|
||||
use crate::error::{ConvertDatafusionSchemaSnafu, DataFusionSnafu, Result};
|
||||
|
||||
/// A LogicalPlan represents the different types of relational
|
||||
/// operators (such as Projection, Filter, etc) and can be created by
|
||||
@@ -46,4 +49,41 @@ impl LogicalPlan {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a `format`able structure that produces a single line
|
||||
/// per node. For example:
|
||||
///
|
||||
/// ```text
|
||||
/// Projection: employee.id
|
||||
/// Filter: employee.state Eq Utf8(\"CO\")\
|
||||
/// CsvScan: employee projection=Some([0, 3])
|
||||
/// ```
|
||||
pub fn display_indent(&self) -> impl Display + '_ {
|
||||
let LogicalPlan::DfPlan(plan) = self;
|
||||
plan.display_indent()
|
||||
}
|
||||
|
||||
/// Walk the logical plan, find any `PlaceHolder` tokens,
|
||||
/// and return a map of their IDs and ConcreteDataTypes
|
||||
pub fn get_param_types(&self) -> Result<HashMap<String, Option<ConcreteDataType>>> {
|
||||
let LogicalPlan::DfPlan(plan) = self;
|
||||
let types = plan.get_parameter_types().context(DataFusionSnafu)?;
|
||||
|
||||
Ok(types
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, v.map(|v| ConcreteDataType::from_arrow_type(&v))))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Return a logical plan with all placeholders/params (e.g $1 $2,
|
||||
/// ...) replaced with corresponding values provided in the
|
||||
/// params_values
|
||||
pub fn replace_params_with_values(&self, values: &[ScalarValue]) -> Result<LogicalPlan> {
|
||||
let LogicalPlan::DfPlan(plan) = self;
|
||||
|
||||
plan.clone()
|
||||
.replace_params_with_values(values)
|
||||
.context(DataFusionSnafu)
|
||||
.map(LogicalPlan::DfPlan)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ impl DfLogicalPlanner {
|
||||
};
|
||||
PlanSqlSnafu { sql }
|
||||
})?;
|
||||
|
||||
Ok(LogicalPlan::DfPlan(result))
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ mod context;
|
||||
pub mod options;
|
||||
mod state;
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -30,7 +31,9 @@ use datatypes::schema::Schema;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use session::context::QueryContextRef;
|
||||
use sql::statements::statement::Statement;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::dataframe::DataFrame;
|
||||
use crate::datafusion::DatafusionQueryEngine;
|
||||
use crate::error::Result;
|
||||
use crate::plan::LogicalPlan;
|
||||
@@ -40,6 +43,15 @@ pub use crate::query_engine::state::QueryEngineState;
|
||||
|
||||
pub type SqlStatementExecutorRef = Arc<dyn SqlStatementExecutor>;
|
||||
|
||||
/// Describe statement result
|
||||
#[derive(Debug)]
|
||||
pub struct DescribeResult {
|
||||
/// The schema of statement
|
||||
pub schema: Schema,
|
||||
/// The logical plan for statement
|
||||
pub logical_plan: LogicalPlan,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait SqlStatementExecutor: Send + Sync {
|
||||
async fn execute_sql(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Output>;
|
||||
@@ -47,11 +59,15 @@ pub trait SqlStatementExecutor: Send + Sync {
|
||||
|
||||
#[async_trait]
|
||||
pub trait QueryEngine: Send + Sync {
|
||||
/// Returns the query engine as Any
|
||||
/// so that it can be downcast to a specific implementation.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
fn planner(&self) -> Arc<dyn LogicalPlanner>;
|
||||
|
||||
fn name(&self) -> &str;
|
||||
|
||||
async fn describe(&self, plan: LogicalPlan) -> Result<Schema>;
|
||||
async fn describe(&self, plan: LogicalPlan) -> Result<DescribeResult>;
|
||||
|
||||
async fn execute(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output>;
|
||||
|
||||
@@ -60,6 +76,9 @@ pub trait QueryEngine: Send + Sync {
|
||||
fn register_aggregate_function(&self, func: AggregateFunctionMetaRef);
|
||||
|
||||
fn register_function(&self, func: FunctionRef);
|
||||
|
||||
/// Create a DataFrame from a table.
|
||||
fn read_table(&self, table: TableRef) -> Result<DataFrame>;
|
||||
}
|
||||
|
||||
pub struct QueryEngineFactory {
|
||||
|
||||
@@ -24,6 +24,7 @@ use common_function::scalars::aggregate::AggregateFunctionMetaRef;
|
||||
use common_query::physical_plan::SessionContext;
|
||||
use common_query::prelude::ScalarUdf;
|
||||
use datafusion::catalog::catalog::MemoryCatalogList;
|
||||
use datafusion::dataframe::DataFrame;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionState};
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
@@ -38,6 +39,8 @@ use datafusion_optimizer::analyzer::Analyzer;
|
||||
use datafusion_optimizer::optimizer::Optimizer;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use promql::extension_plan::PromExtensionPlanner;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::dist_plan::{DistExtensionPlanner, DistPlannerAnalyzer};
|
||||
use crate::extension_serializer::ExtensionSerializer;
|
||||
@@ -59,8 +62,9 @@ pub struct QueryEngineState {
|
||||
|
||||
impl fmt::Debug for QueryEngineState {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
// TODO(dennis) better debug info
|
||||
write!(f, "QueryEngineState: <datafusion context>")
|
||||
f.debug_struct("QueryEngineState")
|
||||
.field("state", &self.df_context.state())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,6 +192,12 @@ impl QueryEngineState {
|
||||
pub(crate) fn session_state(&self) -> SessionState {
|
||||
self.df_context.state()
|
||||
}
|
||||
|
||||
/// Create a DataFrame for a table
|
||||
pub fn read_table(&self, table: TableRef) -> DfResult<DataFrame> {
|
||||
self.df_context
|
||||
.read_table(Arc::new(DfTableProviderAdapter::new(table)))
|
||||
}
|
||||
}
|
||||
|
||||
struct DfQueryPlanner {
|
||||
|
||||
@@ -32,6 +32,10 @@ common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
|
||||
datatypes = { path = "../datatypes" }
|
||||
derive_builder = "0.12"
|
||||
digest = "0.10"
|
||||
@@ -99,3 +103,6 @@ table = { path = "../table" }
|
||||
tokio-postgres = "0.7"
|
||||
tokio-postgres-rustls = "0.10"
|
||||
tokio-test = "0.4"
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.1.4"
|
||||
|
||||
@@ -13,6 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
fn main() {
|
||||
build_data::set_RUSTC_VERSION();
|
||||
build_data::set_BUILD_HOSTNAME();
|
||||
build_data::set_GIT_BRANCH();
|
||||
build_data::set_GIT_COMMIT();
|
||||
build_data::set_SOURCE_TIMESTAMP();
|
||||
|
||||
#[cfg(feature = "dashboard")]
|
||||
fetch_dashboard_assets();
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::net::SocketAddr;
|
||||
use std::string::FromUtf8Error;
|
||||
@@ -23,6 +22,7 @@ use base64::DecodeError;
|
||||
use catalog;
|
||||
use common_error::prelude::*;
|
||||
use common_telemetry::logging;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use query::parser::PromQuery;
|
||||
use serde_json::json;
|
||||
use snafu::Location;
|
||||
@@ -75,6 +75,12 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute plan, source: {}", source))]
|
||||
ExecutePlan {
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("{source}"))]
|
||||
ExecuteGrpcQuery {
|
||||
location: Location,
|
||||
@@ -250,6 +256,12 @@ pub enum Error {
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get param types, source: {source}, location: {location}"))]
|
||||
GetPreparedStmtParams {
|
||||
source: query::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("{}", reason))]
|
||||
UnexpectedResult { reason: String, location: Location },
|
||||
|
||||
@@ -269,16 +281,44 @@ pub enum Error {
|
||||
|
||||
#[cfg(feature = "pprof")]
|
||||
#[snafu(display("Failed to dump pprof data, source: {}", source))]
|
||||
DumpPprof {
|
||||
#[snafu(backtrace)]
|
||||
source: common_pprof::Error,
|
||||
},
|
||||
DumpPprof { source: common_pprof::Error },
|
||||
|
||||
#[snafu(display("Failed to update jemalloc metrics, source: {source}, location: {location}"))]
|
||||
UpdateJemallocMetrics {
|
||||
source: tikv_jemalloc_ctl::Error,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("DataFrame operation error, source: {source}, location: {location}"))]
|
||||
DataFrame {
|
||||
source: datafusion::error::DataFusionError,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to replace params with values in prepared statement, source: {source}, location: {location}"
|
||||
))]
|
||||
ReplacePreparedStmtParams {
|
||||
source: query::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert scalar value, source: {source}, location: {location}"))]
|
||||
ConvertScalarValue {
|
||||
source: datatypes::error::Error,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Expected type: {:?}, actual: {:?}, location: {location}",
|
||||
expected,
|
||||
actual
|
||||
))]
|
||||
PreparedStmtTypeMismatch {
|
||||
expected: ConcreteDataType,
|
||||
actual: opensrv_mysql::ColumnType,
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -303,6 +343,7 @@ impl ErrorExt for Error {
|
||||
InsertScript { source, .. }
|
||||
| ExecuteScript { source, .. }
|
||||
| ExecuteQuery { source, .. }
|
||||
| ExecutePlan { source, .. }
|
||||
| ExecuteGrpcQuery { source, .. }
|
||||
| CheckDatabaseValidity { source, .. } => source.status_code(),
|
||||
|
||||
@@ -317,6 +358,8 @@ impl ErrorExt for Error {
|
||||
| InvalidPromRemoteRequest { .. }
|
||||
| InvalidFlightTicket { .. }
|
||||
| InvalidPrepareStatement { .. }
|
||||
| DataFrame { .. }
|
||||
| PreparedStmtTypeMismatch { .. }
|
||||
| TimePrecision { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
InfluxdbLinesWrite { source, .. } | PromSeriesWrite { source, .. } => {
|
||||
@@ -340,7 +383,9 @@ impl ErrorExt for Error {
|
||||
DumpProfileData { source, .. } => source.status_code(),
|
||||
InvalidFlushArgument { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
ParsePromQL { source, .. } => source.status_code(),
|
||||
ReplacePreparedStmtParams { source, .. }
|
||||
| GetPreparedStmtParams { source, .. }
|
||||
| ParsePromQL { source, .. } => source.status_code(),
|
||||
Other { source, .. } => source.status_code(),
|
||||
|
||||
UnexpectedResult { .. } => StatusCode::Unexpected,
|
||||
@@ -359,6 +404,8 @@ impl ErrorExt for Error {
|
||||
DumpPprof { source, .. } => source.status_code(),
|
||||
|
||||
UpdateJemallocMetrics { .. } => StatusCode::Internal,
|
||||
|
||||
ConvertScalarValue { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -512,6 +512,8 @@ impl HttpServer {
|
||||
routing::get(handler::health).post(handler::health),
|
||||
);
|
||||
|
||||
router = router.route("/status", routing::get(handler::status));
|
||||
|
||||
#[cfg(feature = "dashboard")]
|
||||
{
|
||||
if !self.options.disable_dashboard {
|
||||
@@ -719,6 +721,8 @@ mod test {
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::{StringVector, UInt32Vector};
|
||||
use query::parser::PromQuery;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::DescribeResult;
|
||||
use session::context::QueryContextRef;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
@@ -760,11 +764,19 @@ mod test {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_exec_plan(
|
||||
&self,
|
||||
_plan: LogicalPlan,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_describe(
|
||||
&self,
|
||||
_stmt: sql::statements::statement::Statement,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::time::Instant;
|
||||
|
||||
use aide::transform::TransformOperation;
|
||||
@@ -158,3 +159,26 @@ pub struct HealthResponse {}
|
||||
pub async fn health(Query(_params): Query<HealthQuery>) -> Json<HealthResponse> {
|
||||
Json(HealthResponse {})
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
|
||||
pub struct StatusResponse<'a> {
|
||||
pub source_time: &'a str,
|
||||
pub commit: &'a str,
|
||||
pub branch: &'a str,
|
||||
pub rustc_version: &'a str,
|
||||
pub hostname: &'a str,
|
||||
pub version: &'a str,
|
||||
}
|
||||
|
||||
/// Handler to expose information info about runtime, build, etc.
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn status() -> Json<StatusResponse<'static>> {
|
||||
Json(StatusResponse {
|
||||
source_time: env!("SOURCE_TIMESTAMP"),
|
||||
commit: env!("GIT_COMMIT"),
|
||||
branch: env!("GIT_BRANCH"),
|
||||
rustc_version: env!("RUSTC_VERSION"),
|
||||
hostname: env!("BUILD_HOSTNAME"),
|
||||
version: env!("CARGO_PKG_VERSION"),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -14,5 +14,6 @@
|
||||
|
||||
mod federated;
|
||||
pub mod handler;
|
||||
mod helper;
|
||||
pub mod server;
|
||||
pub mod writer;
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
@@ -22,18 +21,20 @@ use async_trait::async_trait;
|
||||
use chrono::{NaiveDate, NaiveDateTime};
|
||||
use common_error::prelude::ErrorExt;
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing::log;
|
||||
use common_telemetry::{error, timer, trace, warn};
|
||||
use common_telemetry::{error, logging, timer, trace, warn};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use metrics::increment_counter;
|
||||
use opensrv_mysql::{
|
||||
AsyncMysqlShim, Column, ColumnFlags, ColumnType, ErrorKind, InitWriter, ParamParser,
|
||||
ParamValue, QueryResultWriter, StatementMetaWriter, ValueInner,
|
||||
AsyncMysqlShim, Column, ErrorKind, InitWriter, ParamParser, ParamValue, QueryResultWriter,
|
||||
StatementMetaWriter, ValueInner,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::DescribeResult;
|
||||
use rand::RngCore;
|
||||
use session::context::Channel;
|
||||
use session::{Session, SessionRef};
|
||||
use snafu::ensure;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use sql::dialect::MySqlDialect;
|
||||
use sql::parser::ParserContext;
|
||||
use sql::statements::statement::Statement;
|
||||
@@ -41,17 +42,27 @@ use tokio::io::AsyncWrite;
|
||||
|
||||
use crate::auth::{Identity, Password, UserProviderRef};
|
||||
use crate::error::{self, InvalidPrepareStatementSnafu, Result};
|
||||
use crate::mysql::helper::{
|
||||
self, format_placeholder, replace_placeholders, transform_placeholders,
|
||||
};
|
||||
use crate::mysql::writer;
|
||||
use crate::mysql::writer::create_mysql_column;
|
||||
use crate::query_handler::sql::ServerSqlQueryHandlerRef;
|
||||
|
||||
/// Cached SQL and logical plan
|
||||
#[derive(Clone)]
|
||||
struct SqlPlan {
|
||||
query: String,
|
||||
plan: Option<LogicalPlan>,
|
||||
}
|
||||
|
||||
// An intermediate shim for executing MySQL queries.
|
||||
pub struct MysqlInstanceShim {
|
||||
query_handler: ServerSqlQueryHandlerRef,
|
||||
salt: [u8; 20],
|
||||
session: SessionRef,
|
||||
user_provider: Option<UserProviderRef>,
|
||||
// TODO(SSebo): use something like moka to achieve TTL or LRU
|
||||
prepared_stmts: Arc<RwLock<HashMap<u32, String>>>,
|
||||
prepared_stmts: Arc<RwLock<HashMap<u32, SqlPlan>>>,
|
||||
prepared_stmts_counter: AtomicU32,
|
||||
}
|
||||
|
||||
@@ -105,14 +116,34 @@ impl MysqlInstanceShim {
|
||||
output
|
||||
}
|
||||
|
||||
fn set_query(&self, query: String) -> u32 {
|
||||
let stmt_id = self.prepared_stmts_counter.fetch_add(1, Ordering::SeqCst);
|
||||
let mut guard = self.prepared_stmts.write();
|
||||
guard.insert(stmt_id, query);
|
||||
/// Execute the logical plan and return the output
|
||||
async fn do_exec_plan(&self, query: &str, plan: LogicalPlan) -> Result<Output> {
|
||||
if let Some(output) = crate::mysql::federated::check(query, self.session.context()) {
|
||||
Ok(output)
|
||||
} else {
|
||||
self.query_handler
|
||||
.do_exec_plan(plan, self.session.context())
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// Describe the statement
|
||||
async fn do_describe(&self, statement: Statement) -> Result<Option<DescribeResult>> {
|
||||
self.query_handler
|
||||
.do_describe(statement, self.session.context())
|
||||
.await
|
||||
}
|
||||
|
||||
/// Save query and logical plan, return the unique id
|
||||
fn save_plan(&self, plan: SqlPlan) -> u32 {
|
||||
let stmt_id = self.prepared_stmts_counter.fetch_add(1, Ordering::Relaxed);
|
||||
let mut prepared_stmts = self.prepared_stmts.write();
|
||||
prepared_stmts.insert(stmt_id, plan);
|
||||
stmt_id
|
||||
}
|
||||
|
||||
fn query(&self, stmt_id: u32) -> Option<String> {
|
||||
/// Retrieve the query and logical plan by id
|
||||
fn plan(&self, stmt_id: u32) -> Option<SqlPlan> {
|
||||
let guard = self.prepared_stmts.read();
|
||||
guard.get(&stmt_id).cloned()
|
||||
}
|
||||
@@ -175,15 +206,36 @@ impl<W: AsyncWrite + Send + Sync + Unpin> AsyncMysqlShim<W> for MysqlInstanceShi
|
||||
query: &'a str,
|
||||
w: StatementMetaWriter<'a, W>,
|
||||
) -> Result<()> {
|
||||
let (query, param_num) = replace_placeholder(query);
|
||||
if let Err(e) = validate_query(&query).await {
|
||||
w.error(ErrorKind::ER_UNKNOWN_ERROR, e.to_string().as_bytes())
|
||||
.await?;
|
||||
return Ok(());
|
||||
let raw_query = query.clone();
|
||||
let (query, param_num) = replace_placeholders(query);
|
||||
|
||||
let statement = validate_query(raw_query).await?;
|
||||
|
||||
// We have to transform the placeholder, because DataFusion only parses placeholders
|
||||
// in the form of "$i", it can't process "?" right now.
|
||||
let statement = transform_placeholders(statement);
|
||||
|
||||
let plan = self
|
||||
.do_describe(statement.clone())
|
||||
.await?
|
||||
.map(|DescribeResult { logical_plan, .. }| logical_plan);
|
||||
|
||||
let params = if let Some(plan) = &plan {
|
||||
prepared_params(
|
||||
&plan
|
||||
.get_param_types()
|
||||
.context(error::GetPreparedStmtParamsSnafu)?,
|
||||
)?
|
||||
} else {
|
||||
dummy_params(param_num)?
|
||||
};
|
||||
|
||||
let stmt_id = self.set_query(query);
|
||||
let params = dummy_params(param_num);
|
||||
debug_assert_eq!(params.len(), param_num - 1);
|
||||
|
||||
let stmt_id = self.save_plan(SqlPlan {
|
||||
query: query.to_string(),
|
||||
plan,
|
||||
});
|
||||
|
||||
w.reply(stmt_id, ¶ms, &[]).await?;
|
||||
increment_counter!(
|
||||
@@ -216,7 +268,7 @@ impl<W: AsyncWrite + Send + Sync + Unpin> AsyncMysqlShim<W> for MysqlInstanceShi
|
||||
]
|
||||
);
|
||||
let params: Vec<ParamValue> = p.into_iter().collect();
|
||||
let query = match self.query(stmt_id) {
|
||||
let sql_plan = match self.plan(stmt_id) {
|
||||
None => {
|
||||
w.error(
|
||||
ErrorKind::ER_UNKNOWN_STMT_HANDLER,
|
||||
@@ -225,13 +277,36 @@ impl<W: AsyncWrite + Send + Sync + Unpin> AsyncMysqlShim<W> for MysqlInstanceShi
|
||||
.await?;
|
||||
return Ok(());
|
||||
}
|
||||
Some(query) => query,
|
||||
Some(sql_plan) => sql_plan,
|
||||
};
|
||||
|
||||
let query = replace_params(params, query);
|
||||
log::debug!("execute replaced query: {}", query);
|
||||
let (query, outputs) = match sql_plan.plan {
|
||||
Some(plan) => {
|
||||
let param_types = plan
|
||||
.get_param_types()
|
||||
.context(error::GetPreparedStmtParamsSnafu)?;
|
||||
|
||||
if params.len() != param_types.len() {
|
||||
return error::InternalSnafu {
|
||||
err_msg: "prepare statement params number mismatch".to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
let plan = replace_params_with_values(&plan, param_types, params)?;
|
||||
logging::debug!("Mysql execute prepared plan: {}", plan.display_indent());
|
||||
let outputs = vec![self.do_exec_plan(&sql_plan.query, plan).await];
|
||||
|
||||
(sql_plan.query, outputs)
|
||||
}
|
||||
None => {
|
||||
let query = replace_params(params, sql_plan.query);
|
||||
logging::debug!("Mysql execute replaced query: {}", query);
|
||||
let outputs = self.do_query(&query).await;
|
||||
|
||||
(query, outputs)
|
||||
}
|
||||
};
|
||||
|
||||
let outputs = self.do_query(&query).await;
|
||||
writer::write_output(w, &query, self.session.context(), outputs).await?;
|
||||
|
||||
Ok(())
|
||||
@@ -318,7 +393,7 @@ fn replace_params(params: Vec<ParamValue>, query: String) -> String {
|
||||
ValueInner::Datetime(_) => NaiveDateTime::from(param.value).to_string(),
|
||||
ValueInner::Time(_) => format_duration(Duration::from(param.value)),
|
||||
};
|
||||
query = query.replace(&format!("${}", index), &s);
|
||||
query = query.replace(&format_placeholder(index), &s);
|
||||
index += 1;
|
||||
}
|
||||
query
|
||||
@@ -331,6 +406,27 @@ fn format_duration(duration: Duration) -> String {
|
||||
format!("{}:{}:{}", hours, minutes, seconds)
|
||||
}
|
||||
|
||||
fn replace_params_with_values(
|
||||
plan: &LogicalPlan,
|
||||
param_types: HashMap<String, Option<ConcreteDataType>>,
|
||||
params: Vec<ParamValue>,
|
||||
) -> Result<LogicalPlan> {
|
||||
debug_assert_eq!(param_types.len(), params.len());
|
||||
|
||||
let mut values = Vec::with_capacity(params.len());
|
||||
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
if let Some(Some(t)) = param_types.get(&format_placeholder(i + 1)) {
|
||||
let value = helper::convert_value(param, t)?;
|
||||
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
plan.replace_params_with_values(&values)
|
||||
.context(error::ReplacePreparedStmtParamsSnafu)
|
||||
}
|
||||
|
||||
async fn validate_query(query: &str) -> Result<Statement> {
|
||||
let statement = ParserContext::create_with_dialect(query, &MySqlDialect {});
|
||||
let mut statement = statement.map_err(|e| {
|
||||
@@ -352,29 +448,27 @@ async fn validate_query(query: &str) -> Result<Statement> {
|
||||
Ok(statement)
|
||||
}
|
||||
|
||||
// dummy columns to satisfy opensrv_mysql, just the number of params is useful
|
||||
// TODO(SSebo): use parameter type inference to return actual types
|
||||
fn dummy_params(index: u32) -> Vec<Column> {
|
||||
let mut params = vec![];
|
||||
fn dummy_params(index: usize) -> Result<Vec<Column>> {
|
||||
let mut params = Vec::with_capacity(index - 1);
|
||||
|
||||
for _ in 1..index {
|
||||
params.push(opensrv_mysql::Column {
|
||||
table: "".to_string(),
|
||||
column: "".to_string(),
|
||||
coltype: ColumnType::MYSQL_TYPE_LONG,
|
||||
colflags: ColumnFlags::NOT_NULL_FLAG,
|
||||
});
|
||||
params.push(create_mysql_column(&ConcreteDataType::null_datatype(), "")?);
|
||||
}
|
||||
params
|
||||
|
||||
Ok(params)
|
||||
}
|
||||
|
||||
fn replace_placeholder(query: &str) -> (String, u32) {
|
||||
let mut query = query.to_string();
|
||||
let mut index = 1;
|
||||
while let Some(position) = query.find('?') {
|
||||
let place_holder = format!("${}", index);
|
||||
query.replace_range(position..position + 1, &place_holder);
|
||||
index += 1;
|
||||
/// Parameters that the client must provide when executing the prepared statement.
|
||||
fn prepared_params(param_types: &HashMap<String, Option<ConcreteDataType>>) -> Result<Vec<Column>> {
|
||||
let mut params = Vec::with_capacity(param_types.len());
|
||||
|
||||
// Placeholder index starts from 1
|
||||
for index in 1..=param_types.len() {
|
||||
if let Some(Some(t)) = param_types.get(&format_placeholder(index)) {
|
||||
let column = create_mysql_column(t, "")?;
|
||||
params.push(column);
|
||||
}
|
||||
}
|
||||
(query, index)
|
||||
|
||||
Ok(params)
|
||||
}
|
||||
|
||||
238
src/servers/src/mysql/helper.rs
Normal file
238
src/servers/src/mysql/helper.rs
Normal file
@@ -0,0 +1,238 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
use std::ops::ControlFlow;
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::{NaiveDate, NaiveDateTime};
|
||||
use common_query::prelude::ScalarValue;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::{self, Value};
|
||||
use itertools::Itertools;
|
||||
use opensrv_mysql::{ParamValue, ValueInner};
|
||||
use snafu::ResultExt;
|
||||
use sql::ast::{visit_expressions_mut, Expr, Value as ValueExpr, VisitMut};
|
||||
use sql::statements::statement::Statement;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
|
||||
/// Returns the placeholder string "$i".
|
||||
pub fn format_placeholder(i: usize) -> String {
|
||||
format!("${}", i)
|
||||
}
|
||||
|
||||
/// Replace all the "?" placeholder into "$i" in SQL,
|
||||
/// returns the new SQL and the last placeholder index.
|
||||
pub fn replace_placeholders(query: &str) -> (String, usize) {
|
||||
let query_parts = query.split('?').collect::<Vec<_>>();
|
||||
let parts_len = query_parts.len();
|
||||
let mut index = 0;
|
||||
let query = query_parts
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, part)| {
|
||||
if i == parts_len - 1 {
|
||||
return part.to_string();
|
||||
}
|
||||
|
||||
index += 1;
|
||||
format!("{part}{}", format_placeholder(index))
|
||||
})
|
||||
.join("");
|
||||
|
||||
(query, index + 1)
|
||||
}
|
||||
|
||||
/// Transform all the "?" placeholder into "$i".
|
||||
/// Only works for Insert,Query and Delete statements.
|
||||
pub fn transform_placeholders(stmt: Statement) -> Statement {
|
||||
match stmt {
|
||||
Statement::Query(mut query) => {
|
||||
visit_placeholders(&mut query.inner);
|
||||
Statement::Query(query)
|
||||
}
|
||||
Statement::Insert(mut insert) => {
|
||||
visit_placeholders(&mut insert.inner);
|
||||
Statement::Insert(insert)
|
||||
}
|
||||
Statement::Delete(mut delete) => {
|
||||
visit_placeholders(&mut delete.inner);
|
||||
Statement::Delete(delete)
|
||||
}
|
||||
stmt => stmt,
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_placeholders<V>(v: &mut V)
|
||||
where
|
||||
V: VisitMut,
|
||||
{
|
||||
let mut index = 1;
|
||||
visit_expressions_mut(v, |expr| {
|
||||
if let Expr::Value(ValueExpr::Placeholder(s)) = expr {
|
||||
*s = format_placeholder(index);
|
||||
index += 1;
|
||||
}
|
||||
ControlFlow::<()>::Continue(())
|
||||
});
|
||||
}
|
||||
|
||||
/// Convert [`ParamValue`] into [`Value`] according to param type.
|
||||
/// It will try it's best to do type conversions if possible
|
||||
pub fn convert_value(param: &ParamValue, t: &ConcreteDataType) -> Result<ScalarValue> {
|
||||
match param.value.into_inner() {
|
||||
ValueInner::Int(i) => match t {
|
||||
ConcreteDataType::Int8(_) => Ok(ScalarValue::Int8(Some(i as i8))),
|
||||
ConcreteDataType::Int16(_) => Ok(ScalarValue::Int16(Some(i as i16))),
|
||||
ConcreteDataType::Int32(_) => Ok(ScalarValue::Int32(Some(i as i32))),
|
||||
ConcreteDataType::Int64(_) => Ok(ScalarValue::Int64(Some(i))),
|
||||
ConcreteDataType::UInt8(_) => Ok(ScalarValue::UInt8(Some(i as u8))),
|
||||
ConcreteDataType::UInt16(_) => Ok(ScalarValue::UInt16(Some(i as u16))),
|
||||
ConcreteDataType::UInt32(_) => Ok(ScalarValue::UInt32(Some(i as u32))),
|
||||
ConcreteDataType::UInt64(_) => Ok(ScalarValue::UInt64(Some(i as u64))),
|
||||
ConcreteDataType::Float32(_) => Ok(ScalarValue::Float32(Some(i as f32))),
|
||||
ConcreteDataType::Float64(_) => Ok(ScalarValue::Float64(Some(i as f64))),
|
||||
ConcreteDataType::Timestamp(ts_type) => Value::Timestamp(ts_type.create_timestamp(i))
|
||||
.try_to_scalar_value(t)
|
||||
.context(error::ConvertScalarValueSnafu),
|
||||
|
||||
_ => error::PreparedStmtTypeMismatchSnafu {
|
||||
expected: t,
|
||||
actual: param.coltype,
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
ValueInner::UInt(u) => match t {
|
||||
ConcreteDataType::Int8(_) => Ok(ScalarValue::Int8(Some(u as i8))),
|
||||
ConcreteDataType::Int16(_) => Ok(ScalarValue::Int16(Some(u as i16))),
|
||||
ConcreteDataType::Int32(_) => Ok(ScalarValue::Int32(Some(u as i32))),
|
||||
ConcreteDataType::Int64(_) => Ok(ScalarValue::Int64(Some(u as i64))),
|
||||
ConcreteDataType::UInt8(_) => Ok(ScalarValue::UInt8(Some(u as u8))),
|
||||
ConcreteDataType::UInt16(_) => Ok(ScalarValue::UInt16(Some(u as u16))),
|
||||
ConcreteDataType::UInt32(_) => Ok(ScalarValue::UInt32(Some(u as u32))),
|
||||
ConcreteDataType::UInt64(_) => Ok(ScalarValue::UInt64(Some(u))),
|
||||
ConcreteDataType::Float32(_) => Ok(ScalarValue::Float32(Some(u as f32))),
|
||||
ConcreteDataType::Float64(_) => Ok(ScalarValue::Float64(Some(u as f64))),
|
||||
ConcreteDataType::Timestamp(ts_type) => {
|
||||
Value::Timestamp(ts_type.create_timestamp(u as i64))
|
||||
.try_to_scalar_value(t)
|
||||
.context(error::ConvertScalarValueSnafu)
|
||||
}
|
||||
|
||||
_ => error::PreparedStmtTypeMismatchSnafu {
|
||||
expected: t,
|
||||
actual: param.coltype,
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
ValueInner::Double(f) => match t {
|
||||
ConcreteDataType::Int8(_) => Ok(ScalarValue::Int8(Some(f as i8))),
|
||||
ConcreteDataType::Int16(_) => Ok(ScalarValue::Int16(Some(f as i16))),
|
||||
ConcreteDataType::Int32(_) => Ok(ScalarValue::Int32(Some(f as i32))),
|
||||
ConcreteDataType::Int64(_) => Ok(ScalarValue::Int64(Some(f as i64))),
|
||||
ConcreteDataType::UInt8(_) => Ok(ScalarValue::UInt8(Some(f as u8))),
|
||||
ConcreteDataType::UInt16(_) => Ok(ScalarValue::UInt16(Some(f as u16))),
|
||||
ConcreteDataType::UInt32(_) => Ok(ScalarValue::UInt32(Some(f as u32))),
|
||||
ConcreteDataType::UInt64(_) => Ok(ScalarValue::UInt64(Some(f as u64))),
|
||||
ConcreteDataType::Float32(_) => Ok(ScalarValue::Float32(Some(f as f32))),
|
||||
ConcreteDataType::Float64(_) => Ok(ScalarValue::Float64(Some(f))),
|
||||
|
||||
_ => error::PreparedStmtTypeMismatchSnafu {
|
||||
expected: t,
|
||||
actual: param.coltype,
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
ValueInner::NULL => Ok(value::to_null_scalar_value(t)),
|
||||
ValueInner::Bytes(b) => match t {
|
||||
ConcreteDataType::String(_) => Ok(ScalarValue::Utf8(Some(
|
||||
String::from_utf8_lossy(b).to_string(),
|
||||
))),
|
||||
ConcreteDataType::Binary(_) => Ok(ScalarValue::LargeBinary(Some(b.to_vec()))),
|
||||
|
||||
_ => error::PreparedStmtTypeMismatchSnafu {
|
||||
expected: t,
|
||||
actual: param.coltype,
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
ValueInner::Date(_) => {
|
||||
let date: common_time::Date = NaiveDate::from(param.value).into();
|
||||
Ok(ScalarValue::Date32(Some(date.val())))
|
||||
}
|
||||
ValueInner::Datetime(_) => Ok(ScalarValue::Date64(Some(
|
||||
NaiveDateTime::from(param.value).timestamp_millis(),
|
||||
))),
|
||||
ValueInner::Time(_) => Ok(ScalarValue::Time64Nanosecond(Some(
|
||||
Duration::from(param.value).as_millis() as i64,
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use sql::dialect::MySqlDialect;
|
||||
use sql::parser::ParserContext;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_format_placeholder() {
|
||||
assert_eq!("$1", format_placeholder(1));
|
||||
assert_eq!("$3", format_placeholder(3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_placeholders() {
|
||||
let create = "create table demo(host string, ts timestamp time index)";
|
||||
let (sql, index) = replace_placeholders(create);
|
||||
assert_eq!(create, sql);
|
||||
assert_eq!(1, index);
|
||||
|
||||
let insert = "insert into demo values(?,?,?)";
|
||||
let (sql, index) = replace_placeholders(insert);
|
||||
assert_eq!("insert into demo values($1,$2,$3)", sql);
|
||||
assert_eq!(4, index);
|
||||
|
||||
let query = "select from demo where host=? and idc in (select idc from idcs where name=?) and cpu>?";
|
||||
let (sql, index) = replace_placeholders(query);
|
||||
assert_eq!("select from demo where host=$1 and idc in (select idc from idcs where name=$2) and cpu>$3", sql);
|
||||
assert_eq!(4, index);
|
||||
}
|
||||
|
||||
fn parse_sql(sql: &str) -> Statement {
|
||||
let mut stmts = ParserContext::create_with_dialect(sql, &MySqlDialect {}).unwrap();
|
||||
stmts.remove(0)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_transform_placeholders() {
|
||||
let insert = parse_sql("insert into demo values(?,?,?)");
|
||||
let Statement::Insert(insert) = transform_placeholders(insert) else { unreachable!()};
|
||||
assert_eq!(
|
||||
"INSERT INTO demo VALUES ($1, $2, $3)",
|
||||
insert.inner.to_string()
|
||||
);
|
||||
|
||||
let delete = parse_sql("delete from demo where host=? and idc=?");
|
||||
let Statement::Delete(delete) = transform_placeholders(delete) else { unreachable!()};
|
||||
assert_eq!(
|
||||
"DELETE FROM demo WHERE host = $1 AND idc = $2",
|
||||
delete.inner.to_string()
|
||||
);
|
||||
|
||||
let select = parse_sql("select from demo where host=? and idc in (select idc from idcs where name=?) and cpu>?");
|
||||
let Statement::Query(select) = transform_placeholders(select) else { unreachable!()};
|
||||
assert_eq!("SELECT from AS demo WHERE host = $1 AND idc IN (SELECT idc FROM idcs WHERE name = $2) AND cpu > $3", select.inner.to_string());
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,7 @@ use common_query::Output;
|
||||
use common_recordbatch::{util, RecordBatch};
|
||||
use common_telemetry::error;
|
||||
use datatypes::prelude::{ConcreteDataType, Value};
|
||||
use datatypes::schema::{ColumnSchema, SchemaRef};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use opensrv_mysql::{
|
||||
Column, ColumnFlags, ColumnType, ErrorKind, OkResponse, QueryResultWriter, RowWriter,
|
||||
};
|
||||
@@ -176,8 +176,8 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
Value::Float64(v) => row_writer.write_col(v.0)?,
|
||||
Value::String(v) => row_writer.write_col(v.as_utf8())?,
|
||||
Value::Binary(v) => row_writer.write_col(v.deref())?,
|
||||
Value::Date(v) => row_writer.write_col(v.val())?,
|
||||
Value::DateTime(v) => row_writer.write_col(v.val())?,
|
||||
Value::Date(v) => row_writer.write_col(v.to_chrono_date())?,
|
||||
Value::DateTime(v) => row_writer.write_col(v.to_chrono_datetime())?,
|
||||
Value::Timestamp(v) => row_writer
|
||||
.write_col(v.to_timezone_aware_string(query_context.time_zone()))?,
|
||||
Value::List(_) => {
|
||||
@@ -208,8 +208,11 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_mysql_column(column_schema: &ColumnSchema) -> Result<Column> {
|
||||
let column_type = match column_schema.data_type {
|
||||
pub(crate) fn create_mysql_column(
|
||||
data_type: &ConcreteDataType,
|
||||
column_name: &str,
|
||||
) -> Result<Column> {
|
||||
let column_type = match data_type {
|
||||
ConcreteDataType::Null(_) => Ok(ColumnType::MYSQL_TYPE_NULL),
|
||||
ConcreteDataType::Boolean(_) | ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => {
|
||||
Ok(ColumnType::MYSQL_TYPE_TINY)
|
||||
@@ -230,15 +233,12 @@ fn create_mysql_column(column_schema: &ColumnSchema) -> Result<Column> {
|
||||
ConcreteDataType::Date(_) => Ok(ColumnType::MYSQL_TYPE_DATE),
|
||||
ConcreteDataType::DateTime(_) => Ok(ColumnType::MYSQL_TYPE_DATETIME),
|
||||
_ => error::InternalSnafu {
|
||||
err_msg: format!(
|
||||
"not implemented for column datatype {:?}",
|
||||
column_schema.data_type
|
||||
),
|
||||
err_msg: format!("not implemented for column datatype {:?}", data_type),
|
||||
}
|
||||
.fail(),
|
||||
};
|
||||
let mut colflags = ColumnFlags::empty();
|
||||
match column_schema.data_type {
|
||||
match data_type {
|
||||
ConcreteDataType::UInt16(_)
|
||||
| ConcreteDataType::UInt8(_)
|
||||
| ConcreteDataType::UInt32(_)
|
||||
@@ -246,7 +246,7 @@ fn create_mysql_column(column_schema: &ColumnSchema) -> Result<Column> {
|
||||
_ => {}
|
||||
};
|
||||
column_type.map(|column_type| Column {
|
||||
column: column_schema.name.clone(),
|
||||
column: column_name.to_string(),
|
||||
coltype: column_type,
|
||||
|
||||
// TODO(LFC): Currently "table" and "colflags" are not relevant in MySQL server
|
||||
@@ -261,6 +261,6 @@ pub fn create_mysql_column_def(schema: &SchemaRef) -> Result<Vec<Column>> {
|
||||
schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(create_mysql_column)
|
||||
.map(|column_schema| create_mysql_column(&column_schema.data_type, &column_schema.name))
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ use pgwire::api::stmt::QueryParser;
|
||||
use pgwire::api::store::MemPortalStore;
|
||||
use pgwire::api::{ClientInfo, Type};
|
||||
use pgwire::error::{ErrorInfo, PgWireError, PgWireResult};
|
||||
use query::query_engine::DescribeResult;
|
||||
use sql::dialect::PostgreSqlDialect;
|
||||
use sql::parser::ParserContext;
|
||||
use sql::statements::statement::Statement;
|
||||
@@ -405,7 +406,7 @@ impl ExtendedQueryHandler for PostgresServerHandler {
|
||||
// get Statement part of the tuple
|
||||
let (stmt, _) = stmt;
|
||||
|
||||
if let Some(schema) = self
|
||||
if let Some(DescribeResult { schema, .. }) = self
|
||||
.query_handler
|
||||
.do_describe(stmt.clone(), self.session.context())
|
||||
.await
|
||||
|
||||
@@ -24,8 +24,11 @@ use api::v1::{InsertRequest as GrpcInsertRequest, InsertRequests};
|
||||
use common_grpc::writer::{LinesWriter, Precision};
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::prelude::{col, lit, regexp_match, Expr};
|
||||
use datatypes::prelude::{ConcreteDataType, Value};
|
||||
use openmetrics_parser::{MetricsExposition, PrometheusType, PrometheusValue};
|
||||
use query::dataframe::DataFrame;
|
||||
use query::plan::LogicalPlan;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use snap::raw::{Decoder, Encoder};
|
||||
|
||||
@@ -40,14 +43,11 @@ pub struct Metrics {
|
||||
pub exposition: MetricsExposition<PrometheusType, PrometheusValue>,
|
||||
}
|
||||
|
||||
/// Generate a sql from a remote request query
|
||||
/// TODO(dennis): maybe use logical plan in future to prevent sql injection
|
||||
pub fn query_to_sql(q: &Query) -> Result<(String, String)> {
|
||||
let start_timestamp_ms = q.start_timestamp_ms;
|
||||
let end_timestamp_ms = q.end_timestamp_ms;
|
||||
|
||||
/// Get table name from remote query
|
||||
pub fn table_name(q: &Query) -> Result<String> {
|
||||
let label_matches = &q.matchers;
|
||||
let table_name = label_matches
|
||||
|
||||
label_matches
|
||||
.iter()
|
||||
.find_map(|m| {
|
||||
if m.name == METRIC_NAME_LABEL {
|
||||
@@ -58,13 +58,22 @@ pub fn query_to_sql(q: &Query) -> Result<(String, String)> {
|
||||
})
|
||||
.context(error::InvalidPromRemoteRequestSnafu {
|
||||
msg: "missing '__name__' label in timeseries",
|
||||
})?;
|
||||
})
|
||||
}
|
||||
|
||||
let mut conditions: Vec<String> = Vec::with_capacity(label_matches.len());
|
||||
/// Create a DataFrame from a remote Query
|
||||
pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result<LogicalPlan> {
|
||||
let DataFrame::DataFusion(dataframe) = dataframe;
|
||||
|
||||
conditions.push(format!(
|
||||
"{TIMESTAMP_COLUMN_NAME}>={start_timestamp_ms} AND {TIMESTAMP_COLUMN_NAME}<={end_timestamp_ms}",
|
||||
));
|
||||
let start_timestamp_ms = q.start_timestamp_ms;
|
||||
let end_timestamp_ms = q.end_timestamp_ms;
|
||||
|
||||
let label_matches = &q.matchers;
|
||||
|
||||
let mut conditions = Vec::with_capacity(label_matches.len() + 1);
|
||||
|
||||
conditions.push(col(TIMESTAMP_COLUMN_NAME).gt_eq(lit(start_timestamp_ms)));
|
||||
conditions.push(col(TIMESTAMP_COLUMN_NAME).lt_eq(lit(end_timestamp_ms)));
|
||||
|
||||
for m in label_matches {
|
||||
let name = &m.name;
|
||||
@@ -81,28 +90,30 @@ pub fn query_to_sql(q: &Query) -> Result<(String, String)> {
|
||||
|
||||
match m_type {
|
||||
MatcherType::Eq => {
|
||||
conditions.push(format!("{name}='{value}'"));
|
||||
conditions.push(col(name).eq(lit(value)));
|
||||
}
|
||||
MatcherType::Neq => {
|
||||
conditions.push(format!("{name}!='{value}'"));
|
||||
conditions.push(col(name).not_eq(lit(value)));
|
||||
}
|
||||
// Case sensitive regexp match
|
||||
MatcherType::Re => {
|
||||
conditions.push(format!("{name}~'{value}'"));
|
||||
conditions.push(regexp_match(vec![col(name), lit(value)]).is_not_null());
|
||||
}
|
||||
// Case sensitive regexp not match
|
||||
MatcherType::Nre => {
|
||||
conditions.push(format!("{name}!~'{value}'"));
|
||||
conditions.push(regexp_match(vec![col(name), lit(value)]).is_null());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let conditions = conditions.join(" AND ");
|
||||
// Safety: conditions MUST not be empty, reduce always return Some(expr).
|
||||
let conditions = conditions.into_iter().reduce(Expr::and).unwrap();
|
||||
|
||||
Ok((
|
||||
table_name.to_string(),
|
||||
format!("select * from {table_name} where {conditions} order by {TIMESTAMP_COLUMN_NAME}",),
|
||||
))
|
||||
let dataframe = dataframe
|
||||
.filter(conditions)
|
||||
.context(error::DataFrameSnafu)?;
|
||||
|
||||
Ok(LogicalPlan::DfPlan(dataframe.into_parts().1))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -433,8 +444,11 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::prometheus::remote::LabelMatcher;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::{Float64Vector, StringVector, TimestampMillisecondVector};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
use table::test_util::MemTable;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -443,14 +457,14 @@ mod tests {
|
||||
const RE_TYPE: i32 = MatcherType::Re as i32;
|
||||
|
||||
#[test]
|
||||
fn test_query_to_sql() {
|
||||
fn test_table_name() {
|
||||
let q = Query {
|
||||
start_timestamp_ms: 1000,
|
||||
end_timestamp_ms: 2000,
|
||||
matchers: vec![],
|
||||
..Default::default()
|
||||
};
|
||||
let err = query_to_sql(&q).unwrap_err();
|
||||
let err = table_name(&q).unwrap_err();
|
||||
assert!(matches!(err, error::Error::InvalidPromRemoteRequest { .. }));
|
||||
|
||||
let q = Query {
|
||||
@@ -463,9 +477,56 @@ mod tests {
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
let (table, sql) = query_to_sql(&q).unwrap();
|
||||
assert_eq!("test", table);
|
||||
assert_eq!("select * from test where greptime_timestamp>=1000 AND greptime_timestamp<=2000 order by greptime_timestamp", sql);
|
||||
assert_eq!("test", table_name(&q).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_to_plan() {
|
||||
let q = Query {
|
||||
start_timestamp_ms: 1000,
|
||||
end_timestamp_ms: 2000,
|
||||
matchers: vec![LabelMatcher {
|
||||
name: METRIC_NAME_LABEL.to_string(),
|
||||
value: "test".to_string(),
|
||||
r#type: EQ_TYPE,
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
TIMESTAMP_COLUMN_NAME,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
FIELD_COLUMN_NAME,
|
||||
ConcreteDataType::float64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new("instance", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new("job", ConcreteDataType::string_datatype(), true),
|
||||
]));
|
||||
let recordbatch = RecordBatch::new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(TimestampMillisecondVector::from_vec(vec![1000])) as _,
|
||||
Arc::new(Float64Vector::from_vec(vec![3.0])) as _,
|
||||
Arc::new(StringVector::from(vec!["host1"])) as _,
|
||||
Arc::new(StringVector::from(vec!["job"])) as _,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let ctx = SessionContext::new();
|
||||
let table = Arc::new(MemTable::new("test", recordbatch));
|
||||
let table_provider = Arc::new(DfTableProviderAdapter::new(table));
|
||||
|
||||
let dataframe = ctx.read_table(table_provider.clone()).unwrap();
|
||||
let plan = query_to_plan(DataFrame::DataFusion(dataframe), &q).unwrap();
|
||||
let display_string = format!("{}", plan.display_indent());
|
||||
|
||||
assert_eq!("Filter: ?table?.greptime_timestamp >= Int64(1000) AND ?table?.greptime_timestamp <= Int64(2000)\n TableScan: ?table?", display_string);
|
||||
|
||||
let q = Query {
|
||||
start_timestamp_ms: 1000,
|
||||
@@ -489,9 +550,12 @@ mod tests {
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
let (table, sql) = query_to_sql(&q).unwrap();
|
||||
assert_eq!("test", table);
|
||||
assert_eq!("select * from test where greptime_timestamp>=1000 AND greptime_timestamp<=2000 AND job~'*prom*' AND instance!='localhost' order by greptime_timestamp", sql);
|
||||
|
||||
let dataframe = ctx.read_table(table_provider).unwrap();
|
||||
let plan = query_to_plan(DataFrame::DataFusion(dataframe), &q).unwrap();
|
||||
let display_string = format!("{}", plan.display_indent());
|
||||
|
||||
assert_eq!("Filter: ?table?.greptime_timestamp >= Int64(1000) AND ?table?.greptime_timestamp <= Int64(2000) AND regexp_match(?table?.job, Utf8(\"*prom*\")) IS NOT NULL AND ?table?.instance != Utf8(\"localhost\")\n TableScan: ?table?", display_string);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -17,8 +17,8 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use common_error::prelude::*;
|
||||
use common_query::Output;
|
||||
use datatypes::schema::Schema;
|
||||
use query::parser::PromQuery;
|
||||
use query::plan::LogicalPlan;
|
||||
use session::context::QueryContextRef;
|
||||
use sql::statements::statement::Statement;
|
||||
|
||||
@@ -26,6 +26,7 @@ use crate::error::{self, Result};
|
||||
|
||||
pub type SqlQueryHandlerRef<E> = Arc<dyn SqlQueryHandler<Error = E> + Send + Sync>;
|
||||
pub type ServerSqlQueryHandlerRef = SqlQueryHandlerRef<error::Error>;
|
||||
use query::query_engine::DescribeResult;
|
||||
|
||||
#[async_trait]
|
||||
pub trait SqlQueryHandler {
|
||||
@@ -37,6 +38,12 @@ pub trait SqlQueryHandler {
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Vec<std::result::Result<Output, Self::Error>>;
|
||||
|
||||
async fn do_exec_plan(
|
||||
&self,
|
||||
plan: LogicalPlan,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, Self::Error>;
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
query: &PromQuery,
|
||||
@@ -47,7 +54,7 @@ pub trait SqlQueryHandler {
|
||||
&self,
|
||||
stmt: Statement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Option<Schema>, Self::Error>;
|
||||
) -> std::result::Result<Option<DescribeResult>, Self::Error>;
|
||||
|
||||
async fn is_valid_schema(
|
||||
&self,
|
||||
@@ -83,6 +90,14 @@ where
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn do_exec_plan(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
self.0
|
||||
.do_exec_plan(plan, query_ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecutePlanSnafu)
|
||||
}
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
query: &PromQuery,
|
||||
@@ -107,7 +122,7 @@ where
|
||||
&self,
|
||||
stmt: Statement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
self.0
|
||||
.do_describe(stmt, query_ctx)
|
||||
.await
|
||||
|
||||
@@ -365,3 +365,18 @@ async fn test_health() {
|
||||
expected_json_str
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_status() {
|
||||
let expected_json = http_handler::StatusResponse {
|
||||
source_time: env!("SOURCE_TIMESTAMP"),
|
||||
commit: env!("GIT_COMMIT"),
|
||||
branch: env!("GIT_BRANCH"),
|
||||
rustc_version: env!("RUSTC_VERSION"),
|
||||
hostname: env!("BUILD_HOSTNAME"),
|
||||
version: env!("CARGO_PKG_VERSION"),
|
||||
};
|
||||
|
||||
let Json(json) = http_handler::status().await;
|
||||
assert_eq!(json, expected_json);
|
||||
}
|
||||
|
||||
@@ -21,8 +21,9 @@ use axum::{http, Router};
|
||||
use axum_test_helper::TestClient;
|
||||
use common_query::Output;
|
||||
use common_test_util::ports;
|
||||
use datatypes::schema::Schema;
|
||||
use query::parser::PromQuery;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::DescribeResult;
|
||||
use servers::error::{Error, Result};
|
||||
use servers::http::{HttpOptions, HttpServerBuilder};
|
||||
use servers::influxdb::InfluxdbRequest;
|
||||
@@ -71,6 +72,14 @@ impl SqlQueryHandler for DummyInstance {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_exec_plan(
|
||||
&self,
|
||||
_plan: LogicalPlan,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
_: &PromQuery,
|
||||
@@ -83,7 +92,7 @@ impl SqlQueryHandler for DummyInstance {
|
||||
&self,
|
||||
_stmt: sql::statements::statement::Statement,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -20,8 +20,9 @@ use axum::Router;
|
||||
use axum_test_helper::TestClient;
|
||||
use common_query::Output;
|
||||
use common_test_util::ports;
|
||||
use datatypes::schema::Schema;
|
||||
use query::parser::PromQuery;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::DescribeResult;
|
||||
use servers::error::{self, Result};
|
||||
use servers::http::{HttpOptions, HttpServerBuilder};
|
||||
use servers::opentsdb::codec::DataPoint;
|
||||
@@ -70,6 +71,14 @@ impl SqlQueryHandler for DummyInstance {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_exec_plan(
|
||||
&self,
|
||||
_plan: LogicalPlan,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
_: &PromQuery,
|
||||
@@ -82,7 +91,7 @@ impl SqlQueryHandler for DummyInstance {
|
||||
&self,
|
||||
_stmt: sql::statements::statement::Statement,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -23,9 +23,10 @@ use axum::Router;
|
||||
use axum_test_helper::TestClient;
|
||||
use common_query::Output;
|
||||
use common_test_util::ports;
|
||||
use datatypes::schema::Schema;
|
||||
use prost::Message;
|
||||
use query::parser::PromQuery;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::query_engine::DescribeResult;
|
||||
use servers::error::{Error, Result};
|
||||
use servers::http::{HttpOptions, HttpServerBuilder};
|
||||
use servers::prometheus;
|
||||
@@ -95,6 +96,14 @@ impl SqlQueryHandler for DummyInstance {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_exec_plan(
|
||||
&self,
|
||||
_plan: LogicalPlan,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> std::result::Result<Output, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn do_promql_query(
|
||||
&self,
|
||||
_: &PromQuery,
|
||||
@@ -107,7 +116,7 @@ impl SqlQueryHandler for DummyInstance {
|
||||
&self,
|
||||
_stmt: sql::statements::statement::Statement,
|
||||
_query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
) -> Result<Option<DescribeResult>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user