Compare commits


7 Commits

Author SHA1 Message Date
discord9
a4c2ada639 fix: subquery&cte time window expr 2025-03-05 14:49:03 +08:00
discord9
0374c332d2 refactor: even finer&limit time window num 2025-03-05 14:49:03 +08:00
discord9
5d14a1afc2 feat: basic time window aware 2025-03-05 14:49:03 +08:00
discord9
382148f93f metrics: better bucket&longer timeout 2025-03-05 14:49:03 +08:00
discord9
7aba712acc fix: timeout 2025-03-05 14:49:03 +08:00
discord9
f353a200c8 fix: heartbeat&expire_after unit 2025-03-05 14:49:03 +08:00
discord9
0deb670ae7 feat: time window in df plan
WIP

test: found out time window expr

chore: pub

tests: also unparsed

tests: rm dup code

feat: frontend client for recording rule

fix: bound edgecase

WIP

WIP

feat: rule engine

feat: add init options& tmp rerounte to rule

fix: dist client get

fix: also not handle mirror write in flownode

chore: clippy
2025-03-05 14:49:03 +08:00
184 changed files with 8046 additions and 8939 deletions

View File

@@ -41,14 +41,7 @@ runs:
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build and push dev-builder-ubuntu image # Build image for amd64 and arm64 platform.
- name: Build and push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
run: |
@@ -59,7 +52,7 @@ runs:
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image # Only build image for amd64 platform.
- name: Build and push dev-builder-centos image
shell: bash
if: ${{ inputs.build-dev-builder-centos == 'true' }}
run: |
@@ -76,7 +69,8 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

Cargo.lock generated
View File

@@ -1594,7 +1594,7 @@ dependencies = [
"bitflags 1.3.2",
"strsim 0.8.0",
"textwrap 0.11.0",
"unicode-width 0.1.14",
"unicode-width",
"vec_map",
]
@@ -1876,7 +1876,7 @@ checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
dependencies = [
"strum 0.26.3",
"strum_macros 0.26.4",
"unicode-width 0.1.14",
"unicode-width",
]
[[package]]
@@ -2469,7 +2469,6 @@ dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width 0.1.14",
"windows-sys 0.52.0",
]
@@ -4168,6 +4167,7 @@ dependencies = [
"bytes",
"cache",
"catalog",
"chrono",
"client",
"common-base",
"common-catalog",
@@ -4646,7 +4646,7 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
dependencies = [
"unicode-width 0.1.14",
"unicode-width",
]
[[package]]
@@ -4702,7 +4702,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=072ce580502e015df1a6b03a185b60309a7c2a7a#072ce580502e015df1a6b03a185b60309a7c2a7a"
dependencies = [
"prost 0.13.3",
"serde",
@@ -5600,19 +5600,6 @@ dependencies = [
"serde",
]
[[package]]
name = "indicatif"
version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-width 0.2.0",
"web-time 1.1.0",
]
[[package]]
name = "inferno"
version = "0.11.21"
@@ -5642,25 +5629,6 @@ dependencies = [
"snafu 0.7.5",
]
[[package]]
name = "ingester"
version = "0.13.0"
dependencies = [
"clap 4.5.19",
"common-telemetry",
"common-time",
"datanode",
"meta-client",
"mito2",
"object-store",
"reqwest",
"serde",
"serde_json",
"sst-convert",
"tokio",
"toml 0.8.19",
]
[[package]]
name = "inotify"
version = "0.9.6"
@@ -7550,12 +7518,6 @@ dependencies = [
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "objc"
version = "0.2.7"
@@ -8012,7 +7974,7 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3"
dependencies = [
"unicode-width 0.1.14",
"unicode-width",
]
[[package]]
@@ -8108,19 +8070,6 @@ dependencies = [
"zstd-sys",
]
[[package]]
name = "parquet_opendal"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4140ae96f37c170f8d684a544711fabdac1d94adcbd97e8b033329bd37f40446"
dependencies = [
"async-trait",
"bytes",
"futures",
"opendal",
"parquet",
]
[[package]]
name = "parse-zoneinfo"
version = "0.3.1"
@@ -10108,7 +10057,7 @@ dependencies = [
"radix_trie",
"scopeguard",
"unicode-segmentation",
"unicode-width 0.1.14",
"unicode-width",
"utf8parse",
"winapi",
]
@@ -11255,36 +11204,6 @@ dependencies = [
"url",
]
[[package]]
name = "sst-convert"
version = "0.13.0"
dependencies = [
"api",
"arrow-array",
"async-trait",
"catalog",
"common-error",
"common-macro",
"common-meta",
"common-recordbatch",
"common-telemetry",
"datanode",
"datatypes",
"futures",
"futures-util",
"indicatif",
"meta-client",
"metric-engine",
"mito2",
"object-store",
"parquet",
"parquet_opendal",
"prost 0.13.3",
"snafu 0.8.5",
"store-api",
"table",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
@@ -12017,7 +11936,7 @@ version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width 0.1.14",
"unicode-width",
]
[[package]]
@@ -13120,12 +13039,6 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "unicode-width"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
[[package]]
name = "unicode-xid"
version = "0.2.6"

View File

@@ -41,7 +41,6 @@ members = [
"src/flow",
"src/frontend",
"src/index",
"src/ingester",
"src/log-query",
"src/log-store",
"src/meta-client",
@@ -59,7 +58,6 @@ members = [
"src/servers",
"src/session",
"src/sql",
"src/sst-convert",
"src/store-api",
"src/table",
"tests-fuzz",
@@ -131,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -273,7 +271,6 @@ query = { path = "src/query" }
servers = { path = "src/servers" }
session = { path = "src/session" }
sql = { path = "src/sql" }
sst-convert = { path = "src/sst-convert" }
store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" }

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-a71b93dd-20250305072908
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-9d0fa5d5-20250124085746
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
@@ -60,8 +60,6 @@ ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), arm64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/arm64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif

View File

@@ -1,76 +0,0 @@
# log
## first create table
```bash
mysql --host=127.0.0.1 --port=19195 --database=public;
```
```sql
CREATE DATABASE IF NOT EXISTS `cluster1`;
USE `cluster1`;
CREATE TABLE IF NOT EXISTS `app1` (
`greptime_timestamp` TimestampNanosecond NOT NULL TIME INDEX,
`app` STRING NULL INVERTED INDEX,
`cluster` STRING NULL INVERTED INDEX,
`message` STRING NULL,
`region` STRING NULL,
`cloud-provider` STRING NULL,
`environment` STRING NULL,
`product` STRING NULL,
`sub-product` STRING NULL,
`service` STRING NULL
) WITH (
append_mode = 'true',
'compaction.type' = 'twcs',
'compaction.twcs.max_output_file_size' = '500MB',
'compaction.twcs.max_active_window_files' = '16',
'compaction.twcs.max_active_window_runs' = '4',
'compaction.twcs.max_inactive_window_files' = '4',
'compaction.twcs.max_inactive_window_runs' = '2'
);
select count(*) from app1;
SELECT * FROM app1 ORDER BY greptime_timestamp DESC LIMIT 10\G
```
## then ingest
```bash
RUST_LOG="debug" cargo run --bin=ingester -- --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --parquet-dir="parquet_store/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
```
# metrics!!!!!!!
```bash
mysql --host=127.0.0.1 --port=19195 --database=public < public.greptime_physical_table-create-tables.sql
```
## then ingest
```bash
RUST_LOG="debug"
cargo run --bin=ingester -- --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --remote-write-dir="metrics_parquet/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
# perf it
cargo build --release --bin=ingester
samply record target/release/ingester --input-dir="/home/discord9/greptimedb/parquet_store_bk/" --remote-write-dir="metrics_parquet/" --cfg="ingester.toml" --db-http-addr="http://127.0.0.1:4000/v1/sst/ingest_json"
```
## check data
```sql
select count(*) from greptime_physical_table;
+----------+
| count(*) |
+----------+
| 36200 |
+----------+
1 row in set (0.06 sec)
select count(*) from storage_operation_errors_total;
+----------+
| count(*) |
+----------+
| 10 |
+----------+
1 row in set (0.03 sec)
```
# with oss
The procedure is the same; the only difference is the storage configuration in `ingester.toml`.

File diff suppressed because it is too large.

View File

@@ -1,35 +0,0 @@
## The metasrv client options.
[meta_client]
## The addresses of the metasrv.
metasrv_addrs = ["127.0.0.1:3002", "127.0.0.1:3003"]
## Operation timeout.
timeout = "3s"
## Heartbeat timeout.
heartbeat_timeout = "500ms"
## DDL timeout.
ddl_timeout = "10s"
## Connect server timeout.
connect_timeout = "1s"
## `TCP_NODELAY` option for accepted connections.
tcp_nodelay = true
## The configuration about the cache of the metadata.
metadata_cache_max_capacity = 100000
## TTL of the metadata cache.
metadata_cache_ttl = "10m"
# TTI of the metadata cache.
metadata_cache_tti = "5m"
## The data storage options.
[storage]
## The working home directory.
data_home = "/tmp/greptimedb-cluster/datanode0"
type = "File"
[mito]

View File

@@ -16,7 +16,6 @@
mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;
@@ -34,7 +33,6 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;
pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

View File

@@ -287,6 +287,7 @@ impl StartCommand {
.await
.context(StartDatanodeSnafu)?;
let cluster_id = 0; // TODO(hl): read from config
let member_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -295,10 +296,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Datanode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Datanode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let meta_backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),

View File

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -241,6 +241,9 @@ impl StartCommand {
let mut opts = opts.component;
opts.grpc.detect_server_addr();
// TODO(discord9): make it not optionale after cluster id is required
let cluster_id = opts.cluster_id.unwrap_or(0);
let member_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -249,10 +252,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Flownode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Flownode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let cache_max_capacity = meta_config.metadata_cache_max_capacity;
let cache_ttl = meta_config.metadata_cache_ttl;
@@ -311,6 +317,8 @@ impl StartCommand {
Arc::new(executor),
);
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,
@@ -318,6 +326,7 @@ impl StartCommand {
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
Arc::new(frontend_client),
)
.with_heartbeat_task(heartbeat_task);

View File

@@ -295,10 +295,14 @@ impl StartCommand {
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;
let meta_client =
meta_client::create_meta_client(MetaClientType::Frontend, meta_client_options)
.await
.context(MetaClientInitSnafu)?;
let cluster_id = 0; // (TODO: jeremy): It is currently a reserved field and has not been enabled.
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Frontend,
meta_client_options,
)
.await
.context(MetaClientInitSnafu)?;
// TODO(discord9): add helper function to ease the creation of cache registry&such
let cached_meta_backend =

View File

@@ -54,7 +54,10 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use flow::{
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
FrontendInvoker,
};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -533,12 +536,16 @@ impl StartCommand {
flow: opts.flow.clone(),
..Default::default()
};
let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client),
);
let flownode = Arc::new(
flow_builder

View File

@@ -28,6 +28,7 @@ use crate::error::{
InvalidRoleSnafu, ParseNumSnafu, Result,
};
use crate::peer::Peer;
use crate::ClusterId;
const CLUSTER_NODE_INFO_PREFIX: &str = "__meta_cluster_node_info";
@@ -55,9 +56,12 @@ pub trait ClusterInfo {
// TODO(jeremy): Other info, like region status, etc.
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-0-{role}-{node_id}`.
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct NodeInfoKey {
/// The cluster id.
// todo(hl): remove cluster_id as it is not assigned anywhere.
pub cluster_id: ClusterId,
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
pub role: Role,
/// The node id.
@@ -80,15 +84,24 @@ impl NodeInfoKey {
_ => peer.id,
};
Some(NodeInfoKey { role, node_id })
Some(NodeInfoKey {
cluster_id: header.cluster_id,
role,
node_id,
})
}
pub fn key_prefix() -> String {
format!("{}-0-", CLUSTER_NODE_INFO_PREFIX)
pub fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
format!("{}-{}-", CLUSTER_NODE_INFO_PREFIX, cluster_id)
}
pub fn key_prefix_with_role(role: Role) -> String {
format!("{}-0-{}-", CLUSTER_NODE_INFO_PREFIX, i32::from(role))
pub fn key_prefix_with_role(cluster_id: ClusterId, role: Role) -> String {
format!(
"{}-{}-{}-",
CLUSTER_NODE_INFO_PREFIX,
cluster_id,
i32::from(role)
)
}
}
@@ -180,10 +193,15 @@ impl FromStr for NodeInfoKey {
let caps = CLUSTER_NODE_INFO_PREFIX_PATTERN
.captures(key)
.context(InvalidNodeInfoKeySnafu { key })?;
ensure!(caps.len() == 4, InvalidNodeInfoKeySnafu { key });
let cluster_id = caps[1].to_string();
let role = caps[2].to_string();
let node_id = caps[3].to_string();
let cluster_id: u64 = cluster_id.parse().context(ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let role: i32 = role.parse().context(ParseNumSnafu {
err_msg: format!("invalid role {role}"),
})?;
@@ -192,7 +210,11 @@ impl FromStr for NodeInfoKey {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self { role, node_id })
Ok(Self {
cluster_id,
role,
node_id,
})
}
}
@@ -211,8 +233,9 @@ impl TryFrom<Vec<u8>> for NodeInfoKey {
impl From<&NodeInfoKey> for Vec<u8> {
fn from(key: &NodeInfoKey) -> Self {
format!(
"{}-0-{}-{}",
"{}-{}-{}-{}",
CLUSTER_NODE_INFO_PREFIX,
key.cluster_id,
i32::from(key.role),
key.node_id
)
@@ -285,6 +308,7 @@ mod tests {
#[test]
fn test_node_info_key_round_trip() {
let key = NodeInfoKey {
cluster_id: 1,
role: Datanode,
node_id: 2,
};
@@ -292,6 +316,7 @@ mod tests {
let key_bytes: Vec<u8> = (&key).into();
let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
assert_eq!(1, new_key.cluster_id);
assert_eq!(Datanode, new_key.role);
assert_eq!(2, new_key.node_id);
}
@@ -337,11 +362,11 @@ mod tests {
#[test]
fn test_node_info_key_prefix() {
let prefix = NodeInfoKey::key_prefix();
assert_eq!(prefix, "__meta_cluster_node_info-0-");
let prefix = NodeInfoKey::key_prefix_with_cluster_id(1);
assert_eq!(prefix, "__meta_cluster_node_info-1-");
let prefix = NodeInfoKey::key_prefix_with_role(Frontend);
assert_eq!(prefix, "__meta_cluster_node_info-0-1-");
let prefix = NodeInfoKey::key_prefix_with_role(2, Frontend);
assert_eq!(prefix, "__meta_cluster_node_info-2-1-");
}
#[test]
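To make the new key layout concrete, here is a minimal, self-contained sketch of the `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}` encoding introduced above. The struct and parser are stand-ins (the real code lives in `common_meta` and parses the key with a regex); the numeric role value used here is arbitrary.

```rust
// Stand-in sketch of the NodeInfoKey layout; not the real common_meta types.
const PREFIX: &str = "__meta_cluster_node_info";

#[derive(Debug, PartialEq)]
struct NodeInfoKey {
    cluster_id: u64,
    role: i32, // the real code converts Role to i32; 0 is an arbitrary placeholder
    node_id: u64,
}

fn encode(key: &NodeInfoKey) -> String {
    format!("{}-{}-{}-{}", PREFIX, key.cluster_id, key.role, key.node_id)
}

fn decode(s: &str) -> Option<NodeInfoKey> {
    // Strip the constant prefix, then parse `{cluster_id}-{role}-{node_id}`.
    let rest = s.strip_prefix(PREFIX)?.strip_prefix('-')?;
    let mut parts = rest.splitn(3, '-');
    Some(NodeInfoKey {
        cluster_id: parts.next()?.parse().ok()?,
        role: parts.next()?.parse().ok()?,
        node_id: parts.next()?.parse().ok()?,
    })
}

fn main() {
    let key = NodeInfoKey { cluster_id: 1, role: 0, node_id: 2 };
    let encoded = encode(&key);
    assert_eq!(encoded, "__meta_cluster_node_info-1-0-2");
    assert_eq!(decode(&encoded), Some(key));
}
```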

View File

@@ -25,8 +25,8 @@ use store_api::region_engine::{RegionRole, RegionStatistic};
use store_api::storage::RegionId;
use table::metadata::TableId;
use crate::error;
use crate::error::Result;
use crate::{error, ClusterId};
pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
@@ -48,10 +48,11 @@ lazy_static! {
/// The key of the datanode stat in the storage.
///
/// The format is `__meta_datanode_stat-0-{node_id}`.
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Stat {
pub timestamp_millis: i64,
pub cluster_id: ClusterId,
// The datanode Id.
pub id: u64,
// The datanode address.
@@ -101,7 +102,10 @@ impl Stat {
}
pub fn stat_key(&self) -> DatanodeStatKey {
DatanodeStatKey { node_id: self.id }
DatanodeStatKey {
cluster_id: self.cluster_id,
node_id: self.id,
}
}
/// Returns a tuple array containing [RegionId] and [RegionRole].
@@ -141,7 +145,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
} = value;
match (header, peer) {
(Some(_header), Some(peer)) => {
(Some(header), Some(peer)) => {
let region_stats = region_stats
.iter()
.map(RegionStat::from)
@@ -149,6 +153,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
Ok(Self {
timestamp_millis: time_util::current_time_millis(),
cluster_id: header.cluster_id,
// datanode id
id: peer.id,
// datanode address
@@ -191,24 +196,32 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
/// The key of the datanode stat in the memory store.
///
/// The format is `__meta_datanode_stat-0-{node_id}`.
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DatanodeStatKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl DatanodeStatKey {
/// The key prefix.
pub fn prefix_key() -> Vec<u8> {
// todo(hl): remove cluster id in prefix
format!("{DATANODE_STAT_PREFIX}-0-").into_bytes()
format!("{DATANODE_STAT_PREFIX}-").into_bytes()
}
/// The key prefix with the cluster id.
pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
}
}
impl From<DatanodeStatKey> for Vec<u8> {
fn from(value: DatanodeStatKey) -> Self {
// todo(hl): remove cluster id in prefix
format!("{}-0-{}", DATANODE_STAT_PREFIX, value.node_id).into_bytes()
format!(
"{}-{}-{}",
DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
)
.into_bytes()
}
}
@@ -221,12 +234,20 @@ impl FromStr for DatanodeStatKey {
.context(error::InvalidStatKeySnafu { key })?;
ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self { node_id })
Ok(Self {
cluster_id,
node_id,
})
}
}
@@ -300,6 +321,7 @@ mod tests {
#[test]
fn test_stat_key() {
let stat = Stat {
cluster_id: 3,
id: 101,
region_num: 10,
..Default::default()
@@ -307,12 +329,14 @@ mod tests {
let stat_key = stat.stat_key();
assert_eq!(3, stat_key.cluster_id);
assert_eq!(101, stat_key.node_id);
}
#[test]
fn test_stat_val_round_trip() {
let stat = Stat {
cluster_id: 0,
id: 101,
region_num: 100,
..Default::default()
@@ -327,6 +351,7 @@ mod tests {
assert_eq!(1, stats.len());
let stat = stats.first().unwrap();
assert_eq!(0, stat.cluster_id);
assert_eq!(101, stat.id);
assert_eq!(100, stat.region_num);
}
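A short stand-in sketch of the two stat-key prefixes from the hunk above: `prefix_key` (no cluster id) matches datanode stats from every cluster, while `key_prefix_with_cluster_id` narrows a range scan to a single cluster. The helpers below only mirror the string layout; they are not the real `common_meta` API.

```rust
// Full key layout: `__meta_datanode_stat-{cluster_id}-{node_id}`.
const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";

fn full_key(cluster_id: u64, node_id: u64) -> String {
    format!("{DATANODE_STAT_PREFIX}-{cluster_id}-{node_id}")
}

// Matches stats from every cluster (the diff drops the hard-coded `-0-` here).
fn prefix_key() -> String {
    format!("{DATANODE_STAT_PREFIX}-")
}

// Matches stats from a single cluster only.
fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
    format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
}

fn main() {
    let key = full_key(3, 101); // "__meta_datanode_stat-3-101"
    assert!(key.starts_with(&prefix_key()));
    assert!(key.starts_with(&key_prefix_with_cluster_id(3)));
    assert!(!key.starts_with(&key_prefix_with_cluster_id(4)));
}
```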

View File

@@ -30,7 +30,7 @@ use crate::node_manager::NodeManagerRef;
use crate::region_keeper::MemoryRegionKeeperRef;
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::DatanodeId;
use crate::{ClusterId, DatanodeId};
pub mod alter_database;
pub mod alter_logical_tables;
@@ -57,6 +57,7 @@ pub mod utils;
#[derive(Debug, Default)]
pub struct ExecutorContext {
pub cluster_id: Option<u64>,
pub tracing_context: Option<W3cTrace>,
}
@@ -89,6 +90,10 @@ pub trait ProcedureExecutor: Send + Sync {
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
pub struct TableMetadataAllocatorContext {
pub cluster_id: ClusterId,
}
/// Metadata allocated to a table.
#[derive(Default)]
pub struct TableMetadata {
@@ -103,7 +108,7 @@ pub struct TableMetadata {
pub type RegionFailureDetectorControllerRef = Arc<dyn RegionFailureDetectorController>;
pub type DetectingRegion = (DatanodeId, RegionId);
pub type DetectingRegion = (ClusterId, DatanodeId, RegionId);
/// Used for actively registering Region failure detectors.
///

View File

@@ -30,6 +30,7 @@ use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock};
use crate::rpc::ddl::UnsetDatabaseOption::{self};
use crate::rpc::ddl::{AlterDatabaseKind, AlterDatabaseTask, SetDatabaseOption};
use crate::ClusterId;
pub struct AlterDatabaseProcedure {
pub context: DdlContext,
@@ -64,10 +65,14 @@ fn build_new_schema_value(
impl AlterDatabaseProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterDatabase";
pub fn new(task: AlterDatabaseTask, context: DdlContext) -> Result<Self> {
pub fn new(
cluster_id: ClusterId,
task: AlterDatabaseTask,
context: DdlContext,
) -> Result<Self> {
Ok(Self {
context,
data: AlterDatabaseData::new(task)?,
data: AlterDatabaseData::new(task, cluster_id)?,
})
}
@@ -178,6 +183,7 @@ enum AlterDatabaseState {
/// The data of alter database procedure.
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterDatabaseData {
cluster_id: ClusterId,
state: AlterDatabaseState,
kind: AlterDatabaseKind,
catalog_name: String,
@@ -186,8 +192,9 @@ pub struct AlterDatabaseData {
}
impl AlterDatabaseData {
pub fn new(task: AlterDatabaseTask) -> Result<Self> {
pub fn new(task: AlterDatabaseTask, cluster_id: ClusterId) -> Result<Self> {
Ok(Self {
cluster_id,
state: AlterDatabaseState::Prepare,
kind: AlterDatabaseKind::try_from(task.alter_expr.kind.unwrap())?,
catalog_name: task.alter_expr.catalog_name,

View File

@@ -37,9 +37,9 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::find_leaders;
use crate::{metrics, ClusterId};
pub struct AlterLogicalTablesProcedure {
pub context: DdlContext,
@@ -50,6 +50,7 @@ impl AlterLogicalTablesProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterLogicalTables";
pub fn new(
cluster_id: ClusterId,
tasks: Vec<AlterTableTask>,
physical_table_id: TableId,
context: DdlContext,
@@ -57,6 +58,7 @@ impl AlterLogicalTablesProcedure {
Self {
context,
data: AlterTablesData {
cluster_id,
state: AlterTablesState::Prepare,
tasks,
table_info_values: vec![],
@@ -238,6 +240,7 @@ impl Procedure for AlterLogicalTablesProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterTablesData {
cluster_id: ClusterId,
state: AlterTablesState,
tasks: Vec<AlterTableTask>,
/// Table info values before the alter operation.

View File

@@ -45,9 +45,9 @@ use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
use crate::{metrics, ClusterId};
/// The alter table procedure
pub struct AlterTableProcedure {
@@ -64,11 +64,16 @@ pub struct AlterTableProcedure {
impl AlterTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterTable";
pub fn new(table_id: TableId, task: AlterTableTask, context: DdlContext) -> Result<Self> {
pub fn new(
cluster_id: ClusterId,
table_id: TableId,
task: AlterTableTask,
context: DdlContext,
) -> Result<Self> {
task.validate()?;
Ok(Self {
context,
data: AlterTableData::new(task, table_id),
data: AlterTableData::new(task, table_id, cluster_id),
new_table_info: None,
})
}
@@ -302,6 +307,7 @@ enum AlterTableState {
// The serialized data of alter table.
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterTableData {
cluster_id: ClusterId,
state: AlterTableState,
task: AlterTableTask,
table_id: TableId,
@@ -312,11 +318,12 @@ pub struct AlterTableData {
}
impl AlterTableData {
pub fn new(task: AlterTableTask, table_id: TableId) -> Self {
pub fn new(task: AlterTableTask, table_id: TableId, cluster_id: u64) -> Self {
Self {
state: AlterTableState::Prepare,
task,
table_id,
cluster_id,
table_info_value: None,
region_distribution: None,
}

View File

@@ -167,9 +167,10 @@ mod tests {
use crate::test_util::{new_ddl_context, MockDatanodeManager};
/// Prepares a region with schema `[ts: Timestamp, host: Tag, cpu: Field]`.
async fn prepare_ddl_context() -> (DdlContext, TableId, RegionId, String) {
async fn prepare_ddl_context() -> (DdlContext, u64, TableId, RegionId, String) {
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager);
let cluster_id = 1;
let table_id = 1024;
let region_id = RegionId::new(table_id, 1);
let table_name = "foo";
@@ -224,12 +225,19 @@ mod tests {
)
.await
.unwrap();
(ddl_context, table_id, region_id, table_name.to_string())
(
ddl_context,
cluster_id,
table_id,
region_id,
table_name.to_string(),
)
}
#[tokio::test]
async fn test_make_alter_region_request() {
let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
let (ddl_context, cluster_id, table_id, region_id, table_name) =
prepare_ddl_context().await;
let task = AlterTableTask {
alter_table: AlterTableExpr {
@@ -257,7 +265,8 @@ mod tests {
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure
@@ -298,7 +307,8 @@ mod tests {
#[tokio::test]
async fn test_make_alter_column_type_region_request() {
let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
let (ddl_context, cluster_id, table_id, region_id, table_name) =
prepare_ddl_context().await;
let task = AlterTableTask {
alter_table: AlterTableExpr {
@@ -315,7 +325,8 @@ mod tests {
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure

View File

@@ -46,9 +46,9 @@ use crate::key::flow::flow_route::FlowRouteValue;
use crate::key::table_name::TableNameKey;
use crate::key::{DeserializedValueWithBytes, FlowId, FlowPartitionId};
use crate::lock_key::{CatalogLock, FlowNameLock, TableNameLock};
use crate::metrics;
use crate::peer::Peer;
use crate::rpc::ddl::{CreateFlowTask, QueryContext};
use crate::{metrics, ClusterId};
/// The procedure of flow creation.
pub struct CreateFlowProcedure {
@@ -60,10 +60,16 @@ impl CreateFlowProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateFlow";
/// Returns a new [CreateFlowProcedure].
pub fn new(task: CreateFlowTask, query_context: QueryContext, context: DdlContext) -> Self {
pub fn new(
cluster_id: ClusterId,
task: CreateFlowTask,
query_context: QueryContext,
context: DdlContext,
) -> Self {
Self {
context,
data: CreateFlowData {
cluster_id,
task,
flow_id: None,
peers: vec![],
@@ -337,6 +343,7 @@ pub enum FlowType {
impl FlowType {
pub const RECORDING_RULE: &str = "recording_rule";
pub const STREAMING: &str = "streaming";
pub const FLOW_TYPE_KEY: &str = "flow_type";
}
impl Default for FlowType {
@@ -357,6 +364,7 @@ impl fmt::Display for FlowType {
/// The serializable data.
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateFlowData {
pub(crate) cluster_id: ClusterId,
pub(crate) state: CreateFlowState,
pub(crate) task: CreateFlowTask,
pub(crate) flow_id: Option<FlowId>,
@@ -391,7 +399,8 @@ impl From<&CreateFlowData> for CreateRequest {
};
let flow_type = value.flow_type.unwrap_or_default().to_string();
req.flow_options.insert("flow_type".to_string(), flow_type);
req.flow_options
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
req
}
}
@@ -423,7 +432,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
.collect::<Vec<_>>();
let flow_type = value.flow_type.unwrap_or_default().to_string();
options.insert("flow_type".to_string(), flow_type);
options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
let flow_info = FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),

View File

@@ -23,10 +23,11 @@ impl CreateFlowProcedure {
pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
//TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
let partitions = 1;
let cluster_id = self.data.cluster_id;
let (flow_id, peers) = self
.context
.flow_metadata_allocator
.create(partitions)
.create(cluster_id, partitions)
.await?;
self.data.flow_id = Some(flow_id);
self.data.peers = peers;

View File

@@ -36,9 +36,9 @@ use crate::ddl::DdlContext;
use crate::error::{DecodeJsonSnafu, MetadataCorruptionSnafu, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{find_leaders, RegionRoute};
use crate::{metrics, ClusterId};
pub struct CreateLogicalTablesProcedure {
pub context: DdlContext,
@@ -49,6 +49,7 @@ impl CreateLogicalTablesProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateLogicalTables";
pub fn new(
cluster_id: ClusterId,
tasks: Vec<CreateTableTask>,
physical_table_id: TableId,
context: DdlContext,
@@ -56,6 +57,7 @@ impl CreateLogicalTablesProcedure {
Self {
context,
data: CreateTablesData {
cluster_id,
state: CreateTablesState::Prepare,
tasks,
table_ids_already_exists: vec![],
@@ -243,6 +245,7 @@ impl Procedure for CreateLogicalTablesProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateTablesData {
cluster_id: ClusterId,
state: CreateTablesState,
tasks: Vec<CreateTableTask>,
table_ids_already_exists: Vec<Option<TableId>>,

View File

@@ -37,17 +37,17 @@ use crate::ddl::utils::{
add_peer_context_if_needed, convert_region_routes_to_detecting_regions, handle_retry_error,
region_storage_path,
};
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{
find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
use crate::{metrics, ClusterId};
pub struct CreateTableProcedure {
pub context: DdlContext,
pub creator: TableCreator,
@@ -56,10 +56,10 @@ pub struct CreateTableProcedure {
impl CreateTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateTable";
pub fn new(task: CreateTableTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateTableTask, context: DdlContext) -> Self {
Self {
context,
creator: TableCreator::new(task),
creator: TableCreator::new(cluster_id, task),
}
}
@@ -154,7 +154,12 @@ impl CreateTableProcedure {
} = self
.context
.table_metadata_allocator
.create(&self.creator.data.task)
.create(
&TableMetadataAllocatorContext {
cluster_id: self.creator.data.cluster_id,
},
&self.creator.data.task,
)
.await?;
self.creator
.set_allocated_metadata(table_id, table_route, region_wal_options);
@@ -263,6 +268,7 @@ impl CreateTableProcedure {
/// - Failed to create table metadata.
async fn on_create_metadata(&mut self) -> Result<Status> {
let table_id = self.table_id();
let cluster_id = self.creator.data.cluster_id;
let manager = &self.context.table_metadata_manager;
let raw_table_info = self.table_info().clone();
@@ -270,8 +276,10 @@ impl CreateTableProcedure {
let region_wal_options = self.region_wal_options()?.clone();
// Safety: the table_route must be allocated.
let physical_table_route = self.table_route()?.clone();
let detecting_regions =
convert_region_routes_to_detecting_regions(&physical_table_route.region_routes);
let detecting_regions = convert_region_routes_to_detecting_regions(
cluster_id,
&physical_table_route.region_routes,
);
let table_route = TableRouteValue::Physical(physical_table_route);
manager
.create_table_metadata(raw_table_info, table_route, region_wal_options)
@@ -343,10 +351,11 @@ pub struct TableCreator {
}
impl TableCreator {
pub fn new(task: CreateTableTask) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateTableTask) -> Self {
Self {
data: CreateTableData {
state: CreateTableState::Prepare,
cluster_id,
task,
table_route: None,
region_wal_options: None,
@@ -412,6 +421,7 @@ pub struct CreateTableData {
table_route: Option<PhysicalTableRouteValue>,
/// None stands for not allocated yet.
pub region_wal_options: Option<HashMap<RegionNumber, String>>,
pub cluster_id: ClusterId,
}
impl CreateTableData {

View File

@@ -24,13 +24,13 @@ use table::table_reference::TableReference;
use crate::cache_invalidator::Context;
use crate::ddl::utils::handle_retry_error;
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::CreateViewTask;
use crate::{metrics, ClusterId};
// The procedure to execute `[CreateViewTask]`.
pub struct CreateViewProcedure {
@@ -41,11 +41,12 @@ pub struct CreateViewProcedure {
impl CreateViewProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateView";
pub fn new(task: CreateViewTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateViewTask, context: DdlContext) -> Self {
Self {
context,
data: CreateViewData {
state: CreateViewState::Prepare,
cluster_id,
task,
need_update: false,
},
@@ -143,7 +144,12 @@ impl CreateViewProcedure {
let TableMetadata { table_id, .. } = self
.context
.table_metadata_allocator
.create_view(&None)
.create_view(
&TableMetadataAllocatorContext {
cluster_id: self.data.cluster_id,
},
&None,
)
.await?;
self.data.set_allocated_metadata(table_id, false);
}
@@ -279,6 +285,7 @@ pub enum CreateViewState {
pub struct CreateViewData {
pub state: CreateViewState,
pub task: CreateViewTask,
pub cluster_id: ClusterId,
/// Whether to update the view info.
pub need_update: bool,
}

View File

@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
use crate::error::Result;
use crate::key::table_name::TableNameValue;
use crate::lock_key::{CatalogLock, SchemaLock};
use crate::ClusterId;
pub struct DropDatabaseProcedure {
/// The context of procedure runtime.
@@ -53,6 +54,7 @@ pub(crate) enum DropTableTarget {
/// Context of [DropDatabaseProcedure] execution.
pub(crate) struct DropDatabaseContext {
cluster_id: ClusterId,
catalog: String,
schema: String,
drop_if_exists: bool,
@@ -85,6 +87,7 @@ impl DropDatabaseProcedure {
Self {
runtime_context: context,
context: DropDatabaseContext {
cluster_id: 0,
catalog,
schema,
drop_if_exists,
@@ -105,6 +108,7 @@ impl DropDatabaseProcedure {
Ok(Self {
runtime_context,
context: DropDatabaseContext {
cluster_id: 0,
catalog,
schema,
drop_if_exists,

View File

@@ -217,10 +217,11 @@ mod tests {
async fn test_next_without_logical_tables() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
create_physical_table(&ddl_context, "phy").await;
create_physical_table(&ddl_context, 0, "phy").await;
// It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -251,11 +252,12 @@ mod tests {
async fn test_next_with_logical_tables() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
create_logical_table(ddl_context.clone(), physical_table_id, "metric_0").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await;
// It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -284,6 +286,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut state = DropDatabaseCursor::new(DropTableTarget::Physical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,

View File

@@ -98,10 +98,11 @@ impl State for DropDatabaseExecutor {
async fn next(
&mut self,
ddl_ctx: &DdlContext,
_ctx: &mut DropDatabaseContext,
ctx: &mut DropDatabaseContext,
) -> Result<(Box<dyn State>, Status)> {
self.register_dropping_regions(ddl_ctx)?;
let executor = DropTableExecutor::new(self.table_name.clone(), self.table_id, true);
let executor =
DropTableExecutor::new(ctx.cluster_id, self.table_name.clone(), self.table_id, true);
// Deletes metadata for table permanently.
let table_route_value = TableRouteValue::new(
self.table_id,
@@ -186,7 +187,7 @@ mod tests {
async fn test_next_with_physical_table() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -202,6 +203,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -214,6 +216,7 @@ mod tests {
}
// Execute again
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -236,8 +239,8 @@ mod tests {
async fn test_next_logical_table() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
create_logical_table(ddl_context.clone(), physical_table_id, "metric").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await;
let logical_table_id = physical_table_id + 1;
let (_, table_route) = ddl_context
.table_metadata_manager
@@ -254,6 +257,7 @@ mod tests {
DropTableTarget::Logical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -266,6 +270,7 @@ mod tests {
}
// Execute again
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -340,7 +345,7 @@ mod tests {
async fn test_next_retryable_err() {
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -355,6 +360,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -368,7 +374,7 @@ mod tests {
async fn test_on_recovery() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -384,6 +390,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,

View File

@@ -118,6 +118,7 @@ mod tests {
.unwrap();
let mut state = DropDatabaseRemoveMetadata;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,
@@ -144,6 +145,7 @@ mod tests {
// Schema not exists
let mut state = DropDatabaseRemoveMetadata;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,

View File

@@ -89,6 +89,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut step = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: false,
@@ -104,6 +105,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut state = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,
@@ -126,6 +128,7 @@ mod tests {
.unwrap();
let mut state = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: false,

View File

@@ -37,8 +37,8 @@ use crate::instruction::{CacheIdent, DropFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
use crate::lock_key::{CatalogLock, FlowLock};
use crate::metrics;
use crate::rpc::ddl::DropFlowTask;
use crate::{metrics, ClusterId};
/// The procedure for dropping a flow.
pub struct DropFlowProcedure {
@@ -51,11 +51,12 @@ pub struct DropFlowProcedure {
impl DropFlowProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropFlow";
pub fn new(task: DropFlowTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: DropFlowTask, context: DdlContext) -> Self {
Self {
context,
data: DropFlowData {
state: DropFlowState::Prepare,
cluster_id,
task,
flow_info_value: None,
flow_route_values: vec![],
@@ -217,6 +218,7 @@ impl Procedure for DropFlowProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropFlowData {
state: DropFlowState,
cluster_id: ClusterId,
task: DropFlowTask,
pub(crate) flow_info_value: Option<FlowInfoValue>,
pub(crate) flow_route_values: Vec<FlowRouteValue>,

View File

@@ -40,10 +40,10 @@ use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::DropTableTask;
use crate::rpc::router::{operating_leader_regions, RegionRoute};
use crate::{metrics, ClusterId};
pub struct DropTableProcedure {
/// The context of procedure runtime.
@@ -59,8 +59,8 @@ pub struct DropTableProcedure {
impl DropTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";
pub fn new(task: DropTableTask, context: DdlContext) -> Self {
let data = DropTableData::new(task);
pub fn new(cluster_id: ClusterId, task: DropTableTask, context: DdlContext) -> Self {
let data = DropTableData::new(cluster_id, task);
let executor = data.build_executor();
Self {
context,
@@ -268,6 +268,7 @@ impl Procedure for DropTableProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct DropTableData {
pub state: DropTableState,
pub cluster_id: ClusterId,
pub task: DropTableTask,
pub physical_region_routes: Vec<RegionRoute>,
pub physical_table_id: Option<TableId>,
@@ -278,9 +279,10 @@ pub struct DropTableData {
}
impl DropTableData {
pub fn new(task: DropTableTask) -> Self {
pub fn new(cluster_id: ClusterId, task: DropTableTask) -> Self {
Self {
state: DropTableState::Prepare,
cluster_id,
task,
physical_region_routes: vec![],
physical_table_id: None,
@@ -299,6 +301,7 @@ impl DropTableData {
fn build_executor(&self) -> DropTableExecutor {
DropTableExecutor::new(
self.cluster_id,
self.task.table_name(),
self.task.table_id,
self.task.drop_if_exists,

View File

@@ -36,6 +36,7 @@ use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::ClusterId;
/// [Control] indicated to the caller whether to go to the next step.
#[derive(Debug)]
@@ -53,8 +54,14 @@ impl<T> Control<T> {
impl DropTableExecutor {
/// Returns the [DropTableExecutor].
pub fn new(table: TableName, table_id: TableId, drop_if_exists: bool) -> Self {
pub fn new(
cluster_id: ClusterId,
table: TableName,
table_id: TableId,
drop_if_exists: bool,
) -> Self {
Self {
cluster_id,
table,
table_id,
drop_if_exists,
@@ -67,6 +74,7 @@ impl DropTableExecutor {
/// - Invalidates the cache on the Frontend nodes.
/// - Drops the regions on the Datanode nodes.
pub struct DropTableExecutor {
cluster_id: ClusterId,
table: TableName,
table_id: TableId,
drop_if_exists: bool,
@@ -156,7 +164,7 @@ impl DropTableExecutor {
let detecting_regions = if table_route_value.is_physical() {
// Safety: checked.
let regions = table_route_value.region_routes().unwrap();
convert_region_routes_to_detecting_regions(regions)
convert_region_routes_to_detecting_regions(self.cluster_id, regions)
} else {
vec![]
};
@@ -313,6 +321,7 @@ mod tests {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ctx = new_ddl_context(node_manager);
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
true,
@@ -322,6 +331,7 @@ mod tests {
// Drops a non-exists table
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
false,
@@ -331,6 +341,7 @@ mod tests {
// Drops a exists table
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
false,

View File

@@ -31,8 +31,8 @@ use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::DropViewTask;
use crate::{metrics, ClusterId};
/// The procedure for dropping a view.
pub struct DropViewProcedure {
@@ -45,11 +45,12 @@ pub struct DropViewProcedure {
impl DropViewProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropView";
pub fn new(task: DropViewTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: DropViewTask, context: DdlContext) -> Self {
Self {
context,
data: DropViewData {
state: DropViewState::Prepare,
cluster_id,
task,
},
}
@@ -215,6 +216,7 @@ impl Procedure for DropViewProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropViewData {
state: DropViewState,
cluster_id: ClusterId,
task: DropViewTask,
}

View File

@@ -20,6 +20,7 @@ use crate::error::Result;
use crate::key::FlowId;
use crate::peer::Peer;
use crate::sequence::SequenceRef;
use crate::ClusterId;
/// The reference of [FlowMetadataAllocator].
pub type FlowMetadataAllocatorRef = Arc<FlowMetadataAllocator>;
@@ -59,9 +60,16 @@ impl FlowMetadataAllocator {
}
/// Allocates the [FlowId] and [Peer]s.
pub async fn create(&self, partitions: usize) -> Result<(FlowId, Vec<Peer>)> {
pub async fn create(
&self,
cluster_id: ClusterId,
partitions: usize,
) -> Result<(FlowId, Vec<Peer>)> {
let flow_id = self.allocate_flow_id().await?;
let peers = self.partition_peer_allocator.alloc(partitions).await?;
let peers = self
.partition_peer_allocator
.alloc(cluster_id, partitions)
.await?;
Ok((flow_id, peers))
}
@@ -71,7 +79,7 @@ impl FlowMetadataAllocator {
#[async_trait]
pub trait PartitionPeerAllocator: Send + Sync {
/// Allocates [Peer] nodes for storing partitions.
async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>>;
async fn alloc(&self, cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>>;
}
/// [PartitionPeerAllocatorRef] allocates [Peer]s for partitions.
@@ -81,7 +89,7 @@ struct NoopPartitionPeerAllocator;
#[async_trait]
impl PartitionPeerAllocator for NoopPartitionPeerAllocator {
async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>> {
async fn alloc(&self, _cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>> {
Ok(vec![Peer::default(); partitions])
}
}
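For illustration, a stand-in implementation of the new cluster-aware allocator trait (the `PeerAllocator` change in the next file follows the same pattern). Everything here is simplified for the sketch: `Peer` and `ClusterId` are local stand-ins, the error type is dropped, the round-robin strategy is an assumption, and `async-trait` plus `tokio` are presumed available as they are elsewhere in the workspace. The crate's `NoopPartitionPeerAllocator` simply ignores `cluster_id` and returns default peers.

```rust
// Stand-in sketch of a cluster-aware partition peer allocator.
use async_trait::async_trait;

type ClusterId = u64;

#[derive(Clone, Debug)]
struct Peer {
    id: u64,
    addr: String,
}

#[async_trait]
trait PartitionPeerAllocator: Send + Sync {
    async fn alloc(&self, cluster_id: ClusterId, partitions: usize) -> Vec<Peer>;
}

/// Spreads partitions round-robin over a fixed peer list; a real allocator could
/// use `cluster_id` to restrict the candidate flownodes to one cluster.
struct RoundRobinAllocator {
    peers: Vec<Peer>,
}

#[async_trait]
impl PartitionPeerAllocator for RoundRobinAllocator {
    async fn alloc(&self, cluster_id: ClusterId, partitions: usize) -> Vec<Peer> {
        println!("allocating {partitions} partition(s) for cluster {cluster_id}");
        (0..partitions)
            .map(|i| self.peers[i % self.peers.len()].clone())
            .collect()
    }
}

#[tokio::main]
async fn main() {
    let allocator = RoundRobinAllocator {
        peers: vec![
            Peer { id: 1, addr: "10.0.0.1:4001".to_string() },
            Peer { id: 2, addr: "10.0.0.2:4001".to_string() },
        ],
    };
    let peers = allocator.alloc(0, 3).await;
    assert_eq!(peers.len(), 3);
    println!("{peers:?}");
}
```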

View File

@@ -20,7 +20,7 @@ use common_telemetry::{debug, info};
use snafu::ensure;
use store_api::storage::{RegionId, RegionNumber, TableId};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result, UnsupportedSnafu};
use crate::key::table_route::PhysicalTableRouteValue;
use crate::peer::Peer;
@@ -109,6 +109,7 @@ impl TableMetadataAllocator {
async fn create_table_route(
&self,
ctx: &TableMetadataAllocatorContext,
table_id: TableId,
task: &CreateTableTask,
) -> Result<PhysicalTableRouteValue> {
@@ -120,7 +121,7 @@ impl TableMetadataAllocator {
}
);
let peers = self.peer_allocator.alloc(regions).await?;
let peers = self.peer_allocator.alloc(ctx, regions).await?;
let region_routes = task
.partitions
.iter()
@@ -146,7 +147,11 @@ impl TableMetadataAllocator {
}
/// Create VIEW metadata
pub async fn create_view(&self, table_id: &Option<api::v1::TableId>) -> Result<TableMetadata> {
pub async fn create_view(
&self,
_ctx: &TableMetadataAllocatorContext,
table_id: &Option<api::v1::TableId>,
) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(table_id).await?;
Ok(TableMetadata {
@@ -155,9 +160,13 @@ impl TableMetadataAllocator {
})
}
pub async fn create(&self, task: &CreateTableTask) -> Result<TableMetadata> {
pub async fn create(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(&task.create_table.table_id).await?;
let table_route = self.create_table_route(table_id, task).await?;
let table_route = self.create_table_route(ctx, table_id, task).await?;
let region_wal_options = self.create_wal_options(&table_route)?;
debug!(
@@ -179,14 +188,19 @@ pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
#[async_trait]
pub trait PeerAllocator: Send + Sync {
/// Allocates `regions` size [`Peer`]s.
async fn alloc(&self, regions: usize) -> Result<Vec<Peer>>;
async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
-> Result<Vec<Peer>>;
}
struct NoopPeerAllocator;
#[async_trait]
impl PeerAllocator for NoopPeerAllocator {
async fn alloc(&self, regions: usize) -> Result<Vec<Peer>> {
async fn alloc(
&self,
_ctx: &TableMetadataAllocatorContext,
regions: usize,
) -> Result<Vec<Peer>> {
Ok(vec![Peer::default(); regions])
}
}

View File

@@ -31,9 +31,10 @@ use crate::ddl::test_util::columns::TestColumnDefBuilder;
use crate::ddl::test_util::create_table::{
build_raw_table_info_from_expr, TestCreateTableExprBuilder,
};
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue;
use crate::rpc::ddl::CreateTableTask;
use crate::ClusterId;
pub async fn create_physical_table_metadata(
ddl_context: &DdlContext,
@@ -47,7 +48,11 @@ pub async fn create_physical_table_metadata(
.unwrap();
}
pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> TableId {
pub async fn create_physical_table(
ddl_context: &DdlContext,
cluster_id: ClusterId,
name: &str,
) -> TableId {
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task(name);
let TableMetadata {
@@ -56,7 +61,10 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -72,13 +80,15 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
pub async fn create_logical_table(
ddl_context: DdlContext,
cluster_id: ClusterId,
physical_table_id: TableId,
table_name: &str,
) -> TableId {
use std::assert_matches::assert_matches;
let tasks = vec![test_create_logical_table_task(table_name)];
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
let status = procedure.on_create_metadata().await.unwrap();

View File

@@ -86,6 +86,7 @@ fn make_alter_logical_table_rename_task(
async fn test_on_prepare_check_schema() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![
make_alter_logical_table_add_column_task(
Some("schema1"),
@@ -99,7 +100,8 @@ async fn test_on_prepare_check_schema() {
),
];
let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
@@ -108,46 +110,50 @@ async fn test_on_prepare_check_schema() {
async fn test_on_prepare_check_alter_kind() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![make_alter_logical_table_rename_task(
"schema1",
"table1",
"new_table1",
)];
let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
#[tokio::test]
async fn test_on_prepare_different_physical_table() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let phy1_id = create_physical_table(&ddl_context, "phy1").await;
create_logical_table(ddl_context.clone(), phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, "phy2").await;
create_logical_table(ddl_context.clone(), phy2_id, "table2").await;
let phy1_id = create_physical_table(&ddl_context, cluster_id, "phy1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, cluster_id, "phy2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy1_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy1_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
#[tokio::test]
async fn test_on_prepare_logical_table_not_exists() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -155,22 +161,23 @@ async fn test_on_prepare_logical_table_not_exists() {
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, TableNotFound { .. });
}
#[tokio::test]
async fn test_on_prepare() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -178,24 +185,25 @@ async fn test_on_prepare() {
make_alter_logical_table_add_column_task(None, "table3", vec!["column3".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let result = procedure.on_prepare().await;
assert_matches!(result, Ok(Status::Executing { persist: true }));
}
#[tokio::test]
async fn test_on_update_metadata() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), phy_id, "table4").await;
create_logical_table(ddl_context.clone(), phy_id, "table5").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table4").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table5").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]),
@@ -203,7 +211,7 @@ async fn test_on_update_metadata() {
make_alter_logical_table_add_column_task(None, "table3", vec!["new_col".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
@@ -221,21 +229,23 @@ async fn test_on_update_metadata() {
#[tokio::test]
async fn test_on_part_duplicate_alter_request() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["col_0".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["col_0".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone());
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
@@ -268,7 +278,8 @@ async fn test_on_part_duplicate_alter_request() {
),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone());
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });

View File

@@ -59,6 +59,7 @@ fn test_rename_alter_table_task(table_name: &str, new_table_name: &str) -> Alter
async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo", 1024);
// Puts a value to table name key.
ddl_context
@@ -72,7 +73,7 @@ async fn test_on_prepare_table_exists_err() {
.unwrap();
let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap();
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableAlreadyExists);
}
@@ -81,8 +82,9 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap();
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableNotFound);
}
@@ -93,6 +95,7 @@ async fn test_on_submit_alter_request() {
let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,8 @@ async fn test_on_submit_alter_request() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
@@ -177,6 +181,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
RequestOutdatedErrorDatanodeHandler,
));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -225,7 +230,8 @@ async fn test_on_submit_alter_request_with_outdated_request() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
}
@@ -234,6 +240,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
async fn test_on_update_metadata_rename() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let new_table_name = "bar";
let table_id = 1024;
@@ -250,7 +257,8 @@ async fn test_on_update_metadata_rename() {
.unwrap();
let task = test_rename_alter_table_task(table_name, new_table_name);
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.on_update_metadata().await.unwrap();
@@ -283,6 +291,7 @@ async fn test_on_update_metadata_rename() {
async fn test_on_update_metadata_add_columns() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -326,7 +335,8 @@ async fn test_on_update_metadata_add_columns() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap();
@@ -351,6 +361,7 @@ async fn test_on_update_metadata_add_columns() {
async fn test_on_update_table_options() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -387,7 +398,8 @@ async fn test_on_update_table_options() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap();

View File

@@ -25,11 +25,11 @@ use crate::ddl::create_flow::CreateFlowProcedure;
use crate::ddl::test_util::create_table::test_create_table_task;
use crate::ddl::test_util::flownode_handler::NaiveFlownodeHandler;
use crate::ddl::DdlContext;
use crate::error;
use crate::key::table_route::TableRouteValue;
use crate::key::FlowId;
use crate::rpc::ddl::CreateFlowTask;
use crate::test_util::{new_ddl_context, MockFlownodeManager};
use crate::{error, ClusterId};
pub(crate) fn test_create_flow_task(
name: &str,
@@ -53,6 +53,7 @@ pub(crate) fn test_create_flow_task(
#[tokio::test]
async fn test_create_flow_source_table_not_found() {
let cluster_id = 1;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
@@ -64,13 +65,14 @@ async fn test_create_flow_source_table_not_found() {
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task, query_ctx, ddl_context);
let mut procedure = CreateFlowProcedure::new(cluster_id, task, query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::TableNotFound { .. });
}
pub(crate) async fn create_test_flow(
ddl_context: &DdlContext,
cluster_id: ClusterId,
flow_name: &str,
source_table_names: Vec<TableName>,
sink_table_name: TableName,
@@ -82,7 +84,8 @@ pub(crate) async fn create_test_flow(
false,
);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone());
let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap();
@@ -91,6 +94,7 @@ pub(crate) async fn create_test_flow(
#[tokio::test]
async fn test_create_flow() {
let cluster_id = 1;
let table_id = 1024;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
@@ -114,6 +118,7 @@ async fn test_create_flow() {
.unwrap();
let flow_id = create_test_flow(
&ddl_context,
cluster_id,
"my_flow",
source_table_names.clone(),
sink_table_name.clone(),
@@ -129,7 +134,8 @@ async fn test_create_flow() {
true,
);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone());
let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap();
assert_eq!(*flow_id, 1024);
@@ -137,7 +143,7 @@ async fn test_create_flow() {
// Creates again
let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context);
let mut procedure = CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowAlreadyExists { .. });
}

View File

@@ -26,7 +26,7 @@ use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
use crate::ddl::test_util::{
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::Error;
use crate::key::table_route::TableRouteValue;
use crate::test_util::{new_ddl_context, MockDatanodeManager};
@@ -35,9 +35,11 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
async fn test_on_prepare_physical_table_not_found() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = 1024u32;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableRouteNotFound { .. });
}
@@ -46,6 +48,7 @@ async fn test_on_prepare_physical_table_not_found() {
async fn test_on_prepare() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -54,7 +57,10 @@ async fn test_on_prepare() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -67,7 +73,8 @@ async fn test_on_prepare() {
// The create logical table procedure.
let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = table_id;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
}
@@ -76,6 +83,7 @@ async fn test_on_prepare() {
async fn test_on_prepare_logical_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -84,7 +92,10 @@ async fn test_on_prepare_logical_table_exists_err() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -108,7 +119,7 @@ async fn test_on_prepare_logical_table_exists_err() {
// The create logical table procedure.
let physical_table_id = table_id;
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context);
CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -118,6 +129,7 @@ async fn test_on_prepare_logical_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -126,7 +138,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -152,7 +167,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
// Sets `create_if_not_exists`
task.create_table.create_if_not_exists = true;
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context);
CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
let output = status.downcast_output_ref::<Vec<u32>>().unwrap();
assert_eq!(*output, vec![8192]);
@@ -162,6 +177,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -170,7 +186,10 @@ async fn test_on_prepare_part_logical_tables_exist() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -197,6 +216,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task],
physical_table_id,
ddl_context,
@@ -209,6 +229,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
async fn test_on_create_metadata() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -217,7 +238,10 @@ async fn test_on_create_metadata() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -233,6 +257,7 @@ async fn test_on_create_metadata() {
let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, yet_another_task],
physical_table_id,
ddl_context,
@@ -254,6 +279,7 @@ async fn test_on_create_metadata() {
async fn test_on_create_metadata_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -262,7 +288,10 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -289,6 +318,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task],
physical_table_id,
ddl_context,
@@ -310,6 +340,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
async fn test_on_create_metadata_err() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -318,7 +349,10 @@ async fn test_on_create_metadata_err() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -334,6 +368,7 @@ async fn test_on_create_metadata_err() {
let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task.clone(), yet_another_task],
physical_table_id,
ddl_context.clone(),

View File

@@ -87,6 +87,7 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
// Puts a value to table name key.
@@ -99,7 +100,7 @@ async fn test_on_prepare_table_exists_err() {
)
.await
.unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -109,6 +110,7 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true;
task.table_info.ident.table_id = 1024;
@@ -122,7 +124,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
)
.await
.unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -133,9 +135,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.table_id(), 1024);
@@ -145,10 +148,11 @@ async fn test_on_prepare_without_create_if_table_exists() {
async fn test_on_prepare_with_no_partition_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.partitions = vec![];
task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(err
@@ -161,9 +165,10 @@ async fn test_on_datanode_create_regions_should_retry() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -178,9 +183,10 @@ async fn test_on_datanode_create_regions_should_not_retry() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -195,9 +201,10 @@ async fn test_on_create_metadata_error() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -226,9 +233,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -243,12 +251,14 @@ async fn test_on_create_metadata() {
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.creator.data.state == CreateTableState::CreateMetadata

View File

@@ -97,6 +97,7 @@ pub(crate) fn test_create_view_task(name: &str) -> CreateViewTask {
async fn test_on_prepare_view_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
// Puts a value to table name key.
@@ -112,7 +113,7 @@ async fn test_on_prepare_view_exists_err() {
)
.await
.unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -122,6 +123,7 @@ async fn test_on_prepare_view_exists_err() {
async fn test_on_prepare_with_create_if_view_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true;
task.view_info.ident.table_id = 1024;
@@ -138,7 +140,7 @@ async fn test_on_prepare_with_create_if_view_exists() {
)
.await
.unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -149,9 +151,10 @@ async fn test_on_prepare_with_create_if_view_exists() {
async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true;
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.view_id(), 1024);
@@ -162,9 +165,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -181,9 +185,10 @@ async fn test_replace_view_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -208,7 +213,7 @@ async fn test_replace_view_metadata() {
let mut task = test_create_view_task("foo");
// The view already exists, prepare should fail
{
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -219,7 +224,7 @@ async fn test_replace_view_metadata() {
task.create_view.logical_plan = vec![4, 5, 6];
task.create_view.definition = "new_definition".to_string();
let mut procedure = CreateViewProcedure::new(task, ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -249,11 +254,12 @@ async fn test_replace_table() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
{
// Create a `foo` table.
let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -266,7 +272,7 @@ async fn test_replace_table() {
// Try to replace a view named `foo` too.
let mut task = test_create_view_task("foo");
task.create_view.or_replace = true;
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);

View File

@@ -31,6 +31,7 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
#[tokio::test]
async fn test_drop_database_with_logical_tables() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -44,11 +45,11 @@ async fn test_drop_database_with_logical_tables() {
.await
.unwrap();
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
@@ -79,6 +80,7 @@ async fn test_drop_database_with_logical_tables() {
#[tokio::test]
async fn test_drop_database_retryable_error() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -92,11 +94,11 @@ async fn test_drop_database_retryable_error() {
.await
.unwrap();
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
@@ -126,6 +128,7 @@ async fn test_drop_database_retryable_error() {
#[tokio::test]
async fn test_drop_database_recover() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -139,9 +142,9 @@ async fn test_drop_database_recover() {
.await
.unwrap();
// Creates a physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates a logical table
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(),

View File

@@ -40,11 +40,12 @@ fn test_drop_flow_task(flow_name: &str, flow_id: u32, drop_if_exists: bool) -> D
#[tokio::test]
async fn test_drop_flow_not_found() {
let cluster_id = 1;
let flow_id = 1024;
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager);
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context);
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. });
}
@@ -52,6 +53,7 @@ async fn test_drop_flow_not_found() {
#[tokio::test]
async fn test_drop_flow() {
// create a flow
let cluster_id = 1;
let table_id = 1024;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
@@ -73,21 +75,27 @@ async fn test_drop_flow() {
)
.await
.unwrap();
let flow_id =
create_test_flow(&ddl_context, "my_flow", source_table_names, sink_table_name).await;
let flow_id = create_test_flow(
&ddl_context,
cluster_id,
"my_flow",
source_table_names,
sink_table_name,
)
.await;
// Drops the flow
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone());
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
// Drops if not exists
let task = test_drop_flow_task("my_flow", flow_id, true);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone());
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
// Drops again
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context);
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. });
}

View File

@@ -35,7 +35,7 @@ use crate::ddl::test_util::{
create_logical_table, create_physical_table, create_physical_table_metadata,
test_create_logical_table_task, test_create_physical_table_task,
};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer;
@@ -47,6 +47,7 @@ use crate::test_util::{new_ddl_context, new_ddl_context_with_kv_backend, MockDat
async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -62,7 +63,7 @@ async fn test_on_prepare_table_not_exists_err() {
.unwrap();
let task = new_drop_table_task("bar", table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}
@@ -71,6 +72,7 @@ async fn test_on_prepare_table_not_exists_err() {
async fn test_on_prepare_table() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -87,13 +89,13 @@ async fn test_on_prepare_table() {
let task = new_drop_table_task("bar", table_id, true);
// Drop if exists
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported());
let task = new_drop_table_task(table_name, table_id, false);
// Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
}
@@ -103,6 +105,7 @@ async fn test_on_datanode_drop_regions() {
let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,7 @@ async fn test_on_datanode_drop_regions() {
let task = new_drop_table_task(table_name, table_id, false);
// Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
procedure.on_datanode_drop_regions().await.unwrap();
@@ -176,6 +179,7 @@ async fn test_on_rollback() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend.clone());
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -184,7 +188,10 @@ async fn test_on_rollback() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -198,8 +205,12 @@ async fn test_on_rollback() {
let physical_table_id = table_id;
// Creates the logical table metadata.
let task = test_create_logical_table_task("foo");
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context.clone());
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task],
physical_table_id,
ddl_context.clone(),
);
procedure.on_prepare().await.unwrap();
let ctx = new_test_procedure_context();
procedure.execute(&ctx).await.unwrap();
@@ -212,7 +223,7 @@ async fn test_on_rollback() {
// Drops the physical table
{
let task = new_drop_table_task("phy_table", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(procedure.rollback_supported());
procedure.on_delete_metadata().await.unwrap();
@@ -227,7 +238,7 @@ async fn test_on_rollback() {
// Drops the logical table
let task = new_drop_table_task("foo", table_ids[0], false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported());
}
@@ -244,15 +255,18 @@ fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let logical_table_id = create_logical_table(ddl_context.clone(), physical_table_id, "s").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let logical_table_id =
create_logical_table(ddl_context.clone(), cluster_id, physical_table_id, "s").await;
let inner_test = |task: DropTableTask| async {
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.data.state == DropTableState::InvalidateTableCache
})
@@ -290,13 +304,14 @@ async fn test_from_json() {
(DropTableState::DatanodeDropRegions, 1, 1),
(DropTableState::DeleteTombstone, 1, 0),
] {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| p.data.state == state).await;
let data = procedure.dump().unwrap();
assert_eq!(
@@ -319,13 +334,14 @@ async fn test_from_json() {
let num_operating_regions = 0;
let num_operating_regions_after_recovery = 0;
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
let data = procedure.dump().unwrap();
assert_eq!(

View File

@@ -41,6 +41,7 @@ fn new_drop_view_task(view: &str, view_id: TableId, drop_if_exists: bool) -> Dro
async fn test_on_prepare_view_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let mut task = test_create_view_task("foo");
task.view_info.ident.table_id = view_id;
@@ -59,7 +60,7 @@ async fn test_on_prepare_view_not_exists_err() {
.unwrap();
let task = new_drop_view_task("bar", view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}
@@ -68,6 +69,7 @@ async fn test_on_prepare_view_not_exists_err() {
async fn test_on_prepare_not_view_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let task = test_create_table_task(view_name, view_id);
@@ -83,7 +85,7 @@ async fn test_on_prepare_not_view_err() {
.unwrap();
let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
// It's not a view, expect error
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::InvalidArguments);
@@ -93,6 +95,7 @@ async fn test_on_prepare_not_view_err() {
async fn test_on_prepare_success() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let mut task = test_create_view_task("foo");
@@ -113,12 +116,12 @@ async fn test_on_prepare_success() {
let task = new_drop_view_task("bar", view_id, true);
// Drop if exists
let mut procedure = DropViewProcedure::new(task, ddl_context.clone());
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let task = new_drop_view_task(view_name, view_id, false);
// Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
assert_eq!(DropViewState::DeleteMetadata, procedure.state());
}
@@ -127,6 +130,7 @@ async fn test_on_prepare_success() {
async fn test_drop_view_success() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let mut task = test_create_view_task("foo");
@@ -155,7 +159,7 @@ async fn test_drop_view_success() {
let task = new_drop_view_task(view_name, view_id, false);
// Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context.clone());
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
assert_eq!(DropViewState::InvalidateViewCache, procedure.state());
@@ -170,7 +174,7 @@ async fn test_drop_view_success() {
// Drop again
let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}

View File

@@ -39,9 +39,9 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::TruncateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::{metrics, ClusterId};
pub struct TruncateTableProcedure {
context: DdlContext,
@@ -91,6 +91,7 @@ impl TruncateTableProcedure {
pub(crate) const TYPE_NAME: &'static str = "metasrv-procedure::TruncateTable";
pub(crate) fn new(
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
@@ -98,7 +99,7 @@ impl TruncateTableProcedure {
) -> Self {
Self {
context,
data: TruncateTableData::new(task, table_info_value, region_routes),
data: TruncateTableData::new(cluster_id, task, table_info_value, region_routes),
}
}
@@ -188,6 +189,7 @@ impl TruncateTableProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct TruncateTableData {
state: TruncateTableState,
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
@@ -195,12 +197,14 @@ pub struct TruncateTableData {
impl TruncateTableData {
pub fn new(
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Self {
Self {
state: TruncateTableState::Prepare,
cluster_id,
task,
table_info_value,
region_routes,

View File

@@ -34,6 +34,7 @@ use crate::key::TableMetadataManagerRef;
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::RegionRoute;
use crate::ClusterId;
/// Adds [Peer] context if the error is unretryable.
pub fn add_peer_context_if_needed(datanode: Peer) -> impl FnOnce(Error) -> Error {
@@ -143,6 +144,7 @@ pub async fn get_physical_table_id(
/// Converts a list of [`RegionRoute`] to a list of [`DetectingRegion`].
pub fn convert_region_routes_to_detecting_regions(
cluster_id: ClusterId,
region_routes: &[RegionRoute],
) -> Vec<DetectingRegion> {
region_routes
@@ -151,7 +153,7 @@ pub fn convert_region_routes_to_detecting_regions(
route
.leader_peer
.as_ref()
.map(|peer| (peer.id, route.region.id))
.map(|peer| (cluster_id, peer.id, route.region.id))
})
.collect::<Vec<_>>()
}
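
With the change above, each `DetectingRegion` now carries the cluster id as well: the mapping produces `(cluster_id, datanode_id, region_id)` triples and still skips routes without a leader peer. A self-contained sketch of that filter-map shape, using illustrative stand-in types rather than the greptimedb `RegionRoute`/`DetectingRegion` definitions:

```rust
// Stand-in types; not the greptimedb `RegionRoute` / `DetectingRegion` definitions.
type ClusterId = u64;
type DatanodeId = u64;
type RegionId = u64;
type DetectingRegion = (ClusterId, DatanodeId, RegionId);

struct Route {
    leader_peer: Option<DatanodeId>,
    region_id: RegionId,
}

fn to_detecting_regions(cluster_id: ClusterId, routes: &[Route]) -> Vec<DetectingRegion> {
    routes
        .iter()
        .filter_map(|route| {
            // Routes without a known leader are skipped, mirroring the
            // `filter_map` over `leader_peer` in the diff above.
            route
                .leader_peer
                .map(|peer_id| (cluster_id, peer_id, route.region_id))
        })
        .collect()
}

fn main() {
    let routes = vec![
        Route { leader_peer: Some(42), region_id: 1 },
        Route { leader_peer: None, region_id: 2 },
    ];
    assert_eq!(to_detecting_regions(7, &routes), vec![(7, 42, 1)]);
}
```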

View File

@@ -60,6 +60,7 @@ use crate::rpc::ddl::{
use crate::rpc::procedure;
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::rpc::router::RegionRoute;
use crate::ClusterId;
pub type DdlManagerRef = Arc<DdlManager>;
@@ -153,12 +154,13 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_alter_table_task(
&self,
cluster_id: ClusterId,
table_id: TableId,
alter_table_task: AlterTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = AlterTableProcedure::new(table_id, alter_table_task, context)?;
let procedure = AlterTableProcedure::new(cluster_id, table_id, alter_table_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -169,11 +171,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_table_task(
&self,
cluster_id: ClusterId,
create_table_task: CreateTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateTableProcedure::new(create_table_task, context);
let procedure = CreateTableProcedure::new(cluster_id, create_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -184,11 +187,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_view_task(
&self,
cluster_id: ClusterId,
create_view_task: CreateViewTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateViewProcedure::new(create_view_task, context);
let procedure = CreateViewProcedure::new(cluster_id, create_view_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -199,13 +203,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_logical_table_tasks(
&self,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>,
physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure =
CreateLogicalTablesProcedure::new(create_table_tasks, physical_table_id, context);
let procedure = CreateLogicalTablesProcedure::new(
cluster_id,
create_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -216,13 +225,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_alter_logical_table_tasks(
&self,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>,
physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure =
AlterLogicalTablesProcedure::new(alter_table_tasks, physical_table_id, context);
let procedure = AlterLogicalTablesProcedure::new(
cluster_id,
alter_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -233,11 +247,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_table_task(
&self,
cluster_id: ClusterId,
drop_table_task: DropTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropTableProcedure::new(drop_table_task, context);
let procedure = DropTableProcedure::new(cluster_id, drop_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -248,6 +263,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_database(
&self,
_cluster_id: ClusterId,
CreateDatabaseTask {
catalog,
schema,
@@ -267,6 +283,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_database(
&self,
_cluster_id: ClusterId,
DropDatabaseTask {
catalog,
schema,
@@ -282,10 +299,11 @@ impl DdlManager {
pub async fn submit_alter_database(
&self,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = AlterDatabaseProcedure::new(alter_database_task, context)?;
let procedure = AlterDatabaseProcedure::new(cluster_id, alter_database_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -295,11 +313,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_flow_task(
&self,
cluster_id: ClusterId,
create_flow: CreateFlowTask,
query_context: QueryContext,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateFlowProcedure::new(create_flow, query_context, context);
let procedure = CreateFlowProcedure::new(cluster_id, create_flow, query_context, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -309,10 +328,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_flow_task(
&self,
cluster_id: ClusterId,
drop_flow: DropFlowTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropFlowProcedure::new(drop_flow, context);
let procedure = DropFlowProcedure::new(cluster_id, drop_flow, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -322,10 +342,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_view_task(
&self,
cluster_id: ClusterId,
drop_view: DropViewTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropViewProcedure::new(drop_view, context);
let procedure = DropViewProcedure::new(cluster_id, drop_view, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -335,12 +356,14 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_truncate_table_task(
&self,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = TruncateTableProcedure::new(
cluster_id,
truncate_table_task,
table_info_value,
region_routes,
@@ -374,6 +397,7 @@ impl DdlManager {
async fn handle_truncate_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_id = truncate_table_task.table_id;
@@ -392,7 +416,12 @@ async fn handle_truncate_table_task(
let table_route = table_route_value.into_inner().region_routes()?.clone();
let (id, _) = ddl_manager
.submit_truncate_table_task(truncate_table_task, table_info_value, table_route)
.submit_truncate_table_task(
cluster_id,
truncate_table_task,
table_info_value,
table_route,
)
.await?;
info!("Table: {table_id} is truncated via procedure_id {id:?}");
@@ -405,6 +434,7 @@ async fn handle_truncate_table_task(
async fn handle_alter_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_task: AlterTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_ref = alter_table_task.table_ref();
@@ -438,7 +468,7 @@ async fn handle_alter_table_task(
);
let (id, _) = ddl_manager
.submit_alter_table_task(table_id, alter_table_task)
.submit_alter_table_task(cluster_id, table_id, alter_table_task)
.await?;
info!("Table: {table_id} is altered via procedure_id {id:?}");
@@ -451,10 +481,13 @@ async fn handle_alter_table_task(
async fn handle_drop_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_table_task: DropTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_id = drop_table_task.table_id;
let (id, _) = ddl_manager.submit_drop_table_task(drop_table_task).await?;
let (id, _) = ddl_manager
.submit_drop_table_task(cluster_id, drop_table_task)
.await?;
info!("Table: {table_id} is dropped via procedure_id {id:?}");
@@ -466,10 +499,11 @@ async fn handle_drop_table_task(
async fn handle_create_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_task: CreateTableTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_table_task(create_table_task)
.submit_create_table_task(cluster_id, create_table_task)
.await?;
let procedure_id = id.to_string();
@@ -491,6 +525,7 @@ async fn handle_create_table_task(
async fn handle_create_logical_table_tasks(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>,
) -> Result<SubmitDdlTaskResponse> {
ensure!(
@@ -507,7 +542,7 @@ async fn handle_create_logical_table_tasks(
let num_logical_tables = create_table_tasks.len();
let (id, output) = ddl_manager
.submit_create_logical_table_tasks(create_table_tasks, physical_table_id)
.submit_create_logical_table_tasks(cluster_id, create_table_tasks, physical_table_id)
.await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is created via procedure_id {id:?}");
@@ -533,10 +568,11 @@ async fn handle_create_logical_table_tasks(
async fn handle_create_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_database_task: CreateDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_create_database(create_database_task.clone())
.submit_create_database(cluster_id, create_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -553,10 +589,11 @@ async fn handle_create_database_task(
async fn handle_drop_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_database_task: DropDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_database(drop_database_task.clone())
.submit_drop_database(cluster_id, drop_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -573,10 +610,11 @@ async fn handle_drop_database_task(
async fn handle_alter_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_alter_database(alter_database_task.clone())
.submit_alter_database(cluster_id, alter_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -594,10 +632,11 @@ async fn handle_alter_database_task(
async fn handle_drop_flow_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_flow_task: DropFlowTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_flow_task(drop_flow_task.clone())
.submit_drop_flow_task(cluster_id, drop_flow_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -614,10 +653,11 @@ async fn handle_drop_flow_task(
async fn handle_drop_view_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_view_task: DropViewTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_view_task(drop_view_task.clone())
.submit_drop_view_task(cluster_id, drop_view_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -635,11 +675,12 @@ async fn handle_drop_view_task(
async fn handle_create_flow_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_flow_task: CreateFlowTask,
query_context: QueryContext,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_flow_task(create_flow_task.clone(), query_context)
.submit_create_flow_task(cluster_id, create_flow_task.clone(), query_context)
.await?;
let procedure_id = id.to_string();
@@ -671,6 +712,7 @@ async fn handle_create_flow_task(
async fn handle_alter_logical_table_tasks(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>,
) -> Result<SubmitDdlTaskResponse> {
ensure!(
@@ -691,7 +733,7 @@ async fn handle_alter_logical_table_tasks(
let num_logical_tables = alter_table_tasks.len();
let (id, _) = ddl_manager
.submit_alter_logical_table_tasks(alter_table_tasks, physical_table_id)
.submit_alter_logical_table_tasks(cluster_id, alter_table_tasks, physical_table_id)
.await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is altered via procedure_id {id:?}");
@@ -707,10 +749,11 @@ async fn handle_alter_logical_table_tasks(
/// Handle the `[CreateViewTask]` and returns the DDL response when success.
async fn handle_create_view_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_view_task: CreateViewTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_view_task(create_view_task)
.submit_create_view_task(cluster_id, create_view_task)
.await?;
let procedure_id = id.to_string();
@@ -745,43 +788,55 @@ impl ProcedureExecutor for DdlManager {
.unwrap_or(TracingContext::from_current_span())
.attach(tracing::info_span!("DdlManager::submit_ddl_task"));
async move {
let cluster_id = ctx.cluster_id.unwrap_or_default();
debug!("Submitting Ddl task: {:?}", request.task);
match request.task {
CreateTable(create_table_task) => {
handle_create_table_task(self, create_table_task).await
handle_create_table_task(self, cluster_id, create_table_task).await
}
DropTable(drop_table_task) => {
handle_drop_table_task(self, cluster_id, drop_table_task).await
}
DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
AlterTable(alter_table_task) => {
handle_alter_table_task(self, alter_table_task).await
handle_alter_table_task(self, cluster_id, alter_table_task).await
}
TruncateTable(truncate_table_task) => {
handle_truncate_table_task(self, truncate_table_task).await
handle_truncate_table_task(self, cluster_id, truncate_table_task).await
}
CreateLogicalTables(create_table_tasks) => {
handle_create_logical_table_tasks(self, create_table_tasks).await
handle_create_logical_table_tasks(self, cluster_id, create_table_tasks).await
}
AlterLogicalTables(alter_table_tasks) => {
handle_alter_logical_table_tasks(self, alter_table_tasks).await
handle_alter_logical_table_tasks(self, cluster_id, alter_table_tasks).await
}
DropLogicalTables(_) => todo!(),
CreateDatabase(create_database_task) => {
handle_create_database_task(self, create_database_task).await
handle_create_database_task(self, cluster_id, create_database_task).await
}
DropDatabase(drop_database_task) => {
handle_drop_database_task(self, drop_database_task).await
handle_drop_database_task(self, cluster_id, drop_database_task).await
}
AlterDatabase(alter_database_task) => {
handle_alter_database_task(self, alter_database_task).await
handle_alter_database_task(self, cluster_id, alter_database_task).await
}
CreateFlow(create_flow_task) => {
handle_create_flow_task(self, create_flow_task, request.query_context.into())
.await
handle_create_flow_task(
self,
cluster_id,
create_flow_task,
request.query_context.into(),
)
.await
}
DropFlow(drop_flow_task) => {
handle_drop_flow_task(self, cluster_id, drop_flow_task).await
}
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
CreateView(create_view_task) => {
handle_create_view_task(self, create_view_task).await
handle_create_view_task(self, cluster_id, create_view_task).await
}
DropView(drop_view_task) => {
handle_drop_view_task(self, cluster_id, drop_view_task).await
}
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
}
}
.trace(span)

View File

@@ -26,10 +26,11 @@ use crate::flow_name::FlowName;
use crate::key::schema_name::SchemaName;
use crate::key::FlowId;
use crate::peer::Peer;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct RegionIdent {
pub cluster_id: ClusterId,
pub datanode_id: DatanodeId,
pub table_id: TableId,
pub region_number: RegionNumber,
@@ -46,8 +47,8 @@ impl Display for RegionIdent {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"RegionIdent(datanode_id='{}', table_id={}, region_number={}, engine = {})",
self.datanode_id, self.table_id, self.region_number, self.engine
"RegionIdent(datanode_id='{}.{}', table_id={}, region_number={}, engine = {})",
self.cluster_id, self.datanode_id, self.table_id, self.region_number, self.engine
)
}
}
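For reference, a small sketch (not part of this diff) of how the new Display output reads, using the same field values as the serialization test below; the cluster id is folded into the datanode part of the rendered identifier.

let ident = RegionIdent {
    cluster_id: 1,
    datanode_id: 2,
    table_id: 1024,
    region_number: 1,
    engine: "mito2".to_string(),
};
assert_eq!(
    "RegionIdent(datanode_id='1.2', table_id=1024, region_number=1, engine = mito2)",
    ident.to_string()
);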
@@ -261,6 +262,7 @@ mod tests {
fn test_serialize_instruction() {
let open_region = Instruction::OpenRegion(OpenRegion::new(
RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,
@@ -275,11 +277,12 @@ mod tests {
let serialized = serde_json::to_string(&open_region).unwrap();
assert_eq!(
r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
r#"{"OpenRegion":{"region_ident":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
serialized
);
let close_region = Instruction::CloseRegion(RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,
@@ -289,7 +292,7 @@ mod tests {
let serialized = serde_json::to_string(&close_region).unwrap();
assert_eq!(
r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
r#"{"CloseRegion":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
serialized
);
}
@@ -304,6 +307,7 @@ mod tests {
#[test]
fn test_compatible_serialize_open_region() {
let region_ident = RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,

View File

@@ -47,6 +47,8 @@ pub mod test_util;
pub mod util;
pub mod wal_options_allocator;
// The id of the cluster.
pub type ClusterId = u64;
// The id of the datanode.
pub type DatanodeId = u64;
// The id of the flownode.

View File

@@ -99,7 +99,7 @@ impl NodeExpiryListener {
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<impl Iterator<Item = NodeInfoKey>> {
let prefix = NodeInfoKey::key_prefix();
let prefix = NodeInfoKey::key_prefix_with_cluster_id(0);
let req = RangeRequest::new().with_prefix(prefix);
let current_time_millis = common_time::util::current_time_millis();
let resp = in_memory.range(req).await?;

View File

@@ -19,7 +19,7 @@ use api::v1::meta::Peer as PbPeer;
use serde::{Deserialize, Serialize};
use crate::error::Error;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct Peer {
@@ -72,8 +72,8 @@ impl Display for Peer {
/// can query a peer given a node id
#[async_trait::async_trait]
pub trait PeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;
async fn datanode(&self, cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>, Error>;
async fn flownode(&self, cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>, Error>;
}
pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;
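A minimal sketch (not part of this diff; the helper name is hypothetical) of a caller adapting to the new trait signature, which now resolves peers within a specific cluster:

async fn lookup_datanode_peer(
    lookup: &PeerLookupServiceRef,
    cluster_id: ClusterId,
    datanode_id: DatanodeId,
) -> Result<Option<Peer>, Error> {
    // The same node id may exist in different clusters, so the cluster id is required.
    lookup.datanode(cluster_id, datanode_id).await
}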

View File

@@ -31,6 +31,11 @@ impl ResponseHeader {
self.0.protocol_version
}
#[inline]
pub fn cluster_id(&self) -> u64 {
self.0.cluster_id
}
#[inline]
pub fn error_code(&self) -> i32 {
match self.0.error.as_ref() {
@@ -138,6 +143,7 @@ mod tests {
fn test_response_header_trans() {
let pb_header = PbResponseHeader {
protocol_version: 101,
cluster_id: 1,
error: Some(Error {
code: 100,
err_msg: "test".to_string(),
@@ -146,6 +152,7 @@ mod tests {
let header = ResponseHeader(pb_header);
assert_eq!(101, header.protocol_version());
assert_eq!(1, header.cluster_id());
assert_eq!(100, header.error_code());
assert_eq!("test".to_string(), header.error_msg());
}

View File

@@ -37,7 +37,7 @@ use crate::peer::{Peer, PeerLookupService};
use crate::region_keeper::MemoryRegionKeeper;
use crate::sequence::SequenceBuilder;
use crate::wal_options_allocator::WalOptionsAllocator;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[async_trait::async_trait]
pub trait MockDatanodeHandler: Sync + Send + Clone {
@@ -189,11 +189,11 @@ pub struct NoopPeerLookupService;
#[async_trait::async_trait]
impl PeerLookupService for NoopPeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>> {
async fn datanode(&self, _cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> {
async fn flownode(&self, _cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}
}

View File

@@ -235,6 +235,7 @@ mod tests {
Instruction::CloseRegion(RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
})
@@ -245,6 +246,7 @@ mod tests {
RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
},

View File

@@ -25,6 +25,6 @@ pub mod heartbeat;
pub mod metrics;
pub mod region_server;
pub mod service;
pub mod store;
mod store;
#[cfg(any(test, feature = "testing"))]
pub mod tests;

View File

@@ -15,7 +15,7 @@
//! object storage utilities
mod azblob;
pub mod fs;
mod fs;
mod gcs;
mod oss;
mod s3;

View File

@@ -24,8 +24,7 @@ use crate::config::FileConfig;
use crate::error::{self, Result};
use crate::store;
/// A helper function to create a file system object store.
pub async fn new_fs_object_store(
pub(crate) async fn new_fs_object_store(
data_home: &str,
_file_config: &FileConfig,
) -> Result<ObjectStore> {

View File

@@ -16,6 +16,7 @@ async-trait.workspace = true
bytes.workspace = true
cache.workspace = true
catalog.workspace = true
chrono.workspace = true
client.workspace = true
common-base.workspace = true
common-config.workspace = true

View File

@@ -49,12 +49,13 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::refill::RefillTask;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::recording_rules::RecordingRuleEngine;
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
mod flownode_impl;
@@ -63,7 +64,7 @@ pub(crate) mod refill;
mod stat;
#[cfg(test)]
mod tests;
mod util;
pub(crate) mod util;
mod worker;
pub(crate) mod node_context;
@@ -103,6 +104,7 @@ impl Default for FlowConfig {
#[serde(default)]
pub struct FlownodeOptions {
pub mode: Mode,
pub cluster_id: Option<u64>,
pub node_id: Option<u64>,
pub flow: FlowConfig,
pub grpc: GrpcOptions,
@@ -117,6 +119,7 @@ impl Default for FlownodeOptions {
fn default() -> Self {
Self {
mode: servers::Mode::Standalone,
cluster_id: None,
node_id: None,
flow: FlowConfig::default(),
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"),
@@ -169,6 +172,8 @@ pub struct FlowWorkerManager {
flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>,
/// engine for recording rule
rule_engine: RecordingRuleEngine,
}
/// Building FlownodeManager
@@ -183,6 +188,7 @@ impl FlowWorkerManager {
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
rule_engine: RecordingRuleEngine,
) -> Self {
let srv_map = ManagedTableSource::new(
table_meta.table_info_manager().clone(),
@@ -205,6 +211,7 @@ impl FlowWorkerManager {
node_id,
flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None),
rule_engine,
}
}
@@ -213,25 +220,6 @@ impl FlowWorkerManager {
self
}
/// Create a flownode manager with one worker
pub fn new_with_workers<'s>(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
num_workers: usize,
) -> (Self, Vec<Worker<'s>>) {
let mut zelf = Self::new(node_id, query_engine, table_meta);
let workers: Vec<_> = (0..num_workers)
.map(|_| {
let (handle, worker) = create_worker();
zelf.add_worker_handle(handle);
worker
})
.collect();
(zelf, workers)
}
/// Add a worker handle to the manager, meaning the corresponding worker is under its management
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(handle);
@@ -749,7 +737,11 @@ pub struct CreateFlowArgs {
/// Create&Remove flow
impl FlowWorkerManager {
/// Remove a flow by its id
#[allow(unreachable_code)]
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.remove_flow(flow_id).await;
for handle in self.worker_handles.iter() {
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
@@ -765,8 +757,10 @@ impl FlowWorkerManager {
/// steps to create a task:
/// 1. parse the query into a typed plan (and optionally parse the expire_after expr)
/// 2. render source/sink with the output table id and the used input table ids
#[allow(clippy::too_many_arguments)]
#[allow(clippy::too_many_arguments, unreachable_code)]
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.create_flow(args).await;
let CreateFlowArgs {
flow_id,
sink_table_name,

View File

@@ -153,7 +153,13 @@ impl Flownode for FlowWorkerManager {
}
}
#[allow(unreachable_code, unused)]
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
return self
.rule_engine
.handle_inserts(request)
.await
.map_err(to_meta_err(snafu::location!()));
// using try_read to ensure two things:
// 1. flush wouldn't happen until the inserts before it are inserted
// 2. inserts happening concurrently with flush wouldn't be blocked by flush
@@ -206,15 +212,15 @@ impl Flownode for FlowWorkerManager {
.collect_vec();
let table_col_names = table_schema.relation_desc.names;
let table_col_names = table_col_names
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
let name_to_col = HashMap::<_, _>::from_iter(
insert_schema
.iter()

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Some utility functions
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;

View File

@@ -16,6 +16,7 @@
use std::any::Any;
use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
use common_macro::stack_trace_debug;
@@ -53,6 +54,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Time error"))]
Time {
source: common_time::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -156,6 +164,15 @@ pub enum Error {
location: Location,
},
#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
Datafusion {
#[snafu(source)]
@@ -230,6 +247,7 @@ impl ErrorExt for Error {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Arrow { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
@@ -238,7 +256,9 @@ impl ErrorExt for Error {
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => {
StatusCode::EngineExecuteQuery
}
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported

View File

@@ -238,6 +238,7 @@ mod test {
for (sql, current, expected) in &testcases {
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
.await

View File

@@ -130,13 +130,6 @@ impl HeartbeatTask {
pub fn shutdown(&self) {
info!("Close heartbeat task for flownode");
if self
.running
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Call close heartbeat task multiple times");
}
}
fn new_heartbeat_request(

View File

@@ -33,6 +33,7 @@ mod expr;
pub mod heartbeat;
mod metrics;
mod plan;
mod recording_rules;
mod repr;
mod server;
mod transform;
@@ -43,4 +44,5 @@ mod test_utils;
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use error::{Error, Result};
pub use recording_rules::FrontendClient;
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

View File

@@ -28,6 +28,32 @@ lazy_static! {
&["table_id"]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_query_time",
"flow rule engine query time",
&["flow_id"],
vec![
0.0,
1.,
3.,
5.,
10.,
20.,
30.,
60.,
2. * 60.,
5. * 60.,
10. * 60.
]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_slow_query",
"flow rule engine slow query",
&["flow_id", "sql", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -0,0 +1,882 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Run a flow as a recording rule: a time-window-aware normal query triggered on every tick set by the user
mod engine;
mod frontend_client;
use std::collections::BTreeSet;
use std::sync::Arc;
use api::helper::pb_value_to_value_ref;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::warn;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionContext;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{DFSchema, TableReference};
use datafusion_expr::{ColumnarValue, LogicalPlan};
use datafusion_physical_expr::PhysicalExprRef;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVector;
use datatypes::schema::TIME_INDEX_KEY;
use datatypes::value::Value;
use datatypes::vectors::{
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
TimestampSecondVector, Vector,
};
pub use engine::RecordingRuleEngine;
pub use frontend_client::FrontendClient;
use itertools::Itertools;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use crate::adapter::util::from_proto_to_data_type;
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
use crate::expr::error::DataTypeSnafu;
use crate::Error;
#[derive(Debug, Clone)]
pub struct TimeWindowExpr {
expr: PhysicalExprRef,
column_name: String,
}
impl TimeWindowExpr {
pub fn new(expr: PhysicalExprRef, column_name: String) -> Self {
Self { expr, column_name }
}
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
Ok(Self {
expr: phy_expr,
column_name: column_name.to_string(),
})
}
/// Find timestamps from rows using time window expr
pub async fn handle_rows(
&self,
rows_list: Vec<api::v1::Rows>,
) -> Result<BTreeSet<Timestamp>, Error> {
let mut time_windows = BTreeSet::new();
for rows in rows_list {
// pick the time index column and use it to eval on `self.expr`
let ts_col_index = rows
.schema
.iter()
.map(|col| col.column_name.clone())
.position(|name| name == self.column_name);
let Some(ts_col_index) = ts_col_index else {
warn!("can't found time index column in schema: {:?}", rows.schema);
continue;
};
let col_schema = &rows.schema[ts_col_index];
let cdt = from_proto_to_data_type(col_schema)?;
let column_values = rows
.rows
.iter()
.map(|row| &row.values[ts_col_index])
.collect_vec();
let mut vector = cdt.create_mutable_vector(column_values.len());
for value in column_values {
let value = pb_value_to_value_ref(value, &None);
vector.try_push_value_ref(value).context(DataTypeSnafu {
msg: "Failed to convert rows to columns",
})?;
}
let vector = vector.to_vector();
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
let rb =
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
.with_context(|_e| ArrowSnafu {
context: format!(
"Failed to create record batch from {df_schema:?} and {vector:?}"
),
})?;
let eval_res = self.expr.evaluate(&rb).with_context(|_| DatafusionSnafu {
context: format!(
"Failed to evaluate physical expression {:?} on {rb:?}",
self.expr
),
})?;
let res = columnar_to_ts_vector(&eval_res)?;
for ts in res.into_iter().flatten() {
time_windows.insert(ts);
}
}
Ok(time_windows)
}
}
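A sketch (not part of this diff; the helper name is made up) of how the rule engine is expected to combine `TimeWindowExpr` with mirrored writes; the expr, column name and schema would come from `find_time_window_expr` below:

async fn windows_touched_by_writes(
    expr: &Expr,
    ts_col_name: &str,
    df_schema: &DFSchema,
    rows_list: Vec<api::v1::Rows>,
) -> Result<BTreeSet<Timestamp>, Error> {
    let time_window_expr = TimeWindowExpr::from_expr(expr, ts_col_name, df_schema)?;
    // Evaluate the expr over the time index column of each write and collect the
    // distinct time windows that need to be recomputed.
    time_window_expr.handle_rows(rows_list).await
}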
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
name,
cdt.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare("TimeIndexOnlyTable"))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
Ok(df_schema)
}
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
let val = match columnar {
datafusion_expr::ColumnarValue::Array(array) => {
let ty = array.data_type();
let ty = ConcreteDataType::from_arrow_type(ty);
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
ty.unit()
} else {
return UnexpectedSnafu {
reason: format!("Non-timestamp type: {ty:?}"),
}
.fail();
};
match time_unit {
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec(),
TimeUnit::Millisecond => {
TimestampMillisecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
}
}
datafusion_expr::ColumnarValue::Scalar(scalar) => {
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert scalar {scalar:?} to value"),
})?;
let ts = value.as_timestamp().context(UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", value),
})?;
vec![Some(ts)]
}
};
Ok(val)
}
/// Convert sql to datafusion logical plan
pub async fn sql_to_df_plan(
query_ctx: QueryContextRef,
engine: QueryEngineRef,
sql: &str,
optimize: bool,
) -> Result<LogicalPlan, Error> {
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = if optimize {
apply_df_optimizer(plan).await?
} else {
plan
};
Ok(plan)
}
/// Returns (the name of the time index column, the time window expr, the expected time unit of the time index column, the expr's schema for evaluating the time window)
async fn find_time_window_expr(
plan: &LogicalPlan,
catalog_man: CatalogManagerRef,
query_ctx: QueryContextRef,
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
// TODO(discord9): find the expr that do time window
let mut table_name = None;
// first find the table source in the logical plan
plan.apply(|plan| {
let LogicalPlan::TableScan(table_scan) = plan else {
return Ok(TreeNodeRecursion::Continue);
};
table_name = Some(table_scan.table_name.clone());
Ok(TreeNodeRecursion::Stop)
})
.with_context(|_| DatafusionSnafu {
context: format!("Can't find table source in plan {plan:?}"),
})?;
let Some(table_name) = table_name else {
UnexpectedSnafu {
reason: format!("Can't find table source in plan {plan:?}"),
}
.fail()?
};
let current_schema = query_ctx.current_schema();
let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
let schema_name = table_name.schema().unwrap_or(&current_schema);
let table_name = table_name.table();
let Some(table_ref) = catalog_man
.table(catalog_name, schema_name, table_name, Some(&query_ctx))
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
else {
UnexpectedSnafu {
reason: format!(
"Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
),
}
.fail()?
};
let schema = &table_ref.table_info().meta.schema;
let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
reason: format!("Can't find timestamp column in table {table_name:?}"),
})?;
let ts_col_name = ts_index.name.clone();
let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
reason: format!(
"Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
),
})?.unit();
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
ts_col_name.clone(),
ts_index.data_type.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare(table_name))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
// find the time window expr which refers to the time index column
let mut aggr_expr = None;
let mut time_window_expr: Option<Expr> = None;
let find_inner_aggr_expr = |plan: &LogicalPlan| {
if let LogicalPlan::Aggregate(aggregate) = plan {
aggr_expr = Some(aggregate.clone());
};
Ok(TreeNodeRecursion::Continue)
};
plan.apply(find_inner_aggr_expr)
.with_context(|_| DatafusionSnafu {
context: format!("Can't find aggr expr in plan {plan:?}"),
})?;
if let Some(aggregate) = aggr_expr {
for group_expr in &aggregate.group_expr {
let refs = group_expr.column_refs();
if refs.len() != 1 {
continue;
}
let ref_col = refs.iter().next().unwrap();
let index = aggregate.input.schema().maybe_index_of_column(ref_col);
let Some(index) = index else {
continue;
};
let field = aggregate.input.schema().field(index);
let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());
if is_time_index {
let rewrite_column = group_expr.clone();
let rewritten = rewrite_column
.rewrite(&mut RewriteColumn {
table_name: table_name.to_string(),
})
.with_context(|_| DatafusionSnafu {
context: format!("Rewrite expr failed, expr={:?}", group_expr),
})?
.data;
struct RewriteColumn {
table_name: String,
}
impl TreeNodeRewriter for RewriteColumn {
type Node = Expr;
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
let Expr::Column(mut column) = node else {
return Ok(Transformed::no(node));
};
column.relation = Some(TableReference::bare(self.table_name.clone()));
Ok(Transformed::yes(Expr::Column(column)))
}
}
time_window_expr = Some(rewritten);
break;
}
}
Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
} else {
// can't find time window expr, return None
Ok((ts_col_name, None, expected_time_unit, df_schema))
}
}
/// Find the nearest lower and upper bounds of the time window containing `current`, using the time window expr in the given `plan`.
/// i.e. for a time window expr of `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// the lower bound is `Some("2021-07-01 00:00:00.000")`;
/// if `plan` doesn't contain a `TIME INDEX` column, the bounds are `None`
///
/// A time window expr is an expr that:
/// 1. refers only to a time index column
/// 2. is monotonically increasing
/// 3. shows up in the GROUP BY clause
///
/// note this plan should only contain one TableScan
pub async fn find_plan_time_window_bound(
plan: &LogicalPlan,
current: Timestamp,
query_ctx: QueryContextRef,
engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
// TODO(discord9): find the expr that does the time window
let catalog_man = engine.engine_state().catalog_manager();
let (ts_col_name, time_window_expr, expected_time_unit, df_schema) =
find_time_window_expr(plan, catalog_man.clone(), query_ctx).await?;
// cast current to ts_index's type
let new_current = current
.convert_to(expected_time_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
})?;
// if no time_window_expr is found, return None
if let Some(time_window_expr) = time_window_expr {
let lower_bound =
find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
let upper_bound =
find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
Ok((ts_col_name, lower_bound, upper_bound))
} else {
Ok((ts_col_name, None, None))
}
}
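A sketch of the intended call pattern (mirroring the tests at the end of this file; the helper name is hypothetical). `plan`, `query_ctx` and `engine` are whatever the rule engine already holds:

async fn current_window_bounds(
    plan: &LogicalPlan,
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
    // With `date_bin('5 minutes', ts)` and current = 23 ms since epoch, the tests below
    // expect ("ts", Some(1970-01-01 00:00:00), Some(1970-01-01 00:05:00)).
    let now = Timestamp::new(23, TimeUnit::Millisecond);
    find_plan_time_window_bound(plan, now, query_ctx, engine).await
}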
/// Find the lower bound of the time window for the given `expr` and `current` timestamp.
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of the current time window given the current timestamp
///
/// if `None` is returned, this time window has no lower bound
fn find_expr_time_window_lower_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
let input_time_unit = cur_time_window.unit();
Ok(cur_time_window.convert_to(input_time_unit))
}
/// Find the upper bound for time window expression
fn find_expr_time_window_upper_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
use std::cmp::Ordering;
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
// search forward to bracket the end of the current time window
let mut offset: i64 = 1;
let mut lower_bound = Some(current);
let upper_bound;
// first do an exponential probe to find a range for the binary search
loop {
let Some(next_val) = current.value().checked_add(offset) else {
// no upper bound if overflow
return Ok(None);
};
let next_time_probe = common_time::Timestamp::new(next_val, current.unit());
let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;
match next_time_window.cmp(&cur_time_window) {
Ordering::Less => {UnexpectedSnafu {
reason: format!(
"Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
),
}
.fail()?
}
Ordering::Equal => {
lower_bound = Some(next_time_probe);
}
Ordering::Greater => {
upper_bound = Some(next_time_probe);
break
}
}
let Some(new_offset) = offset.checked_mul(2) else {
// no upper bound if overflow
return Ok(None);
};
offset = new_offset;
}
// binary search for the exact upper bound
ensure!(lower_bound.map(|v|v.unit())==upper_bound.map(|v|v.unit()), UnexpectedSnafu{
reason: format!(" unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
});
let output_unit = upper_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.unit();
let mut low = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.value();
let mut high = upper_bound
.context(UnexpectedSnafu {
reason: "should have upper bound",
})?
.value();
while low < high {
let mid = (low + high) / 2;
let mid_probe = common_time::Timestamp::new(mid, output_unit);
let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;
match mid_time_window.cmp(&cur_time_window) {
Ordering::Less => UnexpectedSnafu {
reason: format!("Binary search failed for time window expression {expr:?}"),
}
.fail()?,
Ordering::Equal => low = mid + 1,
Ordering::Greater => high = mid,
}
}
let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);
Ok(Some(final_upper_bound_for_time_window))
}
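To make the probe-then-bisect logic above easier to follow, here is a toy model over plain integers (purely illustrative, not part of this diff); `window_of` stands in for evaluating the time window expr and is assumed to be monotonically non-decreasing:

fn toy_upper_bound(current: i64, window_of: impl Fn(i64) -> i64) -> i64 {
    let cur_window = window_of(current);
    let mut offset = 1;
    let mut low = current;
    let mut high;
    // exponential probe: double the offset until the probe falls into the next window
    loop {
        let probe = current + offset;
        if window_of(probe) > cur_window {
            high = probe;
            break;
        }
        low = probe;
        offset *= 2;
    }
    // binary search for the smallest value that maps to the next window
    while low < high {
        let mid = (low + high) / 2;
        if window_of(mid) == cur_window {
            low = mid + 1;
        } else {
            high = mid;
        }
    }
    high
}
// e.g. with 5-minute (300 s) windows: toy_upper_bound(23, |t| t / 300 * 300) == 300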
fn eval_ts_to_ts(
phy: &PhysicalExprRef,
df_schema: &DFSchema,
input_value: Timestamp,
) -> Result<Timestamp, Error> {
let ts_vector = match input_value.unit() {
TimeUnit::Second => {
TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Millisecond => {
TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
};
let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
.with_context(|_| ArrowSnafu {
context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
})?;
let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
})?;
if let Some(Some(ts)) = columnar_to_ts_vector(&eval_res)?.first() {
Ok(*ts)
} else {
UnexpectedSnafu {
reason: format!(
"Expected timestamp in expression {phy:?} but got {:?}",
eval_res
),
}
.fail()?
}
}
// TODO(discord9): a method to find out the precise time window
/// Find the `Filter` node corresponding to the outermost `WHERE` and add a new filter expr to it
#[derive(Debug)]
pub struct AddFilterRewriter {
extra_filter: Expr,
is_rewritten: bool,
}
impl AddFilterRewriter {
fn new(filter: Expr) -> Self {
Self {
extra_filter: filter,
is_rewritten: false,
}
}
}
impl TreeNodeRewriter for AddFilterRewriter {
type Node = LogicalPlan;
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
match node {
LogicalPlan::Filter(mut filter) if !filter.having => {
filter.predicate = filter.predicate.and(self.extra_filter.clone());
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
LogicalPlan::TableScan(_) => {
// add a new filter
let filter =
datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
_ => Ok(Transformed::no(node)),
}
}
}
fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
let unparser = Unparser::default();
let sql = unparser
.plan_to_sql(plan)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to unparse logical plan {plan:?}"),
})?;
Ok(sql.to_string())
}
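Putting the pieces together, a sketch (closely following the test below, not part of this diff; the function name is made up) of how a stored flow query is expected to be rewritten into a time-bounded query for one tick:

async fn rewrite_query_for_tick(
    sql: &str,
    current: Timestamp,
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
) -> Result<String, Error> {
    // 1. plan the stored SQL and work out the dirty time window bounds
    let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), sql, true).await?;
    let (col_name, lower, upper) =
        find_plan_time_window_bound(&plan, current, query_ctx.clone(), engine.clone()).await?;
    let (Some(lower), Some(upper)) = (lower, upper) else {
        // no usable time window expr: run the query unchanged
        return Ok(sql.to_string());
    };
    // 2. turn the bounds into `ts >= lower AND ts <= upper`
    let to_lit = |ts: Timestamp| {
        let v = Value::from(ts);
        datafusion_expr::lit(v.try_to_scalar_value(&v.data_type()).unwrap())
    };
    let filter = datafusion_expr::col(&col_name)
        .gt_eq(to_lit(lower))
        .and(datafusion_expr::col(&col_name).lt_eq(to_lit(upper)));
    // 3. rewrite the *unoptimized* plan and unparse it back to SQL for the frontend
    let plan = sql_to_df_plan(query_ctx, engine, sql, false).await?;
    let plan = plan
        .rewrite(&mut AddFilterRewriter::new(filter))
        .with_context(|_| DatafusionSnafu {
            context: "Failed to add time window filter".to_string(),
        })?
        .data;
    df_plan_to_sql(&plan)
}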
#[cfg(test)]
mod test {
use datafusion_common::tree_node::TreeNode;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::{sql_to_df_plan, *};
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
use crate::test_utils::create_test_query_engine;
#[tokio::test]
async fn test_add_filter() {
let testcases = vec![
(
"SELECT number FROM numbers_with_ts GROUP BY number","SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
),
(
"SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
"SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)"
),
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
)
];
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for (before, after) in testcases {
let sql = before;
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
let plan = plan.rewrite(&mut add_filter).unwrap().data;
let new_sql = df_plan_to_sql(&plan).unwrap();
assert_eq!(after, new_sql);
}
}
#[tokio::test]
async fn test_plan_time_window_lower_bound() {
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let testcases = [
// same alias is not same column
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"
),
// complex time window index
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
Timestamp::new(23, TimeUnit::Millisecond),
("ts".to_string(), None, None),
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
),
// time index
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// on spot
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(0, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// different time unit
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other fields
(
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other pks
(
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
),
// subquery
(
"SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
),
// cte
(
"with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
),
// complex subquery without alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
),
// complex subquery alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
),
];
for (sql, current, expected, expected_unparsed) in testcases {
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
.await
.unwrap();
let real =
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
.await
.unwrap();
assert_eq!(expected, real);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let (col_name, lower, upper) = real;
let new_sql = if lower.is_some() {
let to_df_literal = |value| {
let value = Value::from(value);
value.try_to_scalar_value(&value.data_type()).unwrap()
};
let lower = to_df_literal(lower.unwrap());
let upper = to_df_literal(upper.unwrap());
let expr = col(&col_name)
.gt_eq(lit(lower))
.and(col(&col_name).lt_eq(lit(upper)));
let mut add_filter = AddFilterRewriter::new(expr);
let plan = plan.rewrite(&mut add_filter).unwrap().data;
df_plan_to_sql(&plan).unwrap()
} else {
sql.to_string()
};
assert_eq!(expected_unparsed, new_sql);
}
}
}

View File

@@ -0,0 +1,776 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use api::v1::flow::FlowResponse;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::table_info::TableInfoManager;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::TreeNode;
use datatypes::value::Value;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use table::metadata::TableId;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::sync::{oneshot, RwLock};
use tokio::time::Instant;
use super::frontend_client::FrontendClient;
use super::{df_plan_to_sql, AddFilterRewriter, TimeWindowExpr};
use crate::adapter::{CreateFlowArgs, FlowId, TableName};
use crate::error::{
DatafusionSnafu, DatatypesSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu,
TimeSnafu, UnexpectedSnafu,
};
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
use crate::recording_rules::{find_plan_time_window_bound, find_time_window_expr, sql_to_df_plan};
use crate::Error;
/// TODO(discord9): make those constants configurable
/// The default rule engine query timeout is 10 minutes
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);
/// Will output a warn log for any query that runs for more than 1 minute, and again every minute while that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
/// TODO(discord9): determine how to configure refresh rate
pub struct RecordingRuleEngine {
tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
frontend_client: Arc<FrontendClient>,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
engine: QueryEngineRef,
}
impl RecordingRuleEngine {
pub fn new(
frontend_client: Arc<FrontendClient>,
engine: QueryEngineRef,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
) -> Self {
Self {
tasks: Default::default(),
shutdown_txs: Default::default(),
frontend_client,
flow_metadata_manager,
table_meta,
engine,
}
}
pub async fn handle_inserts(
&self,
request: api::v1::region::InsertRequests,
) -> Result<FlowResponse, Error> {
let table_info_mgr = self.table_meta.table_info_manager();
let mut group_by_table_name: HashMap<TableName, Vec<api::v1::Rows>> = HashMap::new();
for r in request.requests {
let tid = RegionId::from(r.region_id).table_id();
let name = get_table_name(table_info_mgr, &tid).await?;
let entry = group_by_table_name.entry(name).or_default();
if let Some(rows) = r.rows {
entry.push(rows);
}
}
for (_flow_id, task) in self.tasks.read().await.iter() {
let src_table_names = &task.source_table_names;
for src_table_name in src_table_names {
if let Some(entry) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.time_window_expr else {
continue;
};
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
let mut state = task.state.write().await;
state
.dirty_time_windows
.add_lower_bounds(involved_time_windows.into_iter());
}
}
}
Ok(Default::default())
}
}
async fn get_table_name(zelf: &TableInfoManager, table_id: &TableId) -> Result<TableName, Error> {
zelf.get(*table_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", table_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])
}
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
impl RecordingRuleEngine {
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
let CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment: _,
sql,
flow_options,
query_ctx,
} = args;
// or replace logic
{
let is_exist = self.tasks.read().await.contains_key(&flow_id);
match (create_if_not_exists, or_replace, is_exist) {
// if replace, ignore that old flow exists
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// already exists, and not replace, return None
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// continue as normal
(_, _, false) => (),
}
}
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
ensure!(
flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
UnexpectedSnafu {
reason: format!("Flow type is not RecordingRule nor None, got {flow_type:?}")
}
);
let Some(query_ctx) = query_ctx else {
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
.fail()?
};
let query_ctx = Arc::new(query_ctx);
let mut source_table_names = Vec::new();
for src_id in source_table_ids {
let table_name = self
.table_meta
.table_info_manager()
.get(src_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", src_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])?;
source_table_names.push(table_name);
}
let (tx, rx) = oneshot::channel();
let plan = sql_to_df_plan(query_ctx.clone(), self.engine.clone(), &sql, true).await?;
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
&plan,
self.engine.engine_state().catalog_manager().clone(),
query_ctx.clone(),
)
.await?;
let phy_expr = time_window_expr
.map(|expr| TimeWindowExpr::from_expr(&expr, &column_name, &df_schema))
.transpose()?;
let task = RecordingRuleTask::new(
flow_id,
&sql,
phy_expr,
expire_after,
sink_table_name,
source_table_names,
query_ctx,
rx,
);
let task_inner = task.clone();
let engine = self.engine.clone();
let frontend = self.frontend_client.clone();
// TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
let _handle = common_runtime::spawn_global(async move {
match task_inner.start_executing(engine, frontend).await {
Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
Err(err) => common_telemetry::error!(
"Flow {} encounter unrecoverable error: {err:?}",
task_inner.flow_id
),
}
});
// TODO(discord9): deal with replace logic
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
drop(replaced_old_task_opt);
self.shutdown_txs.write().await.insert(flow_id, tx);
Ok(Some(flow_id))
}
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
if self.tasks.write().await.remove(&flow_id).is_none() {
warn!("Flow {flow_id} not found in tasks")
}
let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
UnexpectedSnafu {
reason: format!("Can't found shutdown tx for flow {flow_id}"),
}
.fail()?
};
if tx.send(()).is_err() {
warn!("Fail to shutdown flow {flow_id} due to receiver already dropped, maybe flow {flow_id} is already dropped?")
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct RecordingRuleTask {
flow_id: FlowId,
query: String,
time_window_expr: Option<TimeWindowExpr>,
/// in seconds
expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: HashSet<[String; 3]>,
state: Arc<RwLock<RecordingRuleState>>,
}
impl RecordingRuleTask {
#[allow(clippy::too_many_arguments)]
pub fn new(
flow_id: FlowId,
query: &str,
time_window_expr: Option<TimeWindowExpr>,
expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: Vec<[String; 3]>,
query_ctx: QueryContextRef,
shutdown_rx: oneshot::Receiver<()>,
) -> Self {
Self {
flow_id,
query: query.to_string(),
time_window_expr,
expire_after,
sink_table_name,
source_table_names: source_table_names.into_iter().collect(),
state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
}
}
}
impl RecordingRuleTask {
/// This should be called in a new tokio task
pub async fn start_executing(
&self,
engine: QueryEngineRef,
frontend_client: Arc<FrontendClient>,
) -> Result<(), Error> {
// only the first query doesn't need an upper bound
let mut is_first = true;
loop {
// FIXME(discord9): test whether requiring an upper bound also works
let new_query = self.gen_query_with_time_window(engine.clone()).await?;
let insert_into = if let Some(new_query) = new_query {
format!(
"INSERT INTO {}.{}.{} {}",
self.sink_table_name[0],
self.sink_table_name[1],
self.sink_table_name[2],
new_query
)
} else {
tokio::time::sleep(MIN_REFRESH_DURATION).await;
continue;
};
if is_first {
is_first = false;
}
let instant = Instant::now();
let flow_id = self.flow_id;
let db_client = frontend_client.get_database_client().await?;
let peer_addr = db_client.peer.addr;
debug!(
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
self.expire_after, peer_addr, &insert_into
);
let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
let res = db_client.database.sql(&insert_into).await;
drop(timer);
let elapsed = instant.elapsed();
if let Ok(res1) = &res {
debug!(
"Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
elapsed
);
} else if let Err(res) = &res {
warn!(
"Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
peer_addr, elapsed, &insert_into
);
}
// record slow query
if elapsed >= SLOW_QUERY_THRESHOLD {
warn!(
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
peer_addr, elapsed, &insert_into
);
METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
.with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
.observe(elapsed.as_secs_f64());
}
self.state
.write()
.await
.after_query_exec(elapsed, res.is_ok());
let sleep_until = {
let mut state = self.state.write().await;
match state.shutdown_rx.try_recv() {
Ok(()) => break Ok(()),
Err(TryRecvError::Closed) => {
warn!("Unexpected shutdown flow {flow_id}, shutdown anyway");
break Ok(());
}
Err(TryRecvError::Empty) => (),
}
state.get_next_start_query_time(None)
};
tokio::time::sleep_until(sleep_until).await;
}
}
/// Merge the dirty time windows and use at most the first `MAX_FILTER_NUM` of them in the query
async fn gen_query_with_time_window(
&self,
engine: QueryEngineRef,
) -> Result<Option<String>, Error> {
let query_ctx = self.state.read().await.query_ctx.clone();
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let low_bound = self
.expire_after
.map(|e| since_the_epoch.as_secs() - e as u64);
let Some(low_bound) = low_bound else {
return Ok(Some(self.query.clone()));
};
let low_bound = Timestamp::new_second(low_bound as i64);
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, true).await?;
// TODO(discord9): find more time windows!
let (col_name, lower, upper) =
find_plan_time_window_bound(&plan, low_bound, query_ctx.clone(), engine.clone())
.await?;
let new_sql = {
let expr = {
match (lower, upper) {
(Some(l), Some(u)) => {
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
reason: format!("Can't get window size from {u:?} - {l:?}"),
})?;
self.state
.write()
.await
.dirty_time_windows
.gen_filter_exprs(&col_name, lower, window_size)?
}
_ => {
warn!(
"Flow id = {:?}, can't get window size: lower={lower:?}, upper={upper:?}, using the same query", self.flow_id
);
// since no time window lower/upper bound is found, just return the original query
return Ok(Some(self.query.clone()));
}
}
};
debug!(
"Flow id={:?}, Generated filter expr: {:?}",
self.flow_id,
expr.as_ref()
.map(|expr| expr_to_sql(expr).with_context(|_| DatafusionSnafu {
context: format!("Failed to generate filter expr from {expr:?}"),
}))
.transpose()?
.map(|s| s.to_string())
);
let Some(expr) = expr else {
// no new data, hence no need to update
debug!("Flow id={:?}, no new data, not update", self.flow_id);
return Ok(None);
};
let mut add_filter = AddFilterRewriter::new(expr);
// make an unoptimized plan for clearer unparsing
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
let plan = plan
.clone()
.rewrite(&mut add_filter)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {plan:?}"),
})?
.data;
df_plan_to_sql(&plan)?
};
Ok(Some(new_sql))
}
}
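// Illustrative sketch (not part of this patch): the statement the rule engine ends
// up sending to the frontend is the time-window-filtered query produced above,
// wrapped in an `INSERT INTO` that targets the sink table. The sink and source
// table names below are hypothetical, and the WHERE clause mirrors the filters
// produced by `DirtyTimeWindows::gen_filter_exprs`.
fn example_insert_statement() -> String {
    let sink_table_name = ["greptime", "public", "sink_table"];
    let rewritten_query = "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window \
        FROM numbers_with_ts \
        WHERE ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP) AND ts < CAST('1970-01-01 00:05:00' AS TIMESTAMP) \
        GROUP BY date_bin('5 minutes', numbers_with_ts.ts)";
    format!(
        "INSERT INTO {}.{}.{} {}",
        sink_table_name[0], sink_table_name[1], sink_table_name[2], rewritten_query
    )
}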
#[derive(Debug)]
pub struct RecordingRuleState {
query_ctx: QueryContextRef,
/// time when the last query completed
last_update_time: Instant,
/// duration of the last query
last_query_duration: Duration,
/// Dirty time windows that need to be updated,
/// a non-overlapping mapping of `start -> end`
dirty_time_windows: DirtyTimeWindows,
exec_state: ExecState,
shutdown_rx: oneshot::Receiver<()>,
}
#[derive(Debug, Clone, Default)]
pub struct DirtyTimeWindows {
windows: BTreeMap<Timestamp, Option<Timestamp>>,
}
fn to_df_literal(value: Timestamp) -> Result<datafusion_common::ScalarValue, Error> {
let value = Value::from(value);
let value = value
.try_to_scalar_value(&value.data_type())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert to scalar value: {}", value),
})?;
Ok(value)
}
impl DirtyTimeWindows {
/// Time window merge distance
const MERGE_DIST: i32 = 3;
/// Maximum number of filters allowed in a single query
const MAX_FILTER_NUM: usize = 20;
/// Add lower bounds to the dirty time windows. Upper bounds are ignored.
///
/// # Arguments
///
/// * `lower_bounds` - An iterator of lower bounds to be added.
pub fn add_lower_bounds(&mut self, lower_bounds: impl Iterator<Item = Timestamp>) {
for lower_bound in lower_bounds {
let entry = self.windows.entry(lower_bound);
entry.or_insert(None);
}
}
/// Generate filter expressions, consuming at most `MAX_FILTER_NUM` of the stored time windows
pub fn gen_filter_exprs(
&mut self,
col_name: &str,
expire_lower_bound: Option<Timestamp>,
window_size: chrono::Duration,
) -> Result<Option<datafusion_expr::Expr>, Error> {
debug!(
"expire_lower_bound: {:?}, window_size: {:?}",
expire_lower_bound.map(|t| t.to_iso8601_string()),
window_size
);
self.merge_dirty_time_windows(window_size)?;
if self.windows.len() > Self::MAX_FILTER_NUM {
warn!(
"Too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong",
self.windows.len(),
Self::MAX_FILTER_NUM
);
}
// get the first `MAX_FILTER_NUM` time windows
let nth = self
.windows
.iter()
.nth(Self::MAX_FILTER_NUM)
.map(|(key, _)| *key);
let first_nth = {
if let Some(nth) = nth {
let mut after = self.windows.split_off(&nth);
std::mem::swap(&mut self.windows, &mut after);
after
} else {
std::mem::take(&mut self.windows)
}
};
let mut expr_lst = vec![];
for (start, end) in first_nth.into_iter() {
debug!(
"Time window start: {:?}, end: {:?}",
start.to_iso8601_string(),
end.map(|t| t.to_iso8601_string())
);
let start = if let Some(expire) = expire_lower_bound {
start.max(expire)
} else {
start
};
// filter out expired time window
if let Some(end) = end {
if end <= start {
continue;
}
}
use datafusion_expr::{col, lit};
let lower = to_df_literal(start)?;
let upper = end.map(to_df_literal).transpose()?;
let expr = if let Some(upper) = upper {
col(col_name)
.gt_eq(lit(lower))
.and(col(col_name).lt(lit(upper)))
} else {
col(col_name).gt_eq(lit(lower))
};
expr_lst.push(expr);
}
let expr = expr_lst.into_iter().reduce(|a, b| a.and(b));
Ok(expr)
}
/// Merge time windows that overlap or are too close to each other
pub fn merge_dirty_time_windows(&mut self, window_size: chrono::Duration) -> Result<(), Error> {
let mut new_windows = BTreeMap::new();
let mut prev_tw = None;
for (lower_bound, upper_bound) in std::mem::take(&mut self.windows) {
let Some(prev_tw) = &mut prev_tw else {
prev_tw = Some((lower_bound, upper_bound));
continue;
};
let std_window_size = window_size.to_std().map_err(|e| {
InternalSnafu {
reason: e.to_string(),
}
.build()
})?;
// if cur.lower - prev.upper <= window_size * MERGE_DIST, merge the two windows
let prev_upper = prev_tw
.1
.unwrap_or(prev_tw.0.add_duration(std_window_size).context(TimeSnafu)?);
prev_tw.1 = Some(prev_upper);
let cur_upper = upper_bound.unwrap_or(
lower_bound
.add_duration(std_window_size)
.context(TimeSnafu)?,
);
if lower_bound
.sub(&prev_upper)
.map(|dist| dist <= window_size * Self::MERGE_DIST)
.unwrap_or(false)
{
prev_tw.1 = Some(cur_upper);
} else {
new_windows.insert(prev_tw.0, prev_tw.1);
*prev_tw = (lower_bound, Some(cur_upper));
}
}
if let Some(prev_tw) = prev_tw {
new_windows.insert(prev_tw.0, prev_tw.1);
}
self.windows = new_windows;
Ok(())
}
}
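// Illustrative sketch (not part of this patch): two 5-minute dirty windows that sit
// within the merge distance collapse into a single range and produce one
// `ts >= .. AND ts < ..` filter; the column name `ts` and the window size are
// assumptions made for the example.
fn example_gen_filter(
    mut dirty: DirtyTimeWindows,
) -> Result<Option<datafusion_expr::Expr>, Error> {
    dirty.add_lower_bounds(
        vec![Timestamp::new_second(0), Timestamp::new_second(5 * 60)].into_iter(),
    );
    // merges to [0s, 10min) and yields roughly `ts >= '1970-01-01T00:00:00' AND ts < '1970-01-01T00:10:00'`
    dirty.gen_filter_exprs("ts", None, chrono::Duration::seconds(5 * 60))
}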
impl RecordingRuleState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
Self {
query_ctx,
last_update_time: Instant::now(),
last_query_duration: Duration::from_secs(0),
dirty_time_windows: Default::default(),
exec_state: ExecState::Idle,
shutdown_rx,
}
}
/// Called after the last query is done.
/// `is_succ` indicates whether the last query was successful.
pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
self.exec_state = ExecState::Idle;
self.last_query_duration = elapsed;
self.last_update_time = Instant::now();
}
/// Wait at least `MIN_REFRESH_DURATION` and at most `min(last_query_duration, max_timeout)` before starting the next query
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration);
let next_duration = next_duration.max(MIN_REFRESH_DURATION);
self.last_update_time + next_duration
}
}
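// Illustrative sketch (not part of this patch) of the back-off rule above: a query
// that finished in 2 seconds with no explicit timeout still waits the 5-second
// MIN_REFRESH_DURATION, while a 30-second query waits the full 30 seconds before
// the next run.
fn example_next_tick(state: &RecordingRuleState) -> Instant {
    state.get_next_start_query_time(None)
}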
#[derive(Debug, Clone)]
enum ExecState {
Idle,
Executing,
}
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_merge_dirty_time_windows() {
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
.unwrap();
// just enough to merge
assert_eq!(
dirty.windows,
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)])
);
// separate time window
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
.unwrap();
// too far apart to merge, so the windows stay separate
assert_eq!(
BTreeMap::from([
(
Timestamp::new_second(0),
Some(Timestamp::new_second(5 * 60))
),
(
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
Some(Timestamp::new_second(
(3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)
]),
dirty.windows
);
// overlapping
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
.unwrap();
// within merge distance, so merged into one window
assert_eq!(
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
),]),
dirty.windows
);
}
}

View File

@@ -0,0 +1,150 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Frontend client used to run a flow as a recording rule, i.e. a time-window-aware normal query triggered on every tick configured by the user
use std::sync::Arc;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use meta_client::client::MetaClient;
use snafu::ResultExt;
use crate::error::{ExternalSnafu, UnexpectedSnafu};
use crate::recording_rules::engine::DEFAULT_RULE_ENGINE_QUERY_TIMEOUT;
use crate::Error;
fn default_channel_mgr() -> ChannelManager {
let cfg = ChannelConfig::new().timeout(DEFAULT_RULE_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
}
fn client_from_urls(addrs: Vec<String>) -> Client {
Client::with_manager_and_urls(default_channel_mgr(), addrs)
}
/// A simple frontend client able to execute sql using grpc protocol
#[derive(Debug)]
pub enum FrontendClient {
Distributed {
meta_client: Arc<MetaClient>,
},
Standalone {
/// For simplicity, gRPC is still used even in standalone mode.
/// Note that the client here should be lazy, so it can wait until the frontend is booted before making a connection.
/// TODO(discord9): don't use gRPC in standalone mode
database_client: DatabaseWithPeer,
},
}
#[derive(Debug, Clone)]
pub struct DatabaseWithPeer {
pub database: Database,
pub peer: Peer,
}
impl DatabaseWithPeer {
fn new(database: Database, peer: Peer) -> Self {
Self { database, peer }
}
}
impl FrontendClient {
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
Self::Distributed { meta_client }
}
pub fn from_static_grpc_addr(addr: String) -> Self {
let peer = Peer {
id: 0,
addr: addr.clone(),
};
let client = client_from_urls(vec![addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Self::Standalone {
database_client: DatabaseWithPeer::new(database, peer),
}
}
}
impl FrontendClient {
async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
let Self::Distributed { meta_client, .. } = self else {
return Ok(vec![]);
};
let cluster_client = meta_client
.cluster_client()
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cluster_id = meta_client.id().0;
let prefix = NodeInfoKey::key_prefix_with_role(cluster_id, Role::Frontend);
let req = RangeRequest::new().with_prefix(prefix);
let resp = cluster_client
.range(req)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let mut res = Vec::with_capacity(resp.kvs.len());
for kv in resp.kvs {
let key = NodeInfoKey::try_from(kv.key)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let val = NodeInfo::try_from(kv.value)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
res.push((key, val));
}
Ok(res)
}
/// Get the database client for the frontend with the largest `last_activity_ts`
async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
if let Self::Standalone { database_client } = self {
return Ok(database_client.clone());
}
let frontends = self.scan_for_frontend().await?;
let mut last_activity_ts = i64::MIN;
let mut peer = None;
for (_key, val) in frontends.iter() {
if val.last_activity_ts > last_activity_ts {
last_activity_ts = val.last_activity_ts;
peer = Some(val.peer.clone());
}
}
let Some(peer) = peer else {
UnexpectedSnafu {
reason: format!("No frontend available: {:?}", frontends),
}
.fail()?
};
let client = client_from_urls(vec![peer.addr.clone()]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Ok(DatabaseWithPeer::new(database, peer))
}
/// Get a database client, and possibly update it before returning.
pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
match self {
Self::Standalone { database_client } => Ok(database_client.clone()),
Self::Distributed { meta_client: _ } => self.get_last_active_frontend().await,
}
}
}
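// Illustrative sketch (not part of this patch): in standalone mode the engine builds
// the client from a static gRPC address (the address below is hypothetical) and
// issues SQL through the same `Database::sql` call that `RecordingRuleTask` uses.
async fn example_run_on_frontend() -> Result<(), Error> {
    let frontend = FrontendClient::from_static_grpc_addr("127.0.0.1:4001".to_string());
    let db = frontend.get_database_client().await?;
    let _res = db.database.sql("SELECT 1").await;
    Ok(())
}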

View File

@@ -57,6 +57,7 @@ use crate::error::{
};
use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::recording_rules::{FrontendClient, RecordingRuleEngine};
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions};
@@ -245,6 +246,7 @@ impl FlownodeInstance {
self.server.shutdown().await.context(ShutdownServerSnafu)?;
if let Some(task) = &self.heartbeat_task {
info!("Close heartbeat task for flownode");
task.shutdown();
}
@@ -271,6 +273,8 @@ pub struct FlownodeBuilder {
heartbeat_task: Option<HeartbeatTask>,
/// receive a oneshot sender to send state size report
state_report_handler: Option<StateReportHandler>,
/// Client to send sql to frontend
frontend_client: Arc<FrontendClient>,
}
impl FlownodeBuilder {
@@ -281,6 +285,7 @@ impl FlownodeBuilder {
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
flow_metadata_manager: FlowMetadataManagerRef,
frontend_client: Arc<FrontendClient>,
) -> Self {
Self {
opts,
@@ -290,6 +295,7 @@ impl FlownodeBuilder {
flow_metadata_manager,
heartbeat_task: None,
state_report_handler: None,
frontend_client,
}
}
@@ -447,7 +453,14 @@ impl FlownodeBuilder {
let node_id = self.opts.node_id.map(|id| id as u32);
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
let rule_engine = RecordingRuleEngine::new(
self.frontend_client.clone(),
query_engine.clone(),
self.flow_metadata_manager.clone(),
table_meta.clone(),
);
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta, rule_engine);
for worker_id in 0..num_workers {
let (tx, rx) = oneshot::channel();

View File

@@ -86,7 +86,8 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
let schema = vec![
datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
.with_time_index(true),
];
let mut columns = vec![];
let numbers = (1..=10).collect_vec();

View File

@@ -35,10 +35,11 @@ use servers::error::{
CatalogSnafu, CollectRecordbatchSnafu, DataFusionSnafu, Result as ServerResult,
TableNotFoundSnafu,
};
use servers::http::jaeger::{QueryTraceParams, FIND_TRACES_COLS};
use servers::http::jaeger::QueryTraceParams;
use servers::otlp::trace::{
DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_KIND_COLUMN,
SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN, TRACE_TABLE_NAME,
DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_ID_COLUMN,
SPAN_KIND_COLUMN, SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
TRACE_TABLE_NAME,
};
use servers::query_handler::JaegerQueryHandler;
use session::context::QueryContextRef;
@@ -101,9 +102,16 @@ impl JaegerQueryHandler for Instance {
}
async fn get_trace(&self, ctx: QueryContextRef, trace_id: &str) -> ServerResult<Output> {
// It's equivalent to `SELECT trace_id, timestamp, duration_nano, service_name, span_name, span_id, span_attributes, resource_attributes, parent_span_id
// FROM {db}.{trace_table} WHERE trace_id = '{trace_id}'`.
let selects: Vec<Expr> = FIND_TRACES_COLS.clone();
// It's equivalent to `SELECT trace_id, timestamp, duration_nano, service_name, span_name, span_id, span_attributes FROM {db}.{trace_table} WHERE trace_id = '{trace_id}'`.
let selects = vec![
col(TRACE_ID_COLUMN),
col(TIMESTAMP_COLUMN),
col(DURATION_NANO_COLUMN),
col(SERVICE_NAME_COLUMN),
col(SPAN_NAME_COLUMN),
col(SPAN_ID_COLUMN),
col(SPAN_ATTRIBUTES_COLUMN),
];
let filters = vec![col(TRACE_ID_COLUMN).eq(lit(trace_id))];
@@ -125,7 +133,15 @@ impl JaegerQueryHandler for Instance {
ctx: QueryContextRef,
query_params: QueryTraceParams,
) -> ServerResult<Output> {
let selects: Vec<Expr> = FIND_TRACES_COLS.clone();
let selects = vec![
col(TRACE_ID_COLUMN),
col(TIMESTAMP_COLUMN),
col(DURATION_NANO_COLUMN),
col(SERVICE_NAME_COLUMN),
col(SPAN_NAME_COLUMN),
col(SPAN_ID_COLUMN),
col(SPAN_ATTRIBUTES_COLUMN),
];
let mut filters = vec![];

View File

@@ -1,23 +0,0 @@
[package]
name = "ingester"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
clap.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datanode.workspace = true
meta-client.workspace = true
mito2.workspace = true
object-store.workspace = true
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
sst-convert.workspace = true
tokio.workspace = true
toml.workspace = true
[lints]
workspace = true

View File

@@ -1,294 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use clap::Parser;
use common_telemetry::info;
use common_time::timestamp::TimeUnit;
use datanode::config::StorageConfig;
use meta_client::MetaClientOptions;
use mito2::config::MitoConfig;
use mito2::sst::file::IndexType;
use mito2::sst::parquet::SstInfo;
use serde::{Deserialize, Serialize};
use sst_convert::converter::{InputFile, InputFileType, SstConverterBuilder};
use tokio::sync::oneshot;
#[derive(Parser, Debug)]
#[command(version, about = "Greptime Ingester", long_about = None)]
struct Args {
/// Input directory
#[arg(short, long)]
input_dir: String,
/// Directory of input parquet files, relative to input_dir
#[arg(short, long)]
parquet_dir: Option<String>,
/// Directory of input json files, relative to input_dir
#[arg(short, long)]
remote_write_dir: Option<String>,
/// Config file
#[arg(short, long)]
cfg: String,
/// DB HTTP address
#[arg(short, long)]
db_http_addr: String,
/// Output path for the converted SST files.
/// If it is not None, the converted SST files will be written to the specified path
/// in the `input_store`.
/// This is for debugging purposes.
#[arg(short, long)]
sst_output_path: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
struct IngesterConfig {
meta_client: MetaClientOptions,
storage: StorageConfig,
mito: MitoConfig,
}
pub const APP_NAME: &str = "greptime-ingester";
#[tokio::main]
async fn main() {
let _guard = common_telemetry::init_global_logging(
APP_NAME,
&Default::default(),
&Default::default(),
None,
);
let args = Args::parse();
let cfg_file = std::fs::read_to_string(&args.cfg).expect("Failed to read config file");
let cfg: IngesterConfig = toml::from_str(&cfg_file).expect("Failed to parse config");
let sst_builder = {
let mut builder = SstConverterBuilder::new_fs(args.input_dir)
.with_meta_options(cfg.meta_client)
.with_storage_config(cfg.storage)
.with_config(cfg.mito);
if let Some(output_path) = args.sst_output_path {
builder = builder.with_output_path(output_path);
}
builder
};
let sst_converter = sst_builder
.clone()
.build()
.await
.expect("Failed to build sst converter");
let input_store = sst_converter.input_store.clone();
if let Some(parquet_dir) = args.parquet_dir {
// using opendal to read parquet files in given input object store
let all_parquets = input_store
.list(&parquet_dir)
.await
.expect("Failed to list parquet files");
info!("Listed all files in parquet directory: {:?}", all_parquets);
let all_parquets = all_parquets
.iter()
.filter(|parquet| parquet.name().ends_with(".parquet") && parquet.metadata().is_file())
.collect::<Vec<_>>();
let input_files = all_parquets
.iter()
.map(|parquet| {
let full_table_name = parquet.name().split("-").next().unwrap();
let (catalog_name, schema_name, table_name) = extract_name(full_table_name);
info!(
"catalog: {}, schema: {}, table: {}",
catalog_name, schema_name, table_name
);
InputFile {
catalog: catalog_name.to_string(),
schema: schema_name.to_string(),
table: table_name.to_string(),
path: parquet.path().to_string(),
file_type: InputFileType::Parquet,
}
})
.collect::<Vec<_>>();
convert_and_send(&input_files, sst_builder.clone(), &args.db_http_addr).await;
}
if let Some(remote_write_dir) = args.remote_write_dir {
// using opendal to read parquet files in given input object store
let all_parquets = input_store
.list(&remote_write_dir)
.await
.expect("Failed to list parquet files");
let all_parquets = all_parquets
.iter()
.filter(|parquet| parquet.name().ends_with(".parquet") && parquet.metadata().is_file())
.collect::<Vec<_>>();
let input_files = all_parquets
.iter()
.map(|parquet| {
let full_table_name = parquet.name().split("-").next().unwrap();
let (catalog_name, schema_name, table_name) = extract_name(full_table_name);
info!(
"catalog: {}, schema: {}, table: {}",
catalog_name, schema_name, table_name
);
InputFile {
catalog: catalog_name.to_string(),
schema: schema_name.to_string(),
table: table_name.to_string(),
path: parquet.path().to_string(),
file_type: InputFileType::RemoteWrite,
}
})
.collect::<Vec<_>>();
convert_and_send(&input_files, sst_builder.clone(), &args.db_http_addr).await;
}
}
async fn convert_and_send(
input_files: &[InputFile],
sst_builder: SstConverterBuilder,
db_http_addr: &str,
) {
let table_names = input_files
.iter()
.map(|f| (f.schema.clone(), f.table.clone()))
.collect::<Vec<_>>();
let mut rxs = Vec::new();
// Spawn a task for each input file
info!("Spawning tasks for {} input files", input_files.len());
for input_file in input_files.iter() {
let (tx, rx) = oneshot::channel();
let sst_builder = sst_builder.clone();
let input_file = (*input_file).clone();
tokio::task::spawn(async move {
let mut sst_converter = sst_builder
.build()
.await
.expect("Failed to build sst converter");
let sst_info = sst_converter
.convert_one(&input_file)
.await
.expect("Failed to convert parquet files");
tx.send(sst_info).unwrap();
});
rxs.push(rx);
}
let mut sst_infos = Vec::new();
for rx in rxs {
sst_infos.push(rx.await.unwrap());
}
info!("Converted {} input files", sst_infos.len());
let ingest_reqs = table_names
.iter()
.zip(sst_infos.iter())
.flat_map(|(schema_name, sst_info)| {
sst_info
.ssts
.iter()
.map(|sst| to_ingest_sst_req(&schema_name.0, &schema_name.1, sst))
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();
// send ingest requests to DB
send_ingest_requests(db_http_addr, ingest_reqs)
.await
.unwrap();
}
fn extract_name(full_table_name: &str) -> (String, String, String) {
let mut names = full_table_name.split('.').rev();
let table_name = names.next().unwrap();
let schema_name = names.next().unwrap_or("public");
let catalog_name = names.next().unwrap_or("greptime");
(
catalog_name.to_string(),
schema_name.to_string(),
table_name.to_string(),
)
}
async fn send_ingest_requests(
addr: &str,
reqs: Vec<ClientIngestSstRequest>,
) -> Result<(), Box<dyn std::error::Error>> {
let client = reqwest::Client::new();
for req in reqs {
info!("ingesting sst: {req:?}");
let req = client.post(addr).json(&req);
let resp = req.send().await?;
info!("ingest response: {resp:?}");
}
Ok(())
}
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ClientIngestSstRequest {
schema: Option<String>,
table: String,
pub(crate) file_id: String,
pub(crate) min_ts: i64,
pub(crate) max_ts: i64,
pub(crate) file_size: u64,
pub(crate) rows: u32,
pub(crate) row_groups: u32,
/// Available indexes of the file.
pub available_indexes: Vec<IndexType>,
/// Size of the index file.
pub index_file_size: u64,
pub time_unit: u32,
}
fn to_ingest_sst_req(
schema_name: &str,
table_name: &str,
sst_info: &SstInfo,
) -> ClientIngestSstRequest {
let index_file_size = sst_info.index_metadata.file_size;
let available_indexs = sst_info.index_metadata.build_available_indexes();
ClientIngestSstRequest {
schema: Some(schema_name.to_string()),
table: table_name.to_string(),
file_id: sst_info.file_id.to_string(),
min_ts: sst_info.time_range.0.value(),
max_ts: sst_info.time_range.1.value(),
file_size: sst_info.file_size,
rows: sst_info.num_rows as _,
row_groups: sst_info.num_row_groups as _,
available_indexes: available_indexs.to_vec(),
index_file_size,
time_unit: match sst_info.time_range.0.unit() {
TimeUnit::Second => 0,
TimeUnit::Millisecond => 3,
TimeUnit::Microsecond => 6,
TimeUnit::Nanosecond => 9,
},
}
}

View File

@@ -31,13 +31,13 @@ fn main() {
#[tokio::main]
async fn run() {
let id = 2000u64;
let id = (1000u64, 2000u64);
let config = ChannelConfig::new()
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
.tcp_nodelay(true);
let channel_manager = ChannelManager::with_config(config);
let mut meta_client = MetaClientBuilder::datanode_default_options(id)
let mut meta_client = MetaClientBuilder::datanode_default_options(id.0, id.1)
.channel_manager(channel_manager)
.build();
meta_client.start(&["127.0.0.1:3002"]).await.unwrap();

View File

@@ -47,6 +47,7 @@ use common_meta::rpc::store::{
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use common_meta::rpc::KeyValue;
use common_meta::ClusterId;
use common_telemetry::info;
use futures::TryStreamExt;
use heartbeat::Client as HeartbeatClient;
@@ -60,7 +61,7 @@ use crate::error::{
Result,
};
pub type Id = u64;
pub type Id = (u64, u64);
const DEFAULT_ASK_LEADER_MAX_RETRY: usize = 3;
const DEFAULT_SUBMIT_DDL_MAX_RETRY: usize = 3;
@@ -80,18 +81,18 @@ pub struct MetaClientBuilder {
}
impl MetaClientBuilder {
pub fn new(member_id: u64, role: Role) -> Self {
pub fn new(cluster_id: ClusterId, member_id: u64, role: Role) -> Self {
Self {
id: member_id,
id: (cluster_id, member_id),
role,
..Default::default()
}
}
/// Returns the role of Frontend's default options.
pub fn frontend_default_options() -> Self {
pub fn frontend_default_options(cluster_id: ClusterId) -> Self {
// Frontend does not need a member id.
Self::new(0, Role::Frontend)
Self::new(cluster_id, 0, Role::Frontend)
.enable_store()
.enable_heartbeat()
.enable_procedure()
@@ -99,18 +100,19 @@ impl MetaClientBuilder {
}
/// Returns the role of Datanode's default options.
pub fn datanode_default_options(member_id: u64) -> Self {
Self::new(member_id, Role::Datanode)
pub fn datanode_default_options(cluster_id: ClusterId, member_id: u64) -> Self {
Self::new(cluster_id, member_id, Role::Datanode)
.enable_store()
.enable_heartbeat()
}
/// Returns the role of Flownode's default options.
pub fn flownode_default_options(member_id: u64) -> Self {
Self::new(member_id, Role::Flownode)
pub fn flownode_default_options(cluster_id: ClusterId, member_id: u64) -> Self {
Self::new(cluster_id, member_id, Role::Flownode)
.enable_store()
.enable_heartbeat()
.enable_procedure()
.enable_access_cluster_info()
}
pub fn enable_heartbeat(self) -> Self {
@@ -272,9 +274,15 @@ impl ClusterInfo for MetaClient {
let cluster_client = self.cluster_client()?;
let (get_metasrv_nodes, nodes_key_prefix) = match role {
None => (true, Some(NodeInfoKey::key_prefix())),
None => (
true,
Some(NodeInfoKey::key_prefix_with_cluster_id(self.id.0)),
),
Some(ClusterRole::Metasrv) => (true, None),
Some(role) => (false, Some(NodeInfoKey::key_prefix_with_role(role))),
Some(role) => (
false,
Some(NodeInfoKey::key_prefix_with_role(self.id.0, role)),
),
};
let mut nodes = if get_metasrv_nodes {
@@ -317,7 +325,7 @@ impl ClusterInfo for MetaClient {
async fn list_region_stats(&self) -> Result<Vec<RegionStat>> {
let cluster_kv_backend = Arc::new(self.cluster_client()?);
let range_prefix = DatanodeStatKey::prefix_key();
let range_prefix = DatanodeStatKey::key_prefix_with_cluster_id(self.id.0);
let req = RangeRequest::new().with_prefix(range_prefix);
let stream =
PaginationStream::new(cluster_kv_backend, req, 256, decode_stats).into_stream();
@@ -548,8 +556,6 @@ impl MetaClient {
#[cfg(test)]
mod tests {
use std::sync::atomic::{AtomicUsize, Ordering};
use api::v1::meta::{HeartbeatRequest, Peer};
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
use rand::Rng;
@@ -619,31 +625,31 @@ mod tests {
async fn test_meta_client_builder() {
let urls = &["127.0.0.1:3001", "127.0.0.1:3002"];
let mut meta_client = MetaClientBuilder::new(0, Role::Datanode)
let mut meta_client = MetaClientBuilder::new(0, 0, Role::Datanode)
.enable_heartbeat()
.build();
let _ = meta_client.heartbeat_client().unwrap();
assert!(meta_client.store_client().is_err());
meta_client.start(urls).await.unwrap();
let mut meta_client = MetaClientBuilder::new(0, Role::Datanode).build();
let mut meta_client = MetaClientBuilder::new(0, 0, Role::Datanode).build();
assert!(meta_client.heartbeat_client().is_err());
assert!(meta_client.store_client().is_err());
meta_client.start(urls).await.unwrap();
let mut meta_client = MetaClientBuilder::new(0, Role::Datanode)
let mut meta_client = MetaClientBuilder::new(0, 0, Role::Datanode)
.enable_store()
.build();
assert!(meta_client.heartbeat_client().is_err());
let _ = meta_client.store_client().unwrap();
meta_client.start(urls).await.unwrap();
let mut meta_client = MetaClientBuilder::new(2, Role::Datanode)
let mut meta_client = MetaClientBuilder::new(1, 2, Role::Datanode)
.enable_heartbeat()
.enable_store()
.build();
assert_eq!(2, meta_client.id());
assert_eq!(2, meta_client.id());
assert_eq!(1, meta_client.id().0);
assert_eq!(2, meta_client.id().1);
let _ = meta_client.heartbeat_client().unwrap();
let _ = meta_client.store_client().unwrap();
meta_client.start(urls).await.unwrap();
@@ -652,7 +658,7 @@ mod tests {
#[tokio::test]
async fn test_not_start_heartbeat_client() {
let urls = &["127.0.0.1:3001", "127.0.0.1:3002"];
let mut meta_client = MetaClientBuilder::new(0, Role::Datanode)
let mut meta_client = MetaClientBuilder::new(0, 0, Role::Datanode)
.enable_store()
.build();
meta_client.start(urls).await.unwrap();
@@ -663,7 +669,7 @@ mod tests {
#[tokio::test]
async fn test_not_start_store_client() {
let urls = &["127.0.0.1:3001", "127.0.0.1:3002"];
let mut meta_client = MetaClientBuilder::new(0, Role::Datanode)
let mut meta_client = MetaClientBuilder::new(0, 0, Role::Datanode)
.enable_heartbeat()
.build();
@@ -683,9 +689,6 @@ mod tests {
let tc = new_client("test_heartbeat").await;
let (sender, mut receiver) = tc.client.heartbeat().await.unwrap();
// send heartbeats
let request_sent = Arc::new(AtomicUsize::new(0));
let request_sent_clone = request_sent.clone();
let _handle = tokio::spawn(async move {
for _ in 0..5 {
let req = HeartbeatRequest {
@@ -696,24 +699,14 @@ mod tests {
..Default::default()
};
sender.send(req).await.unwrap();
request_sent_clone.fetch_add(1, Ordering::Relaxed);
}
});
let heartbeat_count = Arc::new(AtomicUsize::new(0));
let heartbeat_count_clone = heartbeat_count.clone();
let handle = tokio::spawn(async move {
while let Some(_resp) = receiver.message().await.unwrap() {
heartbeat_count_clone.fetch_add(1, Ordering::Relaxed);
let _handle = tokio::spawn(async move {
while let Some(res) = receiver.message().await.unwrap() {
assert_eq!(1000, res.header.unwrap().cluster_id);
}
});
handle.await.unwrap();
//+1 for the initial response
assert_eq!(
request_sent.load(Ordering::Relaxed) + 1,
heartbeat_count.load(Ordering::Relaxed)
);
}
#[tokio::test]

View File

@@ -272,7 +272,7 @@ mod test {
#[tokio::test]
async fn test_already_start() {
let mut client = Client::new(0, Role::Datanode, ChannelManager::default(), 3);
let mut client = Client::new((0, 0), Role::Datanode, ChannelManager::default(), 3);
client
.start(&["127.0.0.1:1000", "127.0.0.1:1001"])
.await
@@ -288,7 +288,7 @@ mod test {
#[tokio::test]
async fn test_heartbeat_stream() {
let (sender, mut receiver) = mpsc::channel::<HeartbeatRequest>(100);
let sender = HeartbeatSender::new(8, Role::Datanode, sender);
let sender = HeartbeatSender::new((8, 8), Role::Datanode, sender);
let _handle = tokio::spawn(async move {
for _ in 0..10 {
sender.send(HeartbeatRequest::default()).await.unwrap();
@@ -296,6 +296,7 @@ mod test {
});
while let Some(req) = receiver.recv().await {
let header = req.header.unwrap();
assert_eq!(8, header.cluster_id);
assert_eq!(8, header.member_id);
}
}

View File

@@ -255,7 +255,7 @@ mod test {
#[tokio::test]
async fn test_already_start() {
let mut client = Client::new(0, Role::Frontend, ChannelManager::default());
let mut client = Client::new((0, 0), Role::Frontend, ChannelManager::default());
client
.start(&["127.0.0.1:1000", "127.0.0.1:1001"])
.await
@@ -270,7 +270,7 @@ mod test {
#[tokio::test]
async fn test_start_with_duplicate_peers() {
let mut client = Client::new(0, Role::Frontend, ChannelManager::default());
let mut client = Client::new((0, 0), Role::Frontend, ChannelManager::default());
client
.start(&["127.0.0.1:1000", "127.0.0.1:1000", "127.0.0.1:1000"])
.await

View File

@@ -71,22 +71,23 @@ pub enum MetaClientType {
pub type MetaClientRef = Arc<client::MetaClient>;
pub async fn create_meta_client(
cluster_id: u64,
client_type: MetaClientType,
meta_client_options: &MetaClientOptions,
) -> error::Result<MetaClientRef> {
info!(
"Creating {:?} instance with Metasrv addrs {:?}",
client_type, meta_client_options.metasrv_addrs
"Creating {:?} instance from cluster {} with Metasrv addrs {:?}",
client_type, cluster_id, meta_client_options.metasrv_addrs
);
let mut builder = match client_type {
MetaClientType::Datanode { member_id } => {
MetaClientBuilder::datanode_default_options(member_id)
MetaClientBuilder::datanode_default_options(cluster_id, member_id)
}
MetaClientType::Flownode { member_id } => {
MetaClientBuilder::flownode_default_options(member_id)
MetaClientBuilder::flownode_default_options(cluster_id, member_id)
}
MetaClientType::Frontend => MetaClientBuilder::frontend_default_options(),
MetaClientType::Frontend => MetaClientBuilder::frontend_default_options(cluster_id),
};
let base_config = ChannelConfig::new()

View File

@@ -60,8 +60,8 @@ pub async fn mock_client_with_etcdstore(addr: &str) -> (MetaClient, MockMetaCont
}
pub async fn mock_client_by(server_addr: String, channel_manager: ChannelManager) -> MetaClient {
let id = 2000u64;
let mut meta_client = MetaClientBuilder::datanode_default_options(id)
let id = (1000u64, 2000u64);
let mut meta_client = MetaClientBuilder::datanode_default_options(id.0, id.1)
.enable_access_cluster_info()
.channel_manager(channel_manager)
.build();

View File

@@ -375,9 +375,13 @@ mod tests {
#[test]
fn test_to_stat_kv_map() {
let stat_key = DatanodeStatKey { node_id: 100 };
let stat_key = DatanodeStatKey {
cluster_id: 0,
node_id: 100,
};
let stat = Stat {
cluster_id: 0,
id: 100,
addr: "127.0.0.1:3001".to_string(),
..Default::default()
@@ -396,6 +400,7 @@ mod tests {
let stat_val = kv_map.get(&stat_key).unwrap();
let stat = stat_val.stats.first().unwrap();
assert_eq!(0, stat.cluster_id);
assert_eq!(100, stat.id);
assert_eq!("127.0.0.1:3001", stat.addr);
}

View File

@@ -15,6 +15,7 @@
use common_error::ext::BoxedError;
use common_meta::ddl::flow_meta::PartitionPeerAllocator;
use common_meta::peer::Peer;
use common_meta::ClusterId;
use snafu::ResultExt;
use crate::metasrv::{SelectorContext, SelectorRef};
@@ -33,9 +34,14 @@ impl FlowPeerAllocator {
#[async_trait::async_trait]
impl PartitionPeerAllocator for FlowPeerAllocator {
async fn alloc(&self, partitions: usize) -> common_meta::error::Result<Vec<Peer>> {
async fn alloc(
&self,
cluster_id: ClusterId,
partitions: usize,
) -> common_meta::error::Result<Vec<Peer>> {
self.selector
.select(
cluster_id,
&self.ctx,
SelectorOptions {
min_required_items: partitions,

View File

@@ -20,8 +20,8 @@ use std::time::{Duration, Instant};
use api::v1::meta::mailbox_message::Payload;
use api::v1::meta::{
HeartbeatRequest, HeartbeatResponse, MailboxMessage, RegionLease, ResponseHeader, Role,
PROTOCOL_VERSION,
HeartbeatRequest, HeartbeatResponse, MailboxMessage, RegionLease, RequestHeader,
ResponseHeader, Role, PROTOCOL_VERSION,
};
use check_leader_handler::CheckLeaderHandler;
use collect_cluster_info_handler::{
@@ -153,9 +153,13 @@ pub struct Pusher {
}
impl Pusher {
pub fn new(sender: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>) -> Self {
pub fn new(
sender: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>,
req_header: &RequestHeader,
) -> Self {
let res_header = ResponseHeader {
protocol_version: PROTOCOL_VERSION,
cluster_id: req_header.cluster_id,
..Default::default()
};
@@ -768,7 +772,7 @@ mod tests {
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::{MailboxMessage, Role};
use api::v1::meta::{MailboxMessage, RequestHeader, Role, PROTOCOL_VERSION};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::sequence::SequenceBuilder;
use tokio::sync::mpsc;
@@ -810,8 +814,12 @@ mod tests {
async fn push_msg_via_mailbox() -> (MailboxRef, MailboxReceiver) {
let datanode_id = 12;
let (pusher_tx, mut pusher_rx) = mpsc::channel(16);
let res_header = RequestHeader {
protocol_version: PROTOCOL_VERSION,
..Default::default()
};
let pusher_id = PusherId::new(Role::Datanode, datanode_id);
let pusher: Pusher = Pusher::new(pusher_tx);
let pusher: Pusher = Pusher::new(pusher_tx, &res_header);
let handler_group = HeartbeatHandlerGroup::default();
handler_group.register_pusher(pusher_id, pusher).await;

View File

@@ -262,11 +262,15 @@ mod tests {
let handler = CollectStatsHandler::default();
handle_request_many_times(ctx.clone(), &handler, 1).await;
let key = DatanodeStatKey { node_id: 101 };
let key = DatanodeStatKey {
cluster_id: 3,
node_id: 101,
};
let key: Vec<u8> = key.into();
let res = ctx.in_memory.get(&key).await.unwrap();
let kv = res.unwrap();
let key: DatanodeStatKey = kv.key.clone().try_into().unwrap();
assert_eq!(3, key.cluster_id);
assert_eq!(101, key.node_id);
let val: DatanodeStatValue = kv.value.try_into().unwrap();
// first new stat must be set in kv store immediately
@@ -291,6 +295,7 @@ mod tests {
for i in 1..=loop_times {
let mut acc = HeartbeatAccumulator {
stat: Some(Stat {
cluster_id: 3,
id: 101,
region_num: i as _,
..Default::default()

View File

@@ -101,6 +101,7 @@ mod tests {
}
}
acc.stat = Some(Stat {
cluster_id: 1,
id: 42,
region_stats: vec![new_region_stat(1), new_region_stat(2), new_region_stat(3)],
timestamp_millis: 1000,

View File

@@ -38,14 +38,17 @@ impl HeartbeatHandler for DatanodeKeepLeaseHandler {
_acc: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
let HeartbeatRequest { header, peer, .. } = req;
let Some(_header) = &header else {
let Some(header) = &header else {
return Ok(HandleControl::Continue);
};
let Some(peer) = &peer else {
return Ok(HandleControl::Continue);
};
let key = DatanodeLeaseKey { node_id: peer.id };
let key = DatanodeLeaseKey {
cluster_id: header.cluster_id,
node_id: peer.id,
};
let value = LeaseValue {
timestamp_millis: time_util::current_time_millis(),
node_addr: peer.addr.clone(),
@@ -77,14 +80,17 @@ impl HeartbeatHandler for FlownodeKeepLeaseHandler {
_acc: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
let HeartbeatRequest { header, peer, .. } = req;
let Some(_header) = &header else {
let Some(header) = &header else {
return Ok(HandleControl::Continue);
};
let Some(peer) = &peer else {
return Ok(HandleControl::Continue);
};
let key = FlownodeLeaseKey { node_id: peer.id };
let key = FlownodeLeaseKey {
cluster_id: header.cluster_id,
node_id: peer.id,
};
let value = LeaseValue {
timestamp_millis: time_util::current_time_millis(),
node_addr: peer.addr.clone(),

View File

@@ -64,6 +64,7 @@ impl HeartbeatHandler for RegionLeaseHandler {
};
let regions = stat.regions();
let cluster_id = stat.cluster_id;
let datanode_id = stat.id;
let RenewRegionLeasesResponse {
@@ -71,7 +72,7 @@ impl HeartbeatHandler for RegionLeaseHandler {
renewed,
} = self
.region_lease_keeper
.renew_region_leases(datanode_id, &regions)
.renew_region_leases(cluster_id, datanode_id, &regions)
.await?;
let renewed = renewed
@@ -152,6 +153,7 @@ mod test {
let peer = Peer::empty(datanode_id);
let follower_peer = Peer::empty(datanode_id + 1);
let table_info = new_test_table_info(table_id, vec![region_number]).into();
let cluster_id = 1;
let region_routes = vec![RegionRoute {
region: Region::new_test(region_id),
@@ -179,6 +181,7 @@ mod test {
let acc = &mut HeartbeatAccumulator::default();
acc.stat = Some(Stat {
cluster_id,
id: peer.id,
region_stats: vec![
new_empty_region_stat(region_id, RegionRole::Follower),
@@ -212,6 +215,7 @@ mod test {
let acc = &mut HeartbeatAccumulator::default();
acc.stat = Some(Stat {
cluster_id,
id: follower_peer.id,
region_stats: vec![
new_empty_region_stat(region_id, RegionRole::Follower),
@@ -245,6 +249,7 @@ mod test {
let acc = &mut HeartbeatAccumulator::default();
acc.stat = Some(Stat {
cluster_id,
id: follower_peer.id,
region_stats: vec![
new_empty_region_stat(region_id, RegionRole::Follower),
@@ -287,6 +292,7 @@ mod test {
let peer = Peer::empty(datanode_id);
let follower_peer = Peer::empty(datanode_id + 1);
let table_info = new_test_table_info(table_id, vec![region_number]).into();
let cluster_id = 1;
let region_routes = vec![
RegionRoute {
@@ -327,6 +333,7 @@ mod test {
let acc = &mut HeartbeatAccumulator::default();
acc.stat = Some(Stat {
cluster_id,
id: peer.id,
region_stats: vec![
new_empty_region_stat(region_id, RegionRole::Leader),

View File

@@ -28,15 +28,18 @@ impl HeartbeatHandler for ResponseHeaderHandler {
async fn handle(
&self,
_req: &HeartbeatRequest,
req: &HeartbeatRequest,
_ctx: &mut Context,
acc: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
let HeartbeatRequest { header, .. } = req;
let res_header = ResponseHeader {
protocol_version: PROTOCOL_VERSION,
cluster_id: header.as_ref().map_or(0, |h| h.cluster_id),
..Default::default()
};
acc.header = Some(res_header);
Ok(HandleControl::Continue)
}
}
@@ -45,7 +48,7 @@ impl HeartbeatHandler for ResponseHeaderHandler {
mod tests {
use std::sync::Arc;
use api::v1::meta::RequestHeader;
use api::v1::meta::{HeartbeatResponse, RequestHeader};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
@@ -87,7 +90,7 @@ mod tests {
};
let req = HeartbeatRequest {
header: Some(RequestHeader::new(2, Role::Datanode, W3cTrace::new())),
header: Some(RequestHeader::new((1, 2), Role::Datanode, W3cTrace::new())),
..Default::default()
};
let mut acc = HeartbeatAccumulator::default();
@@ -97,5 +100,12 @@ mod tests {
.handle(&req, &mut ctx, &mut acc)
.await
.unwrap();
let header = std::mem::take(&mut acc.header);
let res = HeartbeatResponse {
header,
mailbox_message: acc.into_mailbox_message(),
..Default::default()
};
assert_eq!(1, res.header.unwrap().cluster_id);
}
}
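
The handler now takes the actual request (rather than ignoring it) so it can echo the caller's `cluster_id` back in the `ResponseHeader`, defaulting to 0 when the request carries no header. A small stand-in sketch of that defaulting logic, with placeholder types and a placeholder `PROTOCOL_VERSION`:

```rust
#[derive(Debug)]
struct RequestHeader { cluster_id: u64 }

#[derive(Debug, PartialEq)]
struct ResponseHeader { protocol_version: u64, cluster_id: u64 }

const PROTOCOL_VERSION: u64 = 1; // placeholder; the real constant comes from the meta protocol

/// Mirror the request's cluster_id into the response header, falling back to 0
/// when the heartbeat request carried no header at all.
fn build_response_header(header: Option<&RequestHeader>) -> ResponseHeader {
    ResponseHeader {
        protocol_version: PROTOCOL_VERSION,
        cluster_id: header.map_or(0, |h| h.cluster_id),
    }
}

fn main() {
    assert_eq!(build_response_header(Some(&RequestHeader { cluster_id: 1 })).cluster_id, 1);
    assert_eq!(build_response_header(None).cluster_id, 0);
}
```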

View File

@@ -35,12 +35,20 @@ macro_rules! impl_from_str_lease_key {
.context(error::InvalidLeaseKeySnafu { key })?;
ensure!(caps.len() == 3, error::InvalidLeaseKeySnafu { key });
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self { node_id })
Ok(Self {
cluster_id,
node_id,
})
}
}
};
@@ -65,7 +73,7 @@ macro_rules! impl_try_from_lease_key {
type Error = error::Error;
fn try_from(key: $key_type) -> error::Result<Self> {
Ok(format!("{}-0-{}", $prefix, key.node_id).into_bytes())
Ok(format!("{}-{}-{}", $prefix, key.cluster_id, key.node_id).into_bytes())
}
}
};
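
The generated `FromStr`/`TryFrom` implementations above switch the key layout from `<prefix>-0-<node_id>` to `<prefix>-<cluster_id>-<node_id>`. Here is a self-contained sketch of that round trip, using `split_once` and plain `String` errors in place of the regex capture and the snafu contexts (`InvalidLeaseKeySnafu`, `ParseNumSnafu`); the prefix string in `main` is a placeholder, not the real constant.

```rust
#[derive(Debug, PartialEq)]
struct LeaseKey {
    cluster_id: u64,
    node_id: u64,
}

fn encode(prefix: &str, key: &LeaseKey) -> Vec<u8> {
    format!("{}-{}-{}", prefix, key.cluster_id, key.node_id).into_bytes()
}

fn decode(prefix: &str, bytes: &[u8]) -> Result<LeaseKey, String> {
    let s = std::str::from_utf8(bytes).map_err(|e| e.to_string())?;
    let rest = s
        .strip_prefix(prefix)
        .and_then(|r| r.strip_prefix('-'))
        .ok_or_else(|| format!("invalid lease key: {s}"))?;
    let (cluster_id, node_id) = rest
        .split_once('-')
        .ok_or_else(|| format!("invalid lease key: {s}"))?;
    Ok(LeaseKey {
        cluster_id: cluster_id
            .parse()
            .map_err(|_| format!("invalid cluster_id: {cluster_id}"))?,
        node_id: node_id
            .parse()
            .map_err(|_| format!("invalid node_id: {node_id}"))?,
    })
}

fn main() {
    let key = LeaseKey { cluster_id: 1, node_id: 101 };
    let bytes = encode("datanode-lease", &key);
    assert_eq!(decode("datanode-lease", &bytes).unwrap(), key);
}
```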

View File

@@ -15,6 +15,7 @@
use std::str::FromStr;
use common_meta::datanode::DatanodeStatKey;
use common_meta::ClusterId;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -41,18 +42,20 @@ lazy_static! {
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct DatanodeLeaseKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl DatanodeLeaseKey {
pub fn prefix_key() -> Vec<u8> {
format!("{DATANODE_LEASE_PREFIX}-0-").into_bytes()
pub fn prefix_key_by_cluster(cluster_id: ClusterId) -> Vec<u8> {
format!("{DATANODE_LEASE_PREFIX}-{cluster_id}-").into_bytes()
}
}
impl From<&DatanodeLeaseKey> for DatanodeStatKey {
fn from(lease_key: &DatanodeLeaseKey) -> Self {
DatanodeStatKey {
cluster_id: lease_key.cluster_id,
node_id: lease_key.node_id,
}
}
@@ -60,21 +63,22 @@ impl From<&DatanodeLeaseKey> for DatanodeStatKey {
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct InactiveRegionKey {
pub cluster_id: ClusterId,
pub node_id: u64,
pub region_id: u64,
}
impl InactiveRegionKey {
pub fn get_prefix_by_cluster() -> Vec<u8> {
format!("{}-0-", INACTIVE_REGION_PREFIX).into_bytes()
pub fn get_prefix_by_cluster(cluster_id: u64) -> Vec<u8> {
format!("{}-{}-", INACTIVE_REGION_PREFIX, cluster_id).into_bytes()
}
}
impl From<InactiveRegionKey> for Vec<u8> {
fn from(value: InactiveRegionKey) -> Self {
format!(
"{}-0-{}-{}",
INACTIVE_REGION_PREFIX, value.node_id, value.region_id
"{}-{}-{}-{}",
INACTIVE_REGION_PREFIX, value.cluster_id, value.node_id, value.region_id
)
.into_bytes()
}
@@ -93,8 +97,13 @@ impl FromStr for InactiveRegionKey {
error::InvalidInactiveRegionKeySnafu { key }
);
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let region_id = caps[3].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
@@ -102,7 +111,11 @@ impl FromStr for InactiveRegionKey {
err_msg: format!("invalid region_id: {region_id}"),
})?;
Ok(Self { node_id, region_id })
Ok(Self {
cluster_id,
node_id,
region_id,
})
}
}
@@ -122,17 +135,24 @@ mod tests {
#[test]
fn test_stat_key_round_trip() {
let key = DatanodeStatKey { node_id: 1 };
let key = DatanodeStatKey {
cluster_id: 0,
node_id: 1,
};
let key_bytes: Vec<u8> = key.into();
let new_key: DatanodeStatKey = key_bytes.try_into().unwrap();
assert_eq!(0, new_key.cluster_id);
assert_eq!(1, new_key.node_id);
}
#[test]
fn test_lease_key_round_trip() {
let key = DatanodeLeaseKey { node_id: 1 };
let key = DatanodeLeaseKey {
cluster_id: 0,
node_id: 1,
};
let key_bytes: Vec<u8> = key.clone().try_into().unwrap();
let new_key: DatanodeLeaseKey = key_bytes.try_into().unwrap();
@@ -142,16 +162,21 @@ mod tests {
#[test]
fn test_lease_key_to_stat_key() {
let lease_key = DatanodeLeaseKey { node_id: 101 };
let lease_key = DatanodeLeaseKey {
cluster_id: 1,
node_id: 101,
};
let stat_key: DatanodeStatKey = (&lease_key).into();
assert_eq!(1, stat_key.cluster_id);
assert_eq!(101, stat_key.node_id);
}
#[test]
fn test_inactive_region_key_round_trip() {
let key = InactiveRegionKey {
cluster_id: 0,
node_id: 1,
region_id: 2,
};
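
With the cluster id embedded in the key, `prefix_key_by_cluster` narrows a key scan to a single cluster's leases. A toy illustration against an in-memory `BTreeMap` standing in for the meta KV backend; the prefix string here is a placeholder rather than the real `DATANODE_LEASE_PREFIX` value.

```rust
use std::collections::BTreeMap;

/// Stand-in for DatanodeLeaseKey::prefix_key_by_cluster: every key of a given
/// cluster starts with "<prefix>-<cluster_id>-", so a prefix scan returns only
/// that cluster's leases.
fn prefix_key_by_cluster(cluster_id: u64) -> Vec<u8> {
    format!("datanode-lease-{cluster_id}-").into_bytes()
}

fn main() {
    // A toy key space standing in for the meta KV backend.
    let mut kv: BTreeMap<Vec<u8>, &str> = BTreeMap::new();
    kv.insert(b"datanode-lease-0-1".to_vec(), "cluster 0, node 1");
    kv.insert(b"datanode-lease-1-1".to_vec(), "cluster 1, node 1");
    kv.insert(b"datanode-lease-1-2".to_vec(), "cluster 1, node 2");

    let prefix = prefix_key_by_cluster(1);
    let cluster_1: Vec<_> = kv
        .iter()
        .filter(|(k, _)| k.starts_with(&prefix))
        .map(|(_, v)| *v)
        .collect();

    assert_eq!(cluster_1, vec!["cluster 1, node 1", "cluster 1, node 2"]);
}
```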

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_meta::ClusterId;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -25,12 +26,13 @@ lazy_static! {
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct FlownodeLeaseKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl FlownodeLeaseKey {
pub fn prefix_key_by_cluster() -> Vec<u8> {
format!("{FLOWNODE_LEASE_PREFIX}-0-").into_bytes()
pub fn prefix_key_by_cluster(cluster_id: ClusterId) -> Vec<u8> {
format!("{FLOWNODE_LEASE_PREFIX}-{cluster_id}-").into_bytes()
}
}
@@ -40,7 +42,10 @@ mod tests {
#[test]
fn test_lease_key_round_trip() {
let key = FlownodeLeaseKey { node_id: 1 };
let key = FlownodeLeaseKey {
cluster_id: 0,
node_id: 1,
};
let key_bytes: Vec<u8> = key.clone().try_into().unwrap();
let new_key: FlownodeLeaseKey = key_bytes.try_into().unwrap();

View File

@@ -18,7 +18,7 @@ use std::hash::Hash;
use common_error::ext::BoxedError;
use common_meta::kv_backend::KvBackend;
use common_meta::peer::{Peer, PeerLookupService};
use common_meta::{util, DatanodeId, FlownodeId};
use common_meta::{util, ClusterId, DatanodeId, FlownodeId};
use common_time::util as time_util;
use snafu::ResultExt;
@@ -35,12 +35,14 @@ fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
/// Looks up a [`Peer`] by [`ClusterId`] and [`DatanodeId`]; returns it only if it is alive within the given `lease_secs`
pub async fn lookup_datanode_peer(
cluster_id: ClusterId,
datanode_id: DatanodeId,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<Option<Peer>> {
let lease_filter = build_lease_filter(lease_secs);
let lease_key = DatanodeLeaseKey {
cluster_id,
node_id: datanode_id,
};
let lease_key_bytes: Vec<u8> = lease_key.clone().try_into()?;
@@ -61,24 +63,29 @@ pub async fn lookup_datanode_peer(
/// Find all alive datanodes
pub async fn alive_datanodes(
cluster_id: ClusterId,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<HashMap<DatanodeLeaseKey, LeaseValue>> {
let predicate = build_lease_filter(lease_secs);
filter(DatanodeLeaseKey::prefix_key(), meta_peer_client, |v| {
predicate(v)
})
filter(
DatanodeLeaseKey::prefix_key_by_cluster(cluster_id),
meta_peer_client,
|v| predicate(v),
)
.await
}
/// Looks up a [`Peer`] by [`ClusterId`] and [`FlownodeId`]; returns it only if it is alive within the given `lease_secs`
pub async fn lookup_flownode_peer(
cluster_id: ClusterId,
flownode_id: FlownodeId,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<Option<Peer>> {
let lease_filter = build_lease_filter(lease_secs);
let lease_key = FlownodeLeaseKey {
cluster_id,
node_id: flownode_id,
};
let lease_key_bytes: Vec<u8> = lease_key.clone().try_into()?;
@@ -100,12 +107,13 @@ pub async fn lookup_flownode_peer(
/// Find all alive flownodes
pub async fn alive_flownodes(
cluster_id: ClusterId,
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<HashMap<FlownodeLeaseKey, LeaseValue>> {
let predicate = build_lease_filter(lease_secs);
filter(
FlownodeLeaseKey::prefix_key_by_cluster(),
FlownodeLeaseKey::prefix_key_by_cluster(cluster_id),
meta_peer_client,
|v| predicate(v),
)
@@ -155,14 +163,22 @@ impl MetaPeerLookupService {
#[async_trait::async_trait]
impl PeerLookupService for MetaPeerLookupService {
async fn datanode(&self, id: DatanodeId) -> common_meta::error::Result<Option<Peer>> {
lookup_datanode_peer(id, &self.meta_peer_client, u64::MAX)
async fn datanode(
&self,
cluster_id: ClusterId,
id: DatanodeId,
) -> common_meta::error::Result<Option<Peer>> {
lookup_datanode_peer(cluster_id, id, &self.meta_peer_client, u64::MAX)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
async fn flownode(&self, id: FlownodeId) -> common_meta::error::Result<Option<Peer>> {
lookup_flownode_peer(id, &self.meta_peer_client, u64::MAX)
async fn flownode(
&self,
cluster_id: ClusterId,
id: FlownodeId,
) -> common_meta::error::Result<Option<Peer>> {
lookup_flownode_peer(cluster_id, id, &self.meta_peer_client, u64::MAX)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
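
The lookup helpers above all start from `build_lease_filter(lease_secs)`, whose body is not part of this diff. One plausible reading, sketched here with a stand-in `LeaseValue`, is a predicate that treats a lease as alive while its last recorded heartbeat is younger than `lease_secs`; the exact comparison used upstream may differ.

```rust
use std::time::{SystemTime, UNIX_EPOCH};

/// Stand-in for the LeaseValue stored per node.
struct LeaseValue {
    timestamp_millis: i64,
}

/// A plausible liveness predicate in the spirit of build_lease_filter.
fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
    move |v: &LeaseValue| {
        let now_millis = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as i64;
        now_millis - v.timestamp_millis < (lease_secs as i64) * 1000
    }
}

fn main() {
    let alive = build_lease_filter(10);
    let fresh = LeaseValue {
        timestamp_millis: SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as i64,
    };
    let stale = LeaseValue { timestamp_millis: 0 };
    assert!(alive(&fresh));
    assert!(!alive(&stale));
}
```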

View File

@@ -26,7 +26,6 @@ use common_config::Configurable;
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::distributed_time_constants;
use common_meta::key::maintenance::MaintenanceModeManagerRef;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef};
@@ -37,6 +36,7 @@ use common_meta::node_expiry_listener::NodeExpiryListener;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeperRef;
use common_meta::wal_options_allocator::WalOptionsAllocatorRef;
use common_meta::{distributed_time_constants, ClusterId};
use common_options::datanode::DatanodeClientOptions;
use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
@@ -572,8 +572,13 @@ impl Metasrv {
}
/// Looks up a peer by peer_id; returns it only when it is alive.
pub(crate) async fn lookup_peer(&self, peer_id: u64) -> Result<Option<Peer>> {
pub(crate) async fn lookup_peer(
&self,
cluster_id: ClusterId,
peer_id: u64,
) -> Result<Option<Peer>> {
lookup_datanode_peer(
cluster_id,
peer_id,
&self.meta_peer_client,
distributed_time_constants::DATANODE_LEASE_SECS,

View File

@@ -20,7 +20,7 @@ lazy_static! {
pub static ref METRIC_META_KV_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_meta_kv_request_elapsed",
"meta kv request",
&["target", "op"]
&["target", "op", "cluster_id"]
)
.unwrap();
/// The heartbeat connection gauge.
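
Adding `cluster_id` to the label set means every observation site must now supply a value for it. A minimal example using the `prometheus` and `lazy_static` crates, as the code above does; the metric name and label values here are placeholders.

```rust
use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    // Placeholder metric mirroring the label change: "cluster_id" is now part
    // of the label set.
    static ref KV_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
        "example_meta_kv_request_elapsed",
        "illustrative meta kv request histogram",
        &["target", "op", "cluster_id"]
    )
    .unwrap();
}

fn main() {
    // Every observation now provides a value for the new `cluster_id` label.
    KV_REQUEST_ELAPSED
        .with_label_values(&["etcd", "range", "1"])
        .observe(0.042);
}
```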

View File

@@ -39,6 +39,7 @@ use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
use common_meta::lock_key::{CatalogLock, RegionLock, SchemaLock, TableLock};
use common_meta::peer::Peer;
use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
use common_meta::ClusterId;
use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
};
@@ -69,6 +70,8 @@ pub struct PersistentContext {
catalog: String,
/// The table schema.
schema: String,
/// The Id of the cluster.
cluster_id: ClusterId,
/// The [Peer] of migration source.
from_peer: Peer,
/// The [Peer] of migration destination.
@@ -270,11 +273,12 @@ impl Context {
/// The original failure detector was removed once the procedure was triggered.
/// Now, we need to register the failure detector for the failed region again.
pub async fn register_failure_detectors(&self) {
let cluster_id = self.persistent_ctx.cluster_id;
let datanode_id = self.persistent_ctx.from_peer.id;
let region_id = self.persistent_ctx.region_id;
self.region_failure_detector_controller
.register_failure_detectors(vec![(datanode_id, region_id)])
.register_failure_detectors(vec![(cluster_id, datanode_id, region_id)])
.await;
}
@@ -283,11 +287,12 @@ impl Context {
/// The original failure detectors were removed once the procedure was triggered.
/// However, the `from_peer` may still send heartbeats that contain the failed region.
pub async fn deregister_failure_detectors(&self) {
let cluster_id = self.persistent_ctx.cluster_id;
let datanode_id = self.persistent_ctx.from_peer.id;
let region_id = self.persistent_ctx.region_id;
self.region_failure_detector_controller
.deregister_failure_detectors(vec![(datanode_id, region_id)])
.deregister_failure_detectors(vec![(cluster_id, datanode_id, region_id)])
.await;
}
@@ -453,6 +458,7 @@ impl RegionMigrationProcedure {
} = serde_json::from_str(json).context(FromJsonSnafu)?;
let guard = tracker.insert_running_procedure(&RegionMigrationProcedureTask {
cluster_id: persistent_ctx.cluster_id,
region_id: persistent_ctx.region_id,
from_peer: persistent_ctx.from_peer.clone(),
to_peer: persistent_ctx.to_peer.clone(),
@@ -574,6 +580,7 @@ mod tests {
use common_meta::key::test_utils::new_test_table_info;
use common_meta::rpc::router::{Region, RegionRoute};
use super::migration_end::RegionMigrationEnd;
use super::update_metadata::UpdateMetadata;
use super::*;
use crate::handler::HeartbeatMailbox;
@@ -613,7 +620,7 @@ mod tests {
let procedure = RegionMigrationProcedure::new(persistent_context, context, None);
let serialized = procedure.dump().unwrap();
let expected = r#"{"persistent_ctx":{"catalog":"greptime","schema":"public","from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"timeout":"10s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
let expected = r#"{"persistent_ctx":{"catalog":"greptime","schema":"public","cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"timeout":"10s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
assert_eq!(expected, serialized);
}
@@ -621,7 +628,7 @@ mod tests {
fn test_backward_compatibility() {
let persistent_ctx = test_util::new_persistent_context(1, 2, RegionId::new(1024, 1));
// NOTES: Changes it will break backward compatibility.
let serialized = r#"{"catalog":"greptime","schema":"public","from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105}"#;
let serialized = r#"{"catalog":"greptime","schema":"public","cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105}"#;
let deserialized: PersistentContext = serde_json::from_str(serialized).unwrap();
assert_eq!(persistent_ctx, deserialized);
@@ -633,8 +640,15 @@ mod tests {
#[async_trait::async_trait]
#[typetag::serde]
impl State for MockState {
async fn next(&mut self, _ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
Ok((Box::new(MockState), Status::done()))
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
let pc = &mut ctx.persistent_ctx;
if pc.cluster_id == 2 {
Ok((Box::new(RegionMigrationEnd), Status::done()))
} else {
pc.cluster_id += 1;
Ok((Box::new(MockState), Status::executing(false)))
}
}
fn as_any(&self) -> &dyn Any {
@@ -678,6 +692,7 @@ mod tests {
for _ in 1..3 {
status = Some(procedure.execute(&ctx).await.unwrap());
}
assert_eq!(procedure.context.persistent_ctx.cluster_id, 2);
assert!(status.unwrap().is_done());
}
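
`register_failure_detectors`/`deregister_failure_detectors` now identify a detector by `(cluster_id, datanode_id, region_id)` instead of `(datanode_id, region_id)`. A toy registry keyed by that triple; the detector state and the `RegionId` alias are placeholders, not the real types.

```rust
use std::collections::HashMap;

type ClusterId = u64;
type DatanodeId = u64;
type RegionId = u64; // stand-in; the real RegionId is a richer type

/// Placeholder failure-detector state.
#[derive(Default)]
struct PhiAccrualDetector;

/// Toy registry illustrating the identifier change.
#[derive(Default)]
struct RegionFailureDetectors {
    detectors: HashMap<(ClusterId, DatanodeId, RegionId), PhiAccrualDetector>,
}

impl RegionFailureDetectors {
    fn register(&mut self, idents: Vec<(ClusterId, DatanodeId, RegionId)>) {
        for ident in idents {
            self.detectors.entry(ident).or_default();
        }
    }

    fn deregister(&mut self, idents: Vec<(ClusterId, DatanodeId, RegionId)>) {
        for ident in idents {
            self.detectors.remove(&ident);
        }
    }
}

fn main() {
    let mut detectors = RegionFailureDetectors::default();
    detectors.register(vec![(0, 1, 4398046511105)]);
    assert_eq!(detectors.detectors.len(), 1);
    detectors.deregister(vec![(0, 1, 4398046511105)]);
    assert!(detectors.detectors.is_empty());
}
```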

View File

@@ -62,6 +62,7 @@ impl CloseDowngradedRegion {
async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> {
let pc = &ctx.persistent_ctx;
let downgrade_leader_datanode_id = pc.from_peer.id;
let cluster_id = pc.cluster_id;
let table_id = pc.region_id.table_id();
let region_number = pc.region_id.region_number();
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?;
@@ -69,6 +70,7 @@ impl CloseDowngradedRegion {
let RegionInfo { engine, .. } = datanode_table_value.region_info.clone();
Ok(Instruction::CloseRegion(RegionIdent {
cluster_id,
datanode_id: downgrade_leader_datanode_id,
table_id,
region_number,

View File

@@ -294,6 +294,7 @@ mod tests {
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
region_id: RegionId::new(1024, 1),
cluster_id: 0,
timeout: Duration::from_millis(1000),
}
}

View File

@@ -22,6 +22,7 @@ use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_route::TableRouteValue;
use common_meta::peer::Peer;
use common_meta::rpc::router::RegionRoute;
use common_meta::ClusterId;
use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId};
use common_telemetry::{error, info};
use snafu::{ensure, OptionExt, ResultExt};
@@ -100,6 +101,7 @@ impl Drop for RegionMigrationProcedureGuard {
#[derive(Debug, Clone)]
pub struct RegionMigrationProcedureTask {
pub(crate) cluster_id: ClusterId,
pub(crate) region_id: RegionId,
pub(crate) from_peer: Peer,
pub(crate) to_peer: Peer,
@@ -107,8 +109,15 @@ pub struct RegionMigrationProcedureTask {
}
impl RegionMigrationProcedureTask {
pub fn new(region_id: RegionId, from_peer: Peer, to_peer: Peer, timeout: Duration) -> Self {
pub fn new(
cluster_id: ClusterId,
region_id: RegionId,
from_peer: Peer,
to_peer: Peer,
timeout: Duration,
) -> Self {
Self {
cluster_id,
region_id,
from_peer,
to_peer,
@@ -121,8 +130,8 @@ impl Display for RegionMigrationProcedureTask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"region: {}, from_peer: {}, to_peer: {}",
self.region_id, self.from_peer, self.to_peer
"cluster: {}, region: {}, from_peer: {}, to_peer: {}",
self.cluster_id, self.region_id, self.from_peer, self.to_peer
)
}
}
@@ -322,6 +331,7 @@ impl RegionMigrationManager {
.with_label_values(&["desc", &task.to_peer.id.to_string()])
.inc();
let RegionMigrationProcedureTask {
cluster_id,
region_id,
from_peer,
to_peer,
@@ -331,6 +341,7 @@ impl RegionMigrationManager {
PersistentContext {
catalog: catalog_name,
schema: schema_name,
cluster_id,
region_id,
from_peer,
to_peer,
@@ -383,6 +394,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(2),
to_peer: Peer::empty(1),
@@ -407,6 +419,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(1),
@@ -424,6 +437,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
@@ -441,6 +455,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
@@ -468,6 +483,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
@@ -499,6 +515,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
@@ -525,6 +542,7 @@ mod test {
let manager = RegionMigrationManager::new(env.procedure_manager().clone(), context_factory);
let region_id = RegionId::new(1024, 1);
let task = RegionMigrationProcedureTask {
cluster_id: 1,
region_id,
from_peer: Peer::empty(1),
to_peer: Peer::empty(2),
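
`RegionMigrationProcedureTask` now carries the `cluster_id` through its constructor and its `Display` output. A stand-in sketch with simplified `Peer` and region id types; the `Peer` display format below is invented for illustration, while the task's format string matches the one introduced above.

```rust
use std::fmt::{self, Display};
use std::time::Duration;

type ClusterId = u64;

#[derive(Debug, Clone)]
struct Peer { id: u64, addr: String }

impl Display for Peer {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "peer-{}({})", self.id, self.addr) // placeholder format
    }
}

/// Stand-in for RegionMigrationProcedureTask after the change.
struct Task {
    cluster_id: ClusterId,
    region_id: u64, // stand-in for RegionId
    from_peer: Peer,
    to_peer: Peer,
    #[allow(dead_code)]
    timeout: Duration,
}

impl Display for Task {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "cluster: {}, region: {}, from_peer: {}, to_peer: {}",
            self.cluster_id, self.region_id, self.from_peer, self.to_peer
        )
    }
}

fn main() {
    let task = Task {
        cluster_id: 1,
        region_id: 4398046511105,
        from_peer: Peer { id: 2, addr: "".to_string() },
        to_peer: Peer { id: 1, addr: "".to_string() },
        timeout: Duration::from_secs(10),
    };
    println!("{task}");
}
```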

Some files were not shown because too many files have changed in this diff.