Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-27 00:19:58 +00:00)

Compare commits: v0.3.0-nig ... v0.3.0-alp (73 commits)
Commits (SHA1):
f00484dff0, 5004cf6d9a, 8e69aef973, 2615718999, fe6e3daf81, b7e1778ada,
ccd666aa9b, 2aa442c86d, f811ae4665, e5b6f8654a, ff6d11ddc7, 878c6bf75a,
ce440606a9, 5fd7250dca, 5a5e88353c, ef15de5f17, 86adac1532, e7a410573b,
548f0d1e2a, 5467ea496f, 70e17ead68, ae8203fafa, ac3666b841, 0460f3ae30,
9d179802b8, 72b6bd11f7, 6b08a5f94e, 00104bef76, ae81c7329d, c5f6d7c99a,
bb1b71bcf0, a4b884406a, ab5dfd31ec, 563ce59071, 51b23664f7, 9e21632f23,
b27c569ae0, 0eaae634fa, 8b9b5a0d3a, 78fab08b51, d072947ef2, 4094907c09,
0da94930d5, f0a519b71b, 89366ba939, c042723fc9, 732784d3f8, 332b3677ac,
6cd634b105, cd1ccb110b, 953793143b, 8a7998cd25, eb24bab5df, 8f9e9686fe,
61a32d1b9c, 74a6517bd0, fa4a497d75, ddca0307d1, 3dc45f1c13, 7c55783e53,
5b304fa692, 9f67ad8bce, e646490d16, 1225edb065, 8e7ec4626b, f64527da22,
6dbceb1ad5, 067c5ee7ce, 32ad358323, 77497ca46a, e5a215de46, e5aad0f607,
edf6c0bf48
@@ -9,3 +9,9 @@ GT_OSS_BUCKET=OSS bucket
GT_OSS_ACCESS_KEY_ID=OSS access key id
GT_OSS_ACCESS_KEY=OSS access key
GT_OSS_ENDPOINT=OSS endpoint

# Settings for azblob test
GT_AZBLOB_CONTAINER=AZBLOB container
GT_AZBLOB_ACCOUNT_NAME=AZBLOB account name
GT_AZBLOB_ACCOUNT_KEY=AZBLOB account key
GT_AZBLOB_ENDPOINT=AZBLOB endpoint
.github/workflows/develop.yml (vendored): 1 line changed

@@ -141,6 +141,7 @@ jobs:
      - name: Run sqlness
        run: cargo sqlness && ls /tmp
      - name: Upload sqlness logs
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: sqlness-logs
.gitignore (vendored): 5 lines changed

@@ -1,6 +1,8 @@
# Generated by Cargo
# will have compiled files and executables
/target/
# also ignore if it's a symbolic link
/target

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html

@@ -39,3 +41,6 @@ benchmarks/data
# dashboard files
!/src/servers/dashboard/VERSION
/src/servers/dashboard/*

# Vscode workspace
*.code-workspace
Cargo.lock (generated): 1918 lines changed. File diff suppressed because it is too large.
Cargo.toml: 30 lines changed

@@ -49,36 +49,38 @@ members = [
]

[workspace.package]
version = "0.2.0"
version = "0.3.0"
edition = "2021"
license = "Apache-2.0"

[workspace.dependencies]
arrow = { version = "37.0" }
arrow-array = "37.0"
arrow-flight = "37.0"
arrow-schema = { version = "37.0", features = ["serde"] }
arrow = { version = "40.0" }
arrow-array = "40.0"
arrow-flight = "40.0"
arrow-schema = { version = "40.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
# TODO(ruihang): use arrow-datafusion when it contains https://github.com/apache/arrow-datafusion/pull/6032
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b14f7a9ffe91257fc3d2a5d654f2a1a14a8fc793" }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
futures = "0.3"
futures-util = "0.3"
parquet = "37.0"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "4398d20c56d5f7939cc2960789cb1fa7dd18e6fe" }
itertools = "0.10"
parquet = "40.0"
paste = "1.0"
prost = "0.11"
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.33"
sqlparser = "0.34"
tempfile = "3"
tokio = { version = "1.28", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
@@ -9,6 +9,6 @@ arrow.workspace = true
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
itertools.workspace = true
parquet.workspace = true
tokio.workspace = true
@@ -26,7 +26,9 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
use arrow::record_batch::RecordBatch;
use clap::Parser;
use client::api::v1::column::Values;
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest};
use client::api::v1::{
    Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests,
};
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

@@ -107,8 +109,12 @@ async fn write_data(
            columns,
            row_count,
        };
        let requests = InsertRequests {
            inserts: vec![request],
        };

        let now = Instant::now();
        db.insert(request).await.unwrap();
        db.insert(requests).await.unwrap();
        let elapsed = now.elapsed();
        total_rpc_elapsed_ms += elapsed.as_millis();
        progress_bar.inc(row_count as _);

@@ -364,7 +370,7 @@ fn create_table_expr() -> CreateTableExpr {
        primary_keys: vec!["VendorID".to_string()],
        create_if_not_exists: false,
        table_options: Default::default(),
        region_ids: vec![0],
        region_numbers: vec![0],
        table_id: None,
        engine: "mito".to_string(),
    }
@@ -24,7 +24,8 @@ tcp_nodelay = true

# WAL options, see `standalone.example.toml`.
[wal]
dir = "/tmp/greptimedb/wal"
# WAL data directory
# dir = "/tmp/greptimedb/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"

@@ -34,7 +35,9 @@ sync_write = false
# Storage options, see `standalone.example.toml`.
[storage]
type = "File"
data_dir = "/tmp/greptimedb/data/"
data_home = "/tmp/greptimedb/"
# TTL for all tables. Disabled by default.
# global_ttl = "7d"

# Compaction options, see `standalone.example.toml`.
[storage.compaction]

@@ -58,15 +61,21 @@ checkpoint_on_startup = false
max_flush_tasks = 8
# Default write buffer size for a region.
region_write_buffer_size = "32MB"
# Interval to check whether a region needs flush.
picker_schedule_interval = "5m"
# Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
# Global write buffer size for all regions.
global_write_buffer_size = "1GB"

# Procedure storage options, see `standalone.example.toml`.
[procedure]
max_retry_times = 3
retry_delay = "500ms"

# Log options, see `standalone.example.toml`
[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
# Log options
# [logging]
# Specify logs directory.
# dir = "/tmp/greptimedb/logs"
# Specify the log level [info | debug | error | warn]
# level = "info"
@@ -58,6 +58,6 @@ connect_timeout_millis = 5000
tcp_nodelay = true

# Log options, see `standalone.example.toml`
[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
# [logging]
# dir = "/tmp/greptimedb/logs"
# level = "info"
@@ -15,6 +15,6 @@ selector = "LeaseBased"
use_memory_store = false

# Log options, see `standalone.example.toml`
[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
# [logging]
# dir = "/tmp/greptimedb/logs"
# level = "info"
@@ -78,8 +78,8 @@ addr = "127.0.0.1:4004"

# WAL options.
[wal]
# WAL data directory.
dir = "/tmp/greptimedb/wal"
# WAL data directory
# dir = "/tmp/greptimedb/wal"
# WAL file size in bytes.
file_size = "1GB"
# WAL purge threshold in bytes.

@@ -96,7 +96,9 @@ sync_write = false
# Storage type.
type = "File"
# Data directory, "/tmp/greptimedb/data" by default.
data_dir = "/tmp/greptimedb/data/"
data_home = "/tmp/greptimedb/"
# TTL for all tables. Disabled by default.
# global_ttl = "7d"

# Compaction options.
[storage.compaction]

@@ -123,8 +125,12 @@ checkpoint_on_startup = false
max_flush_tasks = 8
# Default write buffer size for a region.
region_write_buffer_size = "32MB"
# Interval to check whether a region needs flush.
picker_schedule_interval = "5m"
# Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
# Global write buffer size for all regions.
global_write_buffer_size = "1GB"

# Procedure storage options.
[procedure]

@@ -134,8 +140,8 @@ max_retry_times = 3
retry_delay = "500ms"

# Log options
[logging]
# [logging]
# Specify logs directory.
dir = "/tmp/greptimedb/logs"
# dir = "/tmp/greptimedb/logs"
# Specify the log level [info | debug | error | warn]
level = "debug"
# level = "info"
@@ -12,7 +12,9 @@ RUN apt-get update && apt-get install -y \
    pkg-config \
    python3 \
    python3-dev \
    && pip install pyarrow
    python3-pip \
    && pip3 install --upgrade pip \
    && pip3 install pyarrow

# Install Rust.
SHELL ["/bin/bash", "-c"]
@@ -10,7 +10,7 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "6bfb02057c40da0e397c0cb4f6b87bd769669d50" }
greptime-proto.workspace = true
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
tonic.workspace = true
@@ -18,6 +18,10 @@ use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use greptime_proto::v1::ddl_request::Expr;
use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::{DdlRequest, QueryRequest};
use snafu::prelude::*;

use crate::error::{self, Result};

@@ -224,6 +228,38 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
    column.null_mask = null_mask.into_vec();
}

/// Returns the type name of the [Request].
pub fn request_type(request: &Request) -> &'static str {
    match request {
        Request::Inserts(_) => "inserts",
        Request::Query(query_req) => query_request_type(query_req),
        Request::Ddl(ddl_req) => ddl_request_type(ddl_req),
        Request::Delete(_) => "delete",
    }
}

/// Returns the type name of the [QueryRequest].
fn query_request_type(request: &QueryRequest) -> &'static str {
    match request.query {
        Some(Query::Sql(_)) => "query.sql",
        Some(Query::LogicalPlan(_)) => "query.logical_plan",
        Some(Query::PromRangeQuery(_)) => "query.prom_range",
        None => "query.empty",
    }
}

/// Returns the type name of the [DdlRequest].
fn ddl_request_type(request: &DdlRequest) -> &'static str {
    match request.expr {
        Some(Expr::CreateDatabase(_)) => "ddl.create_database",
        Some(Expr::CreateTable(_)) => "ddl.create_table",
        Some(Expr::Alter(_)) => "ddl.alter",
        Some(Expr::DropTable(_)) => "ddl.drop_table",
        Some(Expr::FlushTable(_)) => "ddl.flush_table",
        None => "ddl.empty",
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;
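A plausible call site for the new helper (hypothetical, not part of this diff): use request_type to attach a per-variant label when counting gRPC requests with the `metrics` crate the workspace already depends on.

    // Hypothetical metrics hook; request_type and Request come from the diff above.
    use api::helper::request_type;
    use greptime_proto::v1::greptime_request::Request;

    fn record_request(request: &Request) {
        // Yields "inserts", "delete", "query.sql", "ddl.create_table", etc.
        let label = request_type(request);
        metrics::increment_counter!("grpc.request_count", "type" => label);
    }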
@@ -23,4 +23,5 @@ pub mod prometheus {

pub mod v1;

pub use greptime_proto;
pub use prost::DecodeError;
@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true

[features]
testing = []

[dependencies]
api = { path = "../api" }
arc-swap = "1.0"

@@ -14,6 +17,7 @@ backoff = { version = "0.4", features = ["tokio"] }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-meta = { path = "../common/meta" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }

@@ -28,6 +32,7 @@ key-lock = "0.1"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
metrics.workspace = true
moka = { version = "0.11", features = ["future"] }
parking_lot = "0.12"
regex = "1.6"
serde = "1.0"

@@ -35,10 +40,12 @@ serde_json = "1.0"
session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
store-api = { path = "../store-api" }
table = { path = "../table" }
tokio.workspace = true

[dev-dependencies]
catalog = { path = ".", features = ["testing"] }
common-test-util = { path = "../common/test-util" }
chrono.workspace = true
log-store = { path = "../log-store" }
@@ -186,12 +186,6 @@ pub enum Error {
        source: table::error::Error,
    },

    #[snafu(display("Failure during SchemaProvider operation, source: {}", source))]
    SchemaProviderOperation {
        #[snafu(backtrace)]
        source: BoxedError,
    },

    #[snafu(display("{source}"))]
    Internal {
        #[snafu(backtrace)]

@@ -221,30 +215,9 @@ pub enum Error {
        source: datatypes::error::Error,
    },

    #[snafu(display("Failed to serialize or deserialize catalog entry: {}", source))]
    CatalogEntrySerde {
        #[snafu(backtrace)]
        source: common_catalog::error::Error,
    },

    #[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
    QueryAccessDenied { catalog: String, schema: String },

    #[snafu(display(
        "Failed to get region stats, catalog: {}, schema: {}, table: {}, source: {}",
        catalog,
        schema,
        table,
        source
    ))]
    RegionStats {
        catalog: String,
        schema: String,
        table: String,
        #[snafu(backtrace)]
        source: table::error::Error,
    },

    #[snafu(display("Invalid system table definition: {err_msg}"))]
    InvalidSystemTableDef { err_msg: String, location: Location },

@@ -260,6 +233,9 @@ pub enum Error {
        #[snafu(backtrace)]
        source: table::error::Error,
    },

    #[snafu(display("A generic error has occurred, msg: {}", msg))]
    Generic { msg: String, location: Location },
}

pub type Result<T> = std::result::Result<T, Error>;

@@ -280,14 +256,12 @@ impl ErrorExt for Error {
            | Error::EmptyValue { .. }
            | Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,

            Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,
            Error::Generic { .. } | Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,

            Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source } => {
                source.status_code()
            }
            Error::InvalidCatalogValue { source, .. } | Error::CatalogEntrySerde { source } => {
                source.status_code()
            }
            Error::InvalidCatalogValue { source, .. } => source.status_code(),

            Error::TableExists { .. } => StatusCode::TableAlreadyExists,
            Error::TableNotExist { .. } => StatusCode::TableNotFound,

@@ -301,7 +275,6 @@ impl ErrorExt for Error {
            | Error::OpenTable { source, .. }
            | Error::CreateTable { source, .. }
            | Error::DeregisterTable { source, .. }
            | Error::RegionStats { source, .. }
            | Error::TableSchemaMismatch { source } => source.status_code(),

            Error::MetaSrv { source, .. } => source.status_code(),

@@ -309,9 +282,9 @@ impl ErrorExt for Error {
            Error::SystemCatalogTableScanExec { source } => source.status_code(),
            Error::InvalidTableInfoInCatalog { source } => source.status_code(),

            Error::CompileScriptInternal { source }
            | Error::SchemaProviderOperation { source }
            | Error::Internal { source } => source.status_code(),
            Error::CompileScriptInternal { source } | Error::Internal { source } => {
                source.status_code()
            }

            Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported,
            Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
@@ -19,13 +19,19 @@ use std::any::Any;
use std::sync::Arc;

use async_trait::async_trait;
use datafusion::datasource::streaming::{PartitionStream, StreamingTable};
use common_error::prelude::BoxedError;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
use datatypes::schema::SchemaRef;
use futures_util::StreamExt;
use snafu::ResultExt;
use table::table::adapter::TableAdapter;
use table::TableRef;
use store_api::storage::ScanRequest;
use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu};
use table::{Result as TableResult, Table, TableRef};

use self::columns::InformationSchemaColumns;
use crate::error::{DatafusionSnafu, Result, TableSchemaMismatchSnafu};
use crate::error::Result;
use crate::information_schema::tables::InformationSchemaTables;
use crate::{CatalogProviderRef, SchemaProvider};

@@ -59,40 +65,21 @@ impl SchemaProvider for InformationSchemaProvider {
    }

    async fn table(&self, name: &str) -> Result<Option<TableRef>> {
        let table = match name.to_ascii_lowercase().as_ref() {
            TABLES => {
                let inner = Arc::new(InformationSchemaTables::new(
                    self.catalog_name.clone(),
                    self.catalog_provider.clone(),
                ));
                Arc::new(
                    StreamingTable::try_new(inner.schema().clone(), vec![inner]).with_context(
                        |_| DatafusionSnafu {
                            msg: format!("Failed to get InformationSchema table '{name}'"),
                        },
                    )?,
                )
            }
            COLUMNS => {
                let inner = Arc::new(InformationSchemaColumns::new(
                    self.catalog_name.clone(),
                    self.catalog_provider.clone(),
                ));
                Arc::new(
                    StreamingTable::try_new(inner.schema().clone(), vec![inner]).with_context(
                        |_| DatafusionSnafu {
                            msg: format!("Failed to get InformationSchema table '{name}'"),
                        },
                    )?,
                )
            }
        let stream_builder = match name.to_ascii_lowercase().as_ref() {
            TABLES => Arc::new(InformationSchemaTables::new(
                self.catalog_name.clone(),
                self.catalog_provider.clone(),
            )) as _,
            COLUMNS => Arc::new(InformationSchemaColumns::new(
                self.catalog_name.clone(),
                self.catalog_provider.clone(),
            )) as _,
            _ => {
                return Ok(None);
            }
        };

        let table = TableAdapter::new(table).context(TableSchemaMismatchSnafu)?;
        Ok(Some(Arc::new(table)))
        Ok(Some(Arc::new(InformationTable::new(stream_builder))))
    }

    async fn table_exist(&self, name: &str) -> Result<bool> {

@@ -100,3 +87,83 @@ impl SchemaProvider for InformationSchemaProvider {
        Ok(self.tables.contains(&normalized_name))
    }
}

// TODO(ruihang): make it a more generic trait:
// https://github.com/GreptimeTeam/greptimedb/pull/1639#discussion_r1205001903
pub trait InformationStreamBuilder: Send + Sync {
    fn to_stream(&self) -> Result<SendableRecordBatchStream>;

    fn schema(&self) -> SchemaRef;
}

pub struct InformationTable {
    stream_builder: Arc<dyn InformationStreamBuilder>,
}

impl InformationTable {
    pub fn new(stream_builder: Arc<dyn InformationStreamBuilder>) -> Self {
        Self { stream_builder }
    }
}

#[async_trait]
impl Table for InformationTable {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema(&self) -> SchemaRef {
        self.stream_builder.schema()
    }

    fn table_info(&self) -> table::metadata::TableInfoRef {
        unreachable!("Should not call table_info() of InformationTable directly")
    }

    /// Scan the table and returns a SendableRecordBatchStream.
    async fn scan(
        &self,
        _projection: Option<&Vec<usize>>,
        _filters: &[Expr],
        // limit can be used to reduce the amount scanned
        // from the datasource as a performance optimization.
        // If set, it contains the amount of rows needed by the `LogicalPlan`,
        // The datasource should return *at least* this number of rows if available.
        _limit: Option<usize>,
    ) -> TableResult<PhysicalPlanRef> {
        unimplemented!()
    }

    async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
        let projection = request.projection;
        let projected_schema = if let Some(projection) = &projection {
            Arc::new(
                self.schema()
                    .try_project(projection)
                    .context(SchemaConversionSnafu)?,
            )
        } else {
            self.schema()
        };
        let stream = self
            .stream_builder
            .to_stream()
            .map_err(BoxedError::new)
            .context(TablesRecordBatchSnafu)?
            .map(move |batch| {
                batch.and_then(|batch| {
                    if let Some(projection) = &projection {
                        batch.try_project(projection)
                    } else {
                        Ok(batch)
                    }
                })
            });
        let stream = RecordBatchStreamAdaptor {
            schema: projected_schema,
            stream: Box::pin(stream),
            output_ordering: None,
        };
        Ok(Box::pin(stream))
    }
}
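A sketch of how the new InformationTable is expected to be driven (hypothetical caller; assumes catalog_name and catalog_provider are in scope and that store_api's ScanRequest implements Default):

    use futures_util::StreamExt;

    // Build the stream-backed table and scan it with a projection.
    let table = InformationTable::new(Arc::new(InformationSchemaTables::new(
        catalog_name.clone(),
        catalog_provider.clone(),
    )));
    let request = ScanRequest {
        projection: Some(vec![0, 1]), // keep only the first two columns
        ..Default::default()
    };
    let mut stream = table.scan_to_stream(request).await?;
    while let Some(batch) = stream.next().await {
        let batch = batch?; // each batch arrives already projected
    }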
@@ -18,8 +18,10 @@ use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::{
    SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
};
use common_error::prelude::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::RecordBatch;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;

@@ -29,7 +31,8 @@ use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{StringVectorBuilder, VectorRef};
use snafu::ResultExt;

use crate::error::{CreateRecordBatchSnafu, Result};
use super::InformationStreamBuilder;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::CatalogProviderRef;

pub(super) struct InformationSchemaColumns {

@@ -71,6 +74,32 @@ impl InformationSchemaColumns {
    }
}

impl InformationStreamBuilder for InformationSchemaColumns {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    fn to_stream(&self) -> Result<SendableRecordBatchStream> {
        let schema = self.schema.arrow_schema().clone();
        let mut builder = self.builder();
        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
            futures::stream::once(async move {
                builder
                    .make_tables()
                    .await
                    .map(|x| x.into_df_record_batch())
                    .map_err(Into::into)
            }),
        ));
        Ok(Box::pin(
            RecordBatchStreamAdapter::try_new(stream)
                .map_err(BoxedError::new)
                .context(InternalSnafu)?,
        ))
    }
}

struct InformationSchemaColumnsBuilder {
    schema: SchemaRef,
    catalog_name: String,

@@ -168,7 +197,7 @@ impl DfPartitionStream for InformationSchemaColumns {
    }

    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
        let schema = self.schema().clone();
        let schema = self.schema.arrow_schema().clone();
        let mut builder = self.builder();
        Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
@@ -16,8 +16,10 @@ use std::sync::Arc;

use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_error::prelude::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::RecordBatch;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;

@@ -27,7 +29,8 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use snafu::ResultExt;
use table::metadata::TableType;

use crate::error::{CreateRecordBatchSnafu, Result};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::information_schema::InformationStreamBuilder;
use crate::CatalogProviderRef;

pub(super) struct InformationSchemaTables {

@@ -62,6 +65,32 @@ impl InformationSchemaTables {
    }
}

impl InformationStreamBuilder for InformationSchemaTables {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    fn to_stream(&self) -> Result<SendableRecordBatchStream> {
        let schema = self.schema.arrow_schema().clone();
        let mut builder = self.builder();
        let stream = Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
            futures::stream::once(async move {
                builder
                    .make_tables()
                    .await
                    .map(|x| x.into_df_record_batch())
                    .map_err(Into::into)
            }),
        ));
        Ok(Box::pin(
            RecordBatchStreamAdapter::try_new(stream)
                .map_err(BoxedError::new)
                .context(InternalSnafu)?,
        ))
    }
}

/// Builds the `information_schema.TABLE` table row by row
///
/// Columns are based on <https://www.postgresql.org/docs/current/infoschema-columns.html>

@@ -160,7 +189,7 @@ impl DfPartitionStream for InformationSchemaTables {
    }

    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
        let schema = self.schema().clone();
        let schema = self.schema.arrow_schema().clone();
        let mut builder = self.builder();
        Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
@@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(trait_upcasting)]
#![feature(assert_matches)]

use std::any::Any;
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

@@ -244,6 +246,8 @@ pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<Reg
        let region_numbers = &table.table_info().meta.region_numbers;
        region_number += region_numbers.len() as u64;

        let engine = &table.table_info().meta.engine;

        match table.region_stats() {
            Ok(stats) => {
                let stats = stats.into_iter().map(|stat| RegionStat {

@@ -254,6 +258,7 @@ pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<Reg
                        table_name: table_name.clone(),
                    }),
                    approximate_bytes: stat.disk_usage_bytes as i64,
                    attrs: HashMap::from([("engine_name".to_owned(), engine.clone())]),
                    ..Default::default()
                });
@@ -20,6 +20,9 @@ pub(crate) const METRIC_CATALOG_MANAGER_CATALOG_COUNT: &str = "catalog.catalog_c
pub(crate) const METRIC_CATALOG_MANAGER_SCHEMA_COUNT: &str = "catalog.schema_count";
pub(crate) const METRIC_CATALOG_MANAGER_TABLE_COUNT: &str = "catalog.table_count";

pub(crate) const METRIC_CATALOG_KV_REMOTE_GET: &str = "catalog.kv.get.remote";
pub(crate) const METRIC_CATALOG_KV_GET: &str = "catalog.kv.get";

#[inline]
pub(crate) fn db_label(catalog: &str, schema: &str) -> (&'static str, String) {
    (METRIC_DB_LABEL, build_db_string(catalog, schema))
@@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc;

pub use client::MetaKvBackend;
pub use client::CachedMetaKvBackend;
use futures::Stream;
use futures_util::StreamExt;
pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};

@@ -26,6 +27,9 @@ use crate::error::Error;
mod client;
mod manager;

#[cfg(feature = "testing")]
pub mod mock;

#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);

@@ -70,10 +74,22 @@ pub trait KvBackend: Send + Sync {
        }
        return Ok(None);
    }

    /// MoveValue atomically renames the key to the given updated key.
    async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<(), Error>;

    fn as_any(&self) -> &dyn Any;
}

pub type KvBackendRef = Arc<dyn KvBackend>;

#[async_trait::async_trait]
pub trait KvCacheInvalidator: Send + Sync {
    async fn invalidate_key(&self, key: &[u8]);
}

pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>;

#[cfg(test)]
mod tests {
    use async_stream::stream;

@@ -114,17 +130,29 @@ mod tests {
        async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
            unimplemented!()
        }

        async fn move_value(&self, _from_key: &[u8], _to_key: &[u8]) -> Result<(), Error> {
            unimplemented!()
        }

        fn as_any(&self) -> &dyn Any {
            self
        }
    }

    #[tokio::test]
    async fn test_get() {
        let backend = MockKvBackend {};

        let result = backend.get(0.to_string().as_bytes()).await;
        assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);

        let result = backend.get(1.to_string().as_bytes()).await;
        assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);

        let result = backend.get(2.to_string().as_bytes()).await;
        assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);

        let result = backend.get(3.to_string().as_bytes()).await;
        assert!(result.unwrap().is_none());
    }
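The new move_value contract gives backends an atomic rename, so a caller can migrate a key without a read-delete-put race. Hypothetical use (key names illustrative only):

    // Atomically rename a catalog entry; either both keys change or neither does.
    backend.move_value(b"__c-old_catalog", b"__c-new_catalog").await?;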
@@ -12,17 +12,149 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use std::time::Duration;

use async_stream::stream;
use common_telemetry::info;
use common_meta::rpc::store::{
    CompareAndPutRequest, DeleteRangeRequest, MoveValueRequest, PutRequest, RangeRequest,
};
use common_telemetry::{info, timer};
use meta_client::client::MetaClient;
use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
use moka::future::{Cache, CacheBuilder};
use snafu::ResultExt;

use crate::error::{Error, MetaSrvSnafu};
use crate::remote::{Kv, KvBackend, ValueIter};
use super::KvCacheInvalidator;
use crate::error::{Error, GenericSnafu, MetaSrvSnafu, Result};
use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
use crate::remote::{Kv, KvBackend, KvBackendRef, ValueIter};

const CACHE_MAX_CAPACITY: u64 = 10000;
const CACHE_TTL_SECOND: u64 = 10 * 60;
const CACHE_TTI_SECOND: u64 = 5 * 60;

pub type CacheBackendRef = Arc<Cache<Vec<u8>, Option<Kv>>>;

pub struct CachedMetaKvBackend {
    kv_backend: KvBackendRef,
    cache: CacheBackendRef,
}

#[async_trait::async_trait]
impl KvBackend for CachedMetaKvBackend {
    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
    where
        'a: 'b,
    {
        self.kv_backend.range(key)
    }

    async fn get(&self, key: &[u8]) -> Result<Option<Kv>> {
        let _timer = timer!(METRIC_CATALOG_KV_GET);

        let init = async {
            let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);

            self.kv_backend.get(key).await
        };

        let schema_provider = self.cache.try_get_with_by_ref(key, init).await;
        schema_provider.map_err(|e| GenericSnafu { msg: e.to_string() }.build())
    }

    async fn set(&self, key: &[u8], val: &[u8]) -> Result<()> {
        let ret = self.kv_backend.set(key, val).await;

        if ret.is_ok() {
            self.invalidate_key(key).await;
        }

        ret
    }

    async fn delete(&self, key: &[u8]) -> Result<()> {
        let ret = self.kv_backend.delete_range(key, &[]).await;

        if ret.is_ok() {
            self.invalidate_key(key).await;
        }

        ret
    }

    async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<()> {
        // TODO(fys): implement it
        unimplemented!()
    }

    async fn compare_and_set(
        &self,
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
        let ret = self.kv_backend.compare_and_set(key, expect, val).await;

        if ret.is_ok() {
            self.invalidate_key(key).await;
        }

        ret
    }

    async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<()> {
        let ret = self.kv_backend.move_value(from_key, to_key).await;

        if ret.is_ok() {
            self.invalidate_key(from_key).await;
            self.invalidate_key(to_key).await;
        }

        ret
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[async_trait::async_trait]
impl KvCacheInvalidator for CachedMetaKvBackend {
    async fn invalidate_key(&self, key: &[u8]) {
        self.cache.invalidate(key).await
    }
}

impl CachedMetaKvBackend {
    pub fn new(client: Arc<MetaClient>) -> Self {
        let cache = Arc::new(
            CacheBuilder::new(CACHE_MAX_CAPACITY)
                .time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
                .time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
                .build(),
        );
        let kv_backend = Arc::new(MetaKvBackend { client });

        Self { kv_backend, cache }
    }

    pub fn wrap(kv_backend: KvBackendRef) -> Self {
        let cache = Arc::new(
            CacheBuilder::new(CACHE_MAX_CAPACITY)
                .time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
                .time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
                .build(),
        );

        Self { kv_backend, cache }
    }

    pub fn cache(&self) -> &CacheBackendRef {
        &self.cache
    }
}

#[derive(Debug)]
pub struct MetaKvBackend {
    pub client: Arc<MetaClient>,

@@ -51,7 +183,7 @@ impl KvBackend for MetaKvBackend {
        }))
    }

    async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
    async fn get(&self, key: &[u8]) -> Result<Option<Kv>> {
        let mut response = self
            .client
            .range(RangeRequest::new().with_key(key))

@@ -63,7 +195,7 @@ impl KvBackend for MetaKvBackend {
            .map(|kv| Kv(kv.take_key(), kv.take_value())))
    }

    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
    async fn set(&self, key: &[u8], val: &[u8]) -> Result<()> {
        let req = PutRequest::new()
            .with_key(key.to_vec())
            .with_value(val.to_vec());

@@ -71,7 +203,7 @@ impl KvBackend for MetaKvBackend {
        Ok(())
    }

    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<()> {
        let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
        let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
        info!(

@@ -89,7 +221,7 @@ impl KvBackend for MetaKvBackend {
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
    ) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
        let request = CompareAndPutRequest::new()
            .with_key(key.to_vec())
            .with_expect(expect.to_vec())

@@ -105,4 +237,14 @@ impl KvBackend for MetaKvBackend {
        Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
    }

    async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<()> {
        let req = MoveValueRequest::new(from_key, to_key);
        self.client.move_value(req).await.context(MetaSrvSnafu)?;
        Ok(())
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}
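The get path above is a read-through cache; a minimal standalone sketch of the same pattern, assuming moka 0.11's future::Cache (remote_get is a hypothetical stand-in for MetaKvBackend::get, and Kv is the type from the diff):

    use std::time::Duration;
    use moka::future::CacheBuilder;

    let cache: moka::future::Cache<Vec<u8>, Option<Kv>> = CacheBuilder::new(10_000)
        .time_to_live(Duration::from_secs(10 * 60))
        .time_to_idle(Duration::from_secs(5 * 60))
        .build();
    // Concurrent gets for the same key coalesce into a single remote fetch,
    // and the fetched value (including a None miss) stays cached until a
    // write path invalidates the key.
    let value = cache
        .try_get_with_by_ref(key, async { remote_get(key).await })
        .await;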
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};

@@ -19,9 +20,6 @@ use std::str::FromStr;
use std::sync::Arc;

use async_stream::stream;
use catalog::error::Error;
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use catalog::remote::{Kv, KvBackend, ValueIter};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_recordbatch::RecordBatch;
use common_telemetry::logging::info;

@@ -36,6 +34,10 @@ use table::test_util::MemTable;
use table::TableRef;
use tokio::sync::RwLock;

use crate::error::Error;
use crate::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use crate::remote::{Kv, KvBackend, ValueIter};

pub struct MockKvBackend {
    map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
}

@@ -139,14 +141,26 @@ impl KvBackend for MockKvBackend {
    }

    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
        let start = key.to_vec();
        let end = end.to_vec();
        let range = start..end;

        let mut map = self.map.write().await;
        map.retain(|k, _| !range.contains(k));
        if end.is_empty() {
            let _ = map.remove(key);
        } else {
            let start = key.to_vec();
            let end = end.to_vec();
            let range = start..end;

            map.retain(|k, _| !range.contains(k));
        }
        Ok(())
    }

    async fn move_value(&self, _from_key: &[u8], _to_key: &[u8]) -> Result<(), Error> {
        unimplemented!()
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[derive(Default)]
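The delete_range fix above matters because CachedMetaKvBackend::delete calls delete_range(key, &[]): with the old code, an empty end made start..end an empty range and nothing was deleted, while the new branch treats an empty end as "delete exactly this key". A hedged check (hypothetical test, in the style of test_get earlier):

    backend.set(b"k", b"v").await.unwrap();
    backend.delete_range(b"k", &[]).await.unwrap();
    assert!(backend.get(b"k").await.unwrap().is_none());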
@@ -30,12 +30,13 @@ use datatypes::schema::{ColumnSchema, RawSchema, SchemaRef};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::ScanRequest;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableInfoRef};
use table::requests::{
    CreateTableRequest, DeleteRequest, InsertRequest, OpenTableRequest, TableOptions,
};
use table::{Table, TableRef};
use table::{Result as TableResult, Table, TableRef};

use crate::error::{
    self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,

@@ -68,8 +69,12 @@ impl Table for SystemCatalogTable {
        self.0.scan(projection, filters, limit).await
    }

    async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
        self.0.scan_to_stream(request).await
    }

    /// Insert values into table.
    async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
    async fn insert(&self, request: InsertRequest) -> TableResult<usize> {
        self.0.insert(request).await
    }

@@ -77,7 +82,7 @@ impl Table for SystemCatalogTable {
        self.0.table_info()
    }

    async fn delete(&self, request: DeleteRequest) -> table::Result<usize> {
    async fn delete(&self, request: DeleteRequest) -> TableResult<usize> {
        self.0.delete(request).await
    }
@@ -14,8 +14,6 @@

#![feature(assert_matches)]

mod mock;

#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;

@@ -23,8 +21,10 @@ mod tests {
    use std::sync::Arc;

    use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
    use catalog::remote::mock::{MockKvBackend, MockTableEngine};
    use catalog::remote::{
        KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
        CachedMetaKvBackend, KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider,
        RemoteSchemaProvider,
    };
    use catalog::{CatalogManager, RegisterTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};

@@ -34,8 +34,6 @@ mod tests {
    use table::engine::{EngineContext, TableEngineRef};
    use table::requests::CreateTableRequest;

    use crate::mock::{MockKvBackend, MockTableEngine};

    #[tokio::test]
    async fn test_backend() {
        common_telemetry::init_default_ut_logging();

@@ -76,6 +74,52 @@ mod tests {
        );
    }

    #[tokio::test]
    async fn test_cached_backend() {
        common_telemetry::init_default_ut_logging();
        let backend = CachedMetaKvBackend::wrap(Arc::new(MockKvBackend::default()));

        let default_catalog_key = CatalogKey {
            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
        }
        .to_string();

        backend
            .set(
                default_catalog_key.as_bytes(),
                &CatalogValue {}.as_bytes().unwrap(),
            )
            .await
            .unwrap();

        let ret = backend.get(b"__c-greptime").await.unwrap();
        assert!(ret.is_some());

        let _ = backend
            .compare_and_set(
                b"__c-greptime",
                &CatalogValue {}.as_bytes().unwrap(),
                b"123",
            )
            .await
            .unwrap();

        let ret = backend.get(b"__c-greptime").await.unwrap();
        assert!(ret.is_some());
        assert_eq!(&b"123"[..], &(ret.as_ref().unwrap().1));

        let _ = backend.set(b"__c-greptime", b"1234").await;

        let ret = backend.get(b"__c-greptime").await.unwrap();
        assert!(ret.is_some());
        assert_eq!(&b"1234"[..], &(ret.as_ref().unwrap().1));

        backend.delete(b"__c-greptime").await.unwrap();

        let ret = backend.get(b"__c-greptime").await.unwrap();
        assert!(ret.is_none());
    }

    async fn prepare_components(
        node_id: u64,
    ) -> (

@@ -84,17 +128,22 @@ mod tests {
        Arc<RemoteCatalogManager>,
        TableEngineManagerRef,
    ) {
        let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
        let cached_backend = Arc::new(CachedMetaKvBackend::wrap(
            Arc::new(MockKvBackend::default()),
        ));

        let table_engine = Arc::new(MockTableEngine::default());
        let engine_manager = Arc::new(MemoryTableEngineManager::alias(
            MITO_ENGINE.to_string(),
            table_engine.clone(),
        ));

        let catalog_manager =
            RemoteCatalogManager::new(engine_manager.clone(), node_id, backend.clone());
            RemoteCatalogManager::new(engine_manager.clone(), node_id, cached_backend.clone());
        catalog_manager.start().await.unwrap();

        (
            backend,
            cached_backend,
            table_engine,
            Arc::new(catalog_manager),
            engine_manager as Arc<_>,
@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true

[features]
testing = []

[dependencies]
api = { path = "../api" }
arrow-flight.workspace = true

@@ -16,11 +19,13 @@ common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-time = { path = "../common/time" }
common-meta = { path = "../common/meta" }
common-telemetry = { path = "../common/telemetry" }
datafusion.workspace = true
datatypes = { path = "../datatypes" }
enum_dispatch = "0.3"
futures-util.workspace = true
moka = { version = "0.9", features = ["future"] }
parking_lot = "0.12"
prost.workspace = true
rand.workspace = true
@@ -63,7 +63,7 @@ async fn run() {
        create_if_not_exists: false,
        table_options: Default::default(),
        table_id: Some(TableId { id: 1024 }),
        region_ids: vec![0],
        region_numbers: vec![0],
        engine: MITO_ENGINE.to_string(),
    };
@@ -12,46 +12,61 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{Debug, Formatter};
use std::sync::{Arc, Mutex};
use std::time::Duration;

use client::Client;
use common_grpc::channel_manager::ChannelManager;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::peer::Peer;
use common_telemetry::info;
use meta_client::rpc::Peer;
use moka::future::{Cache, CacheBuilder};

use crate::Client;

pub struct DatanodeClients {
    channel_manager: ChannelManager,
    clients: Cache<Peer, Client>,
    started: bool,
    started: Arc<Mutex<bool>>,
}

impl Default for DatanodeClients {
    fn default() -> Self {
        // TODO(LFC): Make this channel config configurable.
        let config = ChannelConfig::new().timeout(Duration::from_secs(8));

        Self {
            channel_manager: ChannelManager::new(),
            channel_manager: ChannelManager::with_config(config),
            clients: CacheBuilder::new(1024)
                .time_to_live(Duration::from_secs(30 * 60))
                .time_to_idle(Duration::from_secs(5 * 60))
                .build(),
            started: false,
            started: Arc::new(Mutex::new(false)),
        }
    }
}

impl Debug for DatanodeClients {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("DatanodeClients")
            .field("channel_manager", &self.channel_manager)
            .finish()
    }
}

impl DatanodeClients {
    pub(crate) fn start(&mut self) {
        if self.started {
    pub fn start(&self) {
        let mut started = self.started.lock().unwrap();
        if *started {
            return;
        }

        self.channel_manager.start_channel_recycle();

        info!("Datanode clients manager is started!");
        self.started = true;
        *started = true;
    }

    pub(crate) async fn get_client(&self, datanode: &Peer) -> Client {
    pub async fn get_client(&self, datanode: &Peer) -> Client {
        self.clients
            .get_with_by_ref(datanode, async move {
                Client::with_manager_and_urls(
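The started flag moves from a plain bool behind &mut self to Arc<Mutex<bool>>, so start can take &self and stay idempotent when DatanodeClients is shared behind an Arc. A minimal sketch of the pattern under that reading:

    use std::sync::{Arc, Mutex};

    struct Service {
        started: Arc<Mutex<bool>>,
    }

    impl Service {
        fn start(&self) {
            // Hold the lock across the check and the set so two racing
            // callers cannot both run the one-time startup work.
            let mut started = self.started.lock().unwrap();
            if *started {
                return;
            }
            // ... one-time startup work goes here ...
            *started = true;
        }
    }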
@@ -18,7 +18,7 @@ use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::{
    greptime_response, AffectedRows, AlterExpr, AuthHeader, CreateTableExpr, DdlRequest,
    DeleteRequest, DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequest, PromRangeQuery,
    DeleteRequest, DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequests, PromRangeQuery,
    QueryRequest, RequestHeader,
};
use arrow_flight::{FlightData, Ticket};

@@ -109,9 +109,9 @@ impl Database {
        });
    }

    pub async fn insert(&self, request: InsertRequest) -> Result<u32> {
    pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
        let _timer = timer!(metrics::METRIC_GRPC_INSERT);
        self.handle(Request::Insert(request)).await
        self.handle(Request::Inserts(requests)).await
    }

    pub async fn delete(&self, request: DeleteRequest) -> Result<u32> {
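Callers migrate by wrapping each InsertRequest in the plural message, exactly as the nyc-taxi benchmark earlier in this diff does; a minimal sketch:

    // One request per batch today; the plural shape leaves room for
    // batching several inserts into a single gRPC call.
    let requests = InsertRequests { inserts: vec![request] };
    let affected_rows = db.insert(requests).await?;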
@@ -13,6 +13,7 @@
// limitations under the License.

mod client;
pub mod client_manager;
mod database;
mod error;
pub mod load_balance;
@@ -32,6 +32,7 @@ frontend = { path = "../frontend" }
futures.workspace = true
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
metrics.workspace = true
nu-ansi-term = "0.46"
partition = { path = "../partition" }
query = { path = "../query" }
@@ -18,6 +18,10 @@ fn main() {
        "cargo:rustc-env=GIT_COMMIT={}",
        build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
    );
    println!(
        "cargo:rustc-env=GIT_COMMIT_SHORT={}",
        build_data::get_git_commit_short().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
    );
    println!(
        "cargo:rustc-env=GIT_BRANCH={}",
        build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
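The value is baked in at compile time, so downstream code reads it back with env! rather than at runtime; full_version() in greptime.rs below does exactly that. Illustrative one-liner (the constant name is hypothetical):

    // Compile-time constant; the build fails if build.rs did not emit it.
    const COMMIT_SHORT: &str = env!("GIT_COMMIT_SHORT");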
@@ -21,6 +21,7 @@ use cmd::error::Result;
use cmd::options::{Options, TopLevelOptions};
use cmd::{cli, datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info, TracingOptions};
use metrics::gauge;

#[derive(Parser)]
#[clap(name = "greptimedb", version = print_version())]

@@ -46,13 +47,13 @@ pub enum Application {
}

impl Application {
    async fn run(&mut self) -> Result<()> {
    async fn start(&mut self) -> Result<()> {
        match self {
            Application::Datanode(instance) => instance.run().await,
            Application::Frontend(instance) => instance.run().await,
            Application::Metasrv(instance) => instance.run().await,
            Application::Standalone(instance) => instance.run().await,
            Application::Cli(instance) => instance.run().await,
            Application::Datanode(instance) => instance.start().await,
            Application::Frontend(instance) => instance.start().await,
            Application::Metasrv(instance) => instance.start().await,
            Application::Standalone(instance) => instance.start().await,
            Application::Cli(instance) => instance.start().await,
        }
    }

@@ -163,6 +164,22 @@ fn print_version() -> &'static str {
    )
}

fn short_version() -> &'static str {
    env!("CARGO_PKG_VERSION")
}

// {app_name}-{branch_name}-{commit_short}
// The branch name (tag) of a release build should already contain the short
// version so the full version doesn't concat the short version explicitly.
fn full_version() -> &'static str {
    concat!(
        "greptimedb-",
        env!("GIT_BRANCH"),
        "-",
        env!("GIT_COMMIT_SHORT")
    )
}

#[cfg(feature = "mem-prof")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

@@ -185,10 +202,13 @@ async fn main() -> Result<()> {
    common_telemetry::init_default_metrics_recorder();
    let _guard = common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts);

    // Report app version as gauge.
    gauge!("app_version", 1.0, "short_version" => short_version(), "version" => full_version());

    let mut app = cmd.build(opts).await?;

    tokio::select! {
        result = app.run() => {
        result = app.start() => {
            if let Err(err) = result {
                error!(err; "Fatal error occurs!");
            }
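A worked example with hypothetical build-time values: if GIT_BRANCH is "v0.3.0-nightly" and GIT_COMMIT_SHORT is "edf6c0b", then short_version() is the crate version from Cargo (here "0.3.0") and full_version() concatenates to "greptimedb-v0.3.0-nightly-edf6c0b"; both land as labels on the app_version gauge.

    // Sketch (hypothetical build-time values):
    assert_eq!(full_version(), "greptimedb-v0.3.0-nightly-edf6c0b");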
@@ -28,7 +28,7 @@ pub struct Instance {
}

impl Instance {
    pub async fn run(&mut self) -> Result<()> {
    pub async fn start(&mut self) -> Result<()> {
        self.repl.run().await
    }

@@ -53,8 +53,8 @@ impl Command {
        if let Some(dir) = top_level_opts.log_dir {
            logging_opts.dir = dir;
        }
        if let Some(level) = top_level_opts.log_level {
            logging_opts.level = level;
        if top_level_opts.log_level.is_some() {
            logging_opts.level = top_level_opts.log_level;
        }
        Ok(Options::Cli(Box::new(logging_opts)))
    }

@@ -107,7 +107,7 @@ mod tests {
        let opts = cmd.load_options(TopLevelOptions::default()).unwrap();
        let logging_opts = opts.logging_options();
        assert_eq!("/tmp/greptimedb/logs", logging_opts.dir);
        assert_eq!("info", logging_opts.level);
        assert!(logging_opts.level.is_none());
        assert!(!logging_opts.enable_jaeger_tracing);
    }

@@ -129,6 +129,6 @@ mod tests {
            .unwrap();
        let logging_opts = opts.logging_options();
        assert_eq!("/tmp/greptimedb/test/logs", logging_opts.dir);
        assert_eq!("debug", logging_opts.level);
        assert_eq!("debug", logging_opts.level.as_ref().unwrap());
    }
}
@@ -16,7 +16,8 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;

use catalog::remote::MetaKvBackend;
use catalog::remote::CachedMetaKvBackend;
use client::client_manager::DatanodeClients;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::ErrorExt;
use common_query::Output;
@@ -24,7 +25,6 @@ use common_recordbatch::RecordBatches;
use common_telemetry::logging;
use either::Either;
use frontend::catalog::FrontendCatalogManager;
use frontend::datanode::DatanodeClients;
use meta_client::client::MetaClientBuilder;
use partition::manager::PartitionRuleManager;
use partition::route::TableRoutes;
@@ -253,9 +253,7 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);

let backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),
});
let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));

let table_routes = Arc::new(TableRoutes::new(meta_client));
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
@@ -263,11 +261,18 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let datanode_clients = Arc::new(DatanodeClients::default());

let catalog_list = Arc::new(FrontendCatalogManager::new(
backend,
cached_meta_backend.clone(),
cached_meta_backend,
partition_manager,
datanode_clients,
));
let state = Arc::new(QueryEngineState::new(catalog_list, Default::default()));
let state = Arc::new(QueryEngineState::new(
catalog_list,
false,
None,
None,
Default::default(),
));

Ok(DatafusionQueryEngine::new(state))
}

@@ -29,7 +29,7 @@ pub struct Instance {
}

impl Instance {
pub async fn run(&mut self) -> Result<()> {
pub async fn start(&mut self) -> Result<()> {
self.datanode.start().await.context(StartDatanodeSnafu)
}

@@ -91,7 +91,7 @@ struct StartCommand {
#[clap(short, long)]
config_file: Option<String>,
#[clap(long)]
data_dir: Option<String>,
data_home: Option<String>,
#[clap(long)]
wal_dir: Option<String>,
#[clap(long)]
@@ -113,8 +113,9 @@ impl StartCommand {
if let Some(dir) = top_level_opts.log_dir {
opts.logging.dir = dir;
}
if let Some(level) = top_level_opts.log_level {
opts.logging.level = level;

if top_level_opts.log_level.is_some() {
opts.logging.level = top_level_opts.log_level;
}

if let Some(addr) = &self.rpc_addr {
@@ -147,14 +148,14 @@ impl StartCommand {
.fail();
}

if let Some(data_dir) = &self.data_dir {
if let Some(data_home) = &self.data_home {
opts.storage.store = ObjectStoreConfig::File(FileConfig {
data_dir: data_dir.clone(),
data_home: data_home.clone(),
});
}

if let Some(wal_dir) = &self.wal_dir {
opts.wal.dir = wal_dir.clone();
opts.wal.dir = Some(wal_dir.clone());
}

if let Some(http_addr) = &self.http_addr {
@@ -214,7 +215,7 @@ mod tests {
tcp_nodelay = true

[wal]
dir = "/tmp/greptimedb/wal"
dir = "/other/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -223,7 +224,7 @@ mod tests {

[storage]
type = "File"
data_dir = "/tmp/greptimedb/data/"
data_home = "/tmp/greptimedb/"

[storage.compaction]
max_inflight_tasks = 3
@@ -255,6 +256,7 @@ mod tests {
assert_eq!(2, options.mysql_runtime_size);
assert_eq!(Some(42), options.node_id);

assert_eq!("/other/wal", options.wal.dir.unwrap());
assert_eq!(Duration::from_secs(600), options.wal.purge_interval);
assert_eq!(1024 * 1024 * 1024, options.wal.file_size.0);
assert_eq!(1024 * 1024 * 1024 * 50, options.wal.purge_threshold.0);
@@ -273,11 +275,12 @@ mod tests {
assert!(tcp_nodelay);

match &options.storage.store {
ObjectStoreConfig::File(FileConfig { data_dir, .. }) => {
assert_eq!("/tmp/greptimedb/data/", data_dir)
ObjectStoreConfig::File(FileConfig { data_home, .. }) => {
assert_eq!("/tmp/greptimedb/", data_home)
}
ObjectStoreConfig::S3 { .. } => unreachable!(),
ObjectStoreConfig::Oss { .. } => unreachable!(),
ObjectStoreConfig::Azblob { .. } => unreachable!(),
};

assert_eq!(
@@ -299,7 +302,7 @@ mod tests {
options.storage.manifest,
);

assert_eq!("debug".to_string(), options.logging.level);
assert_eq!("debug", options.logging.level.unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
}

@@ -352,7 +355,7 @@ mod tests {

let logging_opt = options.logging_options();
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}

#[test]
@@ -374,7 +377,6 @@ mod tests {
tcp_nodelay = true

[wal]
dir = "/tmp/greptimedb/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -383,7 +385,7 @@ mod tests {

[storage]
type = "File"
data_dir = "/tmp/greptimedb/data/"
data_home = "/tmp/greptimedb/"

[storage.compaction]
max_inflight_tasks = 3
@@ -464,7 +466,7 @@ mod tests {
assert_eq!(opts.storage.compaction.max_purge_tasks, 32);

// Should be read from cli, cli > config file > env > default values.
assert_eq!(opts.wal.dir, "/other/wal/dir");
assert_eq!(opts.wal.dir.unwrap(), "/other/wal/dir");

// Should be default value.
assert_eq!(

@@ -64,13 +64,6 @@ pub enum Error {
source: meta_srv::error::Error,
},

#[snafu(display("Failed to read config file: {}, source: {}", path, source))]
ReadConfig {
path: String,
source: std::io::Error,
location: Location,
},

#[snafu(display("Missing config, msg: {}", msg))]
MissingConfig { msg: String, location: Location },

@@ -154,6 +147,12 @@ pub enum Error {
source: ConfigError,
location: Location,
},

#[snafu(display("Failed to start catalog manager, source: {}", source))]
StartCatalogManager {
#[snafu(backtrace)]
source: catalog::error::Error,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -169,8 +168,7 @@ impl ErrorExt for Error {
Error::ShutdownMetaServer { source } => source.status_code(),
Error::BuildMetaServer { source } => source.status_code(),
Error::UnsupportedSelectorType { source, .. } => source.status_code(),
Error::ReadConfig { .. }
| Error::MissingConfig { .. }
Error::MissingConfig { .. }
| Error::LoadLayeredConfig { .. }
| Error::IllegalConfig { .. }
| Error::InvalidReplCommand { .. }
@@ -185,6 +183,7 @@ impl ErrorExt for Error {
source.status_code()
}
Error::SubstraitEncodeLogicalPlan { source } => source.status_code(),
Error::StartCatalogManager { source } => source.status_code(),
}
}

@@ -16,6 +16,7 @@ use std::sync::Arc;

use clap::Parser;
use common_base::Plugins;
use common_telemetry::logging;
use frontend::frontend::FrontendOptions;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::service_config::{InfluxdbOptions, PromOptions};
@@ -25,7 +26,7 @@ use servers::tls::{TlsMode, TlsOption};
use servers::{auth, Mode};
use snafu::ResultExt;

use crate::error::{self, IllegalAuthConfigSnafu, Result};
use crate::error::{self, IllegalAuthConfigSnafu, Result, StartCatalogManagerSnafu};
use crate::options::{Options, TopLevelOptions};

pub struct Instance {
@@ -33,7 +34,13 @@ pub struct Instance {
}

impl Instance {
pub async fn run(&mut self) -> Result<()> {
pub async fn start(&mut self) -> Result<()> {
self.frontend
.catalog_manager()
.start()
.await
.context(StartCatalogManagerSnafu)?;

self.frontend
.start()
.await
@@ -128,8 +135,9 @@ impl StartCommand {
if let Some(dir) = top_level_opts.log_dir {
opts.logging.dir = dir;
}
if let Some(level) = top_level_opts.log_level {
opts.logging.level = level;

if top_level_opts.log_level.is_some() {
opts.logging.level = top_level_opts.log_level;
}

let tls_opts = TlsOption::new(
@@ -195,6 +203,9 @@ impl StartCommand {
}

async fn build(self, opts: FrontendOptions) -> Result<Instance> {
logging::info!("Frontend start command: {:#?}", self);
logging::info!("Frontend options: {:#?}", opts);

let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);

let mut instance = FeInstance::try_new_distributed(&opts, plugins.clone())
@@ -290,7 +301,7 @@ mod tests {
[http_options]
addr = "127.0.0.1:4000"
timeout = "30s"

[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
@@ -315,7 +326,7 @@ mod tests {
fe_opts.http_options.as_ref().unwrap().timeout
);

assert_eq!("debug".to_string(), fe_opts.logging.level);
assert_eq!("debug", fe_opts.logging.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), fe_opts.logging.dir);
}

@@ -359,7 +370,7 @@ mod tests {

let logging_opt = options.logging_options();
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}

#[test]
@@ -370,7 +381,7 @@ mod tests {

[http_options]
addr = "127.0.0.1:4000"

[meta_client_options]
timeout_millis = 3000
connect_timeout_millis = 5000

@@ -28,7 +28,7 @@ pub struct Instance {
}

impl Instance {
pub async fn run(&mut self) -> Result<()> {
pub async fn start(&mut self) -> Result<()> {
self.instance
.start()
.await
@@ -111,8 +111,9 @@ impl StartCommand {
if let Some(dir) = top_level_opts.log_dir {
opts.logging.dir = dir;
}
if let Some(level) = top_level_opts.log_level {
opts.logging.level = level;

if top_level_opts.log_level.is_some() {
opts.logging.level = top_level_opts.log_level;
}

if let Some(addr) = &self.bind_addr {
@@ -220,7 +221,7 @@ mod tests {
assert_eq!("127.0.0.1:2379".to_string(), options.store_addr);
assert_eq!(15, options.datanode_lease_secs);
assert_eq!(SelectorType::LeaseBased, options.selector);
assert_eq!("debug".to_string(), options.logging.level);
assert_eq!("debug", options.logging.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
}

@@ -243,7 +244,7 @@ mod tests {

let logging_opt = options.logging_options();
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}

#[test]
@@ -257,7 +258,7 @@ mod tests {

[http_options]
addr = "127.0.0.1:4000"

[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"

@@ -264,7 +264,7 @@ mod tests {
);

// Should be the values from config file, not environment variables.
assert_eq!(opts.wal.dir, "/tmp/greptimedb/wal".to_string());
assert_eq!(opts.wal.dir.unwrap(), "/tmp/greptimedb/wal");

// Should be default values.
assert_eq!(opts.node_id, None);

@@ -152,7 +152,7 @@ pub struct Instance {
}

impl Instance {
pub async fn run(&mut self) -> Result<()> {
pub async fn start(&mut self) -> Result<()> {
// Start datanode instance before starting services, to avoid requests come in before internal components are started.
self.datanode
.start_instance()
@@ -227,8 +227,9 @@ impl StartCommand {
if let Some(dir) = top_level_options.log_dir {
opts.logging.dir = dir;
}
if let Some(level) = top_level_options.log_level {
opts.logging.level = level;

if top_level_options.log_level.is_some() {
opts.logging.level = top_level_options.log_level;
}

let tls_opts = TlsOption::new(
@@ -301,6 +302,7 @@ impl StartCommand {
async fn build(self, fe_opts: FrontendOptions, dn_opts: DatanodeOptions) -> Result<Instance> {
let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);

info!("Standalone start command: {:#?}", self);
info!(
"Standalone frontend options: {:#?}, datanode options: {:#?}",
fe_opts, dn_opts
@@ -449,7 +451,7 @@ mod tests {
);
assert!(fe_opts.influxdb_options.as_ref().unwrap().enable);

assert_eq!("/tmp/greptimedb/test/wal", dn_opts.wal.dir);
assert_eq!("/tmp/greptimedb/test/wal", dn_opts.wal.dir.unwrap());
match &dn_opts.storage.store {
datanode::datanode::ObjectStoreConfig::S3(s3_config) => {
assert_eq!(
@@ -462,7 +464,7 @@ mod tests {
}
}

assert_eq!("debug".to_string(), logging_opts.level);
assert_eq!("debug", logging_opts.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), logging_opts.dir);
}

@@ -483,7 +485,7 @@ mod tests {
};

assert_eq!("/tmp/greptimedb/test/logs", opts.logging.dir);
assert_eq!("debug", opts.logging.level);
assert_eq!("debug", opts.logging.level.unwrap());
}

#[test]
@@ -553,7 +555,7 @@ mod tests {
assert_eq!(opts.logging.dir, "/other/log/dir");

// Should be read from config file, config file > env > default values.
assert_eq!(opts.logging.level, "debug");
assert_eq!(opts.logging.level.as_ref().unwrap(), "debug");

// Should be read from cli, cli > config file > env > default values.
assert_eq!(

@@ -51,7 +51,7 @@ mod tests {
#[ignore]
#[test]
fn test_repl() {
let data_dir = create_temp_dir("data");
let data_home = create_temp_dir("data");
let wal_dir = create_temp_dir("wal");

let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
@@ -65,7 +65,7 @@ mod tests {
"start",
"--rpc-addr=0.0.0.0:4321",
"--node-id=1",
&format!("--data-dir={}", data_dir.path().display()),
&format!("--data-home={}", data_home.path().display()),
&format!("--wal-dir={}", wal_dir.path().display()),
])
.stdout(Stdio::null())

@@ -15,6 +15,7 @@
pub mod bit_vec;
pub mod buffer;
pub mod bytes;
pub mod paths;
#[allow(clippy::all)]
pub mod readable_size;

25
src/common/base/src/paths.rs
Normal file
@@ -0,0 +1,25 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Path constants for table engines, cluster states and WAL
/// All paths relative to data_home(file storage) or root path(S3, OSS etc).

/// WAL dir for local file storage
pub const WAL_DIR: &str = "wal/";

/// Data dir for table engines
pub const DATA_DIR: &str = "data/";

/// Cluster state dir
pub const CLUSTER_DIR: &str = "cluster/";
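
Below is a minimal usage sketch, not part of the diff: it shows how these relative constants compose with the storage root. The `resolve` helper is hypothetical; the "/tmp/greptimedb/" root mirrors the `data_home` value used in the config tests above.

// Sketch only: assumes the `common_base::paths` constants introduced above.
use common_base::paths::{CLUSTER_DIR, DATA_DIR, WAL_DIR};

// Hypothetical helper: join a relative path constant onto a file-storage root.
fn resolve(data_home: &str, dir: &str) -> String {
    format!("{data_home}{dir}")
}

fn main() {
    // With data_home = "/tmp/greptimedb/", the layout matches the tests above:
    assert_eq!("/tmp/greptimedb/wal/", resolve("/tmp/greptimedb/", WAL_DIR));
    assert_eq!("/tmp/greptimedb/data/", resolve("/tmp/greptimedb/", DATA_DIR));
    assert_eq!("/tmp/greptimedb/cluster/", resolve("/tmp/greptimedb/", CLUSTER_DIR));
}
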
@@ -36,9 +36,6 @@ pub enum Error {
location: Location,
source: serde_json::error::Error,
},

#[snafu(display("Failed to parse node id: {}", key))]
ParseNodeId { key: String, location: Location },
}

impl ErrorExt for Error {
@@ -47,7 +44,6 @@ impl ErrorExt for Error {
Error::InvalidCatalog { .. }
| Error::DeserializeCatalogEntryValue { .. }
| Error::SerializeCatalogEntryValue { .. } => StatusCode::Unexpected,
Error::ParseNodeId { .. } => StatusCode::InvalidArguments,
}
}

@@ -41,9 +41,6 @@ pub enum Error {
#[snafu(display("empty host: {}", url))]
EmptyHostPath { url: String, location: Location },

#[snafu(display("Invalid path: {}", path))]
InvalidPath { path: String, location: Location },

#[snafu(display("Invalid url: {}, error :{}", url, source))]
InvalidUrl {
url: String,
@@ -51,12 +48,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to decompression, source: {}", source))]
Decompression {
source: object_store::Error,
location: Location,
},

#[snafu(display("Failed to build backend, source: {}", source))]
BuildBackend {
source: object_store::Error,
@@ -148,9 +139,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Missing required field: {}", name))]
MissingRequiredField { name: String, location: Location },

#[snafu(display("Buffered writer closed"))]
BufferedWriterClosed { location: Location },
}
@@ -173,16 +161,13 @@ impl ErrorExt for Error {
| InvalidConnection { .. }
| InvalidUrl { .. }
| EmptyHostPath { .. }
| InvalidPath { .. }
| InferSchema { .. }
| ReadParquetSnafu { .. }
| ParquetToSchema { .. }
| ParseFormat { .. }
| MergeSchema { .. }
| MissingRequiredField { .. } => StatusCode::InvalidArguments,
| MergeSchema { .. } => StatusCode::InvalidArguments,

Decompression { .. }
| JoinHandle { .. }
JoinHandle { .. }
| ReadRecordBatch { .. }
| WriteRecordBatch { .. }
| EncodeRecordBatch { .. }
@@ -203,11 +188,9 @@
InferSchema { location, .. } => Some(*location),
ReadParquetSnafu { location, .. } => Some(*location),
ParquetToSchema { location, .. } => Some(*location),
Decompression { location, .. } => Some(*location),
JoinHandle { location, .. } => Some(*location),
ParseFormat { location, .. } => Some(*location),
MergeSchema { location, .. } => Some(*location),
MissingRequiredField { location, .. } => Some(*location),
WriteObject { location, .. } => Some(*location),
ReadRecordBatch { location, .. } => Some(*location),
WriteRecordBatch { location, .. } => Some(*location),
@@ -217,7 +200,6 @@

UnsupportedBackendProtocol { location, .. } => Some(*location),
EmptyHostPath { location, .. } => Some(*location),
InvalidPath { location, .. } => Some(*location),
InvalidUrl { location, .. } => Some(*location),
InvalidConnection { location, .. } => Some(*location),
UnsupportedCompressionType { location, .. } => Some(*location),

@@ -110,6 +110,7 @@ impl ArrowDecoder for arrow::csv::reader::Decoder {
}
}

#[allow(deprecated)]
impl ArrowDecoder for arrow::json::RawDecoder {
fn decode(&mut self, buf: &[u8]) -> result::Result<usize, ArrowError> {
self.decode(buf)

@@ -17,6 +17,7 @@ use std::str::FromStr;
use std::sync::Arc;

use arrow::csv;
#[allow(deprecated)]
use arrow::csv::reader::infer_reader_schema as infer_csv_schema;
use arrow::record_batch::RecordBatch;
use arrow_schema::{Schema, SchemaRef};
@@ -113,8 +114,7 @@ pub struct CsvConfig {

impl CsvConfig {
fn builder(&self) -> csv::ReaderBuilder {
let mut builder = csv::ReaderBuilder::new()
.with_schema(self.file_schema.clone())
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
.with_delimiter(self.delimiter)
.with_batch_size(self.batch_size)
.has_header(self.has_header);
@@ -160,6 +160,7 @@ impl FileOpener for CsvOpener {
}
}

#[allow(deprecated)]
#[async_trait]
impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {

@@ -20,6 +20,7 @@ use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow::json::writer::LineDelimited;
#[allow(deprecated)]
use arrow::json::{self, RawReaderBuilder};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
@@ -129,6 +130,7 @@ impl JsonOpener {
}
}

#[allow(deprecated)]
impl FileOpener for JsonOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
@@ -159,8 +161,7 @@ pub async fn stream_to_json(

impl DfRecordBatchEncoder for json::Writer<SharedBuffer, LineDelimited> {
fn write(&mut self, batch: &RecordBatch) -> Result<()> {
self.write(batch.clone())
.context(error::WriteRecordBatchSnafu)
self.write(batch).context(error::WriteRecordBatchSnafu)
}
}

6
src/common/datasource/tests/csv/type_cast.csv
Normal file
@@ -0,0 +1,6 @@
hostname,environment,usage_user,usage_system,usage_idle,usage_nice,usage_iowait,usage_irq,usage_softirq,usage_steal,usage_guest,usage_guest_nice,ts
host_0,test,32,58,36,72,61,21,53,12,59,72,2023-04-01T00:00:00+00:00
host_1,staging,12,32,50,84,19,73,38,37,72,2,2023-04-01T00:00:00+00:00
host_2,test,98,5,40,95,64,39,21,63,53,94,2023-04-01T00:00:00+00:00
host_3,test,98,95,7,48,99,67,14,86,36,23,2023-04-01T00:00:00+00:00
host_4,test,32,44,11,53,64,9,17,39,20,7,2023-04-01T00:00:00+00:00

@@ -33,6 +33,8 @@ pub enum StatusCode {
Internal = 1003,
/// Invalid arguments.
InvalidArguments = 1004,
/// The task is cancelled.
Cancelled = 1005,
// ====== End of common status code ================

// ====== Begin of SQL related status code =========
@@ -100,6 +102,7 @@ impl StatusCode {
| StatusCode::Unsupported
| StatusCode::Unexpected
| StatusCode::InvalidArguments
| StatusCode::Cancelled
| StatusCode::InvalidSyntax
| StatusCode::PlanQuery
| StatusCode::EngineExecuteQuery
@@ -125,6 +128,7 @@ impl StatusCode {
| StatusCode::Unsupported
| StatusCode::Unexpected
| StatusCode::Internal
| StatusCode::Cancelled
| StatusCode::PlanQuery
| StatusCode::EngineExecuteQuery
| StatusCode::StorageUnavailable

@@ -17,3 +17,6 @@ common-time = { path = "../time" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }

[dev-dependencies]
paste = "1.0"

@@ -12,9 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::add_column::location::LocationType;
use api::v1::add_column::Location;
use api::v1::alter_expr::Kind;
use api::v1::{column_def, AlterExpr, CreateTableExpr, DropColumns, RenameTable};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, RawSchema};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
@@ -24,9 +27,12 @@ use table::requests::{

use crate::error::{
ColumnNotFoundSnafu, InvalidColumnDefSnafu, MissingFieldSnafu, MissingTimestampColumnSnafu,
Result, UnrecognizedTableOptionSnafu,
Result, UnknownLocationTypeSnafu, UnrecognizedTableOptionSnafu,
};

const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32;
const LOCATION_TYPE_AFTER: i32 = LocationType::After as i32;

/// Convert an [`AlterExpr`] to an [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<AlterTableRequest> {
let catalog_name = expr.catalog_name;
@@ -50,6 +56,7 @@ pub fn alter_expr_to_request(expr: AlterExpr) -> Result<AlterTableRequest> {
Ok(AddColumnRequest {
column_schema: schema,
is_key: ac.is_key,
location: parse_location(ac.location)?,
})
})
.collect::<Result<Vec<_>>>()?;
@@ -163,10 +170,10 @@ pub fn create_expr_to_request(
Some(expr.desc)
};

let region_ids = if expr.region_ids.is_empty() {
let region_numbers = if expr.region_numbers.is_empty() {
vec![0]
} else {
expr.region_ids
expr.region_numbers
};

let table_options =
@@ -178,7 +185,7 @@
table_name: expr.table_name,
desc,
schema,
region_numbers: region_ids,
region_numbers,
primary_key_indices,
create_if_not_exists: expr.create_if_not_exists,
table_options,
@@ -186,8 +193,26 @@
})
}

fn parse_location(location: Option<Location>) -> Result<Option<AddColumnLocation>> {
match location {
Some(Location {
location_type: LOCATION_TYPE_FIRST,
..
}) => Ok(Some(AddColumnLocation::First)),
Some(Location {
location_type: LOCATION_TYPE_AFTER,
after_cloumn_name,
}) => Ok(Some(AddColumnLocation::After {
column_name: after_cloumn_name,
})),
Some(Location { location_type, .. }) => UnknownLocationTypeSnafu { location_type }.fail(),
None => Ok(None),
}
}

#[cfg(test)]
mod tests {
use api::v1::add_column::location::LocationType;
use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
use datatypes::prelude::ConcreteDataType;

@@ -209,6 +234,7 @@ mod tests {
default_constraint: vec![],
}),
is_key: false,
location: None,
}],
})),
};
@@ -228,6 +254,80 @@ mod tests {
ConcreteDataType::float64_datatype(),
add_column.column_schema.data_type
);
assert_eq!(None, add_column.location);
}

#[test]
fn test_alter_expr_with_location_to_request() {
let expr = AlterExpr {
catalog_name: "".to_string(),
schema_name: "".to_string(),
table_name: "monitor".to_string(),

kind: Some(Kind::AddColumns(AddColumns {
add_columns: vec![
AddColumn {
column_def: Some(ColumnDef {
name: "mem_usage".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: false,
default_constraint: vec![],
}),
is_key: false,
location: Some(Location {
location_type: LocationType::First.into(),
after_cloumn_name: "".to_string(),
}),
},
AddColumn {
column_def: Some(ColumnDef {
name: "cpu_usage".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: false,
default_constraint: vec![],
}),
is_key: false,
location: Some(Location {
location_type: LocationType::After.into(),
after_cloumn_name: "ts".to_string(),
}),
},
],
})),
};

let alter_request = alter_expr_to_request(expr).unwrap();
assert_eq!(alter_request.catalog_name, "");
assert_eq!(alter_request.schema_name, "");
assert_eq!("monitor".to_string(), alter_request.table_name);

let mut add_columns = match alter_request.alter_kind {
AlterKind::AddColumns { columns } => columns,
_ => unreachable!(),
};

let add_column = add_columns.pop().unwrap();
assert!(!add_column.is_key);
assert_eq!("cpu_usage", add_column.column_schema.name);
assert_eq!(
ConcreteDataType::float64_datatype(),
add_column.column_schema.data_type
);
assert_eq!(
Some(AddColumnLocation::After {
column_name: "ts".to_string()
}),
add_column.location
);

let add_column = add_columns.pop().unwrap();
assert!(!add_column.is_key);
assert_eq!("mem_usage", add_column.column_schema.name);
assert_eq!(
ConcreteDataType::float64_datatype(),
add_column.column_schema.data_type
);
assert_eq!(Some(AddColumnLocation::First), add_column.location);
}

#[test]

@@ -14,7 +14,6 @@

use std::any::Any;

use api::DecodeError;
use common_error::ext::ErrorExt;
use common_error::prelude::{Snafu, StatusCode};
use snafu::Location;
@@ -28,9 +27,6 @@ pub enum Error {
table_name: String,
},

#[snafu(display("Failed to convert bytes to insert batch, source: {}", source))]
DecodeInsert { source: DecodeError },

#[snafu(display("Illegal delete request, reason: {reason}"))]
IllegalDeleteRequest { reason: String, location: Location },

@@ -65,12 +61,6 @@ pub enum Error {
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, location: Location },

#[snafu(display("Invalid column default constraint, source: {}", source))]
ColumnDefaultConstraint {
#[snafu(backtrace)]
source: datatypes::error::Error,
},

#[snafu(display(
"Invalid column proto definition, column: {}, source: {}",
column,
@@ -93,6 +83,12 @@ pub enum Error {

#[snafu(display("The column name already exists, column: {}", column))]
ColumnAlreadyExists { column: String, location: Location },

#[snafu(display("Unknown location type: {}", location_type))]
UnknownLocationType {
location_type: i32,
location: Location,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -102,9 +98,7 @@ impl ErrorExt for Error {
match self {
Error::ColumnNotFound { .. } => StatusCode::TableColumnNotFound,

Error::DecodeInsert { .. } | Error::IllegalDeleteRequest { .. } => {
StatusCode::InvalidArguments
}
Error::IllegalDeleteRequest { .. } => StatusCode::InvalidArguments,

Error::ColumnDataType { .. } => StatusCode::Internal,
Error::DuplicatedTimestampColumn { .. } | Error::MissingTimestampColumn { .. } => {
@@ -113,12 +107,11 @@ impl ErrorExt for Error {
Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
Error::CreateVector { .. } => StatusCode::InvalidArguments,
Error::MissingField { .. } => StatusCode::InvalidArguments,
Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
Error::InvalidColumnDef { source, .. } => source.status_code(),
Error::UnrecognizedTableOption { .. } => StatusCode::InvalidArguments,
Error::UnexpectedValuesLength { .. } | Error::ColumnAlreadyExists { .. } => {
StatusCode::InvalidArguments
}
Error::UnexpectedValuesLength { .. }
| Error::ColumnAlreadyExists { .. }
| Error::UnknownLocationType { .. } => StatusCode::InvalidArguments,
}
}

@@ -68,6 +68,7 @@ pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option
columns_to_add.push(AddColumn {
column_def,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
location: None,
});
new_columns.insert(column_name.to_string());
}
@@ -257,7 +258,7 @@ pub fn build_create_expr_from_insertion(
create_if_not_exists: true,
table_options: Default::default(),
table_id: table_id.map(|id| api::v1::TableId { id }),
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
region_numbers: vec![0], // TODO:(hl): region number should be allocated by frontend
engine: engine.to_string(),
};

@@ -380,22 +381,34 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Int8(_) => values.i8_values.into_iter().map(|val| val.into()).collect(),
ConcreteDataType::Int8(_) => values
.i8_values
.into_iter()
// Safety: Since i32 only stores i8 data here, so i32 as i8 is safe.
.map(|val| (val as i8).into())
.collect(),
ConcreteDataType::Int16(_) => values
.i16_values
.into_iter()
.map(|val| val.into())
// Safety: Since i32 only stores i16 data here, so i32 as i16 is safe.
.map(|val| (val as i16).into())
.collect(),
ConcreteDataType::Int32(_) => values
.i32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt8(_) => values.u8_values.into_iter().map(|val| val.into()).collect(),
ConcreteDataType::UInt8(_) => values
.u8_values
.into_iter()
// Safety: Since i32 only stores u8 data here, so i32 as u8 is safe.
.map(|val| (val as u8).into())
.collect(),
ConcreteDataType::UInt16(_) => values
.u16_values
.into_iter()
.map(|val| val.into())
// Safety: Since i32 only stores u16 data here, so i32 as u16 is safe.
.map(|val| (val as u16).into())
.collect(),
ConcreteDataType::UInt32(_) => values
.u32_values
@@ -418,12 +431,12 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
.map(|val| val.into())
.collect(),
ConcreteDataType::DateTime(_) => values
.i64_values
.datetime_values
.into_iter()
.map(|v| Value::DateTime(v.into()))
.collect(),
ConcreteDataType::Date(_) => values
.i32_values
.date_values
.into_iter()
.map(|v| Value::Date(v.into()))
.collect(),
@@ -459,26 +472,21 @@ fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {

#[cfg(test)]
mod tests {
use std::any::Any;
use std::sync::Arc;
use std::{assert_eq, unimplemented, vec};
use std::{assert_eq, vec};

use api::helper::ColumnDataTypeWrapper;
use api::v1::column::{self, SemanticType, Values};
use api::v1::{Column, ColumnDataType};
use common_base::BitVec;
use common_catalog::consts::MITO_ENGINE;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_time::timestamp::Timestamp;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use datatypes::types::{TimestampMillisecondType, TimestampSecondType, TimestampType};
use datatypes::value::Value;
use paste::paste;
use snafu::ResultExt;
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;

use super::*;
use crate::error;

@@ -666,26 +674,150 @@ mod tests {
assert_eq!(Value::Timestamp(Timestamp::new_millisecond(101)), ts.get(1));
}

#[test]
fn test_convert_values() {
let data_type = ConcreteDataType::float64_datatype();
let values = Values {
f64_values: vec![0.1, 0.2, 0.3],
..Default::default()
macro_rules! test_convert_values {
($grpc_data_type: ident, $values: expr, $concrete_data_type: ident, $expected_ret: expr) => {
paste! {
#[test]
fn [<test_convert_ $grpc_data_type _values>]() {
let values = Values {
[<$grpc_data_type _values>]: $values,
..Default::default()
};

let data_type = ConcreteDataType::[<$concrete_data_type _datatype>]();
let result = convert_values(&data_type, values);

assert_eq!(
$expected_ret,
result
);
}
}
};

let result = convert_values(&data_type, values);

assert_eq!(
vec![
Value::Float64(0.1.into()),
Value::Float64(0.2.into()),
Value::Float64(0.3.into())
],
result
);
}

test_convert_values!(
i8,
vec![1_i32, 2, 3],
int8,
vec![Value::Int8(1), Value::Int8(2), Value::Int8(3)]
);

test_convert_values!(
u8,
vec![1_u32, 2, 3],
uint8,
vec![Value::UInt8(1), Value::UInt8(2), Value::UInt8(3)]
);

test_convert_values!(
i16,
vec![1_i32, 2, 3],
int16,
vec![Value::Int16(1), Value::Int16(2), Value::Int16(3)]
);

test_convert_values!(
u16,
vec![1_u32, 2, 3],
uint16,
vec![Value::UInt16(1), Value::UInt16(2), Value::UInt16(3)]
);

test_convert_values!(
i32,
vec![1, 2, 3],
int32,
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)]
);

test_convert_values!(
u32,
vec![1, 2, 3],
uint32,
vec![Value::UInt32(1), Value::UInt32(2), Value::UInt32(3)]
);

test_convert_values!(
i64,
vec![1, 2, 3],
int64,
vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)]
);

test_convert_values!(
u64,
vec![1, 2, 3],
uint64,
vec![Value::UInt64(1), Value::UInt64(2), Value::UInt64(3)]
);

test_convert_values!(
f32,
vec![1.0, 2.0, 3.0],
float32,
vec![
Value::Float32(1.0.into()),
Value::Float32(2.0.into()),
Value::Float32(3.0.into())
]
);

test_convert_values!(
f64,
vec![1.0, 2.0, 3.0],
float64,
vec![
Value::Float64(1.0.into()),
Value::Float64(2.0.into()),
Value::Float64(3.0.into())
]
);

test_convert_values!(
string,
vec!["1".to_string(), "2".to_string(), "3".to_string()],
string,
vec![
Value::String("1".into()),
Value::String("2".into()),
Value::String("3".into())
]
);

test_convert_values!(
binary,
vec!["1".into(), "2".into(), "3".into()],
binary,
vec![
Value::Binary(b"1".to_vec().into()),
Value::Binary(b"2".to_vec().into()),
Value::Binary(b"3".to_vec().into())
]
);

test_convert_values!(
date,
vec![1, 2, 3],
date,
vec![
Value::Date(1.into()),
Value::Date(2.into()),
Value::Date(3.into())
]
);

test_convert_values!(
datetime,
vec![1.into(), 2.into(), 3.into()],
datetime,
vec![
Value::DateTime(1.into()),
Value::DateTime(2.into()),
Value::DateTime(3.into())
]
);

#[test]
fn test_convert_timestamp_values() {
// second
@@ -733,49 +865,6 @@ mod tests {
assert_eq!(None, is_null(&null_mask, 99));
}

struct DemoTable;

#[async_trait::async_trait]
impl Table for DemoTable {
fn as_any(&self) -> &dyn Any {
self
}

fn schema(&self) -> SchemaRef {
let column_schemas = vec![
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true),
];

Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.build()
.unwrap(),
)
}

fn table_info(&self) -> TableInfoRef {
unimplemented!()
}

async fn scan(
&self,
_projection: Option<&Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> TableResult<PhysicalPlanRef> {
unimplemented!();
}
}

fn mock_insert_batch() -> (Vec<Column>, u32) {
let row_count = 2;

@@ -13,6 +13,7 @@ common-base = { path = "../base" }
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-query = { path = "../query" }
common-meta = { path = "../meta" }
common-recordbatch = { path = "../recordbatch" }
common-runtime = { path = "../runtime" }
common-telemetry = { path = "../telemetry" }

@@ -13,7 +13,7 @@
// limitations under the License.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::sync::{Arc, Mutex};
use std::time::Duration;

use common_telemetry::info;
@@ -28,13 +28,14 @@ use tower::make::MakeConnection;
use crate::error::{CreateChannelSnafu, InvalidConfigFilePathSnafu, InvalidTlsConfigSnafu, Result};

const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 2;

#[derive(Clone, Debug)]
pub struct ChannelManager {
config: ChannelConfig,
client_tls_config: Option<ClientTlsConfig>,
pool: Arc<Pool>,
channel_recycle_started: bool,
channel_recycle_started: Arc<Mutex<bool>>,
}

impl Default for ChannelManager {
@@ -54,12 +55,13 @@ impl ChannelManager {
config,
client_tls_config: None,
pool,
channel_recycle_started: false,
channel_recycle_started: Arc::new(Mutex::new(false)),
}
}

pub fn start_channel_recycle(&mut self) {
if self.channel_recycle_started {
pub fn start_channel_recycle(&self) {
let mut started = self.channel_recycle_started.lock().unwrap();
if *started {
return;
}

@@ -69,7 +71,7 @@ impl ChannelManager {
});
info!("Channel recycle is started, running in the background!");

self.channel_recycle_started = true;
*started = true;
}

pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
@@ -235,7 +237,7 @@ pub struct ChannelConfig {
impl Default for ChannelConfig {
fn default() -> Self {
Self {
timeout: Some(Duration::from_secs(2)),
timeout: Some(Duration::from_secs(DEFAULT_REQUEST_TIMEOUT_SECS)),
connect_timeout: Some(Duration::from_secs(4)),
concurrency_limit: None,
rate_limit: None,
@@ -496,7 +498,7 @@ mod tests {
let default_cfg = ChannelConfig::new();
assert_eq!(
ChannelConfig {
timeout: Some(Duration::from_secs(2)),
timeout: Some(Duration::from_secs(DEFAULT_REQUEST_TIMEOUT_SECS)),
connect_timeout: Some(Duration::from_secs(4)),
concurrency_limit: None,
rate_limit: None,

@@ -32,9 +32,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, location: Location },

#[snafu(display(
"Write type mismatch, column name: {}, expected: {}, actual: {}",
column_name,
@@ -63,12 +60,6 @@ pub enum Error {
#[snafu(display("Failed to convert Arrow type: {}", from))]
Conversion { from: String, location: Location },

#[snafu(display("Column datatype error, source: {}", source))]
ColumnDataType {
#[snafu(backtrace)]
source: api::error::Error,
},

#[snafu(display("Failed to decode FlightData, source: {}", source))]
DecodeFlightData {
source: api::DecodeError,
@@ -90,7 +81,6 @@ impl ErrorExt for Error {
match self {
Error::InvalidTlsConfig { .. }
| Error::InvalidConfigFilePath { .. }
| Error::MissingField { .. }
| Error::TypeMismatch { .. }
| Error::InvalidFlightData { .. } => StatusCode::InvalidArguments,

@@ -99,7 +89,6 @@ impl ErrorExt for Error {
| Error::DecodeFlightData { .. } => StatusCode::Internal,

Error::CreateRecordBatch { source } => source.status_code(),
Error::ColumnDataType { source } => source.status_code(),
Error::ConvertArrowSchema { source } => source.status_code(),
}
}

@@ -5,7 +5,19 @@ edition.workspace = true
license.workspace = true

[dependencies]
api = { path = "../../api" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-runtime = { path = "../runtime" }
common-telemetry = { path = "../telemetry" }
common-time = { path = "../time" }
serde.workspace = true
snafu.workspace = true
serde_json.workspace = true
snafu.workspace = true
store-api = { path = "../../store-api" }
table = { path = "../../table" }
tokio.workspace = true

[dev-dependencies]
chrono.workspace = true
datatypes = { path = "../../datatypes" }

77
src/common/meta/src/error.rs
Normal file
@@ -0,0 +1,77 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::prelude::*;
use serde_json::error::Error as JsonError;
use snafu::Location;

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Failed to encode object into json, source: {}", source))]
EncodeJson {
location: Location,
source: JsonError,
},

#[snafu(display("Failed to decode object from json, source: {}", source))]
DecodeJson {
location: Location,
source: JsonError,
},

#[snafu(display("Payload not exist"))]
PayloadNotExist { location: Location },

#[snafu(display("Failed to send message: {err_msg}"))]
SendMessage { err_msg: String, location: Location },

#[snafu(display("Failed to serde json, source: {}", source))]
SerdeJson {
source: serde_json::error::Error,
location: Location,
},

#[snafu(display("Corrupted table route data, err: {}", err_msg))]
RouteInfoCorrupted { err_msg: String, location: Location },

#[snafu(display("Illegal state from server, code: {}, error: {}", code, err_msg))]
IllegalServerState {
code: i32,
err_msg: String,
location: Location,
},
}

pub type Result<T> = std::result::Result<T, Error>;

impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
IllegalServerState { .. } => StatusCode::Internal,
SerdeJson { .. } | RouteInfoCorrupted { .. } => StatusCode::Unexpected,

SendMessage { .. } => StatusCode::Internal,

EncodeJson { .. } | DecodeJson { .. } | PayloadNotExist { .. } => {
StatusCode::Unexpected
}
}
}

fn as_any(&self) -> &dyn std::any::Any {
self
}
}
17
src/common/meta/src/heartbeat.rs
Normal file
@@ -0,0 +1,17 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod handler;
pub mod mailbox;
pub mod utils;
98
src/common/meta/src/heartbeat/handler.rs
Normal file
@@ -0,0 +1,98 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::meta::HeartbeatResponse;
use common_telemetry::error;

use crate::error::Result;
use crate::heartbeat::mailbox::{IncomingMessage, MailboxRef};

pub mod parse_mailbox_message;
#[cfg(test)]
mod tests;

pub type HeartbeatResponseHandlerExecutorRef = Arc<dyn HeartbeatResponseHandlerExecutor>;
pub type HeartbeatResponseHandlerRef = Arc<dyn HeartbeatResponseHandler>;

pub struct HeartbeatResponseHandlerContext {
pub mailbox: MailboxRef,
pub response: HeartbeatResponse,
pub incoming_message: Option<IncomingMessage>,
}

/// HandleControl
///
/// Controls process of handling heartbeat response.
#[derive(PartialEq)]
pub enum HandleControl {
Continue,
Done,
}

impl HeartbeatResponseHandlerContext {
pub fn new(mailbox: MailboxRef, response: HeartbeatResponse) -> Self {
Self {
mailbox,
response,
incoming_message: None,
}
}
}

/// HeartbeatResponseHandler
///
/// [`HeartbeatResponseHandler::is_acceptable`] returns true if handler can handle incoming [`HeartbeatResponseHandlerContext`].
///
/// [`HeartbeatResponseHandler::handle`] handles all or part of incoming [`HeartbeatResponseHandlerContext`].
pub trait HeartbeatResponseHandler: Send + Sync {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool;

fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl>;
}

pub trait HeartbeatResponseHandlerExecutor: Send + Sync {
fn handle(&self, ctx: HeartbeatResponseHandlerContext) -> Result<()>;
}

pub struct HandlerGroupExecutor {
handlers: Vec<HeartbeatResponseHandlerRef>,
}

impl HandlerGroupExecutor {
pub fn new(handlers: Vec<HeartbeatResponseHandlerRef>) -> Self {
Self { handlers }
}
}

impl HeartbeatResponseHandlerExecutor for HandlerGroupExecutor {
fn handle(&self, mut ctx: HeartbeatResponseHandlerContext) -> Result<()> {
for handler in &self.handlers {
if !handler.is_acceptable(&ctx) {
continue;
}

match handler.handle(&mut ctx) {
Ok(HandleControl::Done) => break,
Ok(HandleControl::Continue) => {}
Err(e) => {
error!(e;"Error while handling: {:?}", ctx.response);
break;
}
}
}
Ok(())
}
}
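
A short usage sketch follows, not part of the diff: it wires the pieces above together. The module paths are assumed from this diff's file layout, and `ParseMailboxMessageHandler` is the handler updated further below.

// Sketch only: builds a handler chain and runs it over one heartbeat response.
use std::sync::Arc;

use api::v1::meta::HeartbeatResponse;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::{
    HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
};
use common_meta::heartbeat::mailbox::MailboxRef;

fn on_heartbeat_response(mailbox: MailboxRef, response: HeartbeatResponse) {
    // Handlers run in order: a handler may be skipped (is_acceptable == false),
    // let the chain continue (HandleControl::Continue), or stop it (HandleControl::Done).
    let executor = HandlerGroupExecutor::new(vec![Arc::new(ParseMailboxMessageHandler::default())]);
    let ctx = HeartbeatResponseHandlerContext::new(mailbox, response);
    // Per-handler errors are logged inside the executor; this is the executor-level result.
    let _ = executor.handle(ctx);
}
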
@@ -13,7 +13,9 @@
 // limitations under the License.

 use crate::error::Result;
-use crate::heartbeat::handler::{HeartbeatResponseHandler, HeartbeatResponseHandlerContext};
+use crate::heartbeat::handler::{
+    HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
+};
 use crate::heartbeat::utils::mailbox_message_to_incoming_message;

 #[derive(Default)]
@@ -24,7 +26,7 @@ impl HeartbeatResponseHandler for ParseMailboxMessageHandler {
         true
     }

-    fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<()> {
+    fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> Result<HandleControl> {
         if let Some(message) = &ctx.response.mailbox_message {
             if message.payload.is_some() {
                 // mailbox_message_to_incoming_message will raise an error if payload is none
@@ -32,6 +34,6 @@ impl HeartbeatResponseHandler for ParseMailboxMessageHandler {
             }
         }

-        Ok(())
+        Ok(HandleControl::Continue)
     }
 }
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use common_meta::instruction::{InstructionReply, SimpleReply};
 use tokio::sync::mpsc;

 use crate::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
-use crate::instruction::{InstructionReply, SimpleReply};

 #[tokio::test]
 async fn test_heartbeat_mailbox() {
@@ -14,10 +14,10 @@

 use std::sync::Arc;

+use common_meta::instruction::{Instruction, InstructionReply};
 use tokio::sync::mpsc::Sender;

 use crate::error::{self, Result};
-use crate::instruction::{Instruction, InstructionReply};

 pub type IncomingMessage = (MessageMeta, Instruction);
 pub type OutgoingMessage = (MessageMeta, InstructionReply);
@@ -14,12 +14,12 @@

 use api::v1::meta::mailbox_message::Payload;
 use api::v1::meta::MailboxMessage;
+use common_meta::instruction::Instruction;
 use common_time::util::current_time_millis;
 use snafu::{OptionExt, ResultExt};

 use crate::error::{self, Result};
 use crate::heartbeat::mailbox::{IncomingMessage, MessageMeta, OutgoingMessage};
-use crate::instruction::Instruction;

 pub fn mailbox_message_to_incoming_message(m: MailboxMessage) -> Result<IncomingMessage> {
     m.payload
@@ -12,16 +12,60 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

 use std::fmt::{Display, Formatter};

 use serde::{Deserialize, Serialize};

-#[derive(Debug, Serialize, Deserialize)]
+use crate::{ClusterId, DatanodeId};
+
+#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
 pub struct RegionIdent {
+    pub cluster_id: ClusterId,
+    pub datanode_id: DatanodeId,
+    pub table_ident: TableIdent,
     pub region_number: u32,
 }

+impl Display for RegionIdent {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "RegionIdent(datanode_id='{}.{}', table_id='{}', table_name='{}.{}.{}', table_engine='{}', region_no='{}')",
+            self.cluster_id,
+            self.datanode_id,
+            self.table_ident.table_id,
+            self.table_ident.catalog,
+            self.table_ident.schema,
+            self.table_ident.table,
+            self.table_ident.engine,
+            self.region_number
+        )
+    }
+}
+
+impl From<RegionIdent> for TableIdent {
+    fn from(region_ident: RegionIdent) -> Self {
+        region_ident.table_ident
+    }
+}
+
+#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
+pub struct TableIdent {
     pub catalog: String,
     pub schema: String,
     pub table: String,
     pub table_id: u32,
     pub engine: String,
 }

+impl Display for TableIdent {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "TableIdent(table_id='{}', table_name='{}.{}.{}', table_engine='{}')",
+            self.table_id, self.catalog, self.schema, self.table, self.engine,
+        )
+    }
+}

 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
@@ -30,11 +74,28 @@ pub struct SimpleReply {
     pub error: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize)]
+impl Display for SimpleReply {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "(result={}, error={:?})", self.result, self.error)
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum Instruction {
     OpenRegion(RegionIdent),
     CloseRegion(RegionIdent),
     InvalidateTableCache(TableIdent),
 }

+impl Display for Instruction {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::OpenRegion(region) => write!(f, "Instruction::OpenRegion({})", region),
+            Self::CloseRegion(region) => write!(f, "Instruction::CloseRegion({})", region),
+            Self::InvalidateTableCache(table) => write!(f, "Instruction::Invalidate({})", table),
+        }
+    }
+}

 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
@@ -42,6 +103,19 @@ pub enum Instruction {
 pub enum InstructionReply {
     OpenRegion(SimpleReply),
     CloseRegion(SimpleReply),
     InvalidateTableCache(SimpleReply),
 }

+impl Display for InstructionReply {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::OpenRegion(reply) => write!(f, "InstructionReply::OpenRegion({})", reply),
+            Self::CloseRegion(reply) => write!(f, "InstructionReply::CloseRegion({})", reply),
+            Self::InvalidateTableCache(reply) => {
+                write!(f, "InstructionReply::Invalidate({})", reply)
+            }
+        }
+    }
+}

 #[cfg(test)]
@@ -51,34 +125,42 @@ mod tests {
     #[test]
     fn test_serialize_instruction() {
         let open_region = Instruction::OpenRegion(RegionIdent {
-            catalog: "foo".to_string(),
-            schema: "bar".to_string(),
-            table: "hi".to_string(),
-            table_id: 1024,
-            engine: "mito".to_string(),
+            cluster_id: 1,
+            datanode_id: 2,
+            table_ident: TableIdent {
+                catalog: "foo".to_string(),
+                schema: "bar".to_string(),
+                table: "hi".to_string(),
+                table_id: 1024,
+                engine: "mito".to_string(),
+            },
             region_number: 1,
         });

         let serialized = serde_json::to_string(&open_region).unwrap();

         assert_eq!(
-            r#"{"type":"open_region","catalog":"foo","schema":"bar","table":"hi","table_id":1024,"engine":"mito","region_number":1}"#,
+            r#"{"type":"open_region","cluster_id":1,"datanode_id":2,"table_ident":{"catalog":"foo","schema":"bar","table":"hi","table_id":1024,"engine":"mito"},"region_number":1}"#,
             serialized
         );

         let close_region = Instruction::CloseRegion(RegionIdent {
-            catalog: "foo".to_string(),
-            schema: "bar".to_string(),
-            table: "hi".to_string(),
-            table_id: 1024,
-            engine: "mito".to_string(),
+            cluster_id: 1,
+            datanode_id: 2,
+            table_ident: TableIdent {
+                catalog: "foo".to_string(),
+                schema: "bar".to_string(),
+                table: "hi".to_string(),
+                table_id: 1024,
+                engine: "mito".to_string(),
+            },
             region_number: 1,
         });

         let serialized = serde_json::to_string(&close_region).unwrap();

         assert_eq!(
-            r#"{"type":"close_region","catalog":"foo","schema":"bar","table":"hi","table_id":1024,"engine":"mito","region_number":1}"#,
+            r#"{"type":"close_region","cluster_id":1,"datanode_id":2,"table_ident":{"catalog":"foo","schema":"bar","table":"hi","table_id":1024,"engine":"mito"},"region_number":1}"#,
             serialized
         );
     }
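The "type":"open_region" discriminator in the JSON above comes from serde's internally tagged enum representation. A minimal, self-contained sketch of that encoding (assuming serde and serde_json as dependencies; the types are simplified stand-ins, not the crate's structs):

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Ident {
    table: String,
    region_number: u32,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum Instruction {
    OpenRegion(Ident),
    CloseRegion(Ident),
}

fn main() {
    let open = Instruction::OpenRegion(Ident {
        table: "hi".to_string(),
        region_number: 1,
    });

    // The variant name becomes the "type" field, flattened into the payload.
    let json = serde_json::to_string(&open).unwrap();
    assert_eq!(
        r#"{"type":"open_region","table":"hi","region_number":1}"#,
        json
    );

    // The tag also drives deserialization, so the value round-trips.
    let back: Instruction = serde_json::from_str(&json).unwrap();
    assert_eq!(open, back);
}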
src/common/meta/src/key.rs (new file, 35 lines)
@@ -0,0 +1,35 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod table_route;

pub use crate::key::table_route::{TableRouteKey, TABLE_ROUTE_PREFIX};

pub const REMOVED_PREFIX: &str = "__removed";

pub fn to_removed_key(key: &str) -> String {
    format!("{REMOVED_PREFIX}-{key}")
}

#[cfg(test)]
mod tests {
    use crate::key::to_removed_key;

    #[test]
    fn test_to_removed_key() {
        let key = "test_key";
        let removed = "__removed-test_key";
        assert_eq!(removed, to_removed_key(key));
    }
}
src/common/meta/src/key/table_route.rs (new file, 97 lines)
@@ -0,0 +1,97 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::meta::TableName;

use crate::key::to_removed_key;

pub const TABLE_ROUTE_PREFIX: &str = "__meta_table_route";

pub struct TableRouteKey<'a> {
    pub table_id: u64,
    pub catalog_name: &'a str,
    pub schema_name: &'a str,
    pub table_name: &'a str,
}

impl<'a> TableRouteKey<'a> {
    pub fn with_table_name(table_id: u64, t: &'a TableName) -> Self {
        Self {
            table_id,
            catalog_name: &t.catalog_name,
            schema_name: &t.schema_name,
            table_name: &t.table_name,
        }
    }

    pub fn prefix(&self) -> String {
        format!(
            "{}-{}-{}-{}",
            TABLE_ROUTE_PREFIX, self.catalog_name, self.schema_name, self.table_name
        )
    }

    pub fn key(&self) -> String {
        format!("{}-{}", self.prefix(), self.table_id)
    }

    pub fn removed_key(&self) -> String {
        to_removed_key(&self.key())
    }
}

#[cfg(test)]
mod tests {
    use api::v1::meta::TableName;

    use super::TableRouteKey;

    #[test]
    fn test_table_route_key() {
        let key = TableRouteKey {
            table_id: 123,
            catalog_name: "greptime",
            schema_name: "public",
            table_name: "demo",
        };

        let prefix = key.prefix();
        assert_eq!("__meta_table_route-greptime-public-demo", prefix);

        let key_string = key.key();
        assert_eq!("__meta_table_route-greptime-public-demo-123", key_string);

        let removed = key.removed_key();
        assert_eq!(
            "__removed-__meta_table_route-greptime-public-demo-123",
            removed
        );
    }

    #[test]
    fn test_with_table_name() {
        let table_name = TableName {
            catalog_name: "greptime".to_string(),
            schema_name: "public".to_string(),
            table_name: "demo".to_string(),
        };

        let key = TableRouteKey::with_table_name(123, &table_name);

        assert_eq!(123, key.table_id);
        assert_eq!("greptime", key.catalog_name);
        assert_eq!("public", key.schema_name);
        assert_eq!("demo", key.table_name);
    }
}
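Together, to_removed_key and TableRouteKey give every route entry a deterministic key plus a matching tombstone under the __removed prefix, so deleted routes stay scannable instead of vanishing. A minimal sketch of that scheme with plain strings (the is_removed helper is illustrative, not a crate API):

const REMOVED_PREFIX: &str = "__removed";

fn to_removed_key(key: &str) -> String {
    format!("{REMOVED_PREFIX}-{key}")
}

// Illustrative helper: does a stored key denote a tombstone?
fn is_removed(key: &str) -> bool {
    key.starts_with(REMOVED_PREFIX)
}

fn main() {
    let live = "__meta_table_route-greptime-public-demo-123";
    let dead = to_removed_key(live);

    assert!(!is_removed(live));
    assert!(is_removed(&dead));
    // A range scan over "__removed-" therefore returns only tombstones.
    assert_eq!("__removed-__meta_table_route-greptime-public-demo-123", dead);
}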
@@ -12,4 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

 pub mod error;
 pub mod heartbeat;
+pub mod instruction;
+pub mod key;
+pub mod peer;
 pub mod rpc;
+pub mod table_name;
+
+pub type ClusterId = u64;
+pub type DatanodeId = u64;
+
+pub use instruction::RegionIdent;
src/common/meta/src/peer.rs (new file, 58 lines)
@@ -0,0 +1,58 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{Display, Formatter};

use api::v1::meta::Peer as PbPeer;
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct Peer {
    /// Node identifier. Unique in a cluster.
    pub id: u64,
    pub addr: String,
}

impl From<PbPeer> for Peer {
    fn from(p: PbPeer) -> Self {
        Self {
            id: p.id,
            addr: p.addr,
        }
    }
}

impl From<Peer> for PbPeer {
    fn from(p: Peer) -> Self {
        Self {
            id: p.id,
            addr: p.addr,
        }
    }
}

impl Peer {
    pub fn new(id: u64, addr: impl Into<String>) -> Self {
        Self {
            id,
            addr: addr.into(),
        }
    }
}

impl Display for Peer {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "peer-{}({})", self.id, self.addr)
    }
}
@@ -14,25 +14,10 @@

 pub mod lock;
 pub mod router;
-mod store;
+pub mod store;
 pub mod util;

-use std::fmt::{Display, Formatter};
-
-use api::v1::meta::{
-    KeyValue as PbKeyValue, Peer as PbPeer, ResponseHeader as PbResponseHeader,
-    TableName as PbTableName,
-};
-pub use router::{
-    CreateRequest, Partition, Region, RouteRequest, RouteResponse, Table, TableRoute,
-};
-use serde::{Deserialize, Serialize};
-pub use store::{
-    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
-    BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
-    DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
-    RangeRequest, RangeResponse,
-};
+use api::v1::meta::{KeyValue as PbKeyValue, ResponseHeader as PbResponseHeader};

 #[derive(Debug, Clone)]
 pub struct ResponseHeader(PbResponseHeader);
@@ -100,81 +85,6 @@ impl KeyValue {
     }
 }

-#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
-pub struct TableName {
-    pub catalog_name: String,
-    pub schema_name: String,
-    pub table_name: String,
-}
-
-impl Display for TableName {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}.{}.{}",
-            self.catalog_name, self.schema_name, self.table_name
-        )
-    }
-}
-
-impl TableName {
-    pub fn new(
-        catalog_name: impl Into<String>,
-        schema_name: impl Into<String>,
-        table_name: impl Into<String>,
-    ) -> Self {
-        Self {
-            catalog_name: catalog_name.into(),
-            schema_name: schema_name.into(),
-            table_name: table_name.into(),
-        }
-    }
-}
-
-impl From<TableName> for PbTableName {
-    fn from(tb: TableName) -> Self {
-        Self {
-            catalog_name: tb.catalog_name,
-            schema_name: tb.schema_name,
-            table_name: tb.table_name,
-        }
-    }
-}
-
-impl From<PbTableName> for TableName {
-    fn from(tb: PbTableName) -> Self {
-        Self {
-            catalog_name: tb.catalog_name,
-            schema_name: tb.schema_name,
-            table_name: tb.table_name,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
-pub struct Peer {
-    pub id: u64,
-    pub addr: String,
-}
-
-impl From<PbPeer> for Peer {
-    fn from(p: PbPeer) -> Self {
-        Self {
-            id: p.id,
-            addr: p.addr,
-        }
-    }
-}
-
-impl Peer {
-    pub fn new(id: u64, addr: impl Into<String>) -> Self {
-        Self {
-            id,
-            addr: addr.into(),
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use api::v1::meta::{Error, ResponseHeader as PbResponseHeader};
@@ -16,16 +16,19 @@ use std::collections::{HashMap, HashSet};

 use api::v1::meta::{
     CreateRequest as PbCreateRequest, DeleteRequest as PbDeleteRequest, Partition as PbPartition,
-    Region as PbRegion, RouteRequest as PbRouteRequest, RouteResponse as PbRouteResponse,
-    Table as PbTable,
+    Peer as PbPeer, Region as PbRegion, RegionRoute as PbRegionRoute,
+    RouteRequest as PbRouteRequest, RouteResponse as PbRouteResponse, Table as PbTable,
+    TableRoute as PbTableRoute,
 };
 use serde::{Deserialize, Serialize, Serializer};
 use snafu::{OptionExt, ResultExt};
+use store_api::storage::{RegionId, RegionNumber};
 use table::metadata::RawTableInfo;

-use crate::error;
-use crate::error::Result;
-use crate::rpc::{util, Peer, TableName};
+use crate::error::{self, Result};
+use crate::peer::Peer;
+use crate::rpc::util;
+use crate::table_name::TableName;

 #[derive(Debug, Clone)]
 pub struct CreateRequest<'a> {
@@ -125,57 +128,135 @@ impl TryFrom<PbRouteResponse> for RouteResponse {
     fn try_from(pb: PbRouteResponse) -> Result<Self> {
         util::check_response_header(pb.header.as_ref())?;

-        let peers: Vec<Peer> = pb.peers.into_iter().map(Into::into).collect();
-        let get_peer = |index: u64| peers.get(index as usize).map(ToOwned::to_owned);
-        let mut table_routes = Vec::with_capacity(pb.table_routes.len());
-        for table_route in pb.table_routes.into_iter() {
-            let table = table_route
-                .table
-                .context(error::RouteInfoCorruptedSnafu {
-                    err_msg: "table required",
-                })?
-                .try_into()?;
-
-            let mut region_routes = Vec::with_capacity(table_route.region_routes.len());
-            for region_route in table_route.region_routes.into_iter() {
-                let region = region_route
-                    .region
-                    .context(error::RouteInfoCorruptedSnafu {
-                        err_msg: "'region' not found",
-                    })?
-                    .into();
-
-                let leader_peer = get_peer(region_route.leader_peer_index);
-                let follower_peers = region_route
-                    .follower_peer_indexes
-                    .into_iter()
-                    .filter_map(get_peer)
-                    .collect::<Vec<_>>();
-
-                region_routes.push(RegionRoute {
-                    region,
-                    leader_peer,
-                    follower_peers,
-                });
-            }
-
-            table_routes.push(TableRoute {
-                table,
-                region_routes,
-            });
-        }
-
+        let table_routes = pb
+            .table_routes
+            .into_iter()
+            .map(|x| TableRoute::try_from_raw(&pb.peers, x))
+            .collect::<Result<Vec<_>>>()?;
         Ok(Self { table_routes })
     }
 }

-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
 pub struct TableRoute {
     pub table: Table,
     pub region_routes: Vec<RegionRoute>,
+    region_leaders: HashMap<RegionNumber, Option<Peer>>,
 }

 impl TableRoute {
+    pub fn new(table: Table, region_routes: Vec<RegionRoute>) -> Self {
+        let region_leaders = region_routes
+            .iter()
+            .map(|x| (x.region.id as RegionNumber, x.leader_peer.clone()))
+            .collect::<HashMap<_, _>>();
+
+        Self {
+            table,
+            region_routes,
+            region_leaders,
+        }
+    }
+
+    pub fn try_from_raw(peers: &[PbPeer], table_route: PbTableRoute) -> Result<Self> {
+        let table = table_route
+            .table
+            .context(error::RouteInfoCorruptedSnafu {
+                err_msg: "'table' is empty in table route",
+            })?
+            .try_into()?;
+
+        let mut region_routes = Vec::with_capacity(table_route.region_routes.len());
+        for region_route in table_route.region_routes.into_iter() {
+            let region = region_route
+                .region
+                .context(error::RouteInfoCorruptedSnafu {
+                    err_msg: "'region' is empty in region route",
+                })?
+                .into();
+
+            let leader_peer = peers
+                .get(region_route.leader_peer_index as usize)
+                .cloned()
+                .map(Into::into);
+
+            let follower_peers = region_route
+                .follower_peer_indexes
+                .into_iter()
+                .filter_map(|x| peers.get(x as usize).cloned().map(Into::into))
+                .collect::<Vec<_>>();
+
+            region_routes.push(RegionRoute {
+                region,
+                leader_peer,
+                follower_peers,
+            });
+        }
+
+        Ok(Self::new(table, region_routes))
+    }
+
+    pub fn try_into_raw(self) -> Result<(Vec<PbPeer>, PbTableRoute)> {
+        let mut peers = HashSet::new();
+        self.region_routes
+            .iter()
+            .filter_map(|x| x.leader_peer.as_ref())
+            .for_each(|p| {
+                peers.insert(p.clone());
+            });
+        self.region_routes
+            .iter()
+            .flat_map(|x| x.follower_peers.iter())
+            .for_each(|p| {
+                peers.insert(p.clone());
+            });
+        let mut peers = peers.into_iter().map(Into::into).collect::<Vec<PbPeer>>();
+        peers.sort_by_key(|x| x.id);
+
+        let find_peer = |peer_id: u64| -> u64 {
+            peers
+                .iter()
+                .enumerate()
+                .find_map(|(i, x)| {
+                    if x.id == peer_id {
+                        Some(i as u64)
+                    } else {
+                        None
+                    }
+                })
+                .unwrap_or_else(|| {
+                    panic!("Peer {peer_id} must be present when collecting all peers.")
+                })
+        };
+
+        let mut region_routes = Vec::with_capacity(self.region_routes.len());
+        for region_route in self.region_routes.into_iter() {
+            let leader_peer_index = region_route.leader_peer.map(|x| find_peer(x.id)).context(
+                error::RouteInfoCorruptedSnafu {
+                    err_msg: "'leader_peer' is empty in region route",
+                },
+            )?;
+
+            let follower_peer_indexes = region_route
+                .follower_peers
+                .iter()
+                .map(|x| find_peer(x.id))
+                .collect::<Vec<_>>();
+
+            region_routes.push(PbRegionRoute {
+                region: Some(region_route.region.into()),
+                leader_peer_index,
+                follower_peer_indexes,
+            });
+        }
+
+        let table_route = PbTableRoute {
+            table: Some(self.table.into()),
+            region_routes,
+        };
+        Ok((peers, table_route))
+    }
+
     pub fn find_leaders(&self) -> HashSet<Peer> {
         self.region_routes
             .iter()
@@ -197,9 +278,15 @@ impl TableRoute {
             })
             .collect()
     }
+
+    pub fn find_region_leader(&self, region_number: RegionNumber) -> Option<&Peer> {
+        self.region_leaders
+            .get(&region_number)
+            .and_then(|x| x.as_ref())
+    }
 }

-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
 pub struct Table {
     pub id: u64,
     pub table_name: TableName,
@@ -225,16 +312,26 @@ impl TryFrom<PbTable> for Table {
     }
 }

+impl From<Table> for PbTable {
+    fn from(table: Table) -> Self {
+        PbTable {
+            id: table.id,
+            table_name: Some(table.table_name.into()),
+            table_schema: table.table_schema,
+        }
+    }
+}
+
-#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
 pub struct RegionRoute {
     pub region: Region,
     pub leader_peer: Option<Peer>,
     pub follower_peers: Vec<Peer>,
 }

-#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
 pub struct Region {
-    pub id: u64,
+    pub id: RegionId,
     pub name: String,
     pub partition: Option<Partition>,
     pub attrs: HashMap<String, String>,
@@ -251,7 +348,18 @@ impl From<PbRegion> for Region {
     }
 }

+impl From<Region> for PbRegion {
+    fn from(region: Region) -> Self {
+        Self {
+            id: region.id,
+            name: region.name,
+            partition: region.partition.map(Into::into),
+            attrs: region.attrs,
+        }
+    }
+}
+
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
 pub struct Partition {
     #[serde(serialize_with = "as_utf8_vec")]
     pub column_list: Vec<Vec<u8>>,
@@ -495,4 +603,101 @@ mod tests {
         assert_eq!(2, region_route.follower_peers.get(0).unwrap().id);
         assert_eq!("peer2", region_route.follower_peers.get(0).unwrap().addr);
     }
+
+    #[test]
+    fn test_table_route_raw_conversion() {
+        let raw_peers = vec![
+            PbPeer {
+                id: 1,
+                addr: "a1".to_string(),
+            },
+            PbPeer {
+                id: 2,
+                addr: "a2".to_string(),
+            },
+            PbPeer {
+                id: 3,
+                addr: "a3".to_string(),
+            },
+        ];
+
+        // region distribution:
+        // region id => leader peer id + [follower peer id]
+        // 1 => 2 + [1, 3]
+        // 2 => 1 + [2, 3]
+        let raw_table_route = PbTableRoute {
+            table: Some(PbTable {
+                id: 1,
+                table_name: Some(PbTableName {
+                    catalog_name: "c1".to_string(),
+                    schema_name: "s1".to_string(),
+                    table_name: "t1".to_string(),
+                }),
+                table_schema: vec![],
+            }),
+            region_routes: vec![
+                PbRegionRoute {
+                    region: Some(PbRegion {
+                        id: 1,
+                        name: "r1".to_string(),
+                        partition: None,
+                        attrs: HashMap::new(),
+                    }),
+                    leader_peer_index: 1,
+                    follower_peer_indexes: vec![0, 2],
+                },
+                PbRegionRoute {
+                    region: Some(PbRegion {
+                        id: 2,
+                        name: "r2".to_string(),
+                        partition: None,
+                        attrs: HashMap::new(),
+                    }),
+                    leader_peer_index: 0,
+                    follower_peer_indexes: vec![1, 2],
+                },
+            ],
+        };
+        let table_route = TableRoute {
+            table: Table {
+                id: 1,
+                table_name: TableName::new("c1", "s1", "t1"),
+                table_schema: vec![],
+            },
+            region_routes: vec![
+                RegionRoute {
+                    region: Region {
+                        id: 1,
+                        name: "r1".to_string(),
+                        partition: None,
+                        attrs: HashMap::new(),
+                    },
+                    leader_peer: Some(Peer::new(2, "a2")),
+                    follower_peers: vec![Peer::new(1, "a1"), Peer::new(3, "a3")],
+                },
+                RegionRoute {
+                    region: Region {
+                        id: 2,
+                        name: "r2".to_string(),
+                        partition: None,
+                        attrs: HashMap::new(),
+                    },
+                    leader_peer: Some(Peer::new(1, "a1")),
+                    follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
+                },
+            ],
+            region_leaders: HashMap::from([
+                (2, Some(Peer::new(1, "a1"))),
+                (1, Some(Peer::new(2, "a2"))),
+            ]),
+        };
+
+        let from_raw = TableRoute::try_from_raw(&raw_peers, raw_table_route.clone()).unwrap();
+        assert_eq!(from_raw, table_route);
+
+        let into_raw = table_route.try_into_raw().unwrap();
+        assert_eq!(into_raw.0, raw_peers);
+        assert_eq!(into_raw.1, raw_table_route);
+    }
 }
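try_into_raw deduplicates every peer referenced by a route into one list sorted by id, then rewrites leader/follower references as indexes into that list, which is what keeps the wire format compact. A self-contained sketch of that interning step (the Peer struct here is a simplified stand-in, not the crate's type):

use std::collections::BTreeSet;

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct Peer {
    id: u64,
    addr: String,
}

// Collect the distinct peers of all routes into one sorted list and
// rewrite every peer reference as an index into that list.
fn intern_peers(routes: &[Vec<Peer>]) -> (Vec<Peer>, Vec<Vec<u64>>) {
    let peers: Vec<Peer> = routes
        .iter()
        .flatten()
        .cloned()
        .collect::<BTreeSet<_>>() // dedup + sort (by id first)
        .into_iter()
        .collect();

    let indexes: Vec<Vec<u64>> = routes
        .iter()
        .map(|route| {
            route
                .iter()
                .map(|p| peers.iter().position(|x| x == p).unwrap() as u64)
                .collect()
        })
        .collect();

    (peers, indexes)
}

fn main() {
    let p1 = Peer { id: 1, addr: "a1".to_string() };
    let p2 = Peer { id: 2, addr: "a2".to_string() };
    let routes = vec![vec![p2.clone(), p1.clone()], vec![p1.clone(), p2.clone()]];

    let (peers, indexes) = intern_peers(&routes);
    assert_eq!(vec![p1, p2], peers); // each peer stored once, sorted by id
    assert_eq!(vec![vec![1, 0], vec![0, 1]], indexes);
}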
@@ -18,7 +18,7 @@ use crate::error;
 use crate::error::Result;

 #[inline]
-pub(crate) fn check_response_header(header: Option<&ResponseHeader>) -> Result<()> {
+pub fn check_response_header(header: Option<&ResponseHeader>) -> Result<()> {
     if let Some(header) = header {
         if let Some(error) = &header.error {
             let code = error.code;
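check_response_header is now public because TableRoute::try_from_raw callers outside the module need it. Its logic, per the shown lines, only fails when a header is present and carries an error. A self-contained sketch of that behavior (stand-in types; the real error path uses the crate's Result, not String):

// Stand-ins for the protobuf header/error types.
struct PbError {
    code: i32,
    err_msg: String,
}

struct ResponseHeader {
    error: Option<PbError>,
}

// Mirrors check_response_header: only a header that carries an error fails.
fn check_response_header(header: Option<&ResponseHeader>) -> Result<(), String> {
    if let Some(header) = header {
        if let Some(error) = &header.error {
            return Err(format!("code: {}, msg: {}", error.code, error.err_msg));
        }
    }
    Ok(())
}

fn main() {
    assert!(check_response_header(None).is_ok());

    let ok = ResponseHeader { error: None };
    assert!(check_response_header(Some(&ok)).is_ok());

    let bad = ResponseHeader {
        error: Some(PbError { code: 1000, err_msg: "route not found".to_string() }),
    };
    assert!(check_response_header(Some(&bad)).is_err());
}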
src/common/meta/src/table_name.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{Display, Formatter};

use api::v1::meta::TableName as PbTableName;
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct TableName {
    pub catalog_name: String,
    pub schema_name: String,
    pub table_name: String,
}

impl Display for TableName {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&common_catalog::format_full_table_name(
            &self.catalog_name,
            &self.schema_name,
            &self.table_name,
        ))
    }
}

impl TableName {
    pub fn new(
        catalog_name: impl Into<String>,
        schema_name: impl Into<String>,
        table_name: impl Into<String>,
    ) -> Self {
        Self {
            catalog_name: catalog_name.into(),
            schema_name: schema_name.into(),
            table_name: table_name.into(),
        }
    }
}

impl From<TableName> for PbTableName {
    fn from(table_name: TableName) -> Self {
        Self {
            catalog_name: table_name.catalog_name,
            schema_name: table_name.schema_name,
            table_name: table_name.table_name,
        }
    }
}

impl From<PbTableName> for TableName {
    fn from(table_name: PbTableName) -> Self {
        Self {
            catalog_name: table_name.catalog_name,
            schema_name: table_name.schema_name,
            table_name: table_name.table_name,
        }
    }
}
@@ -14,20 +14,32 @@

 //! Test utilities for procedures.

+use std::collections::HashMap;
 use std::sync::Arc;

 use async_trait::async_trait;
 use common_procedure::{
-    BoxedProcedure, Context, ContextProvider, ProcedureId, ProcedureState, Result, Status,
+    Context, ContextProvider, Procedure, ProcedureId, ProcedureState, ProcedureWithId, Result,
+    Status,
 };

-/// A Mock [ContextProvider] that always return [ProcedureState::Done].
-struct MockContextProvider {}
+/// A Mock [ContextProvider].
+#[derive(Default)]
+pub struct MockContextProvider {
+    states: HashMap<ProcedureId, ProcedureState>,
+}
+
+impl MockContextProvider {
+    /// Returns a new provider.
+    pub fn new(states: HashMap<ProcedureId, ProcedureState>) -> MockContextProvider {
+        MockContextProvider { states }
+    }
+}

 #[async_trait]
 impl ContextProvider for MockContextProvider {
-    async fn procedure_state(&self, _procedure_id: ProcedureId) -> Result<Option<ProcedureState>> {
-        Ok(Some(ProcedureState::Done))
+    async fn procedure_state(&self, procedure_id: ProcedureId) -> Result<Option<ProcedureState>> {
+        Ok(self.states.get(&procedure_id).cloned())
     }
 }

@@ -35,10 +47,10 @@ impl ContextProvider for MockContextProvider {
 ///
 /// # Panics
 /// Panics if the `procedure` has subprocedure to execute.
-pub async fn execute_procedure_until_done(procedure: &mut BoxedProcedure) {
+pub async fn execute_procedure_until_done(procedure: &mut dyn Procedure) {
     let ctx = Context {
         procedure_id: ProcedureId::random(),
-        provider: Arc::new(MockContextProvider {}),
+        provider: Arc::new(MockContextProvider::default()),
     };

     loop {
@@ -52,3 +64,53 @@ pub async fn execute_procedure_until_done(procedure: &mut BoxedProcedure) {
         }
     }
 }
+
+/// Executes a procedure once.
+///
+/// Returns whether the procedure is done.
+pub async fn execute_procedure_once(
+    procedure_id: ProcedureId,
+    provider: MockContextProvider,
+    procedure: &mut dyn Procedure,
+) -> bool {
+    let ctx = Context {
+        procedure_id,
+        provider: Arc::new(provider),
+    };
+
+    match procedure.execute(&ctx).await.unwrap() {
+        Status::Executing { .. } => false,
+        Status::Suspended { subprocedures, .. } => {
+            assert!(
+                subprocedures.is_empty(),
+                "Executing subprocedure is unsupported"
+            );
+            false
+        }
+        Status::Done => true,
+    }
+}
+
+/// Executes a procedure until it returns [Status::Suspended] or [Status::Done].
+///
+/// Returns `Some` if it returns [Status::Suspended] or `None` if it returns [Status::Done].
+pub async fn execute_until_suspended_or_done(
+    procedure_id: ProcedureId,
+    provider: MockContextProvider,
+    procedure: &mut dyn Procedure,
+) -> Option<Vec<ProcedureWithId>> {
+    let ctx = Context {
+        procedure_id,
+        provider: Arc::new(provider),
+    };
+
+    loop {
+        match procedure.execute(&ctx).await.unwrap() {
+            Status::Executing { .. } => (),
+            Status::Suspended { subprocedures, .. } => return Some(subprocedures),
+            Status::Done => break,
+        }
+    }
+
+    None
+}
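These helpers drive a procedure's execute loop by hand instead of going through the manager: poll, inspect the returned status, stop on Done or Suspended. A self-contained sketch of the same polling pattern over a toy state machine (Status and Procedure here are simplified stand-ins, not the crate's async trait):

#[derive(Debug, PartialEq)]
enum Status {
    Executing,
    Done,
}

trait Procedure {
    fn execute(&mut self) -> Status;
}

// A procedure that needs `steps` calls before it reports Done.
struct CountDown {
    steps: u32,
}

impl Procedure for CountDown {
    fn execute(&mut self) -> Status {
        if self.steps == 0 {
            Status::Done
        } else {
            self.steps -= 1;
            Status::Executing
        }
    }
}

// Polls the procedure to completion, like execute_procedure_until_done.
fn execute_until_done(procedure: &mut dyn Procedure) -> u32 {
    let mut polls = 0;
    loop {
        polls += 1;
        if procedure.execute() == Status::Done {
            return polls;
        }
    }
}

fn main() {
    let mut p = CountDown { steps: 3 };
    assert_eq!(4, execute_until_done(&mut p)); // 3 Executing polls + 1 Done
}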
@@ -76,12 +76,6 @@ pub enum Error {
         source: BoxedError,
     },

-    #[snafu(display("Failed to read {}, source: {}", key, source))]
-    ReadState {
-        key: String,
-        source: object_store::Error,
-    },
-
     #[snafu(display("Failed to deserialize from json, source: {}", source))]
     FromJson {
         source: serde_json::Error,
@@ -133,6 +127,13 @@ pub enum Error {
         source: common_runtime::error::Error,
         location: Location,
     },
+
+    #[snafu(display("Subprocedure {} failed, source: {}", subprocedure_id, source))]
+    SubprocedureFailed {
+        subprocedure_id: ProcedureId,
+        source: Arc<Error>,
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -140,14 +141,13 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl ErrorExt for Error {
     fn status_code(&self) -> StatusCode {
         match self {
-            Error::External { source }
+            Error::External { source, .. }
             | Error::PutState { source, .. }
             | Error::DeleteStates { source, .. }
             | Error::ListState { source, .. } => source.status_code(),

             Error::ToJson { .. }
             | Error::DeleteState { .. }
-            | Error::ReadState { .. }
             | Error::FromJson { .. }
             | Error::RetryTimesExceeded { .. }
             | Error::RetryLater { .. }
@@ -159,6 +159,8 @@ impl ErrorExt for Error {
             Error::ProcedureExec { source, .. } => source.status_code(),
             Error::StartRemoveOutdatedMetaTask { source, .. }
             | Error::StopRemoveOutdatedMetaTask { source, .. } => source.status_code(),
+
+            Error::SubprocedureFailed { source, .. } => source.status_code(),
         }
     }
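SubprocedureFailed wraps its source in Arc<Error> rather than Box<Error>; the diff itself does not say why, but a plausible reading is that one child failure may need to be attached to more than one error value while Error is not Clone. A standalone sketch of that sharing pattern under that assumption:

use std::sync::Arc;

#[derive(Debug)]
enum Error {
    RetryTimesExceeded { msg: String },
    SubprocedureFailed { subprocedure_id: u64, source: Arc<Error> },
}

fn main() {
    // The child failure is created once...
    let child = Arc::new(Error::RetryTimesExceeded {
        msg: "gave up after 3 tries".to_string(),
    });

    // ...and can be attached to multiple parent-side errors by cloning the
    // Arc handle, even though Error itself is not Clone.
    let first = Error::SubprocedureFailed { subprocedure_id: 1, source: child.clone() };
    let second = Error::SubprocedureFailed { subprocedure_id: 1, source: child };

    println!("{first:?}");
    println!("{second:?}");
}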
@@ -343,6 +343,7 @@ impl ManagerContext {
 /// Config for [LocalManager].
 #[derive(Debug)]
 pub struct ManagerConfig {
+    pub parent_path: String,
     pub max_retry_times: usize,
     pub retry_delay: Duration,
     pub remove_outdated_meta_task_interval: Duration,
@@ -352,6 +353,7 @@ pub struct ManagerConfig {
 impl Default for ManagerConfig {
     fn default() -> Self {
         Self {
+            parent_path: "".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             remove_outdated_meta_task_interval: Duration::from_secs(60 * 10),
@@ -363,7 +365,7 @@ impl Default for ManagerConfig {
 /// A [ProcedureManager] that maintains procedure states locally.
 pub struct LocalManager {
     manager_ctx: Arc<ManagerContext>,
-    state_store: StateStoreRef,
+    procedure_store: Arc<ProcedureStore>,
     max_retry_times: usize,
     retry_delay: Duration,
     remove_outdated_meta_task: RepeatedTask<Error>,
@@ -382,7 +384,7 @@ impl LocalManager {
         );
         LocalManager {
             manager_ctx,
-            state_store,
+            procedure_store: Arc::new(ProcedureStore::new(&config.parent_path, state_store)),
             max_retry_times: config.max_retry_times,
             retry_delay: config.retry_delay,
             remove_outdated_meta_task,
@@ -405,7 +407,7 @@ impl LocalManager {
             exponential_builder: ExponentialBuilder::default()
                 .with_min_delay(self.retry_delay)
                 .with_max_times(self.max_retry_times),
-            store: ProcedureStore::new(self.state_store.clone()),
+            store: self.procedure_store.clone(),
             rolling_back: false,
         };

@@ -466,8 +468,7 @@ impl ProcedureManager for LocalManager {
         logging::info!("LocalManager start to recover");
         let recover_start = Instant::now();

-        let procedure_store = ProcedureStore::new(self.state_store.clone());
-        let (messages, finished_ids) = procedure_store.load_messages().await?;
+        let (messages, finished_ids) = self.procedure_store.load_messages().await?;

         for (procedure_id, message) in &messages {
             if message.parent_id.is_none() {
@@ -502,7 +503,7 @@ impl ProcedureManager for LocalManager {
         );

         for procedure_id in finished_ids {
-            if let Err(e) = procedure_store.delete_procedure(procedure_id).await {
+            if let Err(e) = self.procedure_store.delete_procedure(procedure_id).await {
                 logging::error!(e; "Failed to delete procedure {}", procedure_id);
             }
         }
@@ -571,7 +572,7 @@ mod tests {

     use super::*;
     use crate::error::Error;
-    use crate::store::ObjectStateStore;
+    use crate::store::state_store::ObjectStateStore;
     use crate::{Context, Procedure, Status};

     #[test]
@@ -680,6 +681,7 @@ mod tests {
     fn test_register_loader() {
         let dir = create_temp_dir("register");
         let config = ManagerConfig {
+            parent_path: "data/".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             ..Default::default()
@@ -702,6 +704,7 @@ mod tests {
         let dir = create_temp_dir("recover");
         let object_store = test_util::new_object_store(&dir);
         let config = ManagerConfig {
+            parent_path: "data/".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             ..Default::default()
@@ -714,7 +717,7 @@ mod tests {
             .unwrap();

         // Prepare data
-        let procedure_store = ProcedureStore::from(object_store.clone());
+        let procedure_store = ProcedureStore::from_object_store(object_store.clone());
         let root: BoxedProcedure = Box::new(ProcedureToLoad::new("test recover manager"));
         let root_id = ProcedureId::random();
         // Prepare data for the root procedure.
@@ -749,6 +752,7 @@ mod tests {
     async fn test_submit_procedure() {
         let dir = create_temp_dir("submit");
         let config = ManagerConfig {
+            parent_path: "data/".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             ..Default::default()
@@ -798,6 +802,7 @@ mod tests {
     async fn test_state_changed_on_err() {
         let dir = create_temp_dir("on_err");
         let config = ManagerConfig {
+            parent_path: "data/".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             ..Default::default()
@@ -860,6 +865,7 @@ mod tests {
         let dir = create_temp_dir("remove_outdated_meta_task");
         let object_store = test_util::new_object_store(&dir);
         let config = ManagerConfig {
+            parent_path: "data/".to_string(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
             remove_outdated_meta_task_interval: Duration::from_millis(1),
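The new parent_path is prepended to the procedure key prefix (see ProcedureStore::new in the store changes below, which builds parent_path + "procedure/"), so different components can keep procedure state in one shared store without colliding. A tiny sketch of that composition; the "metasrv/" value is a made-up example, only "data/" and the empty default appear in the diff:

// Mirrors ProcedureStore::new: the effective key prefix is parent_path + PROC_PATH.
const PROC_PATH: &str = "procedure/";

fn proc_prefix(parent_path: &str) -> String {
    format!("{parent_path}{PROC_PATH}")
}

fn main() {
    // Distinct parent paths keep two managers on one shared store disjoint.
    assert_eq!("data/procedure/", proc_prefix("data/"));
    assert_eq!("metasrv/procedure/", proc_prefix("metasrv/"));
    // The default ManagerConfig uses an empty parent path.
    assert_eq!("procedure/", proc_prefix(""));
}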
@@ -109,7 +109,7 @@ pub(crate) struct Runner {
     pub(crate) manager_ctx: Arc<ManagerContext>,
     pub(crate) step: u32,
     pub(crate) exponential_builder: ExponentialBuilder,
-    pub(crate) store: ProcedureStore,
+    pub(crate) store: Arc<ProcedureStore>,
     pub(crate) rolling_back: bool,
 }

@@ -471,7 +471,7 @@ mod tests {
     fn new_runner(
         meta: ProcedureMetaRef,
         procedure: BoxedProcedure,
-        store: ProcedureStore,
+        store: Arc<ProcedureStore>,
     ) -> Runner {
         Runner {
             meta,
@@ -484,8 +484,13 @@ mod tests {
         }
     }

-    async fn check_files(object_store: &ObjectStore, procedure_id: ProcedureId, files: &[&str]) {
-        let dir = proc_path!("{procedure_id}/");
+    async fn check_files(
+        object_store: &ObjectStore,
+        procedure_store: &ProcedureStore,
+        procedure_id: ProcedureId,
+        files: &[&str],
+    ) {
+        let dir = proc_path!(procedure_store, "{procedure_id}/");
         let lister = object_store.list(&dir).await.unwrap();
         let mut files_in_dir: Vec<_> = lister
             .map_ok(|de| de.name().to_string())
@@ -578,16 +583,28 @@ mod tests {
         let meta = normal.new_meta(ROOT_ID);
         let ctx = context_without_provider(meta.id);
         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
-        let mut runner = new_runner(meta, Box::new(normal), procedure_store);
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
+        let mut runner = new_runner(meta, Box::new(normal), procedure_store.clone());

         let res = runner.execute_once(&ctx).await;
         assert!(res.is_continue(), "{res:?}");
-        check_files(&object_store, ctx.procedure_id, first_files).await;
+        check_files(
+            &object_store,
+            &procedure_store,
+            ctx.procedure_id,
+            first_files,
+        )
+        .await;

         let res = runner.execute_once(&ctx).await;
         assert!(res.is_done(), "{res:?}");
-        check_files(&object_store, ctx.procedure_id, second_files).await;
+        check_files(
+            &object_store,
+            &procedure_store,
+            ctx.procedure_id,
+            second_files,
+        )
+        .await;
     }

     #[tokio::test]
@@ -626,7 +643,7 @@ mod tests {
         let meta = suspend.new_meta(ROOT_ID);
         let ctx = context_without_provider(meta.id);
         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
         let mut runner = new_runner(meta, Box::new(suspend), procedure_store);

         let res = runner.execute_once(&ctx).await;
@@ -726,8 +743,8 @@ mod tests {
         let procedure_id = meta.id;

         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
-        let mut runner = new_runner(meta.clone(), Box::new(parent), procedure_store);
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
+        let mut runner = new_runner(meta.clone(), Box::new(parent), procedure_store.clone());
         let manager_ctx = Arc::new(ManagerContext::new());
         // Manually add this procedure to the manager ctx.
         assert!(manager_ctx.try_insert_procedure(meta));
@@ -744,7 +761,7 @@ mod tests {
         let state = manager_ctx.state(procedure_id).unwrap();
         assert!(state.is_done(), "{state:?}");
         // Files are removed.
-        check_files(&object_store, procedure_id, &[]).await;
+        check_files(&object_store, &procedure_store, procedure_id, &[]).await;

         tokio::time::sleep(Duration::from_millis(5)).await;
         // Clean outdated meta.
@@ -770,13 +787,19 @@ mod tests {
         let meta = fail.new_meta(ROOT_ID);
         let ctx = context_without_provider(meta.id);
         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
-        let mut runner = new_runner(meta.clone(), Box::new(fail), procedure_store);
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
+        let mut runner = new_runner(meta.clone(), Box::new(fail), procedure_store.clone());

         let res = runner.execute_once(&ctx).await;
         assert!(res.is_failed(), "{res:?}");
         assert!(meta.state().is_failed());
-        check_files(&object_store, ctx.procedure_id, &["0000000000.rollback"]).await;
+        check_files(
+            &object_store,
+            &procedure_store,
+            ctx.procedure_id,
+            &["0000000000.rollback"],
+        )
+        .await;
     }

     #[tokio::test]
@@ -805,8 +828,8 @@ mod tests {
         let meta = retry_later.new_meta(ROOT_ID);
         let ctx = context_without_provider(meta.id);
         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
-        let mut runner = new_runner(meta.clone(), Box::new(retry_later), procedure_store);
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
+        let mut runner = new_runner(meta.clone(), Box::new(retry_later), procedure_store.clone());

         let res = runner.execute_once(&ctx).await;
         assert!(res.is_retry_later(), "{res:?}");
@@ -815,7 +838,13 @@ mod tests {
         let res = runner.execute_once(&ctx).await;
         assert!(res.is_done(), "{res:?}");
         assert!(meta.state().is_done());
-        check_files(&object_store, ctx.procedure_id, &["0000000000.commit"]).await;
+        check_files(
+            &object_store,
+            &procedure_store,
+            ctx.procedure_id,
+            &["0000000000.commit"],
+        )
+        .await;
     }

     #[tokio::test]
@@ -832,7 +861,7 @@ mod tests {
         let dir = create_temp_dir("exceed_max_retry_later");
         let meta = exceed_max_retry_later.new_meta(ROOT_ID);
         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
         let mut runner = new_runner(
             meta.clone(),
             Box::new(exceed_max_retry_later),
@@ -906,7 +935,7 @@ mod tests {
         let meta = parent.new_meta(ROOT_ID);

         let object_store = test_util::new_object_store(&dir);
-        let procedure_store = ProcedureStore::from(object_store.clone());
+        let procedure_store = Arc::new(ProcedureStore::from_object_store(object_store.clone()));
         let mut runner = new_runner(meta.clone(), Box::new(parent), procedure_store);

         let manager_ctx = Arc::new(ManagerContext::new());
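Switching Runner.store to Arc<ProcedureStore> lets each test keep a handle to the very store it hands the runner, which the reworked check_files calls rely on. A tiny standalone sketch of that shared-handle pattern (struct fields simplified):

use std::sync::Arc;

struct ProcedureStore {
    proc_path: String,
}

struct Runner {
    store: Arc<ProcedureStore>,
}

fn main() {
    let store = Arc::new(ProcedureStore {
        proc_path: "data/procedure/".to_string(),
    });
    // The test keeps one handle and gives the runner another; both point at
    // the same underlying store, so assertions see exactly what the runner wrote.
    let runner = Runner { store: Arc::clone(&store) };

    assert_eq!(store.proc_path, runner.store.proc_path);
    assert_eq!(2, Arc::strong_count(&store));
}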
@@ -97,6 +97,25 @@ pub trait Procedure: Send + Sync {
     fn lock_key(&self) -> LockKey;
 }

+#[async_trait]
+impl<T: Procedure + ?Sized> Procedure for Box<T> {
+    fn type_name(&self) -> &str {
+        (**self).type_name()
+    }
+
+    async fn execute(&mut self, ctx: &Context) -> Result<Status> {
+        (**self).execute(ctx).await
+    }
+
+    fn dump(&self) -> Result<String> {
+        (**self).dump()
+    }
+
+    fn lock_key(&self) -> LockKey {
+        (**self).lock_key()
+    }
+}
+
 /// Keys to identify required locks.
 ///
 /// [LockKey] always sorts keys lexicographically so that they can be acquired
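This blanket impl forwards every trait method through the box, so a BoxedProcedure satisfies the Procedure bound wherever it is required, which is what lets the reworked test utilities accept &mut dyn Procedure. A minimal standalone sketch of the same delegation pattern with a toy trait:

trait Greet {
    fn greet(&self) -> String;
}

// Forward the trait through the box, mirroring the Procedure impl above:
// now Box<dyn Greet> itself satisfies Greet.
impl<T: Greet + ?Sized> Greet for Box<T> {
    fn greet(&self) -> String {
        (**self).greet()
    }
}

struct Hello;

impl Greet for Hello {
    fn greet(&self) -> String {
        "hello".to_string()
    }
}

// A generic consumer that only knows about the trait bound.
fn shout<G: Greet>(g: &G) -> String {
    g.greet().to_uppercase()
}

fn main() {
    let boxed: Box<dyn Greet> = Box::new(Hello);
    // Without the blanket impl, Box<dyn Greet> would not satisfy G: Greet here.
    assert_eq!("HELLO", shout(&boxed));
}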
@@ -14,27 +14,25 @@

 use std::collections::HashMap;
 use std::fmt;
-use std::sync::Arc;

 use common_telemetry::logging;
 use futures::TryStreamExt;
-use object_store::ObjectStore;
 use serde::{Deserialize, Serialize};
 use snafu::ResultExt;

 use crate::error::{Result, ToJsonSnafu};
-pub(crate) use crate::store::state_store::{ObjectStateStore, StateStoreRef};
+pub(crate) use crate::store::state_store::StateStoreRef;
 use crate::{BoxedProcedure, ProcedureId};

 pub mod state_store;

 /// Key prefix of procedure store.
-pub(crate) const PROC_PATH: &str = "procedure/";
+const PROC_PATH: &str = "procedure/";

 /// Constructs a path for procedure store.
 macro_rules! proc_path {
-    ($fmt:expr) => { format!("{}{}", $crate::store::PROC_PATH, format_args!($fmt)) };
-    ($fmt:expr, $($args:tt)*) => { format!("{}{}", $crate::store::PROC_PATH, format_args!($fmt, $($args)*)) };
+    ($store: expr, $fmt:expr) => { format!("{}{}", $store.proc_path(), format_args!($fmt)) };
+    ($store: expr, $fmt:expr, $($args:tt)*) => { format!("{}{}", $store.proc_path(), format_args!($fmt, $($args)*)) };
 }

 pub(crate) use proc_path;
@@ -54,13 +52,22 @@ pub struct ProcedureMessage {
 }

 /// Procedure storage layer.
-#[derive(Clone)]
-pub(crate) struct ProcedureStore(StateStoreRef);
+pub(crate) struct ProcedureStore {
+    proc_path: String,
+    store: StateStoreRef,
+}

 impl ProcedureStore {
     /// Creates a new [ProcedureStore] from specific [StateStoreRef].
-    pub(crate) fn new(state_store: StateStoreRef) -> ProcedureStore {
-        ProcedureStore(state_store)
+    pub(crate) fn new(parent_path: &str, store: StateStoreRef) -> ProcedureStore {
+        let proc_path = format!("{}{PROC_PATH}", parent_path);
+        logging::info!("The procedure state store path is: {}", &proc_path);
+        ProcedureStore { proc_path, store }
+    }
+
+    #[inline]
+    pub(crate) fn proc_path(&self) -> &str {
+        &self.proc_path
     }

     /// Dump the `procedure` to the storage.
@@ -81,6 +88,7 @@ impl ProcedureStore {
             step,
         };
         let key = ParsedKey {
+            prefix: &self.proc_path,
             procedure_id,
             step,
             key_type: KeyType::Step,
@@ -88,7 +96,7 @@ impl ProcedureStore {
         .to_string();
         let value = serde_json::to_string(&message).context(ToJsonSnafu)?;

-        self.0.put(&key, value.into_bytes()).await?;
+        self.store.put(&key, value.into_bytes()).await?;

         Ok(())
     }
@@ -100,12 +108,13 @@ impl ProcedureStore {
         step: u32,
     ) -> Result<()> {
         let key = ParsedKey {
+            prefix: &self.proc_path,
             procedure_id,
             step,
             key_type: KeyType::Commit,
         }
         .to_string();
-        self.0.put(&key, Vec::new()).await?;
+        self.store.put(&key, Vec::new()).await?;

         Ok(())
     }
@@ -117,26 +126,27 @@ impl ProcedureStore {
         step: u32,
     ) -> Result<()> {
         let key = ParsedKey {
+            prefix: &self.proc_path,
             procedure_id,
             step,
             key_type: KeyType::Rollback,
         }
         .to_string();
-        self.0.put(&key, Vec::new()).await?;
+        self.store.put(&key, Vec::new()).await?;

         Ok(())
     }

     /// Delete states of procedure from the storage.
     pub(crate) async fn delete_procedure(&self, procedure_id: ProcedureId) -> Result<()> {
-        let path = proc_path!("{procedure_id}/");
+        let path = proc_path!(self, "{procedure_id}/");
         // TODO(yingwen): We can optimize this to avoid reading the value.
-        let mut key_values = self.0.walk_top_down(&path).await?;
+        let mut key_values = self.store.walk_top_down(&path).await?;
         // 8 should be enough for most procedures.
         let mut step_keys = Vec::with_capacity(8);
         let mut finish_keys = Vec::new();
         while let Some((key, _)) = key_values.try_next().await? {
-            let Some(curr_key) = ParsedKey::parse_str(&key) else {
+            let Some(curr_key) = ParsedKey::parse_str(&self.proc_path, &key) else {
                 logging::warn!("Unknown key while deleting procedures, key: {}", key);
                 continue;
             };
@@ -155,11 +165,11 @@ impl ProcedureStore {
             finish_keys
         );
         // We delete all step keys first.
-        self.0.batch_delete(step_keys.as_slice()).await?;
+        self.store.batch_delete(step_keys.as_slice()).await?;
         // Then we delete the finish keys, to ensure
-        self.0.batch_delete(finish_keys.as_slice()).await?;
+        self.store.batch_delete(finish_keys.as_slice()).await?;
         // Finally we remove the directory itself.
-        self.0.delete(&path).await?;
+        self.store.delete(&path).await?;
         // Maybe we could use procedure_id.commit/rollback as the file name so we could
         // use remove_all to remove the directory and then remove the commit/rollback file.

@@ -175,9 +185,9 @@ impl ProcedureStore {
         let mut procedure_key_values: HashMap<_, (ParsedKey, Vec<u8>)> = HashMap::new();

         // Scan all procedures.
-        let mut key_values = self.0.walk_top_down(PROC_PATH).await?;
+        let mut key_values = self.store.walk_top_down(&self.proc_path).await?;
         while let Some((key, value)) = key_values.try_next().await? {
-            let Some(curr_key) = ParsedKey::parse_str(&key) else {
+            let Some(curr_key) = ParsedKey::parse_str(&self.proc_path, &key) else {
                 logging::warn!("Unknown key while loading procedures, key: {}", key);
                 continue;
             };
@@ -221,14 +231,6 @@ impl ProcedureStore {
     }
 }

-impl From<ObjectStore> for ProcedureStore {
-    fn from(store: ObjectStore) -> ProcedureStore {
-        let state_store = ObjectStateStore::new(store);
-
-        ProcedureStore::new(Arc::new(state_store))
-    }
-}
-
 /// Suffix type of the key.
 #[derive(Debug, PartialEq, Eq)]
 enum KeyType {
@@ -258,18 +260,19 @@ impl KeyType {

 /// Key to refer the procedure in the [ProcedureStore].
 #[derive(Debug, PartialEq, Eq)]
-struct ParsedKey {
+struct ParsedKey<'a> {
+    prefix: &'a str,
     procedure_id: ProcedureId,
     step: u32,
     key_type: KeyType,
 }

-impl fmt::Display for ParsedKey {
+impl<'a> fmt::Display for ParsedKey<'a> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(
             f,
             "{}{}/{:010}.{}",
-            PROC_PATH,
+            self.prefix,
             self.procedure_id,
             self.step,
             self.key_type.as_str(),
@@ -277,10 +280,10 @@ impl fmt::Display for ParsedKey {
     }
 }

-impl ParsedKey {
+impl<'a> ParsedKey<'a> {
     /// Try to parse the key from specific `input`.
-    fn parse_str(input: &str) -> Option<ParsedKey> {
-        let input = input.strip_prefix(PROC_PATH)?;
+    fn parse_str(prefix: &'a str, input: &str) -> Option<ParsedKey<'a>> {
+        let input = input.strip_prefix(prefix)?;
         let mut iter = input.rsplit('/');
         let name = iter.next()?;
         let id_str = iter.next()?;
@@ -294,6 +297,7 @@ impl ParsedKey {
         let step = step_str.parse().ok()?;

         Some(ParsedKey {
+            prefix,
             procedure_id,
             step,
             key_type,
@@ -303,6 +307,20 @@ impl ParsedKey {

 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
+    use object_store::ObjectStore;
+
+    use crate::store::state_store::ObjectStateStore;
+
+    impl ProcedureStore {
+        pub(crate) fn from_object_store(store: ObjectStore) -> ProcedureStore {
+            let state_store = ObjectStateStore::new(store);
+
+            ProcedureStore::new("data/", Arc::new(state_store))
+        }
+    }
+
     use async_trait::async_trait;
     use common_test_util::temp_dir::{create_temp_dir, TempDir};
     use object_store::services::Fs as Builder;
@@ -316,73 +334,91 @@ mod tests {
         builder.root(store_dir);
         let object_store = ObjectStore::new(builder).unwrap().finish();

-        ProcedureStore::from(object_store)
+        ProcedureStore::from_object_store(object_store)
     }

     #[test]
     fn test_parsed_key() {
+        let dir = create_temp_dir("store_procedure");
+        let store = procedure_store_for_test(&dir);
+
         let procedure_id = ProcedureId::random();
         let key = ParsedKey {
+            prefix: &store.proc_path,
             procedure_id,
             step: 2,
             key_type: KeyType::Step,
         };
         assert_eq!(
-            proc_path!("{procedure_id}/0000000002.step"),
+            proc_path!(store, "{procedure_id}/0000000002.step"),
             key.to_string()
         );
-        assert_eq!(key, ParsedKey::parse_str(&key.to_string()).unwrap());
+        assert_eq!(
+            key,
+            ParsedKey::parse_str(&store.proc_path, &key.to_string()).unwrap()
+        );

         let key = ParsedKey {
+            prefix: &store.proc_path,
             procedure_id,
             step: 2,
             key_type: KeyType::Commit,
         };
         assert_eq!(
-            proc_path!("{procedure_id}/0000000002.commit"),
+            proc_path!(store, "{procedure_id}/0000000002.commit"),
             key.to_string()
         );
-        assert_eq!(key, ParsedKey::parse_str(&key.to_string()).unwrap());
+        assert_eq!(
+            key,
+            ParsedKey::parse_str(&store.proc_path, &key.to_string()).unwrap()
+        );

         let key = ParsedKey {
+            prefix: &store.proc_path,
             procedure_id,
             step: 2,
             key_type: KeyType::Rollback,
         };
         assert_eq!(
-            proc_path!("{procedure_id}/0000000002.rollback"),
+            proc_path!(store, "{procedure_id}/0000000002.rollback"),
             key.to_string()
         );
-        assert_eq!(key, ParsedKey::parse_str(&key.to_string()).unwrap());
+        assert_eq!(
+            key,
+            ParsedKey::parse_str(&store.proc_path, &key.to_string()).unwrap()
+        );
     }

     #[test]
     fn test_parse_invalid_key() {
-        assert!(ParsedKey::parse_str("").is_none());
-        assert!(ParsedKey::parse_str("invalidprefix").is_none());
-        assert!(ParsedKey::parse_str("procedu/0000000003.step").is_none());
-        assert!(ParsedKey::parse_str("procedure-0000000003.step").is_none());
+        let dir = create_temp_dir("store_procedure");
+        let store = procedure_store_for_test(&dir);
+
+        assert!(ParsedKey::parse_str(&store.proc_path, "").is_none());
+        assert!(ParsedKey::parse_str(&store.proc_path, "invalidprefix").is_none());
+        assert!(ParsedKey::parse_str(&store.proc_path, "procedu/0000000003.step").is_none());
+        assert!(ParsedKey::parse_str(&store.proc_path, "procedure-0000000003.step").is_none());

         let procedure_id = ProcedureId::random();
-        let input = proc_path!("{procedure_id}");
-        assert!(ParsedKey::parse_str(&input).is_none());
+        let input = proc_path!(store, "{procedure_id}");
+        assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());

-        let input = proc_path!("{procedure_id}/");
-        assert!(ParsedKey::parse_str(&input).is_none());
+        let input = proc_path!(store, "{procedure_id}/");
+        assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());

-        let input = proc_path!("{procedure_id}/0000000003");
-        assert!(ParsedKey::parse_str(&input).is_none());
+        let input = proc_path!(store, "{procedure_id}/0000000003");
+        assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());

-        let input = proc_path!("{procedure_id}/0000000003.");
|
||||
assert!(ParsedKey::parse_str(&input).is_none());
|
||||
let input = proc_path!(store, "{procedure_id}/0000000003.");
|
||||
assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());
|
||||
|
||||
let input = proc_path!("{procedure_id}/0000000003.other");
|
||||
assert!(ParsedKey::parse_str(&input).is_none());
|
||||
let input = proc_path!(store, "{procedure_id}/0000000003.other");
|
||||
assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());
|
||||
|
||||
assert!(ParsedKey::parse_str("12345/0000000003.step").is_none());
|
||||
assert!(ParsedKey::parse_str(&store.proc_path, "12345/0000000003.step").is_none());
|
||||
|
||||
let input = proc_path!("{procedure_id}-0000000003.commit");
|
||||
assert!(ParsedKey::parse_str(&input).is_none());
|
||||
let input = proc_path!(store, "{procedure_id}-0000000003.commit");
|
||||
assert!(ParsedKey::parse_str(&store.proc_path, &input).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
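As a side note on the prefix-aware key scheme introduced above: keys now look like `{prefix}{procedure_id}/{step:010}.{suffix}`, and parsing strips the store's configurable `proc_path` prefix before splitting from the right. A minimal, self-contained sketch of that logic (the `KeyParts`/`parse_key` names are illustrative, not the crate's API):

// Illustrative stand-in for ParsedKey; not the actual crate types.
#[derive(Debug, PartialEq)]
struct KeyParts {
    procedure_id: String,
    step: u32,
    suffix: String,
}

fn parse_key(prefix: &str, input: &str) -> Option<KeyParts> {
    // Strip the store-specific prefix first, e.g. "data/".
    let rest = input.strip_prefix(prefix)?;
    // The remainder is "<procedure_id>/<step:010>.<suffix>"; split from the right.
    let mut iter = rest.rsplit('/');
    let name = iter.next()?;
    let procedure_id = iter.next()?.to_string();
    let (step_str, suffix) = name.split_once('.')?;
    let step = step_str.parse().ok()?;
    Some(KeyParts {
        procedure_id,
        step,
        suffix: suffix.to_string(),
    })
}

fn main() {
    let parts = parse_key("data/", "data/1b2e4567/0000000002.step").unwrap();
    assert_eq!((parts.step, parts.suffix.as_str()), (2, "step"));
    // Keys under a different prefix no longer parse, which is the point of the change.
    assert!(parse_key("data/", "other/1b2e4567/0000000002.step").is_none());
}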
@@ -5,6 +5,7 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+api = { path = "../../api" }
 async-trait.workspace = true
 common-error = { path = "../error" }
 common-recordbatch = { path = "../recordbatch" }

@@ -13,6 +14,7 @@ datafusion.workspace = true
 datafusion-common.workspace = true
 datafusion-expr.workspace = true
 datatypes = { path = "../../datatypes" }
+serde.workspace = true
 snafu.workspace = true
 statrs = "0.16"

@@ -115,12 +115,6 @@ pub enum Error {
         location: Location,
     },

-    #[snafu(display("Failed to execute DataFusion ExecutionPlan, source: {}", source))]
-    DataFusionExecutionPlan {
-        source: DataFusionError,
-        location: Location,
-    },
-
     #[snafu(display(
         "Failed to convert DataFusion's recordbatch stream, source: {}",
         source

@@ -198,9 +192,9 @@ impl ErrorExt for Error {
             | Error::ConvertArrowSchema { source }
             | Error::FromArrowArray { source } => source.status_code(),

-            Error::ExecuteRepeatedly { .. }
-            | Error::GeneralDataFusion { .. }
-            | Error::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
+            Error::ExecuteRepeatedly { .. } | Error::GeneralDataFusion { .. } => {
+                StatusCode::Unexpected
+            }

             Error::UnsupportedInputDataType { .. }
             | Error::TypeCast { .. }

@@ -14,7 +14,10 @@

 use std::fmt::{Debug, Formatter};

+use api::greptime_proto::v1::add_column::location::LocationType;
+use api::greptime_proto::v1::add_column::Location;
 use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
+use serde::{Deserialize, Serialize};

 pub mod columnar_value;
 pub mod error;

@@ -44,3 +47,24 @@ impl Debug for Output {
 }

 pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum AddColumnLocation {
+    First,
+    After { column_name: String },
+}
+
+impl From<&AddColumnLocation> for Location {
+    fn from(value: &AddColumnLocation) -> Self {
+        match value {
+            AddColumnLocation::First => Location {
+                location_type: LocationType::First.into(),
+                after_cloumn_name: "".to_string(),
+            },
+            AddColumnLocation::After { column_name } => Location {
+                location_type: LocationType::After.into(),
+                after_cloumn_name: column_name.to_string(),
+            },
+        }
+    }
+}
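A rough usage sketch of the `AddColumnLocation` to `Location` mapping added above, with local stand-ins for the generated proto types (the real `Location` lives in `api::greptime_proto`; the `after_cloumn_name` spelling mirrors the upstream proto field):

// Stand-ins for the generated proto types; illustrative only.
#[derive(Debug, PartialEq)]
enum LocationType {
    First,
    After,
}

#[derive(Debug, PartialEq)]
struct Location {
    location_type: LocationType,
    after_cloumn_name: String,
}

#[derive(Debug)]
enum AddColumnLocation {
    First,
    After { column_name: String },
}

impl From<&AddColumnLocation> for Location {
    fn from(value: &AddColumnLocation) -> Self {
        match value {
            // FIRST carries an empty anchor column name.
            AddColumnLocation::First => Location {
                location_type: LocationType::First,
                after_cloumn_name: String::new(),
            },
            // AFTER records which existing column the new one follows.
            AddColumnLocation::After { column_name } => Location {
                location_type: LocationType::After,
                after_cloumn_name: column_name.clone(),
            },
        }
    }
}

fn main() {
    let loc = Location::from(&AddColumnLocation::After {
        column_name: "host".to_string(),
    });
    assert_eq!(loc.after_cloumn_name, "host");
    assert_eq!(loc.location_type, LocationType::After);
}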
@@ -32,12 +32,12 @@ use crate::DfPhysicalPlan;

 pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;

-/// `PhysicalPlan` represent nodes in the Physical Plan.
+/// [`PhysicalPlan`] represent nodes in the Physical Plan.
 ///
-/// Each `PhysicalPlan` is Partition-aware and is responsible for
-/// creating the actual `async` [`SendableRecordBatchStream`]s
-/// of [`RecordBatch`] that incrementally compute the operator's
-/// output from its input partition.
+/// Each [`PhysicalPlan`] is partition-aware and is responsible for
+/// creating the actual "async" [`SendableRecordBatchStream`]s
+/// of [`RecordBatch`](common_recordbatch::RecordBatch) that incrementally
+/// compute the operator's output from its input partition.
 pub trait PhysicalPlan: Debug + Send + Sync {
     /// Returns the physical plan as [`Any`](std::any::Any) so that it can be
     /// downcast to a specific implementation.

@@ -71,6 +71,7 @@ pub trait PhysicalPlan: Debug + Send + Sync {
     ) -> Result<SendableRecordBatchStream>;
 }

+/// Adapt DataFusion's [`ExecutionPlan`](DfPhysicalPlan) to GreptimeDB's [`PhysicalPlan`].
 #[derive(Debug)]
 pub struct PhysicalPlanAdapter {
     schema: SchemaRef,

@@ -111,7 +111,7 @@ impl Stream for DfRecordBatchStreamAdapter {
     }
 }

-/// DataFusion SendableRecordBatchStream -> Greptime RecordBatchStream
+/// DataFusion [SendableRecordBatchStream](DfSendableRecordBatchStream) -> Greptime [RecordBatchStream]
 pub struct RecordBatchStreamAdapter {
     schema: SchemaRef,
     stream: DfSendableRecordBatchStream,

@@ -70,6 +70,19 @@ pub enum Error {
         location: Location,
     },

+    #[snafu(display(
+        "Failed to project Arrow RecordBatch with schema {:?} and projection {:?}, source: {}",
+        schema,
+        projection,
+        source
+    ))]
+    ProjectArrowRecordBatch {
+        source: datatypes::arrow::error::ArrowError,
+        location: Location,
+        schema: datatypes::schema::SchemaRef,
+        projection: Vec<usize>,
+    },
+
     #[snafu(display("Column {} not exists in table {}", column_name, table_name))]
     ColumnNotExists {
         column_name: String,

@@ -101,7 +114,8 @@ impl ErrorExt for Error {
             | Error::PollStream { .. }
             | Error::Format { .. }
             | Error::InitRecordbatchStream { .. }
-            | Error::ColumnNotExists { .. } => StatusCode::Internal,
+            | Error::ColumnNotExists { .. }
+            | Error::ProjectArrowRecordBatch { .. } => StatusCode::Internal,

             Error::External { source } => source.status_code(),

@@ -22,6 +22,7 @@ use std::sync::Arc;

 use datafusion::physical_plan::memory::MemoryStream;
 pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::arrow::compute::SortOptions;
 pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
 use datatypes::arrow::util::pretty;
 use datatypes::prelude::VectorRef;

@@ -34,10 +35,20 @@ use snafu::{ensure, ResultExt};

 pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
     fn schema(&self) -> SchemaRef;
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
 }

 pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;

+#[derive(Debug, Clone)]
+pub struct OrderOption {
+    pub name: String,
+    pub options: SortOptions,
+}
+
 /// EmptyRecordBatchStream can be used to create a RecordBatchStream
 /// that will produce no results
 pub struct EmptyRecordBatchStream {

@@ -156,6 +167,15 @@ impl RecordBatches {
     }
 }

+impl IntoIterator for RecordBatches {
+    type Item = RecordBatch;
+    type IntoIter = std::vec::IntoIter<Self::Item>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.batches.into_iter()
+    }
+}
+
 pub struct SimpleRecordBatchStream {
     inner: RecordBatches,
     index: usize,

@@ -181,6 +201,31 @@ impl Stream for SimpleRecordBatchStream {
     }
 }

+/// Adapt a [Stream] of [RecordBatch] to a [RecordBatchStream].
+pub struct RecordBatchStreamAdaptor {
+    pub schema: SchemaRef,
+    pub stream: Pin<Box<dyn Stream<Item = Result<RecordBatch>> + Send>>,
+    pub output_ordering: Option<Vec<OrderOption>>,
+}
+
+impl RecordBatchStream for RecordBatchStreamAdaptor {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        self.output_ordering.as_deref()
+    }
+}
+
+impl Stream for RecordBatchStreamAdaptor {
+    type Item = Result<RecordBatch>;
+
+    fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        Pin::new(&mut self.stream).poll_next(ctx)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
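A dependency-light sketch of the delegation pattern `RecordBatchStreamAdaptor` uses above, with a generic item type standing in for `RecordBatch` (assumes only the `futures` crate):

use std::pin::Pin;
use std::task::{Context, Poll};

use futures::stream::{self, Stream, StreamExt};

/// Wraps an inner boxed stream and forwards polling — the same shape as
/// RecordBatchStreamAdaptor, minus the schema and ordering metadata.
struct Adaptor<T> {
    inner: Pin<Box<dyn Stream<Item = T> + Send>>,
}

impl<T> Stream for Adaptor<T> {
    type Item = T;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
        // Delegate straight to the wrapped stream, like the adaptor's poll_next.
        Pin::new(&mut self.inner).poll_next(cx)
    }
}

fn main() {
    let adaptor = Adaptor {
        inner: Box::pin(stream::iter(vec![1, 2, 3])),
    };
    let items = futures::executor::block_on(adaptor.collect::<Vec<_>>());
    assert_eq!(items, vec![1, 2, 3]);
}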
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::collections::HashMap;
+use std::sync::Arc;

 use datatypes::schema::SchemaRef;
 use datatypes::value::Value;

@@ -21,7 +22,10 @@ use serde::ser::{Error, SerializeStruct};
 use serde::{Serialize, Serializer};
 use snafu::{OptionExt, ResultExt};

-use crate::error::{self, CastVectorSnafu, ColumnNotExistsSnafu, Result};
+use crate::error::{
+    self, CastVectorSnafu, ColumnNotExistsSnafu, DataTypesSnafu, ProjectArrowRecordBatchSnafu,
+    Result,
+};
 use crate::DfRecordBatch;

 /// A two-dimensional batch of column-oriented data with a defined schema.

@@ -51,6 +55,26 @@ impl RecordBatch {
         })
     }

+    pub fn try_project(&self, indices: &[usize]) -> Result<Self> {
+        let schema = Arc::new(self.schema.try_project(indices).context(DataTypesSnafu)?);
+        let mut columns = Vec::with_capacity(indices.len());
+        for index in indices {
+            columns.push(self.columns[*index].clone());
+        }
+        let df_record_batch = self.df_record_batch.project(indices).with_context(|_| {
+            ProjectArrowRecordBatchSnafu {
+                schema: self.schema.clone(),
+                projection: indices.to_vec(),
+            }
+        })?;
+
+        Ok(Self {
+            schema,
+            columns,
+            df_record_batch,
+        })
+    }
+
     /// Create a new [`RecordBatch`] from `schema` and `df_record_batch`.
     ///
     /// This method doesn't check the schema.
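For context on `try_project` above: the Arrow-level building block is `RecordBatch::project`, which picks columns by index and keeps schema and data consistent. A small runnable sketch assuming the `arrow` crate:

use std::sync::Arc;

use arrow::array::{Int64Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn main() -> arrow::error::Result<()> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("host", DataType::Utf8, false),
        Field::new("cpu", DataType::Int64, false),
        Field::new("memory", DataType::Int64, false),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(StringArray::from(vec!["a", "b"])),
            Arc::new(Int64Array::from(vec![1, 2])),
            Arc::new(Int64Array::from(vec![3, 4])),
        ],
    )?;

    // Keep only columns 0 and 2; this is the operation try_project wraps
    // together with GreptimeDB's own Schema projection and error context.
    let projected = batch.project(&[0, 2])?;
    assert_eq!(projected.num_columns(), 2);
    assert_eq!(projected.schema().field(1).name(), "memory");
    Ok(())
}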
@@ -21,11 +21,10 @@ prost.workspace = true
 session = { path = "../../session" }
 snafu.workspace = true
 table = { path = "../../table" }
-query = { path = "../../query" }

 [dependencies.substrait_proto]
 package = "substrait"
-version = "0.7"
+version = "0.10"

 [dev-dependencies]
 datatypes = { path = "../../datatypes" }
@@ -1,77 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-
-use datafusion::common::DFSchemaRef;
-use substrait_proto::proto::extensions::simple_extension_declaration::{
-    ExtensionFunction, MappingType,
-};
-use substrait_proto::proto::extensions::SimpleExtensionDeclaration;
-
-#[derive(Default)]
-pub struct ConvertorContext {
-    scalar_fn_names: HashMap<String, u32>,
-    scalar_fn_map: HashMap<u32, String>,
-    df_schema: Option<DFSchemaRef>,
-}
-
-impl ConvertorContext {
-    pub fn register_scalar_fn<S: AsRef<str>>(&mut self, name: S) -> u32 {
-        if let Some(anchor) = self.scalar_fn_names.get(name.as_ref()) {
-            return *anchor;
-        }
-
-        let next_anchor = self.scalar_fn_map.len() as _;
-        self.scalar_fn_map
-            .insert(next_anchor, name.as_ref().to_string());
-        self.scalar_fn_names
-            .insert(name.as_ref().to_string(), next_anchor);
-        next_anchor
-    }
-
-    pub fn register_scalar_with_anchor<S: AsRef<str>>(&mut self, name: S, anchor: u32) {
-        self.scalar_fn_map.insert(anchor, name.as_ref().to_string());
-        self.scalar_fn_names
-            .insert(name.as_ref().to_string(), anchor);
-    }
-
-    pub fn find_scalar_fn(&self, anchor: u32) -> Option<&str> {
-        self.scalar_fn_map.get(&anchor).map(|s| s.as_str())
-    }
-
-    pub fn generate_function_extension(&self) -> Vec<SimpleExtensionDeclaration> {
-        let mut result = Vec::with_capacity(self.scalar_fn_map.len());
-        for (anchor, name) in &self.scalar_fn_map {
-            let declaration = SimpleExtensionDeclaration {
-                mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
-                    extension_uri_reference: 0,
-                    function_anchor: *anchor,
-                    name: name.clone(),
-                })),
-            };
-            result.push(declaration);
-        }
-        result
-    }
-
-    pub(crate) fn set_df_schema(&mut self, schema: DFSchemaRef) {
-        debug_assert!(self.df_schema.is_none());
-        self.df_schema.get_or_insert(schema);
-    }
-
-    pub(crate) fn df_schema(&self) -> Option<&DFSchemaRef> {
-        self.df_schema.as_ref()
-    }
-}
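The deleted ConvertorContext above is, at its core, a bidirectional name/anchor registry for scalar functions. A condensed, self-contained sketch of that bookkeeping (illustrative names, not the removed API):

use std::collections::HashMap;

/// Bidirectional registry mapping function names to stable u32 anchors,
/// mirroring the scalar-function maps kept by the removed ConvertorContext.
#[derive(Default)]
struct FnRegistry {
    by_name: HashMap<String, u32>,
    by_anchor: HashMap<u32, String>,
}

impl FnRegistry {
    /// Returns the existing anchor for `name`, or allocates the next one.
    fn register(&mut self, name: &str) -> u32 {
        if let Some(anchor) = self.by_name.get(name) {
            return *anchor;
        }
        let anchor = self.by_anchor.len() as u32;
        self.by_anchor.insert(anchor, name.to_string());
        self.by_name.insert(name.to_string(), anchor);
        anchor
    }

    fn find(&self, anchor: u32) -> Option<&str> {
        self.by_anchor.get(&anchor).map(|s| s.as_str())
    }
}

fn main() {
    let mut reg = FnRegistry::default();
    let a = reg.register("equal");
    // Registration is idempotent per name, so anchors stay stable.
    assert_eq!(a, reg.register("equal"));
    assert_eq!(reg.find(a), Some("equal"));
}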
@@ -1,799 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::VecDeque;
-use std::str::FromStr;
-
-use datafusion::common::Column;
-use datafusion_expr::expr::Sort;
-use datafusion_expr::{expr_fn, lit, Between, BinaryExpr, BuiltinScalarFunction, Expr, Operator};
-use datatypes::schema::Schema;
-use snafu::{ensure, OptionExt};
-use substrait_proto::proto::expression::field_reference::ReferenceType as FieldReferenceType;
-use substrait_proto::proto::expression::reference_segment::{
-    ReferenceType as SegReferenceType, StructField,
-};
-use substrait_proto::proto::expression::{
-    FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction,
-};
-use substrait_proto::proto::function_argument::ArgType;
-use substrait_proto::proto::Expression;
-
-use crate::context::ConvertorContext;
-use crate::error::{
-    EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
-};
-use crate::types::{literal_type_to_scalar_value, scalar_value_as_literal_type};
-
-/// Convert substrait's `Expression` to DataFusion's `Expr`.
-pub(crate) fn to_df_expr(
-    ctx: &ConvertorContext,
-    expression: Expression,
-    schema: &Schema,
-) -> Result<Expr> {
-    let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
-    match expr_rex_type {
-        RexType::Literal(l) => {
-            let t = l.literal_type.context(MissingFieldSnafu {
-                field: "LiteralType",
-                plan: "Literal",
-            })?;
-            let v = literal_type_to_scalar_value(t)?;
-            Ok(lit(v))
-        }
-        RexType::Selection(selection) => convert_selection_rex(*selection, schema),
-        RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
-        RexType::WindowFunction(_)
-        | RexType::IfThen(_)
-        | RexType::SwitchExpression(_)
-        | RexType::SingularOrList(_)
-        | RexType::MultiOrList(_)
-        | RexType::Cast(_)
-        | RexType::Subquery(_)
-        | RexType::Nested(_)
-        | RexType::Enum(_) => UnsupportedExprSnafu {
-            name: format!("substrait expression {expr_rex_type:?}"),
-        }
-        .fail()?,
-    }
-}
-
-/// Convert Substrait's `FieldReference` - `DirectReference` - `StructField` to Datafusion's
-/// `Column` expr.
-pub fn convert_selection_rex(selection: FieldReference, schema: &Schema) -> Result<Expr> {
-    if let Some(FieldReferenceType::DirectReference(direct_ref)) = selection.reference_type
-        && let Some(SegReferenceType::StructField(field)) = direct_ref.reference_type {
-        let column_name = schema.column_name_by_index(field.field as _).to_string();
-        Ok(Expr::Column(Column {
-            relation: None,
-            name: column_name,
-        }))
-    } else {
-        InvalidParametersSnafu {
-            reason: "Only support direct struct reference in Selection Rex",
-        }
-        .fail()
-    }
-}
-
-pub fn convert_scalar_function(
-    ctx: &ConvertorContext,
-    scalar_fn: ScalarFunction,
-    schema: &Schema,
-) -> Result<Expr> {
-    // convert argument
-    let mut inputs = VecDeque::with_capacity(scalar_fn.arguments.len());
-    for arg in scalar_fn.arguments {
-        if let Some(ArgType::Value(sub_expr)) = arg.arg_type {
-            inputs.push_back(to_df_expr(ctx, sub_expr, schema)?);
-        } else {
-            InvalidParametersSnafu {
-                reason: "Only value expression arg is supported to be function argument",
-            }
-            .fail()?;
-        }
-    }
-
-    // convert this scalar function
-    // map function name
-    let anchor = scalar_fn.function_reference;
-    let fn_name = ctx
-        .find_scalar_fn(anchor)
-        .with_context(|| InvalidParametersSnafu {
-            reason: format!("Unregistered scalar function reference: {anchor}"),
-        })?;
-
-    // convenient util
-    let ensure_arg_len = |expected: usize| -> Result<()> {
-        ensure!(
-            inputs.len() == expected,
-            InvalidParametersSnafu {
-                reason: format!(
-                    "Invalid number of scalar function {}, expected {} but found {}",
-                    fn_name,
-                    expected,
-                    inputs.len()
-                )
-            }
-        );
-        Ok(())
-    };
-
-    // construct DataFusion expr
-    let expr = match fn_name {
-        // begin binary exprs, with the same order of DF `Operator`'s definition.
-        "eq" | "equal" => {
-            ensure_arg_len(2)?;
-            inputs.pop_front().unwrap().eq(inputs.pop_front().unwrap())
-        }
-        "not_eq" | "not_equal" => {
-            ensure_arg_len(2)?;
-            inputs
-                .pop_front()
-                .unwrap()
-                .not_eq(inputs.pop_front().unwrap())
-        }
-        "lt" => {
-            ensure_arg_len(2)?;
-            inputs.pop_front().unwrap().lt(inputs.pop_front().unwrap())
-        }
-        "lt_eq" | "lte" => {
-            ensure_arg_len(2)?;
-            inputs
-                .pop_front()
-                .unwrap()
-                .lt_eq(inputs.pop_front().unwrap())
-        }
-        "gt" => {
-            ensure_arg_len(2)?;
-            inputs.pop_front().unwrap().gt(inputs.pop_front().unwrap())
-        }
-        "gt_eq" | "gte" => {
-            ensure_arg_len(2)?;
-            inputs
-                .pop_front()
-                .unwrap()
-                .gt_eq(inputs.pop_front().unwrap())
-        }
-        "plus" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::Plus,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "minus" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::Minus,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "multiply" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::Multiply,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "divide" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::Divide,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "modulo" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::Modulo,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "and" => {
-            ensure_arg_len(2)?;
-            expr_fn::and(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
-        }
-        "or" => {
-            ensure_arg_len(2)?;
-            expr_fn::or(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
-        }
-        "like" => {
-            ensure_arg_len(2)?;
-            inputs
-                .pop_front()
-                .unwrap()
-                .like(inputs.pop_front().unwrap())
-        }
-        "not_like" => {
-            ensure_arg_len(2)?;
-            inputs
-                .pop_front()
-                .unwrap()
-                .not_like(inputs.pop_front().unwrap())
-        }
-        "is_distinct_from" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::IsDistinctFrom,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "is_not_distinct_from" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::IsNotDistinctFrom,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "regex_match" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::RegexMatch,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "regex_i_match" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::RegexIMatch,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "regex_not_match" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::RegexNotMatch,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "regex_not_i_match" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::RegexNotIMatch,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "bitwise_and" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::BitwiseAnd,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        "bitwise_or" => {
-            ensure_arg_len(2)?;
-            expr_fn::binary_expr(
-                inputs.pop_front().unwrap(),
-                Operator::BitwiseOr,
-                inputs.pop_front().unwrap(),
-            )
-        }
-        // end binary exprs
-        // start other direct expr, with the same order of DF `Expr`'s definition.
-        "not" => {
-            ensure_arg_len(1)?;
-            inputs.pop_front().unwrap().not()
-        }
-        "is_not_null" => {
-            ensure_arg_len(1)?;
-            inputs.pop_front().unwrap().is_not_null()
-        }
-        "is_null" => {
-            ensure_arg_len(1)?;
-            inputs.pop_front().unwrap().is_null()
-        }
-        "negative" => {
-            ensure_arg_len(1)?;
-            Expr::Negative(Box::new(inputs.pop_front().unwrap()))
-        }
-        // skip GetIndexedField, unimplemented.
-        "between" => {
-            ensure_arg_len(3)?;
-            Expr::Between(Between {
-                expr: Box::new(inputs.pop_front().unwrap()),
-                negated: false,
-                low: Box::new(inputs.pop_front().unwrap()),
-                high: Box::new(inputs.pop_front().unwrap()),
-            })
-        }
-        "not_between" => {
-            ensure_arg_len(3)?;
-            Expr::Between(Between {
-                expr: Box::new(inputs.pop_front().unwrap()),
-                negated: true,
-                low: Box::new(inputs.pop_front().unwrap()),
-                high: Box::new(inputs.pop_front().unwrap()),
-            })
-        }
-        // skip Case, is covered in substrait::SwitchExpression.
-        // skip Cast and TryCast, is covered in substrait::Cast.
-        "sort" | "sort_des" => {
-            ensure_arg_len(1)?;
-            Expr::Sort(Sort {
-                expr: Box::new(inputs.pop_front().unwrap()),
-                asc: false,
-                nulls_first: false,
-            })
-        }
-        "sort_asc" => {
-            ensure_arg_len(1)?;
-            Expr::Sort(Sort {
-                expr: Box::new(inputs.pop_front().unwrap()),
-                asc: true,
-                nulls_first: false,
-            })
-        }
-        // those are datafusion built-in "scalar functions".
-        "abs"
-        | "acos"
-        | "asin"
-        | "atan"
-        | "atan2"
-        | "ceil"
-        | "cos"
-        | "exp"
-        | "floor"
-        | "ln"
-        | "log"
-        | "log10"
-        | "log2"
-        | "power"
-        | "pow"
-        | "round"
-        | "signum"
-        | "sin"
-        | "sqrt"
-        | "tan"
-        | "trunc"
-        | "coalesce"
-        | "make_array"
-        | "ascii"
-        | "bit_length"
-        | "btrim"
-        | "char_length"
-        | "character_length"
-        | "concat"
-        | "concat_ws"
-        | "chr"
-        | "current_date"
-        | "current_time"
-        | "date_part"
-        | "datepart"
-        | "date_trunc"
-        | "datetrunc"
-        | "date_bin"
-        | "initcap"
-        | "left"
-        | "length"
-        | "lower"
-        | "lpad"
-        | "ltrim"
-        | "md5"
-        | "nullif"
-        | "octet_length"
-        | "random"
-        | "regexp_replace"
-        | "repeat"
-        | "replace"
-        | "reverse"
-        | "right"
-        | "rpad"
-        | "rtrim"
-        | "sha224"
-        | "sha256"
-        | "sha384"
-        | "sha512"
-        | "digest"
-        | "split_part"
-        | "starts_with"
-        | "strpos"
-        | "substr"
-        | "to_hex"
-        | "to_timestamp"
-        | "to_timestamp_millis"
-        | "to_timestamp_micros"
-        | "to_timestamp_seconds"
-        | "now"
-        | "translate"
-        | "trim"
-        | "upper"
-        | "uuid"
-        | "regexp_match"
-        | "struct"
-        | "from_unixtime"
-        | "arrow_typeof" => Expr::ScalarFunction {
-            fun: BuiltinScalarFunction::from_str(fn_name).unwrap(),
-            args: inputs.into(),
-        },
-        // skip ScalarUDF, unimplemented.
-        // skip AggregateFunction, is covered in substrait::AggregateRel
-        // skip WindowFunction, is covered in substrait WindowFunction
-        // skip AggregateUDF, unimplemented.
-        // skip InList, unimplemented
-        // skip Wildcard, unimplemented.
-        // end other direct expr
-        _ => UnsupportedExprSnafu {
-            name: format!("scalar function {fn_name}"),
-        }
-        .fail()?,
-    };
-
-    Ok(expr)
-}
-
-/// Convert DataFusion's `Expr` to substrait's `Expression`
-pub fn expression_from_df_expr(
-    ctx: &mut ConvertorContext,
-    expr: &Expr,
-    schema: &Schema,
-) -> Result<Expression> {
-    let expression = match expr {
-        // Don't merge them with other unsupported expr arms to preserve the ordering.
-        Expr::Alias(..) => UnsupportedExprSnafu {
-            name: expr.to_string(),
-        }
-        .fail()?,
-        Expr::Column(column) => {
-            let field_reference = convert_column(column, schema)?;
-            Expression {
-                rex_type: Some(RexType::Selection(Box::new(field_reference))),
-            }
-        }
-        // Don't merge them with other unsupported expr arms to preserve the ordering.
-        Expr::ScalarVariable(..) => UnsupportedExprSnafu {
-            name: expr.to_string(),
-        }
-        .fail()?,
-        Expr::Literal(v) => {
-            let t = scalar_value_as_literal_type(v)?;
-            let l = Literal {
-                nullable: true,
-                type_variation_reference: 0,
-                literal_type: Some(t),
-            };
-            Expression {
-                rex_type: Some(RexType::Literal(l)),
-            }
-        }
-        Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
-            let left = expression_from_df_expr(ctx, left, schema)?;
-            let right = expression_from_df_expr(ctx, right, schema)?;
-            let arguments = utils::expression_to_argument(vec![left, right]);
-            let op_name = utils::name_df_operator(op);
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        Expr::Not(e) => {
-            let arg = expression_from_df_expr(ctx, e, schema)?;
-            let arguments = utils::expression_to_argument(vec![arg]);
-            let op_name = "not";
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        Expr::IsNotNull(e) => {
-            let arg = expression_from_df_expr(ctx, e, schema)?;
-            let arguments = utils::expression_to_argument(vec![arg]);
-            let op_name = "is_not_null";
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        Expr::IsNull(e) => {
-            let arg = expression_from_df_expr(ctx, e, schema)?;
-            let arguments = utils::expression_to_argument(vec![arg]);
-            let op_name = "is_null";
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        Expr::Negative(e) => {
-            let arg = expression_from_df_expr(ctx, e, schema)?;
-            let arguments = utils::expression_to_argument(vec![arg]);
-            let op_name = "negative";
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        // Don't merge them with other unsupported expr arms to preserve the ordering.
-        Expr::GetIndexedField { .. } => UnsupportedExprSnafu {
-            name: expr.to_string(),
-        }
-        .fail()?,
-        Expr::Between(Between {
-            expr,
-            negated,
-            low,
-            high,
-        }) => {
-            let expr = expression_from_df_expr(ctx, expr, schema)?;
-            let low = expression_from_df_expr(ctx, low, schema)?;
-            let high = expression_from_df_expr(ctx, high, schema)?;
-            let arguments = utils::expression_to_argument(vec![expr, low, high]);
-            let op_name = if *negated { "not_between" } else { "between" };
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        // Don't merge them with other unsupported expr arms to preserve the ordering.
-        Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } => UnsupportedExprSnafu {
-            name: expr.to_string(),
-        }
-        .fail()?,
-        Expr::Sort(Sort {
-            expr,
-            asc,
-            nulls_first: _,
-        }) => {
-            let expr = expression_from_df_expr(ctx, expr, schema)?;
-            let arguments = utils::expression_to_argument(vec![expr]);
-            let op_name = if *asc { "sort_asc" } else { "sort_des" };
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        Expr::ScalarFunction { fun, args } => {
-            let arguments = utils::expression_to_argument(
-                args.iter()
-                    .map(|e| expression_from_df_expr(ctx, e, schema))
-                    .collect::<Result<Vec<_>>>()?,
-            );
-            let op_name = utils::name_builtin_scalar_function(fun);
-            let function_reference = ctx.register_scalar_fn(op_name);
-            utils::build_scalar_function_expression(function_reference, arguments)
-        }
-        // Don't merge them with other unsupported expr arms to preserve the ordering.
-        Expr::ScalarUDF { .. }
-        | Expr::AggregateFunction { .. }
-        | Expr::WindowFunction { .. }
-        | Expr::AggregateUDF { .. }
-        | Expr::InList { .. }
-        | Expr::Wildcard
-        | Expr::Like(_)
-        | Expr::ILike(_)
-        | Expr::SimilarTo(_)
-        | Expr::IsTrue(_)
-        | Expr::IsFalse(_)
-        | Expr::IsUnknown(_)
-        | Expr::IsNotTrue(_)
-        | Expr::IsNotFalse(_)
-        | Expr::IsNotUnknown(_)
-        | Expr::Exists { .. }
-        | Expr::InSubquery { .. }
-        | Expr::ScalarSubquery(..)
-        | Expr::Placeholder { .. }
-        | Expr::QualifiedWildcard { .. } => todo!(),
-        Expr::GroupingSet(_) | Expr::OuterReferenceColumn(_, _) => UnsupportedExprSnafu {
-            name: expr.to_string(),
-        }
-        .fail()?,
-    };
-
-    Ok(expression)
-}
-
-/// Convert DataFusion's `Column` expr into substrait's `FieldReference` -
-/// `DirectReference` - `StructField`.
-pub fn convert_column(column: &Column, schema: &Schema) -> Result<FieldReference> {
-    let column_name = &column.name;
-    let field_index =
-        schema
-            .column_index_by_name(column_name)
-            .with_context(|| MissingFieldSnafu {
-                field: format!("{column:?}"),
-                plan: format!("schema: {schema:?}"),
-            })?;
-
-    Ok(FieldReference {
-        reference_type: Some(FieldReferenceType::DirectReference(ReferenceSegment {
-            reference_type: Some(SegReferenceType::StructField(Box::new(StructField {
-                field: field_index as _,
-                child: None,
-            }))),
-        })),
-        root_type: None,
-    })
-}
-
-/// Some utils special for this `DataFusion::Expr` and `Substrait::Expression` conversion.
-mod utils {
-    use datafusion_expr::{BuiltinScalarFunction, Operator};
-    use substrait_proto::proto::expression::{RexType, ScalarFunction};
-    use substrait_proto::proto::function_argument::ArgType;
-    use substrait_proto::proto::{Expression, FunctionArgument};
-
-    pub(crate) fn name_df_operator(op: &Operator) -> &str {
-        match op {
-            Operator::Eq => "equal",
-            Operator::NotEq => "not_equal",
-            Operator::Lt => "lt",
-            Operator::LtEq => "lte",
-            Operator::Gt => "gt",
-            Operator::GtEq => "gte",
-            Operator::Plus => "plus",
-            Operator::Minus => "minus",
-            Operator::Multiply => "multiply",
-            Operator::Divide => "divide",
-            Operator::Modulo => "modulo",
-            Operator::And => "and",
-            Operator::Or => "or",
-            Operator::IsDistinctFrom => "is_distinct_from",
-            Operator::IsNotDistinctFrom => "is_not_distinct_from",
-            Operator::RegexMatch => "regex_match",
-            Operator::RegexIMatch => "regex_i_match",
-            Operator::RegexNotMatch => "regex_not_match",
-            Operator::RegexNotIMatch => "regex_not_i_match",
-            Operator::BitwiseAnd => "bitwise_and",
-            Operator::BitwiseOr => "bitwise_or",
-            Operator::BitwiseXor => "bitwise_xor",
-            Operator::BitwiseShiftRight => "bitwise_shift_right",
-            Operator::BitwiseShiftLeft => "bitwise_shift_left",
-            Operator::StringConcat => "string_concat",
-        }
-    }
-
-    /// Convert list of [Expression] to [FunctionArgument] vector.
-    pub(crate) fn expression_to_argument<I: IntoIterator<Item = Expression>>(
-        expressions: I,
-    ) -> Vec<FunctionArgument> {
-        expressions
-            .into_iter()
-            .map(|expr| FunctionArgument {
-                arg_type: Some(ArgType::Value(expr)),
-            })
-            .collect()
-    }
-
-    /// Convenient builder for [Expression]
-    pub(crate) fn build_scalar_function_expression(
-        function_reference: u32,
-        arguments: Vec<FunctionArgument>,
-    ) -> Expression {
-        Expression {
-            rex_type: Some(RexType::ScalarFunction(ScalarFunction {
-                function_reference,
-                arguments,
-                output_type: None,
-                ..Default::default()
-            })),
-        }
-    }
-
-    pub(crate) fn name_builtin_scalar_function(fun: &BuiltinScalarFunction) -> &str {
-        match fun {
-            BuiltinScalarFunction::Abs => "abs",
-            BuiltinScalarFunction::Acos => "acos",
-            BuiltinScalarFunction::Asin => "asin",
-            BuiltinScalarFunction::Atan => "atan",
-            BuiltinScalarFunction::Ceil => "ceil",
-            BuiltinScalarFunction::Cos => "cos",
-            BuiltinScalarFunction::Digest => "digest",
-            BuiltinScalarFunction::Exp => "exp",
-            BuiltinScalarFunction::Floor => "floor",
-            BuiltinScalarFunction::Ln => "ln",
-            BuiltinScalarFunction::Log => "log",
-            BuiltinScalarFunction::Log10 => "log10",
-            BuiltinScalarFunction::Log2 => "log2",
-            BuiltinScalarFunction::Round => "round",
-            BuiltinScalarFunction::Signum => "signum",
-            BuiltinScalarFunction::Sin => "sin",
-            BuiltinScalarFunction::Sqrt => "sqrt",
-            BuiltinScalarFunction::Tan => "tan",
-            BuiltinScalarFunction::Trunc => "trunc",
-            BuiltinScalarFunction::Ascii => "ascii",
-            BuiltinScalarFunction::BitLength => "bit_length",
-            BuiltinScalarFunction::Btrim => "btrim",
-            BuiltinScalarFunction::CharacterLength => "character_length",
-            BuiltinScalarFunction::Chr => "chr",
-            BuiltinScalarFunction::Concat => "concat",
-            BuiltinScalarFunction::ConcatWithSeparator => "concat_ws",
-            BuiltinScalarFunction::DatePart => "date_part",
-            BuiltinScalarFunction::DateTrunc => "date_trunc",
-            BuiltinScalarFunction::InitCap => "initcap",
-            BuiltinScalarFunction::Left => "left",
-            BuiltinScalarFunction::Lpad => "lpad",
-            BuiltinScalarFunction::Lower => "lower",
-            BuiltinScalarFunction::Ltrim => "ltrim",
-            BuiltinScalarFunction::MD5 => "md5",
-            BuiltinScalarFunction::NullIf => "nullif",
-            BuiltinScalarFunction::OctetLength => "octet_length",
-            BuiltinScalarFunction::Random => "random",
-            BuiltinScalarFunction::RegexpReplace => "regexp_replace",
-            BuiltinScalarFunction::Repeat => "repeat",
-            BuiltinScalarFunction::Replace => "replace",
-            BuiltinScalarFunction::Reverse => "reverse",
-            BuiltinScalarFunction::Right => "right",
-            BuiltinScalarFunction::Rpad => "rpad",
-            BuiltinScalarFunction::Rtrim => "rtrim",
-            BuiltinScalarFunction::SHA224 => "sha224",
-            BuiltinScalarFunction::SHA256 => "sha256",
-            BuiltinScalarFunction::SHA384 => "sha384",
-            BuiltinScalarFunction::SHA512 => "sha512",
-            BuiltinScalarFunction::SplitPart => "split_part",
-            BuiltinScalarFunction::StartsWith => "starts_with",
-            BuiltinScalarFunction::Strpos => "strpos",
-            BuiltinScalarFunction::Substr => "substr",
-            BuiltinScalarFunction::ToHex => "to_hex",
-            BuiltinScalarFunction::ToTimestamp => "to_timestamp",
-            BuiltinScalarFunction::ToTimestampMillis => "to_timestamp_millis",
-            BuiltinScalarFunction::ToTimestampMicros => "to_timestamp_macros",
-            BuiltinScalarFunction::ToTimestampSeconds => "to_timestamp_seconds",
-            BuiltinScalarFunction::Now => "now",
-            BuiltinScalarFunction::Translate => "translate",
-            BuiltinScalarFunction::Trim => "trim",
-            BuiltinScalarFunction::Upper => "upper",
-            BuiltinScalarFunction::RegexpMatch => "regexp_match",
-            BuiltinScalarFunction::Atan2 => "atan2",
-            BuiltinScalarFunction::Coalesce => "coalesce",
-            BuiltinScalarFunction::Power => "power",
-            BuiltinScalarFunction::MakeArray => "make_array",
-            BuiltinScalarFunction::DateBin => "date_bin",
-            BuiltinScalarFunction::FromUnixtime => "from_unixtime",
-            BuiltinScalarFunction::CurrentDate => "current_date",
-            BuiltinScalarFunction::CurrentTime => "current_time",
-            BuiltinScalarFunction::Uuid => "uuid",
-            BuiltinScalarFunction::Struct => "struct",
-            BuiltinScalarFunction::ArrowTypeof => "arrow_type_of",
-            BuiltinScalarFunction::Acosh => "acosh",
-            BuiltinScalarFunction::Asinh => "asinh",
-            BuiltinScalarFunction::Atanh => "atanh",
-            BuiltinScalarFunction::Cbrt => "cbrt",
-            BuiltinScalarFunction::Cosh => "cosh",
-            BuiltinScalarFunction::Pi => "pi",
-            BuiltinScalarFunction::Sinh => "sinh",
-            BuiltinScalarFunction::Tanh => "tanh",
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use datatypes::schema::ColumnSchema;
-
-    use super::*;
-
-    #[test]
-    fn expr_round_trip() {
-        let expr = expr_fn::and(
-            expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
-            expr_fn::col("column_a").gt(expr_fn::col("column_b")),
-        );
-
-        let schema = Schema::new(vec![
-            ColumnSchema::new(
-                "column_a",
-                datatypes::data_type::ConcreteDataType::int64_datatype(),
-                true,
-            ),
-            ColumnSchema::new(
-                "column_b",
-                datatypes::data_type::ConcreteDataType::float64_datatype(),
-                true,
-            ),
-        ]);
-
-        let mut ctx = ConvertorContext::default();
-        let substrait_expr = expression_from_df_expr(&mut ctx, &expr, &schema).unwrap();
-        let converted_expr = to_df_expr(&ctx, substrait_expr, &schema).unwrap();
-
-        assert_eq!(expr, converted_expr);
-    }
-}
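The core conversion pattern in the file removed above: check arity, then pop arguments front-to-back from a `VecDeque` to rebuild the expression. A toy sketch of that pattern (a stand-in `Expr` type, not DataFusion's):

use std::collections::VecDeque;

// Toy expression type standing in for DataFusion's Expr.
#[derive(Debug, PartialEq)]
enum Expr {
    Col(String),
    Eq(Box<Expr>, Box<Expr>),
}

fn convert(fn_name: &str, mut inputs: VecDeque<Expr>) -> Result<Expr, String> {
    // Arity check mirrors the converter's `ensure_arg_len` helper.
    let ensure = |n: usize, inputs: &VecDeque<Expr>| {
        if inputs.len() == n {
            Ok(())
        } else {
            Err(format!("{fn_name}: expected {n} args, got {}", inputs.len()))
        }
    };
    match fn_name {
        "eq" | "equal" => {
            ensure(2, &inputs)?;
            // Arguments are consumed in order, left operand first.
            Ok(Expr::Eq(
                Box::new(inputs.pop_front().unwrap()),
                Box::new(inputs.pop_front().unwrap()),
            ))
        }
        other => Err(format!("unsupported scalar function {other}")),
    }
}

fn main() {
    let args = VecDeque::from(vec![Expr::Col("a".into()), Expr::Col("b".into())]);
    let expr = convert("equal", args).unwrap();
    assert_eq!(
        expr,
        Expr::Eq(
            Box::new(Expr::Col("a".into())),
            Box::new(Expr::Col("b".into()))
        )
    );
}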
@@ -1,534 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_recursion::async_recursion;
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes};
|
||||
use catalog::table_source::DfTableSourceProvider;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datafusion::catalog::catalog::CatalogList;
|
||||
use datafusion::common::{DFField, DFSchema};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::physical_plan::project_schema;
|
||||
use datafusion::sql::TableReference;
|
||||
use datafusion_expr::{Filter, LogicalPlan, TableScan};
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use substrait_proto::proto::expression::mask_expression::{StructItem, StructSelect};
|
||||
use substrait_proto::proto::expression::MaskExpression;
|
||||
use substrait_proto::proto::extensions::simple_extension_declaration::MappingType;
|
||||
use substrait_proto::proto::plan_rel::RelType as PlanRelType;
|
||||
use substrait_proto::proto::read_rel::{NamedTable, ReadType};
|
||||
use substrait_proto::proto::rel::RelType;
|
||||
use substrait_proto::proto::{FilterRel, Plan, PlanRel, ReadRel, Rel};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::df_expr::{expression_from_df_expr, to_df_expr};
|
||||
use crate::error::{
|
||||
self, DFInternalSnafu, EmptyPlanSnafu, Error, InvalidParametersSnafu, MissingFieldSnafu,
|
||||
ResolveTableSnafu, SchemaNotMatchSnafu, UnknownPlanSnafu, UnsupportedExprSnafu,
|
||||
UnsupportedPlanSnafu,
|
||||
};
|
||||
use crate::schema::{from_schema, to_schema};
|
||||
use crate::SubstraitPlan;
|
||||
|
||||
pub struct DFLogicalSubstraitConvertorDeprecated;
|
||||
|
||||
#[async_trait]
|
||||
impl SubstraitPlan for DFLogicalSubstraitConvertorDeprecated {
|
||||
type Error = Error;
|
||||
|
||||
type Plan = LogicalPlan;
|
||||
|
||||
async fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
_message: B,
|
||||
_catalog_list: Arc<dyn CatalogList>,
|
||||
) -> Result<Self::Plan, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertorDeprecated {
|
||||
async fn convert_plan(
|
||||
&self,
|
||||
mut plan: Plan,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// prepare convertor context
|
||||
let mut ctx = ConvertorContext::default();
|
||||
for simple_ext in plan.extensions {
|
||||
if let Some(MappingType::ExtensionFunction(function_extension)) =
|
||||
simple_ext.mapping_type
|
||||
{
|
||||
ctx.register_scalar_with_anchor(
|
||||
function_extension.name,
|
||||
function_extension.function_anchor,
|
||||
);
|
||||
} else {
|
||||
debug!("Encounter unsupported substrait extension {:?}", simple_ext);
|
||||
}
|
||||
}
|
||||
|
||||
// extract rel
|
||||
let rel = if let Some(PlanRel { rel_type }) = plan.relations.pop()
|
||||
&& let Some(PlanRelType::Rel(rel)) = rel_type {
|
||||
rel
|
||||
} else {
|
||||
UnsupportedPlanSnafu {
|
||||
name: "Emply or non-Rel relation",
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
// TODO(LFC): Create table provider from outside, respect "disallow_cross_schema_query" option in query engine state.
|
||||
let mut table_provider =
|
||||
DfTableSourceProvider::new(catalog_manager, false, &QueryContext::new());
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), &mut table_provider)
|
||||
.await
|
||||
}
|
||||
|
||||
#[async_recursion]
|
||||
async fn rel_to_logical_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
rel: Box<Rel>,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
|
||||
// build logical plan
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, table_provider).await?,
|
||||
RelType::Filter(filter) => {
|
||||
let FilterRel {
|
||||
common: _,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: _,
|
||||
} = *filter;
|
||||
|
||||
let input = input.context(MissingFieldSnafu {
|
||||
field: "input",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, table_provider).await?);
|
||||
|
||||
let condition = condition.context(MissingFieldSnafu {
|
||||
field: "condition",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
|
||||
let schema = ctx.df_schema().context(InvalidParametersSnafu {
|
||||
reason: "the underlying TableScan plan should have included a table schema",
|
||||
})?;
|
||||
let schema = schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let predicate = to_df_expr(ctx, *condition, &schema)?;
|
||||
|
||||
LogicalPlan::Filter(Filter::try_new(predicate, input).context(DFInternalSnafu)?)
|
||||
}
|
||||
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Aggregate(_aggr_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Sort(_sort_rel) => UnsupportedPlanSnafu {
|
||||
name: "Sort Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Join(_join_rel) => UnsupportedPlanSnafu {
|
||||
name: "Join Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Project(_project_rel) => UnsupportedPlanSnafu {
|
||||
name: "Project Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Set(_set_rel) => UnsupportedPlanSnafu {
|
||||
name: "Set Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionSingle(_ext_single_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Single Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionMulti(_ext_multi_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Multi Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionLeaf(_ext_leaf_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Leaf Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Cross(_cross_rel) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::HashJoin(_) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::MergeJoin(_) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(logical_plan)
|
||||
}
|
||||
|
||||
async fn convert_read_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
read_rel: Box<ReadRel>,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
|
||||
let read_type = read_rel.read_type.context(MissingFieldSnafu {
|
||||
field: "read_type",
|
||||
plan: "Read",
|
||||
})?;
|
||||
let (table_name, schema_name, catalog_name) = match read_type {
|
||||
ReadType::NamedTable(mut named_table) => {
|
||||
ensure!(
|
||||
named_table.names.len() == 3,
|
||||
InvalidParametersSnafu {
|
||||
reason:
|
||||
"NamedTable should contains three names for catalog, schema and table",
|
||||
}
|
||||
);
|
||||
(
|
||||
named_table.names.pop().unwrap(),
|
||||
named_table.names.pop().unwrap(),
|
||||
named_table.names.pop().unwrap(),
|
||||
)
|
||||
}
|
||||
ReadType::VirtualTable(_) | ReadType::LocalFiles(_) | ReadType::ExtensionTable(_) => {
|
||||
UnsupportedExprSnafu {
|
||||
name: "Non-NamedTable Read",
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
};
|
||||
|
||||
// Get projection indices
|
||||
let projection = read_rel
|
||||
.projection
|
||||
.map(|mask_expr| self.convert_mask_expression(mask_expr));
|
||||
|
||||
let table_ref = TableReference::full(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
table_name.clone(),
|
||||
);
|
||||
let adapter = table_provider
|
||||
.resolve_table(table_ref.clone())
|
||||
.await
|
||||
.with_context(|_| ResolveTableSnafu {
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
|
||||
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
|
||||
let stored_schema = adapter.schema();
|
||||
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
|
||||
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
|
||||
ensure!(
|
||||
same_schema_without_metadata(&stored_schema, retrieved_arrow_schema),
|
||||
SchemaNotMatchSnafu {
|
||||
substrait_schema: retrieved_arrow_schema.clone(),
|
||||
storage_schema: stored_schema
|
||||
}
|
||||
);
|
||||
|
||||
// Convert filter
|
||||
let filters = if let Some(filter) = read_rel.filter {
|
||||
vec![to_df_expr(ctx, *filter, &retrieved_schema)?]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Calculate the projected schema
|
||||
let projected_schema = Arc::new(
|
||||
project_schema(&stored_schema, projection.as_ref())
|
||||
.and_then(|x| {
|
||||
DFSchema::new_with_metadata(
|
||||
x.fields()
|
||||
.iter()
|
||||
.map(|f| DFField::from_qualified(table_ref.clone(), f.clone()))
|
||||
.collect(),
|
||||
x.metadata().clone(),
|
||||
)
|
||||
})
|
||||
.context(DFInternalSnafu)?,
|
||||
);
|
||||
|
||||
ctx.set_df_schema(projected_schema.clone());
|
||||
|
||||
// TODO(ruihang): Support limit(fetch)
|
||||
Ok(LogicalPlan::TableScan(TableScan {
|
||||
table_name: table_ref,
|
||||
source: adapter,
|
||||
projection,
|
||||
projected_schema,
|
||||
filters,
|
||||
fetch: None,
|
||||
}))
|
||||
}
|
||||
|
||||
fn convert_mask_expression(&self, mask_expression: MaskExpression) -> Vec<usize> {
|
||||
mask_expression
|
||||
.select
|
||||
.unwrap_or_default()
|
||||
.struct_items
|
||||
.into_iter()
|
||||
.map(|select| select.field as _)
|
||||
.collect()
|
||||
}
|
||||
}

impl DFLogicalSubstraitConvertorDeprecated {
    fn logical_plan_to_rel(
        &self,
        ctx: &mut ConvertorContext,
        plan: Arc<LogicalPlan>,
    ) -> Result<Rel, Error> {
        Ok(match &*plan {
            LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Projection",
            }
            .fail()?,
            LogicalPlan::Filter(filter) => {
                let input = Some(Box::new(
                    self.logical_plan_to_rel(ctx, filter.input.clone())?,
                ));

                let schema = plan
                    .schema()
                    .clone()
                    .try_into()
                    .context(error::ConvertDfSchemaSnafu)?;
                let condition = Some(Box::new(expression_from_df_expr(
                    ctx,
                    &filter.predicate,
                    &schema,
                )?));

                let rel = FilterRel {
                    common: None,
                    input,
                    condition,
                    advanced_extension: None,
                };
                Rel {
                    rel_type: Some(RelType::Filter(Box::new(rel))),
                }
            }
            LogicalPlan::Window(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Window",
            }
            .fail()?,
            LogicalPlan::Aggregate(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Aggregate",
            }
            .fail()?,
            LogicalPlan::Sort(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Sort",
            }
            .fail()?,
            LogicalPlan::Join(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Join",
            }
            .fail()?,
            LogicalPlan::CrossJoin(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical CrossJoin",
            }
            .fail()?,
            LogicalPlan::Repartition(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Repartition",
            }
            .fail()?,
            LogicalPlan::Union(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Union",
            }
            .fail()?,
            LogicalPlan::TableScan(table_scan) => {
                let read_rel = self.convert_table_scan_plan(ctx, table_scan)?;
                Rel {
                    rel_type: Some(RelType::Read(Box::new(read_rel))),
                }
            }
            LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical EmptyRelation",
            }
            .fail()?,
            LogicalPlan::Limit(_) => UnsupportedPlanSnafu {
                name: "DataFusion Logical Limit",
            }
            .fail()?,

            LogicalPlan::Subquery(_)
            | LogicalPlan::SubqueryAlias(_)
            | LogicalPlan::CreateView(_)
            | LogicalPlan::CreateCatalogSchema(_)
            | LogicalPlan::CreateCatalog(_)
            | LogicalPlan::DropView(_)
            | LogicalPlan::Distinct(_)
            | LogicalPlan::CreateExternalTable(_)
            | LogicalPlan::CreateMemoryTable(_)
            | LogicalPlan::DropTable(_)
            | LogicalPlan::Values(_)
            | LogicalPlan::Explain(_)
            | LogicalPlan::Analyze(_)
            | LogicalPlan::Extension(_)
            | LogicalPlan::Prepare(_)
            | LogicalPlan::Dml(_)
            | LogicalPlan::DescribeTable(_)
            | LogicalPlan::Unnest(_)
            | LogicalPlan::Statement(_) => InvalidParametersSnafu {
                reason: format!(
                    "Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
                ),
            }
            .fail()?,
        })
    }

    fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
        let mut ctx = ConvertorContext::default();

        let rel = self.logical_plan_to_rel(&mut ctx, Arc::new(plan))?;

        // convert extension
        let extensions = ctx.generate_function_extension();

        // assemble PlanRel
        let plan_rel = PlanRel {
            rel_type: Some(PlanRelType::Rel(rel)),
        };

        Ok(Plan {
            extension_uris: vec![],
            extensions,
            relations: vec![plan_rel],
            advanced_extensions: None,
            expected_type_urls: vec![],
            ..Default::default()
        })
    }

    pub fn convert_table_scan_plan(
        &self,
        ctx: &mut ConvertorContext,
        table_scan: &TableScan,
    ) -> Result<ReadRel, Error> {
        let provider = table_scan
            .source
            .as_any()
            .downcast_ref::<DefaultTableSource>()
            .context(UnknownPlanSnafu)?
            .table_provider
            .as_any()
            .downcast_ref::<DfTableProviderAdapter>()
            .context(UnknownPlanSnafu)?;
        let table_info = provider.table().table_info();

        // assemble NamedTable and ReadType
        let catalog_name = table_info.catalog_name.clone();
        let schema_name = table_info.schema_name.clone();
        let table_name = table_info.name.clone();
        let named_table = NamedTable {
            names: vec![catalog_name, schema_name, table_name],
            advanced_extension: None,
        };
        let read_type = ReadType::NamedTable(named_table);

        // assemble projection
        let projection = table_scan
            .projection
            .as_ref()
            .map(|x| self.convert_schema_projection(x));

        // assemble base (unprojected) schema using Table's schema.
        let base_schema = from_schema(&provider.table().schema())?;

        // make conjunction over a list of filters and convert the result to substrait
        let filter = if let Some(conjunction) = table_scan
            .filters
            .iter()
            .cloned()
            .reduce(|accum, expr| accum.and(expr))
        {
            Some(Box::new(expression_from_df_expr(
                ctx,
                &conjunction,
                &provider.table().schema(),
            )?))
        } else {
            None
        };

        let read_rel = ReadRel {
            common: None,
            base_schema: Some(base_schema),
            filter,
            projection,
            advanced_extension: None,
            read_type: Some(read_type),
            ..Default::default()
        };

        Ok(read_rel)
    }

    /// Convert an index-based schema projection to substrait's [MaskExpression].
    fn convert_schema_projection(&self, projections: &[usize]) -> MaskExpression {
        let struct_items = projections
            .iter()
            .map(|index| StructItem {
                field: *index as i32,
                child: None,
            })
            .collect();
        MaskExpression {
            select: Some(StructSelect { struct_items }),
            // TODO(ruihang): this field is unspecified
            maintain_singular_struct: true,
        }
    }
}
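
One detail worth noting in convert_table_scan_plan above: the scan's pushed-down filters are folded into a single conjunction with Iterator::reduce before conversion, so an empty filter list cleanly yields no substrait filter at all. A minimal sketch of that folding step, assuming only the datafusion-expr crate:

use datafusion_expr::{col, lit, Expr};

fn main() {
    let filters: Vec<Expr> = vec![col("host").eq(lit("a")), col("cpu").gt(lit(0.5))];

    // reduce returns None for an empty vector, which is why a scan without
    // pushed-down predicates produces filter: None in the ReadRel above.
    let conjunction = filters.into_iter().reduce(|accum, expr| accum.and(expr));

    // Prints the combined predicate, i.e. host = 'a' AND cpu > 0.5.
    println!("{conjunction:?}");
}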

fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
    lhs.fields.len() == rhs.fields.len()
        && lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
            x.name() == y.name()
                && x.data_type() == y.data_type()
                && x.is_nullable() == y.is_nullable()
        })
}
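
same_schema_without_metadata compares field names, types, and nullability but deliberately ignores schema metadata: the schema decoded from a substrait proto does not carry GreptimeDB's arrow metadata, so full Schema equality would reject otherwise-identical plans. A minimal sketch of the distinction, assuming only the arrow-schema crate (the metadata key is hypothetical):

use std::collections::HashMap;

use arrow_schema::{DataType, Field, Schema};

fn main() {
    let stored = Schema::new(vec![Field::new("ts", DataType::Int64, false)])
        // Hypothetical metadata key, standing in for whatever the storage
        // layer attaches to its schemas.
        .with_metadata(HashMap::from([("version".to_string(), "1".to_string())]));
    let retrieved = Schema::new(vec![Field::new("ts", DataType::Int64, false)]);

    // Full equality fails because the metadata maps differ...
    assert_ne!(stored, retrieved);

    // ...while the field-wise comparison used above succeeds.
    assert!(stored
        .fields()
        .iter()
        .zip(retrieved.fields().iter())
        .all(|(x, y)| {
            x.name() == y.name()
                && x.data_type() == y.data_type()
                && x.is_nullable() == y.is_nullable()
        }));
}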
@@ -16,8 +16,11 @@ use std::sync::Arc;
 
 use async_trait::async_trait;
 use bytes::{Buf, Bytes, BytesMut};
+use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use datafusion::catalog::catalog::CatalogList;
-use datafusion::prelude::SessionContext;
+use datafusion::execution::context::SessionState;
+use datafusion::execution::runtime_env::RuntimeEnv;
+use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_expr::LogicalPlan;
 use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
 use datafusion_substrait::logical_plan::producer::to_substrait_plan;
@@ -41,9 +44,12 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
         message: B,
         catalog_list: Arc<dyn CatalogList>,
     ) -> Result<Self::Plan, Self::Error> {
-        let mut context = SessionContext::new();
-        let plan = Plan::decode(message).context(DecodeRelSnafu)?;
+        let state_config = SessionConfig::new()
+            .with_default_catalog_and_schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
+        let state = SessionState::with_config_rt(state_config, Arc::new(RuntimeEnv::default()));
+        let mut context = SessionContext::with_state(state);
         context.register_catalog_list(catalog_list);
+        let plan = Plan::decode(message).context(DecodeRelSnafu)?;
         let df_plan = from_substrait_plan(&mut context, &plan)
             .await
             .context(DecodeDfPlanSnafu)?;
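
The point of this change is that decoding no longer starts from a bare SessionContext::new(): datafusion-substrait resolves unqualified table names against the session's default catalog and schema, so the context is built from a SessionConfig carrying GreptimeDB's defaults before the catalog list is registered. The construction in isolation (a sketch; "greptime" and "public" stand in for DEFAULT_CATALOG_NAME and DEFAULT_SCHEMA_NAME):

use std::sync::Arc;

use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::prelude::{SessionConfig, SessionContext};

fn main() {
    // Point name resolution at the intended defaults instead of
    // datafusion's built-in "datafusion"/"public" pair.
    let config = SessionConfig::new().with_default_catalog_and_schema("greptime", "public");
    let state = SessionState::with_config_rt(config, Arc::new(RuntimeEnv::default()));
    let context = SessionContext::with_state(state);

    // A real caller would now register its catalog list on the context so
    // that plan decoding can resolve table references.
    let _ = context;
}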
@@ -52,8 +58,9 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
 
     fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
         let mut buf = BytesMut::new();
+        let context = SessionContext::new();
 
-        let substrait_plan = to_substrait_plan(&plan).context(EncodeDfPlanSnafu)?;
+        let substrait_plan = to_substrait_plan(&plan, &context).context(EncodeDfPlanSnafu)?;
         substrait_plan.encode(&mut buf).context(EncodeRelSnafu)?;
 
         Ok(buf.freeze())
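
On the encode side, the substrait Plan is a prost message, so serialization follows the usual prost pattern: encode into a BytesMut, then freeze it into an immutable, cheaply clonable Bytes. A standalone sketch of that pattern, using prost_types::Timestamp as a stand-in message (assumes the prost and prost-types crates):

use bytes::BytesMut;
use prost::Message;
use prost_types::Timestamp;

fn main() -> Result<(), prost::EncodeError> {
    let message = Timestamp { seconds: 1, nanos: 0 };

    let mut buf = BytesMut::new();
    message.encode(&mut buf)?;

    // freeze() converts the mutable buffer into Bytes without copying,
    // matching the Ok(buf.freeze()) return above.
    let bytes = buf.freeze();
    assert!(!bytes.is_empty());
    Ok(())
}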
@@ -78,9 +78,6 @@ pub enum Error {
         source: BoxedError,
     },
 
-    #[snafu(display("Table querying not found: {}", name))]
-    TableNotFound { name: String, location: Location },
-
     #[snafu(display("Cannot convert plan doesn't belong to GreptimeDB"))]
     UnknownPlan { location: Location },
 
@@ -139,7 +136,6 @@ impl ErrorExt for Error {
             | Error::EmptyExpr { .. }
             | Error::MissingField { .. }
             | Error::InvalidParameters { .. }
-            | Error::TableNotFound { .. }
             | Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
             Error::DFInternal { .. }
             | Error::Internal { .. }
@@ -15,14 +15,8 @@
 #![feature(let_chains)]
 #![feature(trait_upcasting)]
 
-mod context;
-mod df_expr;
+#[allow(unused)]
 mod df_logical;
+mod df_substrait;
 pub mod error;
-mod schema;
-mod types;
 
 use std::sync::Arc;