Mirror of https://github.com/GreptimeTeam/greptimedb.git
Synced 2025-12-24 23:19:57 +00:00

Compare commits: flow_rule_ ... async_deco
21 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | b6e7fb5e08 |  |
|  | a5df3954f3 |  |
|  | 32fd850c20 |  |
|  | 2bfdae4f8f |  |
|  | fcb898e9a4 |  |
|  | 8fa2fdfc42 |  |
|  | 4dc1a1d60f |  |
|  | e375a18011 |  |
|  | e0ff701e51 |  |
|  | 25645a3303 |  |
|  | b32ea7d84c |  |
|  | f164f6eaf3 |  |
|  | af1920defc |  |
|  | 7c97fae522 |  |
|  | b8070adc3a |  |
|  | 11bfb17328 |  |
|  | 1d87bd2d43 |  |
|  | ababeaf538 |  |
|  | 2cbf51d0be |  |
|  | 3059b04b19 |  |
|  | 352b197be4 |  |
@@ -52,7 +52,7 @@ runs:
       uses: ./.github/actions/build-greptime-binary
       with:
         base-image: ubuntu
-        features: servers/dashboard,pg_kvbackend
+        features: servers/dashboard,pg_kvbackend,mysql_kvbackend
         cargo-profile: ${{ inputs.cargo-profile }}
         artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
         version: ${{ inputs.version }}
@@ -70,7 +70,7 @@ runs:
     if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
       with:
         base-image: centos
-        features: servers/dashboard,pg_kvbackend
+        features: servers/dashboard,pg_kvbackend,mysql_kvbackend
         cargo-profile: ${{ inputs.cargo-profile }}
         artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
         version: ${{ inputs.version }}
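Both hunks widen the same Cargo feature list, so the MySQL-backed metadata store gets compiled into the release binaries. A minimal sketch of how such a feature gate behaves at compile time; the functions here are hypothetical, only the `#[cfg]` attribute syntax and the feature name come from this diff:

```rust
// Hypothetical sketch of a Cargo feature gate. The real gated items appear
// later in this diff (e.g. `use common_meta::kv_backend::rds::MySqlStore;`).

#[cfg(feature = "mysql_kvbackend")]
fn mysql_backend_enabled() -> bool {
    true
}

#[cfg(not(feature = "mysql_kvbackend"))]
fn mysql_backend_enabled() -> bool {
    false
}

fn main() {
    // Prints `true` only when built with `--features mysql_kvbackend`.
    println!("{}", mysql_backend_enabled());
}
```

Without the flag, the gated items simply do not exist in the binary, which is why every build and test command below has to pass the feature explicitly.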
.github/workflows/dev-build.yml (vendored, 7 changed lines)
@@ -238,6 +238,13 @@ jobs:
       version: ${{ needs.allocate-runners.outputs.version }}
       push-latest-tag: false # Don't push the latest tag to registry.
       dev-mode: true # Only build the standard images.
+
+      - name: Echo Docker image tag to step summary
+        run: |
+          echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
+          echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
+          echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
+          echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY

       - name: Set build result
         id: set-build-result
.github/workflows/develop.yml (vendored, 10 changed lines)
@@ -111,7 +111,7 @@ jobs:
       - name: Build greptime binaries
         shell: bash
         # `cargo gc` will invoke `cargo build` with specified args
-        run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
+        run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
       - name: Pack greptime binaries
         shell: bash
         run: |
@@ -270,7 +270,7 @@ jobs:
       - name: Build greptime bianry
         shell: bash
         # `cargo gc` will invoke `cargo build` with specified args
-        run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
+        run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
       - name: Pack greptime binary
         shell: bash
         run: |
@@ -687,7 +687,7 @@ jobs:
         working-directory: tests-integration/fixtures
         run: docker compose up -d --wait
       - name: Run nextest cases
-        run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
+        run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
         env:
           CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
           RUST_BACKTRACE: 1
@@ -704,6 +704,7 @@ jobs:
           GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
           GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
           GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
+          GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
           GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
           GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
           UNITTEST_LOG_DIR: "__unittest_logs"
@@ -739,7 +740,7 @@ jobs:
         working-directory: tests-integration/fixtures
         run: docker compose up -d --wait
       - name: Run nextest cases
-        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
+        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
         env:
           CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
           RUST_BACKTRACE: 1
@@ -755,6 +756,7 @@ jobs:
           GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
           GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
           GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
+          GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
           GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
           GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
           UNITTEST_LOG_DIR: "__unittest_logs"
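The new `GT_MYSQL_ENDPOINTS` variable points the integration tests at the MySQL container started by `docker compose`. A minimal sketch of how a test harness might pick the endpoint up; the fallback value mirrors the workflow, but the helper itself is hypothetical:

```rust
use std::env;

// Reads the MySQL endpoint exported by the CI workflow above; the default
// matches the docker-compose fixture so the sketch also works locally.
fn mysql_endpoint() -> String {
    env::var("GT_MYSQL_ENDPOINTS")
        .unwrap_or_else(|_| "mysql://greptimedb:admin@127.0.0.1:3306/mysql".to_string())
}

fn main() {
    println!("mysql_kvbackend tests would connect to: {}", mysql_endpoint());
}
```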
Cargo.lock (generated, 30 changed lines)
@@ -1693,6 +1693,7 @@ dependencies = [
  "humantime",
  "meta-client",
  "nu-ansi-term",
+ "opendal",
  "query",
  "rand",
  "reqwest",
@@ -2015,6 +2016,7 @@ dependencies = [
  "arc-swap",
  "async-trait",
  "bincode",
+ "chrono",
  "common-base",
  "common-catalog",
  "common-error",
@@ -2196,6 +2198,7 @@ dependencies = [
  "serde_with",
  "session",
  "snafu 0.8.5",
+ "sqlx",
  "store-api",
  "strum 0.25.0",
  "table",
@@ -4116,11 +4119,12 @@ dependencies = [

 [[package]]
 name = "flate2"
-version = "1.0.34"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0"
+checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
 dependencies = [
  "crc32fast",
+ "libz-rs-sys",
  "libz-sys",
  "miniz_oxide",
 ]
@@ -4702,7 +4706,7 @@ dependencies = [

 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
+source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c5419bbd20cb42e568ec325a4d71a3c94cc327e1#c5419bbd20cb42e568ec325a4d71a3c94cc327e1"
 dependencies = [
  "prost 0.13.3",
  "serde",
@@ -6275,6 +6279,15 @@ dependencies = [
  "vcpkg",
 ]

+[[package]]
+name = "libz-rs-sys"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"
+dependencies = [
+ "zlib-rs",
+]
+
 [[package]]
 name = "libz-sys"
 version = "1.1.20"
@@ -6718,6 +6731,7 @@ dependencies = [
  "session",
  "snafu 0.8.5",
  "store-api",
+ "strum 0.25.0",
  "table",
  "tokio",
  "tokio-postgres",
@@ -6818,9 +6832,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"

 [[package]]
 name = "miniz_oxide"
-version = "0.8.0"
+version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
+checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
 dependencies = [
  "adler2",
 ]
@@ -13950,6 +13964,12 @@ dependencies = [
  "syn 2.0.96",
 ]

+[[package]]
+name = "zlib-rs"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"
+
 [[package]]
 name = "zstd"
 version = "0.11.2+zstd.1.5.2"
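The lockfile churn around `flate2`, `libz-rs-sys`, and `zlib-rs` reflects the workspace switching to the pure-Rust zlib backend (see the `flate2` line in the workspace Cargo.toml hunk below). A small sketch of what that buys at the call site; the payload string is arbitrary, and nothing here is specific to GreptimeDB:

```rust
use std::io::prelude::*;

use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use flate2::Compression;

// Round-trips a buffer through zlib. With the workspace settings below
// (default-features = false, features = ["zlib-rs"]), flate2 backs this with
// the pure-Rust zlib-rs implementation instead of linking a C zlib.
fn main() -> std::io::Result<()> {
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(b"hello from greptimedb")?;
    let compressed = encoder.finish()?;

    let mut decompressed = String::new();
    ZlibDecoder::new(&compressed[..]).read_to_string(&mut decompressed)?;
    assert_eq!(decompressed, "hello from greptimedb");
    Ok(())
}
```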
@@ -126,10 +126,11 @@ deadpool-postgres = "0.12"
 derive_builder = "0.12"
 dotenv = "0.15"
 etcd-client = "0.14"
+flate2 = { version = "1.1.0", default-features = false, features = ["zlib-rs"] }
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c5419bbd20cb42e568ec325a4d71a3c94cc327e1" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -188,6 +189,10 @@ shadow-rs = "0.38"
 similar-asserts = "1.6.0"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
+sqlx = { version = "0.8", features = [
+    "runtime-tokio-rustls",
+    "mysql",
+] }
 sysinfo = "0.30"
 # on branch v0.52.x
 sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
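The new `sqlx` workspace dependency, with the tokio + rustls runtime and the `mysql` driver enabled, is what the `mysql_kvbackend` code paths build on. A minimal sketch of what those two features enable; the URL matches the CI test endpoint above, and the query is a placeholder, not GreptimeDB's actual metadata schema:

```rust
use sqlx::mysql::MySqlPoolOptions;
use sqlx::Row;

// Opens an async MySQL connection pool and runs a trivial query.
#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    let pool = MySqlPoolOptions::new()
        .max_connections(5)
        .connect("mysql://greptimedb:admin@127.0.0.1:3306/mysql")
        .await?;

    let row = sqlx::query("SELECT 1 AS one").fetch_one(&pool).await?;
    let one: i64 = row.try_get("one")?;
    assert_eq!(one, 1);
    Ok(())
}
```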
@@ -231,6 +231,7 @@ overwrite_entry_start_id = false
 # secret_access_key = "123456"
 # endpoint = "https://s3.amazonaws.com"
 # region = "us-west-2"
+# enable_virtual_host_style = false

 # Example of using Oss as the storage.
 # [storage]
@@ -318,6 +318,7 @@ retry_delay = "500ms"
 # secret_access_key = "123456"
 # endpoint = "https://s3.amazonaws.com"
 # region = "us-west-2"
+# enable_virtual_host_style = false

 # Example of using Oss as the storage.
 # [storage]
@@ -19,9 +19,7 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
 use common_decimal::Decimal128;
 use common_time::time::Time;
 use common_time::timestamp::TimeUnit;
-use common_time::{
-    Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
-};
+use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
 use datatypes::prelude::{ConcreteDataType, ValueRef};
 use datatypes::scalars::ScalarVector;
 use datatypes::types::{
@@ -29,8 +27,8 @@ use datatypes::types::{
 };
 use datatypes::value::{OrderedF32, OrderedF64, Value};
 use datatypes::vectors::{
-    BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
-    Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
+    BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
+    Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
     IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
     TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
     TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
@@ -118,7 +116,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
             ColumnDataType::Json => ConcreteDataType::json_datatype(),
             ColumnDataType::String => ConcreteDataType::string_datatype(),
             ColumnDataType::Date => ConcreteDataType::date_datatype(),
-            ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
+            ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
             ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
             ColumnDataType::TimestampMillisecond => {
                 ConcreteDataType::timestamp_millisecond_datatype()
@@ -271,7 +269,6 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
             ConcreteDataType::Binary(_) => ColumnDataType::Binary,
             ConcreteDataType::String(_) => ColumnDataType::String,
             ConcreteDataType::Date(_) => ColumnDataType::Date,
-            ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
             ConcreteDataType::Timestamp(t) => match t {
                 TimestampType::Second(_) => ColumnDataType::TimestampSecond,
                 TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
@@ -476,7 +473,6 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
         Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
         Value::Binary(val) => values.binary_values.push(val.to_vec()),
         Value::Date(val) => values.date_values.push(val.val()),
-        Value::DateTime(val) => values.datetime_values.push(val.val()),
         Value::Timestamp(val) => match val.unit() {
             TimeUnit::Second => values.timestamp_second_values.push(val.value()),
             TimeUnit::Millisecond => values.timestamp_millisecond_values.push(val.value()),
@@ -577,12 +573,11 @@ pub fn pb_value_to_value_ref<'a>(
         ValueData::BinaryValue(bytes) => ValueRef::Binary(bytes.as_slice()),
         ValueData::StringValue(string) => ValueRef::String(string.as_str()),
         ValueData::DateValue(d) => ValueRef::Date(Date::from(*d)),
-        ValueData::DatetimeValue(d) => ValueRef::DateTime(DateTime::new(*d)),
         ValueData::TimestampSecondValue(t) => ValueRef::Timestamp(Timestamp::new_second(*t)),
         ValueData::TimestampMillisecondValue(t) => {
             ValueRef::Timestamp(Timestamp::new_millisecond(*t))
         }
-        ValueData::TimestampMicrosecondValue(t) => {
+        ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t) => {
             ValueRef::Timestamp(Timestamp::new_microsecond(*t))
         }
         ValueData::TimestampNanosecondValue(t) => {
@@ -651,7 +646,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
         ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
         ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
         ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
-        ConcreteDataType::DateTime(_) => Arc::new(DateTimeVector::from_vec(values.datetime_values)),
         ConcreteDataType::Timestamp(unit) => match unit {
             TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
                 values.timestamp_second_values,
@@ -787,11 +781,6 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
             .into_iter()
             .map(|val| val.into())
             .collect(),
-        ConcreteDataType::DateTime(_) => values
-            .datetime_values
-            .into_iter()
-            .map(|v| Value::DateTime(v.into()))
-            .collect(),
         ConcreteDataType::Date(_) => values
             .date_values
             .into_iter()
@@ -947,9 +936,6 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
         Value::Date(v) => v1::Value {
             value_data: Some(ValueData::DateValue(v.val())),
         },
-        Value::DateTime(v) => v1::Value {
-            value_data: Some(ValueData::DatetimeValue(v.val())),
-        },
         Value::Timestamp(v) => match v.unit() {
             TimeUnit::Second => v1::Value {
                 value_data: Some(ValueData::TimestampSecondValue(v.value())),
@@ -1066,7 +1052,6 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
         Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
         Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
         Value::Date(v) => Some(ValueData::DateValue(v.val())),
-        Value::DateTime(v) => Some(ValueData::DatetimeValue(v.val())),
         Value::Timestamp(v) => Some(match v.unit() {
             TimeUnit::Second => ValueData::TimestampSecondValue(v.value()),
             TimeUnit::Millisecond => ValueData::TimestampMillisecondValue(v.value()),
@@ -1248,7 +1233,7 @@ mod tests {
             ColumnDataTypeWrapper::date_datatype().into()
         );
         assert_eq!(
-            ConcreteDataType::datetime_datatype(),
+            ConcreteDataType::timestamp_microsecond_datatype(),
             ColumnDataTypeWrapper::datetime_datatype().into()
         );
         assert_eq!(
@@ -1339,10 +1324,6 @@ mod tests {
             ColumnDataTypeWrapper::date_datatype(),
             ConcreteDataType::date_datatype().try_into().unwrap()
         );
-        assert_eq!(
-            ColumnDataTypeWrapper::datetime_datatype(),
-            ConcreteDataType::datetime_datatype().try_into().unwrap()
-        );
         assert_eq!(
             ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
             ConcreteDataType::timestamp_millisecond_datatype()
@@ -1830,17 +1811,6 @@ mod tests {
         ]
     );

-    test_convert_values!(
-        datetime,
-        vec![1.into(), 2.into(), 3.into()],
-        datetime,
-        vec![
-            Value::DateTime(1.into()),
-            Value::DateTime(2.into()),
-            Value::DateTime(3.into())
-        ]
-    );
-
    #[test]
    fn test_vectors_to_rows_for_different_types() {
        let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);
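The net effect of these hunks is that the dedicated `DateTime` value type disappears: a protobuf `DatetimeValue(v)` is now folded into the microsecond-precision timestamp path. A sketch of the new mapping, using the `common_time::Timestamp` API that appears in the diff; the wrapper function and the sample value are illustrative only:

```rust
use common_time::Timestamp;

// Matches the merged match arm above:
// ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t)
//     => ValueRef::Timestamp(Timestamp::new_microsecond(*t))
fn datetime_value_to_timestamp(v: i64) -> Timestamp {
    Timestamp::new_microsecond(v)
}

fn main() {
    let ts = datetime_value_to_timestamp(1_700_000_000_000_000);
    println!("{:?}", ts);
}
```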
@@ -132,6 +132,15 @@ pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<C
     Ok((!options.options.is_empty()).then_some(options))
 }

+/// Tries to construct a `ColumnOptions` for inverted index.
+pub fn options_from_inverted() -> ColumnOptions {
+    let mut options = ColumnOptions::default();
+    options
+        .options
+        .insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
+    options
+}
+
 /// Tries to construct a `FulltextAnalyzer` from the given analyzer.
 pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
     match analyzer {
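A hypothetical usage of the new helper; `options_from_inverted` and `INVERTED_INDEX_GRPC_KEY` come from the diff, while the surrounding assertion is illustrative only:

```rust
fn main() {
    // The helper simply records the inverted-index marker key -> "true" in the
    // ColumnOptions map, mirroring how `options_from_skipping` works above.
    let options = options_from_inverted();
    assert_eq!(
        options.options.get(INVERTED_INDEX_GRPC_KEY).map(String::as_str),
        Some("true")
    );
}
```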
@@ -77,7 +77,7 @@ trait SystemSchemaProviderInner {
     fn system_table(&self, name: &str) -> Option<SystemTableRef>;

     fn table_info(catalog_name: String, table: &SystemTableRef) -> TableInfoRef {
-        let table_meta = TableMetaBuilder::default()
+        let table_meta = TableMetaBuilder::empty()
             .schema(table.schema())
             .primary_key_indices(vec![])
             .next_column_id(0)
@@ -365,10 +365,6 @@ impl InformationSchemaColumnsBuilder {
         self.numeric_scales.push(None);

         match &column_schema.data_type {
-            ConcreteDataType::DateTime(datetime_type) => {
-                self.datetime_precisions
-                    .push(Some(datetime_type.precision() as i64));
-            }
             ConcreteDataType::Timestamp(ts_type) => {
                 self.datetime_precisions
                     .push(Some(ts_type.precision() as i64));
@@ -28,16 +28,19 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datatypes::prelude::ConcreteDataType as CDT;
 use datatypes::scalars::ScalarVectorBuilder;
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::timestamp::TimestampMillisecond;
 use datatypes::value::Value;
 use datatypes::vectors::{
-    Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
+    Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
+    UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
 };
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
 use store_api::storage::{ScanRequest, TableId};

 use crate::error::{
-    CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu, Result,
+    CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu,
+    Result, UpgradeWeakCatalogManagerRefSnafu,
 };
 use crate::information_schema::{Predicates, FLOWS};
 use crate::system_schema::information_schema::InformationTable;
@@ -59,6 +62,10 @@ pub const SOURCE_TABLE_IDS: &str = "source_table_ids";
 pub const SINK_TABLE_NAME: &str = "sink_table_name";
 pub const FLOWNODE_IDS: &str = "flownode_ids";
 pub const OPTIONS: &str = "options";
+pub const CREATED_TIME: &str = "created_time";
+pub const UPDATED_TIME: &str = "updated_time";
+pub const LAST_EXECUTION_TIME: &str = "last_execution_time";
+pub const SOURCE_TABLE_NAMES: &str = "source_table_names";

 /// The `information_schema.flows` to provides information about flows in databases.
 #[derive(Debug)]
@@ -99,6 +106,14 @@ impl InformationSchemaFlows {
             (SINK_TABLE_NAME, CDT::string_datatype(), false),
             (FLOWNODE_IDS, CDT::string_datatype(), true),
             (OPTIONS, CDT::string_datatype(), true),
+            (CREATED_TIME, CDT::timestamp_millisecond_datatype(), false),
+            (UPDATED_TIME, CDT::timestamp_millisecond_datatype(), false),
+            (
+                LAST_EXECUTION_TIME,
+                CDT::timestamp_millisecond_datatype(),
+                true,
+            ),
+            (SOURCE_TABLE_NAMES, CDT::string_datatype(), true),
         ]
         .into_iter()
         .map(|(name, ty, nullable)| ColumnSchema::new(name, ty, nullable))
@@ -170,6 +185,10 @@ struct InformationSchemaFlowsBuilder {
     sink_table_names: StringVectorBuilder,
     flownode_id_groups: StringVectorBuilder,
     option_groups: StringVectorBuilder,
+    created_time: TimestampMillisecondVectorBuilder,
+    updated_time: TimestampMillisecondVectorBuilder,
+    last_execution_time: TimestampMillisecondVectorBuilder,
+    source_table_names: StringVectorBuilder,
 }

 impl InformationSchemaFlowsBuilder {
@@ -196,6 +215,10 @@ impl InformationSchemaFlowsBuilder {
             sink_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             flownode_id_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             option_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+            created_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            updated_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            last_execution_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            source_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
         }
     }
@@ -235,13 +258,14 @@ impl InformationSchemaFlowsBuilder {
                 catalog_name: catalog_name.to_string(),
                 flow_name: flow_name.to_string(),
             })?;
-            self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
+            self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)
+                .await?;
         }

         self.finish()
     }

-    fn add_flow(
+    async fn add_flow(
         &mut self,
         predicates: &Predicates,
         flow_id: FlowId,
@@ -290,6 +314,36 @@ impl InformationSchemaFlowsBuilder {
                 input: format!("{:?}", flow_info.options()),
             },
         )?));
+        self.created_time
+            .push(Some(flow_info.created_time().timestamp_millis().into()));
+        self.updated_time
+            .push(Some(flow_info.updated_time().timestamp_millis().into()));
+        self.last_execution_time
+            .push(flow_stat.as_ref().and_then(|state| {
+                state
+                    .last_exec_time_map
+                    .get(&flow_id)
+                    .map(|v| TimestampMillisecond::new(*v))
+            }));
+
+        let mut source_table_names = vec![];
+        let catalog_name = self.catalog_name.clone();
+        let catalog_manager = self
+            .catalog_manager
+            .upgrade()
+            .context(UpgradeWeakCatalogManagerRefSnafu)?;
+        for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
+            source_table_names.extend(
+                catalog_manager
+                    .tables_by_ids(&catalog_name, &schema_name, flow_info.source_table_ids())
+                    .await?
+                    .into_iter()
+                    .map(|table| table.table_info().full_table_name()),
+            );
+        }
+
+        let source_table_names = source_table_names.join(",");
+        self.source_table_names.push(Some(&source_table_names));

         Ok(())
     }
@@ -307,6 +361,10 @@ impl InformationSchemaFlowsBuilder {
             Arc::new(self.sink_table_names.finish()),
             Arc::new(self.flownode_id_groups.finish()),
             Arc::new(self.option_groups.finish()),
+            Arc::new(self.created_time.finish()),
+            Arc::new(self.updated_time.finish()),
+            Arc::new(self.last_execution_time.finish()),
+            Arc::new(self.source_table_names.finish()),
         ];
         RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
     }
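The new `created_time`/`updated_time` columns come from the flow's chrono timestamps, while `last_execution_time` is pulled from the flow stat map as raw epoch milliseconds. A sketch of the two conversions the builder performs, assuming the `datatypes::timestamp::TimestampMillisecond` type used in the diff; the values here are arbitrary:

```rust
use chrono::Utc;
use datatypes::timestamp::TimestampMillisecond;

fn main() {
    // created_time / updated_time: chrono DateTime -> epoch millis -> vector value
    let now_ms: i64 = Utc::now().timestamp_millis();
    let created: TimestampMillisecond = now_ms.into();

    // last_execution_time: raw millis taken from the flow's last_exec_time_map
    let last_exec = TimestampMillisecond::new(now_ms);
    println!("{:?} {:?}", created, last_exec);
}
```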
@@ -20,7 +20,7 @@ use datatypes::vectors::{Int64Vector, StringVector, VectorRef};

 use super::table_names::*;
 use crate::system_schema::utils::tables::{
-    bigint_column, datetime_column, string_column, string_columns,
+    bigint_column, string_column, string_columns, timestamp_micro_column,
 };

 const NO_VALUE: &str = "NO";
@@ -163,17 +163,17 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
             string_column("EVENT_BODY"),
             string_column("EVENT_DEFINITION"),
             string_column("EVENT_TYPE"),
-            datetime_column("EXECUTE_AT"),
+            timestamp_micro_column("EXECUTE_AT"),
             bigint_column("INTERVAL_VALUE"),
             string_column("INTERVAL_FIELD"),
             string_column("SQL_MODE"),
-            datetime_column("STARTS"),
-            datetime_column("ENDS"),
+            timestamp_micro_column("STARTS"),
+            timestamp_micro_column("ENDS"),
             string_column("STATUS"),
             string_column("ON_COMPLETION"),
-            datetime_column("CREATED"),
-            datetime_column("LAST_ALTERED"),
-            datetime_column("LAST_EXECUTED"),
+            timestamp_micro_column("CREATED"),
+            timestamp_micro_column("LAST_ALTERED"),
+            timestamp_micro_column("LAST_EXECUTED"),
             string_column("EVENT_COMMENT"),
             bigint_column("ORIGINATOR"),
             string_column("CHARACTER_SET_CLIENT"),
@@ -204,10 +204,10 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
             bigint_column("INITIAL_SIZE"),
             bigint_column("MAXIMUM_SIZE"),
             bigint_column("AUTOEXTEND_SIZE"),
-            datetime_column("CREATION_TIME"),
-            datetime_column("LAST_UPDATE_TIME"),
-            datetime_column("LAST_ACCESS_TIME"),
-            datetime_column("RECOVER_TIME"),
+            timestamp_micro_column("CREATION_TIME"),
+            timestamp_micro_column("LAST_UPDATE_TIME"),
+            timestamp_micro_column("LAST_ACCESS_TIME"),
+            timestamp_micro_column("RECOVER_TIME"),
             bigint_column("TRANSACTION_COUNTER"),
             string_column("VERSION"),
             string_column("ROW_FORMAT"),
@@ -217,9 +217,9 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
             bigint_column("MAX_DATA_LENGTH"),
             bigint_column("INDEX_LENGTH"),
             bigint_column("DATA_FREE"),
-            datetime_column("CREATE_TIME"),
-            datetime_column("UPDATE_TIME"),
-            datetime_column("CHECK_TIME"),
+            timestamp_micro_column("CREATE_TIME"),
+            timestamp_micro_column("UPDATE_TIME"),
+            timestamp_micro_column("CHECK_TIME"),
             string_column("CHECKSUM"),
             string_column("STATUS"),
             string_column("EXTRA"),
@@ -330,8 +330,8 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
             string_column("SQL_DATA_ACCESS"),
             string_column("SQL_PATH"),
             string_column("SECURITY_TYPE"),
-            datetime_column("CREATED"),
-            datetime_column("LAST_ALTERED"),
+            timestamp_micro_column("CREATED"),
+            timestamp_micro_column("LAST_ALTERED"),
             string_column("SQL_MODE"),
             string_column("ROUTINE_COMMENT"),
             string_column("DEFINER"),
@@ -383,7 +383,7 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
             string_column("ACTION_REFERENCE_NEW_TABLE"),
             string_column("ACTION_REFERENCE_OLD_ROW"),
             string_column("ACTION_REFERENCE_NEW_ROW"),
-            datetime_column("CREATED"),
+            timestamp_micro_column("CREATED"),
             string_column("SQL_MODE"),
             string_column("DEFINER"),
             string_column("CHARACTER_SET_CLIENT"),
@@ -20,17 +20,18 @@ use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
 use common_error::ext::BoxedError;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
-use common_time::datetime::DateTime;
 use datafusion::execution::TaskContext;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
 use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+use datatypes::timestamp::TimestampMicrosecond;
 use datatypes::value::Value;
 use datatypes::vectors::{
-    ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
-    MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
+    ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
+    StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
+    UInt64VectorBuilder,
 };
 use futures::{StreamExt, TryStreamExt};
 use partition::manager::PartitionInfo;
@@ -127,9 +128,21 @@ impl InformationSchemaPartitions {
             ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
             ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
             ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
-            ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
-            ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
-            ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
+            ColumnSchema::new(
+                "create_time",
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                "update_time",
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                "check_time",
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
             ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
             ColumnSchema::new(
                 "partition_comment",
@@ -200,7 +213,7 @@ struct InformationSchemaPartitionsBuilder {
     partition_names: StringVectorBuilder,
     partition_ordinal_positions: Int64VectorBuilder,
     partition_expressions: StringVectorBuilder,
-    create_times: DateTimeVectorBuilder,
+    create_times: TimestampMicrosecondVectorBuilder,
     partition_ids: UInt64VectorBuilder,
 }

@@ -220,7 +233,7 @@ impl InformationSchemaPartitionsBuilder {
             partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
             partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-            create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
+            create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
             partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
         }
     }
@@ -324,7 +337,7 @@ impl InformationSchemaPartitionsBuilder {
         };

         self.partition_expressions.push(expressions.as_deref());
-        self.create_times.push(Some(DateTime::from(
+        self.create_times.push(Some(TimestampMicrosecond::from(
             table_info.meta.created_on.timestamp_millis(),
         )));
         self.partition_ids.push(Some(partition.id.as_u64()));
@@ -342,8 +355,8 @@ impl InformationSchemaPartitionsBuilder {
             Arc::new(Int64Vector::from(vec![None])),
             rows_num,
         ));
-        let null_datetime_vector = Arc::new(ConstantVector::new(
-            Arc::new(DateTimeVector::from(vec![None])),
+        let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
+            Arc::new(TimestampMicrosecondVector::from(vec![None])),
             rows_num,
         ));
         let partition_methods = Arc::new(ConstantVector::new(
@@ -373,8 +386,8 @@ impl InformationSchemaPartitionsBuilder {
             null_i64_vector.clone(),
             Arc::new(self.create_times.finish()),
             // TODO(dennis): supports update_time
-            null_datetime_vector.clone(),
-            null_datetime_vector,
+            null_timestampmicrosecond_vector.clone(),
+            null_timestampmicrosecond_vector,
             null_i64_vector,
             null_string_vector.clone(),
             null_string_vector.clone(),
@@ -30,7 +30,8 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::value::Value;
 use datatypes::vectors::{
-    DateTimeVectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
+    StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
+    UInt64VectorBuilder,
 };
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
@@ -105,9 +106,21 @@ impl InformationSchemaTables {
             ColumnSchema::new(TABLE_ROWS, ConcreteDataType::uint64_datatype(), true),
             ColumnSchema::new(DATA_FREE, ConcreteDataType::uint64_datatype(), true),
             ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
-            ColumnSchema::new(CREATE_TIME, ConcreteDataType::datetime_datatype(), true),
-            ColumnSchema::new(UPDATE_TIME, ConcreteDataType::datetime_datatype(), true),
-            ColumnSchema::new(CHECK_TIME, ConcreteDataType::datetime_datatype(), true),
+            ColumnSchema::new(
+                CREATE_TIME,
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                UPDATE_TIME,
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
+            ColumnSchema::new(
+                CHECK_TIME,
+                ConcreteDataType::timestamp_microsecond_datatype(),
+                true,
+            ),
             ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
             ColumnSchema::new(CHECKSUM, ConcreteDataType::uint64_datatype(), true),
             ColumnSchema::new(CREATE_OPTIONS, ConcreteDataType::string_datatype(), true),
@@ -182,9 +195,9 @@ struct InformationSchemaTablesBuilder {
     max_index_length: UInt64VectorBuilder,
     data_free: UInt64VectorBuilder,
     auto_increment: UInt64VectorBuilder,
-    create_time: DateTimeVectorBuilder,
-    update_time: DateTimeVectorBuilder,
-    check_time: DateTimeVectorBuilder,
+    create_time: TimestampMicrosecondVectorBuilder,
+    update_time: TimestampMicrosecondVectorBuilder,
+    check_time: TimestampMicrosecondVectorBuilder,
     table_collation: StringVectorBuilder,
     checksum: UInt64VectorBuilder,
     create_options: StringVectorBuilder,
@@ -219,9 +232,9 @@ impl InformationSchemaTablesBuilder {
             max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
             data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
             auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
-            create_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
-            update_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
-            check_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
+            create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
             table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
             create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -51,10 +51,10 @@ pub fn bigint_column(name: &str) -> ColumnSchema {
     )
 }

-pub fn datetime_column(name: &str) -> ColumnSchema {
+pub fn timestamp_micro_column(name: &str) -> ColumnSchema {
     ColumnSchema::new(
         str::to_lowercase(name),
-        ConcreteDataType::datetime_datatype(),
+        ConcreteDataType::timestamp_microsecond_datatype(),
         false,
     )
 }
@@ -6,6 +6,7 @@ license.workspace = true

 [features]
 pg_kvbackend = ["common-meta/pg_kvbackend"]
+mysql_kvbackend = ["common-meta/mysql_kvbackend"]

 [lints]
 workspace = true
@@ -43,6 +44,10 @@ futures.workspace = true
 humantime.workspace = true
 meta-client.workspace = true
 nu-ansi-term = "0.46"
+opendal = { version = "0.51.1", features = [
+    "services-fs",
+    "services-s3",
+] }
 query.workspace = true
 rand.workspace = true
 reqwest.workspace = true
@@ -23,6 +23,8 @@ use common_error::ext::BoxedError;
 use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::etcd::EtcdStore;
 use common_meta::kv_backend::memory::MemoryKvBackend;
+#[cfg(feature = "mysql_kvbackend")]
+use common_meta::kv_backend::rds::MySqlStore;
 #[cfg(feature = "pg_kvbackend")]
 use common_meta::kv_backend::rds::PgStore;
 use common_meta::peer::Peer;
@@ -63,6 +65,9 @@ pub struct BenchTableMetadataCommand {
     #[cfg(feature = "pg_kvbackend")]
     #[clap(long)]
     postgres_addr: Option<String>,
+    #[cfg(feature = "mysql_kvbackend")]
+    #[clap(long)]
+    mysql_addr: Option<String>,
     #[clap(long)]
     count: u32,
 }
@@ -86,6 +91,16 @@ impl BenchTableMetadataCommand {
             kv_backend
         };

+        #[cfg(feature = "mysql_kvbackend")]
+        let kv_backend = if let Some(mysql_addr) = &self.mysql_addr {
+            info!("Using mysql as kv backend");
+            MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
+                .await
+                .unwrap()
+        } else {
+            kv_backend
+        };
+
        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));

        let tool = BenchTableMetadata {
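The pattern above rebinds `kv_backend` once per compiled-in backend: each optional address flag, when present, shadows the previous binding. A condensed sketch of the MySQL branch in isolation; the `MySqlStore::with_url(addr, table_name, max_connections)` signature and the `"greptime_metakv"` table name come from the diff, while the wrapper function and the `KvBackendRef` return type are assumptions:

```rust
#[cfg(feature = "mysql_kvbackend")]
async fn mysql_kv_backend(mysql_addr: &str) -> common_meta::kv_backend::KvBackendRef {
    use common_meta::kv_backend::rds::MySqlStore;

    // "greptime_metakv" is the metadata table name; 128 is the pool size.
    MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
        .await
        .expect("failed to connect to MySQL kv backend")
}
```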
@@ -276,6 +276,24 @@ pub enum Error {
         #[snafu(implicit)]
         location: Location,
     },
+
+    #[snafu(display("OpenDAL operator failed"))]
+    OpenDal {
+        #[snafu(implicit)]
+        location: Location,
+        #[snafu(source)]
+        error: opendal::Error,
+    },
+
+    #[snafu(display("S3 config need be set"))]
+    S3ConfigNotSet {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
+    #[snafu(display("Output directory not set"))]
+    OutputDirNotSet {
+        #[snafu(implicit)]
+        location: Location,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -319,6 +337,9 @@ impl ErrorExt for Error {
             | Error::BuildClient { .. } => StatusCode::Unexpected,

             Error::Other { source, .. } => source.status_code(),
+            Error::OpenDal { .. } => StatusCode::Internal,
+            Error::S3ConfigNotSet { .. } => StatusCode::InvalidArguments,
+            Error::OutputDirNotSet { .. } => StatusCode::InvalidArguments,

             Error::BuildRuntime { source, .. } => source.status_code(),
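The two source-less variants are raised through snafu's generated context selectors, the same mechanism the export command uses below with `S3ConfigNotSetSnafu {}.build()`. A minimal sketch of the idiom; `Result` and `S3ConfigNotSetSnafu` come from the diff, and the checking function is hypothetical:

```rust
// snafu::ensure! builds and returns the selector's error when the condition
// fails, which is equivalent to the explicit `.build()` call in the diff.
fn check_s3_config(bucket: Option<&str>) -> Result<()> {
    snafu::ensure!(bucket.is_some(), S3ConfigNotSetSnafu);
    Ok(())
}
```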
@@ -21,15 +21,18 @@ use async_trait::async_trait;
 use clap::{Parser, ValueEnum};
 use common_error::ext::BoxedError;
 use common_telemetry::{debug, error, info};
+use opendal::layers::LoggingLayer;
+use opendal::{services, Operator};
 use serde_json::Value;
 use snafu::{OptionExt, ResultExt};
-use tokio::fs::File;
-use tokio::io::{AsyncWriteExt, BufWriter};
 use tokio::sync::Semaphore;
 use tokio::time::Instant;

 use crate::database::{parse_proxy_opts, DatabaseClient};
-use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
+use crate::error::{
+    EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
+    SchemaNotFoundSnafu,
+};
 use crate::{database, Tool};

 type TableReference = (String, String, String);
@@ -52,8 +55,9 @@ pub struct ExportCommand {
     addr: String,

     /// Directory to put the exported data. E.g.: /tmp/greptimedb-export
+    /// for local export.
     #[clap(long)]
-    output_dir: String,
+    output_dir: Option<String>,

     /// The name of the catalog to export.
     #[clap(long, default_value = "greptime-*")]
@@ -101,10 +105,51 @@ pub struct ExportCommand {
     /// Disable proxy server, if set, will not use any proxy.
     #[clap(long)]
     no_proxy: bool,
+
+    /// if export data to s3
+    #[clap(long)]
+    s3: bool,
+
+    /// The s3 bucket name
+    /// if s3 is set, this is required
+    #[clap(long)]
+    s3_bucket: Option<String>,
+
+    /// The s3 endpoint
+    /// if s3 is set, this is required
+    #[clap(long)]
+    s3_endpoint: Option<String>,
+
+    /// The s3 access key
+    /// if s3 is set, this is required
+    #[clap(long)]
+    s3_access_key: Option<String>,
+
+    /// The s3 secret key
+    /// if s3 is set, this is required
+    #[clap(long)]
+    s3_secret_key: Option<String>,
+
+    /// The s3 region
+    /// if s3 is set, this is required
+    #[clap(long)]
+    s3_region: Option<String>,
 }

 impl ExportCommand {
     pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
+        if self.s3
+            && (self.s3_bucket.is_none()
+                || self.s3_endpoint.is_none()
+                || self.s3_access_key.is_none()
+                || self.s3_secret_key.is_none()
+                || self.s3_region.is_none())
+        {
+            return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
+        }
+        if !self.s3 && self.output_dir.is_none() {
+            return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
+        }
         let (catalog, schema) =
             database::split_database(&self.database).map_err(BoxedError::new)?;
         let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
@@ -126,24 +171,43 @@ impl ExportCommand {
             target: self.target.clone(),
             start_time: self.start_time.clone(),
             end_time: self.end_time.clone(),
+            s3: self.s3,
+            s3_bucket: self.s3_bucket.clone(),
+            s3_endpoint: self.s3_endpoint.clone(),
+            s3_access_key: self.s3_access_key.clone(),
+            s3_secret_key: self.s3_secret_key.clone(),
+            s3_region: self.s3_region.clone(),
         }))
     }
 }

+#[derive(Clone)]
 pub struct Export {
     catalog: String,
     schema: Option<String>,
     database_client: DatabaseClient,
-    output_dir: String,
+    output_dir: Option<String>,
     parallelism: usize,
     target: ExportTarget,
     start_time: Option<String>,
     end_time: Option<String>,
+    s3: bool,
+    s3_bucket: Option<String>,
+    s3_endpoint: Option<String>,
+    s3_access_key: Option<String>,
+    s3_secret_key: Option<String>,
+    s3_region: Option<String>,
 }

 impl Export {
     fn catalog_path(&self) -> PathBuf {
-        PathBuf::from(&self.output_dir).join(&self.catalog)
+        if self.s3 {
+            PathBuf::from(&self.catalog)
+        } else if let Some(dir) = &self.output_dir {
+            PathBuf::from(dir).join(&self.catalog)
+        } else {
+            unreachable!("catalog_path: output_dir must be set when not using s3")
+        }
     }

     async fn get_db_names(&self) -> Result<Vec<String>> {
@@ -300,19 +364,23 @@ impl Export {
         let timer = Instant::now();
         let db_names = self.get_db_names().await?;
         let db_count = db_names.len();
+        let operator = self.build_operator().await?;

         for schema in db_names {
-            let db_dir = self.catalog_path().join(format!("{schema}/"));
-            tokio::fs::create_dir_all(&db_dir)
-                .await
-                .context(FileIoSnafu)?;
-            let file = db_dir.join("create_database.sql");
-            let mut file = File::create(file).await.context(FileIoSnafu)?;
             let create_database = self
                 .show_create("DATABASE", &self.catalog, &schema, None)
                 .await?;
-            file.write_all(create_database.as_bytes())
-                .await
-                .context(FileIoSnafu)?;
+
+            let file_path = self.get_file_path(&schema, "create_database.sql");
+            self.write_to_storage(&operator, &file_path, create_database.into_bytes())
+                .await?;
+
+            info!(
+                "Exported {}.{} database creation SQL to {}",
+                self.catalog,
+                schema,
+                self.format_output_path(&file_path)
+            );
         }

         let elapsed = timer.elapsed();
@@ -326,149 +394,267 @@ impl Export {
|
||||
let semaphore = Arc::new(Semaphore::new(self.parallelism));
|
||||
let db_names = self.get_db_names().await?;
|
||||
let db_count = db_names.len();
|
||||
let operator = Arc::new(self.build_operator().await?);
|
||||
let mut tasks = Vec::with_capacity(db_names.len());
|
||||
|
||||
for schema in db_names {
|
||||
let semaphore_moved = semaphore.clone();
|
||||
let export_self = self.clone();
|
||||
let operator = operator.clone();
|
||||
tasks.push(async move {
|
||||
let _permit = semaphore_moved.acquire().await.unwrap();
|
||||
let (metric_physical_tables, remaining_tables, views) =
|
||||
self.get_table_list(&self.catalog, &schema).await?;
|
||||
let table_count =
|
||||
metric_physical_tables.len() + remaining_tables.len() + views.len();
|
||||
let db_dir = self.catalog_path().join(format!("{schema}/"));
|
||||
tokio::fs::create_dir_all(&db_dir)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
let file = db_dir.join("create_tables.sql");
|
||||
let mut file = File::create(file).await.context(FileIoSnafu)?;
|
||||
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
|
||||
let create_table = self.show_create("TABLE", &c, &s, Some(&t)).await?;
|
||||
file.write_all(create_table.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
}
|
||||
for (c, s, v) in views {
|
||||
let create_view = self.show_create("VIEW", &c, &s, Some(&v)).await?;
|
||||
file.write_all(create_view.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
let (metric_physical_tables, remaining_tables, views) = export_self
|
||||
.get_table_list(&export_self.catalog, &schema)
|
||||
.await?;
|
||||
|
||||
// Create directory if needed for file system storage
|
||||
if !export_self.s3 {
|
||||
let db_dir = format!("{}/{}/", export_self.catalog, schema);
|
||||
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
|
||||
}
|
||||
|
||||
let file_path = export_self.get_file_path(&schema, "create_tables.sql");
|
||||
let mut content = Vec::new();
|
||||
|
||||
// Add table creation SQL
|
||||
for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
|
||||
let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
|
||||
content.extend_from_slice(create_table.as_bytes());
|
||||
}
|
||||
|
||||
// Add view creation SQL
|
||||
for (c, s, v) in &views {
|
||||
let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
|
||||
content.extend_from_slice(create_view.as_bytes());
|
||||
}
|
||||
|
||||
// Write to storage
|
||||
export_self
|
||||
.write_to_storage(&operator, &file_path, content)
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Finished exporting {}.{schema} with {table_count} table schemas to path: {}",
|
||||
self.catalog,
|
||||
db_dir.to_string_lossy()
|
||||
"Finished exporting {}.{schema} with {} table schemas to path: {}",
|
||||
export_self.catalog,
|
||||
metric_physical_tables.len() + remaining_tables.len() + views.len(),
|
||||
export_self.format_output_path(&file_path)
|
||||
);
|
||||
|
||||
Ok::<(), Error>(())
|
||||
});
|
||||
}
|
||||
|
||||
let success = futures::future::join_all(tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.filter(|r| match r {
|
||||
Ok(_) => true,
|
||||
Err(e) => {
|
||||
error!(e; "export schema job failed");
|
||||
false
|
||||
}
|
||||
})
|
||||
.count();
|
||||
|
||||
let success = self.execute_tasks(tasks).await;
|
||||
let elapsed = timer.elapsed();
|
||||
info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_operator(&self) -> Result<Operator> {
|
||||
if self.s3 {
|
||||
self.build_s3_operator().await
|
||||
} else {
|
||||
self.build_fs_operator().await
|
||||
}
|
||||
}
|
||||
|
||||
async fn build_s3_operator(&self) -> Result<Operator> {
|
||||
let mut builder = services::S3::default().root("").bucket(
|
||||
self.s3_bucket
|
||||
.as_ref()
|
||||
.expect("s3_bucket must be provided when s3 is enabled"),
|
||||
);
|
||||
|
||||
if let Some(endpoint) = self.s3_endpoint.as_ref() {
|
||||
builder = builder.endpoint(endpoint);
|
||||
}
|
||||
|
||||
if let Some(region) = self.s3_region.as_ref() {
|
||||
builder = builder.region(region);
|
||||
}
|
||||
|
||||
if let Some(key_id) = self.s3_access_key.as_ref() {
|
||||
builder = builder.access_key_id(key_id);
|
||||
}
|
||||
|
||||
if let Some(secret_key) = self.s3_secret_key.as_ref() {
|
||||
builder = builder.secret_access_key(secret_key);
|
||||
}
|
||||
|
||||
let op = Operator::new(builder)
|
||||
.context(OpenDalSnafu)?
|
||||
.layer(LoggingLayer::default())
|
||||
.finish();
|
||||
Ok(op)
|
||||
}
|
||||
|
||||
async fn build_fs_operator(&self) -> Result<Operator> {
|
||||
let root = self
|
||||
.output_dir
|
||||
.as_ref()
|
||||
.context(OutputDirNotSetSnafu)?
|
||||
.clone();
|
||||
let op = Operator::new(services::Fs::default().root(&root))
|
||||
.context(OpenDalSnafu)?
|
||||
.layer(LoggingLayer::default())
|
||||
.finish();
|
||||
Ok(op)
|
||||
}
|
||||
|
||||
async fn export_database_data(&self) -> Result<()> {
|
||||
let timer = Instant::now();
|
||||
let semaphore = Arc::new(Semaphore::new(self.parallelism));
|
||||
let db_names = self.get_db_names().await?;
|
||||
let db_count = db_names.len();
|
||||
let mut tasks = Vec::with_capacity(db_count);
|
||||
let operator = Arc::new(self.build_operator().await?);
|
||||
let with_options = build_with_options(&self.start_time, &self.end_time);
|
||||
|
||||
for schema in db_names {
|
||||
let semaphore_moved = semaphore.clone();
|
||||
let export_self = self.clone();
|
||||
let with_options_clone = with_options.clone();
|
||||
let operator = operator.clone();
|
||||
|
||||
tasks.push(async move {
|
||||
let _permit = semaphore_moved.acquire().await.unwrap();
|
||||
let db_dir = self.catalog_path().join(format!("{schema}/"));
|
||||
tokio::fs::create_dir_all(&db_dir)
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
|
||||
let with_options = match (&self.start_time, &self.end_time) {
|
||||
(Some(start_time), Some(end_time)) => {
|
||||
format!(
|
||||
"WITH (FORMAT='parquet', start_time='{}', end_time='{}')",
|
||||
start_time, end_time
|
||||
)
|
||||
}
|
||||
(Some(start_time), None) => {
|
||||
format!("WITH (FORMAT='parquet', start_time='{}')", start_time)
|
||||
}
|
||||
(None, Some(end_time)) => {
|
||||
format!("WITH (FORMAT='parquet', end_time='{}')", end_time)
|
||||
}
|
||||
(None, None) => "WITH (FORMAT='parquet')".to_string(),
|
||||
};
|
||||
// Create directory if not using S3
|
||||
if !export_self.s3 {
|
||||
let db_dir = format!("{}/{}/", export_self.catalog, schema);
|
||||
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
|
||||
}
|
||||
|
||||
let (path, connection_part) = export_self.get_storage_params(&schema);
|
||||
|
||||
// Execute COPY DATABASE TO command
|
||||
let sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" TO '{}' {};"#,
|
||||
self.catalog,
|
||||
schema,
|
||||
db_dir.to_str().unwrap(),
|
||||
with_options
|
||||
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
info!("Executing sql: {sql}");
|
||||
export_self.database_client.sql_in_public(&sql).await?;
|
||||
info!(
|
||||
"Finished exporting {}.{} data to {}",
|
||||
export_self.catalog, schema, path
|
||||
);
|
||||
|
||||
info!("Executing sql: {sql}");
|
||||
// Create copy_from.sql file
|
||||
let copy_database_from_sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
|
||||
export_self.catalog, schema, path, with_options_clone, connection_part
|
||||
);
|
||||
|
||||
self.database_client.sql_in_public(&sql).await?;
|
||||
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
|
||||
export_self
|
||||
.write_to_storage(
|
||||
&operator,
|
||||
©_from_path,
|
||||
copy_database_from_sql.into_bytes(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Finished exporting {}.{schema} data into path: {}",
|
||||
self.catalog,
|
||||
db_dir.to_string_lossy()
|
||||
);
|
||||
|
||||
// The export copy from sql
|
||||
let copy_from_file = db_dir.join("copy_from.sql");
|
||||
let mut writer =
|
||||
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
|
||||
let copy_database_from_sql = format!(
|
||||
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH (FORMAT='parquet');"#,
|
||||
self.catalog,
|
||||
"Finished exporting {}.{} copy_from.sql to {}",
|
||||
export_self.catalog,
|
||||
schema,
|
||||
db_dir.to_str().unwrap()
|
||||
export_self.format_output_path(©_from_path)
|
||||
);
|
||||
writer
|
||||
.write(copy_database_from_sql.as_bytes())
|
||||
.await
|
||||
.context(FileIoSnafu)?;
|
||||
writer.flush().await.context(FileIoSnafu)?;
|
||||
|
||||
info!("Finished exporting {}.{schema} copy_from.sql", self.catalog);
|
||||
|
||||
Ok::<(), Error>(())
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
let success = futures::future::join_all(tasks)
|
||||
let success = self.execute_tasks(tasks).await;
|
||||
let elapsed = timer.elapsed();
|
||||
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
|
||||
format!("{}/{}/{}", self.catalog, schema, file_name)
|
||||
}
|
||||
|
||||
fn format_output_path(&self, file_path: &str) -> String {
|
||||
if self.s3 {
|
||||
format!(
|
||||
"s3://{}/{}",
|
||||
self.s3_bucket.as_ref().unwrap_or(&String::new()),
|
||||
file_path
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{}/{}",
|
||||
self.output_dir.as_ref().unwrap_or(&String::new()),
|
||||
file_path
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn write_to_storage(
|
||||
&self,
|
||||
op: &Operator,
|
||||
file_path: &str,
|
||||
content: Vec<u8>,
|
||||
) -> Result<()> {
|
||||
op.write(file_path, content).await.context(OpenDalSnafu)
|
||||
}
|
||||
|
||||
fn get_storage_params(&self, schema: &str) -> (String, String) {
|
||||
if self.s3 {
|
||||
let s3_path = format!(
|
||||
"s3://{}/{}/{}/",
|
||||
// Safety: s3_bucket is required when s3 is enabled
|
||||
self.s3_bucket.as_ref().unwrap(),
|
||||
self.catalog,
|
||||
schema
|
||||
);
|
||||
|
||||
// endpoint is optional
|
||||
let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() {
|
||||
format!(", ENDPOINT='{}'", endpoint)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
// Safety: All s3 options are required
|
||||
let connection_options = format!(
|
||||
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
|
||||
self.s3_access_key.as_ref().unwrap(),
|
||||
self.s3_secret_key.as_ref().unwrap(),
|
||||
self.s3_region.as_ref().unwrap(),
|
||||
endpoint_option
|
||||
);
|
||||
|
||||
(s3_path, format!(" CONNECTION ({})", connection_options))
|
||||
} else {
|
||||
(
|
||||
self.catalog_path()
|
||||
.join(format!("{schema}/"))
|
||||
.to_string_lossy()
|
||||
.to_string(),
|
||||
String::new(),
|
||||
)
|
||||
}
|
||||
}

async fn execute_tasks(
    &self,
    tasks: Vec<impl std::future::Future<Output = Result<()>>>,
) -> usize {
    futures::future::join_all(tasks)
        .await
        .into_iter()
        .filter(|r| match r {
            Ok(_) => true,
            Err(e) => {
                error!(e; "export database job failed");
                error!(e; "export job failed");
                false
            }
        })
        .count()
}
}
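
As a toy illustration of the counting behavior above (a standalone sketch to run inside an async context; the task values are made up):

    use futures::future;

    // Three ready-made tasks: two succeed, one fails.
    let tasks = vec![
        future::ready(Ok::<(), String>(())),
        future::ready(Err("boom".to_string())),
        future::ready(Ok(())),
    ];
    // Counting only the Ok results yields 2, mirroring `execute_tasks`.
    let success = future::join_all(tasks)
        .await
        .into_iter()
        .filter(|r| r.is_ok())
        .count();
    assert_eq!(success, 2);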

@@ -493,3 +679,15 @@ impl Tool for Export {
        }
    }
}

/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
    let mut options = vec!["format = 'parquet'".to_string()];
    if let Some(start) = start_time {
        options.push(format!("start_time = '{}'", start));
    }
    if let Some(end) = end_time {
        options.push(format!("end_time = '{}'", end));
    }
    options.join(", ")
}
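
For example, the helper above renders options like this (a small sketch; the timestamp literal is illustrative):

    let with = build_with_options(&Some("2024-01-01 00:00:00".to_string()), &None);
    assert_eq!(with, "format = 'parquet', start_time = '2024-01-01 00:00:00'");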

@@ -16,6 +16,7 @@

mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;

@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;

pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};

@@ -311,8 +311,6 @@ impl StartCommand {
        Arc::new(executor),
    );

    let frontend_client = FrontendClient::from_meta_client(meta_client.clone());

    let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
    let flownode_builder = FlownodeBuilder::new(
        opts,

@@ -320,7 +318,6 @@ impl StartCommand {
        table_metadata_manager,
        catalog_manager.clone(),
        flow_metadata_manager,
        Arc::new(frontend_client),
    )
    .with_heartbeat_task(heartbeat_task);

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{
    FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
    FrontendInvoker,
};
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};

@@ -536,16 +533,12 @@ impl StartCommand {
        flow: opts.flow.clone(),
        ..Default::default()
    };

    let fe_server_addr = fe_opts.grpc.bind_addr.clone();
    let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
    let flow_builder = FlownodeBuilder::new(
        flownode_options,
        plugins.clone(),
        table_metadata_manager.clone(),
        catalog_manager.clone(),
        flow_metadata_manager.clone(),
        Arc::new(frontend_client),
    );
    let flownode = Arc::new(
        flow_builder

@@ -17,6 +17,7 @@ api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
bincode = "1.3"
chrono.workspace = true
common-base.workspace = true
common-catalog.workspace = true
common-error.workspace = true

@@ -43,7 +43,6 @@ impl Function for DateFormatFunction {
    helper::one_of_sigs2(
        vec![
            ConcreteDataType::date_datatype(),
            ConcreteDataType::datetime_datatype(),
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::timestamp_microsecond_datatype(),

@@ -105,22 +104,6 @@ impl Function for DateFormatFunction {
                results.push(result.as_deref());
            }
        }
        ConcreteDataType::DateTime(_) => {
            for i in 0..size {
                let datetime = left.get(i).as_datetime();
                let format = formats.get(i).as_string();

                let result = match (datetime, format) {
                    (Some(datetime), Some(fmt)) => datetime
                        .as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
                        .map_err(BoxedError::new)
                        .context(error::ExecuteSnafu)?,
                    _ => None,
                };

                results.push(result.as_deref());
            }
        }
        _ => {
            return UnsupportedInputDataTypeSnafu {
                function: NAME,

@@ -147,7 +130,7 @@ mod tests {
    use common_query::prelude::{TypeSignature, Volatility};
    use datatypes::prelude::{ConcreteDataType, ScalarVector};
    use datatypes::value::Value;
    use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};
    use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};

    use super::{DateFormatFunction, *};

@@ -169,16 +152,11 @@ mod tests {
        ConcreteDataType::string_datatype(),
        f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
    );
    assert_eq!(
        ConcreteDataType::string_datatype(),
        f.return_type(&[ConcreteDataType::datetime_datatype()])
            .unwrap()
    );
    assert!(matches!(f.signature(),
        Signature {
            type_signature: TypeSignature::OneOf(sigs),
            volatility: Volatility::Immutable
        } if sigs.len() == 6));
        } if sigs.len() == 5));
}

#[test]
@@ -262,45 +240,4 @@ mod tests {
        }
    }
}

#[test]
fn test_datetime_date_format() {
    let f = DateFormatFunction;

    let dates = vec![Some(123), None, Some(42), None];
    let formats = vec![
        "%Y-%m-%d %T.%3f",
        "%Y-%m-%d %T.%3f",
        "%Y-%m-%d %T.%3f",
        "%Y-%m-%d %T.%3f",
    ];
    let results = [
        Some("1970-01-01 00:00:00.123"),
        None,
        Some("1970-01-01 00:00:00.042"),
        None,
    ];

    let date_vector = DateTimeVector::from(dates.clone());
    let interval_vector = StringVector::from_vec(formats);
    let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
    let vector = f.eval(&FunctionContext::default(), &args).unwrap();

    assert_eq!(4, vector.len());
    for (i, _t) in dates.iter().enumerate() {
        let v = vector.get(i);
        let result = results.get(i).unwrap();

        if result.is_none() {
            assert_eq!(Value::Null, v);
            continue;
        }
        match v {
            Value::String(s) => {
                assert_eq!(s.as_utf8(), result.unwrap());
            }
            _ => unreachable!(),
        }
    }
}
}

@@ -118,11 +118,6 @@ mod tests {
    ConcreteDataType::date_datatype(),
    f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
    ConcreteDataType::datetime_datatype(),
    f.return_type(&[ConcreteDataType::datetime_datatype()])
        .unwrap()
);
assert!(
    matches!(f.signature(),
        Signature {

@@ -23,7 +23,7 @@ use datatypes::arrow::array::AsArray;
use datatypes::arrow::compute::cast;
use datatypes::arrow::compute::kernels::zip;
use datatypes::arrow::datatypes::{
    DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
    DataType as ArrowDataType, Date32Type, TimeUnit, TimestampMicrosecondType,
    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
};
use datatypes::prelude::ConcreteDataType;

@@ -69,9 +69,8 @@ impl Function for GreatestFunction {
    );

    match &input_types[0] {
        ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
        ConcreteDataType::String(_) => Ok(ConcreteDataType::timestamp_millisecond_datatype()),
        ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
        ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
        ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
        _ => UnsupportedInputDataTypeSnafu {
            function: NAME,

@@ -87,7 +86,6 @@ impl Function for GreatestFunction {
    vec![
        ConcreteDataType::string_datatype(),
        ConcreteDataType::date_datatype(),
        ConcreteDataType::datetime_datatype(),
        ConcreteDataType::timestamp_nanosecond_datatype(),
        ConcreteDataType::timestamp_microsecond_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),

@@ -109,20 +107,24 @@ impl Function for GreatestFunction {
    );
    match columns[0].data_type() {
        ConcreteDataType::String(_) => {
            // Treats string as `DateTime` type.
            let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
                .context(ArrowComputeSnafu)?;
            let column1 = column1.as_primitive::<Date64Type>();
            let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
                .context(ArrowComputeSnafu)?;
            let column2 = column2.as_primitive::<Date64Type>();
            let column1 = cast(
                &columns[0].to_arrow_array(),
                &ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
            )
            .context(ArrowComputeSnafu)?;
            let column1 = column1.as_primitive::<TimestampMillisecondType>();
            let column2 = cast(
                &columns[1].to_arrow_array(),
                &ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
            )
            .context(ArrowComputeSnafu)?;
            let column2 = column2.as_primitive::<TimestampMillisecondType>();
            let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
            let result =
                zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
            Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
        }
        ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
        ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
        ConcreteDataType::Timestamp(ts_type) => match ts_type {
            TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
            TimestampType::Millisecond(_) => {

@@ -155,15 +157,15 @@ mod tests {
    use std::sync::Arc;

    use common_time::timestamp::TimeUnit;
    use common_time::{Date, DateTime, Timestamp};
    use common_time::{Date, Timestamp};
    use datatypes::types::{
        DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
        TimestampNanosecondType, TimestampSecondType,
        DateType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
        TimestampSecondType,
    };
    use datatypes::value::Value;
    use datatypes::vectors::{
        DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
        TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
        DateVector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
        TimestampNanosecondVector, TimestampSecondVector, Vector,
    };
    use paste::paste;

@@ -178,7 +180,7 @@ mod tests {
        ConcreteDataType::string_datatype()
    ])
    .unwrap(),
    ConcreteDataType::DateTime(DateTimeType)
    ConcreteDataType::timestamp_millisecond_datatype()
);
let columns = vec![
    Arc::new(StringVector::from(vec![

@@ -194,15 +196,18 @@ mod tests {
    let result = function
        .eval(&FunctionContext::default(), &columns)
        .unwrap();
    let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
    let result = result
        .as_any()
        .downcast_ref::<TimestampMillisecondVector>()
        .unwrap();
    assert_eq!(result.len(), 2);
    assert_eq!(
        result.get(0),
        Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
        Value::Timestamp(Timestamp::from_str("2001-02-01 00:00:00", None).unwrap())
    );
    assert_eq!(
        result.get(1),
        Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
        Value::Timestamp(Timestamp::from_str("2012-12-23 00:00:00", None).unwrap())
    );
}

@@ -245,30 +250,33 @@ mod tests {
    assert_eq!(
        function
            .return_type(&[
                ConcreteDataType::datetime_datatype(),
                ConcreteDataType::datetime_datatype()
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype()
            ])
            .unwrap(),
        ConcreteDataType::DateTime(DateTimeType)
        ConcreteDataType::timestamp_millisecond_datatype()
    );

    let columns = vec![
        Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
        Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
        Arc::new(TimestampMillisecondVector::from_slice(vec![-1, 2])) as _,
        Arc::new(TimestampMillisecondVector::from_slice(vec![0, 1])) as _,
    ];

    let result = function
        .eval(&FunctionContext::default(), &columns)
        .unwrap();
    let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
    let result = result
        .as_any()
        .downcast_ref::<TimestampMillisecondVector>()
        .unwrap();
    assert_eq!(result.len(), 2);
    assert_eq!(
        result.get(0),
        Value::DateTime(DateTime::from_str("1970-01-01 00:00:00", None).unwrap())
        Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00", None).unwrap())
    );
    assert_eq!(
        result.get(1),
        Value::DateTime(DateTime::from_str("1970-01-01 00:00:00.002", None).unwrap())
        Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00.002", None).unwrap())
    );
}

@@ -17,7 +17,7 @@ use std::sync::Arc;

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use common_time::{Date, DateTime, Timestamp};
use common_time::{Date, Timestamp};
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{Int64Vector, VectorRef};
use snafu::ensure;

@@ -32,10 +32,6 @@ const NAME: &str = "to_unixtime";

fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
    let timezone = &func_ctx.query_ctx.timezone();
    if let Ok(dt) = DateTime::from_str(arg, Some(timezone)) {
        return Some(dt.val() / 1000);
    }

    if let Ok(ts) = Timestamp::from_str(arg, Some(timezone)) {
        return Some(ts.split().0);
    }

@@ -59,12 +55,6 @@ fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
        .collect::<Vec<Option<i64>>>()
}

fn convert_datetimes_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
    (0..vector.len())
        .map(|i| vector.get(i).as_datetime().map(|dt| dt.val() / 1000))
        .collect::<Vec<Option<i64>>>()
}

impl Function for ToUnixtimeFunction {
    fn name(&self) -> &str {
        NAME

@@ -82,7 +72,6 @@ impl Function for ToUnixtimeFunction {
        ConcreteDataType::int32_datatype(),
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::date_datatype(),
        ConcreteDataType::datetime_datatype(),
        ConcreteDataType::timestamp_second_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        ConcreteDataType::timestamp_microsecond_datatype(),

@@ -119,10 +108,6 @@ impl Function for ToUnixtimeFunction {
            let seconds = convert_dates_to_seconds(vector);
            Ok(Arc::new(Int64Vector::from(seconds)))
        }
        ConcreteDataType::DateTime(_) => {
            let seconds = convert_datetimes_to_seconds(vector);
            Ok(Arc::new(Int64Vector::from(seconds)))
        }
        ConcreteDataType::Timestamp(_) => {
            let seconds = convert_timestamps_to_seconds(vector);
            Ok(Arc::new(Int64Vector::from(seconds)))

@@ -148,7 +133,7 @@ mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::value::Value;
    use datatypes::vectors::{
        DateTimeVector, DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
        DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
    };

    use super::{ToUnixtimeFunction, *};

@@ -171,7 +156,6 @@ mod tests {
    ConcreteDataType::int32_datatype(),
    ConcreteDataType::int64_datatype(),
    ConcreteDataType::date_datatype(),
    ConcreteDataType::datetime_datatype(),
    ConcreteDataType::timestamp_second_datatype(),
    ConcreteDataType::timestamp_millisecond_datatype(),
    ConcreteDataType::timestamp_microsecond_datatype(),

@@ -253,31 +237,6 @@ mod tests {
    }
}

#[test]
fn test_datetime_to_unixtime() {
    let f = ToUnixtimeFunction;

    let times = vec![Some(123000), None, Some(42000), None];
    let results = [Some(123), None, Some(42), None];
    let date_vector = DateTimeVector::from(times.clone());
    let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
    let vector = f.eval(&FunctionContext::default(), &args).unwrap();
    assert_eq!(4, vector.len());
    for (i, _t) in times.iter().enumerate() {
        let v = vector.get(i);
        if i == 1 || i == 3 {
            assert_eq!(Value::Null, v);
            continue;
        }
        match v {
            Value::Int64(ts) => {
                assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
            }
            _ => unreachable!(),
        }
    }
}

#[test]
fn test_timestamp_to_unixtime() {
    let f = ToUnixtimeFunction;

@@ -445,20 +445,10 @@ impl Pool {

async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
    let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
    // use weak ref here to prevent pool being leaked
    let pool_weak = {
        let weak = Arc::downgrade(&pool);
        drop(pool);
        weak
    };

    loop {
        let _ = interval.tick().await;
        if let Some(pool) = pool_weak.upgrade() {
            pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
        } else {
            // no one is using this pool, so we can also let go
            break;
        }
        pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
    }
}
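
The hunk above moves the loop onto a Weak handle so the background task no longer keeps the pool alive. A minimal standalone sketch of the same pattern (the type and function names here are illustrative, not from the codebase):

    use std::sync::{Arc, Weak};

    struct Pool;

    // Hold only a Weak in the background task; dropping the last Arc ends the loop.
    fn downgrade_for_gc(pool: Arc<Pool>) -> Weak<Pool> {
        Arc::downgrade(&pool)
        // `pool` is dropped here, so the task itself holds no strong reference.
    }

    fn tick(pool_weak: &Weak<Pool>) -> bool {
        match pool_weak.upgrade() {
            Some(_pool) => true, // pool still alive: do the periodic work
            None => false,       // last strong reference gone: stop looping
        }
    }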

@@ -17,8 +17,8 @@ use api::v1::column::Values;
use common_base::BitVec;
use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
use datatypes::vectors::{
    BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
    Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
    BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
    Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
    IntervalMonthDayNanoVector, IntervalYearMonthVector, StringVector, TimeMicrosecondVector,
    TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,

@@ -141,12 +141,6 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
    (ConcreteDataType::Date(_), DateVector, date_values, |x| {
        x.val()
    }),
    (
        ConcreteDataType::DateTime(_),
        DateTimeVector,
        datetime_values,
        |x| { x.val() }
    ),
    (
        ConcreteDataType::Timestamp(TimestampType::Second(_)),
        TimestampSecondVector,

@@ -18,11 +18,13 @@ mod print_caller;
mod range_fn;
mod stack_trace_debug;
mod utils;

use aggr_func::{impl_aggr_func_type_store, impl_as_aggr_func_creator};
use print_caller::process_print_caller;
use proc_macro::TokenStream;
use quote::quote;
use range_fn::process_range_fn;
use syn::{parse_macro_input, DeriveInput};
use syn::{parse_macro_input, Data, DeriveInput, Fields};

use crate::admin_fn::process_admin_fn;

@@ -136,3 +138,51 @@ pub fn print_caller(args: TokenStream, input: TokenStream) -> TokenStream {
pub fn stack_trace_debug(args: TokenStream, input: TokenStream) -> TokenStream {
    stack_trace_debug::stack_trace_style_impl(args.into(), input.into()).into()
}

/// Generates implementation for `From<&TableMeta> for TableMetaBuilder`
#[proc_macro_derive(ToMetaBuilder)]
pub fn derive_meta_builder(input: TokenStream) -> TokenStream {
    let input = parse_macro_input!(input as DeriveInput);

    let Data::Struct(data_struct) = input.data else {
        panic!("ToMetaBuilder can only be derived for structs");
    };

    let Fields::Named(fields) = data_struct.fields else {
        panic!("ToMetaBuilder can only be derived for structs with named fields");
    };

    // Check that this is being applied to TableMeta struct
    if input.ident != "TableMeta" {
        panic!("ToMetaBuilder can only be derived for TableMeta struct");
    }

    let field_init = fields.named.iter().map(|field| {
        let field_name = field.ident.as_ref().unwrap();
        quote! {
            #field_name: Default::default(),
        }
    });

    let field_assignments = fields.named.iter().map(|field| {
        let field_name = field.ident.as_ref().unwrap();
        quote! {
            builder.#field_name(meta.#field_name.clone());
        }
    });

    let gen = quote! {
        impl From<&TableMeta> for TableMetaBuilder {
            fn from(meta: &TableMeta) -> Self {
                let mut builder = Self {
                    #(#field_init)*
                };

                #(#field_assignments)*
                builder
            }
        }
    };

    gen.into()
}
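
A sketch of how the derive would be used (assuming a `TableMetaBuilder` with matching setter methods is in scope; the field names are illustrative):

    #[derive(ToMetaBuilder)]
    struct TableMeta {
        engine: String,
        region_numbers: Vec<u32>,
    }

    // The generated impl seeds a builder from an existing meta:
    // let builder = TableMetaBuilder::from(&meta);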

@@ -7,6 +7,7 @@ license.workspace = true
[features]
testing = []
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
mysql_kvbackend = ["dep:sqlx", "dep:backon"]

[lints]
workspace = true

@@ -57,9 +58,10 @@ serde_json.workspace = true
serde_with.workspace = true
session.workspace = true
snafu.workspace = true
sqlx = { workspace = true, optional = true }
store-api.workspace = true
strum.workspace = true
table.workspace = true
table = { workspace = true, features = ["testing"] }
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true

@@ -192,6 +192,8 @@ mod tests {
    expire_after: Some(300),
    comment: "comment".to_string(),
    options: Default::default(),
    created_time: chrono::Utc::now(),
    updated_time: chrono::Utc::now(),
},
(1..=3)
    .map(|i| {

@@ -337,7 +337,6 @@ pub enum FlowType {
impl FlowType {
    pub const RECORDING_RULE: &str = "recording_rule";
    pub const STREAMING: &str = "streaming";
    pub const FLOW_TYPE_KEY: &str = "flow_type";
}

impl Default for FlowType {

@@ -392,8 +391,7 @@ impl From<&CreateFlowData> for CreateRequest {
    };

    let flow_type = value.flow_type.unwrap_or_default().to_string();
    req.flow_options
        .insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
    req.flow_options.insert("flow_type".to_string(), flow_type);
    req
}
}

@@ -425,9 +423,16 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
    .collect::<Vec<_>>();

    let flow_type = value.flow_type.unwrap_or_default().to_string();
    options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
    options.insert("flow_type".to_string(), flow_type);

    let flow_info = FlowInfoValue {
    let mut create_time = chrono::Utc::now();
    if let Some(prev_flow_value) = value.prev_flow_info_value.as_ref()
        && value.task.or_replace
    {
        create_time = prev_flow_value.get_inner_ref().created_time;
    }

    let flow_info: FlowInfoValue = FlowInfoValue {
        source_table_ids: value.source_table_ids.clone(),
        sink_table_name,
        flownode_ids,

@@ -437,6 +442,8 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
        expire_after,
        comment,
        options,
        created_time: create_time,
        updated_time: chrono::Utc::now(),
    };

    (flow_info, flow_routes)

@@ -685,7 +685,36 @@ pub enum Error {
    operation: String,
},

#[cfg(feature = "pg_kvbackend")]
#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to execute via MySql, sql: {}", sql))]
MySqlExecution {
    sql: String,
    #[snafu(source)]
    error: sqlx::Error,
    #[snafu(implicit)]
    location: Location,
},

#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to create connection pool for MySql"))]
CreateMySqlPool {
    #[snafu(source)]
    error: sqlx::Error,
    #[snafu(implicit)]
    location: Location,
},

#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to {} MySql transaction", operation))]
MySqlTransaction {
    #[snafu(source)]
    error: sqlx::Error,
    #[snafu(implicit)]
    location: Location,
    operation: String,
},

#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
#[snafu(display("Rds transaction retry failed"))]
RdsTransactionRetryFailed {
    #[snafu(implicit)]

@@ -823,8 +852,13 @@ impl ErrorExt for Error {
    PostgresExecution { .. }
    | CreatePostgresPool { .. }
    | GetPostgresConnection { .. }
    | PostgresTransaction { .. }
    | RdsTransactionRetryFailed { .. } => StatusCode::Internal,
    | PostgresTransaction { .. } => StatusCode::Internal,
    #[cfg(feature = "mysql_kvbackend")]
    MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
        StatusCode::Internal
    }
    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
    RdsTransactionRetryFailed { .. } => StatusCode::Internal,
    Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
    }
}

@@ -835,16 +869,29 @@ impl ErrorExt for Error {
}

impl Error {
    #[cfg(feature = "pg_kvbackend")]
    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
    /// Check if the error is a serialization error.
    pub fn is_serialization_error(&self) -> bool {
        match self {
            #[cfg(feature = "pg_kvbackend")]
            Error::PostgresTransaction { error, .. } => {
                error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
            }
            #[cfg(feature = "pg_kvbackend")]
            Error::PostgresExecution { error, .. } => {
                error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
            }
            #[cfg(feature = "mysql_kvbackend")]
            Error::MySqlExecution {
                error: sqlx::Error::Database(database_error),
                ..
            } => {
                matches!(
                    database_error.message(),
                    "Deadlock found when trying to get lock; try restarting transaction"
                        | "can't serialize access for this transaction"
                )
            }
            _ => false,
        }
    }
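
Classifying serialization failures is what makes the RDS transactions retryable. A minimal sketch of the retry loop this enables (the helper name and retry budget are illustrative, not the store's actual implementation):

    async fn retry_txn<F, Fut>(mut run: F, retries: usize) -> Result<()>
    where
        F: FnMut() -> Fut,
        Fut: std::future::Future<Output = Result<()>>,
    {
        for _ in 0..retries {
            match run().await {
                Ok(()) => return Ok(()),
                // Serialization conflicts are transient: try again.
                Err(e) if e.is_serialization_error() => continue,
                Err(e) => return Err(e),
            }
        }
        RdsTransactionRetryFailedSnafu {}.fail()
    }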

@@ -461,6 +461,8 @@ mod tests {
    expire_after: Some(300),
    comment: "hi".to_string(),
    options: Default::default(),
    created_time: chrono::Utc::now(),
    updated_time: chrono::Utc::now(),
    }
}

@@ -632,6 +634,8 @@ mod tests {
    expire_after: Some(300),
    comment: "hi".to_string(),
    options: Default::default(),
    created_time: chrono::Utc::now(),
    updated_time: chrono::Utc::now(),
};
let err = flow_metadata_manager
    .create_flow_metadata(flow_id, flow_value, flow_routes.clone())

@@ -869,6 +873,8 @@ mod tests {
    expire_after: Some(300),
    comment: "hi".to_string(),
    options: Default::default(),
    created_time: chrono::Utc::now(),
    updated_time: chrono::Utc::now(),
};
let err = flow_metadata_manager
    .update_flow_metadata(

@@ -15,6 +15,7 @@
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;

use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};

@@ -131,6 +132,12 @@ pub struct FlowInfoValue {
    pub(crate) comment: String,
    /// The options.
    pub(crate) options: HashMap<String, String>,
    /// The created time.
    #[serde(default)]
    pub(crate) created_time: DateTime<Utc>,
    /// The updated time.
    #[serde(default)]
    pub(crate) updated_time: DateTime<Utc>,
}

impl FlowInfoValue {

@@ -171,6 +178,14 @@ impl FlowInfoValue {
    pub fn options(&self) -> &HashMap<String, String> {
        &self.options
    }

    pub fn created_time(&self) -> &DateTime<Utc> {
        &self.created_time
    }

    pub fn updated_time(&self) -> &DateTime<Utc> {
        &self.updated_time
    }
}

pub type FlowInfoManagerRef = Arc<FlowInfoManager>;

@@ -97,11 +97,19 @@ impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey {
pub struct FlowStateValue {
    /// For each key, the bytes of the state in memory.
    pub state_size: BTreeMap<FlowId, usize>,
    /// For each key, the last execution time of flow in unix timestamp milliseconds.
    pub last_exec_time_map: BTreeMap<FlowId, i64>,
}

impl FlowStateValue {
    pub fn new(state_size: BTreeMap<FlowId, usize>) -> Self {
        Self { state_size }
    pub fn new(
        state_size: BTreeMap<FlowId, usize>,
        last_exec_time_map: BTreeMap<FlowId, i64>,
    ) -> Self {
        Self {
            state_size,
            last_exec_time_map,
        }
    }
}

@@ -143,12 +151,15 @@ impl FlowStateManager {
pub struct FlowStat {
    /// For each key, the bytes of the state in memory.
    pub state_size: BTreeMap<u32, usize>,
    /// For each key, the last execution time of flow in unix timestamp milliseconds.
    pub last_exec_time_map: BTreeMap<FlowId, i64>,
}

impl From<FlowStateValue> for FlowStat {
    fn from(value: FlowStateValue) -> Self {
        Self {
            state_size: value.state_size,
            last_exec_time_map: value.last_exec_time_map,
        }
    }
}

@@ -157,6 +168,7 @@ impl From<FlowStat> for FlowStateValue {
    fn from(value: FlowStat) -> Self {
        Self {
            state_size: value.state_size,
            last_exec_time_map: value.last_exec_time_map,
        }
    }
}
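
Since `FlowStateValue::new` now takes both maps, call sites must supply the last-execution map as well; a small sketch (the values are illustrative):

    use std::collections::BTreeMap;

    let mut state_size = BTreeMap::new();
    state_size.insert(1u32, 1024usize); // flow 1 holds 1 KiB of state
    let mut last_exec_time_map = BTreeMap::new();
    last_exec_time_map.insert(1u32, 1_700_000_000_000i64); // unix millis of last run

    let value = FlowStateValue::new(state_size, last_exec_time_map);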

@@ -40,7 +40,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
    .build()
    .unwrap();

let meta = TableMetaBuilder::default()
let meta = TableMetaBuilder::empty()
    .schema(Arc::new(schema))
    .primary_key_indices(vec![0])
    .engine("engine")

@@ -31,7 +31,7 @@ use crate::rpc::KeyValue;
pub mod chroot;
pub mod etcd;
pub mod memory;
#[cfg(feature = "pg_kvbackend")]
#[cfg(any(feature = "mysql_kvbackend", feature = "pg_kvbackend"))]
pub mod rds;
pub mod test;
pub mod txn;

@@ -14,13 +14,11 @@

use std::any::Any;
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::marker::PhantomData;
use std::sync::{Arc, RwLock};

use async_trait::async_trait;
use common_error::ext::ErrorExt;
use serde::Serializer;

use super::{KvBackendRef, ResettableKvBackend};
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};

@@ -38,19 +36,6 @@ pub struct MemoryKvBackend<T> {
    _phantom: PhantomData<T>,
}

impl<T> Display for MemoryKvBackend<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let kvs = self.kvs.read().unwrap();
        for (k, v) in kvs.iter() {
            f.serialize_str(&String::from_utf8_lossy(k))?;
            f.serialize_str(" -> ")?;
            f.serialize_str(&String::from_utf8_lossy(v))?;
            f.serialize_str("\n")?;
        }
        Ok(())
    }
}

impl<T> Default for MemoryKvBackend<T> {
    fn default() -> Self {
        Self {

@@ -33,10 +33,16 @@ use crate::rpc::store::{
};
use crate::rpc::KeyValue;

#[cfg(feature = "pg_kvbackend")]
mod postgres;

#[cfg(feature = "pg_kvbackend")]
pub use postgres::PgStore;

#[cfg(feature = "mysql_kvbackend")]
mod mysql;
#[cfg(feature = "mysql_kvbackend")]
pub use mysql::MySqlStore;

const RDS_STORE_TXN_RETRY_COUNT: usize = 3;

/// Query executor for rds. It can execute queries or generate a transaction executor.

@@ -106,6 +112,14 @@ impl<T: Executor> ExecutorImpl<'_, T> {
    }
}

#[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
    match self {
        Self::Default(executor) => executor.execute(query, params).await,
        Self::Txn(executor) => executor.execute(query, params).await,
    }
}

async fn commit(self) -> Result<()> {
    match self {
        Self::Txn(executor) => executor.commit().await,

src/common/meta/src/kv_backend/rds/mysql.rs (new file, 650 lines)
@@ -0,0 +1,650 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::marker::PhantomData;
use std::sync::Arc;

use common_telemetry::debug;
use snafu::ResultExt;
use sqlx::mysql::MySqlRow;
use sqlx::pool::Pool;
use sqlx::{MySql, MySqlPool, Row, Transaction as MySqlTransaction};

use crate::error::{CreateMySqlPoolSnafu, MySqlExecutionSnafu, MySqlTransactionSnafu, Result};
use crate::kv_backend::rds::{
    Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
    RDS_STORE_TXN_RETRY_COUNT,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{
    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
    BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;

type MySqlClient = Arc<Pool<MySql>>;
pub struct MySqlTxnClient(MySqlTransaction<'static, MySql>);

fn key_value_from_row(row: MySqlRow) -> KeyValue {
    // Safety: key and value are the first two columns in the row
    KeyValue {
        key: row.get_unchecked(0),
        value: row.get_unchecked(1),
    }
}

const EMPTY: &[u8] = &[0];

/// Type of range template.
#[derive(Debug, Clone, Copy)]
enum RangeTemplateType {
    Point,
    Range,
    Full,
    LeftBounded,
    Prefix,
}

/// Builds params for the given range template type.
impl RangeTemplateType {
    fn build_params(&self, mut key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
        match self {
            RangeTemplateType::Point => vec![key],
            RangeTemplateType::Range => vec![key, range_end],
            RangeTemplateType::Full => vec![],
            RangeTemplateType::LeftBounded => vec![key],
            RangeTemplateType::Prefix => {
                key.push(b'%');
                vec![key]
            }
        }
    }
}
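
For instance, a prefix scan turns the key into a single LIKE parameter with a trailing '%' (a test-style sketch):

    // Prefix ranges become one LIKE parameter: b"aa" -> b"aa%".
    let params = RangeTemplateType::Prefix.build_params(b"aa".to_vec(), vec![]);
    assert_eq!(params, vec![b"aa%".to_vec()]);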

/// Templates for range request.
#[derive(Debug, Clone)]
struct RangeTemplate {
    point: String,
    range: String,
    full: String,
    left_bounded: String,
    prefix: String,
}

impl RangeTemplate {
    /// Gets the template for the given type.
    fn get(&self, typ: RangeTemplateType) -> &str {
        match typ {
            RangeTemplateType::Point => &self.point,
            RangeTemplateType::Range => &self.range,
            RangeTemplateType::Full => &self.full,
            RangeTemplateType::LeftBounded => &self.left_bounded,
            RangeTemplateType::Prefix => &self.prefix,
        }
    }

    /// Adds limit to the template.
    fn with_limit(template: &str, limit: i64) -> String {
        if limit == 0 {
            return format!("{};", template);
        }
        format!("{} LIMIT {};", template, limit)
    }
}

fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
    if start.len() != end.len() {
        return false;
    }
    let l = start.len();
    let same_prefix = start[0..l - 1] == end[0..l - 1];
    if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
        return same_prefix && (*rhs + 1) == *lhs;
    }
    false
}

/// Determine the template type for range request.
fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
    match (key, range_end) {
        (_, &[]) => RangeTemplateType::Point,
        (EMPTY, EMPTY) => RangeTemplateType::Full,
        (_, EMPTY) => RangeTemplateType::LeftBounded,
        (start, end) => {
            if is_prefix_range(start, end) {
                RangeTemplateType::Prefix
            } else {
                RangeTemplateType::Range
            }
        }
    }
}
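
A worked example of the mapping (a test-style sketch; the byte keys are illustrative). `b"aa"` and `b"ab"` have equal length, share their prefix, and differ by exactly one in the last byte, so that pair selects the Prefix template, while an empty range end means a point lookup:

    assert!(is_prefix_range(b"aa", b"ab"));
    assert!(matches!(range_template(b"aa", b"ab"), RangeTemplateType::Prefix));
    assert!(matches!(range_template(b"aa", &[]), RangeTemplateType::Point));
    assert!(matches!(range_template(&[0], &[0]), RangeTemplateType::Full));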

/// Generates placeholders for a SQL `IN` clause for MySQL.
fn mysql_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
    (from..=to).map(|_| "?".to_string()).collect()
}

/// Factory for building sql templates.
struct MySqlTemplateFactory<'a> {
    table_name: &'a str,
}

impl<'a> MySqlTemplateFactory<'a> {
    /// Creates a new [`MySqlTemplateFactory`] with the given table name.
    fn new(table_name: &'a str) -> Self {
        Self { table_name }
    }

    /// Builds the template set for the given table name.
    fn build(&self) -> MySqlTemplateSet {
        let table_name = self.table_name;
        // Some queries don't end with `;` because we may need to append a `LIMIT` clause.
        MySqlTemplateSet {
            table_name: table_name.to_string(),
            create_table_statement: format!(
                // Cannot be more than 3072 bytes in PRIMARY KEY
                "CREATE TABLE IF NOT EXISTS {table_name}(k VARBINARY(3072) PRIMARY KEY, v BLOB);",
            ),
            range_template: RangeTemplate {
                point: format!("SELECT k, v FROM {table_name} WHERE k = ?"),
                range: format!("SELECT k, v FROM {table_name} WHERE k >= ? AND k < ? ORDER BY k"),
                full: format!("SELECT k, v FROM {table_name} ORDER BY k"),
                left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= ? ORDER BY k"),
                prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE ? ORDER BY k"),
            },
            delete_template: RangeTemplate {
                point: format!("DELETE FROM {table_name} WHERE k = ?;"),
                range: format!("DELETE FROM {table_name} WHERE k >= ? AND k < ?;"),
                full: format!("DELETE FROM {table_name}"),
                left_bounded: format!("DELETE FROM {table_name} WHERE k >= ?;"),
                prefix: format!("DELETE FROM {table_name} WHERE k LIKE ?;"),
            },
        }
    }
}

/// Templates for the given table name.
#[derive(Debug, Clone)]
pub struct MySqlTemplateSet {
    table_name: String,
    create_table_statement: String,
    range_template: RangeTemplate,
    delete_template: RangeTemplate,
}

impl MySqlTemplateSet {
    /// Generates the sql for batch get.
    fn generate_batch_get_query(&self, key_len: usize) -> String {
        let table_name = &self.table_name;
        let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
        format!("SELECT k, v FROM {table_name} WHERE k in ({});", in_clause)
    }

    /// Generates the sql for batch delete.
    fn generate_batch_delete_query(&self, key_len: usize) -> String {
        let table_name = &self.table_name;
        let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
        format!("DELETE FROM {table_name} WHERE k in ({});", in_clause)
    }

    /// Generates the sql for batch upsert.
    /// For MySQL, it also generates a select query to get the previous values.
    fn generate_batch_upsert_query(&self, kv_len: usize) -> (String, String) {
        let table_name = &self.table_name;
        let in_placeholders: Vec<String> = (1..=kv_len).map(|_| "?".to_string()).collect();
        let in_clause = in_placeholders.join(", ");
        let mut values_placeholders = Vec::new();
        for _ in 0..kv_len {
            values_placeholders.push("(?, ?)".to_string());
        }
        let values_clause = values_placeholders.join(", ");

        (
            format!(r#"SELECT k, v FROM {table_name} WHERE k IN ({in_clause})"#,),
            format!(
                r#"INSERT INTO {table_name} (k, v) VALUES {values_clause} ON DUPLICATE KEY UPDATE v = VALUES(v);"#,
            ),
        )
    }
}
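
For instance, with two key-value pairs the generated pair of statements comes out as follows (a sketch; the table name is illustrative):

    let templates = MySqlTemplateFactory::new("greptime_metakv").build();
    let (select, upsert) = templates.generate_batch_upsert_query(2);
    assert_eq!(select, "SELECT k, v FROM greptime_metakv WHERE k IN (?, ?)");
    assert_eq!(
        upsert,
        "INSERT INTO greptime_metakv (k, v) VALUES (?, ?), (?, ?) ON DUPLICATE KEY UPDATE v = VALUES(v);"
    );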

#[async_trait::async_trait]
impl Executor for MySqlClient {
    type Transaction<'a>
        = MySqlTxnClient
    where
        Self: 'a;

    fn name() -> &'static str {
        "MySql"
    }

    async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
        let query = sqlx::query(raw_query);
        let query = params.iter().fold(query, |query, param| query.bind(param));
        let rows = query
            .fetch_all(&**self)
            .await
            .context(MySqlExecutionSnafu { sql: raw_query })?;
        Ok(rows.into_iter().map(key_value_from_row).collect())
    }

    async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
        let query = sqlx::query(raw_query);
        let query = params.iter().fold(query, |query, param| query.bind(param));
        query
            .execute(&**self)
            .await
            .context(MySqlExecutionSnafu { sql: raw_query })?;
        Ok(())
    }

    async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
        // sqlx has no isolation level support for now, so we have to set it manually.
        // TODO(CookiePie): Waiting for https://github.com/launchbadge/sqlx/pull/3614 and remove this.
        sqlx::query("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
            .execute(&**self)
            .await
            .context(MySqlExecutionSnafu {
                sql: "SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE",
            })?;
        let txn = self
            .begin()
            .await
            .context(MySqlExecutionSnafu { sql: "begin" })?;
        Ok(MySqlTxnClient(txn))
    }
}

#[async_trait::async_trait]
impl Transaction<'_> for MySqlTxnClient {
    async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
        let query = sqlx::query(raw_query);
        let query = params.iter().fold(query, |query, param| query.bind(param));
        // As noted in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, a `&mut *transaction` reborrow is required here.
        let rows = query
            .fetch_all(&mut *(self.0))
            .await
            .context(MySqlExecutionSnafu { sql: raw_query })?;
        Ok(rows.into_iter().map(key_value_from_row).collect())
    }

    async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
        let query = sqlx::query(raw_query);
        let query = params.iter().fold(query, |query, param| query.bind(param));
        // As noted in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, a `&mut *transaction` reborrow is required here.
        query
            .execute(&mut *(self.0))
            .await
            .context(MySqlExecutionSnafu { sql: raw_query })?;
        Ok(())
    }

    /// Caution: sqlx can get stuck on the query if two transactions conflict with each other.
    /// It is unclear whether this is intended behavior or database-dependent. Be careful.
    async fn commit(self) -> Result<()> {
        self.0.commit().await.context(MySqlTransactionSnafu {
            operation: "commit",
        })?;
        Ok(())
    }
}

pub struct MySqlExecutorFactory {
    pool: Arc<Pool<MySql>>,
}

#[async_trait::async_trait]
impl ExecutorFactory<MySqlClient> for MySqlExecutorFactory {
    async fn default_executor(&self) -> Result<MySqlClient> {
        Ok(self.pool.clone())
    }

    async fn txn_executor<'a>(
        &self,
        default_executor: &'a mut MySqlClient,
    ) -> Result<MySqlTxnClient> {
        default_executor.txn_executor().await
    }
}

/// A MySQL-backed key-value store.
/// It uses [sqlx::Pool<MySql>] as the connection pool for [RdsStore].
pub type MySqlStore = RdsStore<MySqlClient, MySqlExecutorFactory, MySqlTemplateSet>;

#[async_trait::async_trait]
impl KvQueryExecutor<MySqlClient> for MySqlStore {
    async fn range_with_query_executor(
        &self,
        query_executor: &mut ExecutorImpl<'_, MySqlClient>,
        req: RangeRequest,
    ) -> Result<RangeResponse> {
        let template_type = range_template(&req.key, &req.range_end);
        let template = self.sql_template_set.range_template.get(template_type);
        let params = template_type.build_params(req.key, req.range_end);
        let params_ref = params.iter().collect::<Vec<_>>();
        // Always add 1 to limit to check if there is more data
        let query =
            RangeTemplate::with_limit(template, if req.limit == 0 { 0 } else { req.limit + 1 });
        let limit = req.limit as usize;
        debug!("query: {:?}, params: {:?}", query, params);
        let mut kvs = query_executor.query(&query, &params_ref).await?;
        if req.keys_only {
            kvs.iter_mut().for_each(|kv| kv.value = vec![]);
        }
        // If limit is 0, we always return all data
        if limit == 0 || kvs.len() <= limit {
            return Ok(RangeResponse { kvs, more: false });
        }
        // We fetched limit + 1 rows, so remove the extra row and set more to true
        let removed = kvs.pop();
        debug_assert!(removed.is_some());
        Ok(RangeResponse { kvs, more: true })
    }
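
The limit + 1 pattern above is a common pagination idiom: fetch one extra row purely to learn whether more data exists, then drop it before answering. A toy sketch:

    // Pretend the query returned limit + 1 rows.
    let limit = 10usize;
    let mut kvs: Vec<u32> = (0..11).collect();
    let more = kvs.len() > limit;
    if more {
        kvs.pop(); // drop the sentinel row; the caller still sees exactly `limit` rows
    }
    assert!(more);
    assert_eq!(kvs.len(), 10);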

    async fn batch_put_with_query_executor(
        &self,
        query_executor: &mut ExecutorImpl<'_, MySqlClient>,
        req: BatchPutRequest,
    ) -> Result<BatchPutResponse> {
        let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
        let mut values_params = Vec::with_capacity(req.kvs.len() * 2);

        for kv in &req.kvs {
            let processed_key = &kv.key;
            in_params.push(processed_key);

            let processed_value = &kv.value;
            values_params.push(processed_key);
            values_params.push(processed_value);
        }
        let in_params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
        let values_params = values_params.iter().map(|x| x as _).collect::<Vec<_>>();
        let (select, update) = self
            .sql_template_set
            .generate_batch_upsert_query(req.kvs.len());

        // Fast path: if we don't need previous kvs, we can just upsert the keys.
        if !req.prev_kv {
            query_executor.execute(&update, &values_params).await?;
            return Ok(BatchPutResponse::default());
        }
        // Should use transaction to ensure atomicity.
        if let ExecutorImpl::Default(query_executor) = query_executor {
            let txn = query_executor.txn_executor().await?;
            let mut txn = ExecutorImpl::Txn(txn);
            let res = self.batch_put_with_query_executor(&mut txn, req).await;
            txn.commit().await?;
            return res;
        }
        let prev_kvs = query_executor.query(&select, &in_params).await?;
        query_executor.execute(&update, &values_params).await?;
        Ok(BatchPutResponse { prev_kvs })
    }

    async fn batch_get_with_query_executor(
        &self,
        query_executor: &mut ExecutorImpl<'_, MySqlClient>,
        req: BatchGetRequest,
    ) -> Result<BatchGetResponse> {
        if req.keys.is_empty() {
            return Ok(BatchGetResponse { kvs: vec![] });
        }
        let query = self
            .sql_template_set
            .generate_batch_get_query(req.keys.len());
        let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
        let kvs = query_executor.query(&query, &params).await?;
        Ok(BatchGetResponse { kvs })
    }

    async fn delete_range_with_query_executor(
        &self,
        query_executor: &mut ExecutorImpl<'_, MySqlClient>,
        req: DeleteRangeRequest,
    ) -> Result<DeleteRangeResponse> {
        // Since we need to know the number of deleted keys, we have no fast path here.
        // Should use transaction to ensure atomicity.
        if let ExecutorImpl::Default(query_executor) = query_executor {
            let txn = query_executor.txn_executor().await?;
            let mut txn = ExecutorImpl::Txn(txn);
            let res = self.delete_range_with_query_executor(&mut txn, req).await;
            txn.commit().await?;
            return res;
        }
        let range_get_req = RangeRequest {
            key: req.key.clone(),
            range_end: req.range_end.clone(),
            limit: 0,
            keys_only: false,
        };
        let prev_kvs = self
            .range_with_query_executor(query_executor, range_get_req)
            .await?
            .kvs;
        let template_type = range_template(&req.key, &req.range_end);
        let template = self.sql_template_set.delete_template.get(template_type);
        let params = template_type.build_params(req.key, req.range_end);
        let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
        query_executor.execute(template, &params_ref).await?;
        let mut resp = DeleteRangeResponse::new(prev_kvs.len() as i64);
        if req.prev_kv {
            resp.with_prev_kvs(prev_kvs);
        }
        Ok(resp)
    }

    async fn batch_delete_with_query_executor(
        &self,
        query_executor: &mut ExecutorImpl<'_, MySqlClient>,
        req: BatchDeleteRequest,
    ) -> Result<BatchDeleteResponse> {
        if req.keys.is_empty() {
            return Ok(BatchDeleteResponse::default());
        }
        let query = self
            .sql_template_set
            .generate_batch_delete_query(req.keys.len());
        let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
        // Fast path: if we don't need previous kvs, we can just delete the keys.
        if !req.prev_kv {
            query_executor.execute(&query, &params).await?;
            return Ok(BatchDeleteResponse::default());
        }
        // Should use transaction to ensure atomicity.
        if let ExecutorImpl::Default(query_executor) = query_executor {
            let txn = query_executor.txn_executor().await?;
            let mut txn = ExecutorImpl::Txn(txn);
            let res = self.batch_delete_with_query_executor(&mut txn, req).await;
            txn.commit().await?;
            return res;
        }
        // Should get previous kvs first
        let batch_get_req = BatchGetRequest {
            keys: req.keys.clone(),
        };
        let prev_kvs = self
            .batch_get_with_query_executor(query_executor, batch_get_req)
            .await?
            .kvs;
        // Pure `DELETE` has no return value, so we need to use `execute` instead of `query`.
        query_executor.execute(&query, &params).await?;
        if req.prev_kv {
            Ok(BatchDeleteResponse { prev_kvs })
        } else {
            Ok(BatchDeleteResponse::default())
        }
    }
}

impl MySqlStore {
    /// Creates a [MySqlStore] impl of [KvBackendRef] from a url.
    pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
        let pool = MySqlPool::connect(url)
            .await
            .context(CreateMySqlPoolSnafu)?;
        Self::with_mysql_pool(pool, table_name, max_txn_ops).await
    }

    /// Creates a [MySqlStore] impl of [KvBackendRef] from [sqlx::Pool<MySql>].
    pub async fn with_mysql_pool(
        pool: Pool<MySql>,
        table_name: &str,
        max_txn_ops: usize,
    ) -> Result<KvBackendRef> {
        // This step ensures the MySQL metadata backend is ready to use:
        // we check whether the metadata table (e.g. greptime_metakv) exists
        // and create it if it does not.
        let sql_template_set = MySqlTemplateFactory::new(table_name).build();
        sqlx::query(&sql_template_set.create_table_statement)
            .execute(&pool)
            .await
            .context(MySqlExecutionSnafu {
                sql: sql_template_set.create_table_statement.to_string(),
            })?;
        Ok(Arc::new(MySqlStore {
            max_txn_ops,
            sql_template_set,
            txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
            executor_factory: MySqlExecutorFactory {
                pool: Arc::new(pool),
            },
            _phantom: PhantomData,
        }))
    }
}
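
A sketch of wiring the store up from a connection URL (the URL, table name, and max_txn_ops value are illustrative):

    // Illustrative: connect and build the metadata backend.
    let backend = MySqlStore::with_url(
        "mysql://user:pass@127.0.0.1:3306/metadata",
        "greptime_metakv",
        128, // max_txn_ops
    )
    .await?;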

#[cfg(test)]
mod tests {
    use common_telemetry::init_default_ut_logging;

    use super::*;
    use crate::kv_backend::test::{
        prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
        test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
        test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
        test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
        test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
        unprepare_kv,
    };

    async fn build_mysql_kv_backend(table_name: &str) -> Option<MySqlStore> {
        init_default_ut_logging();
        let endpoints = std::env::var("GT_MYSQL_ENDPOINTS").unwrap_or_default();
        if endpoints.is_empty() {
            return None;
        }
        let pool = MySqlPool::connect(&endpoints).await.unwrap();
        let sql_templates = MySqlTemplateFactory::new(table_name).build();
        sqlx::query(&sql_templates.create_table_statement)
            .execute(&pool)
            .await
            .unwrap();
        Some(MySqlStore {
            max_txn_ops: 128,
            sql_template_set: sql_templates,
            txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
            executor_factory: MySqlExecutorFactory {
                pool: Arc::new(pool),
            },
            _phantom: PhantomData,
        })
    }

    #[tokio::test]
    async fn test_mysql_put() {
        let kv_backend = build_mysql_kv_backend("put_test").await.unwrap();
        let prefix = b"put/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_range() {
        let kv_backend = build_mysql_kv_backend("range_test").await.unwrap();
        let prefix = b"range/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_range_2() {
        let kv_backend = build_mysql_kv_backend("range2_test").await.unwrap();
        let prefix = b"range2/";
        test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_get() {
        let kv_backend = build_mysql_kv_backend("batch_get_test").await.unwrap();
        let prefix = b"batch_get/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_delete() {
        let kv_backend = build_mysql_kv_backend("batch_delete_test").await.unwrap();
        let prefix = b"batch_delete/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_batch_delete_with_prefix() {
        let kv_backend = build_mysql_kv_backend("batch_delete_with_prefix_test")
            .await
            .unwrap();
        let prefix = b"batch_delete/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_delete_range() {
        let kv_backend = build_mysql_kv_backend("delete_range_test").await.unwrap();
        let prefix = b"delete_range/";
        prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
        test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
        unprepare_kv(&kv_backend, prefix).await;
    }

    #[tokio::test]
    async fn test_mysql_compare_and_put() {
        let kv_backend = build_mysql_kv_backend("compare_and_put_test")
            .await
            .unwrap();
        let prefix = b"compare_and_put/";
        let kv_backend = Arc::new(kv_backend);
        test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
    }

    #[tokio::test]
    async fn test_mysql_txn() {
        let kv_backend = build_mysql_kv_backend("txn_test").await.unwrap();
        test_txn_one_compare_op(&kv_backend).await;
        text_txn_multi_compare_op(&kv_backend).await;
        test_txn_compare_equal(&kv_backend).await;
        test_txn_compare_greater(&kv_backend).await;
        test_txn_compare_less(&kv_backend).await;
        test_txn_compare_not_equal(&kv_backend).await;
    }
}
|
||||
@@ -153,6 +153,7 @@ impl<'a> PgSqlTemplateFactory<'a> {
    /// Builds the template set for the given table name.
    fn build(&self) -> PgSqlTemplateSet {
        let table_name = self.table_name;
        // Some queries don't end with `;` because we need to append a `LIMIT` clause.
        PgSqlTemplateSet {
            table_name: table_name.to_string(),
            create_table_statement: format!(
@@ -34,6 +34,24 @@ pub struct MigrateRegionRequest {
    pub timeout: Duration,
}

/// A request to add a region follower.
#[derive(Debug, Clone)]
pub struct AddRegionFollowerRequest {
    /// The id of the region to add a follower for.
    pub region_id: u64,
    /// The id of the peer to add the follower on.
    pub peer_id: u64,
}

/// A request to remove a region follower.
#[derive(Debug, Clone)]
pub struct RemoveRegionFollowerRequest {
    /// The id of the region to remove a follower from.
    pub region_id: u64,
    /// The id of the peer to remove the follower from.
    pub peer_id: u64,
}
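A trivial construction sketch for the new request types (illustration only; the ids are placeholders, and the admin API that consumes these requests is outside this hunk):

// Sketch only: ids below are made up.
let add = AddRegionFollowerRequest { region_id: 1024, peer_id: 2 };
let remove = RemoveRegionFollowerRequest {
    region_id: add.region_id,
    peer_id: add.peer_id,
};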

/// Casts the protobuf [`ProcedureId`] to the common [`ProcedureId`].
pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result<ProcedureId> {
    ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| {
@@ -26,6 +26,7 @@ use datafusion_common::cast::{as_boolean_array, as_null_array};
use datafusion_common::{internal_err, DataFusionError, ScalarValue};
use datatypes::arrow::array::{Array, BooleanArray, RecordBatch};
use datatypes::arrow::compute::filter_record_batch;
use datatypes::compute::or_kleene;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;

@@ -47,6 +48,8 @@ pub struct SimpleFilterEvaluator {
    literal: Scalar<ArrayRef>,
    /// The operator.
    op: Operator,
    /// Only used when the operator is an `Or` chain.
    literal_list: Vec<Scalar<ArrayRef>>,
}

impl SimpleFilterEvaluator {
@@ -69,6 +72,7 @@ impl SimpleFilterEvaluator {
            column_name,
            literal: val.to_scalar().ok()?,
            op,
            literal_list: vec![],
        })
    }
@@ -83,6 +87,35 @@ impl SimpleFilterEvaluator {
            | Operator::LtEq
            | Operator::Gt
            | Operator::GtEq => {}
            Operator::Or => {
                let lhs = Self::try_new(&binary.left)?;
                let rhs = Self::try_new(&binary.right)?;
                if lhs.column_name != rhs.column_name
                    || !matches!(lhs.op, Operator::Eq | Operator::Or)
                    || !matches!(rhs.op, Operator::Eq | Operator::Or)
                {
                    return None;
                }
                let mut list = vec![];
                let placeholder_literal = lhs.literal.clone();
                // The check above guarantees the op is either `Eq` or `Or`.
                if matches!(lhs.op, Operator::Or) {
                    list.extend(lhs.literal_list);
                } else {
                    list.push(lhs.literal);
                }
                if matches!(rhs.op, Operator::Or) {
                    list.extend(rhs.literal_list);
                } else {
                    list.push(rhs.literal);
                }
                return Some(Self {
                    column_name: lhs.column_name,
                    literal: placeholder_literal,
                    op: Operator::Or,
                    literal_list: list,
                });
            }
            _ => return None,
        }
@@ -103,6 +136,7 @@ impl SimpleFilterEvaluator {
                column_name: lhs.name.clone(),
                literal,
                op,
                literal_list: vec![],
            })
        }
        _ => None,
@@ -118,19 +152,19 @@ impl SimpleFilterEvaluator {
        let input = input
            .to_scalar()
            .with_context(|_| ToArrowScalarSnafu { v: input.clone() })?;
        let result = self.evaluate_datum(&input)?;
        let result = self.evaluate_datum(&input, 1)?;
        Ok(result.value(0))
    }

    pub fn evaluate_array(&self, input: &ArrayRef) -> Result<BooleanBuffer> {
        self.evaluate_datum(input)
        self.evaluate_datum(input, input.len())
    }

    pub fn evaluate_vector(&self, input: &VectorRef) -> Result<BooleanBuffer> {
        self.evaluate_datum(&input.to_arrow_array())
        self.evaluate_datum(&input.to_arrow_array(), input.len())
    }

    fn evaluate_datum(&self, input: &impl Datum) -> Result<BooleanBuffer> {
    fn evaluate_datum(&self, input: &impl Datum, input_len: usize) -> Result<BooleanBuffer> {
        let result = match self.op {
            Operator::Eq => cmp::eq(input, &self.literal),
            Operator::NotEq => cmp::neq(input, &self.literal),
@@ -138,6 +172,15 @@ impl SimpleFilterEvaluator {
            Operator::LtEq => cmp::lt_eq(input, &self.literal),
            Operator::Gt => cmp::gt(input, &self.literal),
            Operator::GtEq => cmp::gt_eq(input, &self.literal),
            Operator::Or => {
                // The `Or` operator stands for OR-chained equalities (in other words, an IN list).
                let mut result: BooleanArray = vec![false; input_len].into();
                for literal in &self.literal_list {
                    let rhs = cmp::eq(input, literal).context(ArrowComputeSnafu)?;
                    result = or_kleene(&result, &rhs).context(ArrowComputeSnafu)?;
                }
                Ok(result)
            }
            _ => {
                return UnsupportedOperationSnafu {
                    reason: format!("{:?}", self.op),
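The `Or` arm above folds one equality mask per literal into a single boolean result. A self-contained sketch of the same computation against the arrow crate directly (values are illustrative; GreptimeDB's error wrapping and the final `BooleanBuffer` conversion are omitted):

use arrow::array::{BooleanArray, Scalar, StringArray};
use arrow::compute::kernels::boolean::or_kleene;
use arrow::compute::kernels::cmp::eq;

fn main() {
    // Column values and the OR-chained literals, i.e. col IN ('B', 'C', 'D').
    let input = StringArray::from(vec!["B", "C", "E", "D"]);
    let literals = [
        Scalar::new(StringArray::from(vec!["B"])),
        Scalar::new(StringArray::from(vec!["C"])),
        Scalar::new(StringArray::from(vec!["D"])),
    ];

    // Start from all-false, then OR in one equality mask per literal,
    // mirroring the `Operator::Or` arm above.
    let mut mask: BooleanArray = vec![false; input.len()].into();
    for lit in &literals {
        let eq_mask = eq(&input, lit).unwrap();
        mask = or_kleene(&mask, &eq_mask).unwrap();
    }

    assert_eq!(mask, BooleanArray::from(vec![true, true, false, true]));
}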
@@ -349,4 +392,49 @@ mod test {
        let expected = datatypes::arrow::array::Int32Array::from(vec![5, 6]);
        assert_eq!(first_column_values, &expected);
    }

    #[test]
    fn test_complex_filter_expression() {
        // Create an expression tree for: col = 'B' OR col = 'C' OR col = 'D'
        let col_eq_b = col("col").eq(lit("B"));
        let col_eq_c = col("col").eq(lit("C"));
        let col_eq_d = col("col").eq(lit("D"));

        // Build the OR chain
        let col_or_expr = col_eq_b.or(col_eq_c).or(col_eq_d);

        // Check that SimpleFilterEvaluator can handle the OR chain
        let or_evaluator = SimpleFilterEvaluator::try_new(&col_or_expr).unwrap();
        assert_eq!(or_evaluator.column_name, "col");
        assert_eq!(or_evaluator.op, Operator::Or);
        assert_eq!(or_evaluator.literal_list.len(), 3);
        assert_eq!(format!("{:?}", or_evaluator.literal_list), "[Scalar(StringArray\n[\n \"B\",\n]), Scalar(StringArray\n[\n \"C\",\n]), Scalar(StringArray\n[\n \"D\",\n])]");

        // Create a schema and batch for testing
        let schema = Schema::new(vec![Field::new("col", DataType::Utf8, false)]);
        let df_schema = DFSchema::try_from(schema.clone()).unwrap();
        let props = ExecutionProps::new();
        let physical_expr = create_physical_expr(&col_or_expr, &df_schema, &props).unwrap();

        // Create test data
        let col_data = Arc::new(datatypes::arrow::array::StringArray::from(vec![
            "B", "C", "E", "B", "C", "D", "F",
        ]));
        let batch = RecordBatch::try_new(Arc::new(schema), vec![col_data]).unwrap();
        let expected = datatypes::arrow::array::StringArray::from(vec!["B", "C", "B", "C", "D"]);

        // Filter the batch
        let filtered_batch = batch_filter(&batch, &physical_expr).unwrap();

        // Expected: rows with col in ("B", "C", "D"),
        // i.e. rows 0, 1, 3, 4, 5.
        assert_eq!(filtered_batch.num_rows(), 5);

        let col_filtered = filtered_batch
            .column(0)
            .as_any()
            .downcast_ref::<datatypes::arrow::array::StringArray>()
            .unwrap();
        assert_eq!(col_filtered, &expected);
    }
}
@@ -1,407 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{Display, Formatter, Write};

use chrono::{
    Days, LocalResult, Months, NaiveDateTime, TimeDelta, TimeZone as ChronoTimeZone, Utc,
};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;

use crate::error::{InvalidDateStrSnafu, Result};
use crate::interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
use crate::timezone::{get_timezone, Timezone};
use crate::util::{datetime_to_utc, format_utc_datetime};
use crate::Date;

const DATETIME_FORMAT: &str = "%F %H:%M:%S%.f";
const DATETIME_FORMAT_WITH_TZ: &str = "%F %H:%M:%S%.f%z";

/// [DateTime] represents the **milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch)**.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize,
)]
pub struct DateTime(i64);

impl Display for DateTime {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if let Some(abs_time) = chrono::DateTime::from_timestamp_millis(self.0) {
            write!(
                f,
                "{}",
                format_utc_datetime(&abs_time.naive_utc(), DATETIME_FORMAT_WITH_TZ)
            )
        } else {
            write!(f, "DateTime({})", self.0)
        }
    }
}

impl From<DateTime> for serde_json::Value {
    fn from(d: DateTime) -> Self {
        serde_json::Value::String(d.to_string())
    }
}

impl From<NaiveDateTime> for DateTime {
    fn from(value: NaiveDateTime) -> Self {
        DateTime::from(value.and_utc().timestamp_millis())
    }
}

impl From<i64> for DateTime {
    fn from(v: i64) -> Self {
        Self(v)
    }
}

impl From<Date> for DateTime {
    fn from(value: Date) -> Self {
        // It's safe: i32 * 86400000 won't overflow i64.
        Self(value.to_secs() * 1000)
    }
}

impl DateTime {
    /// Try parsing a string into [`DateTime`] with the system timezone.
    /// See `DateTime::from_str`.
    pub fn from_str_system(s: &str) -> Result<Self> {
        Self::from_str(s, None)
    }

    /// Try parsing a string into [`DateTime`] with the given timezone.
    /// Supported formats:
    /// - RFC3339, converted to the naive UTC timezone.
    /// - `%F %T`, resolved in the given timezone.
    /// - `%F %T%z`, with the timezone embedded in the string.
    pub fn from_str(s: &str, timezone: Option<&Timezone>) -> Result<Self> {
        let s = s.trim();
        let timestamp_millis = if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(s) {
            dt.naive_utc().and_utc().timestamp_millis()
        } else if let Ok(d) = NaiveDateTime::parse_from_str(s, DATETIME_FORMAT) {
            match datetime_to_utc(&d, get_timezone(timezone)) {
                LocalResult::None => {
                    return InvalidDateStrSnafu { raw: s }.fail();
                }
                LocalResult::Single(t) | LocalResult::Ambiguous(t, _) => {
                    t.and_utc().timestamp_millis()
                }
            }
        } else if let Ok(v) = chrono::DateTime::parse_from_str(s, DATETIME_FORMAT_WITH_TZ) {
            v.timestamp_millis()
        } else {
            return InvalidDateStrSnafu { raw: s }.fail();
        };

        Ok(Self(timestamp_millis))
    }
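    // A quick sketch of the three accepted formats (values are illustrative;
    // all three resolve to the same instant, 1970-01-01T00:00:01Z):
    //
    //     let tz = Timezone::from_tz_string("Asia/Shanghai").unwrap();
    //     let a = DateTime::from_str("1970-01-01T00:00:01+00:00", None).unwrap(); // RFC3339
    //     let b = DateTime::from_str("1970-01-01 08:00:01", Some(&tz)).unwrap();  // naive, given tz
    //     let c = DateTime::from_str("1970-01-01 08:00:01+0800", None).unwrap();  // embedded offset
    //     assert_eq!((a.val(), b.val(), c.val()), (1000, 1000, 1000));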
    /// Create a new [DateTime] from milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
    pub fn new(millis: i64) -> Self {
        Self(millis)
    }

    /// Get the milliseconds elapsed since "1970-01-01 00:00:00 UTC" (UNIX Epoch).
    pub fn val(&self) -> i64 {
        self.0
    }

    /// Convert to [NaiveDateTime].
    pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
        chrono::DateTime::from_timestamp_millis(self.0).map(|x| x.naive_utc())
    }

    /// Format the DateTime with the given pattern and timezone.
    /// If `tz == None`, the server default timezone will be used.
    pub fn as_formatted_string(
        self,
        pattern: &str,
        timezone: Option<&Timezone>,
    ) -> Result<Option<String>> {
        if let Some(v) = self.to_chrono_datetime() {
            let mut formatted = String::new();

            match get_timezone(timezone) {
                Timezone::Offset(offset) => {
                    write!(
                        formatted,
                        "{}",
                        offset.from_utc_datetime(&v).format(pattern)
                    )
                    .context(crate::error::FormatSnafu { pattern })?;
                }
                Timezone::Named(tz) => {
                    write!(formatted, "{}", tz.from_utc_datetime(&v).format(pattern))
                        .context(crate::error::FormatSnafu { pattern })?;
                }
            }

            return Ok(Some(formatted));
        }

        Ok(None)
    }

    pub fn to_chrono_datetime_with_timezone(&self, tz: Option<&Timezone>) -> Option<NaiveDateTime> {
        let datetime = self.to_chrono_datetime();
        datetime.map(|v| match tz {
            Some(Timezone::Offset(offset)) => offset.from_utc_datetime(&v).naive_local(),
            Some(Timezone::Named(tz)) => tz.from_utc_datetime(&v).naive_local(),
            None => Utc.from_utc_datetime(&v).naive_local(),
        })
    }

    // FIXME(yingwen): remove add/sub intervals later
    /// Adds the given [IntervalYearMonth] to the current datetime.
    pub fn add_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_add_months(Months::new(interval.months as u32))
            .map(Into::into)
    }

    /// Adds the given [IntervalDayTime] to the current datetime.
    pub fn add_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_add_days(Days::new(interval.days as u64))?
            .checked_add_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
            .map(Into::into)
    }

    /// Adds the given [IntervalMonthDayNano] to the current datetime.
    pub fn add_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_add_months(Months::new(interval.months as u32))?
            .checked_add_days(Days::new(interval.days as u64))?
            .checked_add_signed(TimeDelta::nanoseconds(interval.nanoseconds))
            .map(Into::into)
    }

    /// Subtracts the given [IntervalYearMonth] from the current datetime.
    pub fn sub_year_month(&self, interval: IntervalYearMonth) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_sub_months(Months::new(interval.months as u32))
            .map(Into::into)
    }

    /// Subtracts the given [IntervalDayTime] from the current datetime.
    pub fn sub_day_time(&self, interval: IntervalDayTime) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_sub_days(Days::new(interval.days as u64))?
            .checked_sub_signed(TimeDelta::milliseconds(interval.milliseconds as i64))
            .map(Into::into)
    }

    /// Subtracts the given [IntervalMonthDayNano] from the current datetime.
    pub fn sub_month_day_nano(&self, interval: IntervalMonthDayNano) -> Option<Self> {
        let naive_datetime = self.to_chrono_datetime()?;

        naive_datetime
            .checked_sub_months(Months::new(interval.months as u32))?
            .checked_sub_days(Days::new(interval.days as u64))?
            .checked_sub_signed(TimeDelta::nanoseconds(interval.nanoseconds))
            .map(Into::into)
    }
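    // The interval arithmetic above goes through chrono's checked calendar
    // math. A worked example; the IntervalMonthDayNano::new constructor is an
    // assumption here, by analogy with IntervalDayTime::new in the tests below:
    //
    //     // 1970-01-01T00:00:01Z plus 1 month, 1 day and 1ms (as nanoseconds):
    //     // +1 month lands on Feb 1, +1 day on Feb 2, i.e. 32 days later.
    //     let dt = DateTime::new(1000);
    //     let interval = IntervalMonthDayNano::new(1, 1, 1_000_000);
    //     let later = dt.add_month_day_nano(interval).unwrap();
    //     assert_eq!(later.val(), 1000 + 32i64 * 24 * 3600 * 1000 + 1);
    //     // For this interval the subtraction is symmetric.
    //     assert_eq!(dt, later.sub_month_day_nano(interval).unwrap());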
    /// Convert to [common_time::date].
    pub fn to_date(&self) -> Option<Date> {
        self.to_chrono_datetime().map(|d| Date::from(d.date()))
    }

    pub fn negative(&self) -> Self {
        Self(-self.0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::timezone::set_default_timezone;

    #[test]
    pub fn test_new_date_time() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        assert_eq!("1970-01-01 08:00:00+0800", DateTime::new(0).to_string());
        assert_eq!("1970-01-01 08:00:01+0800", DateTime::new(1000).to_string());
        assert_eq!("1970-01-01 07:59:59+0800", DateTime::new(-1000).to_string());
    }

    #[test]
    pub fn test_parse_from_string() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let time = "1970-01-01 00:00:00+0800";
        let dt = DateTime::from_str(time, None).unwrap();
        assert_eq!(time, &dt.to_string());
        let dt = DateTime::from_str(" 1970-01-01 00:00:00+0800 ", None).unwrap();
        assert_eq!(time, &dt.to_string());
    }

    #[test]
    pub fn test_from() {
        let d: DateTime = 42.into();
        assert_eq!(42, d.val());
    }

    #[test]
    fn test_add_sub_interval() {
        let datetime = DateTime::new(1000);

        let interval = IntervalDayTime::new(1, 200);

        let new_datetime = datetime.add_day_time(interval).unwrap();
        assert_eq!(new_datetime.val(), 1000 + 3600 * 24 * 1000 + 200);

        assert_eq!(datetime, new_datetime.sub_day_time(interval).unwrap());
    }

    #[test]
    fn test_parse_local_date_time() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        assert_eq!(
            -28800000,
            DateTime::from_str("1970-01-01 00:00:00", None)
                .unwrap()
                .val()
        );
        assert_eq!(
            0,
            DateTime::from_str("1970-01-01 08:00:00", None)
                .unwrap()
                .val()
        );
        assert_eq!(
            42,
            DateTime::from_str("1970-01-01 08:00:00.042", None)
                .unwrap()
                .val()
        );
        assert_eq!(
            42,
            DateTime::from_str("1970-01-01 08:00:00.042424", None)
                .unwrap()
                .val()
        );

        assert_eq!(
            0,
            DateTime::from_str(
                "1970-01-01 08:00:00",
                Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
            )
            .unwrap()
            .val()
        );

        assert_eq!(
            -28800000,
            DateTime::from_str(
                "1970-01-01 00:00:00",
                Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
            )
            .unwrap()
            .val()
        );

        assert_eq!(
            28800000,
            DateTime::from_str(
                "1970-01-01 00:00:00",
                Some(&Timezone::from_tz_string("-8:00").unwrap())
            )
            .unwrap()
            .val()
        );
    }

    #[test]
    fn test_parse_local_date_time_with_tz() {
        let ts = DateTime::from_str("1970-01-01 08:00:00+0000", None)
            .unwrap()
            .val();
        assert_eq!(28800000, ts);
        let ts = DateTime::from_str("1970-01-01 00:00:00.042+0000", None)
            .unwrap()
            .val();
        assert_eq!(42, ts);

        // The string carries its own time zone info, so the argument doesn't change the result.
        let ts = DateTime::from_str(
            "1970-01-01 08:00:00+0000",
            Some(&Timezone::from_tz_string("-8:00").unwrap()),
        )
        .unwrap()
        .val();
        assert_eq!(28800000, ts);
    }

    #[test]
    fn test_as_formatted_string() {
        let d: DateTime = DateTime::new(1000);

        assert_eq!(
            "1970-01-01",
            d.as_formatted_string("%Y-%m-%d", None).unwrap().unwrap()
        );
        assert_eq!(
            "1970-01-01 00:00:01",
            d.as_formatted_string("%Y-%m-%d %H:%M:%S", None)
                .unwrap()
                .unwrap()
        );
        assert_eq!(
            "1970-01-01T00:00:01:000",
            d.as_formatted_string("%Y-%m-%dT%H:%M:%S:%3f", None)
                .unwrap()
                .unwrap()
        );

        assert_eq!(
            "1970-01-01T08:00:01:000",
            d.as_formatted_string(
                "%Y-%m-%dT%H:%M:%S:%3f",
                Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
            )
            .unwrap()
            .unwrap()
        );
    }

    #[test]
    fn test_from_max_date() {
        let date = Date::new(i32::MAX);
        let datetime = DateTime::from(date);
        assert_eq!(datetime.val(), 185542587100800000);
    }

    #[test]
    fn test_conversion_between_datetime_and_chrono_datetime() {
        let cases = [1, 10, 100, 1000, 100000];
        for case in cases {
            let dt = DateTime::new(case);
            let ndt = dt.to_chrono_datetime().unwrap();
            let dt2 = DateTime::from(ndt);
            assert_eq!(dt, dt2);
        }
    }
}
@@ -13,7 +13,6 @@
// limitations under the License.

pub mod date;
pub mod datetime;
pub mod duration;
pub mod error;
pub mod interval;
@@ -26,7 +25,6 @@ pub mod ttl;
pub mod util;

pub use date::Date;
pub use datetime::DateTime;
pub use duration::Duration;
pub use interval::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth};
pub use range::RangeMillis;
@@ -171,6 +171,10 @@ pub struct S3Config {
    pub secret_access_key: SecretString,
    pub endpoint: Option<String>,
    pub region: Option<String>,
    /// Enable virtual host style so that opendal sends API requests in virtual host style instead of path style.
    /// By default, opendal sends requests to https://s3.us-east-1.amazonaws.com/bucket_name;
    /// when enabled, it sends them to https://bucket_name.s3.us-east-1.amazonaws.com.
    pub enable_virtual_host_style: bool,
    #[serde(flatten)]
    pub cache: ObjectStorageCacheConfig,
    pub http_client: HttpClientConfig,
@@ -185,6 +189,7 @@ impl PartialEq for S3Config {
            && self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
            && self.endpoint == other.endpoint
            && self.region == other.region
            && self.enable_virtual_host_style == other.enable_virtual_host_style
            && self.cache == other.cache
            && self.http_client == other.http_client
    }
@@ -289,6 +294,7 @@ impl Default for S3Config {
            root: String::default(),
            access_key_id: SecretString::from(String::default()),
            secret_access_key: SecretString::from(String::default()),
            enable_virtual_host_style: false,
            endpoint: Option::default(),
            region: Option::default(),
            cache: ObjectStorageCacheConfig::default(),
@@ -41,10 +41,13 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt

    if s3_config.endpoint.is_some() {
        builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
    };
    }
    if s3_config.region.is_some() {
        builder = builder.region(s3_config.region.as_ref().unwrap());
    };
    }
    if s3_config.enable_virtual_host_style {
        builder = builder.enable_virtual_host_style();
    }

    Ok(ObjectStore::new(builder)
        .context(error::InitBackendSnafu)?
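An equivalent, slightly more idiomatic shape for the conditional wiring above, using if-let instead of is_some()/unwrap(); a sketch only, reusing the builder method names that appear in the hunk:

// Behaviorally the same as the hunk above.
if let Some(endpoint) = &s3_config.endpoint {
    builder = builder.endpoint(endpoint);
}
if let Some(region) = &s3_config.region {
    builder = builder.region(region);
}
if s3_config.enable_virtual_host_style {
    builder = builder.enable_virtual_host_style();
}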
@@ -30,13 +30,13 @@ use serde::{Deserialize, Serialize};
use crate::error::{self, Error, Result};
use crate::type_id::LogicalTypeId;
use crate::types::{
    BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
    DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
    DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
    IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
    ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
    UInt16Type, UInt32Type, UInt64Type, UInt8Type, VectorType,
    BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
    DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
    Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
    IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
    StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType,
    TimestampNanosecondType, TimestampSecondType, TimestampType, UInt16Type, UInt32Type,
    UInt64Type, UInt8Type, VectorType,
};
use crate::value::Value;
use crate::vectors::MutableVector;
@@ -68,7 +68,6 @@ pub enum ConcreteDataType {

    // Date and time types:
    Date(DateType),
    DateTime(DateTimeType),
    Timestamp(TimestampType),
    Time(TimeType),

@@ -107,7 +106,6 @@ impl fmt::Display for ConcreteDataType {
            ConcreteDataType::Binary(v) => write!(f, "{}", v.name()),
            ConcreteDataType::String(v) => write!(f, "{}", v.name()),
            ConcreteDataType::Date(v) => write!(f, "{}", v.name()),
            ConcreteDataType::DateTime(v) => write!(f, "{}", v.name()),
            ConcreteDataType::Timestamp(t) => match t {
                TimestampType::Second(v) => write!(f, "{}", v.name()),
                TimestampType::Millisecond(v) => write!(f, "{}", v.name()),
@@ -163,7 +161,6 @@ impl ConcreteDataType {
            self,
            ConcreteDataType::String(_)
                | ConcreteDataType::Date(_)
                | ConcreteDataType::DateTime(_)
                | ConcreteDataType::Timestamp(_)
                | ConcreteDataType::Time(_)
                | ConcreteDataType::Interval(_)
@@ -183,7 +180,6 @@ impl ConcreteDataType {
                | ConcreteDataType::Int32(_)
                | ConcreteDataType::Int64(_)
                | ConcreteDataType::Date(_)
                | ConcreteDataType::DateTime(_)
                | ConcreteDataType::Timestamp(_)
                | ConcreteDataType::Time(_)
                | ConcreteDataType::Interval(_)
@@ -385,7 +381,7 @@ impl ConcreteDataType {
            &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA",
            &ConcreteDataType::String(_) => "VARCHAR",
            &ConcreteDataType::Date(_) => "DATE",
            &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
            &ConcreteDataType::Timestamp(_) => "TIMESTAMP",
            &ConcreteDataType::Time(_) => "TIME",
            &ConcreteDataType::Interval(_) => "INTERVAL",
            &ConcreteDataType::Decimal128(_) => "NUMERIC",
@@ -402,7 +398,7 @@ impl ConcreteDataType {
            &ConcreteDataType::Binary(_) => "_BYTEA",
            &ConcreteDataType::String(_) => "_VARCHAR",
            &ConcreteDataType::Date(_) => "_DATE",
            &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
            &ConcreteDataType::Timestamp(_) => "_TIMESTAMP",
            &ConcreteDataType::Time(_) => "_TIME",
            &ConcreteDataType::Interval(_) => "_INTERVAL",
            &ConcreteDataType::Decimal128(_) => "_NUMERIC",
@@ -441,7 +437,6 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
            ArrowDataType::Float32 => Self::float32_datatype(),
            ArrowDataType::Float64 => Self::float64_datatype(),
            ArrowDataType::Date32 => Self::date_datatype(),
            ArrowDataType::Date64 => Self::datetime_datatype(),
            ArrowDataType::Timestamp(u, _) => ConcreteDataType::from_arrow_time_unit(u),
            ArrowDataType::Interval(u) => ConcreteDataType::from_arrow_interval_unit(u),
            ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
@@ -490,7 +485,7 @@ macro_rules! impl_new_concrete_type_functions {

impl_new_concrete_type_functions!(
    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
    Binary, Date, DateTime, String, Json
    Binary, Date, String, Json
);

impl ConcreteDataType {
@@ -814,7 +809,6 @@ mod tests {
        assert!(ConcreteDataType::string_datatype().is_stringifiable());
        assert!(ConcreteDataType::binary_datatype().is_stringifiable());
        assert!(ConcreteDataType::date_datatype().is_stringifiable());
        assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
@@ -843,7 +837,6 @@ mod tests {
        assert!(ConcreteDataType::int32_datatype().is_signed());
        assert!(ConcreteDataType::int64_datatype().is_signed());
        assert!(ConcreteDataType::date_datatype().is_signed());
        assert!(ConcreteDataType::datetime_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
@@ -878,7 +871,6 @@ mod tests {
        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
        assert!(!ConcreteDataType::date_datatype().is_unsigned());
        assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());

@@ -15,7 +15,7 @@
use std::any::Any;

use common_decimal::Decimal128;
use common_time::{Date, DateTime};
use common_time::Date;

use crate::types::{
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
@@ -23,8 +23,8 @@ use crate::types::{
};
use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{
    BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, ListVector,
    MutableVector, PrimitiveVector, StringVector, Vector,
    BinaryVector, BooleanVector, DateVector, Decimal128Vector, ListVector, MutableVector,
    PrimitiveVector, StringVector, Vector,
};

fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
@@ -302,27 +302,6 @@ impl ScalarRef<'_> for Decimal128 {
    }
}

impl Scalar for DateTime {
    type VectorType = DateTimeVector;
    type RefType<'a> = DateTime;

    fn as_scalar_ref(&self) -> Self::RefType<'_> {
        *self
    }

    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
        long
    }
}

impl ScalarRef<'_> for DateTime {
    type ScalarType = DateTime;

    fn to_owned_scalar(&self) -> Self::ScalarType {
        *self
    }
}

// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.

impl Scalar for ListValue {
@@ -428,13 +407,6 @@ mod tests {
        assert_eq!(decimal, decimal.to_owned_scalar());
    }

    #[test]
    fn test_datetime_scalar() {
        let dt = DateTime::new(123);
        assert_eq!(dt, dt.as_scalar_ref());
        assert_eq!(dt, dt.to_owned_scalar());
    }

    #[test]
    fn test_list_value_scalar() {
        let list_value =

@@ -40,9 +40,6 @@ pub enum LogicalTypeId {
    /// Date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
    /// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
    DateTime,

    TimestampSecond,
    TimestampMillisecond,
@@ -100,7 +97,6 @@ impl LogicalTypeId {
            LogicalTypeId::String => ConcreteDataType::string_datatype(),
            LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
            LogicalTypeId::Date => ConcreteDataType::date_datatype(),
            LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
            LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
            LogicalTypeId::TimestampMillisecond => {
                ConcreteDataType::timestamp_millisecond_datatype()

@@ -16,7 +16,6 @@ mod binary_type;
mod boolean_type;
pub mod cast;
mod date_type;
mod datetime_type;
mod decimal_type;
mod dictionary_type;
mod duration_type;
@@ -34,7 +33,6 @@ pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use cast::{cast, cast_with_opt};
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use decimal_type::Decimal128Type;
pub use dictionary_type::DictionaryType;
pub use duration_type::{

@@ -119,10 +119,6 @@ pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool {
        (Date(_), Int32(_) | Timestamp(_) | String(_)) => true,
        (Int32(_) | String(_) | Timestamp(_), Date(_)) => true,
        (Date(_), Date(_)) => true,
        // DateTime type
        (DateTime(_), Int64(_) | Timestamp(_) | String(_)) => true,
        (Int64(_) | Timestamp(_) | String(_), DateTime(_)) => true,
        (DateTime(_), DateTime(_)) => true,
        // Timestamp type
        (Timestamp(_), Int64(_) | String(_)) => true,
        (Int64(_) | String(_), Timestamp(_)) => true,
@@ -175,7 +171,7 @@ mod tests {
    use common_base::bytes::StringBytes;
    use common_time::time::Time;
    use common_time::timezone::set_default_timezone;
    use common_time::{Date, DateTime, Timestamp};
    use common_time::{Date, Timestamp};
    use ordered_float::OrderedFloat;

    use super::*;
@@ -274,7 +270,6 @@ mod tests {
            null_datatype,
            boolean_datatype,
            date_datatype,
            datetime_datatype,
            timestamp_second_datatype,
            binary_datatype
        );
@@ -287,23 +282,12 @@ mod tests {
            timestamp_second_datatype,
            string_datatype
        );

        // datetime -> other types
        test_can_cast!(
            Value::DateTime(DateTime::from_str_system("2021-01-01 00:00:00").unwrap()),
            null_datatype,
            int64_datatype,
            timestamp_second_datatype,
            string_datatype
        );

        // timestamp -> other types
        test_can_cast!(
            Value::Timestamp(Timestamp::from_str_utc("2021-01-01 00:00:00").unwrap()),
            null_datatype,
            int64_datatype,
            date_datatype,
            datetime_datatype,
            string_datatype
        );

@@ -55,7 +55,6 @@ impl DataType for DateType {
            Value::Int32(v) => Some(Value::Date(Date::from(v))),
            Value::String(v) => Date::from_str_utc(v.as_utf8()).map(Value::Date).ok(),
            Value::Timestamp(v) => v.to_chrono_date().map(|date| Value::Date(date.into())),
            Value::DateTime(v) => Some(Value::DateTime(v)),
            _ => None,
        }
    }
@@ -1,140 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use arrow::datatypes::{DataType as ArrowDataType, Date64Type};
use common_time::DateTime;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;

use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::prelude::{LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector};
use crate::types::LogicalPrimitiveType;
use crate::vectors::{DateTimeVector, DateTimeVectorBuilder, PrimitiveVector};

const MILLISECOND_VARIATION: u64 = 3;
/// Data type for [`DateTime`].
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DateTimeType;

impl DateTimeType {
    pub fn precision(&self) -> u64 {
        MILLISECOND_VARIATION
    }
}

impl DataType for DateTimeType {
    fn name(&self) -> String {
        "DateTime".to_string()
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::DateTime
    }

    fn default_value(&self) -> Value {
        Value::DateTime(DateTime::default())
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Date64
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(DateTimeVectorBuilder::with_capacity(capacity))
    }

    fn try_cast(&self, from: Value) -> Option<Value> {
        match from {
            Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),
            Value::Timestamp(v) => v.to_chrono_datetime().map(|d| Value::DateTime(d.into())),
            Value::String(v) => DateTime::from_str_system(v.as_utf8())
                .map(Value::DateTime)
                .ok(),
            _ => None,
        }
    }
}

impl LogicalPrimitiveType for DateTimeType {
    type ArrowPrimitive = Date64Type;
    type Native = i64;
    type Wrapper = DateTime;
    type LargestType = Self;

    fn build_data_type() -> ConcreteDataType {
        ConcreteDataType::datetime_datatype()
    }

    fn type_name() -> &'static str {
        "DateTime"
    }

    fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>> {
        vector
            .as_any()
            .downcast_ref::<DateTimeVector>()
            .with_context(|| error::CastTypeSnafu {
                msg: format!(
                    "Failed to cast {} to DateTimeVector",
                    vector.vector_type_name()
                ),
            })
    }

    fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>> {
        match value {
            ValueRef::Null => Ok(None),
            ValueRef::DateTime(v) => Ok(Some(v)),
            other => error::CastTypeSnafu {
                msg: format!("Failed to cast value {other:?} to DateTime"),
            }
            .fail(),
        }
    }
}

#[cfg(test)]
mod tests {

    use common_time::timezone::set_default_timezone;
    use common_time::Timestamp;

    use super::*;

    #[test]
    fn test_datetime_cast() {
        // cast from Int64
        let val = Value::Int64(1000);
        let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
        assert_eq!(dt, Value::DateTime(DateTime::from(1000)));

        // cast from String
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        let val = Value::String("1970-01-01 00:00:00+0800".into());
        let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
        assert_eq!(
            dt,
            Value::DateTime(DateTime::from_str_system("1970-01-01 00:00:00+0800").unwrap())
        );

        // cast from Timestamp
        let val = Value::Timestamp(Timestamp::from_str_utc("2020-09-08 21:42:29+0800").unwrap());
        let dt = ConcreteDataType::datetime_datatype().try_cast(val).unwrap();
        assert_eq!(
            dt,
            Value::DateTime(DateTime::from_str_system("2020-09-08 21:42:29+0800").unwrap())
        );
    }
}
@@ -16,7 +16,7 @@ use std::cmp::Ordering;
use std::fmt;

use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
use common_time::{Date, DateTime};
use common_time::Date;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;

@@ -25,7 +25,7 @@ use crate::error::{self, Result};
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
use crate::type_id::LogicalTypeId;
use crate::types::boolean_type::bool_to_numeric;
use crate::types::{DateTimeType, DateType};
use crate::types::DateType;
use crate::value::{Value, ValueRef};
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};

@@ -157,19 +157,6 @@ impl WrapperType for Date {
    }
}

impl WrapperType for DateTime {
    type LogicalType = DateTimeType;
    type Native = i64;

    fn from_native(value: Self::Native) -> Self {
        DateTime::new(value)
    }

    fn into_native(self) -> Self::Native {
        self.val()
    }
}

macro_rules! define_logical_primitive_type {
    ($Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident) => {
        // We need to define it as an empty struct `struct DataType {}` instead of a struct-unit
@@ -362,7 +349,6 @@ impl DataType for Int64Type {
            Value::Float32(v) => num::cast::cast(v).map(Value::Int64),
            Value::Float64(v) => num::cast::cast(v).map(Value::Int64),
            Value::String(v) => v.as_utf8().parse::<i64>().map(Value::Int64).ok(),
            Value::DateTime(v) => Some(Value::Int64(v.val())),
            Value::Timestamp(v) => Some(Value::Int64(v.value())),
            Value::Time(v) => Some(Value::Int64(v.value())),
            // We don't allow casting interval type to int.

@@ -75,7 +75,6 @@ impl DataType for StringType {
            Value::Float64(v) => Some(Value::String(StringBytes::from(v.to_string()))),
            Value::String(v) => Some(Value::String(v)),
            Value::Date(v) => Some(Value::String(StringBytes::from(v.to_string()))),
            Value::DateTime(v) => Some(Value::String(StringBytes::from(v.to_string()))),
            Value::Timestamp(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
            Value::Time(v) => Some(Value::String(StringBytes::from(v.to_iso8601_string()))),
            Value::IntervalYearMonth(v) => {

@@ -132,7 +132,6 @@ macro_rules! impl_data_type_for_timestamp {
            Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),
            Value::String(v) => Timestamp::from_str_utc(v.as_utf8()).map(Value::Timestamp).ok(),
            Value::Int64(v) => Some(Value::Timestamp(Timestamp::new(v, TimeUnit::$unit))),
            Value::DateTime(v) => Timestamp::new_second(v.val()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
            Value::Date(v) => Timestamp::new_second(v.to_secs()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
            _ => None
        }
@@ -202,7 +201,7 @@ impl_data_type_for_timestamp!(Microsecond);
#[cfg(test)]
mod tests {
    use common_time::timezone::set_default_timezone;
    use common_time::{Date, DateTime};
    use common_time::Date;

    use super::*;

@@ -249,13 +248,6 @@ mod tests {
            .unwrap();
        assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1694589525)));

        // Datetime -> TimestampSecond
        let dt = Value::DateTime(DateTime::from(1234567));
        let ts = ConcreteDataType::timestamp_second_datatype()
            .try_cast(dt)
            .unwrap();
        assert_eq!(ts, Value::Timestamp(Timestamp::new_second(1234567)));

        // Date -> TimestampMillisecond
        let d = Value::Date(Date::from_str_utc("1970-01-01").unwrap());
        let ts = ConcreteDataType::timestamp_millisecond_datatype()

@@ -24,7 +24,6 @@ use common_base::bytes::{Bytes, StringBytes};
use common_decimal::Decimal128;
use common_telemetry::error;
use common_time::date::Date;
use common_time::datetime::DateTime;
use common_time::interval::IntervalUnit;
use common_time::time::Time;
use common_time::timestamp::{TimeUnit, Timestamp};
@@ -75,7 +74,6 @@ pub enum Value {

    // Date & Time types:
    Date(Date),
    DateTime(DateTime),
    Timestamp(Timestamp),
    Time(Time),
    Duration(Duration),
@@ -112,7 +110,6 @@ impl Display for Value {
            write!(f, "{hex}")
        }
        Value::Date(v) => write!(f, "{v}"),
        Value::DateTime(v) => write!(f, "{v}"),
        Value::Timestamp(v) => write!(f, "{}", v.to_iso8601_string()),
        Value::Time(t) => write!(f, "{}", t.to_iso8601_string()),
        Value::IntervalYearMonth(v) => {
@@ -162,7 +159,6 @@ macro_rules! define_data_type_func {
        $struct::String(_) => ConcreteDataType::string_datatype(),
        $struct::Binary(_) => ConcreteDataType::binary_datatype(),
        $struct::Date(_) => ConcreteDataType::date_datatype(),
        $struct::DateTime(_) => ConcreteDataType::datetime_datatype(),
        $struct::Time(t) => ConcreteDataType::time_datatype(*t.unit()),
        $struct::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()),
        $struct::IntervalYearMonth(_) => {
@@ -222,7 +218,6 @@ impl Value {
            Value::String(v) => ValueRef::String(v.as_utf8()),
            Value::Binary(v) => ValueRef::Binary(v),
            Value::Date(v) => ValueRef::Date(*v),
            Value::DateTime(v) => ValueRef::DateTime(*v),
            Value::List(v) => ValueRef::List(ListValueRef::Ref { val: v }),
            Value::Timestamp(v) => ValueRef::Timestamp(*v),
            Value::Time(v) => ValueRef::Time(*v),
@@ -258,14 +253,6 @@ impl Value {
        }
    }

    /// Cast Value to DateTime. Return None if value is not a valid datetime data type.
    pub fn as_datetime(&self) -> Option<DateTime> {
        match self {
            Value::DateTime(t) => Some(*t),
            _ => None,
        }
    }

    /// Cast Value to [Time]. Return None if value is not a valid time data type.
    pub fn as_time(&self) -> Option<Time> {
        match self {
@@ -345,7 +332,6 @@ impl Value {
            Value::Binary(_) => LogicalTypeId::Binary,
            Value::List(_) => LogicalTypeId::List,
            Value::Date(_) => LogicalTypeId::Date,
            Value::DateTime(_) => LogicalTypeId::DateTime,
            Value::Timestamp(t) => match t.unit() {
                TimeUnit::Second => LogicalTypeId::TimestampSecond,
                TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond,
@@ -401,7 +387,6 @@ impl Value {
            Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
            Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())),
            Value::Date(v) => ScalarValue::Date32(Some(v.val())),
            Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
            Value::Null => to_null_scalar_value(output_type)?,
            Value::List(list) => {
                // Safety: The logical type of the value and output_type are the same.
@@ -463,7 +448,6 @@ impl Value {
            Value::Float64(x) => Some(Value::Float64(-*x)),
            Value::Decimal128(x) => Some(Value::Decimal128(x.negative())),
            Value::Date(x) => Some(Value::Date(x.negative())),
            Value::DateTime(x) => Some(Value::DateTime(x.negative())),
            Value::Timestamp(x) => Some(Value::Timestamp(x.negative())),
            Value::Time(x) => Some(Value::Time(x.negative())),
            Value::Duration(x) => Some(Value::Duration(x.negative())),
@@ -525,7 +509,6 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
        }
        ConcreteDataType::String(_) => ScalarValue::Utf8(None),
        ConcreteDataType::Date(_) => ScalarValue::Date32(None),
        ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
        ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None),
        ConcreteDataType::Interval(v) => match v {
            IntervalType::YearMonth(_) => ScalarValue::IntervalYearMonth(None),
@@ -631,7 +614,6 @@ macro_rules! impl_ord_for_value_like {
        ($Type::String(v1), $Type::String(v2)) => v1.cmp(v2),
        ($Type::Binary(v1), $Type::Binary(v2)) => v1.cmp(v2),
        ($Type::Date(v1), $Type::Date(v2)) => v1.cmp(v2),
        ($Type::DateTime(v1), $Type::DateTime(v2)) => v1.cmp(v2),
        ($Type::Timestamp(v1), $Type::Timestamp(v2)) => v1.cmp(v2),
        ($Type::Time(v1), $Type::Time(v2)) => v1.cmp(v2),
        ($Type::IntervalYearMonth(v1), $Type::IntervalYearMonth(v2)) => v1.cmp(v2),
@@ -712,7 +694,6 @@ impl_try_from_value!(String, StringBytes);
impl_try_from_value!(Binary, Bytes);
impl_try_from_value!(Date, Date);
impl_try_from_value!(Time, Time);
impl_try_from_value!(DateTime, DateTime);
impl_try_from_value!(Timestamp, Timestamp);
impl_try_from_value!(IntervalYearMonth, IntervalYearMonth);
impl_try_from_value!(IntervalDayTime, IntervalDayTime);
@@ -756,7 +737,6 @@ impl_value_from!(String, StringBytes);
impl_value_from!(Binary, Bytes);
impl_value_from!(Date, Date);
impl_value_from!(Time, Time);
impl_value_from!(DateTime, DateTime);
impl_value_from!(Timestamp, Timestamp);
impl_value_from!(IntervalYearMonth, IntervalYearMonth);
impl_value_from!(IntervalDayTime, IntervalDayTime);
@@ -803,7 +783,6 @@ impl TryFrom<Value> for serde_json::Value {
            Value::String(bytes) => serde_json::Value::String(bytes.into_string()),
            Value::Binary(bytes) => serde_json::to_value(bytes)?,
            Value::Date(v) => serde_json::Value::Number(v.val().into()),
            Value::DateTime(v) => serde_json::Value::Number(v.val().into()),
            Value::List(v) => serde_json::to_value(v)?,
            Value::Timestamp(v) => serde_json::to_value(v.value())?,
            Value::Time(v) => serde_json::to_value(v.value())?,
@@ -933,9 +912,6 @@ impl TryFrom<ScalarValue> for Value {
                Value::List(ListValue::new(items, datatype))
            }
            ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
            ScalarValue::Date64(d) => d
                .map(|x| Value::DateTime(DateTime::new(x)))
                .unwrap_or(Value::Null),
            ScalarValue::TimestampSecond(t, _) => t
                .map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Second)))
                .unwrap_or(Value::Null),
@@ -994,7 +970,8 @@ impl TryFrom<ScalarValue> for Value {
            | ScalarValue::Float16(_)
            | ScalarValue::Utf8View(_)
            | ScalarValue::BinaryView(_)
            | ScalarValue::Map(_) => {
            | ScalarValue::Map(_)
            | ScalarValue::Date64(_) => {
                return error::UnsupportedArrowTypeSnafu {
                    arrow_type: v.data_type(),
                }
@@ -1023,7 +1000,6 @@ impl From<ValueRef<'_>> for Value {
            ValueRef::String(v) => Value::String(v.into()),
            ValueRef::Binary(v) => Value::Binary(v.into()),
            ValueRef::Date(v) => Value::Date(v),
            ValueRef::DateTime(v) => Value::DateTime(v),
            ValueRef::Timestamp(v) => Value::Timestamp(v),
            ValueRef::Time(v) => Value::Time(v),
            ValueRef::IntervalYearMonth(v) => Value::IntervalYearMonth(v),
@@ -1063,7 +1039,6 @@ pub enum ValueRef<'a> {

    // Date & Time types:
    Date(Date),
    DateTime(DateTime),
    Timestamp(Timestamp),
    Time(Time),
    Duration(Duration),
@@ -1175,11 +1150,6 @@ impl<'a> ValueRef<'a> {
        impl_as_for_value_ref!(self, Date)
    }

    /// Cast itself to [DateTime].
    pub fn as_datetime(&self) -> Result<Option<DateTime>> {
        impl_as_for_value_ref!(self, DateTime)
    }

    /// Cast itself to [Timestamp].
    pub fn as_timestamp(&self) -> Result<Option<Timestamp>> {
        impl_as_for_value_ref!(self, Timestamp)
@@ -1263,7 +1233,6 @@ impl_value_ref_from!(Int64, i64);
impl_value_ref_from!(Float32, f32);
impl_value_ref_from!(Float64, f64);
impl_value_ref_from!(Date, Date);
impl_value_ref_from!(DateTime, DateTime);
impl_value_ref_from!(Timestamp, Timestamp);
impl_value_ref_from!(Time, Time);
impl_value_ref_from!(IntervalYearMonth, IntervalYearMonth);
@@ -1327,7 +1296,6 @@ pub fn transform_value_ref_to_json_value<'a>(
        }
    }
    ValueRef::Date(v) => serde_json::Value::Number(v.val().into()),
    ValueRef::DateTime(v) => serde_json::Value::Number(v.val().into()),
    ValueRef::List(v) => serde_json::to_value(v)?,
    ValueRef::Timestamp(v) => serde_json::to_value(v.value())?,
    ValueRef::Time(v) => serde_json::to_value(v.value())?,
@@ -1426,7 +1394,6 @@ impl ValueRef<'_> {
        ValueRef::String(v) => std::mem::size_of_val(v),
        ValueRef::Binary(v) => std::mem::size_of_val(v),
        ValueRef::Date(_) => 4,
        ValueRef::DateTime(_) => 8,
        ValueRef::Timestamp(_) => 16,
        ValueRef::Time(_) => 16,
        ValueRef::Duration(_) => 16,
@@ -1462,7 +1429,9 @@ pub fn column_data_to_json(data: ValueData) -> JsonValue {
            .unwrap_or(JsonValue::Null),
        ValueData::StringValue(s) => JsonValue::String(s),
        ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()),
        ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()),
        ValueData::DatetimeValue(d) => {
            JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string())
        }
        ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()),
        ValueData::TimeMillisecondValue(d) => {
            JsonValue::String(Time::new_millisecond(d).to_iso8601_string())
@@ -1511,6 +1480,7 @@ mod tests {

    #[test]
    fn test_column_data_to_json() {
        set_default_timezone(Some("Asia/Shanghai")).unwrap();
        assert_eq!(
            column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())),
            JsonValue::String("aGVsbG8=".to_string())
@@ -1569,31 +1539,31 @@ mod tests {
        );
        assert_eq!(
            column_data_to_json(ValueData::DatetimeValue(456)),
            JsonValue::String("1970-01-01 00:00:00.456+0000".to_string())
            JsonValue::String("1970-01-01 08:00:00.000456+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimeSecondValue(789)),
            JsonValue::String("00:13:09+0000".to_string())
            JsonValue::String("08:13:09+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimeMillisecondValue(789)),
            JsonValue::String("00:00:00.789+0000".to_string())
            JsonValue::String("08:00:00.789+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimeMicrosecondValue(789)),
            JsonValue::String("00:00:00.000789+0000".to_string())
            JsonValue::String("08:00:00.000789+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)),
            JsonValue::String("1970-01-15 06:56:07.890+0000".to_string())
            JsonValue::String("1970-01-15 14:56:07.890+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)),
            JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string())
            JsonValue::String("2009-02-14 07:31:30.123456789+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::TimestampSecondValue(1234567890)),
            JsonValue::String("2009-02-13 23:31:30+0000".to_string())
            JsonValue::String("2009-02-14 07:31:30+0800".to_string())
        );
        assert_eq!(
            column_data_to_json(ValueData::IntervalYearMonthValue(12)),
@@ -1758,12 +1728,6 @@ mod tests {
        );
        assert_eq!(Value::Null, ScalarValue::Date32(None).try_into().unwrap());

        assert_eq!(
            Value::DateTime(DateTime::new(456)),
            ScalarValue::Date64(Some(456)).try_into().unwrap()
        );
        assert_eq!(Value::Null, ScalarValue::Date64(None).try_into().unwrap());

        assert_eq!(
            Value::Timestamp(Timestamp::new(1, TimeUnit::Second)),
            ScalarValue::TimestampSecond(Some(1), None)
@@ -2027,10 +1991,6 @@ mod tests {
            &ConcreteDataType::date_datatype(),
            &Value::Date(Date::new(1)),
        );
        check_type_and_value(
            &ConcreteDataType::datetime_datatype(),
            &Value::DateTime(DateTime::new(1)),
        );
        check_type_and_value(
            &ConcreteDataType::timestamp_millisecond_datatype(),
            &Value::Timestamp(Timestamp::new_millisecond(1)),
@@ -2169,11 +2129,6 @@ mod tests {
            serde_json::Value::Number(5000i32.into()),
            to_json(Value::Date(Date::new(5000)))
        );
        assert_eq!(
            serde_json::Value::Number(5000i64.into()),
            to_json(Value::DateTime(DateTime::new(5000)))
        );

        assert_eq!(
            serde_json::Value::Number(1.into()),
            to_json(Value::Timestamp(Timestamp::new_millisecond(1)))
@@ -2259,7 +2214,6 @@ mod tests {
        );

        check_as_value_ref!(Date, Date::new(103));
        check_as_value_ref!(DateTime, DateTime::new(1034));

        let list = ListValue {
            items: vec![],
@@ -2291,7 +2245,6 @@ mod tests {
        check_as_null!(as_string);
        check_as_null!(as_boolean);
        check_as_null!(as_date);
        check_as_null!(as_datetime);
        check_as_null!(as_list);

        macro_rules! check_as_correct {
|
||||
@@ -2304,7 +2257,6 @@ mod tests {
|
||||
check_as_correct!("hello".as_bytes(), Binary, as_binary);
|
||||
check_as_correct!(true, Boolean, as_boolean);
|
||||
check_as_correct!(Date::new(123), Date, as_date);
|
||||
check_as_correct!(DateTime::new(12), DateTime, as_datetime);
|
||||
check_as_correct!(Time::new_second(12), Time, as_time);
|
||||
check_as_correct!(Duration::new_second(12), Duration, as_duration);
|
||||
let list = ListValue {
|
||||
@@ -2318,7 +2270,6 @@ mod tests {
|
||||
assert!(wrong_value.as_string().is_err());
|
||||
assert!(wrong_value.as_boolean().is_err());
|
||||
assert!(wrong_value.as_date().is_err());
|
||||
assert!(wrong_value.as_datetime().is_err());
|
||||
assert!(wrong_value.as_list().is_err());
|
||||
assert!(wrong_value.as_time().is_err());
|
||||
assert!(wrong_value.as_timestamp().is_err());
|
||||
@@ -2346,10 +2297,6 @@ mod tests {
|
||||
"010203"
|
||||
);
|
||||
assert_eq!(Value::Date(Date::new(0)).to_string(), "1970-01-01");
|
||||
assert_eq!(
|
||||
Value::DateTime(DateTime::new(0)).to_string(),
|
||||
"1970-01-01 08:00:00+0800"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1000, TimeUnit::Millisecond)).to_string(),
|
||||
"1970-01-01 08:00:01+0800"
|
||||
@@ -2755,7 +2702,6 @@ mod tests {
|
||||
check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4);
|
||||
check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8);
|
||||
check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(&ValueRef::IntervalYearMonth(IntervalYearMonth::new(1)), 4);
|
||||
|
||||
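These assertions only hold under the `Asia/Shanghai` default timezone set at the top of the test: `to_iso8601_string` renders time values with the session offset, so the same instant prints as `+0800` here and as `+0000` under UTC. A minimal sketch of that behaviour, using only the `common_time` helpers that appear in this diff:

```rust
use common_time::timestamp::TimeUnit;
use common_time::timezone::set_default_timezone;
use common_time::Timestamp;

fn main() {
    // The rendered string depends on the process-wide default timezone.
    set_default_timezone(Some("Asia/Shanghai")).unwrap();
    let ts = Timestamp::new(1234567890, TimeUnit::Second);
    // Prints the +0800 form, e.g. "2009-02-14 07:31:30+0800".
    println!("{}", ts.to_iso8601_string());
}
```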
@@ -29,7 +29,6 @@ mod binary;
mod boolean;
mod constant;
mod date;
mod datetime;
mod decimal;
mod duration;
mod eq;
@@ -48,7 +47,6 @@ pub use binary::{BinaryVector, BinaryVectorBuilder};
pub use boolean::{BooleanVector, BooleanVectorBuilder};
pub use constant::ConstantVector;
pub use date::{DateVector, DateVectorBuilder};
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
pub use decimal::{Decimal128Vector, Decimal128VectorBuilder};
pub use duration::{
DurationMicrosecondVector, DurationMicrosecondVectorBuilder, DurationMillisecondVector,
@@ -377,7 +375,7 @@ pub mod tests {
// Test Primitive types
mutable_primitive_data_type_eq_with_lower!(
Boolean, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64,
Date, DateTime, Binary, String
Date, Binary, String
);

// Test types about time

@@ -1,116 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::types::DateTimeType;
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};

/// Vector of [`DateTime`](common_time::DateTime)
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
/// Builder for [`DateTimeVector`].
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;

#[cfg(test)]
mod tests {
use std::sync::Arc;

use arrow::array::{Array, PrimitiveArray};
use arrow_array::ArrayRef;
use common_time::timezone::set_default_timezone;
use common_time::DateTime;

use super::*;
use crate::data_type::DataType;
use crate::prelude::{
ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
};
use crate::serialize::Serializable;

#[test]
fn test_datetime_vector() {
set_default_timezone(Some("Asia/Shanghai")).unwrap();
let v = DateTimeVector::new(PrimitiveArray::from(vec![1000, 2000, 3000]));
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
assert_eq!(3, v.len());
assert_eq!("DateTimeVector", v.vector_type_name());
assert_eq!(
&arrow::datatypes::DataType::Date64,
v.to_arrow_array().data_type()
);

assert_eq!(Some(DateTime::new(1000)), v.get_data(0));
assert_eq!(Value::DateTime(DateTime::new(1000)), v.get(0));
assert_eq!(ValueRef::DateTime(DateTime::new(1000)), v.get_ref(0));

let mut iter = v.iter_data();
assert_eq!(Some(DateTime::new(1000)), iter.next().unwrap());
assert_eq!(Some(DateTime::new(2000)), iter.next().unwrap());
assert_eq!(Some(DateTime::new(3000)), iter.next().unwrap());
assert!(!v.is_null(0));
assert_eq!(24, v.memory_size());

if let Value::DateTime(d) = v.get(0) {
assert_eq!(1000, d.val());
} else {
unreachable!()
}
assert_eq!(
"[\"1970-01-01 08:00:01+0800\",\"1970-01-01 08:00:02+0800\",\"1970-01-01 08:00:03+0800\"]",
serde_json::to_string(&v.serialize_to_json().unwrap()).unwrap()
);
}

#[test]
fn test_datetime_vector_builder() {
let mut builder = DateTimeVectorBuilder::with_capacity(3);
builder.push(Some(DateTime::new(1)));
builder.push(None);
builder.push(Some(DateTime::new(-1)));

let v = builder.finish();
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
assert_eq!(Value::Null, v.get(1));
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));

let input = DateTimeVector::from_wrapper_slice([
DateTime::new(1),
DateTime::new(2),
DateTime::new(3),
]);

let mut builder = DateTimeType.create_mutable_vector(3);
builder.push_value_ref(ValueRef::DateTime(DateTime::new(5)));
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();

let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice([
DateTime::new(5),
DateTime::new(2),
DateTime::new(3),
]));
assert_eq!(expect, vector);
}

#[test]
fn test_datetime_from_arrow() {
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
let arrow: ArrayRef = Arc::new(vector.as_arrow().slice(0, vector.len())) as _;
let vector2 = DateTimeVector::try_from_arrow_array(arrow).unwrap();
assert_eq!(vector, vector2);
}
}

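With `datetime.rs` deleted, `DateTimeVector` and its builder are gone; call sites migrate to the timestamp vectors that stay exported from `crate::vectors`. A hedged before/after sketch of the mechanical part of that migration (`from_values` is the constructor the remaining timestamp vectors expose elsewhere in this diff; whether a given call site should keep millisecond values or rescale to microseconds is an assumption to verify per caller, since this changeset maps the datetime type onto microsecond timestamps):

```rust
use datatypes::vectors::{TimestampMicrosecondVector, Vector};

fn migrated() -> TimestampMicrosecondVector {
    // Was: DateTimeVector::from(vec![Some(1000), Some(2000), Some(3000)])
    // Now: the time unit is explicit in the vector type.
    TimestampMicrosecondVector::from_values([1000, 2000, 3000])
}

fn main() {
    assert_eq!(3, migrated().len());
}
```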
@@ -20,12 +20,12 @@ use crate::data_type::DataType;
use crate::types::{DurationType, TimeType, TimestampType};
use crate::vectors::constant::ConstantVector;
use crate::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector,
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
DurationSecondVector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
IntervalYearMonthVector, ListVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
BinaryVector, BooleanVector, DateVector, Decimal128Vector, DurationMicrosecondVector,
DurationMillisecondVector, DurationNanosecondVector, DurationSecondVector,
IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector, ListVector,
PrimitiveVector, StringVector, TimeMicrosecondVector, TimeMillisecondVector,
TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use crate::with_match_primitive_type_id;

@@ -83,7 +83,6 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
Binary(_) | Json(_) | Vector(_) => is_vector_eq!(BinaryVector, lhs, rhs),
String(_) => is_vector_eq!(StringVector, lhs, rhs),
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
Timestamp(t) => match t {
TimestampType::Second(_) => {
is_vector_eq!(TimestampSecondVector, lhs, rhs)
@@ -195,7 +194,6 @@ mod tests {
)));
assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false])));
assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)])));
assert_vector_ref_eq(Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])));
assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120])));
assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([
100, 120,

@@ -31,7 +31,7 @@ use crate::prelude::DataType;
use crate::scalars::{Scalar, ScalarVectorBuilder};
use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Decimal128Vector,
BinaryVector, BooleanVector, ConstantVector, DateVector, Decimal128Vector,
DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector,
DurationSecondVector, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector,
Int8Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, IntervalYearMonthVector,
@@ -179,9 +179,6 @@ impl Helper {
ScalarValue::Date32(v) => {
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
}
ScalarValue::Date64(v) => {
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
}
ScalarValue::TimestampSecond(v, _) => {
// Timezone is unimplemented now.
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
@@ -244,7 +241,8 @@ impl Helper {
| ScalarValue::Float16(_)
| ScalarValue::Utf8View(_)
| ScalarValue::BinaryView(_)
| ScalarValue::Map(_) => {
| ScalarValue::Map(_)
| ScalarValue::Date64(_) => {
return error::ConversionSnafu {
from: format!("Unsupported scalar value: {value}"),
}
@@ -286,7 +284,6 @@ impl Helper {
Arc::new(StringVector::try_from_arrow_array(array)?)
}
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
ArrowDataType::Timestamp(unit, _) => match unit {
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
@@ -362,7 +359,8 @@ impl Helper {
| ArrowDataType::BinaryView
| ArrowDataType::Utf8View
| ArrowDataType::ListView(_)
| ArrowDataType::LargeListView(_) => {
| ArrowDataType::LargeListView(_)
| ArrowDataType::Date64 => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: array.as_ref().data_type().clone(),
}
@@ -411,9 +409,9 @@ impl Helper {
#[cfg(test)]
mod tests {
use arrow::array::{
ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray, Time32MillisecondArray,
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
@@ -424,7 +422,7 @@ mod tests {
use common_decimal::Decimal128;
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Duration};
use common_time::{Date, Duration};

use super::*;
use crate::value::Value;
@@ -466,16 +464,6 @@ mod tests {
}
}

#[test]
fn test_try_from_scalar_datetime_value() {
let vector = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
assert_eq!(3, vector.len());
for i in 0..vector.len() {
assert_eq!(Value::DateTime(DateTime::new(42)), vector.get(i));
}
}

#[test]
fn test_try_from_scalar_duration_value() {
let vector =
@@ -606,7 +594,6 @@ mod tests {
check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
check_try_into_vector(StringArray::from(vec!["hello", "world"]));
check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
check_try_into_vector(Date64Array::from(vec![1, 2, 3]));
let data = vec![None, Some(vec![Some(6), Some(7)])];
let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
check_try_into_vector(list_array);
@@ -734,7 +721,6 @@ mod tests {
check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
check_into_and_from(StringArray::from(vec!["hello", "world"]));
check_into_and_from(Date32Array::from(vec![1, 2, 3]));
check_into_and_from(Date64Array::from(vec![1, 2, 3]));

check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));

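Together with the removed `test_try_from_scalar_datetime_value`, the hunks above mean `ScalarValue::Date64` no longer has a vector representation here: it now lands in the unsupported arms of both `try_from_scalar_value` and the arrow-array conversion. A sketch of the caller-visible behaviour, assuming `Helper` is reachable at the path below as elsewhere in this diff:

```rust
use datafusion_common::ScalarValue;
use datatypes::vectors::Helper;

fn main() {
    // Date32 still converts into a DateVector of the requested length...
    assert!(Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).is_ok());
    // ...while Date64 now returns a conversion error instead of a DateTimeVector.
    assert!(Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).is_err());
}
```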
@@ -32,7 +32,7 @@ pub(crate) use filter_non_constant;
mod tests {
use std::sync::Arc;

use common_time::{Date, DateTime};
use common_time::Date;

use crate::scalars::ScalarVector;
use crate::timestamp::{
@@ -127,8 +127,6 @@ mod tests {
#[test]
fn test_filter_date_like() {
impl_filter_date_like_test!(DateVector, Date, new);
impl_filter_date_like_test!(DateTimeVector, DateTime, new);

impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
impl_filter_date_like_test!(
TimestampMillisecondVector,

@@ -105,7 +105,7 @@ pub(crate) fn find_unique_constant(
mod tests {
use std::sync::Arc;

use common_time::{Date, DateTime};
use common_time::Date;

use super::*;
use crate::timestamp::*;
@@ -358,7 +358,6 @@ mod tests {
#[test]
fn test_find_unique_date_like() {
impl_find_unique_date_like_test!(DateVector, Date, new);
impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);

@@ -41,7 +41,7 @@ mod tests {
use std::sync::Arc;

use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Timestamp};
use common_time::{Date, Timestamp};
use paste::paste;

use super::*;
@@ -161,8 +161,6 @@ mod tests {
#[test]
fn test_replicate_date_like() {
impl_replicate_date_like_test!(DateVector, Date, new);
impl_replicate_date_like_test!(DateTimeVector, DateTime, new);

impl_replicate_timestamp_test!(Second);
impl_replicate_timestamp_test!(Millisecond);
impl_replicate_timestamp_test!(Microsecond);

@@ -33,7 +33,7 @@ mod tests {
use std::sync::Arc;

use arrow::array::{PrimitiveArray, UInt32Array};
use common_time::{Date, DateTime};
use common_time::Date;

use crate::prelude::VectorRef;
use crate::scalars::ScalarVector;
@@ -105,7 +105,6 @@ mod tests {

// test date like type
take_time_like_test!(DateVector, Date, new);
take_time_like_test!(DateTimeVector, DateTime, new);
take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native);
take_time_like_test!(
TimestampMillisecondVector,

@@ -16,7 +16,6 @@ async-trait.workspace = true
bytes.workspace = true
cache.workspace = true
catalog.workspace = true
chrono.workspace = true
client.workspace = true
common-base.workspace = true
common-config.workspace = true
@@ -47,6 +46,7 @@ get-size2 = "0.1.2"
greptime-proto.workspace = true
# This fork of hydroflow is simply for keeping our dependency in our org, and to pin the version;
# otherwise it is the same as the upstream repo
chrono.workspace = true
http.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }
itertools.workspace = true

@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::refill::RefillTask;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::recording_rules::RecordingRuleEngine;
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};

mod flownode_impl;
@@ -64,7 +63,7 @@ pub(crate) mod refill;
mod stat;
#[cfg(test)]
mod tests;
pub(crate) mod util;
mod util;
mod worker;

pub(crate) mod node_context;
@@ -170,8 +169,6 @@ pub struct FlowWorkerManager {
flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>,
/// engine for recording rule
rule_engine: RecordingRuleEngine,
}

/// Building FlownodeManager
@@ -186,7 +183,6 @@ impl FlowWorkerManager {
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
rule_engine: RecordingRuleEngine,
) -> Self {
let srv_map = ManagedTableSource::new(
table_meta.table_info_manager().clone(),
@@ -209,7 +205,6 @@ impl FlowWorkerManager {
node_id,
flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None),
rule_engine,
}
}

@@ -218,6 +213,25 @@ impl FlowWorkerManager {
self
}

/// Create a flownode manager with `num_workers` workers
pub fn new_with_workers<'s>(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
num_workers: usize,
) -> (Self, Vec<Worker<'s>>) {
let mut zelf = Self::new(node_id, query_engine, table_meta);

let workers: Vec<_> = (0..num_workers)
.map(|_| {
let (handle, worker) = create_worker();
zelf.add_worker_handle(handle);
worker
})
.collect();
(zelf, workers)
}

/// add a worker handle to the manager, meaning the corresponding worker is under its management
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(handle);

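A hedged sketch of driving the new constructor from inside the crate (the `query_engine` and `table_meta` values are stand-ins built elsewhere, e.g. by a test harness, and each returned `Worker` still has to be moved onto its own thread and driven by its event loop):

```rust
// Sketch only: one WorkerHandle is registered on the manager per created worker.
let (manager, workers) =
    FlowWorkerManager::new_with_workers(None, query_engine, table_meta, 2);
assert_eq!(2, workers.len());
// Each `worker` would then run its loop on a dedicated thread, while `manager`
// talks to it exclusively through the registered handles.
```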
@@ -735,11 +749,7 @@ pub struct CreateFlowArgs {
/// Create&Remove flow
impl FlowWorkerManager {
/// remove a flow by its id
#[allow(unreachable_code)]
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.remove_flow(flow_id).await;

for handle in self.worker_handles.iter() {
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
@@ -755,10 +765,8 @@ impl FlowWorkerManager {
/// steps to create task:
/// 1. parse query into typed plan(and optional parse expire_after expr)
/// 2. render source/sink with output table id and used input table id
#[allow(clippy::too_many_arguments, unreachable_code)]
#[allow(clippy::too_many_arguments)]
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.create_flow(args).await;
let CreateFlowArgs {
flow_id,
sink_table_name,

@@ -13,7 +13,6 @@
// limitations under the License.

//! impl `FlowNode` trait for FlowNodeManager so standalone can call them

use std::collections::HashMap;

use api::v1::flow::{
@@ -153,13 +152,7 @@ impl Flownode for FlowWorkerManager {
}
}

#[allow(unreachable_code, unused)]
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
return self
.rule_engine
.handle_inserts(request)
.await
.map_err(to_meta_err(snafu::location!()));
// using try_read to ensure two things:
// 1. flush wouldn't happen until inserts before it are inserted
// 2. inserts happening concurrently with flush wouldn't be blocked by flush
@@ -212,15 +205,15 @@ impl Flownode for FlowWorkerManager {
.collect_vec();
let table_col_names = table_schema.relation_desc.names;
let table_col_names = table_col_names
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
let name_to_col = HashMap::<_, _>::from_iter(
insert_schema
.iter()

@@ -21,19 +21,31 @@ use crate::FlowWorkerManager;
impl FlowWorkerManager {
pub async fn gen_state_report(&self) -> FlowStat {
let mut full_report = BTreeMap::new();
let mut last_exec_time_map = BTreeMap::new();
for worker in self.worker_handles.iter() {
match worker.get_state_size().await {
Ok(state_size) => {
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)))
full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v)));
}
Err(err) => {
common_telemetry::error!(err; "Get flow stat size error");
}
}

match worker.get_last_exec_time_map().await {
Ok(last_exec_time) => {
last_exec_time_map
.extend(last_exec_time.into_iter().map(|(k, v)| (k as u32, v)));
}
Err(err) => {
common_telemetry::error!(err; "Get last exec time error");
}
}
}

FlowStat {
state_size: full_report,
last_exec_time_map,
}
}
}

@@ -41,7 +41,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
.build()
.unwrap();

let meta = TableMetaBuilder::default()
let meta = TableMetaBuilder::empty()
.schema(Arc::new(schema))
.primary_key_indices(vec![0])
.engine("engine")

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! Some utility functions

use std::sync::Arc;

use api::helper::ColumnDataTypeWrapper;

@@ -98,6 +98,10 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
self.state.set_current_ts(ts);
}

pub fn set_last_exec_time(&mut self, ts: repr::Timestamp) {
self.state.set_last_exec_time(ts);
}

/// Run all available subgraphs
///
/// return true if any subgraph actually executed
@@ -212,6 +216,21 @@ impl WorkerHandle {
.build()
})
}

pub async fn get_last_exec_time_map(&self) -> Result<BTreeMap<FlowId, i64>, Error> {
let ret = self
.itc_client
.call_with_resp(Request::QueryLastExecTimeMap)
.await?;
ret.into_query_last_exec_time_map().map_err(|ret| {
InternalSnafu {
reason: format!(
"Flow Node/Worker get_last_exec_time_map failed, expect Response::QueryLastExecTimeMap, found {ret:?}"
),
}
.build()
})
}
}

impl Drop for WorkerHandle {
@@ -335,6 +354,7 @@ impl<'s> Worker<'s> {
pub fn run_tick(&mut self, now: repr::Timestamp) {
for (_flow_id, task_state) in self.task_states.iter_mut() {
task_state.set_current_ts(now);
task_state.set_last_exec_time(now);
task_state.run_available();
}
}
@@ -395,6 +415,15 @@ impl<'s> Worker<'s> {
}
Some(Response::QueryStateSize { result: ret })
}
Request::QueryLastExecTimeMap => {
let mut ret = BTreeMap::new();
for (flow_id, task_state) in self.task_states.iter() {
if let Some(last_exec_time) = task_state.state.last_exec_time() {
ret.insert(*flow_id, last_exec_time);
}
}
Some(Response::QueryLastExecTimeMap { result: ret })
}
};
Ok(ret)
}
@@ -427,6 +456,7 @@ pub enum Request {
},
Shutdown,
QueryStateSize,
QueryLastExecTimeMap,
}

#[derive(Debug, EnumAsInner)]
@@ -446,6 +476,10 @@ enum Response {
/// each flow tasks' state size
result: BTreeMap<FlowId, usize>,
},
QueryLastExecTimeMap {
/// each flow tasks' last execution time
result: BTreeMap<FlowId, i64>,
},
}

fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) {

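Tying the pieces above together: `run_tick` stamps `last_exec_time` on every task state, and `gen_state_report` later collects those stamps through the new inter-thread request. A sketch of the round trip from the manager side (an async context and a running worker loop are assumed):

```rust
// For each worker, send Request::QueryLastExecTimeMap over the ITC channel and
// await the matching Response::QueryLastExecTimeMap from the worker loop.
for handle in manager.worker_handles.iter() {
    let per_flow = handle.get_last_exec_time_map().await?; // BTreeMap<FlowId, i64>
    for (flow_id, ts_millis) in per_flow {
        common_telemetry::info!("flow {flow_id} last executed at {ts_millis} ms");
    }
}
```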
@@ -290,7 +290,9 @@ mod test {
let mfp = MapFilterProject::new(1)
.filter(vec![
ScalarExpr::Column(0)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_unary(expr::UnaryFunc::Cast(
ConcreteDataType::timestamp_microsecond_datatype(),
))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Gte,
@@ -300,7 +302,9 @@ mod test {
ScalarExpr::literal(4i64.into(), ConcreteDataType::int64_datatype()),
BinaryFunc::SubInt64,
)
.call_unary(expr::UnaryFunc::Cast(ConcreteDataType::datetime_datatype()))
.call_unary(expr::UnaryFunc::Cast(
ConcreteDataType::timestamp_microsecond_datatype(),
))
.call_binary(
ScalarExpr::CallUnmaterializable(expr::UnmaterializableFunc::Now),
BinaryFunc::Lt,

@@ -45,6 +45,8 @@ pub struct DataflowState {
arrange_used: Vec<ArrangeHandler>,
/// the time after which the arrangement needs to be expired, in milliseconds
expire_after: Option<Timestamp>,
/// the last time each subgraph executed
last_exec_time: Option<Timestamp>,
}

impl DataflowState {
@@ -114,6 +116,14 @@ impl DataflowState {
pub fn get_state_size(&self) -> usize {
self.arrange_used.iter().map(|x| x.read().get_size()).sum()
}

pub fn set_last_exec_time(&mut self, time: Timestamp) {
self.last_exec_time = Some(time);
}

pub fn last_exec_time(&self) -> Option<Timestamp> {
self.last_exec_time
}
}

#[derive(Debug, Clone)]

@@ -479,7 +479,7 @@ impl ScalarUDFImpl for TumbleExpand {
match (arg_types.first(), arg_types.get(1), arg_types.get(2)) {
(Some(ts), Some(window), opt) => {
use arrow_schema::DataType::*;
if !matches!(ts, Date32 | Date64 | Timestamp(_, _)) {
if !matches!(ts, Date32 | Timestamp(_, _)) {
return Err(DataFusionError::Plan(
format!("Expect timestamp column as first arg for tumble_start, found {:?}", ts)
));
@@ -491,7 +491,7 @@ impl ScalarUDFImpl for TumbleExpand {
}

if let Some(start_time) = opt{
if !matches!(start_time, Utf8 | Date32 | Date64 | Timestamp(_, _)){
if !matches!(start_time, Utf8 | Date32 | Timestamp(_, _)){
return Err(DataFusionError::Plan(
format!("Expect start_time to either be date, timestamp or string, found {:?}", start_time)
));

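With `Date64` dropped from both `matches!` checks, `tumble`'s arguments narrow to `Date32`/`Timestamp` for the time column, plus `Utf8` for the optional `start_time`. A self-contained sketch mirroring the narrowed first-argument check:

```rust
use arrow_schema::DataType;

// Mirrors the first-argument check in TumbleExpand after this change:
// Date64 is no longer accepted as the tumble timestamp column.
fn is_valid_tumble_ts(ts: &DataType) -> bool {
    matches!(ts, DataType::Date32 | DataType::Timestamp(_, _))
}

fn main() {
    assert!(is_valid_tumble_ts(&DataType::Date32));
    assert!(!is_valid_tumble_ts(&DataType::Date64));
}
```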
@@ -16,7 +16,6 @@

use std::any::Any;

use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
use common_macro::stack_trace_debug;
@@ -54,13 +53,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Time error"))]
Time {
source: common_time::error::Error,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -164,15 +156,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
Datafusion {
#[snafu(source)]
@@ -247,7 +230,6 @@ impl ErrorExt for Error {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Arrow { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
@@ -256,9 +238,7 @@ impl ErrorExt for Error {
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => {
StatusCode::EngineExecuteQuery
}
Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported

@@ -554,8 +554,6 @@ fn get_ts_as_millisecond(arg: Value) -> Result<repr::Timestamp, EvalError> {
ts.convert_to(TimeUnit::Millisecond)
.context(OverflowSnafu)?
.value()
} else if let Some(ts) = arg.as_datetime() {
ts.val()
} else {
InvalidArgumentSnafu {
reason: "Expect input to be timestamp or datetime type",

@@ -759,7 +759,7 @@ fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> b
#[allow(clippy::too_many_lines)]
#[cfg(test)]
mod test {
use common_time::DateTime;
use common_time::Timestamp;

use super::*;

@@ -813,13 +813,13 @@ mod test {
(
AggregateFunc::MaxDateTime,
vec![
(Value::DateTime(DateTime::from(0)), 1),
(Value::DateTime(DateTime::from(1)), 1),
(Value::Timestamp(Timestamp::from(0)), 1),
(Value::Timestamp(Timestamp::from(1)), 1),
(Value::Null, 1),
],
(
Value::DateTime(DateTime::from(1)),
vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
Value::Timestamp(Timestamp::from(1)),
vec![Value::Timestamp(Timestamp::from(1)), 2i64.into()],
),
),
(

@@ -267,7 +267,7 @@ impl AggregateFunc {
MaxBool => (boolean_datatype, Max),
MaxString => (string_datatype, Max),
MaxDate => (date_datatype, Max),
MaxDateTime => (datetime_datatype, Max),
MaxDateTime => (timestamp_microsecond_datatype, Max),
MaxTimestamp => (timestamp_second_datatype, Max),
MaxTime => (time_second_datatype, Max),
MaxDuration => (duration_second_datatype, Max),
@@ -283,7 +283,7 @@ impl AggregateFunc {
MinBool => (boolean_datatype, Min),
MinString => (string_datatype, Min),
MinDate => (date_datatype, Min),
MinDateTime => (datetime_datatype, Min),
MinDateTime => (timestamp_microsecond_datatype, Min),
MinTimestamp => (timestamp_second_datatype, Min),
MinTime => (time_second_datatype, Min),
MinDuration => (duration_second_datatype, Min),

@@ -238,7 +238,6 @@ mod test {

for (sql, current, expected) in &testcases {
let plan = sql_to_substrait(engine.clone(), sql).await;

let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
.await

@@ -130,6 +130,13 @@ impl HeartbeatTask {

pub fn shutdown(&self) {
info!("Close heartbeat task for flownode");
if self
.running
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Call close heartbeat task multiple times");
}
}

fn new_heartbeat_request(
@@ -147,14 +154,18 @@ impl HeartbeatTask {
};
let flow_stat = latest_report
.as_ref()
.map(|report| {
report
.map(|report| api::v1::meta::FlowStat {
flow_stat_size: report
.state_size
.iter()
.map(|(k, v)| (*k, *v as u64))
.collect()
})
.map(|f| api::v1::meta::FlowStat { flow_stat_size: f });
.collect(),
flow_last_exec_time_map: report
.last_exec_time_map
.iter()
.map(|(k, v)| (*k, *v))
.collect(),
});

Some(HeartbeatRequest {
mailbox_message,

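The reworked heartbeat forwards both maps from the latest report instead of only the state sizes. A sketch of the payload shape it now builds (field names as in the `api::v1::meta::FlowStat` usage above):

```rust
// Given a gen_state_report() result `report`, the heartbeat payload carries two maps:
let stat = api::v1::meta::FlowStat {
    // flow id -> in-memory state size (bytes)
    flow_stat_size: report
        .state_size
        .iter()
        .map(|(k, v)| (*k, *v as u64))
        .collect(),
    // flow id -> last execution time (milliseconds)
    flow_last_exec_time_map: report
        .last_exec_time_map
        .iter()
        .map(|(k, v)| (*k, *v))
        .collect(),
};
```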
@@ -33,7 +33,6 @@ mod expr;
pub mod heartbeat;
mod metrics;
mod plan;
mod recording_rules;
mod repr;
mod server;
mod transform;
@@ -44,5 +43,4 @@ mod test_utils;

pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use error::{Error, Result};
pub use recording_rules::FrontendClient;
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

@@ -28,32 +28,6 @@ lazy_static! {
&["table_id"]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_query_time",
"flow rule engine query time",
&["flow_id"],
vec![
0.0,
1.,
3.,
5.,
10.,
20.,
30.,
60.,
2. * 60.,
5. * 60.,
10. * 60.
]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_slow_query",
"flow rule engine slow query",
&["flow_id", "sql", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

@@ -1,940 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Run flow as recording rule which is time-window-aware normal query triggered every tick set by user
|
||||
|
||||
mod engine;
|
||||
mod frontend_client;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::pb_value_to_value_ref;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::DfRecordBatch;
|
||||
use common_telemetry::warn;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::logical_expr::Expr;
|
||||
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion::sql::unparser::Unparser;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
|
||||
use datafusion_common::{DFSchema, TableReference};
|
||||
use datafusion_expr::{ColumnarValue, LogicalPlan};
|
||||
use datafusion_physical_expr::PhysicalExprRef;
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::schema::TIME_INDEX_KEY;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
|
||||
TimestampSecondVector, Vector,
|
||||
};
|
||||
pub use engine::RecordingRuleEngine;
|
||||
pub use frontend_client::FrontendClient;
|
||||
use itertools::Itertools;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::adapter::util::from_proto_to_data_type;
|
||||
use crate::df_optimizer::apply_df_optimizer;
|
||||
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
|
||||
use crate::expr::error::DataTypeSnafu;
|
||||
use crate::Error;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TimeWindowExpr {
|
||||
phy_expr: PhysicalExprRef,
|
||||
column_name: String,
|
||||
logical_expr: Expr,
|
||||
df_schema: DFSchema,
|
||||
}
|
||||
|
||||
impl TimeWindowExpr {
|
||||
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
|
||||
let phy_planner = DefaultPhysicalPlanner::default();
|
||||
|
||||
let phy_expr: PhysicalExprRef = phy_planner
|
||||
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
|
||||
.with_context(|_e| DatafusionSnafu {
|
||||
context: format!(
|
||||
"Failed to create physical expression from {expr:?} using {df_schema:?}"
|
||||
),
|
||||
})?;
|
||||
Ok(Self {
|
||||
phy_expr,
|
||||
column_name: column_name.to_string(),
|
||||
logical_expr: expr.clone(),
|
||||
df_schema: df_schema.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn eval(
|
||||
&self,
|
||||
current: Timestamp,
|
||||
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
|
||||
let lower_bound =
|
||||
find_expr_time_window_lower_bound(&self.logical_expr, &self.df_schema, current)?;
|
||||
let upper_bound =
|
||||
find_expr_time_window_upper_bound(&self.logical_expr, &self.df_schema, current)?;
|
||||
Ok((lower_bound, upper_bound))
|
||||
}
|
||||
|
||||
/// Find timestamps from rows using time window expr
|
||||
pub async fn handle_rows(
|
||||
&self,
|
||||
rows_list: Vec<api::v1::Rows>,
|
||||
) -> Result<BTreeSet<Timestamp>, Error> {
|
||||
let mut time_windows = BTreeSet::new();
|
||||
|
||||
for rows in rows_list {
|
||||
// pick the time index column and use it to eval on `self.expr`
|
||||
let ts_col_index = rows
|
||||
.schema
|
||||
.iter()
|
||||
.map(|col| col.column_name.clone())
|
||||
.position(|name| name == self.column_name);
|
||||
let Some(ts_col_index) = ts_col_index else {
|
||||
warn!("can't found time index column in schema: {:?}", rows.schema);
|
||||
continue;
|
||||
};
|
||||
let col_schema = &rows.schema[ts_col_index];
|
||||
let cdt = from_proto_to_data_type(col_schema)?;
|
||||
|
||||
let column_values = rows
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| &row.values[ts_col_index])
|
||||
.collect_vec();
|
||||
|
||||
let mut vector = cdt.create_mutable_vector(column_values.len());
|
||||
for value in column_values {
|
||||
let value = pb_value_to_value_ref(value, &None);
|
||||
vector.try_push_value_ref(value).context(DataTypeSnafu {
|
||||
msg: "Failed to convert rows to columns",
|
||||
})?;
|
||||
}
|
||||
let vector = vector.to_vector();
|
||||
|
||||
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
|
||||
|
||||
let rb =
|
||||
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
|
||||
.with_context(|_e| ArrowSnafu {
|
||||
context: format!(
|
||||
"Failed to create record batch from {df_schema:?} and {vector:?}"
|
||||
),
|
||||
})?;
|
||||
|
||||
let eval_res = self
|
||||
.phy_expr
|
||||
.evaluate(&rb)
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!(
|
||||
"Failed to evaluate physical expression {:?} on {rb:?}",
|
||||
self.phy_expr
|
||||
),
|
||||
})?;
|
||||
|
||||
let res = columnar_to_ts_vector(&eval_res)?;
|
||||
|
||||
for ts in res.into_iter().flatten() {
|
||||
time_windows.insert(ts);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(time_windows)
|
||||
}
|
||||
}
|
||||
|
||||
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
|
||||
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
|
||||
name,
|
||||
cdt.as_arrow_type(),
|
||||
false,
|
||||
)]));
|
||||
|
||||
let df_schema = DFSchema::from_field_specific_qualified_schema(
|
||||
vec![Some(TableReference::bare("TimeIndexOnlyTable"))],
|
||||
&arrow_schema,
|
||||
)
|
||||
.with_context(|_e| DatafusionSnafu {
|
||||
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
|
||||
})?;
|
||||
|
||||
Ok(df_schema)
|
||||
}
|
||||
|
||||
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
|
||||
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
|
||||
let val = match columnar {
|
||||
datafusion_expr::ColumnarValue::Array(array) => {
|
||||
let ty = array.data_type();
|
||||
let ty = ConcreteDataType::from_arrow_type(ty);
|
||||
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
|
||||
ty.unit()
|
||||
} else {
|
||||
return UnexpectedSnafu {
|
||||
reason: format!("Non-timestamp type: {ty:?}"),
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
match time_unit {
|
||||
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec(),
|
||||
TimeUnit::Millisecond => {
|
||||
TimestampMillisecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
TimestampNanosecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
}
|
||||
}
|
||||
datafusion_expr::ColumnarValue::Scalar(scalar) => {
|
||||
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to convert scalar {scalar:?} to value"),
|
||||
})?;
|
||||
let ts = value.as_timestamp().context(UnexpectedSnafu {
|
||||
reason: format!("Expect Timestamp, found {:?}", value),
|
||||
})?;
|
||||
vec![Some(ts)]
|
||||
}
|
||||
};
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
/// Convert sql to datafusion logical plan
|
||||
pub async fn sql_to_df_plan(
|
||||
query_ctx: QueryContextRef,
|
||||
engine: QueryEngineRef,
|
||||
sql: &str,
|
||||
optimize: bool,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let plan = engine
|
||||
.planner()
|
||||
.plan(&stmt, query_ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let plan = if optimize {
|
||||
apply_df_optimizer(plan).await?
|
||||
} else {
|
||||
plan
|
||||
};
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
/// Return (the column name of time index column, the time window expr, the expected time unit of time index column, the expr's schema for evaluating the time window)
|
||||
async fn find_time_window_expr(
|
||||
plan: &LogicalPlan,
|
||||
catalog_man: CatalogManagerRef,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
|
||||
// TODO(discord9): find the expr that do time window
|
||||
|
||||
let mut table_name = None;
|
||||
|
||||
// first find the table source in the logical plan
|
||||
plan.apply(|plan| {
|
||||
let LogicalPlan::TableScan(table_scan) = plan else {
|
||||
return Ok(TreeNodeRecursion::Continue);
|
||||
};
|
||||
table_name = Some(table_scan.table_name.clone());
|
||||
Ok(TreeNodeRecursion::Stop)
|
||||
})
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Can't find table source in plan {plan:?}"),
|
||||
})?;
|
||||
let Some(table_name) = table_name else {
|
||||
UnexpectedSnafu {
|
||||
reason: format!("Can't find table source in plan {plan:?}"),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
let current_schema = query_ctx.current_schema();
|
||||
|
||||
let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
|
||||
let schema_name = table_name.schema().unwrap_or(¤t_schema);
|
||||
let table_name = table_name.table();
|
||||
|
||||
let Some(table_ref) = catalog_man
|
||||
.table(catalog_name, schema_name, table_name, Some(&query_ctx))
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?
|
||||
else {
|
||||
UnexpectedSnafu {
|
||||
reason: format!(
|
||||
"Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
|
||||
),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
let schema = &table_ref.table_info().meta.schema;
|
||||
|
||||
let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
|
||||
reason: format!("Can't find timestamp column in table {table_name:?}"),
|
||||
})?;
|
||||
|
||||
let ts_col_name = ts_index.name.clone();
|
||||
|
||||
let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
|
||||
reason: format!(
|
||||
"Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
|
||||
),
|
||||
})?.unit();
|
||||
|
||||
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
|
||||
ts_col_name.clone(),
|
||||
ts_index.data_type.as_arrow_type(),
|
||||
false,
|
||||
)]));
|
||||
|
||||
let df_schema = DFSchema::from_field_specific_qualified_schema(
|
||||
vec![Some(TableReference::bare(table_name))],
|
||||
&arrow_schema,
|
||||
)
|
||||
.with_context(|_e| DatafusionSnafu {
|
||||
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
|
||||
})?;
|
||||
|
||||
// find the time window expr which refers to the time index column
|
||||
let mut aggr_expr = None;
|
||||
let mut time_window_expr: Option<Expr> = None;
|
||||
|
||||
let find_inner_aggr_expr = |plan: &LogicalPlan| {
|
||||
if let LogicalPlan::Aggregate(aggregate) = plan {
|
||||
aggr_expr = Some(aggregate.clone());
|
||||
};
|
||||
|
||||
Ok(TreeNodeRecursion::Continue)
|
||||
};
|
||||
plan.apply(find_inner_aggr_expr)
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Can't find aggr expr in plan {plan:?}"),
|
||||
})?;
|
||||
|
||||
if let Some(aggregate) = aggr_expr {
|
||||
for group_expr in &aggregate.group_expr {
|
||||
let refs = group_expr.column_refs();
|
||||
if refs.len() != 1 {
|
||||
continue;
|
||||
}
|
||||
let ref_col = refs.iter().next().unwrap();
|
||||
|
||||
let index = aggregate.input.schema().maybe_index_of_column(ref_col);
|
||||
let Some(index) = index else {
|
||||
continue;
|
||||
};
|
||||
let field = aggregate.input.schema().field(index);
|
||||
|
||||
let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());
|
||||
|
||||
if is_time_index {
|
||||
let rewrite_column = group_expr.clone();
|
||||
let rewritten = rewrite_column
|
||||
.rewrite(&mut RewriteColumn {
|
||||
table_name: table_name.to_string(),
|
||||
})
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Rewrite expr failed, expr={:?}", group_expr),
|
||||
})?
|
||||
.data;
|
||||
struct RewriteColumn {
|
||||
table_name: String,
|
||||
}
|
||||
|
||||
impl TreeNodeRewriter for RewriteColumn {
|
||||
type Node = Expr;
|
||||
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
let Expr::Column(mut column) = node else {
|
||||
return Ok(Transformed::no(node));
|
||||
};
|
||||
|
||||
column.relation = Some(TableReference::bare(self.table_name.clone()));
|
||||
|
||||
Ok(Transformed::yes(Expr::Column(column)))
|
||||
}
|
||||
}
|
||||
|
||||
time_window_expr = Some(rewritten);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
|
||||
} else {
|
||||
// can't found time window expr, return None
|
||||
Ok((ts_col_name, None, expected_time_unit, df_schema))
|
||||
}
|
||||
}

/// Find the nearest lower bound for time `current` in the given `plan` for the time window expr,
/// i.e. for a time window expr of `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// return `Some("2021-07-01 00:00:00.000")`.
/// If `plan` doesn't contain a `TIME INDEX` column, return `None`.
///
/// A time window expr is an expr that:
/// 1. refers only to a time index column
/// 2. is monotonically increasing
/// 3. shows up in the GROUP BY clause
///
/// Note that this plan should contain only one TableScan.
pub async fn find_plan_time_window_bound(
    plan: &LogicalPlan,
    current: Timestamp,
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
    // TODO(discord9): find the expr that does the time window
    let catalog_man = engine.engine_state().catalog_manager();

    let (ts_col_name, time_window_expr, expected_time_unit, df_schema) =
        find_time_window_expr(plan, catalog_man.clone(), query_ctx).await?;
    // cast current to ts_index's type
    let new_current = current
        .convert_to(expected_time_unit)
        .with_context(|| UnexpectedSnafu {
            reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
        })?;

    // if no time_window_expr is found, return None bounds
    if let Some(time_window_expr) = time_window_expr {
        let lower_bound =
            find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
        let upper_bound =
            find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
        Ok((ts_col_name, lower_bound, upper_bound))
    } else {
        Ok((ts_col_name, None, None))
    }
}
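
// A minimal usage sketch (illustrative; `plan_for` and table `t` are hypothetical,
// while the values mirror the `test_plan_time_window_lower_bound` cases below): for a
// plan grouped by `date_bin('5 minutes', ts)`, the bounds are the enclosing window.
//
//     let plan = plan_for("SELECT date_bin('5 minutes', ts) AS w FROM t GROUP BY w");
//     let current = Timestamp::new(61_000, TimeUnit::Millisecond); // 00:01:01
//     let (col, lower, upper) =
//         find_plan_time_window_bound(&plan, current, ctx.clone(), engine.clone()).await?;
//     assert_eq!(col, "ts");
//     assert_eq!(lower, Some(Timestamp::new(0, TimeUnit::Millisecond)));       // 00:00:00
//     assert_eq!(upper, Some(Timestamp::new(300_000, TimeUnit::Millisecond))); // 00:05:00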

/// Find the lower bound of the time window for the given `expr` and `current` timestamp,
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of the current time window given the current timestamp.
///
/// If this returns None, the time window has no lower bound.
fn find_expr_time_window_lower_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    let phy_planner = DefaultPhysicalPlanner::default();

    let phy_expr: PhysicalExprRef = phy_planner
        .create_physical_expr(expr, df_schema, &SessionContext::new().state())
        .with_context(|_e| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    // evaluating the time window expr at `current` already yields the window's start
    let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
    let input_time_unit = cur_time_window.unit();
    Ok(cur_time_window.convert_to(input_time_unit))
}

/// Find the upper bound of the time window for the given expression.
fn find_expr_time_window_upper_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    use std::cmp::Ordering;

    let phy_planner = DefaultPhysicalPlanner::default();

    let phy_expr: PhysicalExprRef = phy_planner
        .create_physical_expr(expr, df_schema, &SessionContext::new().state())
        .with_context(|_e| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;

    // search for the upper bound
    let mut offset: i64 = 1;
    let mut lower_bound = Some(current);
    let upper_bound;
    // first, probe exponentially to find a range for the binary search
    loop {
        let Some(next_val) = current.value().checked_add(offset) else {
            // no upper bound if overflow
            return Ok(None);
        };

        let next_time_probe = common_time::Timestamp::new(next_val, current.unit());

        let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;

        match next_time_window.cmp(&cur_time_window) {
            Ordering::Less => UnexpectedSnafu {
                reason: format!(
                    "Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
                ),
            }
            .fail()?,
            Ordering::Equal => {
                lower_bound = Some(next_time_probe);
            }
            Ordering::Greater => {
                upper_bound = Some(next_time_probe);
                break;
            }
        }

        let Some(new_offset) = offset.checked_mul(2) else {
            // no upper bound if overflow
            return Ok(None);
        };
        offset = new_offset;
    }

    // binary search for the exact upper bound

    ensure!(
        lower_bound.map(|v| v.unit()) == upper_bound.map(|v| v.unit()),
        UnexpectedSnafu {
            reason: format!(
                "Unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"
            ),
        }
    );

    let output_unit = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .unit();

    let mut low = lower_bound
        .context(UnexpectedSnafu {
            reason: "should have lower bound",
        })?
        .value();
    let mut high = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .value();
    while low < high {
        let mid = (low + high) / 2;
        let mid_probe = common_time::Timestamp::new(mid, output_unit);
        let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;

        match mid_time_window.cmp(&cur_time_window) {
            Ordering::Less => UnexpectedSnafu {
                reason: format!("Binary search failed for time window expression {expr:?}"),
            }
            .fail()?,
            Ordering::Equal => low = mid + 1,
            Ordering::Greater => high = mid,
        }
    }

    let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);

    Ok(Some(final_upper_bound_for_time_window))
}
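
// A standalone sketch of the same search strategy (hypothetical helper, not part of
// this module), assuming a monotonic window function `f`: exponential probing finds a
// range where `f` jumps, then binary search finds the first input of the next window.
fn first_input_of_next_window(f: impl Fn(i64) -> i64, current: i64) -> Option<i64> {
    let base = f(current);
    // exponential probe: double the offset until `f(current + offset)` leaves the window
    let mut offset: i64 = 1;
    let mut high = loop {
        let probe = current.checked_add(offset)?;
        if f(probe) > base {
            break probe;
        }
        offset = offset.checked_mul(2)?;
    };
    // binary search for the smallest input whose window is greater than `base`
    let mut low = current;
    while low < high {
        let mid = low + (high - low) / 2;
        if f(mid) > base {
            high = mid;
        } else {
            low = mid + 1;
        }
    }
    Some(high)
}
// e.g. with `f = |t| t.div_euclid(300_000)` (a 5-minute bin over milliseconds) and
// `current = 61_000`, this returns `Some(300_000)`, matching the function above.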

fn eval_ts_to_ts(
    phy: &PhysicalExprRef,
    df_schema: &DFSchema,
    input_value: Timestamp,
) -> Result<Timestamp, Error> {
    let schema_ty = df_schema.field(0).data_type();
    let schema_cdt = ConcreteDataType::from_arrow_type(schema_ty);
    let schema_unit = if let ConcreteDataType::Timestamp(ts) = schema_cdt {
        ts.unit()
    } else {
        return UnexpectedSnafu {
            reason: format!("Expect Timestamp, found {:?}", schema_cdt),
        }
        .fail();
    };
    let input_value = input_value
        .convert_to(schema_unit)
        .with_context(|| UnexpectedSnafu {
            reason: format!("Failed to convert timestamp {input_value:?} to {schema_unit}"),
        })?;
    let ts_vector = match schema_unit {
        TimeUnit::Second => {
            TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Millisecond => {
            TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Microsecond => {
            TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Nanosecond => {
            TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
    };

    let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
        .with_context(|_| ArrowSnafu {
            context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
        })?;

    let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
        context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
    })?;

    if let Some(Some(ts)) = columnar_to_ts_vector(&eval_res)?.first() {
        Ok(*ts)
    } else {
        UnexpectedSnafu {
            reason: format!(
                "Expected timestamp in expression {phy:?} but got {:?}",
                eval_res
            ),
        }
        .fail()?
    }
}
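
// In effect, `eval_ts_to_ts` computes `f(t)` for a scalar `t` by wrapping it in a
// one-row record batch. A usage sketch (illustrative; `phy` and `df_schema` would come
// from the callers above):
//
//     // phy = physical form of `date_bin(INTERVAL '5 minutes', ts)`
//     let start = eval_ts_to_ts(&phy, &df_schema, Timestamp::new(61_000, TimeUnit::Millisecond))?;
//     assert_eq!(start, Timestamp::new(0, TimeUnit::Millisecond)); // start of the window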

// TODO(discord9): a method to find out the precise time window

/// Find the `Filter` node corresponding to the outermost `WHERE` and add a new filter expr to it
#[derive(Debug)]
pub struct AddFilterRewriter {
    extra_filter: Expr,
    is_rewritten: bool,
}

impl AddFilterRewriter {
    fn new(filter: Expr) -> Self {
        Self {
            extra_filter: filter,
            is_rewritten: false,
        }
    }
}

impl TreeNodeRewriter for AddFilterRewriter {
    type Node = LogicalPlan;
    fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
        if self.is_rewritten {
            return Ok(Transformed::no(node));
        }
        match node {
            LogicalPlan::Filter(mut filter) if !filter.having => {
                filter.predicate = filter.predicate.and(self.extra_filter.clone());
                self.is_rewritten = true;
                Ok(Transformed::yes(LogicalPlan::Filter(filter)))
            }
            LogicalPlan::TableScan(_) => {
                // add a new filter
                let filter =
                    datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
                self.is_rewritten = true;
                Ok(Transformed::yes(LogicalPlan::Filter(filter)))
            }
            _ => Ok(Transformed::no(node)),
        }
    }
}
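
// Usage sketch (mirroring the `test_add_filter` cases below): since `f_up` walks the
// plan bottom-up and stops after the first rewrite, the extra predicate lands on the
// innermost `Filter` (or directly above the `TableScan`), i.e. inside subqueries and
// CTEs rather than around them.
//
//     let mut rewriter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
//     let plan = plan.rewrite(&mut rewriter)?.data;
//     // "SELECT number FROM numbers_with_ts GROUP BY number" becomes
//     // "SELECT ... FROM numbers_with_ts WHERE (number > 4) GROUP BY ..."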

fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
    /// A dialect that forces identifiers that are not all-lowercase to be quoted
    struct ForceQuoteIdentifiers;
    impl datafusion::sql::unparser::dialect::Dialect for ForceQuoteIdentifiers {
        fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
            if identifier.to_lowercase() != identifier {
                Some('"')
            } else {
                None
            }
        }
    }
    let unparser = Unparser::new(&ForceQuoteIdentifiers);
    // first make all columns qualified
    let sql = unparser
        .plan_to_sql(plan)
        .with_context(|_e| DatafusionSnafu {
            context: format!("Failed to unparse logical plan {plan:?}"),
        })?;
    Ok(sql.to_string())
}
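
// Round-trip sketch (mirroring `test_sql_plan_convert` below): only identifiers that
// are not all-lowercase get quoted, so case-sensitive names survive unparsing:
//
//     let plan = sql_to_df_plan(ctx, engine, r#"SELECT "NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#, false).await?;
//     assert_eq!(
//         df_plan_to_sql(&plan)?,
//         r#"SELECT "UPPERCASE_NUMBERS_WITH_TS"."NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#
//     );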

#[cfg(test)]
mod test {
    use datafusion_common::tree_node::TreeNode;
    use pretty_assertions::assert_eq;
    use session::context::QueryContext;

    use super::{sql_to_df_plan, *};
    use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
    use crate::test_utils::create_test_query_engine;

    #[tokio::test]
    async fn test_sql_plan_convert() {
        let query_engine = create_test_query_engine();
        let ctx = QueryContext::arc();
        let old = r#"SELECT "NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#;
        let new = sql_to_df_plan(ctx.clone(), query_engine.clone(), old, false)
            .await
            .unwrap();
        let new_sql = df_plan_to_sql(&new).unwrap();

        assert_eq!(
            r#"SELECT "UPPERCASE_NUMBERS_WITH_TS"."NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#,
            new_sql
        );
    }

    #[tokio::test]
    async fn test_add_filter() {
        let testcases = vec![
            (
                "SELECT number FROM numbers_with_ts GROUP BY number",
                "SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
            ),
            (
                "SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
                "SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)"
            ),
            (
                "SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
                "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
            )
        ];
        use datafusion_expr::{col, lit};
        let query_engine = create_test_query_engine();
        let ctx = QueryContext::arc();

        for (before, after) in testcases {
            let sql = before;
            let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
                .await
                .unwrap();

            let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
            let plan = plan.rewrite(&mut add_filter).unwrap().data;
            let new_sql = df_plan_to_sql(&plan).unwrap();
            assert_eq!(after, new_sql);
        }
    }

    #[tokio::test]
    async fn test_plan_time_window_lower_bound() {
        use datafusion_expr::{col, lit};
        let query_engine = create_test_query_engine();
        let ctx = QueryContext::arc();

        let testcases = [
            // the same alias is not the same column
            (
                "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
                Timestamp::new(1740394109, TimeUnit::Second),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
                    Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
                ),
                r#"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"#
            ),
            // complex time window index
            (
                "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
                Timestamp::new(1740394109, TimeUnit::Second),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(1740394080, TimeUnit::Second)),
                    Some(Timestamp::new(1740394140, TimeUnit::Second)),
                ),
                "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
            ),
            // no time index
            (
                "SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
                Timestamp::new(23, TimeUnit::Millisecond),
                ("ts".to_string(), None, None),
                "SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
            ),
            // time index
            (
                "SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
                Timestamp::new(23, TimeUnit::Nanosecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
            ),
            // on the spot
            (
                "SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
                Timestamp::new(0, TimeUnit::Nanosecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
            ),
            // different time unit
            (
                "SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
                Timestamp::new(23_000_000, TimeUnit::Nanosecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
            ),
            // time index with other fields
            (
                "SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
            ),
            // time index with other pks
            (
                "SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
            ),
            // subquery
            (
                "SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
            ),
            // cte
            (
                "with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
            ),
            // complex subquery without alias
            (
                "SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
            ),
            // complex subquery alias
            (
                "SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
                Timestamp::new(23, TimeUnit::Millisecond),
                (
                    "ts".to_string(),
                    Some(Timestamp::new(0, TimeUnit::Millisecond)),
                    Some(Timestamp::new(300000, TimeUnit::Millisecond)),
                ),
                "SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
            ),
        ];

        for (sql, current, expected, expected_unparsed) in testcases {
            let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
                .await
                .unwrap();

            let real =
                find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
                    .await
                    .unwrap();
            assert_eq!(expected, real);

            let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
                .await
                .unwrap();
            let (col_name, lower, upper) = real;
            let new_sql = if lower.is_some() {
                let to_df_literal = |value| {
                    let value = Value::from(value);

                    value.try_to_scalar_value(&value.data_type()).unwrap()
                };
                let lower = to_df_literal(lower.unwrap());
                let upper = to_df_literal(upper.unwrap());
                let expr = col(&col_name)
                    .gt_eq(lit(lower))
                    .and(col(&col_name).lt_eq(lit(upper)));
                let mut add_filter = AddFilterRewriter::new(expr);
                let plan = plan.rewrite(&mut add_filter).unwrap().data;
                df_plan_to_sql(&plan).unwrap()
            } else {
                sql.to_string()
            };
            assert_eq!(expected_unparsed, new_sql);
        }
    }
}
@@ -1,815 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use api::v1::flow::FlowResponse;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::table_info::TableInfoManager;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::TreeNode;
use datatypes::value::Value;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use table::metadata::TableId;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::sync::{oneshot, RwLock};
use tokio::time::Instant;

use super::frontend_client::FrontendClient;
use super::{df_plan_to_sql, AddFilterRewriter, TimeWindowExpr};
use crate::adapter::{CreateFlowArgs, FlowId, TableName};
use crate::error::{
    DatafusionSnafu, DatatypesSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu,
    TimeSnafu, UnexpectedSnafu,
};
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
use crate::recording_rules::{find_time_window_expr, sql_to_df_plan};
use crate::Error;

/// TODO(discord9): make those constants configurable
/// The default rule engine query timeout is 10 minutes
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);

/// Will emit a warn log for any query that runs for more than 1 minute, and again every 1 minute while that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);

/// TODO(discord9): determine how to configure refresh rate
pub struct RecordingRuleEngine {
    tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
    shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
    frontend_client: Arc<FrontendClient>,
    flow_metadata_manager: FlowMetadataManagerRef,
    table_meta: TableMetadataManagerRef,
    engine: QueryEngineRef,
}

impl RecordingRuleEngine {
    pub fn new(
        frontend_client: Arc<FrontendClient>,
        engine: QueryEngineRef,
        flow_metadata_manager: FlowMetadataManagerRef,
        table_meta: TableMetadataManagerRef,
    ) -> Self {
        Self {
            tasks: Default::default(),
            shutdown_txs: Default::default(),
            frontend_client,
            flow_metadata_manager,
            table_meta,
            engine,
        }
    }

    pub async fn handle_inserts(
        &self,
        request: api::v1::region::InsertRequests,
    ) -> Result<FlowResponse, Error> {
        let table_info_mgr = self.table_meta.table_info_manager();
        let mut group_by_table_name: HashMap<TableName, Vec<api::v1::Rows>> = HashMap::new();
        for r in request.requests {
            let tid = RegionId::from(r.region_id).table_id();
            let name = get_table_name(table_info_mgr, &tid).await?;
            let entry = group_by_table_name.entry(name).or_default();
            if let Some(rows) = r.rows {
                entry.push(rows);
            }
        }

        for (_flow_id, task) in self.tasks.read().await.iter() {
            let src_table_names = &task.source_table_names;

            for src_table_name in src_table_names {
                if let Some(entry) = group_by_table_name.get(src_table_name) {
                    let Some(expr) = &task.time_window_expr else {
                        continue;
                    };
                    let involved_time_windows = expr.handle_rows(entry.clone()).await?;
                    let mut state = task.state.write().await;
                    state
                        .dirty_time_windows
                        .add_lower_bounds(involved_time_windows.into_iter());
                }
            }
        }

        Ok(Default::default())
    }
}

async fn get_table_name(zelf: &TableInfoManager, table_id: &TableId) -> Result<TableName, Error> {
    zelf.get(*table_id)
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?
        .with_context(|| UnexpectedSnafu {
            reason: format!("Table id = {:?}, couldn't find table name", table_id),
        })
        .map(|name| name.table_name())
        .map(|name| [name.catalog_name, name.schema_name, name.table_name])
}

const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);

impl RecordingRuleEngine {
    pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
        let CreateFlowArgs {
            flow_id,
            sink_table_name,
            source_table_ids,
            create_if_not_exists,
            or_replace,
            expire_after,
            comment: _,
            sql,
            flow_options,
            query_ctx,
        } = args;

        // or-replace logic
        {
            let is_exist = self.tasks.read().await.contains_key(&flow_id);
            match (create_if_not_exists, or_replace, is_exist) {
                // if replace, ignore that the old flow exists
                (_, true, true) => {
                    info!("Replacing flow with id={}", flow_id);
                }
                (false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
                // already exists, and not replace, return None
                (true, false, true) => {
                    info!("Flow with id={} already exists, do nothing", flow_id);
                    return Ok(None);
                }

                // continue as normal
                (_, _, false) => (),
            }
        }

        let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);

        ensure!(
            flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
            UnexpectedSnafu {
                reason: format!("Flow type is neither RecordingRule nor None, got {flow_type:?}")
            }
        );

        let Some(query_ctx) = query_ctx else {
            UnexpectedSnafu {
                reason: "Query context is None".to_string(),
            }
            .fail()?
        };
        let query_ctx = Arc::new(query_ctx);
        let mut source_table_names = Vec::new();
        for src_id in source_table_ids {
            let table_name = self
                .table_meta
                .table_info_manager()
                .get(src_id)
                .await
                .map_err(BoxedError::new)
                .context(ExternalSnafu)?
                .with_context(|| UnexpectedSnafu {
                    reason: format!("Table id = {:?}, couldn't find table name", src_id),
                })
                .map(|name| name.table_name())
                .map(|name| [name.catalog_name, name.schema_name, name.table_name])?;
            source_table_names.push(table_name);
        }

        let (tx, rx) = oneshot::channel();

        let plan = sql_to_df_plan(query_ctx.clone(), self.engine.clone(), &sql, true).await?;
        let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
            &plan,
            self.engine.engine_state().catalog_manager().clone(),
            query_ctx.clone(),
        )
        .await?;

        let phy_expr = time_window_expr
            .map(|expr| TimeWindowExpr::from_expr(&expr, &column_name, &df_schema))
            .transpose()?;

        info!("Flow id={}, found time window expr={:?}", flow_id, phy_expr);

        let task = RecordingRuleTask::new(
            flow_id,
            &sql,
            phy_expr,
            expire_after,
            sink_table_name,
            source_table_names,
            query_ctx,
            rx,
        );

        let task_inner = task.clone();
        let engine = self.engine.clone();
        let frontend = self.frontend_client.clone();

        // TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
        let _handle = common_runtime::spawn_global(async move {
            match task_inner.start_executing(engine, frontend).await {
                Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
                Err(err) => common_telemetry::error!(
                    "Flow {} encountered unrecoverable error: {err:?}",
                    task_inner.flow_id
                ),
            }
        });

        // TODO(discord9): deal with replace logic
        let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
        drop(replaced_old_task_opt);

        self.shutdown_txs.write().await.insert(flow_id, tx);

        Ok(Some(flow_id))
    }

    pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
        if self.tasks.write().await.remove(&flow_id).is_none() {
            warn!("Flow {flow_id} not found in tasks")
        }
        let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
            UnexpectedSnafu {
                reason: format!("Can't find shutdown tx for flow {flow_id}"),
            }
            .fail()?
        };
        if tx.send(()).is_err() {
            warn!("Failed to shut down flow {flow_id} because the receiver is already dropped; maybe flow {flow_id} is already dropped?")
        }
        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct RecordingRuleTask {
    pub flow_id: FlowId,
    query: String,
    pub time_window_expr: Option<TimeWindowExpr>,
    /// in seconds
    pub expire_after: Option<i64>,
    sink_table_name: [String; 3],
    source_table_names: HashSet<[String; 3]>,
    state: Arc<RwLock<RecordingRuleState>>,
}

impl RecordingRuleTask {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        flow_id: FlowId,
        query: &str,
        time_window_expr: Option<TimeWindowExpr>,
        expire_after: Option<i64>,
        sink_table_name: [String; 3],
        source_table_names: Vec<[String; 3]>,
        query_ctx: QueryContextRef,
        shutdown_rx: oneshot::Receiver<()>,
    ) -> Self {
        Self {
            flow_id,
            query: query.to_string(),
            time_window_expr,
            expire_after,
            sink_table_name,
            source_table_names: source_table_names.into_iter().collect(),
            state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
        }
    }
}
impl RecordingRuleTask {
    /// This should be called in a new tokio task
    pub async fn start_executing(
        &self,
        engine: QueryEngineRef,
        frontend_client: Arc<FrontendClient>,
    ) -> Result<(), Error> {
        // only the first query doesn't need an upper bound
        let mut is_first = true;

        loop {
            // FIXME(discord9): test whether requiring an upper bound also works
            let new_query = self.gen_query_with_time_window(engine.clone()).await?;

            let insert_into = if let Some(new_query) = new_query {
                format!(
                    "INSERT INTO {}.{}.{} {}",
                    self.sink_table_name[0],
                    self.sink_table_name[1],
                    self.sink_table_name[2],
                    new_query
                )
            } else {
                tokio::time::sleep(MIN_REFRESH_DURATION).await;
                continue;
            };

            if is_first {
                is_first = false;
            }

            let instant = Instant::now();
            let flow_id = self.flow_id;
            let db_client = frontend_client.get_database_client().await?;
            let peer_addr = db_client.peer.addr;
            debug!(
                "Executing flow {flow_id} (expire_after={:?} secs) on {:?} with query {}",
                self.expire_after, peer_addr, &insert_into
            );

            let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
                .with_label_values(&[flow_id.to_string().as_str()])
                .start_timer();

            let res = db_client.database.sql(&insert_into).await;
            drop(timer);

            let elapsed = instant.elapsed();
            if let Ok(res1) = &res {
                debug!(
                    "Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
                    elapsed
                );
            } else if let Err(res) = &res {
                warn!(
                    "Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
                    peer_addr, elapsed, &insert_into
                );
            }

            // record slow query
            if elapsed >= SLOW_QUERY_THRESHOLD {
                warn!(
                    "Flow {flow_id} on frontend {} executed for {:?} before completing, query: {}",
                    peer_addr, elapsed, &insert_into
                );
                METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
                    .with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
                    .observe(elapsed.as_secs_f64());
            }

            self.state
                .write()
                .await
                .after_query_exec(elapsed, res.is_ok());
            // drop the result to free client-related resources
            drop(res);

            let sleep_until = {
                let mut state = self.state.write().await;
                match state.shutdown_rx.try_recv() {
                    Ok(()) => break Ok(()),
                    Err(TryRecvError::Closed) => {
                        warn!("Unexpected shutdown of flow {flow_id}, shutting down anyway");
                        break Ok(());
                    }
                    Err(TryRecvError::Empty) => (),
                }
                state.get_next_start_query_time(None)
            };
            tokio::time::sleep_until(sleep_until).await;
        }
    }

    /// Will merge the dirty time windows and use at most the first
    /// `DirtyTimeWindows::MAX_FILTER_NUM` of them in the query
    async fn gen_query_with_time_window(
        &self,
        engine: QueryEngineRef,
    ) -> Result<Option<String>, Error> {
        let query_ctx = self.state.read().await.query_ctx.clone();
        let start = SystemTime::now();
        let since_the_epoch = start
            .duration_since(UNIX_EPOCH)
            .expect("Time went backwards");
        let low_bound = self
            .expire_after
            .map(|e| since_the_epoch.as_secs() - e as u64)
            .unwrap_or(u64::MIN);

        let low_bound = Timestamp::new_second(low_bound as i64);

        // TODO(discord9): use time window expr to get the precise expire lower bound
        let expire_time_window_bound = self
            .time_window_expr
            .as_ref()
            .map(|expr| expr.eval(low_bound))
            .transpose()?;

        let new_sql = {
            let expr = {
                match expire_time_window_bound {
                    Some((Some(l), Some(u))) => {
                        let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
                            reason: format!("Can't get window size from {u:?} - {l:?}"),
                        })?;
                        let col_name = self
                            .time_window_expr
                            .as_ref()
                            .map(|expr| expr.column_name.clone())
                            .with_context(|| UnexpectedSnafu {
                                reason: format!(
                                    "Flow id={:?}, Failed to get column name from time window expr",
                                    self.flow_id
                                ),
                            })?;

                        self.state
                            .write()
                            .await
                            .dirty_time_windows
                            .gen_filter_exprs(&col_name, Some(l), window_size, self)?
                    }
                    _ => {
                        debug!(
                            "Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.flow_id
                        );
                        // since no time window lower/upper bound is found, just return the original query
                        return Ok(Some(self.query.clone()));
                    }
                }
            };

            debug!(
                "Flow id={:?}, Generated filter expr: {:?}",
                self.flow_id,
                expr.as_ref()
                    .map(|expr| expr_to_sql(expr).with_context(|_| DatafusionSnafu {
                        context: format!("Failed to generate filter expr from {expr:?}"),
                    }))
                    .transpose()?
                    .map(|s| s.to_string())
            );

            let Some(expr) = expr else {
                // no new data, hence no need to update
                debug!("Flow id={:?}, no new data, skipping update", self.flow_id);
                return Ok(None);
            };

            let mut add_filter = AddFilterRewriter::new(expr);
            // make an unoptimized plan for clearer unparsing
            let plan =
                sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
            let plan = plan
                .clone()
                .rewrite(&mut add_filter)
                .with_context(|_| DatafusionSnafu {
                    context: format!("Failed to rewrite plan {plan:?}"),
                })?
                .data;
            df_plan_to_sql(&plan)?
        };

        Ok(Some(new_sql))
    }
}
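
// End-to-end sketch of what the task executes (illustrative; the table, sink, and
// window values are hypothetical): for `query = "SELECT date_bin('5 minutes', ts) AS w
// FROM t GROUP BY w"`, sink table `greptime.public.out`, and one dirty window
// `[00:00, 00:05)`, the statement sent to the frontend has the shape
//
//     INSERT INTO greptime.public.out
//     SELECT date_bin('5 minutes', t.ts) AS w FROM t
//     WHERE (t.ts >= '1970-01-01 00:00:00') AND (t.ts < '1970-01-01 00:05:00')
//     GROUP BY date_bin('5 minutes', t.ts)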

#[derive(Debug)]
pub struct RecordingRuleState {
    query_ctx: QueryContextRef,
    /// time when the last query completed
    last_update_time: Instant,
    /// duration of the last query
    last_query_duration: Duration,
    /// Dirty time windows that need to be updated,
    /// stored as a non-overlapping `start -> end` mapping
    dirty_time_windows: DirtyTimeWindows,
    exec_state: ExecState,
    shutdown_rx: oneshot::Receiver<()>,
}

#[derive(Debug, Clone, Default)]
pub struct DirtyTimeWindows {
    windows: BTreeMap<Timestamp, Option<Timestamp>>,
}

fn to_df_literal(value: Timestamp) -> Result<datafusion_common::ScalarValue, Error> {
    let value = Value::from(value);
    let value = value
        .try_to_scalar_value(&value.data_type())
        .with_context(|_| DatatypesSnafu {
            extra: format!("Failed to convert to scalar value: {}", value),
        })?;
    Ok(value)
}

impl DirtyTimeWindows {
    /// Time window merge distance
    const MERGE_DIST: i32 = 3;

    /// Maximum number of filters allowed in a single query
    const MAX_FILTER_NUM: usize = 20;

    /// Add lower bounds to the dirty time windows. Upper bounds are ignored.
    ///
    /// # Arguments
    ///
    /// * `lower_bounds` - An iterator of lower bounds to be added.
    pub fn add_lower_bounds(&mut self, lower_bounds: impl Iterator<Item = Timestamp>) {
        for lower_bound in lower_bounds {
            let entry = self.windows.entry(lower_bound);
            entry.or_insert(None);
        }
    }

    /// Generate all filter expressions, consuming all time windows
    pub fn gen_filter_exprs(
        &mut self,
        col_name: &str,
        expire_lower_bound: Option<Timestamp>,
        window_size: chrono::Duration,
        task_ctx: &RecordingRuleTask,
    ) -> Result<Option<datafusion_expr::Expr>, Error> {
        debug!(
            "expire_lower_bound: {:?}, window_size: {:?}",
            expire_lower_bound.map(|t| t.to_iso8601_string()),
            window_size
        );
        self.merge_dirty_time_windows(window_size, expire_lower_bound)?;

        if self.windows.len() > Self::MAX_FILTER_NUM {
            let first_time_window = self.windows.first_key_value();
            let last_time_window = self.windows.last_key_value();
            warn!(
                "Flow id = {:?}, too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong. Time window expr={:?}, expire_after={:?}, first_time_window={:?}, last_time_window={:?}, the original query: {:?}",
                task_ctx.flow_id,
                self.windows.len(),
                Self::MAX_FILTER_NUM,
                task_ctx.time_window_expr,
                task_ctx.expire_after,
                first_time_window,
                last_time_window,
                task_ctx.query
            );
        }

        // get the first `MAX_FILTER_NUM` time windows
        let nth = self
            .windows
            .iter()
            .nth(Self::MAX_FILTER_NUM)
            .map(|(key, _)| *key);
        let first_nth = {
            if let Some(nth) = nth {
                let mut after = self.windows.split_off(&nth);
                std::mem::swap(&mut self.windows, &mut after);

                after
            } else {
                std::mem::take(&mut self.windows)
            }
        };

        let mut expr_lst = vec![];
        for (start, end) in first_nth.into_iter() {
            debug!(
                "Time window start: {:?}, end: {:?}",
                start.to_iso8601_string(),
                end.map(|t| t.to_iso8601_string())
            );

            use datafusion_expr::{col, lit};
            let lower = to_df_literal(start)?;
            let upper = end.map(to_df_literal).transpose()?;
            let expr = if let Some(upper) = upper {
                col(col_name)
                    .gt_eq(lit(lower))
                    .and(col(col_name).lt(lit(upper)))
            } else {
                col(col_name).gt_eq(lit(lower))
            };
            expr_lst.push(expr);
        }
        let expr = expr_lst.into_iter().reduce(|a, b| a.or(b));
        Ok(expr)
    }
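
    // Shape of the result (illustrative): with `col_name = "ts"` and two merged windows
    // `[0s, 300s)` and `[600s, 900s)`, the generated expression is
    //
    //     ((ts >= 0s) AND (ts < 300s)) OR ((ts >= 600s) AND (ts < 900s))
    //
    // i.e. one half-open range per window, OR-ed together, which `AddFilterRewriter`
    // then pushes into the query's WHERE clause.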

    /// Merge time windows that overlap or are too close together
    pub fn merge_dirty_time_windows(
        &mut self,
        window_size: chrono::Duration,
        expire_lower_bound: Option<Timestamp>,
    ) -> Result<(), Error> {
        let mut new_windows = BTreeMap::new();

        let mut prev_tw = None;
        for (lower_bound, upper_bound) in std::mem::take(&mut self.windows) {
            // filter out expired time windows
            if let Some(expire_lower_bound) = expire_lower_bound {
                if lower_bound <= expire_lower_bound {
                    continue;
                }
            }

            let Some(prev_tw) = &mut prev_tw else {
                prev_tw = Some((lower_bound, upper_bound));
                continue;
            };

            let std_window_size = window_size.to_std().map_err(|e| {
                InternalSnafu {
                    reason: e.to_string(),
                }
                .build()
            })?;

            // if cur.lower - prev.upper <= window_size * MERGE_DIST, merge
            let prev_upper = prev_tw
                .1
                .unwrap_or(prev_tw.0.add_duration(std_window_size).context(TimeSnafu)?);
            prev_tw.1 = Some(prev_upper);

            let cur_upper = upper_bound.unwrap_or(
                lower_bound
                    .add_duration(std_window_size)
                    .context(TimeSnafu)?,
            );

            if lower_bound
                .sub(&prev_upper)
                .map(|dist| dist <= window_size * Self::MERGE_DIST)
                .unwrap_or(false)
            {
                prev_tw.1 = Some(cur_upper);
            } else {
                new_windows.insert(prev_tw.0, prev_tw.1);
                *prev_tw = (lower_bound, Some(cur_upper));
            }
        }

        if let Some(prev_tw) = prev_tw {
            new_windows.insert(prev_tw.0, prev_tw.1);
        }

        self.windows = new_windows;

        Ok(())
    }
}
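
// A worked example of the merge (illustrative; `test_merge_dirty_time_windows` below
// exercises the same cases): with a 5-minute window size and `MERGE_DIST = 3`, lower
// bounds at 00:00 and 00:20 are at most 3 windows apart and merge into the single
// dirty range [00:00, 00:25), while lower bounds at 00:00 and 00:25 stay separate.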

impl RecordingRuleState {
    pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
        Self {
            query_ctx,
            last_update_time: Instant::now(),
            last_query_duration: Duration::from_secs(0),
            dirty_time_windows: Default::default(),
            exec_state: ExecState::Idle,
            shutdown_rx,
        }
    }

    /// Called after the last query is done;
    /// `is_succ` indicates whether the last query succeeded
    pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
        self.exec_state = ExecState::Idle;
        self.last_query_duration = elapsed;
        self.last_update_time = Instant::now();
    }

    /// Wait roughly as long as the last query took, clamped below by
    /// `MIN_REFRESH_DURATION` and above by `max_timeout` (if given),
    /// before starting the next query
    pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
        let next_duration = max_timeout
            .unwrap_or(self.last_query_duration)
            .min(self.last_query_duration);
        let next_duration = next_duration.max(MIN_REFRESH_DURATION);

        self.last_update_time + next_duration
    }
}
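
// Numeric sketch of the pacing (illustrative): if the last query took 2s, the next one
// starts MIN_REFRESH_DURATION = 5s after it completed; if it took 90s, the next one
// starts 90s after completion, or sooner if a smaller `max_timeout` is given.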

#[derive(Debug, Clone)]
enum ExecState {
    Idle,
    Executing,
}

#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn test_merge_dirty_time_windows() {
        let mut dirty = DirtyTimeWindows::default();
        dirty.add_lower_bounds(
            vec![
                Timestamp::new_second(0),
                Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
            ]
            .into_iter(),
        );
        dirty
            .merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
            .unwrap();
        // just close enough to merge
        assert_eq!(
            dirty.windows,
            BTreeMap::from([(
                Timestamp::new_second(0),
                Some(Timestamp::new_second(
                    (2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
                ))
            )])
        );

        // separate time windows
        let mut dirty = DirtyTimeWindows::default();
        dirty.add_lower_bounds(
            vec![
                Timestamp::new_second(0),
                Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
            ]
            .into_iter(),
        );
        dirty
            .merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
            .unwrap();
        // just far enough apart to stay separate
        assert_eq!(
            BTreeMap::from([
                (
                    Timestamp::new_second(0),
                    Some(Timestamp::new_second(5 * 60))
                ),
                (
                    Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                    Some(Timestamp::new_second(
                        (3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
                    ))
                )
            ]),
            dirty.windows
        );

        // overlapping
        let mut dirty = DirtyTimeWindows::default();
        dirty.add_lower_bounds(
            vec![
                Timestamp::new_second(0),
                Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
            ]
            .into_iter(),
        );
        dirty
            .merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
            .unwrap();
        // overlapping windows merge into one
        assert_eq!(
            BTreeMap::from([(
                Timestamp::new_second(0),
                Some(Timestamp::new_second(
                    (1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
                ))
            ),]),
            dirty.windows
        );

        // expired
        let mut dirty = DirtyTimeWindows::default();
        dirty.add_lower_bounds(
            vec![
                Timestamp::new_second(0),
                Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
            ]
            .into_iter(),
        );
        dirty
            .merge_dirty_time_windows(
                chrono::Duration::seconds(5 * 60),
                Some(Timestamp::new_second(
                    (DirtyTimeWindows::MERGE_DIST as i64) * 6 * 60,
                )),
            )
            .unwrap();
        // everything at or before the expire bound is dropped
        assert_eq!(BTreeMap::from([]), dirty.windows);
    }
}
@@ -1,163 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Frontend client to run a flow as a recording rule, i.e. a time-window-aware normal query triggered on every tick set by the user

use std::sync::Arc;

use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use meta_client::client::MetaClient;
use snafu::ResultExt;

use crate::error::{ExternalSnafu, UnexpectedSnafu};
use crate::recording_rules::engine::DEFAULT_RULE_ENGINE_QUERY_TIMEOUT;
use crate::Error;

fn default_channel_mgr() -> ChannelManager {
    let cfg = ChannelConfig::new().timeout(DEFAULT_RULE_ENGINE_QUERY_TIMEOUT);
    ChannelManager::with_config(cfg)
}

fn client_from_urls(addrs: Vec<String>) -> Client {
    Client::with_manager_and_urls(default_channel_mgr(), addrs)
}

/// A simple frontend client able to execute sql using the grpc protocol
#[derive(Debug)]
pub enum FrontendClient {
    Distributed {
        meta_client: Arc<MetaClient>,
        channel_mgr: ChannelManager,
    },
    Standalone {
        /// For the sake of simplicity, still use grpc even in standalone mode.
        /// Note the client here should be lazy, so that it can wait until the frontend is booted before making a connection.
        /// TODO(discord9): not use grpc under standalone mode
        database_client: DatabaseWithPeer,
    },
}

#[derive(Debug, Clone)]
pub struct DatabaseWithPeer {
    pub database: Database,
    pub peer: Peer,
}

impl DatabaseWithPeer {
    fn new(database: Database, peer: Peer) -> Self {
        Self { database, peer }
    }
}

impl FrontendClient {
    pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
        Self::Distributed {
            meta_client,
            channel_mgr: default_channel_mgr(),
        }
    }

    pub fn from_static_grpc_addr(addr: String) -> Self {
        let peer = Peer {
            id: 0,
            addr: addr.clone(),
        };

        let mgr = default_channel_mgr();
        let client = Client::with_manager_and_urls(mgr.clone(), vec![addr]);
        let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
        Self::Standalone {
            database_client: DatabaseWithPeer::new(database, peer),
        }
    }
}

impl FrontendClient {
    async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
        let Self::Distributed { meta_client, .. } = self else {
            return Ok(vec![]);
        };
        let cluster_client = meta_client
            .cluster_client()
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;

        let prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend);
        let req = RangeRequest::new().with_prefix(prefix);
        let resp = cluster_client
            .range(req)
            .await
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let mut res = Vec::with_capacity(resp.kvs.len());
        for kv in resp.kvs {
            let key = NodeInfoKey::try_from(kv.key)
                .map_err(BoxedError::new)
                .context(ExternalSnafu)?;

            let val = NodeInfo::try_from(kv.value)
                .map_err(BoxedError::new)
                .context(ExternalSnafu)?;
            res.push((key, val));
        }
        Ok(res)
    }

    /// Get the database with max `last_activity_ts`
    async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
        if let Self::Standalone { database_client } = self {
            return Ok(database_client.clone());
        }
        match &self {
            Self::Standalone { database_client } => Ok(database_client.clone()),
            Self::Distributed {
                meta_client: _,
                channel_mgr,
            } => {
                let frontends = self.scan_for_frontend().await?;
                let mut last_activity_ts = i64::MIN;
                let mut peer = None;
                for (_key, val) in frontends.iter() {
                    if val.last_activity_ts > last_activity_ts {
                        last_activity_ts = val.last_activity_ts;
                        peer = Some(val.peer.clone());
                    }
                }
                let Some(peer) = peer else {
                    UnexpectedSnafu {
                        reason: format!("No frontend available: {:?}", frontends),
                    }
                    .fail()?
                };
                let client =
                    Client::with_manager_and_urls(channel_mgr.clone(), vec![peer.addr.clone()]);
                let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
                Ok(DatabaseWithPeer::new(database, peer))
            }
        }
    }

    /// Get a database client, and possibly update it before returning.
    pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
        match self {
            Self::Standalone { database_client } => Ok(database_client.clone()),
            Self::Distributed { meta_client: _, .. } => self.get_last_active_frontend().await,
        }
    }
}
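
// Selection sketch (illustrative): in distributed mode every call re-scans the
// frontend nodes registered in the metasrv and picks the one with the largest
// `last_activity_ts`, so a long-running flow keeps talking to a live frontend:
//
//     let client = FrontendClient::from_meta_client(meta_client);
//     let db = client.get_database_client().await?; // most recently active frontend
//     db.database.sql("INSERT INTO ...").await?;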
|
||||
@@ -68,13 +68,10 @@ pub fn value_to_internal_ts(value: Value) -> Result<i64, EvalError> {
|
||||
let ty = arg.data_type();
|
||||
matches!(
|
||||
ty,
|
||||
ConcreteDataType::Date(..)
|
||||
| ConcreteDataType::DateTime(..)
|
||||
| ConcreteDataType::Timestamp(..)
|
||||
ConcreteDataType::Date(..) | ConcreteDataType::Timestamp(..)
|
||||
)
|
||||
};
|
||||
match value {
|
||||
Value::DateTime(ts) => Ok(ts.val()),
|
||||
Value::Int64(ts) => Ok(ts),
|
||||
arg if is_supported_time_type(&arg) => {
|
||||
let arg_ty = arg.data_type();

@@ -214,7 +211,7 @@ impl From<Row> for ProtoRow {
}
#[cfg(test)]
mod test {
    use common_time::{Date, DateTime};
    use common_time::{Date, Timestamp};

    use super::*;

@@ -244,7 +241,7 @@ mod test {
    {
        let a = Value::from(1i32);
        let b = Value::from(1i64);
        let c = Value::DateTime(DateTime::new(1i64));
        let c = Value::Timestamp(Timestamp::new_millisecond(1i64));
        let d = Value::from(1.0);

        assert!(value_to_internal_ts(a).is_err());

@@ -57,7 +57,6 @@ use crate::error::{
};
use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::recording_rules::{FrontendClient, RecordingRuleEngine};
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions};
@@ -246,7 +245,6 @@ impl FlownodeInstance {
        self.server.shutdown().await.context(ShutdownServerSnafu)?;

        if let Some(task) = &self.heartbeat_task {
            info!("Close heartbeat task for flownode");
            task.shutdown();
        }

@@ -273,8 +271,6 @@ pub struct FlownodeBuilder {
    heartbeat_task: Option<HeartbeatTask>,
    /// receive a oneshot sender to send state size report
    state_report_handler: Option<StateReportHandler>,
    /// Client to send sql to frontend
    frontend_client: Arc<FrontendClient>,
}

impl FlownodeBuilder {
@@ -285,7 +281,6 @@ impl FlownodeBuilder {
        table_meta: TableMetadataManagerRef,
        catalog_manager: CatalogManagerRef,
        flow_metadata_manager: FlowMetadataManagerRef,
        frontend_client: Arc<FrontendClient>,
    ) -> Self {
        Self {
            opts,
@@ -295,7 +290,6 @@ impl FlownodeBuilder {
            flow_metadata_manager,
            heartbeat_task: None,
            state_report_handler: None,
            frontend_client,
        }
    }

@@ -453,14 +447,7 @@ impl FlownodeBuilder {

        let node_id = self.opts.node_id.map(|id| id as u32);

        let rule_engine = RecordingRuleEngine::new(
            self.frontend_client.clone(),
            query_engine.clone(),
            self.flow_metadata_manager.clone(),
            table_meta.clone(),
        );

        let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta, rule_engine);
        let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
        for worker_id in 0..num_workers {
            let (tx, rx) = oneshot::channel();
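
The loop above hands each worker a oneshot channel so the built worker can be sent back to the manager. A minimal sketch of that handshake, assuming tokio's oneshot and placeholder worker logic:

// A minimal sketch of the oneshot handshake, not the real worker setup:
// each worker thread sends one value back before the manager proceeds.
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let num_workers = 4u32;
    let mut pending = Vec::new();
    for worker_id in 0..num_workers {
        let (tx, rx) = oneshot::channel();
        std::thread::spawn(move || {
            // ... a real worker would build its state here ...
            let _ = tx.send(worker_id);
        });
        pending.push(rx);
    }
    for rx in pending {
        let id = rx.await.expect("worker dropped its sender");
        println!("worker {id} ready");
    }
}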

@@ -86,8 +86,7 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {

    let schema = vec![
        datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
        datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
            .with_time_index(true),
        datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false),
    ];
    let mut columns = vec![];
    let numbers = (1..=10).collect_vec();
@@ -115,37 +114,6 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
    };
    catalog_list.register_table_sync(req_with_ts).unwrap();

    let schema = vec![
        datatypes::schema::ColumnSchema::new("NUMBER", CDT::uint32_datatype(), false),
        datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
            .with_time_index(true),
    ];
    let mut columns = vec![];
    let numbers = (1..=10).collect_vec();
    let column: VectorRef = Arc::new(<u32 as Scalar>::VectorType::from_vec(numbers));
    columns.push(column);

    let ts = (1..=10).collect_vec();
    let mut builder = TimestampMillisecondVectorBuilder::with_capacity(10);
    ts.into_iter()
        .map(|v| builder.push(Some(TimestampMillisecond::new(v))))
        .count();
    let column: VectorRef = builder.to_vector_cloned();
    columns.push(column);

    let schema = Arc::new(Schema::new(schema));
    let recordbatch = common_recordbatch::RecordBatch::new(schema, columns).unwrap();
    let table = MemTable::table("UPPERCASE_NUMBERS_WITH_TS", recordbatch);

    let req_with_ts = RegisterTableRequest {
        catalog: DEFAULT_CATALOG_NAME.to_string(),
        schema: DEFAULT_SCHEMA_NAME.to_string(),
        table_name: "UPPERCASE_NUMBERS_WITH_TS".to_string(),
        table_id: 1025,
        table,
    };
    catalog_list.register_table_sync(req_with_ts).unwrap();

    let factory = query::QueryEngineFactory::new(catalog_list, None, None, None, None, false);

    let engine = factory.query_engine();
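
In the removed block, `.map(...).count()` is used only to drive the builder pushes for their side effect. A small sketch with std types showing the equivalent, more idiomatic for loop:

// Same fill pattern with std types only; prefer a for loop over
// `.map(...).count()` when the closure runs purely for its side effect.
fn main() {
    let mut builder: Vec<Option<i64>> = Vec::with_capacity(10);
    for v in 1..=10i64 {
        builder.push(Some(v));
    }
    assert_eq!(builder.len(), 10);
}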

@@ -133,6 +133,7 @@ impl FrontendBuilder {
            .context(error::CacheRequiredSnafu {
                name: TABLE_FLOWNODE_SET_CACHE_NAME,
            })?;

        let inserter = Arc::new(Inserter::new(
            self.catalog_manager.clone(),
            partition_manager.clone(),

@@ -111,7 +111,6 @@ impl MetaClientBuilder {
            .enable_store()
            .enable_heartbeat()
            .enable_procedure()
            .enable_access_cluster_info()
    }

    pub fn enable_heartbeat(self) -> Self {
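
MetaClientBuilder's enable_* methods follow the consuming-self builder idiom: take self by value, set a flag, return self for chaining. A minimal sketch with an illustrative struct (the field names are hypothetical, not the real layout):

// Illustrative only: each method takes `self` by value, flips a flag,
// and returns the builder so calls can be chained.
#[derive(Default)]
struct Builder {
    enable_store: bool,
    enable_heartbeat: bool,
}

impl Builder {
    fn enable_store(mut self) -> Self {
        self.enable_store = true;
        self
    }

    fn enable_heartbeat(mut self) -> Self {
        self.enable_heartbeat = true;
        self
    }
}

fn main() {
    let b = Builder::default().enable_store().enable_heartbeat();
    assert!(b.enable_store && b.enable_heartbeat);
}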

@@ -7,7 +7,6 @@ license.workspace = true
[features]
mock = []
pg_kvbackend = ["dep:tokio-postgres", "common-meta/pg_kvbackend"]
mysql_kvbackend = [] # placeholder feature so that CI can compile

[lints]
workspace = true
@@ -62,6 +61,7 @@ serde_json.workspace = true
servers.workspace = true
snafu.workspace = true
store-api.workspace = true
strum.workspace = true
table.workspace = true
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true, features = ["with-chrono-0_4"] }

@@ -749,6 +749,41 @@ pub enum Error {
        location: Location,
        source: common_meta::error::Error,
    },

    #[snafu(display("Logical table cannot add follower: {table_id}"))]
    LogicalTableCannotAddFollower {
        table_id: TableId,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display(
        "A region follower cannot be placed on the same node as the leader: {region_id}, {peer_id}"
    ))]
    RegionFollowerLeaderConflict {
        region_id: RegionId,
        peer_id: u64,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display(
        "Multiple region followers cannot be placed on the same node: {region_id}, {peer_id}"
    ))]
    MultipleRegionFollowersOnSameNode {
        region_id: RegionId,
        peer_id: u64,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Region follower not exists: {region_id}, {peer_id}"))]
    RegionFollowerNotExists {
        region_id: RegionId,
        peer_id: u64,
        #[snafu(implicit)]
        location: Location,
    },
}

impl Error {
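
snafu generates a `...Snafu` context selector per variant, which is how these new variants would be raised (compare `UnexpectedSnafu` earlier in this diff). A self-contained sketch of the idiom, with a stand-in error type mirroring RegionFollowerLeaderConflict rather than the real meta-srv Error:

// Illustrative guard, not the real procedure code: `ensure!` builds the
// error through the generated RegionFollowerLeaderConflictSnafu selector
// (snafu strips the `Error` suffix and appends `Snafu`).
use snafu::{ensure, Snafu};

#[derive(Debug, Snafu)]
#[snafu(display(
    "A region follower cannot be placed on the same node as the leader: {region_id}, {peer_id}"
))]
struct RegionFollowerLeaderConflictError {
    region_id: u64,
    peer_id: u64,
}

fn check_not_leader(
    region_id: u64,
    leader_peer: u64,
    peer_id: u64,
) -> Result<(), RegionFollowerLeaderConflictError> {
    ensure!(
        peer_id != leader_peer,
        RegionFollowerLeaderConflictSnafu { region_id, peer_id }
    );
    Ok(())
}

fn main() {
    assert!(check_not_leader(1, 7, 7).is_err());
}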
@@ -818,6 +853,10 @@ impl ErrorExt for Error {
            | Error::ProcedureNotFound { .. }
            | Error::TooManyPartitions { .. }
            | Error::TomlFormat { .. }
            | Error::LogicalTableCannotAddFollower { .. }
            | Error::RegionFollowerLeaderConflict { .. }
            | Error::MultipleRegionFollowersOnSameNode { .. }
            | Error::RegionFollowerNotExists { .. }
            | Error::HandlerNotFound { .. } => StatusCode::InvalidArguments,
            Error::LeaseKeyFromUtf8 { .. }
            | Error::LeaseValueFromUtf8 { .. }

@@ -42,12 +42,20 @@ impl HeartbeatHandler for FlowStateHandler {
        _ctx: &mut Context,
        _acc: &mut HeartbeatAccumulator,
    ) -> Result<HandleControl> {
        if let Some(FlowStat { flow_stat_size }) = &req.flow_stat {
        if let Some(FlowStat {
            flow_stat_size,
            flow_last_exec_time_map,
        }) = &req.flow_stat
        {
            let state_size = flow_stat_size
                .iter()
                .map(|(k, v)| (*k, *v as usize))
                .collect();
            let value = FlowStateValue::new(state_size);
            let last_exec_time_map = flow_last_exec_time_map
                .iter()
                .map(|(k, v)| (*k, *v))
                .collect();
            let value: FlowStateValue = FlowStateValue::new(state_size, last_exec_time_map);
            self.flow_state_manager
                .put(value)
                .await
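
Before persisting, the handler reshapes both heartbeat maps: flow sizes are widened from u64 to usize, and the last-execution-time map is copied entry by entry. The same conversions in a self-contained sketch with std types:

// Stand-in maps with the same shapes as the heartbeat payload above.
use std::collections::HashMap;

fn main() {
    let flow_stat_size: HashMap<u32, u64> = HashMap::from([(1, 42)]);
    let flow_last_exec_time_map: HashMap<u32, i64> = HashMap::from([(1, 1_700_000_000)]);

    // u64 -> usize, as in `.map(|(k, v)| (*k, *v as usize))`.
    let state_size: HashMap<u32, usize> = flow_stat_size
        .iter()
        .map(|(k, v)| (*k, *v as usize))
        .collect();
    // Copied key/value pairs, as in `.map(|(k, v)| (*k, *v))`.
    let last_exec_time_map: HashMap<u32, i64> = flow_last_exec_time_map
        .iter()
        .map(|(k, v)| (*k, *v))
        .collect();

    assert_eq!(state_size[&1], 42);
    assert_eq!(last_exec_time_map[&1], 1_700_000_000);
}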

@@ -55,6 +55,8 @@ use crate::lease::MetaPeerLookupService;
use crate::metasrv::{
    ElectionRef, Metasrv, MetasrvInfo, MetasrvOptions, SelectorContext, SelectorRef, TABLE_ID_SEQ,
};
use crate::procedure::region_follower::manager::RegionFollowerManager;
use crate::procedure::region_follower::Context as ArfContext;
use crate::procedure::region_migration::manager::RegionMigrationManager;
use crate::procedure::region_migration::DefaultContextFactory;
use crate::region::supervisor::{
@@ -292,6 +294,7 @@ impl MetasrvBuilder {
            (Arc::new(NoopRegionFailureDetectorControl) as _, None as _)
        };

        // region migration manager
        let region_migration_manager = Arc::new(RegionMigrationManager::new(
            procedure_manager.clone(),
            DefaultContextFactory::new(
@@ -342,6 +345,19 @@ impl MetasrvBuilder {
            .context(error::InitDdlManagerSnafu)?,
        );

        // alter region follower manager
        let region_follower_manager = Arc::new(RegionFollowerManager::new(
            procedure_manager.clone(),
            ArfContext {
                table_metadata_manager: table_metadata_manager.clone(),
                mailbox: mailbox.clone(),
                server_addr: options.server_addr.clone(),
                cache_invalidator: cache_invalidator.clone(),
                meta_peer_client: meta_peer_client.clone(),
            },
        ));
        region_follower_manager.try_start()?;

        let handler_group_builder = match handler_group_builder {
            Some(handler_group_builder) => handler_group_builder,
            None => {

@@ -60,5 +60,10 @@ lazy_static! {
    /// The migration fail counter.
    pub static ref METRIC_META_REGION_MIGRATION_FAIL: IntCounter =
        register_int_counter!("greptime_meta_region_migration_fail", "meta region migration fail").unwrap();

    /// The add region follower execute histogram.
    pub static ref METRIC_META_ADD_REGION_FOLLOWER_EXECUTE: HistogramVec =
        register_histogram_vec!("greptime_meta_add_region_follower_execute", "meta add region follower execute", &["state"]).unwrap();
    /// The remove region follower execute histogram.
    pub static ref METRIC_META_REMOVE_REGION_FOLLOWER_EXECUTE: HistogramVec =
        register_histogram_vec!("greptime_meta_remove_region_follower_execute", "meta remove region follower execute", &["state"]).unwrap();
}
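
Both histograms are labeled by procedure state, so each state's duration can be observed separately. A self-contained sketch of recording one timed state with the prometheus crate; the helper function and state name are illustrative, not part of this diff:

// Illustrative timing helper: start_timer() records the elapsed seconds
// into the "state"-labeled histogram when the timer is observed.
use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, HistogramVec};

lazy_static! {
    static ref ADD_REGION_FOLLOWER_EXECUTE: HistogramVec = register_histogram_vec!(
        "greptime_meta_add_region_follower_execute",
        "meta add region follower execute",
        &["state"]
    )
    .unwrap();
}

fn run_state<F: FnOnce()>(state: &str, body: F) {
    let timer = ADD_REGION_FOLLOWER_EXECUTE
        .with_label_values(&[state])
        .start_timer();
    body();
    timer.observe_duration();
}

fn main() {
    run_state("update_metadata", || {
        // ... the state's actual work would run here ...
    });
}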

Some files were not shown because too many files have changed in this diff.