Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-03 20:02:54 +00:00)

Compare commits: feat/index ... feat/scann (24 commits)
| SHA1 |
|---|
| 59ddfa84ec |
| dd043eadc4 |
| 7e6af2c7ee |
| 87d3b17f4d |
| 5acac3d403 |
| f9c66ba0de |
| 37847a8df6 |
| 6e06ac9e5c |
| 09effc8128 |
| c14728e3ae |
| cce4d56e00 |
| 69cf13b33a |
| c83a282b39 |
| 5329efcdba |
| 50b5c90d53 |
| fea2966dec |
| e00452c4db |
| 7a31b2a8ea |
| f363d73f72 |
| 7a6befcad3 |
| d6c75ec55f |
| 5b8f1d819f |
| b68286e8af |
| 4519607bc6 |
.github/workflows/release.yml (vendored, 32 lines changed)
@@ -49,9 +49,14 @@ on:
description: Do not run integration tests during the build
type: boolean
default: true
build_linux_artifacts:
build_linux_amd64_artifacts:
type: boolean
description: Build linux artifacts (both amd64 and arm64)
description: Build linux-amd64 artifacts
required: false
default: false
build_linux_arm64_artifacts:
type: boolean
description: Build linux-arm64 artifacts
required: false
default: false
build_macos_artifacts:
@@ -139,7 +144,7 @@ jobs:
./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}"

- name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-amd64-runner
with:
@@ -153,7 +158,7 @@ jobs:
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}

- name: Allocate linux-arm64 runner
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-arm64-runner
with:
@@ -168,7 +173,7 @@ jobs:

build-linux-amd64-artifacts:
name: Build linux-amd64 artifacts
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
@@ -190,7 +195,7 @@ jobs:

build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
@@ -212,7 +217,7 @@ jobs:

run-multi-lang-tests:
name: Run Multi-language SDK Tests
if: ${{ inputs.build_linux_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -381,18 +386,7 @@ jobs:

publish-github-release:
name: Create GitHub release and upload artifacts
# Use always() to run even when optional jobs (macos, windows) are skipped.
# Then check that required jobs succeeded and optional jobs didn't fail.
if: |
always() &&
(inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule') &&
needs.allocate-runners.result == 'success' &&
(needs.build-linux-amd64-artifacts.result == 'success' || needs.build-linux-amd64-artifacts.result == 'skipped') &&
(needs.build-linux-arm64-artifacts.result == 'success' || needs.build-linux-arm64-artifacts.result == 'skipped') &&
(needs.build-macos-artifacts.result == 'success' || needs.build-macos-artifacts.result == 'skipped') &&
(needs.build-windows-artifacts.result == 'success' || needs.build-windows-artifacts.result == 'skipped') &&
(needs.release-images-to-dockerhub.result == 'success' || needs.release-images-to-dockerhub.result == 'skipped') &&
(needs.run-multi-lang-tests.result == 'success' || needs.run-multi-lang-tests.result == 'skipped')
if: ${{ inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [ # The job have to wait for all the artifacts are built.
allocate-runners,
build-linux-amd64-artifacts,

AUTHOR.md (64 lines changed)
@@ -2,41 +2,41 @@

## Individual Committers (in alphabetical order)

- [apdong2022](https://github.com/apdong2022)
- [beryl678](https://github.com/beryl678)
- [CookiePieWw](https://github.com/CookiePieWw)
- [etolbakov](https://github.com/etolbakov)
- [irenjj](https://github.com/irenjj)
- [KKould](https://github.com/KKould)
- [Lanqing Yang](https://github.com/lyang24)
- [nicecui](https://github.com/nicecui)
- [NiwakaDev](https://github.com/NiwakaDev)
- [paomian](https://github.com/paomian)
- [tisonkun](https://github.com/tisonkun)
- [Wenjie0329](https://github.com/Wenjie0329)
- [zhaoyingnan01](https://github.com/zhaoyingnan01)
- [zhongzc](https://github.com/zhongzc)
- [ZonaHex](https://github.com/ZonaHex)
- [zyy17](https://github.com/zyy17)
* [CookiePieWw](https://github.com/CookiePieWw)
* [etolbakov](https://github.com/etolbakov)
* [irenjj](https://github.com/irenjj)
* [KKould](https://github.com/KKould)
* [Lanqing Yang](https://github.com/lyang24)
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)

## Team Members (in alphabetical order)

- [daviderli614](https://github.com/daviderli614)
- [discord9](https://github.com/discord9)
- [evenyag](https://github.com/evenyag)
- [fengjiachun](https://github.com/fengjiachun)
- [fengys1996](https://github.com/fengys1996)
- [GrepTime](https://github.com/GrepTime)
- [holalengyu](https://github.com/holalengyu)
- [killme2008](https://github.com/killme2008)
- [MichaelScofield](https://github.com/MichaelScofield)
- [shuiyisong](https://github.com/shuiyisong)
- [sunchanglong](https://github.com/sunchanglong)
- [sunng87](https://github.com/sunng87)
- [v0y4g3r](https://github.com/v0y4g3r)
- [waynexia](https://github.com/waynexia)
- [WenyXu](https://github.com/WenyXu)
- [xtang](https://github.com/xtang)
* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)
* [fengjiachun](https://github.com/fengjiachun)
* [fengys1996](https://github.com/fengys1996)
* [GrepTime](https://github.com/GrepTime)
* [holalengyu](https://github.com/holalengyu)
* [killme2008](https://github.com/killme2008)
* [MichaelScofield](https://github.com/MichaelScofield)
* [nicecui](https://github.com/nicecui)
* [paomian](https://github.com/paomian)
* [shuiyisong](https://github.com/shuiyisong)
* [sunchanglong](https://github.com/sunchanglong)
* [sunng87](https://github.com/sunng87)
* [v0y4g3r](https://github.com/v0y4g3r)
* [waynexia](https://github.com/waynexia)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [xtang](https://github.com/xtang)
* [zhaoyingnan01](https://github.com/zhaoyingnan01)
* [zhongzc](https://github.com/zhongzc)
* [ZonaHex](https://github.com/ZonaHex)
* [zyy17](https://github.com/zyy17)

## All Contributors

Cargo.lock (generated, 15 lines changed)
@@ -3741,9 +3741,9 @@ dependencies = [

[[package]]
name = "datafusion-pg-catalog"
version = "0.12.2"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755393864c0c2dd95575ceed4b25e348686028e1b83d06f8f39914209999f821"
checksum = "15824c98ff2009c23b0398d441499b147f7c5ac0e5ee993e7a473d79040e3626"
dependencies = [
"async-trait",
"datafusion",
@@ -7445,7 +7445,6 @@ dependencies = [
"once_cell",
"ordered-float 4.6.0",
"parking_lot 0.12.4",
"partition",
"prometheus",
"prost 0.13.5",
"rand 0.9.1",
@@ -7514,11 +7513,9 @@ dependencies = [
"common-test-util",
"common-time",
"common-wal",
"criterion 0.4.0",
"datafusion",
"datatypes",
"futures-util",
"fxhash",
"humantime-serde",
"itertools 0.14.0",
"lazy_static",
@@ -8364,7 +8361,6 @@ dependencies = [
"common-macro",
"common-telemetry",
"common-test-util",
"derive_builder 0.20.2",
"futures",
"humantime-serde",
"lazy_static",
@@ -9203,9 +9199,9 @@ dependencies = [

[[package]]
name = "pgwire"
version = "0.36.3"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70a2bcdcc4b20a88e0648778ecf00415bbd5b447742275439c22176835056f99"
checksum = "d331bb0eef5bc83a221c0a85b1f205bccf094d4f72a26ae1d68a1b1c535123b7"
dependencies = [
"async-trait",
"base64 0.22.1",
@@ -9503,7 +9499,6 @@ name = "plugins"
version = "1.0.0-beta.2"
dependencies = [
"auth",
"catalog",
"clap 4.5.40",
"cli",
"common-base",
@@ -9512,7 +9507,6 @@ dependencies = [
"datanode",
"flow",
"frontend",
"meta-client",
"meta-srv",
"serde",
"snafu 0.8.6",
@@ -13069,7 +13063,6 @@ dependencies = [
"loki-proto",
"meta-client",
"meta-srv",
"mito2",
"moka",
"mysql_async",
"object-store",

@@ -131,7 +131,7 @@ datafusion-functions = "50"
datafusion-functions-aggregate-common = "50"
datafusion-optimizer = "50"
datafusion-orc = "0.5"
datafusion-pg-catalog = "0.12.2"
datafusion-pg-catalog = "0.12.1"
datafusion-physical-expr = "50"
datafusion-physical-plan = "50"
datafusion-sql = "50"

@@ -294,6 +294,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
@@ -456,6 +457,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
@@ -627,6 +629,7 @@
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
| `meta_client.timeout` | String | `3s` | Operation timeout. |
| `meta_client.heartbeat_timeout` | String | `500ms` | Heartbeat timeout. |
| `meta_client.ddl_timeout` | String | `10s` | DDL timeout. |
| `meta_client.connect_timeout` | String | `1s` | Connect server timeout. |
| `meta_client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |

@@ -99,6 +99,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"

## Heartbeat timeout.
heartbeat_timeout = "500ms"

## DDL timeout.
ddl_timeout = "10s"

@@ -78,6 +78,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"

## Heartbeat timeout.
heartbeat_timeout = "500ms"

## DDL timeout.
ddl_timeout = "10s"

@@ -226,6 +226,9 @@ metasrv_addrs = ["127.0.0.1:3002"]
## Operation timeout.
timeout = "3s"

## Heartbeat timeout.
heartbeat_timeout = "500ms"

## DDL timeout.
ddl_timeout = "10s"

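The `timeout`, `heartbeat_timeout`, and `ddl_timeout` values above are humantime-style duration strings. Below is a minimal sketch of how such a `[meta_client]` block maps onto duration-typed fields; it uses a stand-in struct rather than GreptimeDB's actual `MetaClientOptions`, and assumes the `serde`, `toml`, and `humantime-serde` crates that already appear in Cargo.lock.

use std::time::Duration;

use serde::Deserialize;

// Stand-in for the real options struct; field names follow the TOML keys above.
#[derive(Debug, Deserialize)]
struct MetaClientToml {
    metasrv_addrs: Vec<String>,
    #[serde(with = "humantime_serde")]
    timeout: Duration,
    #[serde(with = "humantime_serde")]
    heartbeat_timeout: Duration,
    #[serde(with = "humantime_serde")]
    ddl_timeout: Duration,
}

fn main() {
    let src = r#"
        metasrv_addrs = ["127.0.0.1:3002"]
        timeout = "3s"
        heartbeat_timeout = "500ms"
        ddl_timeout = "10s"
    "#;
    let opts: MetaClientToml = toml::from_str(src).expect("valid TOML");
    assert_eq!(opts.timeout, Duration::from_secs(3));
    assert_eq!(opts.heartbeat_timeout, Duration::from_millis(500));
    assert_eq!(opts.ddl_timeout, Duration::from_secs(10));
}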
@@ -23,9 +23,11 @@ use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth
|
||||
use datatypes::json::value::{JsonNumber, JsonValue, JsonValueRef, JsonVariant};
|
||||
use datatypes::prelude::{ConcreteDataType, ValueRef};
|
||||
use datatypes::types::{
|
||||
IntervalType, JsonFormat, JsonType, StructField, StructType, TimeType, TimestampType,
|
||||
IntervalType, JsonFormat, StructField, StructType, TimeType, TimestampType,
|
||||
};
|
||||
use datatypes::value::{
|
||||
ListValue, ListValueRef, OrderedF32, OrderedF64, StructValue, StructValueRef, Value,
|
||||
};
|
||||
use datatypes::value::{ListValueRef, OrderedF32, OrderedF64, StructValueRef, Value};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use greptime_proto::v1::column_data_type_extension::TypeExt;
|
||||
use greptime_proto::v1::ddl_request::Expr;
|
||||
@@ -80,10 +82,6 @@ impl ColumnDataTypeWrapper {
|
||||
pub fn to_parts(&self) -> (ColumnDataType, Option<ColumnDataTypeExtension>) {
|
||||
(self.datatype, self.datatype_ext.clone())
|
||||
}
|
||||
|
||||
pub fn into_parts(self) -> (ColumnDataType, Option<ColumnDataTypeExtension>) {
|
||||
(self.datatype, self.datatype_ext)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
@@ -129,7 +127,6 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
};
|
||||
ConcreteDataType::json_native_datatype(inner_type.into())
|
||||
}
|
||||
None => ConcreteDataType::Json(JsonType::null()),
|
||||
_ => {
|
||||
// invalid state, type extension is missing or invalid
|
||||
ConcreteDataType::null_datatype()
|
||||
@@ -444,22 +441,18 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
JsonFormat::Jsonb => Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
JsonFormat::Native(native_type) => {
|
||||
if native_type.is_null() {
|
||||
None
|
||||
} else {
|
||||
let native_type = ConcreteDataType::from(native_type.as_ref());
|
||||
let (datatype, datatype_extension) =
|
||||
ColumnDataTypeWrapper::try_from(native_type)?.into_parts();
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonNativeType(Box::new(
|
||||
JsonNativeTypeExtension {
|
||||
datatype: datatype as i32,
|
||||
datatype_extension: datatype_extension.map(Box::new),
|
||||
},
|
||||
))),
|
||||
})
|
||||
}
|
||||
JsonFormat::Native(inner) => {
|
||||
let inner_type = ColumnDataTypeWrapper::try_from(
|
||||
ConcreteDataType::from(inner.as_ref()),
|
||||
)?;
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonNativeType(Box::new(
|
||||
JsonNativeTypeExtension {
|
||||
datatype: inner_type.datatype.into(),
|
||||
datatype_extension: inner_type.datatype_ext.map(Box::new),
|
||||
},
|
||||
))),
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -894,6 +887,111 @@ pub fn is_column_type_value_eq(
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Convert value into proto's value.
|
||||
pub fn to_proto_value(value: Value) -> v1::Value {
|
||||
match value {
|
||||
Value::Null => v1::Value { value_data: None },
|
||||
Value::Boolean(v) => v1::Value {
|
||||
value_data: Some(ValueData::BoolValue(v)),
|
||||
},
|
||||
Value::UInt8(v) => v1::Value {
|
||||
value_data: Some(ValueData::U8Value(v.into())),
|
||||
},
|
||||
Value::UInt16(v) => v1::Value {
|
||||
value_data: Some(ValueData::U16Value(v.into())),
|
||||
},
|
||||
Value::UInt32(v) => v1::Value {
|
||||
value_data: Some(ValueData::U32Value(v)),
|
||||
},
|
||||
Value::UInt64(v) => v1::Value {
|
||||
value_data: Some(ValueData::U64Value(v)),
|
||||
},
|
||||
Value::Int8(v) => v1::Value {
|
||||
value_data: Some(ValueData::I8Value(v.into())),
|
||||
},
|
||||
Value::Int16(v) => v1::Value {
|
||||
value_data: Some(ValueData::I16Value(v.into())),
|
||||
},
|
||||
Value::Int32(v) => v1::Value {
|
||||
value_data: Some(ValueData::I32Value(v)),
|
||||
},
|
||||
Value::Int64(v) => v1::Value {
|
||||
value_data: Some(ValueData::I64Value(v)),
|
||||
},
|
||||
Value::Float32(v) => v1::Value {
|
||||
value_data: Some(ValueData::F32Value(*v)),
|
||||
},
|
||||
Value::Float64(v) => v1::Value {
|
||||
value_data: Some(ValueData::F64Value(*v)),
|
||||
},
|
||||
Value::String(v) => v1::Value {
|
||||
value_data: Some(ValueData::StringValue(v.as_utf8().to_string())),
|
||||
},
|
||||
Value::Binary(v) => v1::Value {
|
||||
value_data: Some(ValueData::BinaryValue(v.to_vec())),
|
||||
},
|
||||
Value::Date(v) => v1::Value {
|
||||
value_data: Some(ValueData::DateValue(v.val())),
|
||||
},
|
||||
Value::Timestamp(v) => match v.unit() {
|
||||
TimeUnit::Second => v1::Value {
|
||||
value_data: Some(ValueData::TimestampSecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Millisecond => v1::Value {
|
||||
value_data: Some(ValueData::TimestampMillisecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Microsecond => v1::Value {
|
||||
value_data: Some(ValueData::TimestampMicrosecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Nanosecond => v1::Value {
|
||||
value_data: Some(ValueData::TimestampNanosecondValue(v.value())),
|
||||
},
|
||||
},
|
||||
Value::Time(v) => match v.unit() {
|
||||
TimeUnit::Second => v1::Value {
|
||||
value_data: Some(ValueData::TimeSecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Millisecond => v1::Value {
|
||||
value_data: Some(ValueData::TimeMillisecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Microsecond => v1::Value {
|
||||
value_data: Some(ValueData::TimeMicrosecondValue(v.value())),
|
||||
},
|
||||
TimeUnit::Nanosecond => v1::Value {
|
||||
value_data: Some(ValueData::TimeNanosecondValue(v.value())),
|
||||
},
|
||||
},
|
||||
Value::IntervalYearMonth(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
|
||||
},
|
||||
Value::IntervalDayTime(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
|
||||
},
|
||||
Value::IntervalMonthDayNano(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalMonthDayNanoValue(
|
||||
convert_month_day_nano_to_pb(v),
|
||||
)),
|
||||
},
|
||||
Value::Decimal128(v) => v1::Value {
|
||||
value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
|
||||
},
|
||||
Value::List(list_value) => v1::Value {
|
||||
value_data: Some(ValueData::ListValue(v1::ListValue {
|
||||
items: convert_list_to_pb_values(list_value),
|
||||
})),
|
||||
},
|
||||
Value::Struct(struct_value) => v1::Value {
|
||||
value_data: Some(ValueData::StructValue(v1::StructValue {
|
||||
items: convert_struct_to_pb_values(struct_value),
|
||||
})),
|
||||
},
|
||||
Value::Json(v) => v1::Value {
|
||||
value_data: Some(ValueData::JsonValue(encode_json_value(*v))),
|
||||
},
|
||||
Value::Duration(_) => v1::Value { value_data: None },
|
||||
}
|
||||
}
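// A minimal usage sketch for the conversion above (hypothetical test module,
// not part of this change; it relies on the `Value` and `ValueData` types
// already imported by this file): scalar values map one-to-one onto
// `v1::Value`'s `value_data` variants.
#[cfg(test)]
mod to_proto_value_usage_sketch {
    use super::*;

    #[test]
    fn int64_maps_to_i64_value_data() {
        let pb = to_proto_value(Value::Int64(42));
        assert_eq!(pb.value_data, Some(ValueData::I64Value(42)));
    }
}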
|
||||
|
||||
fn encode_json_value(value: JsonValue) -> v1::JsonValue {
|
||||
fn helper(json: JsonVariant) -> v1::JsonValue {
|
||||
let value = match json {
|
||||
@@ -954,6 +1052,22 @@ fn decode_json_value(value: &v1::JsonValue) -> JsonValueRef<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_list_to_pb_values(list_value: ListValue) -> Vec<v1::Value> {
|
||||
list_value
|
||||
.take_items()
|
||||
.into_iter()
|
||||
.map(to_proto_value)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn convert_struct_to_pb_values(struct_value: StructValue) -> Vec<v1::Value> {
|
||||
struct_value
|
||||
.take_items()
|
||||
.into_iter()
|
||||
.map(to_proto_value)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the [ColumnDataTypeWrapper] of the value.
|
||||
///
|
||||
/// If value is null, returns `None`.
|
||||
@@ -1000,14 +1114,14 @@ pub fn vectors_to_rows<'a>(
|
||||
let mut rows = vec![Row { values: vec![] }; row_count];
|
||||
for column in columns {
|
||||
for (row_index, row) in rows.iter_mut().enumerate() {
|
||||
row.values.push(to_grpc_value(column.get(row_index)))
|
||||
row.values.push(value_to_grpc_value(column.get(row_index)))
|
||||
}
|
||||
}
|
||||
|
||||
rows
|
||||
}
|
||||
|
||||
pub fn to_grpc_value(value: Value) -> GrpcValue {
|
||||
pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
GrpcValue {
|
||||
value_data: match value {
|
||||
Value::Null => None,
|
||||
@@ -1047,7 +1161,7 @@ pub fn to_grpc_value(value: Value) -> GrpcValue {
|
||||
let items = list_value
|
||||
.take_items()
|
||||
.into_iter()
|
||||
.map(to_grpc_value)
|
||||
.map(value_to_grpc_value)
|
||||
.collect();
|
||||
Some(ValueData::ListValue(v1::ListValue { items }))
|
||||
}
|
||||
@@ -1055,7 +1169,7 @@ pub fn to_grpc_value(value: Value) -> GrpcValue {
|
||||
let items = struct_value
|
||||
.take_items()
|
||||
.into_iter()
|
||||
.map(to_grpc_value)
|
||||
.map(value_to_grpc_value)
|
||||
.collect();
|
||||
Some(ValueData::StructValue(v1::StructValue { items }))
|
||||
}
|
||||
@@ -1155,7 +1269,6 @@ mod tests {
|
||||
use common_time::interval::IntervalUnit;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::types::{Int8Type, Int32Type, UInt8Type, UInt32Type};
|
||||
use datatypes::value::{ListValue, StructValue};
|
||||
use datatypes::vectors::{
|
||||
BooleanVector, DateVector, Float32Vector, PrimitiveVector, StringVector,
|
||||
};
|
||||
@@ -1759,7 +1872,7 @@ mod tests {
|
||||
Arc::new(ConcreteDataType::boolean_datatype()),
|
||||
));
|
||||
|
||||
let pb_value = to_grpc_value(value);
|
||||
let pb_value = to_proto_value(value);
|
||||
|
||||
match pb_value.value_data.unwrap() {
|
||||
ValueData::ListValue(pb_list_value) => {
|
||||
@@ -1788,7 +1901,7 @@ mod tests {
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let pb_value = to_grpc_value(value);
|
||||
let pb_value = to_proto_value(value);
|
||||
|
||||
match pb_value.value_data.unwrap() {
|
||||
ValueData::StructValue(pb_struct_value) => {
|
||||
|
||||
@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true

[features]
enterprise = []
testing = []

[lints]

@@ -12,14 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub use client::{CachedKvBackend, CachedKvBackendBuilder, MetaKvBackend};
|
||||
|
||||
mod builder;
|
||||
mod client;
|
||||
mod manager;
|
||||
mod table_cache;
|
||||
|
||||
pub use builder::{
|
||||
CatalogManagerConfigurator, CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder,
|
||||
};
|
||||
pub use client::{CachedKvBackend, CachedKvBackendBuilder, MetaKvBackend};
|
||||
pub use builder::KvBackendCatalogManagerBuilder;
|
||||
pub use manager::KvBackendCatalogManager;
|
||||
pub use table_cache::{TableCache, TableCacheRef, new_table_cache};
|
||||
|
||||
@@ -12,11 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::DEFAULT_CATALOG_NAME;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::LayeredCacheRegistryRef;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
@@ -25,34 +23,24 @@ use common_procedure::ProcedureManagerRef;
|
||||
use moka::sync::Cache;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
|
||||
use crate::information_schema::{
|
||||
InformationExtensionRef, InformationSchemaProvider, InformationSchemaTableFactoryRef,
|
||||
};
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::information_schema::InformationSchemaTableFactoryRef;
|
||||
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::kvbackend::manager::{CATALOG_CACHE_MAX_CAPACITY, SystemCatalog};
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::numbers_table_provider::NumbersTableProvider;
|
||||
use crate::system_schema::pg_catalog::PGCatalogProvider;
|
||||
|
||||
/// The configurator that customizes or enhances the [`KvBackendCatalogManagerBuilder`].
|
||||
#[async_trait::async_trait]
|
||||
pub trait CatalogManagerConfigurator<C>: Send + Sync {
|
||||
async fn configure(
|
||||
&self,
|
||||
builder: KvBackendCatalogManagerBuilder,
|
||||
ctx: C,
|
||||
) -> std::result::Result<KvBackendCatalogManagerBuilder, BoxedError>;
|
||||
}
|
||||
|
||||
pub type CatalogManagerConfiguratorRef<C> = Arc<dyn CatalogManagerConfigurator<C>>;
|
||||
|
||||
pub struct KvBackendCatalogManagerBuilder {
|
||||
information_extension: InformationExtensionRef,
|
||||
backend: KvBackendRef,
|
||||
cache_registry: LayeredCacheRegistryRef,
|
||||
procedure_manager: Option<ProcedureManagerRef>,
|
||||
process_manager: Option<ProcessManagerRef>,
|
||||
extra_information_table_factories: HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_information_table_factories:
|
||||
std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
}
|
||||
|
||||
impl KvBackendCatalogManagerBuilder {
|
||||
@@ -67,7 +55,8 @@ impl KvBackendCatalogManagerBuilder {
|
||||
cache_registry,
|
||||
procedure_manager: None,
|
||||
process_manager: None,
|
||||
extra_information_table_factories: HashMap::new(),
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_information_table_factories: std::collections::HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,9 +71,10 @@ impl KvBackendCatalogManagerBuilder {
|
||||
}
|
||||
|
||||
/// Sets the extra information tables.
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub fn with_extra_information_table_factories(
|
||||
mut self,
|
||||
factories: HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
factories: std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
) -> Self {
|
||||
self.extra_information_table_factories = factories;
|
||||
self
|
||||
@@ -97,6 +87,7 @@ impl KvBackendCatalogManagerBuilder {
|
||||
cache_registry,
|
||||
procedure_manager,
|
||||
process_manager,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_information_table_factories,
|
||||
} = self;
|
||||
Arc::new_cyclic(|me| KvBackendCatalogManager {
|
||||
@@ -120,6 +111,7 @@ impl KvBackendCatalogManagerBuilder {
|
||||
process_manager.clone(),
|
||||
backend.clone(),
|
||||
);
|
||||
#[cfg(feature = "enterprise")]
|
||||
let provider = provider
|
||||
.with_extra_table_factories(extra_information_table_factories.clone());
|
||||
Arc::new(provider)
|
||||
@@ -131,6 +123,7 @@ impl KvBackendCatalogManagerBuilder {
|
||||
numbers_table_provider: NumbersTableProvider,
|
||||
backend,
|
||||
process_manager,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_information_table_factories,
|
||||
},
|
||||
cache_registry,
|
||||
|
||||
@@ -53,9 +53,9 @@ use crate::error::{
|
||||
CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
|
||||
ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::information_schema::{
|
||||
InformationExtensionRef, InformationSchemaProvider, InformationSchemaTableFactoryRef,
|
||||
};
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::information_schema::InformationSchemaTableFactoryRef;
|
||||
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
|
||||
use crate::kvbackend::TableCacheRef;
|
||||
use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::SystemSchemaProvider;
|
||||
@@ -557,6 +557,7 @@ pub(super) struct SystemCatalog {
|
||||
pub(super) numbers_table_provider: NumbersTableProvider,
|
||||
pub(super) backend: KvBackendRef,
|
||||
pub(super) process_manager: Option<ProcessManagerRef>,
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub(super) extra_information_table_factories:
|
||||
std::collections::HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
}
|
||||
@@ -627,6 +628,7 @@ impl SystemCatalog {
|
||||
self.process_manager.clone(),
|
||||
self.backend.clone(),
|
||||
);
|
||||
#[cfg(feature = "enterprise")]
|
||||
let provider = provider
|
||||
.with_extra_table_factories(self.extra_information_table_factories.clone());
|
||||
Arc::new(provider)
|
||||
|
||||
@@ -117,6 +117,7 @@ macro_rules! setup_memory_table {
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub struct MakeInformationTableRequest {
|
||||
pub catalog_name: String,
|
||||
pub catalog_manager: Weak<dyn CatalogManager>,
|
||||
@@ -127,10 +128,12 @@ pub struct MakeInformationTableRequest {
|
||||
///
|
||||
/// This trait allows for extensibility of the information schema by providing
|
||||
/// a way to dynamically create custom information schema tables.
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub trait InformationSchemaTableFactory {
|
||||
fn make_information_table(&self, req: MakeInformationTableRequest) -> SystemTableRef;
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub type InformationSchemaTableFactoryRef = Arc<dyn InformationSchemaTableFactory + Send + Sync>;
|
||||
|
||||
/// The `information_schema` tables info provider.
|
||||
@@ -140,7 +143,9 @@ pub struct InformationSchemaProvider {
|
||||
process_manager: Option<ProcessManagerRef>,
|
||||
flow_metadata_manager: Arc<FlowMetadataManager>,
|
||||
tables: HashMap<String, TableRef>,
|
||||
#[allow(dead_code)]
|
||||
kv_backend: KvBackendRef,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_table_factories: HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
}
|
||||
|
||||
@@ -161,6 +166,7 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
|
||||
}
|
||||
|
||||
fn system_table(&self, name: &str) -> Option<SystemTableRef> {
|
||||
#[cfg(feature = "enterprise")]
|
||||
if let Some(factory) = self.extra_table_factories.get(name) {
|
||||
let req = MakeInformationTableRequest {
|
||||
catalog_name: self.catalog_name.clone(),
|
||||
@@ -275,6 +281,7 @@ impl InformationSchemaProvider {
|
||||
process_manager,
|
||||
tables: HashMap::new(),
|
||||
kv_backend,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extra_table_factories: HashMap::new(),
|
||||
};
|
||||
|
||||
@@ -283,6 +290,7 @@ impl InformationSchemaProvider {
|
||||
provider
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub(crate) fn with_extra_table_factories(
|
||||
mut self,
|
||||
factories: HashMap<String, InformationSchemaTableFactoryRef>,
|
||||
@@ -350,6 +358,7 @@ impl InformationSchemaProvider {
|
||||
if let Some(process_list) = self.build_table(PROCESS_LIST) {
|
||||
tables.insert(PROCESS_LIST.to_string(), process_list);
|
||||
}
|
||||
#[cfg(feature = "enterprise")]
|
||||
for name in self.extra_table_factories.keys() {
|
||||
tables.insert(name.clone(), self.build_table(name).expect(name));
|
||||
}
|
||||
|
||||
@@ -211,7 +211,6 @@ struct InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder,
|
||||
partition_ordinal_positions: Int64VectorBuilder,
|
||||
partition_expressions: StringVectorBuilder,
|
||||
partition_descriptions: StringVectorBuilder,
|
||||
create_times: TimestampSecondVectorBuilder,
|
||||
partition_ids: UInt64VectorBuilder,
|
||||
}
|
||||
@@ -232,7 +231,6 @@ impl InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_descriptions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
@@ -321,21 +319,6 @@ impl InformationSchemaPartitionsBuilder {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get partition column names (shared by all partitions)
|
||||
// In MySQL, PARTITION_EXPRESSION is the partitioning function expression (e.g., column name)
|
||||
let partition_columns: String = table_info
|
||||
.meta
|
||||
.partition_column_names()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
let partition_expr_str = if partition_columns.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(partition_columns)
|
||||
};
|
||||
|
||||
for (index, partition) in partitions.iter().enumerate() {
|
||||
let partition_name = format!("p{index}");
|
||||
|
||||
@@ -345,12 +328,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
self.partition_names.push(Some(&partition_name));
|
||||
self.partition_ordinal_positions
|
||||
.push(Some((index + 1) as i64));
|
||||
// PARTITION_EXPRESSION: partition column names (same for all partitions)
|
||||
self.partition_expressions
|
||||
.push(partition_expr_str.as_deref());
|
||||
// PARTITION_DESCRIPTION: partition boundary expression (different for each partition)
|
||||
let description = partition.partition_expr.as_ref().map(|e| e.to_string());
|
||||
self.partition_descriptions.push(description.as_deref());
|
||||
let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
|
||||
self.partition_expressions.push(expression.as_deref());
|
||||
self.create_times.push(Some(TimestampSecond::from(
|
||||
table_info.meta.created_on.timestamp(),
|
||||
)));
|
||||
@@ -390,7 +369,7 @@ impl InformationSchemaPartitionsBuilder {
|
||||
null_string_vector.clone(),
|
||||
Arc::new(self.partition_expressions.finish()),
|
||||
null_string_vector.clone(),
|
||||
Arc::new(self.partition_descriptions.finish()),
|
||||
null_string_vector.clone(),
|
||||
// TODO(dennis): rows and index statistics info
|
||||
null_i64_vector.clone(),
|
||||
null_i64_vector.clone(),
|
||||
|
||||
@@ -16,7 +16,7 @@ default = [
"meta-srv/pg_kvbackend",
"meta-srv/mysql_kvbackend",
]
enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise"]
enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise", "catalog/enterprise"]
tokio-console = ["common-telemetry/tokio-console"]

[lints]

@@ -163,7 +163,7 @@ impl ObjbenchCommand {
|
||||
available_indexes: Default::default(),
|
||||
indexes: Default::default(),
|
||||
index_file_size: 0,
|
||||
index_version: 0,
|
||||
index_file_id: None,
|
||||
num_rows,
|
||||
num_row_groups,
|
||||
sequence: None,
|
||||
@@ -565,7 +565,6 @@ fn new_noop_file_purger() -> FilePurgerRef {
|
||||
struct Noop;
|
||||
impl FilePurger for Noop {
|
||||
fn remove_file(&self, _file_meta: FileMeta, _is_delete: bool) {}
|
||||
fn update_index(&self, _file_meta: FileMeta, _version: store_api::storage::IndexVersion) {}
|
||||
}
|
||||
Arc::new(Noop)
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -40,14 +39,12 @@ use flow::{
|
||||
get_flow_auth_options,
|
||||
};
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use plugins::flownode::context::GrpcConfigureContext;
|
||||
use servers::configurator::GrpcBuilderConfiguratorRef;
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{
|
||||
BuildCacheRegistrySnafu, InitMetadataSnafu, LoadLayeredConfigSnafu, MetaClientInitSnafu,
|
||||
MissingConfigSnafu, OtherSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
|
||||
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{App, create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
|
||||
@@ -58,14 +55,33 @@ type FlownodeOptions = GreptimeOptions<flow::FlownodeOptions>;
|
||||
|
||||
pub struct Instance {
|
||||
flownode: FlownodeInstance,
|
||||
|
||||
// The components of flownode, which make it easier to expand based
|
||||
// on the components.
|
||||
#[cfg(feature = "enterprise")]
|
||||
components: Components,
|
||||
|
||||
// Keep the logging guard to prevent the worker from being dropped.
|
||||
_guard: Vec<WorkerGuard>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub struct Components {
|
||||
pub catalog_manager: catalog::CatalogManagerRef,
|
||||
pub fe_client: Arc<FrontendClient>,
|
||||
pub kv_backend: common_meta::kv_backend::KvBackendRef,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
pub fn new(flownode: FlownodeInstance, guard: Vec<WorkerGuard>) -> Self {
|
||||
pub fn new(
|
||||
flownode: FlownodeInstance,
|
||||
#[cfg(feature = "enterprise")] components: Components,
|
||||
guard: Vec<WorkerGuard>,
|
||||
) -> Self {
|
||||
Self {
|
||||
flownode,
|
||||
#[cfg(feature = "enterprise")]
|
||||
components,
|
||||
_guard: guard,
|
||||
}
|
||||
}
|
||||
@@ -78,6 +94,11 @@ impl Instance {
|
||||
pub fn flownode_mut(&mut self) -> &mut FlownodeInstance {
|
||||
&mut self.flownode
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub fn components(&self) -> &Components {
|
||||
&self.components
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -375,7 +396,7 @@ impl StartCommand {
|
||||
let frontend_client = Arc::new(frontend_client);
|
||||
let flownode_builder = FlownodeBuilder::new(
|
||||
opts.clone(),
|
||||
plugins.clone(),
|
||||
plugins,
|
||||
table_metadata_manager,
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager,
|
||||
@@ -384,29 +405,8 @@ impl StartCommand {
|
||||
.with_heartbeat_task(heartbeat_task);
|
||||
|
||||
let mut flownode = flownode_builder.build().await.context(StartFlownodeSnafu)?;
|
||||
|
||||
let builder =
|
||||
FlownodeServiceBuilder::grpc_server_builder(&opts, flownode.flownode_server());
|
||||
let builder = if let Some(configurator) =
|
||||
plugins.get::<GrpcBuilderConfiguratorRef<GrpcConfigureContext>>()
|
||||
{
|
||||
let context = GrpcConfigureContext {
|
||||
kv_backend: cached_meta_backend.clone(),
|
||||
fe_client: frontend_client.clone(),
|
||||
flownode_id: member_id,
|
||||
catalog_manager: catalog_manager.clone(),
|
||||
};
|
||||
configurator
|
||||
.configure(builder, context)
|
||||
.await
|
||||
.context(OtherSnafu)?
|
||||
} else {
|
||||
builder
|
||||
};
|
||||
let grpc_server = builder.build();
|
||||
|
||||
let services = FlownodeServiceBuilder::new(&opts)
|
||||
.with_grpc_server(grpc_server)
|
||||
.with_default_grpc_server(flownode.flownode_server())
|
||||
.enable_http_service()
|
||||
.build()
|
||||
.context(StartFlownodeSnafu)?;
|
||||
@@ -430,6 +430,16 @@ impl StartCommand {
|
||||
.set_frontend_invoker(invoker)
|
||||
.await;
|
||||
|
||||
Ok(Instance::new(flownode, guard))
|
||||
#[cfg(feature = "enterprise")]
|
||||
let components = Components {
|
||||
catalog_manager: catalog_manager.clone(),
|
||||
fe_client: frontend_client,
|
||||
kv_backend: cached_meta_backend,
|
||||
};
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
return Ok(Instance::new(flownode, guard));
|
||||
#[cfg(feature = "enterprise")]
|
||||
Ok(Instance::new(flownode, components, guard))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -20,10 +19,7 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::kvbackend::{
|
||||
CachedKvBackendBuilder, CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder,
|
||||
MetaKvBackend,
|
||||
};
|
||||
use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManagerBuilder, MetaKvBackend};
|
||||
use catalog::process_manager::ProcessManager;
|
||||
use clap::Parser;
|
||||
use client::client_manager::NodeClients;
|
||||
@@ -46,16 +42,13 @@ use frontend::heartbeat::HeartbeatTask;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::server::Services;
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use plugins::frontend::context::{
|
||||
CatalogManagerConfigureContext, DistributedCatalogManagerConfigureContext,
|
||||
};
|
||||
use servers::addrs;
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{self, OtherSnafu, Result};
|
||||
use crate::error::{self, Result};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{App, create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
|
||||
|
||||
@@ -423,18 +416,9 @@ impl StartCommand {
|
||||
layered_cache_registry.clone(),
|
||||
)
|
||||
.with_process_manager(process_manager.clone());
|
||||
let builder = if let Some(configurator) =
|
||||
plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
|
||||
{
|
||||
let ctx = DistributedCatalogManagerConfigureContext {
|
||||
meta_client: meta_client.clone(),
|
||||
};
|
||||
let ctx = CatalogManagerConfigureContext::Distributed(ctx);
|
||||
|
||||
configurator
|
||||
.configure(builder, ctx)
|
||||
.await
|
||||
.context(OtherSnafu)?
|
||||
#[cfg(feature = "enterprise")]
|
||||
let builder = if let Some(factories) = plugins.get() {
|
||||
builder.with_extra_information_table_factories(factories)
|
||||
} else {
|
||||
builder
|
||||
};
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Debug};
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_config::Configurable;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
|
||||
use common_version::{short_version, verbose_version};
|
||||
use meta_srv::bootstrap::{MetasrvInstance, metasrv_builder};
|
||||
use meta_srv::bootstrap::MetasrvInstance;
|
||||
use meta_srv::metasrv::BackendImpl;
|
||||
use snafu::ResultExt;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
@@ -177,7 +177,7 @@ pub struct StartCommand {
|
||||
backend: Option<BackendImpl>,
|
||||
}
|
||||
|
||||
impl Debug for StartCommand {
|
||||
impl fmt::Debug for StartCommand {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("StartCommand")
|
||||
.field("rpc_bind_addr", &self.rpc_bind_addr)
|
||||
@@ -341,7 +341,7 @@ impl StartCommand {
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
let builder = metasrv_builder(&opts, plugins, None)
|
||||
let builder = meta_srv::bootstrap::metasrv_builder(&opts, plugins, None)
|
||||
.await
|
||||
.context(error::BuildMetaServerSnafu)?;
|
||||
let metasrv = builder.build().await.context(error::BuildMetaServerSnafu)?;
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
@@ -21,7 +20,7 @@ use std::{fs, path};
|
||||
use async_trait::async_trait;
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::information_schema::InformationExtensionRef;
|
||||
use catalog::kvbackend::{CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder};
|
||||
use catalog::kvbackend::KvBackendCatalogManagerBuilder;
|
||||
use catalog::process_manager::ProcessManager;
|
||||
use clap::Parser;
|
||||
use common_base::Plugins;
|
||||
@@ -32,7 +31,7 @@ use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_meta::ddl::flow_meta::FlowMetadataAllocator;
|
||||
use common_meta::ddl::table_meta::TableMetadataAllocator;
|
||||
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
|
||||
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
|
||||
use common_meta::ddl_manager::DdlManager;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
@@ -58,17 +57,13 @@ use frontend::instance::StandaloneDatanodeManager;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::server::Services;
|
||||
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
|
||||
use plugins::frontend::context::{
|
||||
CatalogManagerConfigureContext, StandaloneCatalogManagerConfigureContext,
|
||||
};
|
||||
use plugins::standalone::context::DdlManagerConfigureContext;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use snafu::ResultExt;
|
||||
use standalone::StandaloneInformationExtension;
|
||||
use standalone::options::StandaloneOptions;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{OtherSnafu, Result, StartFlownodeSnafu};
|
||||
use crate::error::{Result, StartFlownodeSnafu};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{App, create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile};
|
||||
|
||||
@@ -121,15 +116,34 @@ pub struct Instance {
|
||||
flownode: FlownodeInstance,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
|
||||
// The components of standalone, which make it easier to expand based
|
||||
// on the components.
|
||||
#[cfg(feature = "enterprise")]
|
||||
components: Components,
|
||||
|
||||
// Keep the logging guard to prevent the worker from being dropped.
|
||||
_guard: Vec<WorkerGuard>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub struct Components {
|
||||
pub plugins: Plugins,
|
||||
pub kv_backend: KvBackendRef,
|
||||
pub frontend_client: Arc<FrontendClient>,
|
||||
pub catalog_manager: catalog::CatalogManagerRef,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
/// Find the socket addr of a server by its `name`.
|
||||
pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
|
||||
self.frontend.server_handlers().addr(name)
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub fn components(&self) -> &Components {
|
||||
&self.components
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -401,13 +415,6 @@ impl StartCommand {
|
||||
plugins.insert::<InformationExtensionRef>(information_extension.clone());
|
||||
|
||||
let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));
|
||||
|
||||
// for standalone not use grpc, but get a handler to frontend grpc client without
|
||||
// actually make a connection
|
||||
let (frontend_client, frontend_instance_handler) =
|
||||
FrontendClient::from_empty_grpc_handler(opts.query.clone());
|
||||
let frontend_client = Arc::new(frontend_client);
|
||||
|
||||
let builder = KvBackendCatalogManagerBuilder::new(
|
||||
information_extension.clone(),
|
||||
kv_backend.clone(),
|
||||
@@ -415,17 +422,9 @@ impl StartCommand {
|
||||
)
|
||||
.with_procedure_manager(procedure_manager.clone())
|
||||
.with_process_manager(process_manager.clone());
|
||||
let builder = if let Some(configurator) =
|
||||
plugins.get::<CatalogManagerConfiguratorRef<CatalogManagerConfigureContext>>()
|
||||
{
|
||||
let ctx = StandaloneCatalogManagerConfigureContext {
|
||||
fe_client: frontend_client.clone(),
|
||||
};
|
||||
let ctx = CatalogManagerConfigureContext::Standalone(ctx);
|
||||
configurator
|
||||
.configure(builder, ctx)
|
||||
.await
|
||||
.context(OtherSnafu)?
|
||||
#[cfg(feature = "enterprise")]
|
||||
let builder = if let Some(factories) = plugins.get() {
|
||||
builder.with_extra_information_table_factories(factories)
|
||||
} else {
|
||||
builder
|
||||
};
|
||||
@@ -440,6 +439,11 @@ impl StartCommand {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// for standalone not use grpc, but get a handler to frontend grpc client without
|
||||
// actually make a connection
|
||||
let (frontend_client, frontend_instance_handler) =
|
||||
FrontendClient::from_empty_grpc_handler(opts.query.clone());
|
||||
let frontend_client = Arc::new(frontend_client);
|
||||
let flow_builder = FlownodeBuilder::new(
|
||||
flownode_options,
|
||||
plugins.clone(),
|
||||
@@ -510,21 +514,11 @@ impl StartCommand {
|
||||
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
|
||||
let ddl_manager = if let Some(configurator) =
|
||||
plugins.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>()
|
||||
{
|
||||
let ctx = DdlManagerConfigureContext {
|
||||
kv_backend: kv_backend.clone(),
|
||||
fe_client: frontend_client.clone(),
|
||||
catalog_manager: catalog_manager.clone(),
|
||||
};
|
||||
configurator
|
||||
.configure(ddl_manager, ctx)
|
||||
.await
|
||||
.context(OtherSnafu)?
|
||||
} else {
|
||||
ddl_manager
|
||||
#[cfg(feature = "enterprise")]
|
||||
let ddl_manager = {
|
||||
let trigger_ddl_manager: Option<common_meta::ddl_manager::TriggerDdlManagerRef> =
|
||||
plugins.get();
|
||||
ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
|
||||
};
|
||||
|
||||
let procedure_executor = Arc::new(LocalProcedureExecutor::new(
|
||||
@@ -580,12 +574,22 @@ impl StartCommand {
|
||||
heartbeat_task: None,
|
||||
};
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
let components = Components {
|
||||
plugins,
|
||||
kv_backend,
|
||||
frontend_client,
|
||||
catalog_manager,
|
||||
};
|
||||
|
||||
Ok(Instance {
|
||||
datanode,
|
||||
frontend,
|
||||
flownode,
|
||||
procedure_manager,
|
||||
wal_options_allocator,
|
||||
#[cfg(feature = "enterprise")]
|
||||
components,
|
||||
_guard: guard,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@ fn test_load_datanode_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
@@ -117,6 +118,7 @@ fn test_load_frontend_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
@@ -239,6 +241,7 @@ fn test_load_flownode_example_config() {
|
||||
meta_client: Some(MetaClientOptions {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_secs(3),
|
||||
heartbeat_timeout: Duration::from_millis(500),
|
||||
ddl_timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(1),
|
||||
tcp_nodelay: true,
|
||||
|
||||
@@ -32,12 +32,7 @@ impl Plugins {

pub fn insert<T: 'static + Send + Sync>(&self, value: T) {
let last = self.write().insert(value);
if last.is_some() {
panic!(
"Plugin of type {} already exists",
std::any::type_name::<T>()
);
}
assert!(last.is_none(), "each type of plugins must be one and only");
}

pub fn get<T: 'static + Send + Sync + Clone>(&self) -> Option<T> {
@@ -145,7 +140,7 @@ mod tests {
}

#[test]
#[should_panic(expected = "Plugin of type i32 already exists")]
#[should_panic(expected = "each type of plugins must be one and only")]
fn test_plugin_uniqueness() {
let plugins = Plugins::new();
plugins.insert(1i32);

@@ -14,7 +14,6 @@

mod binary;
mod ctx;
mod if_func;
mod is_null;
mod unary;

@@ -23,7 +22,6 @@ pub use ctx::EvalContext;
pub use unary::scalar_unary_op;

use crate::function_registry::FunctionRegistry;
use crate::scalars::expression::if_func::IfFunction;
use crate::scalars::expression::is_null::IsNullFunction;

pub(crate) struct ExpressionFunction;
@@ -31,6 +29,5 @@ pub(crate) struct ExpressionFunction;
impl ExpressionFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(IsNullFunction::default());
registry.register_scalar(IfFunction::default());
}
}

@@ -1,404 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;
use std::fmt::Display;

use arrow::array::ArrowNativeTypeOp;
use arrow::datatypes::ArrowPrimitiveType;
use datafusion::arrow::array::{Array, ArrayRef, AsArray, BooleanArray, PrimitiveArray};
use datafusion::arrow::compute::kernels::zip::zip;
use datafusion::arrow::datatypes::DataType;
use datafusion_common::DataFusionError;
use datafusion_expr::type_coercion::binary::comparison_coercion;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};

use crate::function::Function;

const NAME: &str = "if";

/// MySQL-compatible IF function: IF(condition, true_value, false_value)
///
/// Returns true_value if condition is TRUE (not NULL and not 0),
/// otherwise returns false_value.
///
/// MySQL truthy rules:
/// - NULL -> false
/// - 0 (numeric zero) -> false
/// - Any non-zero numeric -> true
/// - Boolean true/false -> use directly
#[derive(Clone, Debug)]
pub struct IfFunction {
    signature: Signature,
}

impl Default for IfFunction {
    fn default() -> Self {
        Self {
            signature: Signature::any(3, Volatility::Immutable),
        }
    }
}

impl Display for IfFunction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", NAME.to_ascii_uppercase())
    }
}

impl Function for IfFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, input_types: &[DataType]) -> datafusion_common::Result<DataType> {
        // Return the common type of true_value and false_value (args[1] and args[2])
        if input_types.len() < 3 {
            return Err(DataFusionError::Plan(format!(
                "{} requires 3 arguments, got {}",
                NAME,
                input_types.len()
            )));
        }
        let true_type = &input_types[1];
        let false_type = &input_types[2];

        // Use comparison_coercion to find common type
        comparison_coercion(true_type, false_type).ok_or_else(|| {
            DataFusionError::Plan(format!(
                "Cannot find common type for IF function between {:?} and {:?}",
                true_type, false_type
            ))
        })
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    fn invoke_with_args(
        &self,
        args: ScalarFunctionArgs,
    ) -> datafusion_common::Result<ColumnarValue> {
        if args.args.len() != 3 {
            return Err(DataFusionError::Plan(format!(
                "{} requires exactly 3 arguments, got {}",
                NAME,
                args.args.len()
            )));
        }

        let condition = &args.args[0];
        let true_value = &args.args[1];
        let false_value = &args.args[2];

        // Convert condition to boolean array using MySQL truthy rules
        let bool_array = to_boolean_array(condition, args.number_rows)?;

        // Convert true and false values to arrays
        let true_array = true_value.to_array(args.number_rows)?;
        let false_array = false_value.to_array(args.number_rows)?;

        // Use zip to select values based on condition
        // zip expects &dyn Datum, and ArrayRef (Arc<dyn Array>) implements Datum
        let result = zip(&bool_array, &true_array, &false_array)?;
        Ok(ColumnarValue::Array(result))
    }
}

/// Convert a ColumnarValue to a BooleanArray using MySQL truthy rules:
/// - NULL -> false
/// - 0 (any numeric zero) -> false
/// - Non-zero numeric -> true
/// - Boolean -> use directly
fn to_boolean_array(
    value: &ColumnarValue,
    num_rows: usize,
) -> datafusion_common::Result<BooleanArray> {
    let array = value.to_array(num_rows)?;
    array_to_bool(array)
}

/// Convert an integer PrimitiveArray to BooleanArray using MySQL truthy rules:
/// NULL -> false, 0 -> false, non-zero -> true
fn int_array_to_bool<T>(array: &PrimitiveArray<T>) -> BooleanArray
where
    T: ArrowPrimitiveType,
    T::Native: ArrowNativeTypeOp,
{
    BooleanArray::from_iter(
        array
            .iter()
            .map(|opt| Some(opt.is_some_and(|v| !v.is_zero()))),
    )
}

/// Convert a float PrimitiveArray to BooleanArray using MySQL truthy rules:
/// NULL -> false, 0 (including -0.0) -> false, NaN -> true, other non-zero -> true
fn float_array_to_bool<T>(array: &PrimitiveArray<T>) -> BooleanArray
where
    T: ArrowPrimitiveType,
    T::Native: ArrowNativeTypeOp + num_traits::Float,
{
    use num_traits::Float;
    BooleanArray::from_iter(
        array
            .iter()
            .map(|opt| Some(opt.is_some_and(|v| v.is_nan() || !v.is_zero()))),
    )
}

/// Convert an Array to BooleanArray using MySQL truthy rules
fn array_to_bool(array: ArrayRef) -> datafusion_common::Result<BooleanArray> {
    use arrow::datatypes::*;

    match array.data_type() {
        DataType::Boolean => {
            let bool_array = array.as_boolean();
            Ok(BooleanArray::from_iter(
                bool_array.iter().map(|opt| Some(opt.unwrap_or(false))),
            ))
        }
        DataType::Int8 => Ok(int_array_to_bool(array.as_primitive::<Int8Type>())),
        DataType::Int16 => Ok(int_array_to_bool(array.as_primitive::<Int16Type>())),
        DataType::Int32 => Ok(int_array_to_bool(array.as_primitive::<Int32Type>())),
        DataType::Int64 => Ok(int_array_to_bool(array.as_primitive::<Int64Type>())),
        DataType::UInt8 => Ok(int_array_to_bool(array.as_primitive::<UInt8Type>())),
        DataType::UInt16 => Ok(int_array_to_bool(array.as_primitive::<UInt16Type>())),
        DataType::UInt32 => Ok(int_array_to_bool(array.as_primitive::<UInt32Type>())),
        DataType::UInt64 => Ok(int_array_to_bool(array.as_primitive::<UInt64Type>())),
        // Float16 needs special handling since half::f16 doesn't implement num_traits::Float
        DataType::Float16 => {
            let typed_array = array.as_primitive::<Float16Type>();
            Ok(BooleanArray::from_iter(typed_array.iter().map(|opt| {
                Some(opt.is_some_and(|v| {
                    let f = v.to_f32();
                    f.is_nan() || !f.is_zero()
                }))
            })))
        }
        DataType::Float32 => Ok(float_array_to_bool(array.as_primitive::<Float32Type>())),
        DataType::Float64 => Ok(float_array_to_bool(array.as_primitive::<Float64Type>())),
        // Null type is always false.
        // Note: NullArray::is_null() returns false (physical null), so we must handle it explicitly.
        // See: https://github.com/apache/arrow-rs/issues/4840
        DataType::Null => Ok(BooleanArray::from(vec![false; array.len()])),
        // For other types, treat non-null as true
        _ => {
            let len = array.len();
            Ok(BooleanArray::from_iter(
                (0..len).map(|i| Some(!array.is_null(i))),
            ))
        }
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_schema::Field;
    use datafusion_common::ScalarValue;
    use datafusion_common::arrow::array::{AsArray, Int32Array, StringArray};

    use super::*;

    #[test]
    fn test_if_function_basic() {
        let if_func = IfFunction::default();
        assert_eq!("if", if_func.name());

        // Test IF(true, 'yes', 'no') -> 'yes'
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "yes");
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_if_function_false() {
        let if_func = IfFunction::default();

        // Test IF(false, 'yes', 'no') -> 'no'
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "no");
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_if_function_null_is_false() {
        let if_func = IfFunction::default();

        // Test IF(NULL, 'yes', 'no') -> 'no' (NULL is treated as false)
        // Using Boolean(None) - typed null
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Boolean(None)),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "no");
        } else {
            panic!("Expected Array result");
        }

        // Test IF(NULL, 'yes', 'no') -> 'no' using ScalarValue::Null (untyped null from SQL NULL literal)
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Null),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "no");
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_if_function_numeric_truthy() {
        let if_func = IfFunction::default();

        // Test IF(1, 'yes', 'no') -> 'yes' (non-zero is true)
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "yes");
        } else {
            panic!("Expected Array result");
        }

        // Test IF(0, 'yes', 'no') -> 'no' (zero is false)
        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::Int32(Some(0))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("yes".to_string()))),
                    ColumnarValue::Scalar(ScalarValue::Utf8(Some("no".to_string()))),
                ],
                arg_fields: vec![],
                number_rows: 1,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "no");
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_if_function_with_arrays() {
        let if_func = IfFunction::default();

        // Test with array condition
        let condition = Int32Array::from(vec![Some(1), Some(0), None, Some(5)]);
        let true_val = StringArray::from(vec!["yes", "yes", "yes", "yes"]);
        let false_val = StringArray::from(vec!["no", "no", "no", "no"]);

        let result = if_func
            .invoke_with_args(ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Array(Arc::new(condition)),
                    ColumnarValue::Array(Arc::new(true_val)),
                    ColumnarValue::Array(Arc::new(false_val)),
                ],
                arg_fields: vec![],
                number_rows: 4,
                return_field: Arc::new(Field::new("", DataType::Utf8, true)),
                config_options: Arc::new(Default::default()),
            })
            .unwrap();

        if let ColumnarValue::Array(arr) = result {
            let str_arr = arr.as_string::<i32>();
            assert_eq!(str_arr.value(0), "yes"); // 1 is true
            assert_eq!(str_arr.value(1), "no"); // 0 is false
            assert_eq!(str_arr.value(2), "no"); // NULL is false
            assert_eq!(str_arr.value(3), "yes"); // 5 is true
        } else {
            panic!("Expected Array result");
        }
    }
}

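Note: the removed IF function above follows MySQL truthiness rather than SQL BOOLEAN semantics. A minimal standalone sketch of that rule for a nullable integer condition (not part of the diff; mysql_truthy is a hypothetical helper for illustration):

fn mysql_truthy(v: Option<i64>) -> bool {
    // NULL -> false, 0 -> false, any non-zero value -> true
    v.is_some_and(|x| x != 0)
}

assert!(!mysql_truthy(None));
assert!(!mysql_truthy(Some(0)));
assert!(mysql_truthy(Some(5)));
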
@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::{
    DEFAULT_PRIVATE_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME,
};
use datafusion::arrow::array::{ArrayRef, StringArray, StringBuilder, as_boolean_array};
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
use datafusion::catalog::TableFunction;
use datafusion::common::ScalarValue;
use datafusion::common::utils::SingleRowListArrayBuilder;
@@ -34,15 +34,10 @@ const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
const CURRENT_DATABASE_FUNCTION_NAME: &str = "current_database";
const OBJ_DESCRIPTION_FUNCTION_NAME: &str = "obj_description";
const COL_DESCRIPTION_FUNCTION_NAME: &str = "col_description";
const SHOBJ_DESCRIPTION_FUNCTION_NAME: &str = "shobj_description";
const PG_MY_TEMP_SCHEMA_FUNCTION_NAME: &str = "pg_my_temp_schema";

define_nullary_udf!(CurrentSchemaFunction);
define_nullary_udf!(SessionUserFunction);
define_nullary_udf!(CurrentDatabaseFunction);
define_nullary_udf!(PgMyTempSchemaFunction);

impl Function for CurrentDatabaseFunction {
    fn name(&self) -> &str {
@@ -178,175 +173,6 @@ impl Function for CurrentSchemasFunction {
    }
}

/// PostgreSQL obj_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ObjDescriptionFunction {
    signature: Signature,
}

impl ObjDescriptionFunction {
    pub fn new() -> Self {
        Self {
            signature: Signature::one_of(
                vec![
                    TypeSignature::Exact(vec![DataType::Int64, DataType::Utf8]),
                    TypeSignature::Exact(vec![DataType::UInt32, DataType::Utf8]),
                    TypeSignature::Exact(vec![DataType::Int64]),
                    TypeSignature::Exact(vec![DataType::UInt32]),
                ],
                Volatility::Stable,
            ),
        }
    }
}

impl Function for ObjDescriptionFunction {
    fn name(&self) -> &str {
        OBJ_DESCRIPTION_FUNCTION_NAME
    }

    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
        Ok(DataType::Utf8)
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    fn invoke_with_args(
        &self,
        args: ScalarFunctionArgs,
    ) -> datafusion_common::Result<ColumnarValue> {
        let num_rows = args.number_rows;
        let mut builder = StringBuilder::with_capacity(num_rows, 0);
        for _ in 0..num_rows {
            builder.append_null();
        }
        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
    }
}

/// PostgreSQL col_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ColDescriptionFunction {
    signature: Signature,
}

impl ColDescriptionFunction {
    pub fn new() -> Self {
        Self {
            signature: Signature::one_of(
                vec![
                    TypeSignature::Exact(vec![DataType::Int64, DataType::Int32]),
                    TypeSignature::Exact(vec![DataType::UInt32, DataType::Int32]),
                    TypeSignature::Exact(vec![DataType::Int64, DataType::Int64]),
                    TypeSignature::Exact(vec![DataType::UInt32, DataType::Int64]),
                ],
                Volatility::Stable,
            ),
        }
    }
}

impl Function for ColDescriptionFunction {
    fn name(&self) -> &str {
        COL_DESCRIPTION_FUNCTION_NAME
    }

    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
        Ok(DataType::Utf8)
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    fn invoke_with_args(
        &self,
        args: ScalarFunctionArgs,
    ) -> datafusion_common::Result<ColumnarValue> {
        let num_rows = args.number_rows;
        let mut builder = StringBuilder::with_capacity(num_rows, 0);
        for _ in 0..num_rows {
            builder.append_null();
        }
        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
    }
}

/// PostgreSQL shobj_description - returns NULL for compatibility
#[derive(Display, Debug, Clone)]
#[display("{}", self.name())]
pub(super) struct ShobjDescriptionFunction {
    signature: Signature,
}

impl ShobjDescriptionFunction {
    pub fn new() -> Self {
        Self {
            signature: Signature::one_of(
                vec![
                    TypeSignature::Exact(vec![DataType::Int64, DataType::Utf8]),
                    TypeSignature::Exact(vec![DataType::UInt64, DataType::Utf8]),
                    TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]),
                    TypeSignature::Exact(vec![DataType::UInt32, DataType::Utf8]),
                ],
                Volatility::Stable,
            ),
        }
    }
}

impl Function for ShobjDescriptionFunction {
    fn name(&self) -> &str {
        SHOBJ_DESCRIPTION_FUNCTION_NAME
    }

    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
        Ok(DataType::Utf8)
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    fn invoke_with_args(
        &self,
        args: ScalarFunctionArgs,
    ) -> datafusion_common::Result<ColumnarValue> {
        let num_rows = args.number_rows;
        let mut builder = StringBuilder::with_capacity(num_rows, 0);
        for _ in 0..num_rows {
            builder.append_null();
        }
        Ok(ColumnarValue::Array(Arc::new(builder.finish())))
    }
}

/// PostgreSQL pg_my_temp_schema - returns 0 (no temp schema) for compatibility
impl Function for PgMyTempSchemaFunction {
    fn name(&self) -> &str {
        PG_MY_TEMP_SCHEMA_FUNCTION_NAME
    }

    fn return_type(&self, _: &[DataType]) -> datafusion_common::Result<DataType> {
        Ok(DataType::UInt32)
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    fn invoke_with_args(
        &self,
        _args: ScalarFunctionArgs,
    ) -> datafusion_common::Result<ColumnarValue> {
        Ok(ColumnarValue::Scalar(ScalarValue::UInt32(Some(0))))
    }
}

pub(super) struct PGCatalogFunction;

impl PGCatalogFunction {
@@ -386,98 +212,5 @@ impl PGCatalogFunction {
        registry.register(pg_catalog::create_pg_total_relation_size_udf());
        registry.register(pg_catalog::create_pg_stat_get_numscans());
        registry.register(pg_catalog::create_pg_get_constraintdef());
        registry.register(pg_catalog::create_pg_get_partition_ancestors_udf());
        registry.register_scalar(ObjDescriptionFunction::new());
        registry.register_scalar(ColDescriptionFunction::new());
        registry.register_scalar(ShobjDescriptionFunction::new());
        registry.register_scalar(PgMyTempSchemaFunction::default());
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_schema::Field;
    use datafusion::arrow::array::Array;
    use datafusion_common::ScalarValue;
    use datafusion_expr::ColumnarValue;

    use super::*;

    fn create_test_args(args: Vec<ColumnarValue>, number_rows: usize) -> ScalarFunctionArgs {
        ScalarFunctionArgs {
            args,
            arg_fields: vec![],
            number_rows,
            return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
            config_options: Arc::new(Default::default()),
        }
    }

    #[test]
    fn test_obj_description_function() {
        let func = ObjDescriptionFunction::new();
        assert_eq!("obj_description", func.name());
        assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());

        let args = create_test_args(
            vec![
                ColumnarValue::Scalar(ScalarValue::Int64(Some(1234))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("pg_class".to_string()))),
            ],
            1,
        );
        let result = func.invoke_with_args(args).unwrap();
        if let ColumnarValue::Array(arr) = result {
            assert_eq!(1, arr.len());
            assert!(arr.is_null(0));
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_col_description_function() {
        let func = ColDescriptionFunction::new();
        assert_eq!("col_description", func.name());
        assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());

        let args = create_test_args(
            vec![
                ColumnarValue::Scalar(ScalarValue::Int64(Some(1234))),
                ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
            ],
            1,
        );
        let result = func.invoke_with_args(args).unwrap();
        if let ColumnarValue::Array(arr) = result {
            assert_eq!(1, arr.len());
            assert!(arr.is_null(0));
        } else {
            panic!("Expected Array result");
        }
    }

    #[test]
    fn test_shobj_description_function() {
        let func = ShobjDescriptionFunction::new();
        assert_eq!("shobj_description", func.name());
        assert_eq!(DataType::Utf8, func.return_type(&[]).unwrap());

        let args = create_test_args(
            vec![
                ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some("pg_database".to_string()))),
            ],
            1,
        );
        let result = func.invoke_with_args(args).unwrap();
        if let ColumnarValue::Array(arr) = result {
            assert_eq!(1, arr.len());
            assert!(arr.is_null(0));
        } else {
            panic!("Expected Array result");
        }
    }
}

@@ -46,16 +46,13 @@ pub struct DoPutResponse {
    request_id: i64,
    /// The successfully ingested rows number.
    affected_rows: AffectedRows,
    /// The elapsed time in seconds for handling the bulk insert.
    elapsed_secs: f64,
}

impl DoPutResponse {
    pub fn new(request_id: i64, affected_rows: AffectedRows, elapsed_secs: f64) -> Self {
    pub fn new(request_id: i64, affected_rows: AffectedRows) -> Self {
        Self {
            request_id,
            affected_rows,
            elapsed_secs,
        }
    }

@@ -66,10 +63,6 @@ impl DoPutResponse {
    pub fn affected_rows(&self) -> AffectedRows {
        self.affected_rows
    }

    pub fn elapsed_secs(&self) -> f64 {
        self.elapsed_secs
    }
}

impl TryFrom<PutResult> for DoPutResponse {
@@ -93,11 +86,8 @@ mod tests {

    #[test]
    fn test_serde_do_put_response() {
        let x = DoPutResponse::new(42, 88, 0.123);
        let x = DoPutResponse::new(42, 88);
        let serialized = serde_json::to_string(&x).unwrap();
        assert_eq!(
            serialized,
            r#"{"request_id":42,"affected_rows":88,"elapsed_secs":0.123}"#
        );
        assert_eq!(serialized, r#"{"request_id":42,"affected_rows":88}"#);
    }
}

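After this change a DoPutResponse serializes without the elapsed_secs field; a small sketch, assuming the struct derives Serialize as the updated test implies:

let resp = DoPutResponse::new(42, 88);
let json = serde_json::to_string(&resp).unwrap();
// elapsed_secs is gone from the wire format
assert_eq!(json, r#"{"request_id":42,"affected_rows":88}"#);
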
@@ -14,7 +14,6 @@

use std::sync::Arc;

use common_error::ext::BoxedError;
use common_procedure::{
    BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId, watcher,
};
@@ -67,19 +66,6 @@ use crate::rpc::ddl::{
};
use crate::rpc::router::RegionRoute;

/// A configurator that customizes or enhances a [`DdlManager`].
#[async_trait::async_trait]
pub trait DdlManagerConfigurator<C>: Send + Sync {
    /// Configures the given [`DdlManager`] using the provided [`DdlManagerConfigureContext`].
    async fn configure(
        &self,
        ddl_manager: DdlManager,
        ctx: C,
    ) -> std::result::Result<DdlManager, BoxedError>;
}

pub type DdlManagerConfiguratorRef<C> = Arc<dyn DdlManagerConfigurator<C>>;

pub type DdlManagerRef = Arc<DdlManager>;

pub type BoxedProcedureLoaderFactory = dyn Fn(DdlContext) -> BoxedProcedureLoader;
@@ -162,8 +148,11 @@ impl DdlManager {
    }

    #[cfg(feature = "enterprise")]
    pub fn with_trigger_ddl_manager(mut self, trigger_ddl_manager: TriggerDdlManagerRef) -> Self {
        self.trigger_ddl_manager = Some(trigger_ddl_manager);
    pub fn with_trigger_ddl_manager(
        mut self,
        trigger_ddl_manager: Option<TriggerDdlManagerRef>,
    ) -> Self {
        self.trigger_ddl_manager = trigger_ddl_manager;
        self
    }

@@ -41,17 +41,6 @@ pub const POSTGRES_KEEP_ALIVE_SECS: u64 = 30;
/// In a lease, there are two opportunities for renewal.
pub const META_KEEP_ALIVE_INTERVAL_SECS: u64 = META_LEASE_SECS / 2;

/// The timeout of the heartbeat request.
pub const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The keep-alive interval of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration =
    Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The keep-alive timeout of the heartbeat channel.
pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration =
    Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS + 1);

/// The default mailbox round-trip timeout.
pub const MAILBOX_RTT_SECS: u64 = 1;

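The removed heartbeat constants above are all derived from META_LEASE_SECS; a sketch of the arithmetic with an assumed lease length (the real META_LEASE_SECS is defined elsewhere in this file and may differ):

const META_LEASE_SECS: u64 = 6; // assumed value, for illustration only
const META_KEEP_ALIVE_INTERVAL_SECS: u64 = META_LEASE_SECS / 2; // 3: two renewal chances per lease
const HEARTBEAT_TIMEOUT_SECS: u64 = META_KEEP_ALIVE_INTERVAL_SECS + 1; // 4: one interval plus slack
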
@@ -339,16 +339,6 @@ pub struct FlushRegions {
    pub error_strategy: FlushErrorStrategy,
}

impl Display for FlushRegions {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "FlushRegions(region_ids={:?}, strategy={:?}, error_strategy={:?})",
            self.region_ids, self.strategy, self.error_strategy
        )
    }
}

impl FlushRegions {
    /// Create synchronous single-region flush
    pub fn sync_single(region_id: RegionId) -> Self {

@@ -34,8 +34,6 @@ pub mod memory;
#[cfg(any(feature = "mysql_kvbackend", feature = "pg_kvbackend"))]
pub mod rds;
pub mod test;
#[cfg(any(test, feature = "testing"))]
pub mod test_util;
pub mod txn;
pub mod util;
pub type KvBackendRef<E = Error> = Arc<dyn KvBackend<Error = E> + Send + Sync>;

@@ -1,125 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::sync::Arc;

use derive_builder::Builder;

use crate::error::Result;
use crate::kv_backend::txn::{Txn, TxnResponse};
use crate::kv_backend::{
    BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
    BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, KvBackend, PutRequest, PutResponse,
    RangeRequest, RangeResponse, TxnService,
};

pub type MockFn<Req, Resp> = Arc<dyn Fn(Req) -> Result<Resp> + Send + Sync>;

/// A mock kv backend for testing.
#[derive(Builder)]
pub struct MockKvBackend {
    #[builder(setter(strip_option), default)]
    pub range_fn: Option<MockFn<RangeRequest, RangeResponse>>,
    #[builder(setter(strip_option), default)]
    pub put_fn: Option<MockFn<PutRequest, PutResponse>>,
    #[builder(setter(strip_option), default)]
    pub batch_put_fn: Option<MockFn<BatchPutRequest, BatchPutResponse>>,
    #[builder(setter(strip_option), default)]
    pub batch_get_fn: Option<MockFn<BatchGetRequest, BatchGetResponse>>,
    #[builder(setter(strip_option), default)]
    pub delete_range_fn: Option<MockFn<DeleteRangeRequest, DeleteRangeResponse>>,
    #[builder(setter(strip_option), default)]
    pub batch_delete_fn: Option<MockFn<BatchDeleteRequest, BatchDeleteResponse>>,
    #[builder(setter(strip_option), default)]
    pub txn: Option<MockFn<Txn, TxnResponse>>,
    #[builder(setter(strip_option), default)]
    pub max_txn_ops: Option<usize>,
}

#[async_trait::async_trait]
impl TxnService for MockKvBackend {
    type Error = crate::error::Error;

    async fn txn(&self, txn: Txn) -> Result<TxnResponse> {
        if let Some(f) = &self.txn {
            f(txn)
        } else {
            unimplemented!()
        }
    }

    fn max_txn_ops(&self) -> usize {
        self.max_txn_ops.unwrap()
    }
}

#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
    fn name(&self) -> &str {
        "mock_kv_backend"
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
        if let Some(f) = &self.range_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }

    async fn put(&self, req: PutRequest) -> Result<PutResponse> {
        if let Some(f) = &self.put_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }

    async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
        if let Some(f) = &self.batch_put_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }

    async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
        if let Some(f) = &self.batch_get_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }

    async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
        if let Some(f) = &self.delete_range_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }

    async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
        if let Some(f) = &self.batch_delete_fn {
            f(req)
        } else {
            unimplemented!()
        }
    }
}
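
The deleted mock above relied on derive_builder; a hedged usage sketch, assuming the MockKvBackendBuilder that derive_builder normally generates for this struct and that RangeResponse implements Default:

use std::sync::Arc;

let mock = MockKvBackendBuilder::default()
    // strip_option setters take the value directly, not an Option
    .max_txn_ops(10)
    .range_fn(Arc::new(|_req: RangeRequest| Ok(RangeResponse::default()))
        as MockFn<RangeRequest, RangeResponse>)
    .build()
    .unwrap();
// Any handler left unset falls through to unimplemented!() at call time.
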
@@ -246,6 +246,14 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Loader for {type_name} is not implemented: {reason}"))]
    ProcedureLoaderNotImplemented {
        #[snafu(implicit)]
        location: Location,
        type_name: String,
        reason: String,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -266,7 +274,8 @@ impl ErrorExt for Error {
            Error::ToJson { .. }
            | Error::DeleteState { .. }
            | Error::FromJson { .. }
            | Error::WaitWatcher { .. } => StatusCode::Internal,
            | Error::WaitWatcher { .. }
            | Error::ProcedureLoaderNotImplemented { .. } => StatusCode::Internal,

            Error::RetryTimesExceeded { .. }
            | Error::RollbackTimesExceeded { .. }

@@ -188,13 +188,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Failed to align JSON array, reason: {reason}"))]
    AlignJsonArray {
        reason: String,
        #[snafu(implicit)]
        location: Location,
    },
}

impl ErrorExt for Error {
@@ -210,8 +203,7 @@ impl ErrorExt for Error {
            | Error::ToArrowScalar { .. }
            | Error::ProjectArrowRecordBatch { .. }
            | Error::PhysicalExpr { .. }
            | Error::RecordBatchSliceIndexOverflow { .. }
            | Error::AlignJsonArray { .. } => StatusCode::Internal,
            | Error::RecordBatchSliceIndexOverflow { .. } => StatusCode::Internal,

            Error::PollStream { .. } => StatusCode::EngineExecuteQuery,

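For context, the AlignJsonArray variant is a snafu error, so callers raise it through the generated AlignJsonArraySnafu context selector; a minimal sketch of that pattern (function and arguments are illustrative only):

use snafu::ensure;

fn check_no_extra_fields(extra_fields: &[&str]) -> Result<()> {
    ensure!(
        extra_fields.is_empty(),
        AlignJsonArraySnafu {
            reason: format!("this json array has more fields {:?}", extra_fields),
        }
    );
    Ok(())
}
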
@@ -18,7 +18,7 @@ pub mod adapter;
pub mod cursor;
pub mod error;
pub mod filter;
pub mod recordbatch;
mod recordbatch;
pub mod util;

use std::fmt;

@@ -20,8 +20,7 @@ use datafusion::arrow::util::pretty::pretty_format_batches;
|
||||
use datafusion_common::arrow::array::ArrayRef;
|
||||
use datafusion_common::arrow::compute;
|
||||
use datafusion_common::arrow::datatypes::{DataType as ArrowDataType, SchemaRef as ArrowSchemaRef};
|
||||
use datatypes::arrow::array::{Array, AsArray, RecordBatchOptions, StructArray, new_null_array};
|
||||
use datatypes::extension::json::is_json_extension_type;
|
||||
use datatypes::arrow::array::{Array, AsArray, RecordBatchOptions};
|
||||
use datatypes::prelude::DataType;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
@@ -31,8 +30,8 @@ use snafu::{OptionExt, ResultExt, ensure};
|
||||
|
||||
use crate::DfRecordBatch;
|
||||
use crate::error::{
|
||||
self, AlignJsonArraySnafu, ArrowComputeSnafu, ColumnNotExistsSnafu, DataTypesSnafu,
|
||||
NewDfRecordBatchSnafu, ProjectArrowRecordBatchSnafu, Result,
|
||||
self, ArrowComputeSnafu, ColumnNotExistsSnafu, DataTypesSnafu, ProjectArrowRecordBatchSnafu,
|
||||
Result,
|
||||
};
|
||||
|
||||
/// A two-dimensional batch of column-oriented data with a defined schema.
|
||||
@@ -60,8 +59,6 @@ impl RecordBatch {
|
||||
// TODO(LFC): Remove the casting here once `Batch` is no longer used.
|
||||
let arrow_arrays = Self::cast_view_arrays(schema.arrow_schema(), arrow_arrays)?;
|
||||
|
||||
let arrow_arrays = maybe_align_json_array_with_schema(schema.arrow_schema(), arrow_arrays)?;
|
||||
|
||||
let df_record_batch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
|
||||
.context(error::NewDfRecordBatchSnafu)?;
|
||||
|
||||
@@ -330,111 +327,12 @@ pub fn merge_record_batches(schema: SchemaRef, batches: &[RecordBatch]) -> Resul
|
||||
Ok(RecordBatch::from_df_record_batch(schema, record_batch))
|
||||
}
|
||||
|
||||
/// Align a json array `json_array` to the json type `schema_type`. The `schema_type` is often the
|
||||
/// "largest" json type after some insertions in the table schema, while the json array previously
|
||||
/// written in the SST could be lagged behind it. So it's important to "amend" the json array's
|
||||
/// missing fields with null arrays, to align the array's data type with the provided one.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - The json array is not an Arrow [StructArray], or the provided data type `schema_type` is not
|
||||
/// of Struct type. Both of which shouldn't happen unless we switch our implementation of how
|
||||
/// json array is physically stored.
|
||||
pub fn align_json_array(json_array: &ArrayRef, schema_type: &ArrowDataType) -> Result<ArrayRef> {
|
||||
let json_type = json_array.data_type();
|
||||
if json_type == schema_type {
|
||||
return Ok(json_array.clone());
|
||||
}
|
||||
|
||||
let json_array = json_array.as_struct();
|
||||
let array_fields = json_array.fields();
|
||||
let array_columns = json_array.columns();
|
||||
let ArrowDataType::Struct(schema_fields) = schema_type else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut aligned = Vec::with_capacity(schema_fields.len());
|
||||
|
||||
// Compare the fields in the json array and the to-be-aligned schema, amending with null arrays
|
||||
// on the way. It's very important to note that fields in the json array and in the json type
|
||||
// are both SORTED.
|
||||
|
||||
let mut i = 0; // point to the schema fields
|
||||
let mut j = 0; // point to the array fields
|
||||
while i < schema_fields.len() && j < array_fields.len() {
|
||||
let schema_field = &schema_fields[i];
|
||||
let array_field = &array_fields[j];
|
||||
if schema_field.name() == array_field.name() {
|
||||
if matches!(schema_field.data_type(), ArrowDataType::Struct(_)) {
|
||||
// A `StructArray`s in a json array must be another json array. (Like a nested json
|
||||
// object in a json value.)
|
||||
aligned.push(align_json_array(
|
||||
&array_columns[j],
|
||||
schema_field.data_type(),
|
||||
)?);
|
||||
} else {
|
||||
aligned.push(array_columns[j].clone());
|
||||
}
|
||||
j += 1;
|
||||
} else {
|
||||
aligned.push(new_null_array(schema_field.data_type(), json_array.len()));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if i < schema_fields.len() {
|
||||
for field in &schema_fields[i..] {
|
||||
aligned.push(new_null_array(field.data_type(), json_array.len()));
|
||||
}
|
||||
}
|
||||
ensure!(
|
||||
j == array_fields.len(),
|
||||
AlignJsonArraySnafu {
|
||||
reason: format!(
|
||||
"this json array has more fields {:?}",
|
||||
array_fields[j..]
|
||||
.iter()
|
||||
.map(|x| x.name())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
let json_array =
|
||||
StructArray::try_new(schema_fields.clone(), aligned, json_array.nulls().cloned())
|
||||
.context(NewDfRecordBatchSnafu)?;
|
||||
Ok(Arc::new(json_array))
|
||||
}
|
||||
|
||||
fn maybe_align_json_array_with_schema(
|
||||
schema: &ArrowSchemaRef,
|
||||
arrays: Vec<ArrayRef>,
|
||||
) -> Result<Vec<ArrayRef>> {
|
||||
if schema.fields().iter().all(|f| !is_json_extension_type(f)) {
|
||||
return Ok(arrays);
|
||||
}
|
||||
|
||||
let mut aligned = Vec::with_capacity(arrays.len());
|
||||
for (field, array) in schema.fields().iter().zip(arrays.into_iter()) {
|
||||
if !is_json_extension_type(field) {
|
||||
aligned.push(array);
|
||||
continue;
|
||||
}
|
||||
|
||||
let json_array = align_json_array(&array, field.data_type())?;
|
||||
aligned.push(json_array);
|
||||
}
|
||||
Ok(aligned)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::arrow::array::{
|
||||
AsArray, BooleanArray, Float64Array, Int64Array, ListArray, UInt32Array,
|
||||
};
|
||||
use datatypes::arrow::datatypes::{
|
||||
DataType, Field, Fields, Int64Type, Schema as ArrowSchema, UInt32Type,
|
||||
};
|
||||
use datatypes::arrow::array::{AsArray, UInt32Array};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, UInt32Type};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
@@ -442,165 +340,6 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_align_json_array() -> Result<()> {
|
||||
struct TestCase {
|
||||
json_array: ArrayRef,
|
||||
schema_type: DataType,
|
||||
expected: std::result::Result<ArrayRef, String>,
|
||||
}
|
||||
|
||||
impl TestCase {
|
||||
fn new(
|
||||
json_array: StructArray,
|
||||
schema_type: Fields,
|
||||
expected: std::result::Result<Vec<ArrayRef>, String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
json_array: Arc::new(json_array),
|
||||
schema_type: DataType::Struct(schema_type.clone()),
|
||||
expected: expected
|
||||
.map(|x| Arc::new(StructArray::new(schema_type, x, None)) as ArrayRef),
|
||||
}
|
||||
}
|
||||
|
||||
fn test(self) -> Result<()> {
|
||||
let result = align_json_array(&self.json_array, &self.schema_type);
|
||||
match (result, self.expected) {
|
||||
(Ok(json_array), Ok(expected)) => assert_eq!(&json_array, &expected),
|
||||
(Ok(json_array), Err(e)) => {
|
||||
panic!("expecting error {e} but actually get: {json_array:?}")
|
||||
}
|
||||
(Err(e), Err(expected)) => assert_eq!(e.to_string(), expected),
|
||||
(Err(e), Ok(_)) => return Err(e),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Test empty json array can be aligned with a complex json type.
|
||||
TestCase::new(
|
||||
StructArray::new_empty_fields(2, None),
|
||||
Fields::from(vec![
|
||||
Field::new("int", DataType::Int64, true),
|
||||
Field::new_struct(
|
||||
"nested",
|
||||
vec![Field::new("bool", DataType::Boolean, true)],
|
||||
true,
|
||||
),
|
||||
Field::new("string", DataType::Utf8, true),
|
||||
]),
|
||||
Ok(vec![
|
||||
Arc::new(Int64Array::new_null(2)) as ArrayRef,
|
||||
Arc::new(StructArray::new_null(
|
||||
Fields::from(vec![Arc::new(Field::new("bool", DataType::Boolean, true))]),
|
||||
2,
|
||||
)),
|
||||
Arc::new(StringArray::new_null(2)),
|
||||
]),
|
||||
)
|
||||
.test()?;
|
||||
|
||||
// Test simple json array alignment.
|
||||
TestCase::new(
|
||||
StructArray::from(vec![(
|
||||
Arc::new(Field::new("float", DataType::Float64, true)),
|
||||
Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
|
||||
)]),
|
||||
Fields::from(vec![
|
||||
Field::new("float", DataType::Float64, true),
|
||||
Field::new("string", DataType::Utf8, true),
|
||||
]),
|
||||
Ok(vec![
|
||||
Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])) as ArrayRef,
|
||||
Arc::new(StringArray::new_null(3)),
|
||||
]),
|
||||
)
|
||||
.test()?;
|
||||
|
||||
// Test complex json array alignment.
|
||||
TestCase::new(
|
||||
StructArray::from(vec![
|
||||
(
|
||||
Arc::new(Field::new_list(
|
||||
"list",
|
||||
Field::new_list_field(DataType::Int64, true),
|
||||
true,
|
||||
)),
|
||||
Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
|
||||
Some(vec![Some(1)]),
|
||||
None,
|
||||
Some(vec![Some(2), Some(3)]),
|
||||
])) as ArrayRef,
|
||||
),
|
||||
(
|
||||
Arc::new(Field::new_struct(
|
||||
"nested",
|
||||
vec![Field::new("int", DataType::Int64, true)],
|
||||
true,
|
||||
)),
|
||||
Arc::new(StructArray::from(vec![(
|
||||
Arc::new(Field::new("int", DataType::Int64, true)),
|
||||
Arc::new(Int64Array::from(vec![-1, -2, -3])) as ArrayRef,
|
||||
)])),
|
||||
),
|
||||
(
|
||||
Arc::new(Field::new("string", DataType::Utf8, true)),
|
||||
Arc::new(StringArray::from(vec!["a", "b", "c"])),
|
||||
),
|
||||
]),
|
||||
Fields::from(vec![
|
||||
Field::new("bool", DataType::Boolean, true),
|
||||
Field::new_list("list", Field::new_list_field(DataType::Int64, true), true),
|
||||
Field::new_struct(
|
||||
"nested",
|
||||
vec![
|
||||
Field::new("float", DataType::Float64, true),
|
||||
Field::new("int", DataType::Int64, true),
|
||||
],
|
||||
true,
|
||||
),
|
||||
Field::new("string", DataType::Utf8, true),
|
||||
]),
|
||||
Ok(vec![
|
||||
Arc::new(BooleanArray::new_null(3)) as ArrayRef,
|
||||
Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
|
||||
Some(vec![Some(1)]),
|
||||
None,
|
||||
Some(vec![Some(2), Some(3)]),
|
||||
])),
|
||||
Arc::new(StructArray::from(vec![
|
||||
(
|
||||
Arc::new(Field::new("float", DataType::Float64, true)),
|
||||
Arc::new(Float64Array::new_null(3)) as ArrayRef,
|
||||
),
|
||||
(
|
||||
Arc::new(Field::new("int", DataType::Int64, true)),
|
||||
Arc::new(Int64Array::from(vec![-1, -2, -3])),
|
||||
),
|
||||
])),
|
||||
Arc::new(StringArray::from(vec!["a", "b", "c"])),
|
||||
]),
|
||||
)
|
||||
.test()?;
|
||||
|
||||
// Test align failed.
|
||||
TestCase::new(
|
||||
StructArray::try_from(vec![
|
||||
("i", Arc::new(Int64Array::from(vec![1])) as ArrayRef),
|
||||
("j", Arc::new(Int64Array::from(vec![2])) as ArrayRef),
|
||||
])
|
||||
.unwrap(),
|
||||
Fields::from(vec![Field::new("i", DataType::Int64, true)]),
|
||||
Err(
|
||||
r#"Failed to align JSON array, reason: this json array has more fields ["j"]"#
|
||||
.to_string(),
|
||||
),
|
||||
)
|
||||
.test()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_record_batch() {
|
||||
let arrow_schema = Arc::new(ArrowSchema::new(vec![
|
||||
|
||||
@@ -231,15 +231,13 @@ pub fn sql_value_to_value(
        }
    }

    let value_datatype = value.data_type();
    // The datatype of json value is determined by its actual data, so we can't simply "cast" it here.
    if value_datatype.is_json() || value_datatype == *data_type {
        Ok(value)
    } else {
    if value.data_type() != *data_type {
        datatypes::types::cast(value, data_type).with_context(|_| InvalidCastSnafu {
            sql_value: sql_val.clone(),
            datatype: data_type,
        })
    } else {
        Ok(value)
    }
}

@@ -16,7 +16,6 @@ use common_time::timezone::Timezone;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
use snafu::ensure;
use sqlparser::ast::ValueWithSpan;
pub use sqlparser::ast::{
    BinaryOperator, ColumnDef, ColumnOption, ColumnOptionDef, DataType, Expr, Function,
@@ -38,14 +37,6 @@ pub fn parse_column_default_constraint(
        .iter()
        .find(|o| matches!(o.option, ColumnOption::Default(_)))
    {
        ensure!(
            !data_type.is_json(),
            UnsupportedDefaultValueSnafu {
                column_name,
                reason: "json column cannot have a default value",
            }
        );

        let default_constraint = match &opt.option {
            ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value(
                sql_value_to_value(column_name, data_type, &v.value, timezone, None, false)?,
@@ -91,7 +82,7 @@ pub fn parse_column_default_constraint(
                } else {
                    return UnsupportedDefaultValueSnafu {
                        column_name,
                        reason: format!("expr '{expr}' not supported"),
                        expr: *expr.clone(),
                    }
                    .fail();
                }
@@ -99,14 +90,14 @@ pub fn parse_column_default_constraint(
            ColumnOption::Default(others) => {
                return UnsupportedDefaultValueSnafu {
                    column_name,
                    reason: format!("expr '{others}' not supported"),
                    expr: others.clone(),
                }
                .fail();
            }
            _ => {
                return UnsupportedDefaultValueSnafu {
                    column_name,
                    reason: format!("option '{}' not supported", opt.option),
                    expr: Expr::Value(SqlValue::Null.into()),
                }
                .fail();
            }

@@ -55,11 +55,13 @@ pub enum Error {
    },

    #[snafu(display(
        "Unsupported default constraint for column: '{column_name}', reason: {reason}"
        "Unsupported expr in default constraint: {} for column: {}",
        expr,
        column_name
    ))]
    UnsupportedDefaultValue {
        column_name: String,
        reason: String,
        expr: Expr,
        #[snafu(implicit)]
        location: Location,
    },

@@ -320,15 +320,4 @@ mod tests {
        assert!(flush_reply.results[0].1.is_ok());
        assert!(flush_reply.results[1].1.is_err());
    }

    #[test]
    fn test_flush_regions_display() {
        let region_id = RegionId::new(1024, 1);
        let flush_regions = FlushRegions::sync_single(region_id);
        let display = format!("{}", flush_regions);
        assert_eq!(
            display,
            "FlushRegions(region_ids=[4398046511105(1024, 1)], strategy=Sync, error_strategy=FailFast)"
        );
    }
}

@@ -1200,8 +1200,7 @@ impl RegionServerInner {
            | RegionRequest::Flush(_)
            | RegionRequest::Compact(_)
            | RegionRequest::Truncate(_)
            | RegionRequest::BuildIndex(_)
            | RegionRequest::EnterStaging(_) => RegionChange::None,
            | RegionRequest::BuildIndex(_) => RegionChange::None,
            RegionRequest::Catchup(_) => RegionChange::Catchup,
        };

@@ -1261,6 +1260,7 @@ impl RegionServerInner {
            .with_context(|_| HandleRegionRequestSnafu { region_id })?
            .new_opened_logical_region_ids()
        else {
            warn!("No new opened logical regions");
            return Ok(());
        };

@@ -24,8 +24,8 @@ use common_query::Output;
use common_runtime::Runtime;
use common_runtime::runtime::{BuilderBuild, RuntimeTrait};
use datafusion::catalog::TableFunction;
use datafusion::dataframe::DataFrame;
use datafusion_expr::{AggregateUDF, LogicalPlan};
use query::dataframe::DataFrame;
use query::planner::LogicalPlanner;
use query::query_engine::{DescribeResult, QueryEngineState};
use query::{QueryEngine, QueryEngineContext};

@@ -15,6 +15,7 @@
use std::fmt;
use std::sync::Arc;

use arrow::compute::cast as arrow_array_cast;
use arrow::datatypes::{
    DataType as ArrowDataType, IntervalUnit as ArrowIntervalUnit, TimeUnit as ArrowTimeUnit,
};
@@ -367,10 +368,8 @@ impl ConcreteDataType {

    /// Checks if the data type can cast to another data type.
    pub fn can_arrow_type_cast_to(&self, to_type: &ConcreteDataType) -> bool {
        match (self, to_type) {
            (ConcreteDataType::Json(this), ConcreteDataType::Json(that)) => that.is_include(this),
            _ => arrow::compute::can_cast_types(&self.as_arrow_type(), &to_type.as_arrow_type()),
        }
        let array = arrow_array::new_empty_array(&self.as_arrow_type());
        arrow_array_cast(array.as_ref(), &to_type.as_arrow_type()).is_ok()
    }

    /// Try to cast data type as a [`DurationType`].

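The new can_arrow_type_cast_to above probes castability by actually casting an empty array instead of consulting can_cast_types; a standalone sketch of the same trick with plain arrow-rs (types chosen only for illustration):

use arrow::array::new_empty_array;
use arrow::compute::cast;
use arrow::datatypes::DataType;

// An empty array of the source type is enough to exercise the cast kernel.
let empty = new_empty_array(&DataType::Int64);
let castable = cast(empty.as_ref(), &DataType::Utf8).is_ok();
assert!(castable); // Int64 -> Utf8 is a supported cast
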
@@ -15,7 +15,7 @@
use std::sync::Arc;

use arrow_schema::extension::ExtensionType;
use arrow_schema::{ArrowError, DataType, FieldRef};
use arrow_schema::{ArrowError, DataType};
use serde::{Deserialize, Serialize};

use crate::json::JsonStructureSettings;
@@ -102,8 +102,3 @@ impl ExtensionType for JsonExtensionType {
        Ok(json)
    }
}

/// Check if this field is to be treated as json extension type.
pub fn is_json_extension_type(field: &FieldRef) -> bool {
    field.extension_type_name() == Some(JsonExtensionType::NAME)
}

@@ -260,7 +260,7 @@ impl JsonValue {
        ConcreteDataType::Json(self.json_type().clone())
    }

    pub fn json_type(&self) -> &JsonType {
    pub(crate) fn json_type(&self) -> &JsonType {
        self.json_type.get_or_init(|| self.json_variant.json_type())
    }

@@ -268,14 +268,6 @@ impl JsonValue {
        matches!(self.json_variant, JsonVariant::Null)
    }

    /// Check if this JSON value is an empty object.
    pub fn is_empty_object(&self) -> bool {
        match &self.json_variant {
            JsonVariant::Object(object) => object.is_empty(),
            _ => false,
        }
    }

    pub(crate) fn as_i64(&self) -> Option<i64> {
        match self.json_variant {
            JsonVariant::Number(n) => n.as_i64(),

@@ -273,9 +273,8 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
            _ => None,
        };
        if let Some(extype) = extype {
            field
                .metadata_mut()
                .insert(TYPE_KEY.to_string(), extype.to_string());
            let metadata = HashMap::from([(TYPE_KEY.to_string(), extype.to_string())]);
            field = field.with_metadata(metadata);
        }
        fields.push(field);
        ensure!(

@@ -20,7 +20,7 @@ mod decimal_type;
mod dictionary_type;
mod duration_type;
mod interval_type;
pub mod json_type;
pub(crate) mod json_type;
mod list_type;
mod null_type;
mod primitive_type;

@@ -18,6 +18,7 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use arrow_schema::Fields;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
@@ -35,7 +36,7 @@ use crate::vectors::json::builder::JsonVectorBuilder;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
const JSON_PLAIN_FIELD_NAME: &str = "__json_plain__";
|
||||
const JSON_PLAIN_FIELD_NAME: &str = "__plain__";
|
||||
const JSON_PLAIN_FIELD_METADATA_KEY: &str = "is_plain_json";
|
||||
|
||||
pub type JsonObjectType = BTreeMap<String, JsonNativeType>;
|
||||
@@ -58,10 +59,6 @@ pub enum JsonNativeType {
|
||||
}
|
||||
|
||||
impl JsonNativeType {
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, JsonNativeType::Null)
|
||||
}
|
||||
|
||||
pub fn u64() -> Self {
|
||||
Self::Number(JsonNumberType::U64)
|
||||
}
|
||||
@@ -190,7 +187,7 @@ impl JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn null() -> Self {
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
format: JsonFormat::Native(Box::new(JsonNativeType::Null)),
|
||||
}
|
||||
@@ -211,7 +208,7 @@ impl JsonType {
|
||||
}
|
||||
|
||||
/// Try to merge this json type with others, error on datatype conflict.
|
||||
pub fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
pub(crate) fn merge(&mut self, other: &JsonType) -> Result<()> {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => Ok(()),
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
@@ -226,8 +223,7 @@ impl JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if it can merge with `other` json type.
|
||||
pub fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
pub(crate) fn is_mergeable(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
@@ -236,43 +232,6 @@ impl JsonType {
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if it includes all fields in `other` json type.
|
||||
pub fn is_include(&self, other: &JsonType) -> bool {
|
||||
match (&self.format, &other.format) {
|
||||
(JsonFormat::Jsonb, JsonFormat::Jsonb) => true,
|
||||
(JsonFormat::Native(this), JsonFormat::Native(that)) => {
|
||||
is_include(this.as_ref(), that.as_ref())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_include(this: &JsonNativeType, that: &JsonNativeType) -> bool {
|
||||
fn is_include_object(this: &JsonObjectType, that: &JsonObjectType) -> bool {
|
||||
for (type_name, that_type) in that {
|
||||
let Some(this_type) = this.get(type_name) else {
|
||||
return false;
|
||||
};
|
||||
if !is_include(this_type, that_type) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
match (this, that) {
|
||||
(this, that) if this == that => true,
|
||||
(JsonNativeType::Array(this), JsonNativeType::Array(that)) => {
|
||||
is_include(this.as_ref(), that.as_ref())
|
||||
}
|
||||
(JsonNativeType::Object(this), JsonNativeType::Object(that)) => {
|
||||
is_include_object(this, that)
|
||||
}
|
||||
(_, JsonNativeType::Null) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// A special struct type for denoting "plain"(not object) json value. It has only one field, with
|
||||
@@ -358,14 +317,14 @@ impl DataType for JsonType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
match self.format {
|
||||
JsonFormat::Jsonb => ArrowDataType::Binary,
|
||||
JsonFormat::Native(_) => self.as_struct_type().as_arrow_type(),
|
||||
JsonFormat::Native(_) => ArrowDataType::Struct(Fields::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
match &self.format {
|
||||
match self.format {
|
||||
JsonFormat::Jsonb => Box::new(BinaryVectorBuilder::with_capacity(capacity)),
|
||||
JsonFormat::Native(x) => Box::new(JsonVectorBuilder::new(*x.clone(), capacity)),
|
||||
JsonFormat::Native(_) => Box::new(JsonVectorBuilder::with_capacity(capacity)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -377,12 +336,6 @@ impl DataType for JsonType {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonType {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a json type value to string
|
||||
pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
|
||||
match jsonb::from_slice(val) {
|
||||
@@ -413,204 +366,6 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::json::JsonStructureSettings;
|
||||
|
||||
#[test]
|
||||
fn test_json_type_include() {
|
||||
fn test(this: &JsonNativeType, that: &JsonNativeType, expected: bool) {
|
||||
assert_eq!(is_include(this, that), expected);
|
||||
}
|
||||
|
||||
test(&JsonNativeType::Null, &JsonNativeType::Null, true);
|
||||
test(&JsonNativeType::Null, &JsonNativeType::Bool, false);
|
||||
|
||||
test(&JsonNativeType::Bool, &JsonNativeType::Null, true);
|
||||
test(&JsonNativeType::Bool, &JsonNativeType::Bool, true);
|
||||
test(&JsonNativeType::Bool, &JsonNativeType::u64(), false);
|
||||
|
||||
test(&JsonNativeType::u64(), &JsonNativeType::Null, true);
|
||||
test(&JsonNativeType::u64(), &JsonNativeType::u64(), true);
|
||||
test(&JsonNativeType::u64(), &JsonNativeType::String, false);
|
||||
|
||||
test(&JsonNativeType::String, &JsonNativeType::Null, true);
|
||||
test(&JsonNativeType::String, &JsonNativeType::String, true);
|
||||
test(
|
||||
&JsonNativeType::String,
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
false,
|
||||
);
|
||||
|
||||
test(
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
&JsonNativeType::Null,
|
||||
true,
|
||||
);
|
||||
test(
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::Null)),
|
||||
true,
|
||||
);
|
||||
test(
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
true,
|
||||
);
|
||||
test(
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
&JsonNativeType::String,
|
||||
false,
|
||||
);
|
||||
test(
|
||||
&JsonNativeType::Array(Box::new(JsonNativeType::f64())),
|
||||
&JsonNativeType::Object(JsonObjectType::new()),
|
||||
false,
|
||||
);
|
||||
|
||||
let simple_json_object = &JsonNativeType::Object(JsonObjectType::from([(
|
||||
"foo".to_string(),
|
||||
JsonNativeType::String,
|
||||
)]));
|
||||
test(simple_json_object, &JsonNativeType::Null, true);
|
||||
test(simple_json_object, simple_json_object, true);
|
||||
test(simple_json_object, &JsonNativeType::i64(), false);
|
||||
test(
|
||||
simple_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([(
|
||||
"bar".to_string(),
|
||||
JsonNativeType::i64(),
|
||||
)])),
|
||||
false,
|
||||
);
|
||||
|
||||
let complex_json_object = &JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"b".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"c".to_string(),
|
||||
JsonNativeType::String,
|
||||
)])),
|
||||
)])),
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
]));
|
||||
test(complex_json_object, &JsonNativeType::Null, true);
|
||||
test(complex_json_object, &JsonNativeType::String, false);
|
||||
test(complex_json_object, complex_json_object, true);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([(
|
||||
"bar".to_string(),
|
||||
JsonNativeType::i64(),
|
||||
)])),
|
||||
true,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Null,
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
])),
|
||||
true,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::String,
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
])),
|
||||
false,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"b".to_string(),
|
||||
JsonNativeType::String,
|
||||
)])),
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
])),
|
||||
false,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"b".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"c".to_string(),
|
||||
JsonNativeType::Null,
|
||||
)])),
|
||||
)])),
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
])),
|
||||
true,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([
|
||||
(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"b".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"c".to_string(),
|
||||
JsonNativeType::Bool,
|
||||
)])),
|
||||
)])),
|
||||
)])),
|
||||
),
|
||||
("bar".to_string(), JsonNativeType::i64()),
|
||||
])),
|
||||
false,
|
||||
);
|
||||
test(
|
||||
complex_json_object,
|
||||
&JsonNativeType::Object(JsonObjectType::from([(
|
||||
"nested".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"a".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"b".to_string(),
|
||||
JsonNativeType::Object(JsonObjectType::from([(
|
||||
"c".to_string(),
|
||||
JsonNativeType::String,
|
||||
)])),
|
||||
)])),
|
||||
)])),
|
||||
)])),
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_json_type() -> Result<()> {
|
||||
fn test(
|
||||
|
||||
@@ -20,7 +20,6 @@ use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, TryFromValueSnafu, UnsupportedOperationSnafu};
|
||||
use crate::json::value::JsonValueRef;
|
||||
use crate::prelude::{ValueRef, Vector, VectorRef};
|
||||
use crate::types::json_type::JsonNativeType;
|
||||
use crate::types::{JsonType, json_type};
|
||||
use crate::value::StructValueRef;
|
||||
use crate::vectors::{MutableVector, StructVectorBuilder};
|
||||
@@ -182,9 +181,9 @@ pub(crate) struct JsonVectorBuilder {
|
||||
}
|
||||
|
||||
impl JsonVectorBuilder {
|
||||
pub(crate) fn new(json_type: JsonNativeType, capacity: usize) -> Self {
|
||||
pub(crate) fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
merged_type: JsonType::new_native(json_type),
|
||||
merged_type: JsonType::empty(),
|
||||
capacity,
|
||||
builders: vec![],
|
||||
}
|
||||
@@ -327,18 +326,18 @@ mod tests {
|
||||
"Failed to merge JSON datatype: datatypes have conflict, this: Number(I64), that: Array[Bool]",
|
||||
),
|
||||
];
|
||||
let mut builder = JsonVectorBuilder::new(JsonNativeType::Null, 1);
|
||||
let mut builder = JsonVectorBuilder::with_capacity(1);
|
||||
for (json, result) in jsons.into_iter().zip(results.into_iter()) {
|
||||
push(json, &mut builder, result);
|
||||
}
|
||||
let vector = builder.to_vector();
|
||||
let expected = r#"
|
||||
+---------------------+
|
||||
| StructVector |
|
||||
+---------------------+
|
||||
| {__json_plain__: 1} |
|
||||
| {__json_plain__: 2} |
|
||||
+---------------------+"#;
|
||||
+----------------+
|
||||
| StructVector |
|
||||
+----------------+
|
||||
| {__plain__: 1} |
|
||||
| {__plain__: 2} |
|
||||
+----------------+"#;
|
||||
assert_eq!(pretty_print(vector), expected.trim());
|
||||
Ok(())
|
||||
}
|
||||
@@ -387,7 +386,7 @@ mod tests {
|
||||
"object": {"timestamp": 1761523203000}
|
||||
}"#,
|
||||
];
|
||||
let mut builder = JsonVectorBuilder::new(JsonNativeType::Null, 1);
|
||||
let mut builder = JsonVectorBuilder::with_capacity(1);
|
||||
for json in jsons {
|
||||
push(json, &mut builder, Ok(()));
|
||||
}
|
||||
|
||||
@@ -379,8 +379,10 @@ impl MutableVector for StructVectorBuilder {
|
||||
},
|
||||
StructValueRef::Ref(val) => self.push_struct_value(val)?,
|
||||
StructValueRef::RefList { val, fields } => {
|
||||
let struct_value =
|
||||
StructValue::try_new(val.into_iter().map(Value::from).collect(), fields)?;
|
||||
let struct_value = StructValue::try_new(
|
||||
val.iter().map(|v| Value::from(v.clone())).collect(),
|
||||
fields.clone(),
|
||||
)?;
|
||||
self.push_struct_value(&struct_value)?;
|
||||
}
|
||||
}
|
||||
@@ -427,17 +429,12 @@ impl ScalarVectorBuilder for StructVectorBuilder {
|
||||
.value_builders
|
||||
.iter_mut()
|
||||
.map(|b| b.to_vector().to_arrow_array())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let struct_array = if arrays.is_empty() {
|
||||
StructArray::new_empty_fields(self.len(), self.null_buffer.finish())
|
||||
} else {
|
||||
StructArray::new(
|
||||
self.fields.as_arrow_fields(),
|
||||
arrays,
|
||||
self.null_buffer.finish(),
|
||||
)
|
||||
};
|
||||
.collect();
|
||||
let struct_array = StructArray::new(
|
||||
self.fields.as_arrow_fields(),
|
||||
arrays,
|
||||
self.null_buffer.finish(),
|
||||
);
|
||||
|
||||
StructVector::try_new(self.fields.clone(), struct_array).unwrap()
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
mod relation;
|
||||
|
||||
use api::helper::{pb_value_to_value_ref, to_grpc_value};
|
||||
use api::helper::{pb_value_to_value_ref, value_to_grpc_value};
|
||||
use api::v1::Row as ProtoRow;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::types::cast;
|
||||
@@ -201,7 +201,11 @@ impl From<ProtoRow> for Row {
|
||||
|
||||
impl From<Row> for ProtoRow {
|
||||
fn from(row: Row) -> Self {
|
||||
let values = row.unpack().into_iter().map(to_grpc_value).collect_vec();
|
||||
let values = row
|
||||
.unpack()
|
||||
.into_iter()
|
||||
.map(value_to_grpc_value)
|
||||
.collect_vec();
|
||||
ProtoRow { values }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,18 +32,15 @@ use operator::flow::FlowServiceOperator;
|
||||
use operator::insert::Inserter;
|
||||
use operator::procedure::ProcedureServiceOperator;
|
||||
use operator::request::Requester;
|
||||
use operator::statement::{
|
||||
ExecutorConfigureContext, StatementExecutor, StatementExecutorConfiguratorRef,
|
||||
StatementExecutorRef,
|
||||
};
|
||||
use operator::statement::{StatementExecutor, StatementExecutorRef};
|
||||
use operator::table::TableMutationOperator;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use pipeline::pipeline_operator::PipelineOperator;
|
||||
use query::QueryEngineFactory;
|
||||
use query::region_query::RegionQueryHandlerFactoryRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{self, ExternalSnafu, Result};
|
||||
use crate::error::{self, Result};
|
||||
use crate::events::EventHandlerImpl;
|
||||
use crate::frontend::FrontendOptions;
|
||||
use crate::instance::Instance;
|
||||
@@ -190,15 +187,10 @@ impl FrontendBuilder {
|
||||
Some(process_manager.clone()),
|
||||
);
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
let statement_executor =
|
||||
if let Some(configurator) = plugins.get::<StatementExecutorConfiguratorRef>() {
|
||||
let ctx = ExecutorConfigureContext {
|
||||
kv_backend: kv_backend.clone(),
|
||||
};
|
||||
configurator
|
||||
.configure(statement_executor, ctx)
|
||||
.await
|
||||
.context(ExternalSnafu)?
|
||||
if let Some(factory) = plugins.get::<operator::statement::TriggerQuerierFactoryRef>() {
|
||||
statement_executor.with_trigger_querier(factory.create(kv_backend.clone()))
|
||||
} else {
|
||||
statement_executor
|
||||
};
|
||||
|
||||
@@ -12,9 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use api::helper::from_pb_time_ranges;
|
||||
use api::v1::ddl_request::{Expr as DdlExpr, Expr};
|
||||
@@ -24,18 +22,16 @@ use api::v1::{
|
||||
DeleteRequests, DropFlowExpr, InsertIntoPlan, InsertRequests, RowDeleteRequests,
|
||||
RowInsertRequests,
|
||||
};
|
||||
use async_stream::try_stream;
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use common_base::AffectedRows;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc::flight::do_put::DoPutResponse;
|
||||
use common_grpc::FlightData;
|
||||
use common_grpc::flight::FlightDecoder;
|
||||
use common_query::Output;
|
||||
use common_query::logical_plan::add_insert_to_logical_plan;
|
||||
use common_telemetry::tracing::{self};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use futures::Stream;
|
||||
use futures::stream::StreamExt;
|
||||
use query::parser::PromQuery;
|
||||
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
@@ -244,8 +240,10 @@ impl GrpcQueryHandler for Instance {
|
||||
|
||||
async fn put_record_batch(
|
||||
&self,
|
||||
request: servers::grpc::flight::PutRecordBatchRequest,
|
||||
table_name: &TableName,
|
||||
table_ref: &mut Option<TableRef>,
|
||||
decoder: &mut FlightDecoder,
|
||||
data: FlightData,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<AffectedRows> {
|
||||
let table = if let Some(table) = table_ref {
|
||||
@@ -254,15 +252,15 @@ impl GrpcQueryHandler for Instance {
|
||||
let table = self
|
||||
.catalog_manager()
|
||||
.table(
|
||||
&request.table_name.catalog_name,
|
||||
&request.table_name.schema_name,
|
||||
&request.table_name.table_name,
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: request.table_name.to_string(),
|
||||
table_name: table_name.to_string(),
|
||||
})?;
|
||||
*table_ref = Some(table.clone());
|
||||
table
|
||||
@@ -281,77 +279,10 @@ impl GrpcQueryHandler for Instance {
|
||||
// do we check limit for bulk insert?
|
||||
|
||||
self.inserter
|
||||
.handle_bulk_insert(
|
||||
table,
|
||||
request.flight_data,
|
||||
request.record_batch,
|
||||
request.schema_bytes,
|
||||
)
|
||||
.handle_bulk_insert(table, decoder, data)
|
||||
.await
|
||||
.context(TableOperationSnafu)
|
||||
}
|
||||
|
||||
fn handle_put_record_batch_stream(
|
||||
&self,
|
||||
mut stream: servers::grpc::flight::PutRecordBatchRequestStream,
|
||||
ctx: QueryContextRef,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<DoPutResponse>> + Send>> {
|
||||
// Resolve table once for the stream
|
||||
// Clone all necessary data to make it 'static
|
||||
let catalog_manager = self.catalog_manager().clone();
|
||||
let plugins = self.plugins.clone();
|
||||
let inserter = self.inserter.clone();
|
||||
let table_name = stream.table_name().clone();
|
||||
let ctx = ctx.clone();
|
||||
|
||||
Box::pin(try_stream! {
|
||||
plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::BulkInsert)
|
||||
.context(PermissionSnafu)?;
|
||||
// Cache for resolved table reference - resolve once and reuse
|
||||
let table_ref = catalog_manager
|
||||
.table(
|
||||
&table_name.catalog_name,
|
||||
&table_name.schema_name,
|
||||
&table_name.table_name,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: table_name.to_string(),
|
||||
})?;
|
||||
|
||||
// Check permissions once for the stream
|
||||
let interceptor_ref = plugins.get::<GrpcQueryInterceptorRef<Error>>();
|
||||
let interceptor = interceptor_ref.as_ref();
|
||||
interceptor.pre_bulk_insert(table_ref.clone(), ctx.clone())?;
|
||||
|
||||
// Process each request in the stream
|
||||
while let Some(request_result) = stream.next().await {
|
||||
let request = request_result.map_err(|e| {
|
||||
let error_msg = format!("Stream error: {:?}", e);
|
||||
IncompleteGrpcRequestSnafu { err_msg: error_msg }.build()
|
||||
})?;
|
||||
|
||||
let request_id = request.request_id;
|
||||
let start = Instant::now();
|
||||
let rows = inserter
|
||||
.handle_bulk_insert(
|
||||
table_ref.clone(),
|
||||
request.flight_data,
|
||||
request.record_batch,
|
||||
request.schema_bytes,
|
||||
)
|
||||
.await
|
||||
.context(TableOperationSnafu)?;
|
||||
let elapsed_secs = start.elapsed().as_secs_f64();
|
||||
yield DoPutResponse::new(request_id, rows, elapsed_secs);
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_catalog_and_schema_from_context(ddl_expr: &mut DdlExpr, ctx: &QueryContextRef) {
|
||||
|
||||
@@ -136,7 +136,7 @@ impl Instance {
|
||||
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
|
||||
})?;
|
||||
|
||||
let scan_plan = dataframe.into_unoptimized_plan();
|
||||
let scan_plan = dataframe.into_logical_plan();
|
||||
let filter_conditions =
|
||||
PromPlanner::matchers_to_expr(Matchers::new(matchers), scan_plan.schema())
|
||||
.context(PrometheusLabelValuesQueryPlanSnafu)?;
|
||||
|
||||
@@ -22,7 +22,6 @@ use common_telemetry::info;
|
||||
use meta_client::MetaClientOptions;
|
||||
use servers::error::Error as ServerError;
|
||||
use servers::grpc::builder::GrpcServerBuilder;
|
||||
use servers::grpc::flight::FlightCraftRef;
|
||||
use servers::grpc::frontend_grpc_handler::FrontendGrpcHandler;
|
||||
use servers::grpc::greptime_handler::GreptimeRequestHandler;
|
||||
use servers::grpc::{GrpcOptions, GrpcServer};
|
||||
@@ -53,7 +52,6 @@ where
|
||||
grpc_server_builder: Option<GrpcServerBuilder>,
|
||||
http_server_builder: Option<HttpServerBuilder>,
|
||||
plugins: Plugins,
|
||||
flight_handler: Option<FlightCraftRef>,
|
||||
}
|
||||
|
||||
impl<T> Services<T>
|
||||
@@ -67,7 +65,6 @@ where
|
||||
grpc_server_builder: None,
|
||||
http_server_builder: None,
|
||||
plugins,
|
||||
flight_handler: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,13 +139,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_flight_handler(self, flight_handler: FlightCraftRef) -> Self {
|
||||
Self {
|
||||
flight_handler: Some(flight_handler),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
fn build_grpc_server(
|
||||
&mut self,
|
||||
grpc: &GrpcOptions,
|
||||
@@ -183,12 +173,6 @@ where
|
||||
grpc.flight_compression,
|
||||
);
|
||||
|
||||
// Use custom flight handler if provided, otherwise use the default GreptimeRequestHandler
|
||||
let flight_handler = self
|
||||
.flight_handler
|
||||
.clone()
|
||||
.unwrap_or_else(|| Arc::new(greptime_request_handler.clone()) as FlightCraftRef);
|
||||
|
||||
let grpc_server = builder
|
||||
.name(name)
|
||||
.database_handler(greptime_request_handler.clone())
|
||||
@@ -197,7 +181,7 @@ where
|
||||
self.instance.clone(),
|
||||
user_provider.clone(),
|
||||
))
|
||||
.flight_handler(flight_handler);
|
||||
.flight_handler(Arc::new(greptime_request_handler));
|
||||
|
||||
let grpc_server = if !external {
|
||||
let frontend_grpc_handler =
|
||||
|
||||
@@ -36,7 +36,7 @@ const BLOOM_META_LEN_SIZE: u64 = 4;
|
||||
pub const DEFAULT_PREFETCH_SIZE: u64 = 8192; // 8KiB
|
||||
|
||||
/// Metrics for bloom filter read operations.
|
||||
#[derive(Default, Clone)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct BloomFilterReadMetrics {
|
||||
/// Total byte size to read.
|
||||
pub total_bytes: u64,
|
||||
@@ -50,46 +50,6 @@ pub struct BloomFilterReadMetrics {
|
||||
pub cache_miss: usize,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for BloomFilterReadMetrics {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self {
|
||||
total_bytes,
|
||||
total_ranges,
|
||||
fetch_elapsed,
|
||||
cache_hit,
|
||||
cache_miss,
|
||||
} = self;
|
||||
|
||||
// If both total_bytes and cache_hit are 0, we didn't read anything.
|
||||
if *total_bytes == 0 && *cache_hit == 0 {
|
||||
return write!(f, "{{}}");
|
||||
}
|
||||
write!(f, "{{")?;
|
||||
|
||||
if *total_bytes > 0 {
|
||||
write!(f, "\"total_bytes\":{}", total_bytes)?;
|
||||
}
|
||||
if *cache_hit > 0 {
|
||||
if *total_bytes > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "\"cache_hit\":{}", cache_hit)?;
|
||||
}
|
||||
|
||||
if *total_ranges > 0 {
|
||||
write!(f, ", \"total_ranges\":{}", total_ranges)?;
|
||||
}
|
||||
if !fetch_elapsed.is_zero() {
|
||||
write!(f, ", \"fetch_elapsed\":\"{:?}\"", fetch_elapsed)?;
|
||||
}
|
||||
if *cache_miss > 0 {
|
||||
write!(f, ", \"cache_miss\":{}", cache_miss)?;
|
||||
}
|
||||
|
||||
write!(f, "}}")
|
||||
}
|
||||
}
|
||||
|
||||
impl BloomFilterReadMetrics {
|
||||
/// Merges another metrics into this one.
|
||||
pub fn merge_from(&mut self, other: &Self) {
|
||||
@@ -158,7 +118,26 @@ pub trait BloomFilterReader: Sync {
|
||||
&self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&mut BloomFilterReadMetrics>,
|
||||
) -> Result<Vec<Bytes>>;
|
||||
) -> Result<Vec<Bytes>> {
|
||||
let start = metrics.as_ref().map(|_| Instant::now());
|
||||
|
||||
let mut results = Vec::with_capacity(ranges.len());
|
||||
for range in ranges {
|
||||
let size = (range.end - range.start) as u32;
|
||||
let data = self.range_read(range.start, size, None).await?;
|
||||
results.push(data);
|
||||
}
|
||||
|
||||
if let Some(m) = metrics {
|
||||
m.total_ranges += ranges.len();
|
||||
m.total_bytes += ranges.iter().map(|r| r.end - r.start).sum::<u64>();
|
||||
if let Some(start) = start {
|
||||
m.fetch_elapsed += start.elapsed();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Reads the meta information of the bloom filter.
|
||||
async fn metadata(
|
||||
|
||||
@@ -31,7 +31,7 @@ mod blob;
|
||||
mod footer;
|
||||
|
||||
/// Metrics for inverted index read operations.
|
||||
#[derive(Default, Clone)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct InvertedIndexReadMetrics {
|
||||
/// Total byte size to read.
|
||||
pub total_bytes: u64,
|
||||
@@ -45,46 +45,6 @@ pub struct InvertedIndexReadMetrics {
|
||||
pub cache_miss: usize,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for InvertedIndexReadMetrics {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self {
|
||||
total_bytes,
|
||||
total_ranges,
|
||||
fetch_elapsed,
|
||||
cache_hit,
|
||||
cache_miss,
|
||||
} = self;
|
||||
|
||||
// If both total_bytes and cache_hit are 0, we didn't read anything.
|
||||
if *total_bytes == 0 && *cache_hit == 0 {
|
||||
return write!(f, "{{}}");
|
||||
}
|
||||
write!(f, "{{")?;
|
||||
|
||||
if *total_bytes > 0 {
|
||||
write!(f, "\"total_bytes\":{}", total_bytes)?;
|
||||
}
|
||||
if *cache_hit > 0 {
|
||||
if *total_bytes > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "\"cache_hit\":{}", cache_hit)?;
|
||||
}
|
||||
|
||||
if *total_ranges > 0 {
|
||||
write!(f, ", \"total_ranges\":{}", total_ranges)?;
|
||||
}
|
||||
if !fetch_elapsed.is_zero() {
|
||||
write!(f, ", \"fetch_elapsed\":\"{:?}\"", fetch_elapsed)?;
|
||||
}
|
||||
if *cache_miss > 0 {
|
||||
write!(f, ", \"cache_miss\":{}", cache_miss)?;
|
||||
}
|
||||
|
||||
write!(f, "}}")
|
||||
}
|
||||
}
|
||||
|
||||
impl InvertedIndexReadMetrics {
|
||||
/// Merges another metrics into this one.
|
||||
pub fn merge_from(&mut self, other: &Self) {
|
||||
@@ -113,7 +73,21 @@ pub trait InvertedIndexReader: Send + Sync {
|
||||
&self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<Bytes>>;
|
||||
) -> Result<Vec<Bytes>> {
|
||||
let mut metrics = metrics;
|
||||
let mut result = Vec::with_capacity(ranges.len());
|
||||
for range in ranges {
|
||||
let data = self
|
||||
.range_read(
|
||||
range.start,
|
||||
(range.end - range.start) as u32,
|
||||
metrics.as_deref_mut(),
|
||||
)
|
||||
.await?;
|
||||
result.push(Bytes::from(data));
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Retrieves metadata of all inverted indices stored within the blob.
|
||||
async fn metadata<'a>(
|
||||
|
||||
@@ -189,9 +189,6 @@ impl MetaClientBuilder {
|
||||
let mgr = client.channel_manager.clone();
|
||||
|
||||
if self.enable_heartbeat {
|
||||
if self.heartbeat_channel_manager.is_some() {
|
||||
info!("Enable heartbeat channel using the heartbeat channel manager.");
|
||||
}
|
||||
let mgr = self.heartbeat_channel_manager.unwrap_or(mgr.clone());
|
||||
client.heartbeat = Some(HeartbeatClient::new(
|
||||
self.id,
|
||||
|
||||
@@ -24,7 +24,7 @@ use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::warn;
|
||||
use rand::seq::SliceRandom;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tokio::time::timeout;
|
||||
use tonic::transport::Channel;
|
||||
|
||||
@@ -101,14 +101,12 @@ impl AskLeader {
|
||||
};
|
||||
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(peers.len());
|
||||
let channel_manager = self.channel_manager.clone();
|
||||
|
||||
for addr in &peers {
|
||||
let mut client = self.create_asker(addr)?;
|
||||
let tx_clone = tx.clone();
|
||||
let req = req.clone();
|
||||
let addr = addr.clone();
|
||||
let channel_manager = channel_manager.clone();
|
||||
tokio::spawn(async move {
|
||||
match client.ask_leader(req).await {
|
||||
Ok(res) => {
|
||||
@@ -119,19 +117,13 @@ impl AskLeader {
|
||||
};
|
||||
}
|
||||
Err(status) => {
|
||||
// Reset cached channel even on generic errors: the VIP may keep us on a dead
|
||||
// backend, so forcing a reconnect gives us a chance to hit a healthy peer.
|
||||
Self::reset_channels_with_manager(
|
||||
&channel_manager,
|
||||
std::slice::from_ref(&addr),
|
||||
);
|
||||
warn!("Failed to ask leader from: {addr}, {status}");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let leader = match timeout(
|
||||
let leader = timeout(
|
||||
self.channel_manager
|
||||
.config()
|
||||
.timeout
|
||||
@@ -139,16 +131,8 @@ impl AskLeader {
|
||||
rx.recv(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(leader)) => leader,
|
||||
Ok(None) => return error::NoLeaderSnafu.fail(),
|
||||
Err(e) => {
|
||||
// All peers timed out. Reset channels to force reconnection,
|
||||
// which may help escape dead backends in VIP/LB scenarios.
|
||||
Self::reset_channels_with_manager(&self.channel_manager, &peers);
|
||||
return Err(e).context(error::AskLeaderTimeoutSnafu);
|
||||
}
|
||||
};
|
||||
.context(error::AskLeaderTimeoutSnafu)?
|
||||
.context(error::NoLeaderSnafu)?;
|
||||
|
||||
let mut leadership_group = self.leadership_group.write().unwrap();
|
||||
leadership_group.leader = Some(leader.clone());
|
||||
@@ -185,15 +169,6 @@ impl AskLeader {
|
||||
.context(error::CreateChannelSnafu)?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Drop cached channels for the given peers so a fresh connection is used next time.
|
||||
fn reset_channels_with_manager(channel_manager: &ChannelManager, peers: &[String]) {
|
||||
if peers.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
channel_manager.retain_channel(|addr, _| !peers.iter().any(|peer| peer == addr));
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -18,10 +18,6 @@ use std::time::Duration;
|
||||
use client::RegionFollowerClientRef;
|
||||
use common_base::Plugins;
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
|
||||
use common_meta::distributed_time_constants::{
|
||||
HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS, HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS,
|
||||
HEARTBEAT_TIMEOUT,
|
||||
};
|
||||
use common_telemetry::{debug, info};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -38,6 +34,8 @@ pub struct MetaClientOptions {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub timeout: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub heartbeat_timeout: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub ddl_timeout: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub connect_timeout: Duration,
|
||||
@@ -54,6 +52,7 @@ impl Default for MetaClientOptions {
|
||||
Self {
|
||||
metasrv_addrs: vec!["127.0.0.1:3002".to_string()],
|
||||
timeout: Duration::from_millis(3_000u64),
|
||||
heartbeat_timeout: Duration::from_millis(500u64),
|
||||
ddl_timeout: Duration::from_millis(10_000u64),
|
||||
connect_timeout: Duration::from_millis(1_000u64),
|
||||
tcp_nodelay: true,
|
||||
@@ -98,11 +97,7 @@ pub async fn create_meta_client(
|
||||
.timeout(meta_client_options.timeout)
|
||||
.connect_timeout(meta_client_options.connect_timeout)
|
||||
.tcp_nodelay(meta_client_options.tcp_nodelay);
|
||||
let heartbeat_config = base_config
|
||||
.clone()
|
||||
.timeout(HEARTBEAT_TIMEOUT)
|
||||
.http2_keep_alive_interval(HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS)
|
||||
.http2_keep_alive_timeout(HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS);
|
||||
let heartbeat_config = base_config.clone();
|
||||
|
||||
if let MetaClientType::Frontend = client_type {
|
||||
let ddl_config = base_config.clone().timeout(meta_client_options.ddl_timeout);
|
||||
|
||||
@@ -64,7 +64,6 @@ lazy_static.workspace = true
|
||||
once_cell.workspace = true
|
||||
ordered-float.workspace = true
|
||||
parking_lot.workspace = true
|
||||
partition.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
rand.workspace = true
|
||||
|
||||
@@ -29,7 +29,7 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
|
||||
use common_telemetry::info;
|
||||
use either::Either;
|
||||
use servers::configurator::GrpcRouterConfiguratorRef;
|
||||
use servers::configurator::ConfiguratorRef;
|
||||
use servers::http::{HttpServer, HttpServerBuilder};
|
||||
use servers::metrics_handler::MetricsHandler;
|
||||
use servers::server::Server;
|
||||
@@ -44,7 +44,6 @@ use crate::cluster::{MetaPeerClientBuilder, MetaPeerClientRef};
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
use crate::election::CANDIDATE_LEASE_SECS;
|
||||
use crate::election::etcd::EtcdElection;
|
||||
use crate::error::OtherSnafu;
|
||||
use crate::metasrv::builder::MetasrvBuilder;
|
||||
use crate::metasrv::{
|
||||
BackendImpl, ElectionRef, Metasrv, MetasrvOptions, SelectTarget, SelectorRef,
|
||||
@@ -132,15 +131,8 @@ impl MetasrvInstance {
|
||||
|
||||
// Start gRPC server with admin services for backward compatibility
|
||||
let mut router = router(self.metasrv.clone());
|
||||
if let Some(configurator) = self
|
||||
.metasrv
|
||||
.plugins()
|
||||
.get::<GrpcRouterConfiguratorRef<()>>()
|
||||
{
|
||||
router = configurator
|
||||
.configure_grpc_router(router, ())
|
||||
.await
|
||||
.context(OtherSnafu)?;
|
||||
if let Some(configurator) = self.metasrv.plugins().get::<ConfiguratorRef>() {
|
||||
router = configurator.config_grpc(router);
|
||||
}
|
||||
|
||||
let (serve_state_tx, serve_state_rx) = oneshot::channel();
|
||||
|
||||
@@ -23,7 +23,6 @@ use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
use tokio::sync::mpsc::error::SendError;
|
||||
use tonic::codegen::http;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::metasrv::SelectTarget;
|
||||
use crate::pubsub::Message;
|
||||
@@ -983,52 +982,6 @@ pub enum Error {
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Repartition group {} source region missing, region id: {}",
|
||||
group_id,
|
||||
region_id
|
||||
))]
|
||||
RepartitionSourceRegionMissing {
|
||||
group_id: Uuid,
|
||||
region_id: RegionId,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Repartition group {} target region missing, region id: {}",
|
||||
group_id,
|
||||
region_id
|
||||
))]
|
||||
RepartitionTargetRegionMissing {
|
||||
group_id: Uuid,
|
||||
region_id: RegionId,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize partition expression: {}", source))]
|
||||
SerializePartitionExpr {
|
||||
#[snafu(source)]
|
||||
source: partition::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Partition expression mismatch, region id: {}, expected: {}, actual: {}",
|
||||
region_id,
|
||||
expected,
|
||||
actual
|
||||
))]
|
||||
PartitionExprMismatch {
|
||||
region_id: RegionId,
|
||||
expected: String,
|
||||
actual: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
@@ -1088,7 +1041,6 @@ impl ErrorExt for Error {
|
||||
| Error::MailboxChannelClosed { .. }
|
||||
| Error::IsNotLeader { .. } => StatusCode::IllegalState,
|
||||
Error::RetryLaterWithSource { source, .. } => source.status_code(),
|
||||
Error::SerializePartitionExpr { source, .. } => source.status_code(),
|
||||
|
||||
Error::Unsupported { .. } => StatusCode::Unsupported,
|
||||
|
||||
@@ -1110,10 +1062,7 @@ impl ErrorExt for Error {
|
||||
| Error::TooManyPartitions { .. }
|
||||
| Error::TomlFormat { .. }
|
||||
| Error::HandlerNotFound { .. }
|
||||
| Error::LeaderPeerChanged { .. }
|
||||
| Error::RepartitionSourceRegionMissing { .. }
|
||||
| Error::RepartitionTargetRegionMissing { .. }
|
||||
| Error::PartitionExprMismatch { .. } => StatusCode::InvalidArguments,
|
||||
| Error::LeaderPeerChanged { .. } => StatusCode::InvalidArguments,
|
||||
Error::LeaseKeyFromUtf8 { .. }
|
||||
| Error::LeaseValueFromUtf8 { .. }
|
||||
| Error::InvalidRegionKeyFromUtf8 { .. }
|
||||
|
||||
@@ -23,17 +23,14 @@ use store_api::storage::RegionId;
|
||||
mod candidate;
|
||||
mod ctx;
|
||||
mod handler;
|
||||
#[cfg(test)]
|
||||
mod mock;
|
||||
mod options;
|
||||
mod procedure;
|
||||
mod scheduler;
|
||||
mod tracker;
|
||||
|
||||
pub use options::GcSchedulerOptions;
|
||||
pub use procedure::BatchGcProcedure;
|
||||
pub(crate) use options::GcSchedulerOptions;
|
||||
pub(crate) use scheduler::{GcScheduler, GcTickerRef};
|
||||
|
||||
pub type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;
|
||||
pub(crate) type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;
|
||||
|
||||
pub(crate) type Peer2Regions = HashMap<Peer, HashSet<RegionId>>;
|
||||
|
||||
@@ -32,7 +32,7 @@ use table::metadata::TableId;
|
||||
use crate::cluster::MetaPeerClientRef;
|
||||
use crate::error::{self, Result, TableMetadataManagerSnafu, UnexpectedSnafu};
|
||||
use crate::gc::Region2Peers;
|
||||
use crate::gc::procedure::{BatchGcProcedure, GcRegionProcedure};
|
||||
use crate::gc::procedure::GcRegionProcedure;
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::service::mailbox::{Channel, MailboxRef};
|
||||
|
||||
@@ -84,6 +84,25 @@ impl DefaultGcSchedulerCtx {
|
||||
mailbox: MailboxRef,
|
||||
server_addr: String,
|
||||
) -> Result<Self> {
|
||||
// register a noop loader for `GcRegionProcedure` to avoid error when deserializing procedure when rebooting
|
||||
|
||||
procedure_manager
|
||||
.register_loader(
|
||||
GcRegionProcedure::TYPE_NAME,
|
||||
Box::new(move |json| {
|
||||
common_procedure::error::ProcedureLoaderNotImplementedSnafu {
|
||||
type_name: GcRegionProcedure::TYPE_NAME.to_string(),
|
||||
reason:
|
||||
"GC procedure should be retried by scheduler, not reloaded from storage"
|
||||
.to_string(),
|
||||
}
|
||||
.fail()
|
||||
}),
|
||||
)
|
||||
.context(error::RegisterProcedureLoaderSnafu {
|
||||
type_name: GcRegionProcedure::TYPE_NAME,
|
||||
});
|
||||
|
||||
Ok(Self {
|
||||
table_metadata_manager,
|
||||
procedure_manager,
|
||||
|
||||
@@ -1,458 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod basic;
|
||||
mod candidate_select;
|
||||
mod concurrent;
|
||||
mod config;
|
||||
mod err_handle;
|
||||
mod full_list;
|
||||
mod integration;
|
||||
mod misc;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::datanode::{RegionManifestInfo, RegionStat};
|
||||
use common_meta::key::table_route::PhysicalTableRouteValue;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_telemetry::debug;
|
||||
use ordered_float::OrderedFloat;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
use table::metadata::TableId;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
use crate::error::{Result, UnexpectedSnafu};
|
||||
use crate::gc::candidate::GcCandidate;
|
||||
use crate::gc::ctx::SchedulerCtx;
|
||||
use crate::gc::handler::Region2Peers;
|
||||
use crate::gc::options::GcSchedulerOptions;
|
||||
use crate::gc::scheduler::{Event, GcScheduler};
|
||||
|
||||
pub const TEST_REGION_SIZE_200MB: u64 = 200_000_000;
|
||||
|
||||
/// Helper function to create an empty GcReport for the given region IDs
|
||||
pub fn new_empty_report_with(region_ids: impl IntoIterator<Item = RegionId>) -> GcReport {
|
||||
let mut deleted_files = HashMap::new();
|
||||
for region_id in region_ids {
|
||||
deleted_files.insert(region_id, vec![]);
|
||||
}
|
||||
GcReport {
|
||||
deleted_files,
|
||||
need_retry_regions: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MockSchedulerCtx {
|
||||
pub table_to_region_stats: Arc<Mutex<Option<HashMap<TableId, Vec<RegionStat>>>>>,
|
||||
pub table_routes: Arc<Mutex<HashMap<TableId, (TableId, PhysicalTableRouteValue)>>>,
|
||||
pub file_refs: Arc<Mutex<Option<FileRefsManifest>>>,
|
||||
pub gc_reports: Arc<Mutex<HashMap<RegionId, GcReport>>>,
|
||||
pub candidates: Arc<Mutex<Option<HashMap<TableId, Vec<GcCandidate>>>>>,
|
||||
pub get_table_to_region_stats_calls: Arc<Mutex<usize>>,
|
||||
pub get_file_references_calls: Arc<Mutex<usize>>,
|
||||
pub gc_regions_calls: Arc<Mutex<usize>>,
|
||||
// Error injection fields for testing
|
||||
pub get_table_to_region_stats_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub get_table_route_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub get_file_references_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub gc_regions_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
// Retry testing fields
|
||||
pub gc_regions_retry_count: Arc<Mutex<HashMap<RegionId, usize>>>,
|
||||
pub gc_regions_error_sequence: Arc<Mutex<Vec<crate::error::Error>>>,
|
||||
pub gc_regions_success_after_retries: Arc<Mutex<HashMap<RegionId, usize>>>,
|
||||
// Per-region error injection
|
||||
pub gc_regions_per_region_errors: Arc<Mutex<HashMap<RegionId, crate::error::Error>>>,
|
||||
}
|
||||
|
||||
impl MockSchedulerCtx {
|
||||
pub fn with_table_routes(
|
||||
self,
|
||||
table_routes: HashMap<TableId, (TableId, Vec<(RegionId, Peer)>)>,
|
||||
) -> Self {
|
||||
*self.table_routes.lock().unwrap() = table_routes
|
||||
.into_iter()
|
||||
.map(|(k, (phy_id, region2peer))| {
|
||||
let phy = PhysicalTableRouteValue::new(
|
||||
region2peer
|
||||
.into_iter()
|
||||
.map(|(region_id, peer)| RegionRoute {
|
||||
region: Region::new_test(region_id),
|
||||
leader_peer: Some(peer),
|
||||
..Default::default()
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
|
||||
(k, (phy_id, phy))
|
||||
})
|
||||
.collect();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `get_table_to_region_stats`
|
||||
#[allow(dead_code)]
|
||||
pub fn with_get_table_to_region_stats_error(self, error: crate::error::Error) -> Self {
|
||||
*self.get_table_to_region_stats_error.lock().unwrap() = Some(error);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `get_table_route`
|
||||
pub fn set_table_route_error(&self, error: crate::error::Error) {
|
||||
*self.get_table_route_error.lock().unwrap() = Some(error);
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `get_file_references`
|
||||
#[allow(dead_code)]
|
||||
pub fn with_get_file_references_error(self, error: crate::error::Error) -> Self {
|
||||
*self.get_file_references_error.lock().unwrap() = Some(error);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `gc_regions`
|
||||
pub fn with_gc_regions_error(self, error: crate::error::Error) -> Self {
|
||||
*self.gc_regions_error.lock().unwrap() = Some(error);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a sequence of errors to be returned by `gc_regions` for retry testing
|
||||
pub fn set_gc_regions_error_sequence(&self, errors: Vec<crate::error::Error>) {
|
||||
*self.gc_regions_error_sequence.lock().unwrap() = errors;
|
||||
}
|
||||
|
||||
/// Set success after a specific number of retries for a region
|
||||
pub fn set_gc_regions_success_after_retries(&self, region_id: RegionId, retries: usize) {
|
||||
self.gc_regions_success_after_retries
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(region_id, retries);
|
||||
}
|
||||
|
||||
/// Get the retry count for a specific region
|
||||
pub fn get_retry_count(&self, region_id: RegionId) -> usize {
|
||||
self.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get(®ion_id)
|
||||
.copied()
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Reset all retry tracking
|
||||
pub fn reset_retry_tracking(&self) {
|
||||
*self.gc_regions_retry_count.lock().unwrap() = HashMap::new();
|
||||
*self.gc_regions_error_sequence.lock().unwrap() = Vec::new();
|
||||
*self.gc_regions_success_after_retries.lock().unwrap() = HashMap::new();
|
||||
}
|
||||
|
||||
/// Set an error to be returned for a specific region
|
||||
pub fn set_gc_regions_error_for_region(&self, region_id: RegionId, error: crate::error::Error) {
|
||||
self.gc_regions_per_region_errors
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(region_id, error);
|
||||
}
|
||||
|
||||
/// Clear per-region errors
|
||||
#[allow(unused)]
|
||||
pub fn clear_gc_regions_per_region_errors(&self) {
|
||||
self.gc_regions_per_region_errors.lock().unwrap().clear();
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SchedulerCtx for MockSchedulerCtx {
|
||||
async fn get_table_to_region_stats(&self) -> Result<HashMap<TableId, Vec<RegionStat>>> {
|
||||
*self.get_table_to_region_stats_calls.lock().unwrap() += 1;
|
||||
|
||||
// Check if we should return an injected error
|
||||
if let Some(error) = self.get_table_to_region_stats_error.lock().unwrap().take() {
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
Ok(self
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
async fn get_table_route(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<(TableId, PhysicalTableRouteValue)> {
|
||||
// Check if we should return an injected error
|
||||
if let Some(error) = self.get_table_route_error.lock().unwrap().take() {
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
Ok(self
|
||||
.table_routes
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get(&table_id)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| (table_id, PhysicalTableRouteValue::default())))
|
||||
}
|
||||
|
||||
async fn get_file_references(
|
||||
&self,
|
||||
query_regions: &[RegionId],
|
||||
_related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
region_to_peer: &Region2Peers,
|
||||
_timeout: Duration,
|
||||
) -> Result<FileRefsManifest> {
|
||||
*self.get_file_references_calls.lock().unwrap() += 1;
|
||||
|
||||
// Check if we should return an injected error
|
||||
if let Some(error) = self.get_file_references_error.lock().unwrap().take() {
|
||||
return Err(error);
|
||||
}
|
||||
if query_regions
|
||||
.iter()
|
||||
.any(|region_id| !region_to_peer.contains_key(region_id))
|
||||
{
|
||||
UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"region_to_peer{region_to_peer:?} does not contain all region_ids requested: {:?}",
|
||||
query_regions
|
||||
),
|
||||
}.fail()?;
|
||||
}
|
||||
|
||||
Ok(self.file_refs.lock().unwrap().clone().unwrap_or_default())
|
||||
}
|
||||
|
||||
async fn gc_regions(
|
||||
&self,
|
||||
_peer: Peer,
|
||||
region_ids: &[RegionId],
|
||||
_file_refs_manifest: &FileRefsManifest,
|
||||
_full_file_listing: bool,
|
||||
_timeout: Duration,
|
||||
) -> Result<GcReport> {
|
||||
*self.gc_regions_calls.lock().unwrap() += 1;
|
||||
|
||||
// Check per-region error injection first (for any region)
|
||||
for ®ion_id in region_ids {
|
||||
if let Some(error) = self
|
||||
.gc_regions_per_region_errors
|
||||
.lock()
|
||||
.unwrap()
|
||||
.remove(®ion_id)
|
||||
{
|
||||
*self
|
||||
.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry(region_id)
|
||||
.or_insert(0) += 1;
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we should return an injected error
|
||||
if let Some(error) = self.gc_regions_error.lock().unwrap().take() {
|
||||
for region_id in region_ids {
|
||||
*self
|
||||
.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry(*region_id)
|
||||
.or_insert(0) += 1;
|
||||
}
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
// Handle error sequence for retry testing
|
||||
{
|
||||
let mut error_sequence = self.gc_regions_error_sequence.lock().unwrap();
|
||||
if !error_sequence.is_empty() {
|
||||
let error = error_sequence.remove(0);
|
||||
for region_id in region_ids {
|
||||
*self
|
||||
.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.entry(*region_id)
|
||||
.or_insert(0) += 1;
|
||||
}
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
|
||||
// Build the final report by processing each region individually
|
||||
let mut final_report = GcReport::default();
|
||||
let gc_reports = self.gc_reports.lock().unwrap();
|
||||
let success_after_retries = self.gc_regions_success_after_retries.lock().unwrap();
|
||||
|
||||
for ®ion_id in region_ids {
|
||||
// Get current retry count for this region
|
||||
let retry_count = self
|
||||
.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get(®ion_id)
|
||||
.copied()
|
||||
.unwrap_or(0);
|
||||
|
||||
// Check if this region should succeed or need retry
|
||||
if let Some(&required_retries) = success_after_retries.get(®ion_id) {
|
||||
if retry_count < required_retries {
|
||||
debug!(
|
||||
"Region {} needs retry (attempt {}/{})",
|
||||
region_id,
|
||||
retry_count + 1,
|
||||
required_retries
|
||||
);
|
||||
// This region needs more retries - add to need_retry_regions
|
||||
final_report.need_retry_regions.insert(region_id);
|
||||
// Track the retry attempt
|
||||
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
|
||||
*retry_count_map.entry(region_id).or_insert(0) += 1;
|
||||
} else {
|
||||
debug!(
|
||||
"Region {} has completed retries - succeeding now",
|
||||
region_id
|
||||
);
|
||||
// This region has completed all required retries - succeed
|
||||
if let Some(report) = gc_reports.get(®ion_id) {
|
||||
final_report.merge(report.clone());
|
||||
}
|
||||
// Track the success attempt
|
||||
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
|
||||
*retry_count_map.entry(region_id).or_insert(0) += 1;
|
||||
}
|
||||
} else {
|
||||
// No retry requirement - check if we have a GC report for this region
|
||||
if let Some(report) = gc_reports.get(®ion_id) {
|
||||
// We have a GC report - succeed immediately
|
||||
final_report.merge(report.clone());
|
||||
// Track the success attempt
|
||||
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
|
||||
*retry_count_map.entry(region_id).or_insert(0) += 1;
|
||||
} else {
|
||||
// No GC report available - this region should be marked for retry
|
||||
final_report.need_retry_regions.insert(region_id);
|
||||
// Track the attempt
|
||||
let mut retry_count_map = self.gc_regions_retry_count.lock().unwrap();
|
||||
*retry_count_map.entry(region_id).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the report with need_retry_regions populated - let the caller handle retry logic
|
||||
Ok(final_report)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TestEnv {
|
||||
pub scheduler: GcScheduler,
|
||||
pub ctx: Arc<MockSchedulerCtx>,
|
||||
#[allow(dead_code)]
|
||||
tx: Sender<Event>,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
impl TestEnv {
|
||||
pub fn new() -> Self {
|
||||
let ctx = Arc::new(MockSchedulerCtx::default());
|
||||
let (tx, rx) = GcScheduler::channel();
|
||||
let config = GcSchedulerOptions::default();
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: rx,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
Self { scheduler, ctx, tx }
|
||||
}
|
||||
|
||||
pub fn with_candidates(self, candidates: HashMap<TableId, Vec<GcCandidate>>) -> Self {
|
||||
*self.ctx.candidates.lock().unwrap() = Some(candidates);
|
||||
self
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub async fn run_scheduler(mut self) {
|
||||
self.scheduler.run().await;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub async fn tick(&self) {
|
||||
self.tx.send(Event::Tick).await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create a mock GC candidate that will pass the GC threshold
|
||||
fn new_candidate(region_id: RegionId, score: f64) -> GcCandidate {
|
||||
// will pass threshold for gc
|
||||
let region_stat = mock_region_stat(region_id, RegionRole::Leader, 10_000, 10);
|
||||
|
||||
GcCandidate {
|
||||
region_id,
|
||||
score: OrderedFloat(score),
|
||||
region_stat,
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create a mock GC candidate
|
||||
fn mock_candidate(region_id: RegionId) -> GcCandidate {
|
||||
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10);
|
||||
GcCandidate {
|
||||
region_id,
|
||||
score: ordered_float::OrderedFloat(1.0),
|
||||
region_stat,
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create a mock RegionStat
|
||||
fn mock_region_stat(
|
||||
id: RegionId,
|
||||
role: RegionRole,
|
||||
approximate_bytes: u64,
|
||||
sst_num: u64,
|
||||
) -> RegionStat {
|
||||
RegionStat {
|
||||
id,
|
||||
role,
|
||||
approximate_bytes,
|
||||
sst_num,
|
||||
region_manifest: RegionManifestInfo::Mito {
|
||||
manifest_version: 0,
|
||||
flushed_entry_id: 0,
|
||||
file_removed_cnt: 0,
|
||||
},
|
||||
rcus: 0,
|
||||
wcus: 0,
|
||||
engine: "mito".to_string(),
|
||||
num_rows: 0,
|
||||
memtable_size: 0,
|
||||
manifest_size: 0,
|
||||
sst_size: 0,
|
||||
index_size: 0,
|
||||
data_topic_latest_entry_id: 0,
|
||||
metadata_topic_latest_entry_id: 0,
|
||||
written_bytes: 0,
|
||||
}
|
||||
}
|
||||
@@ -1,164 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{
|
||||
MockSchedulerCtx, TEST_REGION_SIZE_200MB, TestEnv, mock_region_stat, new_candidate,
|
||||
};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parallel_process_datanodes_empty() {
|
||||
let env = TestEnv::new();
|
||||
let report = env
|
||||
.scheduler
|
||||
.parallel_process_datanodes(HashMap::new())
|
||||
.await;
|
||||
|
||||
assert_eq!(report.per_datanode_reports.len(), 0);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parallel_process_datanodes_with_candidates() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
|
||||
|
||||
let mut gc_reports = HashMap::new();
|
||||
let deleted_files = vec![FileId::random()];
|
||||
gc_reports.insert(
|
||||
region_id,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(region_id, deleted_files.clone())]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
let ctx = MockSchedulerCtx {
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer.clone())]),
|
||||
)]));
|
||||
|
||||
let env = TestEnv::new();
|
||||
// We need to replace the ctx with the one with gc_reports
|
||||
let mut scheduler = env.scheduler;
|
||||
scheduler.ctx = Arc::new(ctx);
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer,
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
assert_eq!(report.per_datanode_reports.len(), 1);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_tick() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
|
||||
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(region_id, GcReport::default());
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(HashMap::from([(
|
||||
table_id,
|
||||
vec![mock_region_stat(
|
||||
region_id,
|
||||
RegionRole::Leader,
|
||||
TEST_REGION_SIZE_200MB,
|
||||
10,
|
||||
)],
|
||||
)])))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
candidates: Arc::new(Mutex::new(Some(candidates))),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let report = scheduler.handle_tick().await.unwrap();
|
||||
|
||||
// Validate the returned GcJobReport
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
1,
|
||||
"Should process 1 datanode"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
0,
|
||||
"Should have 0 failed datanodes"
|
||||
);
|
||||
|
||||
assert_eq!(*ctx.get_table_to_region_stats_calls.lock().unwrap(), 1);
|
||||
assert_eq!(*ctx.get_file_references_calls.lock().unwrap(), 1);
|
||||
assert_eq!(*ctx.gc_regions_calls.lock().unwrap(), 1);
|
||||
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
assert!(
|
||||
tracker.contains_key(®ion_id),
|
||||
"Tracker should have one region: {:?}",
|
||||
tracker
|
||||
);
|
||||
}
|
||||
@@ -1,390 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use common_meta::datanode::RegionManifestInfo;
use common_telemetry::init_default_ut_logging;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;

use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat};
use crate::gc::{GcScheduler, GcSchedulerOptions};

/// Candidate Selection Tests
#[tokio::test]
|
||||
async fn test_gc_candidate_filtering_by_role() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let leader_region = RegionId::new(table_id, 1);
|
||||
let follower_region = RegionId::new(table_id, 2);
|
||||
|
||||
let mut leader_stat = mock_region_stat(
|
||||
leader_region,
|
||||
RegionRole::Leader,
|
||||
TEST_REGION_SIZE_200MB,
|
||||
10,
|
||||
); // 200MB
|
||||
|
||||
let mut follower_stat = mock_region_stat(
|
||||
follower_region,
|
||||
RegionRole::Follower,
|
||||
TEST_REGION_SIZE_200MB,
|
||||
10,
|
||||
); // 200MB
|
||||
|
||||
// Set up manifest info for scoring
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut leader_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut follower_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![leader_stat.clone(), follower_stat.clone()])]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
// Should only select leader regions
|
||||
assert_eq!(
|
||||
candidates.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates, got {}",
|
||||
candidates.len()
|
||||
);
|
||||
if let Some(table_candidates) = candidates.get(&table_id) {
|
||||
assert_eq!(
|
||||
table_candidates.len(),
|
||||
1,
|
||||
"Expected 1 candidate for table {}, got {}",
|
||||
table_id,
|
||||
table_candidates.len()
|
||||
);
|
||||
assert_eq!(
|
||||
table_candidates[0].region_id, leader_region,
|
||||
"Expected leader region {}, got {}",
|
||||
leader_region, table_candidates[0].region_id
|
||||
);
|
||||
} else {
|
||||
panic!("Expected table {} to have candidates", table_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_candidate_size_threshold() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let small_region = RegionId::new(table_id, 1);
|
||||
let large_region = RegionId::new(table_id, 2);
|
||||
|
||||
let mut small_stat = mock_region_stat(small_region, RegionRole::Leader, 50_000_000, 5); // 50MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut small_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 3;
|
||||
}
|
||||
|
||||
let mut large_stat =
|
||||
mock_region_stat(large_region, RegionRole::Leader, TEST_REGION_SIZE_200MB, 20); // 200MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut large_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![small_stat, large_stat])]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let config = GcSchedulerOptions {
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
// Should only select large region
|
||||
assert_eq!(
|
||||
candidates.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates, got {}",
|
||||
candidates.len()
|
||||
);
|
||||
if let Some(table_candidates) = candidates.get(&table_id) {
|
||||
assert_eq!(
|
||||
table_candidates.len(),
|
||||
1,
|
||||
"Expected 1 candidate for table {}, got {}",
|
||||
table_id,
|
||||
table_candidates.len()
|
||||
);
|
||||
assert_eq!(
|
||||
table_candidates[0].region_id, large_region,
|
||||
"Expected large region {}, got {}",
|
||||
large_region, table_candidates[0].region_id
|
||||
);
|
||||
} else {
|
||||
panic!("Expected table {} to have candidates", table_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_candidate_scoring() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let low_score_region = RegionId::new(table_id, 1);
|
||||
let high_score_region = RegionId::new(table_id, 2);
|
||||
|
||||
let mut low_stat = mock_region_stat(
|
||||
low_score_region,
|
||||
RegionRole::Leader,
|
||||
TEST_REGION_SIZE_200MB,
|
||||
5,
|
||||
); // 200MB
|
||||
// Set low file removal rate for low_score_region
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut low_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 2;
|
||||
}
|
||||
|
||||
let mut high_stat = mock_region_stat(
|
||||
high_score_region,
|
||||
RegionRole::Leader,
|
||||
TEST_REGION_SIZE_200MB,
|
||||
50,
|
||||
); // 200MB
|
||||
// Set high file removal rate for high_score_region
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut high_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 20;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![low_stat, high_stat])]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let config = GcSchedulerOptions {
|
||||
sst_count_weight: 1.0,
|
||||
file_removed_count_weight: 0.5,
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
// Should select both regions but high score region should be first
|
||||
assert_eq!(
|
||||
candidates.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates, got {}",
|
||||
candidates.len()
|
||||
);
|
||||
if let Some(table_candidates) = candidates.get(&table_id) {
|
||||
assert_eq!(
|
||||
table_candidates.len(),
|
||||
2,
|
||||
"Expected 2 candidates for table {}, got {}",
|
||||
table_id,
|
||||
table_candidates.len()
|
||||
);
|
||||
// Higher score region should come first (sorted by score descending)
|
||||
assert_eq!(
|
||||
table_candidates[0].region_id, high_score_region,
|
||||
"High score region should be first"
|
||||
);
|
||||
assert!(
|
||||
table_candidates[0].score > table_candidates[1].score,
|
||||
"High score region should have higher score: {} > {}",
|
||||
table_candidates[0].score,
|
||||
table_candidates[1].score
|
||||
);
|
||||
} else {
|
||||
panic!("Expected table {} to have candidates", table_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_candidate_regions_per_table_threshold() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
// Create 10 regions for the same table
|
||||
let mut region_stats = Vec::new();
|
||||
|
||||
for i in 0..10 {
|
||||
let region_id = RegionId::new(table_id, i + 1);
|
||||
let mut stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 20); // 200MB
|
||||
|
||||
// Set different file removal rates to create different scores
|
||||
// Higher region IDs get higher scores (better GC candidates)
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = (i as u64 + 1) * 2; // Region 1: 2, Region 2: 4, ..., Region 10: 20
|
||||
}
|
||||
|
||||
region_stats.push(stat);
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, region_stats)]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
// Set regions_per_table_threshold to 3
|
||||
let config = GcSchedulerOptions {
|
||||
regions_per_table_threshold: 3,
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
// Should have 1 table with candidates
|
||||
assert_eq!(
|
||||
candidates.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates, got {}",
|
||||
candidates.len()
|
||||
);
|
||||
|
||||
if let Some(table_candidates) = candidates.get(&table_id) {
|
||||
// Should only have 3 candidates due to regions_per_table_threshold
|
||||
assert_eq!(
|
||||
table_candidates.len(),
|
||||
3,
|
||||
"Expected 3 candidates for table {} due to regions_per_table_threshold, got {}",
|
||||
table_id,
|
||||
table_candidates.len()
|
||||
);
|
||||
|
||||
// Verify that the top 3 scoring regions are selected
|
||||
// Regions 8, 9, 10 should have the highest scores (file_removed_cnt: 16, 18, 20)
|
||||
// They should be returned in descending order by score
|
||||
let expected_regions = vec![10, 9, 8];
|
||||
let actual_regions: Vec<u32> = table_candidates
|
||||
.iter()
|
||||
.map(|c| c.region_id.region_number())
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
actual_regions, expected_regions,
|
||||
"Expected regions {:?} to be selected, got {:?}",
|
||||
expected_regions, actual_regions
|
||||
);
|
||||
|
||||
// Verify they are sorted by score in descending order
|
||||
for i in 0..table_candidates.len() - 1 {
|
||||
assert!(
|
||||
table_candidates[i].score >= table_candidates[i + 1].score,
|
||||
"Candidates should be sorted by score descending: {} >= {}",
|
||||
table_candidates[i].score,
|
||||
table_candidates[i + 1].score
|
||||
);
|
||||
}
|
||||
} else {
|
||||
panic!("Expected table {} to have candidates", table_id);
|
||||
}
|
||||
}
|
||||
@@ -1,516 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::key::table_route::PhysicalTableRouteValue;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_telemetry::{info, init_default_ut_logging};
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{
|
||||
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_candidate, mock_region_stat, new_candidate,
|
||||
};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
/// Concurrent Processing Tests
|
||||
#[tokio::test]
|
||||
async fn test_concurrent_table_processing_limits() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let mut candidates = HashMap::new();
|
||||
let mut gc_reports = HashMap::new();
|
||||
|
||||
// Create many tables with candidates
|
||||
for table_id in 1..=10 {
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
candidates.insert(table_id, vec![new_candidate(region_id, 1.0)]);
|
||||
gc_reports.insert(
|
||||
region_id,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(region_id, vec![FileId::random()])]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let ctx = MockSchedulerCtx {
|
||||
candidates: Arc::new(Mutex::new(Some(candidates))),
|
||||
file_refs: Arc::new(Mutex::new(Some(FileRefsManifest {
|
||||
manifest_version: (1..=10).map(|i| (RegionId::new(i, 1), 1)).collect(),
|
||||
..Default::default()
|
||||
}))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(
|
||||
(1..=10)
|
||||
.map(|table_id| {
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
(table_id, (table_id, vec![(region_id, Peer::new(1, ""))]))
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
|
||||
let ctx = Arc::new(ctx);
|
||||
|
||||
let config = GcSchedulerOptions {
|
||||
max_concurrent_tables: 3, // Set a low limit
|
||||
retry_backoff_duration: Duration::from_millis(50), // for faster test
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let peer = Peer::new(1, "");
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer,
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
// Should process all datanodes
|
||||
assert_eq!(report.per_datanode_reports.len(), 1);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_datanode_processes_tables_with_partial_gc_failures() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table1 = 1;
|
||||
let region1 = RegionId::new(table1, 1);
|
||||
let table2 = 2;
|
||||
let region2 = RegionId::new(table2, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let mut candidates = HashMap::new();
|
||||
candidates.insert(table1, vec![new_candidate(region1, 1.0)]);
|
||||
candidates.insert(table2, vec![new_candidate(region2, 1.0)]);
|
||||
|
||||
// Set up GC reports for success and failure
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(
|
||||
region1,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(region1, vec![])]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
// region2 will have no GC report, simulating failure
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region1, 1), (region2, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
candidates: Arc::new(Mutex::new(Some(candidates))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([
|
||||
(table1, (table1, vec![(region1, peer.clone())])),
|
||||
(table2, (table2, vec![(region2, peer.clone())])),
|
||||
])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer,
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
// Should have one datanode with mixed results
|
||||
assert_eq!(report.per_datanode_reports.len(), 1);
|
||||
// Also check the failed region: region2 has no GC report, so it should appear in need_retry_regions.
|
||||
let datanode_report = report.per_datanode_reports.values().next().unwrap();
|
||||
assert_eq!(datanode_report.need_retry_regions.len(), 1);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
}
|
||||
|
||||
// Region Concurrency Tests
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_gc_concurrency_limit() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create multiple regions for the same table
|
||||
let mut region_stats = Vec::new();
|
||||
let mut candidates = Vec::new();
|
||||
let mut gc_reports = HashMap::new();
|
||||
|
||||
for i in 1..=10 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
let region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
region_stats.push(region_stat);
|
||||
|
||||
candidates.push(mock_candidate(region_id));
|
||||
|
||||
gc_reports.insert(
|
||||
region_id,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(
|
||||
region_id,
|
||||
vec![FileId::random(), FileId::random()],
|
||||
)]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, region_stats)]);
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: (1..=10)
|
||||
.map(|i| (RegionId::new(table_id, i as u32), 1))
|
||||
.collect(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(
|
||||
table_id,
|
||||
(1..=10)
|
||||
.map(|i| (RegionId::new(table_id, i as u32), peer.clone()))
|
||||
.collect(),
|
||||
),
|
||||
)])),
|
||||
);
|
||||
|
||||
// Configure low concurrency limit
|
||||
let config = GcSchedulerOptions {
|
||||
region_gc_concurrency: 3, // Only 3 regions can be processed concurrently
|
||||
retry_backoff_duration: Duration::from_millis(50), // for faster test
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let start_time = Instant::now();
|
||||
let report = scheduler
|
||||
.process_datanode_gc(
|
||||
peer,
|
||||
candidates.into_iter().map(|c| (table_id, c)).collect(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let duration = start_time.elapsed();
|
||||
|
||||
// All regions should be processed successfully
|
||||
// Check that all 10 regions have deleted files
|
||||
assert_eq!(report.deleted_files.len(), 10);
|
||||
for i in 1..=10 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
assert!(report.deleted_files.contains_key(&region_id));
assert_eq!(report.deleted_files[&region_id].len(), 2); // Each region has 2 deleted files
|
||||
}
|
||||
assert!(report.need_retry_regions.is_empty());
|
||||
|
||||
// Verify that concurrency limit was respected (this is hard to test directly,
|
||||
// but we can verify that the processing completed successfully)
|
||||
info!(
|
||||
"Processed 10 regions with concurrency limit 3 in {:?}",
|
||||
duration
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_gc_concurrency_with_partial_failures() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create multiple regions with mixed success/failure
|
||||
let mut region_stats = Vec::new();
|
||||
let mut candidates = Vec::new();
|
||||
let mut gc_reports = HashMap::new();
|
||||
|
||||
// Create the context first so we can set errors on it
|
||||
let ctx = Arc::new(MockSchedulerCtx::default());
|
||||
|
||||
for i in 1..=6 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
let region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
region_stats.push(region_stat);
|
||||
|
||||
candidates.push(mock_candidate(region_id));
|
||||
|
||||
if i % 2 == 0 {
|
||||
// Even regions will succeed
|
||||
gc_reports.insert(
|
||||
region_id,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(
|
||||
region_id,
|
||||
vec![FileId::random(), FileId::random()],
|
||||
)]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
} else {
|
||||
// Odd regions will fail - don't add them to gc_reports
|
||||
// This will cause them to be marked as needing retry
|
||||
}
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, region_stats)]);
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: (1..=6)
|
||||
.map(|i| (RegionId::new(table_id, i as u32), 1))
|
||||
.collect(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Update the context with the data
|
||||
*ctx.table_to_region_stats.lock().unwrap() = Some(table_stats);
|
||||
*ctx.gc_reports.lock().unwrap() = gc_reports;
|
||||
*ctx.file_refs.lock().unwrap() = Some(file_refs);
|
||||
let region_routes = (1..=6)
|
||||
.map(|i| RegionRoute {
|
||||
region: Region::new_test(RegionId::new(table_id, i as u32)),
|
||||
leader_peer: Some(peer.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.collect();
|
||||
|
||||
*ctx.table_routes.lock().unwrap() = HashMap::from([(
|
||||
table_id,
|
||||
(table_id, PhysicalTableRouteValue::new(region_routes)),
|
||||
)]);
|
||||
|
||||
// Configure concurrency limit
|
||||
let config = GcSchedulerOptions {
|
||||
region_gc_concurrency: 2, // Process 2 regions concurrently
|
||||
retry_backoff_duration: Duration::from_millis(50), // for faster test
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer.clone(),
|
||||
candidates.into_iter().map(|c| (table_id, c)).collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
let report = report.per_datanode_reports.get(&peer.id).unwrap();
|
||||
|
||||
// Should have 3 successful and 3 failed regions
|
||||
// Even regions (2, 4, 6) should succeed, odd regions (1, 3, 5) should fail
|
||||
let mut successful_regions = 0;
|
||||
let mut failed_regions = 0;
|
||||
|
||||
for i in 1..=6 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
if i % 2 == 0 {
|
||||
// Even regions should succeed
|
||||
if report.deleted_files.contains_key(&region_id) {
|
||||
successful_regions += 1;
|
||||
}
|
||||
} else {
|
||||
// Odd regions should fail - they should be in need_retry_regions
|
||||
if report.need_retry_regions.contains(&region_id) {
|
||||
failed_regions += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In the new implementation, regions that cause gc_regions to return an error
|
||||
// are added to need_retry_regions. Let's check if we have the expected mix.
|
||||
info!(
|
||||
"Successful regions: {}, Failed regions: {}",
|
||||
successful_regions, failed_regions
|
||||
);
|
||||
info!(
|
||||
"Deleted files: {:?}",
|
||||
report.deleted_files.keys().collect::<Vec<_>>()
|
||||
);
|
||||
info!("Need retry regions: {:?}", report.need_retry_regions);
|
||||
|
||||
// The exact count might vary depending on how the mock handles errors,
|
||||
// but we should have some successful and some failed regions
|
||||
assert!(
|
||||
successful_regions > 0,
|
||||
"Should have at least some successful regions"
|
||||
);
|
||||
assert!(
|
||||
failed_regions > 0,
|
||||
"Should have at least some failed regions"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_gc_concurrency_with_retryable_errors() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create multiple regions
|
||||
let mut region_stats = Vec::new();
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
for i in 1..=5 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
let region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
region_stats.push(region_stat);
|
||||
candidates.push(mock_candidate(region_id));
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, region_stats)]);
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: (1..=5)
|
||||
.map(|i| (RegionId::new(table_id, i as u32), 1))
|
||||
.collect(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
    let gc_report = (1..=5)
        .map(|i| {
            let region_id = RegionId::new(table_id, i as u32);
            (
                region_id,
                // Mock a successful GC report that records a deleted-files entry,
                // even when there are no files to delete.
                GcReport::new(HashMap::from([(region_id, vec![])]), HashSet::new()),
            )
        })
        .collect();
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_report)),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(
|
||||
table_id,
|
||||
(1..=5)
|
||||
.map(|i| (RegionId::new(table_id, i as u32), peer.clone()))
|
||||
.collect(),
|
||||
),
|
||||
)])),
|
||||
);
|
||||
|
||||
// Configure concurrency limit
|
||||
let config = GcSchedulerOptions {
|
||||
region_gc_concurrency: 2, // Process 2 regions concurrently
|
||||
retry_backoff_duration: Duration::from_millis(50),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer.clone(),
|
||||
candidates.into_iter().map(|c| (table_id, c)).collect(),
|
||||
)]);
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
let report = report.per_datanode_reports.get(&peer.id).unwrap();
|
||||
|
||||
// In the new implementation without retry logic, all regions should be processed
|
||||
// The exact behavior depends on how the mock handles the regions
|
||||
info!(
|
||||
"Deleted files: {:?}",
|
||||
report.deleted_files.keys().collect::<Vec<_>>()
|
||||
);
|
||||
info!("Need retry regions: {:?}", report.need_retry_regions);
|
||||
|
||||
// We should have processed all 5 regions in some way
|
||||
let total_processed = report.deleted_files.len() + report.need_retry_regions.len();
|
||||
assert_eq!(total_processed, 5, "Should have processed all 5 regions");
|
||||
}
|
||||
@@ -1,197 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::datanode::RegionManifestInfo;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
/// Configuration Tests
|
||||
#[tokio::test]
|
||||
async fn test_different_gc_weights() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB to pass size threshold
|
||||
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
// Test with different weights
|
||||
let config1 = GcSchedulerOptions {
|
||||
sst_count_weight: 2.0,
|
||||
file_removed_count_weight: 0.5,
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler1 = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: config1,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates1 = scheduler1.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
let config2 = GcSchedulerOptions {
|
||||
sst_count_weight: 0.5,
|
||||
file_removed_count_weight: 2.0,
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler2 = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: config2,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = &ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
let candidates2 = scheduler2.select_gc_candidates(stats).await.unwrap();
|
||||
|
||||
// Both should select the region but with different scores
|
||||
assert_eq!(
|
||||
candidates1.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates for config1, got {}",
|
||||
candidates1.len()
|
||||
);
|
||||
assert_eq!(
|
||||
candidates2.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates for config2, got {}",
|
||||
candidates2.len()
|
||||
);
|
||||
|
||||
// Verify the region is actually selected
|
||||
assert!(
|
||||
candidates1.contains_key(&table_id),
|
||||
"Config1 should contain table_id {}",
|
||||
table_id
|
||||
);
|
||||
assert!(
|
||||
candidates2.contains_key(&table_id),
|
||||
"Config2 should contain table_id {}",
|
||||
table_id
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_regions_per_table_threshold() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let mut region_stats = Vec::new();
|
||||
|
||||
// Create many regions
|
||||
for i in 1..=10 {
|
||||
let region_id = RegionId::new(table_id, i as u32);
|
||||
let mut stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
region_stats.push(stat);
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, region_stats)]);
|
||||
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let config = GcSchedulerOptions {
|
||||
regions_per_table_threshold: 3, // Limit to 3 regions per table
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB (default)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let stats = ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
|
||||
let candidates = scheduler.select_gc_candidates(&stats).await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
candidates.len(),
|
||||
1,
|
||||
"Expected 1 table with candidates, got {}",
|
||||
candidates.len()
|
||||
);
|
||||
if let Some(table_candidates) = candidates.get(&table_id) {
|
||||
// Should be limited to 3 regions
|
||||
assert_eq!(
|
||||
table_candidates.len(),
|
||||
3,
|
||||
"Expected 3 candidates for table {}, got {}",
|
||||
table_id,
|
||||
table_candidates.len()
|
||||
);
|
||||
} else {
|
||||
panic!("Expected table {} to have candidates", table_id);
|
||||
}
|
||||
}
|
||||
@@ -1,293 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::datanode::RegionManifestInfo;
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{
|
||||
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat, new_empty_report_with,
|
||||
};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
/// Error Handling Tests
|
||||
#[tokio::test]
|
||||
async fn test_gc_regions_failure_handling() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create a region stat with proper size and file_removed_cnt to ensure it gets selected as a candidate
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
// Create a context that will return an error for gc_regions
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(region_id, GcReport::default());
|
||||
|
||||
// Inject an error for gc_regions method
|
||||
let gc_error = crate::error::UnexpectedSnafu {
|
||||
violated: "Simulated GC failure for testing".to_string(),
|
||||
}
|
||||
.build();
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
file_refs: HashMap::from([(region_id, HashSet::from([FileId::random()]))]),
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)]))
|
||||
.with_gc_regions_error(gc_error),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// This should handle the failure gracefully
|
||||
let report = scheduler.handle_tick().await.unwrap();
|
||||
|
||||
// Validate the report shows the failure handling
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
1,
|
||||
"Should process 1 datanode despite failure"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
0,
|
||||
"Should have 0 failed datanodes (failure handled via need_retry_regions)"
|
||||
);
|
||||
|
||||
// Check that the region is in need_retry_regions due to the failure
|
||||
let datanode_report = report.per_datanode_reports.values().next().unwrap();
|
||||
assert_eq!(
|
||||
datanode_report.need_retry_regions.len(),
|
||||
1,
|
||||
"Should have 1 region in need_retry_regions due to failure"
|
||||
);
|
||||
assert!(
|
||||
datanode_report.need_retry_regions.contains(&region_id),
|
||||
"Region should be in need_retry_regions"
|
||||
);
|
||||
|
||||
// Verify that calls were made despite potential failures
|
||||
assert_eq!(
|
||||
*ctx.get_table_to_region_stats_calls.lock().unwrap(),
|
||||
1,
|
||||
"Expected 1 call to get_table_to_region_stats"
|
||||
);
|
||||
assert!(
|
||||
*ctx.get_file_references_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to get_file_references"
|
||||
);
|
||||
assert!(
|
||||
*ctx.gc_regions_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to gc_regions"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_file_references_failure() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create a region stat with proper size and file_removed_cnt to ensure it gets selected as a candidate
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
// Create context with empty file refs (simulating failure)
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
file_refs: Arc::new(Mutex::new(Some(FileRefsManifest::default()))),
|
||||
gc_reports: Arc::new(Mutex::new(HashMap::from([(
|
||||
region_id,
|
||||
new_empty_report_with([region_id]),
|
||||
)]))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions {
|
||||
retry_backoff_duration: Duration::from_millis(10), // shorten for test
|
||||
..Default::default()
|
||||
},
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let report = scheduler.handle_tick().await.unwrap();
|
||||
|
||||
// Validate the report shows the expected results
|
||||
// In the new implementation, even if get_file_references fails, we still create a datanode report
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
1,
|
||||
"Should process 1 datanode"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
0,
|
||||
"Should have 0 failed datanodes (failure handled gracefully)"
|
||||
);
|
||||
|
||||
// The region should be processed but may have empty results due to file refs failure
|
||||
let datanode_report = report.per_datanode_reports.values().next().unwrap();
|
||||
// The current implementation still processes the region even with file refs failure
|
||||
// and creates an empty entry in deleted_files
|
||||
assert!(
|
||||
datanode_report.deleted_files.contains_key(&region_id),
|
||||
"Should have region in deleted_files (even if empty)"
|
||||
);
|
||||
assert!(
|
||||
datanode_report.deleted_files[&region_id].is_empty(),
|
||||
"Should have empty deleted files due to file refs failure"
|
||||
);
|
||||
|
||||
// Should still attempt to get file references (may be called multiple times due to retry logic)
|
||||
assert!(
|
||||
*ctx.get_file_references_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to get_file_references, got {}",
|
||||
*ctx.get_file_references_calls.lock().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_table_route_failure() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
|
||||
// Create a region stat with proper size and file_removed_cnt to ensure it gets selected as a candidate
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
// Inject an error for get_table_route method to simulate failure
|
||||
let route_error = crate::error::UnexpectedSnafu {
|
||||
violated: "Simulated table route failure for testing".to_string(),
|
||||
}
|
||||
.build();
|
||||
|
||||
// Create context with table route error injection
|
||||
let ctx = Arc::new(MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
..Default::default()
|
||||
});
|
||||
ctx.set_table_route_error(route_error);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// Get candidates first
|
||||
let stats = &ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
let candidates = scheduler.select_gc_candidates(stats).await.unwrap();
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
Peer::new(1, ""),
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
// This should handle table route failure gracefully
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
// Should process the datanode but handle route error gracefully
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
0,
|
||||
"Expected 0 datanode report"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
1,
|
||||
"Expected 1 failed datanodes (route error handled gracefully)"
|
||||
);
|
||||
assert!(
|
||||
report.failed_datanodes.contains_key(&1),
|
||||
"Failed datanodes should contain the datanode with route error"
|
||||
);
|
||||
}
|
||||
@@ -1,272 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_candidate, mock_region_stat};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
// Full File Listing Tests
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_full_file_listing_first_time_gc() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let gc_report = GcReport {
|
||||
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer.clone())]),
|
||||
)])),
|
||||
);
|
||||
|
||||
// Configure short full file listing interval for testing
|
||||
let config = GcSchedulerOptions {
|
||||
full_file_listing_interval: Duration::from_secs(3600), // 1 hour
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// First GC - should use full listing since region has never been GC'd
|
||||
let reports = scheduler
|
||||
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(reports.deleted_files.len(), 1);
|
||||
|
||||
// Verify that full listing was used by checking the tracker
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
let gc_info = tracker
|
||||
.get(&region_id)
|
||||
.expect("Region should be in tracker");
|
||||
assert!(
|
||||
gc_info.last_full_listing_time.is_some(),
|
||||
"First GC should use full listing"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_full_file_listing_interval_enforcement() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let gc_report = GcReport {
|
||||
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer.clone())]),
|
||||
)])),
|
||||
);
|
||||
|
||||
// Configure very short full file listing interval for testing
|
||||
let config = GcSchedulerOptions {
|
||||
full_file_listing_interval: Duration::from_millis(100), // 100ms
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// First GC - should use full listing
|
||||
let reports1 = scheduler
|
||||
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(reports1.deleted_files.len(), 1);
|
||||
|
||||
// Get the first full listing time
|
||||
let first_full_listing_time = {
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
let gc_info = tracker
|
||||
.get(&region_id)
|
||||
.expect("Region should be in tracker");
|
||||
gc_info
|
||||
.last_full_listing_time
|
||||
.expect("Should have full listing time")
|
||||
};
|
||||
|
||||
// Wait for interval to pass
|
||||
tokio::time::sleep(Duration::from_millis(150)).await;
|
||||
|
||||
// Second GC - should use full listing again since interval has passed
|
||||
let _reports2 = scheduler
|
||||
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify that full listing was used again
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
let gc_info = tracker
|
||||
.get(&region_id)
|
||||
.expect("Region should be in tracker");
|
||||
let second_full_listing_time = gc_info
|
||||
.last_full_listing_time
|
||||
.expect("Should have full listing time");
|
||||
|
||||
assert!(
|
||||
second_full_listing_time > first_full_listing_time,
|
||||
"Second GC should update full listing time"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_full_file_listing_no_interval_passed() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let region_stat = mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let gc_report = GcReport {
|
||||
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(HashMap::from([(region_id, gc_report)]))),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer.clone())]),
|
||||
)])),
|
||||
);
|
||||
|
||||
// Configure long full file listing interval
|
||||
let config = GcSchedulerOptions {
|
||||
full_file_listing_interval: Duration::from_secs(3600), // 1 hour
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// First GC - should use full listing
|
||||
let reports1 = scheduler
|
||||
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(reports1.deleted_files.len(), 1);
|
||||
|
||||
// Get the first full listing time
|
||||
let first_full_listing_time = {
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
let gc_info = tracker
|
||||
.get(&region_id)
|
||||
.expect("Region should be in tracker");
|
||||
gc_info
|
||||
.last_full_listing_time
|
||||
.expect("Should have full listing time")
|
||||
};
|
||||
|
||||
// Second GC immediately - should NOT use full listing since interval hasn't passed
|
||||
let reports2 = scheduler
|
||||
.process_datanode_gc(peer.clone(), vec![(table_id, mock_candidate(region_id))])
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(reports2.deleted_files.len(), 1);
|
||||
|
||||
// Verify that full listing time was NOT updated
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
let gc_info = tracker
|
||||
.get(&region_id)
|
||||
.expect("Region should be in tracker");
|
||||
let second_full_listing_time = gc_info
|
||||
.last_full_listing_time
|
||||
.expect("Should have full listing time");
|
||||
|
||||
assert_eq!(
|
||||
second_full_listing_time, first_full_listing_time,
|
||||
"Second GC should not update full listing time when interval hasn't passed"
|
||||
);
|
||||
}
|
||||
@@ -1,252 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::datanode::RegionManifestInfo;
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{
|
||||
MockSchedulerCtx, TEST_REGION_SIZE_200MB, mock_region_stat, new_empty_report_with,
|
||||
};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
// Integration Flow Tests
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_full_gc_workflow() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(
|
||||
region_id,
|
||||
GcReport {
|
||||
deleted_files: HashMap::from([(region_id, vec![FileId::random(), FileId::random()])]),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// Run the full workflow
|
||||
let report = scheduler.handle_tick().await.unwrap();
|
||||
|
||||
// Validate the returned GcJobReport - should have 1 datanode report
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
1,
|
||||
"Should process 1 datanode"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
0,
|
||||
"Should have no failed datanodes"
|
||||
);
|
||||
|
||||
// Get the datanode report
|
||||
let datanode_report = report.per_datanode_reports.values().next().unwrap();
|
||||
|
||||
// Check that the region was processed successfully
|
||||
assert!(
|
||||
datanode_report.deleted_files.contains_key(&region_id),
|
||||
"Should have deleted files for region"
|
||||
);
|
||||
assert_eq!(
|
||||
datanode_report.deleted_files[&region_id].len(),
|
||||
2,
|
||||
"Should have 2 deleted files"
|
||||
);
|
||||
assert!(
|
||||
datanode_report.need_retry_regions.is_empty(),
|
||||
"Should have no retry regions"
|
||||
);
|
||||
|
||||
// Verify all steps were executed
|
||||
assert_eq!(
|
||||
*ctx.get_table_to_region_stats_calls.lock().unwrap(),
|
||||
1,
|
||||
"Expected 1 call to get_table_to_region_stats"
|
||||
);
|
||||
assert_eq!(
|
||||
*ctx.get_file_references_calls.lock().unwrap(),
|
||||
1,
|
||||
"Expected 1 call to get_file_references"
|
||||
);
|
||||
assert_eq!(
|
||||
*ctx.gc_regions_calls.lock().unwrap(),
|
||||
1,
|
||||
"Expected 1 call to gc_regions"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tracker_cleanup() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
// Create region stat with proper file_removed_cnt to ensure it gets selected as candidate
|
||||
let mut region_stat =
|
||||
mock_region_stat(region_id, RegionRole::Leader, TEST_REGION_SIZE_200MB, 10); // 200MB
|
||||
if let RegionManifestInfo::Mito {
|
||||
file_removed_cnt, ..
|
||||
} = &mut region_stat.region_manifest
|
||||
{
|
||||
*file_removed_cnt = 5;
|
||||
}
|
||||
|
||||
let table_stats = HashMap::from([(table_id, vec![region_stat])]);
|
||||
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(region_id, new_empty_report_with([region_id]));
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region_id, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
table_to_region_stats: Arc::new(Mutex::new(Some(table_stats))),
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)])),
|
||||
);
|
||||
|
||||
let old_region_gc_tracker = {
|
||||
let mut tracker = HashMap::new();
|
||||
tracker.insert(
|
||||
region_id,
|
||||
crate::gc::tracker::RegionGcInfo {
|
||||
last_full_listing_time: Some(Instant::now() - Duration::from_secs(7200)), // 2 hours ago
|
||||
last_gc_time: Instant::now() - Duration::from_secs(7200), // 2 hours ago
|
||||
},
|
||||
);
|
||||
// also insert a different table that should also be cleaned up
|
||||
tracker.insert(
|
||||
RegionId::new(2, 1),
|
||||
crate::gc::tracker::RegionGcInfo {
|
||||
last_full_listing_time: Some(Instant::now() - Duration::from_secs(7200)), // 2 hours ago
|
||||
last_gc_time: Instant::now() - Duration::from_secs(7200), // 2 hours ago
|
||||
},
|
||||
);
|
||||
tracker
|
||||
};
|
||||
|
||||
// Use a custom config with shorter cleanup interval to trigger cleanup
|
||||
let config = GcSchedulerOptions {
|
||||
// 30 minutes
|
||||
tracker_cleanup_interval: Duration::from_secs(1800),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config,
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(old_region_gc_tracker)),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(
|
||||
Instant::now() - Duration::from_secs(3600), // Old cleanup time (1 hour ago)
|
||||
)),
|
||||
};
|
||||
|
||||
let report = scheduler.handle_tick().await.unwrap();
|
||||
|
||||
// Validate the returned GcJobReport - should have 1 datanode report
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
1,
|
||||
"Should process 1 datanode"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
0,
|
||||
"Should have no failed datanodes"
|
||||
);
|
||||
|
||||
// Get the datanode report
|
||||
let datanode_report = report.per_datanode_reports.values().next().unwrap();
|
||||
|
||||
// Check that the region was processed successfully
|
||||
assert!(
|
||||
datanode_report.deleted_files.contains_key(&region_id),
|
||||
"Should have deleted files for region"
|
||||
);
|
||||
assert!(
|
||||
datanode_report.need_retry_regions.is_empty(),
|
||||
"Should have no retry regions"
|
||||
);
|
||||
|
||||
// Verify tracker was updated
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
assert!(
|
||||
tracker.contains_key(&region_id),
|
||||
"Tracker should contain region {}",
|
||||
region_id
|
||||
);
|
||||
// only one entry
|
||||
assert_eq!(tracker.len(), 1, "Tracker should only have 1 entry");
|
||||
}
|
||||
@@ -1,155 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
|
||||
use crate::gc::mock::{MockSchedulerCtx, new_candidate};
|
||||
use crate::gc::{GcScheduler, GcSchedulerOptions};
|
||||
|
||||
/// Edge Case Tests
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_empty_file_refs_manifest() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region_id = RegionId::new(table_id, 1);
|
||||
let peer = Peer::new(1, "");
|
||||
let candidates = HashMap::from([(table_id, vec![new_candidate(region_id, 1.0)])]);
|
||||
|
||||
// Empty file refs manifest
|
||||
let file_refs = FileRefsManifest::default();
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
candidates: Arc::new(Mutex::new(Some(candidates))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(table_id, vec![(region_id, peer)]),
|
||||
)])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let peer = Peer::new(1, "");
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer,
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
assert_eq!(report.per_datanode_reports.len(), 1);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
// Should handle empty file refs gracefully
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiple_regions_per_table() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let table_id = 1;
|
||||
let region1 = RegionId::new(table_id, 1);
|
||||
let region2 = RegionId::new(table_id, 2);
|
||||
let region3 = RegionId::new(table_id, 3);
|
||||
let peer = Peer::new(1, "");
|
||||
|
||||
let candidates = HashMap::from([(
|
||||
table_id,
|
||||
vec![
|
||||
new_candidate(region1, 1.0),
|
||||
new_candidate(region2, 2.0),
|
||||
new_candidate(region3, 3.0),
|
||||
],
|
||||
)]);
|
||||
|
||||
let mut gc_reports = HashMap::new();
|
||||
gc_reports.insert(region1, GcReport::default());
|
||||
gc_reports.insert(region2, GcReport::default());
|
||||
gc_reports.insert(region3, GcReport::default());
|
||||
|
||||
let file_refs = FileRefsManifest {
|
||||
manifest_version: HashMap::from([(region1, 1), (region2, 1), (region3, 1)]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
MockSchedulerCtx {
|
||||
gc_reports: Arc::new(Mutex::new(gc_reports)),
|
||||
file_refs: Arc::new(Mutex::new(Some(file_refs))),
|
||||
candidates: Arc::new(Mutex::new(Some(candidates))),
|
||||
..Default::default()
|
||||
}
|
||||
.with_table_routes(HashMap::from([(
|
||||
table_id,
|
||||
(
|
||||
table_id,
|
||||
vec![
|
||||
(region1, peer.clone()),
|
||||
(region2, peer.clone()),
|
||||
(region3, peer.clone()),
|
||||
],
|
||||
),
|
||||
)])),
|
||||
);
|
||||
|
||||
let scheduler = GcScheduler {
|
||||
ctx: ctx.clone(),
|
||||
receiver: GcScheduler::channel().1,
|
||||
config: GcSchedulerOptions::default(),
|
||||
region_gc_tracker: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
let candidates = ctx.candidates.lock().unwrap().clone().unwrap_or_default();
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
peer.clone(),
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
assert_eq!(report.per_datanode_reports.len(), 1);
|
||||
assert_eq!(report.failed_datanodes.len(), 0);
|
||||
}
|
||||
@@ -13,12 +13,11 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::instruction::{self, GcRegions, GetFileRefs, GetFileRefsReply, InstructionReply};
|
||||
use common_meta::instruction::{self, GcRegions, InstructionReply};
|
||||
use common_meta::lock_key::RegionLock;
|
||||
use common_meta::peer::Peer;
|
||||
use common_procedure::error::ToJsonSnafu;
|
||||
@@ -26,126 +25,16 @@ use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
Result as ProcedureResult, Status,
|
||||
};
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_telemetry::error;
|
||||
use itertools::Itertools as _;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt as _;
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
use store_api::storage::GcReport;
|
||||
|
||||
use crate::error::{self, Result, SerializeToJsonSnafu};
|
||||
use crate::gc::Region2Peers;
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::service::mailbox::{Channel, MailboxRef};
|
||||
|
||||
/// Helper function to send GetFileRefs instruction and wait for reply.
|
||||
async fn send_get_file_refs(
|
||||
mailbox: &MailboxRef,
|
||||
server_addr: &str,
|
||||
peer: &Peer,
|
||||
instruction: GetFileRefs,
|
||||
timeout: Duration,
|
||||
) -> Result<GetFileRefsReply> {
|
||||
let instruction = instruction::Instruction::GetFileRefs(instruction);
|
||||
let msg = MailboxMessage::json_message(
|
||||
&format!("Get file references: {}", instruction),
|
||||
&format!("Metasrv@{}", server_addr),
|
||||
&format!("Datanode-{}@{}", peer.id, peer.addr),
|
||||
common_time::util::current_time_millis(),
|
||||
&instruction,
|
||||
)
|
||||
.with_context(|_| SerializeToJsonSnafu {
|
||||
input: instruction.to_string(),
|
||||
})?;
|
||||
|
||||
let mailbox_rx = mailbox
|
||||
.send(&Channel::Datanode(peer.id), msg, timeout)
|
||||
.await?;
|
||||
|
||||
let reply = match mailbox_rx.await {
|
||||
Ok(reply_msg) => HeartbeatMailbox::json_reply(&reply_msg)?,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to receive reply from datanode {} for GetFileRefs: {}",
|
||||
peer, e
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
let InstructionReply::GetFileRefs(reply) = reply else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: format!("{:?}", reply),
|
||||
reason: "Unexpected reply of the GetFileRefs instruction",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
Ok(reply)
|
||||
}
|
||||
|
||||
/// Helper function to send GcRegions instruction and wait for reply.
|
||||
async fn send_gc_regions(
|
||||
mailbox: &MailboxRef,
|
||||
peer: &Peer,
|
||||
gc_regions: GcRegions,
|
||||
server_addr: &str,
|
||||
timeout: Duration,
|
||||
description: &str,
|
||||
) -> Result<GcReport> {
|
||||
let instruction = instruction::Instruction::GcRegions(gc_regions.clone());
|
||||
let msg = MailboxMessage::json_message(
|
||||
&format!("{}: {}", description, instruction),
|
||||
&format!("Metasrv@{}", server_addr),
|
||||
&format!("Datanode-{}@{}", peer.id, peer.addr),
|
||||
common_time::util::current_time_millis(),
|
||||
&instruction,
|
||||
)
|
||||
.with_context(|_| SerializeToJsonSnafu {
|
||||
input: instruction.to_string(),
|
||||
})?;
|
||||
|
||||
let mailbox_rx = mailbox
|
||||
.send(&Channel::Datanode(peer.id), msg, timeout)
|
||||
.await?;
|
||||
|
||||
let reply = match mailbox_rx.await {
|
||||
Ok(reply_msg) => HeartbeatMailbox::json_reply(&reply_msg)?,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to receive reply from datanode {} for {}: {}",
|
||||
peer, description, e
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
let InstructionReply::GcRegions(reply) = reply else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: format!("{:?}", reply),
|
||||
reason: "Unexpected reply of the GcRegions instruction",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
let res = reply.result;
|
||||
match res {
|
||||
Ok(report) => Ok(report),
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Datanode {} reported error during GC for regions {:?}: {}",
|
||||
peer, gc_regions, e
|
||||
);
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Datanode {} reported error during GC for regions {:?}: {}",
|
||||
peer, gc_regions, e
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO(discord9): another procedure which does both get file refs and gc regions.
|
||||
pub struct GcRegionProcedure {
|
||||
mailbox: MailboxRef,
|
||||
@@ -185,15 +74,60 @@ impl GcRegionProcedure {
|
||||
}
|
||||
|
||||
async fn send_gc_instr(&self) -> Result<GcReport> {
|
||||
send_gc_regions(
|
||||
&self.mailbox,
|
||||
&self.data.peer,
|
||||
self.data.gc_regions.clone(),
|
||||
&self.data.server_addr,
|
||||
self.data.timeout,
|
||||
&self.data.description,
|
||||
let peer = &self.data.peer;
|
||||
let instruction = instruction::Instruction::GcRegions(self.data.gc_regions.clone());
|
||||
let msg = MailboxMessage::json_message(
|
||||
&format!("{}: {}", self.data.description, instruction),
|
||||
&format!("Metasrv@{}", self.data.server_addr),
|
||||
&format!("Datanode-{}@{}", peer.id, peer.addr),
|
||||
common_time::util::current_time_millis(),
|
||||
&instruction,
|
||||
)
|
||||
.await
|
||||
.with_context(|_| SerializeToJsonSnafu {
|
||||
input: instruction.to_string(),
|
||||
})?;
|
||||
|
||||
let mailbox_rx = self
|
||||
.mailbox
|
||||
.send(&Channel::Datanode(peer.id), msg, self.data.timeout)
|
||||
.await?;
|
||||
|
||||
let reply = match mailbox_rx.await {
|
||||
Ok(reply_msg) => HeartbeatMailbox::json_reply(&reply_msg)?,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to receive reply from datanode {} for {}: {}",
|
||||
peer, self.data.description, e
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
let InstructionReply::GcRegions(reply) = reply else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: format!("{:?}", reply),
|
||||
reason: "Unexpected reply of the GcRegions instruction",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
let res = reply.result;
|
||||
match res {
|
||||
Ok(report) => Ok(report),
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Datanode {} reported error during GC for regions {:?}: {}",
|
||||
peer, self.data.gc_regions, e
|
||||
);
|
||||
Err(error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Datanode {} reported error during GC for regions {:?}: {}",
|
||||
peer, self.data.gc_regions, e
|
||||
),
|
||||
}
|
||||
.fail()?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cast_result(res: Arc<dyn Any>) -> Result<GcReport> {
|
||||
@@ -230,10 +164,10 @@ impl Procedure for GcRegionProcedure {
|
||||
serde_json::to_string(&self.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
/// Read lock all regions involved in this GC procedure.
/// Write lock all regions involved in this GC procedure.
/// So that, e.g., region migration won't happen during GC and cause race conditions.
///
/// Only read lock the regions, not catalog/schema, because it can run concurrently with other procedures (i.e. drop database/table).
/// Only write lock the regions, not catalog/schema, because it can run concurrently with other procedures (i.e. drop database/table).
/// TODO(discord9): integration test to verify this
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let lock_key: Vec<_> = self
|
||||
@@ -248,297 +182,3 @@ impl Procedure for GcRegionProcedure {
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
}
|
||||
|
||||
/// Procedure that fetches file references and then performs a batch GC for multiple regions. It should only be used by the
/// admin function for triggering manual GC, as it holds locks on all involved regions for the whole duration of the procedure.
|
||||
pub struct BatchGcProcedure {
|
||||
mailbox: MailboxRef,
|
||||
data: BatchGcData,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct BatchGcData {
|
||||
state: State,
|
||||
server_addr: String,
|
||||
/// The regions to be GC-ed
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
region_routes: Region2Peers,
|
||||
/// Related regions (e.g., for shared files). Map: RegionId -> List of related RegionIds.
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
/// Acquired file references (Populated in Acquiring state)
|
||||
file_refs: FileRefsManifest,
|
||||
/// mailbox timeout duration
|
||||
timeout: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub enum State {
|
||||
/// Initial state
|
||||
Start,
|
||||
/// Fetching file references from datanodes
|
||||
Acquiring,
|
||||
/// Sending GC instruction to the target datanode
|
||||
Gcing,
|
||||
}
|
||||
|
||||
impl BatchGcProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::BatchGcProcedure";
|
||||
|
||||
pub fn new(
|
||||
mailbox: MailboxRef,
|
||||
server_addr: String,
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
region_routes: Region2Peers,
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
timeout: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
mailbox,
|
||||
data: BatchGcData {
|
||||
state: State::Start,
|
||||
server_addr,
|
||||
regions,
|
||||
full_file_listing,
|
||||
region_routes,
|
||||
related_regions,
|
||||
file_refs: FileRefsManifest::default(),
|
||||
timeout,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Get file references from all datanodes that host the regions
|
||||
async fn get_file_references(&self) -> Result<FileRefsManifest> {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
let query_regions = &self.data.regions;
|
||||
let related_regions = &self.data.related_regions;
|
||||
let region_routes = &self.data.region_routes;
|
||||
let timeout = self.data.timeout;
|
||||
|
||||
// Group regions by datanode to minimize RPC calls
|
||||
let mut datanode2query_regions: HashMap<Peer, Vec<RegionId>> = HashMap::new();
|
||||
|
||||
for region_id in query_regions {
|
||||
if let Some((leader, followers)) = region_routes.get(region_id) {
|
||||
datanode2query_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
// also need to send to follower regions for file refs, in case a query is running on a follower
|
||||
for follower in followers {
|
||||
datanode2query_regions
|
||||
.entry(follower.clone())
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
}
|
||||
} else {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"region_routes: {region_routes:?} does not contain region_id: {region_id}",
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
let mut datanode2related_regions: HashMap<Peer, HashMap<RegionId, Vec<RegionId>>> =
|
||||
HashMap::new();
|
||||
for (related_region, queries) in related_regions {
|
||||
if let Some((leader, _followers)) = region_routes.get(related_region) {
|
||||
datanode2related_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.insert(*related_region, queries.clone());
|
||||
} // since these are read from the manifest, no need to send to followers
|
||||
}
|
||||
|
||||
// Send GetFileRefs instructions to each datanode
|
||||
let mut all_file_refs: HashMap<RegionId, HashSet<store_api::storage::FileId>> =
|
||||
HashMap::new();
|
||||
let mut all_manifest_versions = HashMap::new();
|
||||
|
||||
for (peer, regions) in datanode2query_regions {
|
||||
let related_regions_for_peer =
|
||||
datanode2related_regions.remove(&peer).unwrap_or_default();
|
||||
|
||||
let instruction = GetFileRefs {
|
||||
query_regions: regions.clone(),
|
||||
related_regions: related_regions_for_peer,
|
||||
};
|
||||
|
||||
let reply = send_get_file_refs(
|
||||
&self.mailbox,
|
||||
&self.data.server_addr,
|
||||
&peer,
|
||||
instruction,
|
||||
timeout,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if !reply.success {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Failed to get file references from datanode {}: {:?}",
|
||||
peer, reply.error
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
// Merge the file references from this datanode
|
||||
for (region_id, file_refs) in reply.file_refs_manifest.file_refs {
|
||||
all_file_refs
|
||||
.entry(region_id)
|
||||
.or_default()
|
||||
.extend(file_refs);
|
||||
}
|
||||
|
||||
// the region manifest version should be the smallest one among all peers, so that outdated regions can be detected
|
||||
for (region_id, version) in reply.file_refs_manifest.manifest_version {
|
||||
let entry = all_manifest_versions.entry(region_id).or_insert(version);
|
||||
*entry = (*entry).min(version);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FileRefsManifest {
|
||||
file_refs: all_file_refs,
|
||||
manifest_version: all_manifest_versions,
|
||||
})
|
||||
}
|
||||
|
||||
/// Send GC instruction to all datanodes that host the regions,
|
||||
/// returns regions that need retry.
|
||||
async fn send_gc_instructions(&self) -> Result<Vec<RegionId>> {
|
||||
let regions = &self.data.regions;
|
||||
let region_routes = &self.data.region_routes;
|
||||
let file_refs = &self.data.file_refs;
|
||||
let timeout = self.data.timeout;
|
||||
|
||||
// Group regions by datanode
|
||||
let mut datanode2regions: HashMap<Peer, Vec<RegionId>> = HashMap::new();
|
||||
|
||||
for region_id in regions {
|
||||
if let Some((leader, _followers)) = region_routes.get(region_id) {
|
||||
datanode2regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
} else {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"region_routes: {region_routes:?} does not contain region_id: {region_id}",
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
let mut all_need_retry = HashSet::new();
|
||||
// Send GC instructions to each datanode
|
||||
for (peer, regions_for_peer) in datanode2regions {
|
||||
let gc_regions = GcRegions {
|
||||
regions: regions_for_peer.clone(),
|
||||
// file_refs_manifest can be large; cloning for each datanode is acceptable here since this is an admin-only operation.
|
||||
file_refs_manifest: file_refs.clone(),
|
||||
full_file_listing: self.data.full_file_listing,
|
||||
};
|
||||
|
||||
let report = send_gc_regions(
|
||||
&self.mailbox,
|
||||
&peer,
|
||||
gc_regions,
|
||||
self.data.server_addr.as_str(),
|
||||
timeout,
|
||||
"Batch GC",
|
||||
)
|
||||
.await?;
|
||||
|
||||
let success = report.deleted_files.keys().collect_vec();
|
||||
let need_retry = report.need_retry_regions.iter().cloned().collect_vec();
|
||||
|
||||
if need_retry.is_empty() {
|
||||
info!(
|
||||
"GC report from datanode {}: successfully deleted files for regions {:?}",
|
||||
peer, success
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
"GC report from datanode {}: successfully deleted files for regions {:?}, need retry for regions {:?}",
|
||||
peer, success, need_retry
|
||||
);
|
||||
}
|
||||
all_need_retry.extend(report.need_retry_regions);
|
||||
}
|
||||
|
||||
Ok(all_need_retry.into_iter().collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Procedure for BatchGcProcedure {
|
||||
fn type_name(&self) -> &str {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
match self.data.state {
|
||||
State::Start => {
|
||||
// Transition to Acquiring state
|
||||
self.data.state = State::Acquiring;
|
||||
Ok(Status::executing(false))
|
||||
}
|
||||
State::Acquiring => {
|
||||
// Get file references from all datanodes
|
||||
match self.get_file_references().await {
|
||||
Ok(file_refs) => {
|
||||
self.data.file_refs = file_refs;
|
||||
self.data.state = State::Gcing;
|
||||
Ok(Status::executing(false))
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to get file references: {}", e);
|
||||
Err(ProcedureError::external(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
State::Gcing => {
|
||||
// Send GC instructions to all datanodes
|
||||
// TODO(discord9): handle need-retry regions
|
||||
match self.send_gc_instructions().await {
|
||||
Ok(_) => {
|
||||
info!(
|
||||
"Batch GC completed successfully for regions {:?}",
|
||||
self.data.regions
|
||||
);
|
||||
Ok(Status::done())
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to send GC instructions: {}", e);
|
||||
Err(ProcedureError::external(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
serde_json::to_string(&self.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
/// Read lock all regions involved in this GC procedure,
/// so that, e.g., region migration won't happen during GC and cause race conditions.
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let lock_key: Vec<_> = self
|
||||
.data
|
||||
.regions
|
||||
.iter()
|
||||
.sorted() // sort to have a deterministic lock order
|
||||
.map(|id| RegionLock::Read(*id).into())
|
||||
.collect();
|
||||
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ impl GcScheduler {
|
||||
let now = Instant::now();
|
||||
|
||||
// Check if enough time has passed since last cleanup
|
||||
if now.saturating_duration_since(last_cleanup) < self.config.tracker_cleanup_interval {
|
||||
if now.duration_since(last_cleanup) < self.config.tracker_cleanup_interval {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
||||
@@ -129,20 +129,27 @@ impl HeartbeatHandler for RegionLeaseHandler {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::datanode::{RegionManifestInfo, RegionStat, Stat};
|
||||
use common_meta::distributed_time_constants;
|
||||
use common_meta::error::Result as MetaResult;
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::test_utils::new_test_table_info;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::test_util::MockKvBackendBuilder;
|
||||
use common_meta::kv_backend::txn::{Txn, TxnResponse};
|
||||
use common_meta::kv_backend::{KvBackend, TxnService};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::region_keeper::MemoryRegionKeeper;
|
||||
use common_meta::rpc::router::{LeaderState, Region, RegionRoute};
|
||||
use common_meta::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
|
||||
BatchPutRequest, BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest,
|
||||
PutResponse, RangeRequest, RangeResponse,
|
||||
};
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -418,19 +425,63 @@ mod test {
|
||||
assert_eq!(granted, expected);
|
||||
}
|
||||
|
||||
struct MockKvBackend;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TxnService for MockKvBackend {
|
||||
type Error = common_meta::error::Error;
|
||||
|
||||
async fn txn(&self, _txn: Txn) -> MetaResult<TxnResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn max_txn_ops(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for MockKvBackend {
|
||||
fn name(&self) -> &str {
|
||||
"mock_kv_backend"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn range(&self, _req: RangeRequest) -> MetaResult<RangeResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn put(&self, _req: PutRequest) -> MetaResult<PutResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _req: BatchPutRequest) -> MetaResult<BatchPutResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_get(&self, _req: BatchGetRequest) -> MetaResult<BatchGetResponse> {
|
||||
common_meta::error::UnexpectedSnafu {
|
||||
err_msg: "mock err",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn delete_range(&self, _req: DeleteRangeRequest) -> MetaResult<DeleteRangeResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, _req: BatchDeleteRequest) -> MetaResult<BatchDeleteResponse> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_renew_region_lease_failure() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let kv = MockKvBackendBuilder::default()
|
||||
.batch_get_fn(Arc::new(|_| {
|
||||
common_meta::error::UnexpectedSnafu {
|
||||
err_msg: "mock err",
|
||||
}
|
||||
.fail()
|
||||
}) as _)
|
||||
.build()
|
||||
.unwrap();
|
||||
let kvbackend = Arc::new(kv);
|
||||
let kvbackend = Arc::new(MockKvBackend);
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kvbackend));
|
||||
|
||||
let datanode_id = 1;
|
||||
|
||||
@@ -28,7 +28,7 @@ use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocato
|
||||
use common_meta::ddl::{
|
||||
DdlContext, NoopRegionFailureDetectorControl, RegionFailureDetectorControllerRef,
|
||||
};
|
||||
use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
|
||||
use common_meta::ddl_manager::DdlManager;
|
||||
use common_meta::distributed_time_constants::{self};
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
@@ -54,7 +54,7 @@ use store_api::storage::MAX_REGION_SEQ;
|
||||
use crate::bootstrap::build_default_meta_peer_client;
|
||||
use crate::cache_invalidator::MetasrvCacheInvalidator;
|
||||
use crate::cluster::MetaPeerClientRef;
|
||||
use crate::error::{self, BuildWalOptionsAllocatorSnafu, OtherSnafu, Result};
|
||||
use crate::error::{self, BuildWalOptionsAllocatorSnafu, Result};
|
||||
use crate::events::EventHandlerImpl;
|
||||
use crate::gc::GcScheduler;
|
||||
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
|
||||
@@ -402,23 +402,13 @@ impl MetasrvBuilder {
|
||||
let procedure_manager_c = procedure_manager.clone();
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
|
||||
let ddl_manager = if let Some(configurator) = plugins
|
||||
.as_ref()
|
||||
.and_then(|p| p.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>())
|
||||
{
|
||||
let ctx = DdlManagerConfigureContext {
|
||||
kv_backend: kv_backend.clone(),
|
||||
meta_peer_client: meta_peer_client.clone(),
|
||||
};
|
||||
configurator
|
||||
.configure(ddl_manager, ctx)
|
||||
.await
|
||||
.context(OtherSnafu)?
|
||||
} else {
|
||||
ddl_manager
|
||||
#[cfg(feature = "enterprise")]
|
||||
let ddl_manager = {
|
||||
let trigger_ddl_manager = plugins.as_ref().and_then(|plugins| {
|
||||
plugins.get::<common_meta::ddl_manager::TriggerDdlManagerRef>()
|
||||
});
|
||||
ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
|
||||
};
|
||||
|
||||
let ddl_manager = Arc::new(ddl_manager);
|
||||
|
||||
let region_flush_ticker = if is_remote_wal {
|
||||
@@ -638,9 +628,3 @@ impl Default for MetasrvBuilder {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// The context for [`DdlManagerConfiguratorRef`].
|
||||
pub struct DdlManagerConfigureContext {
|
||||
pub kv_backend: KvBackendRef,
|
||||
pub meta_peer_client: MetaPeerClientRef,
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ use common_procedure::ProcedureManagerRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
pub mod region_migration;
|
||||
pub mod repartition;
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_util;
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod group;
pub mod plan;

#[cfg(test)]
pub mod test_util;
|
||||
@@ -1,284 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||||
|
||||
pub(crate) mod repartition_start;
|
||||
pub(crate) mod update_metadata;
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::DatanodeId;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue, RegionInfo};
|
||||
use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_procedure::{Context as ProcedureContext, Status};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
|
||||
pub type GroupId = Uuid;
|
||||
|
||||
pub struct RepartitionGroupProcedure {}
|
||||
|
||||
pub struct Context {
|
||||
pub persistent_ctx: PersistentContext,
|
||||
|
||||
pub cache_invalidator: CacheInvalidatorRef,
|
||||
|
||||
pub table_metadata_manager: TableMetadataManagerRef,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct GroupPrepareResult {
|
||||
pub source_routes: Vec<RegionRoute>,
|
||||
pub target_routes: Vec<RegionRoute>,
|
||||
pub central_region: RegionId,
|
||||
pub central_region_datanode_id: DatanodeId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct PersistentContext {
|
||||
pub group_id: GroupId,
|
||||
/// The table id of the repartition group.
|
||||
pub table_id: TableId,
|
||||
/// The source regions of the repartition group.
|
||||
pub sources: Vec<RegionDescriptor>,
|
||||
/// The target regions of the repartition group.
|
||||
pub targets: Vec<RegionDescriptor>,
|
||||
/// The result of group prepare.
|
||||
/// The value will be set in [RepartitionStart](crate::procedure::repartition::group::repartition_start::RepartitionStart) state.
|
||||
pub group_prepare_result: Option<GroupPrepareResult>,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
/// Retrieves the table route value for the given table id.
|
||||
///
|
||||
/// Retry:
|
||||
/// - Failed to retrieve the metadata of table.
|
||||
///
|
||||
/// Abort:
|
||||
/// - Table route not found.
|
||||
pub async fn get_table_route_value(
|
||||
&self,
|
||||
) -> Result<DeserializedValueWithBytes<TableRouteValue>> {
|
||||
let table_id = self.persistent_ctx.table_id;
|
||||
let group_id = self.persistent_ctx.group_id;
|
||||
let table_route_value = self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.table_route_storage()
|
||||
.get_with_raw_bytes(table_id)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!(
|
||||
"Failed to get table route for table: {}, repartition group: {}",
|
||||
table_id, group_id
|
||||
),
|
||||
})?
|
||||
.context(error::TableRouteNotFoundSnafu { table_id })?;
|
||||
|
||||
Ok(table_route_value)
|
||||
}
|
||||
|
||||
/// Returns the `datanode_table_value`
|
||||
///
|
||||
/// Retry:
|
||||
/// - Failed to retrieve the metadata of datanode table.
|
||||
pub async fn get_datanode_table_value(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
datanode_id: u64,
|
||||
) -> Result<DatanodeTableValue> {
|
||||
let datanode_table_value = self
|
||||
.table_metadata_manager
|
||||
.datanode_table_manager()
|
||||
.get(&DatanodeTableKey {
|
||||
datanode_id,
|
||||
table_id,
|
||||
})
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!("Failed to get DatanodeTable: {table_id}"),
|
||||
})?
|
||||
.context(error::DatanodeTableNotFoundSnafu {
|
||||
table_id,
|
||||
datanode_id,
|
||||
})?;
|
||||
Ok(datanode_table_value)
|
||||
}
|
||||
|
||||
/// Broadcasts the invalidate table cache message.
|
||||
pub async fn invalidate_table_cache(&self) -> Result<()> {
|
||||
let table_id = self.persistent_ctx.table_id;
|
||||
let group_id = self.persistent_ctx.group_id;
|
||||
let subject = format!(
|
||||
"Invalidate table cache for repartition table, group: {}, table: {}",
|
||||
group_id, table_id,
|
||||
);
|
||||
let ctx = common_meta::cache_invalidator::Context {
|
||||
subject: Some(subject),
|
||||
};
|
||||
let _ = self
|
||||
.cache_invalidator
|
||||
.invalidate(&ctx, &[CacheIdent::TableId(table_id)])
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Updates the table route.
|
||||
///
|
||||
/// Retry:
|
||||
/// - Failed to retrieve the metadata of datanode table.
|
||||
///
|
||||
/// Abort:
|
||||
/// - Table route not found.
|
||||
/// - Failed to update the table route.
|
||||
pub async fn update_table_route(
|
||||
&self,
|
||||
current_table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
|
||||
new_region_routes: Vec<RegionRoute>,
|
||||
) -> Result<()> {
|
||||
let table_id = self.persistent_ctx.table_id;
|
||||
// Safety: prepare result is set in [RepartitionStart] state.
|
||||
let prepare_result = self.persistent_ctx.group_prepare_result.as_ref().unwrap();
|
||||
let central_region_datanode_table_value = self
|
||||
.get_datanode_table_value(table_id, prepare_result.central_region_datanode_id)
|
||||
.await?;
|
||||
let RegionInfo {
|
||||
region_options,
|
||||
region_wal_options,
|
||||
..
|
||||
} = &central_region_datanode_table_value.region_info;
|
||||
|
||||
self.table_metadata_manager
|
||||
.update_table_route(
|
||||
table_id,
|
||||
central_region_datanode_table_value.region_info.clone(),
|
||||
current_table_route_value,
|
||||
new_region_routes,
|
||||
region_options,
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the region routes of the given table route value.
|
||||
///
|
||||
/// Abort:
|
||||
/// - Table route value is not physical.
|
||||
pub fn region_routes(
|
||||
table_id: TableId,
|
||||
table_route_value: &TableRouteValue,
|
||||
) -> Result<&Vec<RegionRoute>> {
|
||||
table_route_value
|
||||
.region_routes()
|
||||
.with_context(|_| error::UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!(
|
||||
"TableRoute({:?}) is a non-physical TableRouteValue.",
|
||||
table_id
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde(tag = "repartition_group_state")]
|
||||
pub(crate) trait State: Sync + Send + Debug {
|
||||
fn name(&self) -> &'static str {
|
||||
let type_name = std::any::type_name::<Self>();
|
||||
// short name
|
||||
type_name.split("::").last().unwrap_or(type_name)
|
||||
}
|
||||
|
||||
/// Yields the next [State] and [Status].
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
procedure_ctx: &ProcedureContext,
|
||||
) -> Result<(Box<dyn State>, Status)>;
|
||||
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::kv_backend::test_util::MockKvBackendBuilder;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::procedure::repartition::test_util::{TestingEnv, new_persistent_context};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_table_route_value_not_found_error() {
|
||||
let env = TestingEnv::new();
|
||||
let persistent_context = new_persistent_context(1024, vec![], vec![]);
|
||||
let ctx = env.create_context(persistent_context);
|
||||
let err = ctx.get_table_route_value().await.unwrap_err();
|
||||
assert_matches!(err, Error::TableRouteNotFound { .. });
|
||||
assert!(!err.is_retryable());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_table_route_value_retry_error() {
|
||||
let kv = MockKvBackendBuilder::default()
|
||||
.range_fn(Arc::new(|_| {
|
||||
common_meta::error::UnexpectedSnafu {
|
||||
err_msg: "mock err",
|
||||
}
|
||||
.fail()
|
||||
}))
|
||||
.build()
|
||||
.unwrap();
|
||||
let mut env = TestingEnv::new();
|
||||
env.table_metadata_manager = Arc::new(TableMetadataManager::new(Arc::new(kv)));
|
||||
let persistent_context = new_persistent_context(1024, vec![], vec![]);
|
||||
let ctx = env.create_context(persistent_context);
|
||||
let err = ctx.get_table_route_value().await.unwrap_err();
|
||||
assert!(err.is_retryable());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_datanode_table_value_retry_error() {
|
||||
let kv = MockKvBackendBuilder::default()
|
||||
.range_fn(Arc::new(|_| {
|
||||
common_meta::error::UnexpectedSnafu {
|
||||
err_msg: "mock err",
|
||||
}
|
||||
.fail()
|
||||
}))
|
||||
.build()
|
||||
.unwrap();
|
||||
let mut env = TestingEnv::new();
|
||||
env.table_metadata_manager = Arc::new(TableMetadataManager::new(Arc::new(kv)));
|
||||
let persistent_context = new_persistent_context(1024, vec![], vec![]);
|
||||
let ctx = env.create_context(persistent_context);
|
||||
let err = ctx.get_datanode_table_value(1024, 1).await.unwrap_err();
|
||||
assert!(err.is_retryable());
|
||||
}
|
||||
}
|
||||
@@ -1,273 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_procedure::{Context as ProcedureContext, Status};
|
||||
use common_telemetry::debug;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::repartition::group::{
|
||||
Context, GroupId, GroupPrepareResult, State, region_routes,
|
||||
};
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct RepartitionStart;
|
||||
|
||||
/// Ensures that the partition expression of the region route matches the partition expression of the region descriptor.
|
||||
fn ensure_region_route_expr_match(
|
||||
region_route: &RegionRoute,
|
||||
region_descriptor: &RegionDescriptor,
|
||||
) -> Result<RegionRoute> {
|
||||
let actual = &region_route.region.partition_expr;
|
||||
let expected = region_descriptor
|
||||
.partition_expr
|
||||
.as_json_str()
|
||||
.context(error::SerializePartitionExprSnafu)?;
|
||||
ensure!(
|
||||
actual == &expected,
|
||||
error::PartitionExprMismatchSnafu {
|
||||
region_id: region_route.region.id,
|
||||
expected,
|
||||
actual,
|
||||
}
|
||||
);
|
||||
Ok(region_route.clone())
|
||||
}
|
||||
|
||||
impl RepartitionStart {
|
||||
/// Ensures that both source and target regions are present in the region routes.
|
||||
///
|
||||
/// Both source and target regions must be present in the region routes (target regions should be allocated before repartitioning).
|
||||
#[allow(dead_code)]
|
||||
fn ensure_route_present(
|
||||
group_id: GroupId,
|
||||
region_routes: &[RegionRoute],
|
||||
sources: &[RegionDescriptor],
|
||||
targets: &[RegionDescriptor],
|
||||
) -> Result<GroupPrepareResult> {
|
||||
ensure!(
|
||||
!sources.is_empty(),
|
||||
error::UnexpectedSnafu {
|
||||
violated: "Sources are empty"
|
||||
}
|
||||
);
|
||||
|
||||
let region_routes_map = region_routes
|
||||
.iter()
|
||||
.map(|r| (r.region.id, r))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let source_region_routes = sources
|
||||
.iter()
|
||||
.map(|s| {
|
||||
region_routes_map
|
||||
.get(&s.region_id)
|
||||
.context(error::RepartitionSourceRegionMissingSnafu {
|
||||
group_id,
|
||||
region_id: s.region_id,
|
||||
})
|
||||
.and_then(|r| ensure_region_route_expr_match(r, s))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let target_region_routes = targets
|
||||
.iter()
|
||||
.map(|t| {
|
||||
region_routes_map
|
||||
.get(&t.region_id)
|
||||
.context(error::RepartitionTargetRegionMissingSnafu {
|
||||
group_id,
|
||||
region_id: t.region_id,
|
||||
})
|
||||
.map(|r| (*r).clone())
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let central_region = sources[0].region_id;
|
||||
let central_region_datanode_id = source_region_routes[0]
|
||||
.leader_peer
|
||||
.as_ref()
|
||||
.context(error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Leader peer is not set for central region: {}",
|
||||
central_region
|
||||
),
|
||||
})?
|
||||
.id;
|
||||
|
||||
Ok(GroupPrepareResult {
|
||||
source_routes: source_region_routes,
|
||||
target_routes: target_region_routes,
|
||||
central_region,
|
||||
central_region_datanode_id,
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn next_state() -> (Box<dyn State>, Status) {
|
||||
// TODO(weny): change it later.
|
||||
(Box::new(RepartitionStart), Status::executing(true))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for RepartitionStart {
|
||||
/// Captures the group prepare result.
|
||||
///
|
||||
/// Retry:
|
||||
/// - Failed to get the table route.
|
||||
///
|
||||
/// Abort
|
||||
/// - Table route not found.
|
||||
/// - Table route is not physical.
|
||||
/// - Failed to ensure the route is present.
|
||||
/// - Failed to capture the group prepare result.
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
_procedure_ctx: &ProcedureContext,
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
if ctx.persistent_ctx.group_prepare_result.is_some() {
|
||||
return Ok(Self::next_state());
|
||||
}
|
||||
let table_id = ctx.persistent_ctx.table_id;
|
||||
let group_id = ctx.persistent_ctx.group_id;
|
||||
let table_route_value = ctx.get_table_route_value().await?.into_inner();
|
||||
let region_routes = region_routes(table_id, &table_route_value)?;
|
||||
let group_prepare_result = Self::ensure_route_present(
|
||||
group_id,
|
||||
region_routes,
|
||||
&ctx.persistent_ctx.sources,
|
||||
&ctx.persistent_ctx.targets,
|
||||
)?;
|
||||
ctx.persistent_ctx.group_prepare_result = Some(group_prepare_result);
|
||||
debug!(
|
||||
"Repartition group {}: captured {} sources, {} targets",
|
||||
group_id,
|
||||
ctx.persistent_ctx.sources.len(),
|
||||
ctx.persistent_ctx.targets.len()
|
||||
);
|
||||
|
||||
Ok(Self::next_state())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use store_api::storage::RegionId;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::procedure::repartition::group::repartition_start::RepartitionStart;
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
use crate::procedure::repartition::test_util::range_expr;
|
||||
|
||||
#[test]
|
||||
fn test_ensure_route_present_missing_source_region() {
|
||||
let source_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
partition_expr: range_expr("x", 0, 100),
|
||||
};
|
||||
let target_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 2),
|
||||
partition_expr: range_expr("x", 0, 10),
|
||||
};
|
||||
let region_routes = vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(1024, 2),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}];
|
||||
let err = RepartitionStart::ensure_route_present(
|
||||
Uuid::new_v4(),
|
||||
&region_routes,
|
||||
&[source_region],
|
||||
&[target_region],
|
||||
)
|
||||
.unwrap_err();
|
||||
assert_matches!(err, Error::RepartitionSourceRegionMissing { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ensure_route_present_partition_expr_mismatch() {
|
||||
let source_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
partition_expr: range_expr("x", 0, 100),
|
||||
};
|
||||
let target_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 2),
|
||||
partition_expr: range_expr("x", 0, 10),
|
||||
};
|
||||
let region_routes = vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(1024, 1),
|
||||
partition_expr: range_expr("x", 0, 5).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}];
|
||||
let err = RepartitionStart::ensure_route_present(
|
||||
Uuid::new_v4(),
|
||||
&region_routes,
|
||||
&[source_region],
|
||||
&[target_region],
|
||||
)
|
||||
.unwrap_err();
|
||||
assert_matches!(err, Error::PartitionExprMismatch { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ensure_route_present_missing_target_region() {
|
||||
let source_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 1),
|
||||
partition_expr: range_expr("x", 0, 100),
|
||||
};
|
||||
let target_region = RegionDescriptor {
|
||||
region_id: RegionId::new(1024, 2),
|
||||
partition_expr: range_expr("x", 0, 10),
|
||||
};
|
||||
let region_routes = vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(1024, 1),
|
||||
partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}];
|
||||
let err = RepartitionStart::ensure_route_present(
|
||||
Uuid::new_v4(),
|
||||
®ion_routes,
|
||||
&[source_region],
|
||||
&[target_region],
|
||||
)
|
||||
.unwrap_err();
|
||||
assert_matches!(err, Error::RepartitionTargetRegionMissing { .. });
|
||||
}
|
||||
}
|
||||
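// A success-path sketch complementing the error cases above; a minimal, hedged example,
// not part of the diff itself. It assumes that a route table whose partition expressions
// match every source and which contains every target id yields `Ok`, mirroring the abort
// conditions documented on `ensure_route_present`.
#[cfg(test)]
mod ensure_route_present_success_sketch {
    use common_meta::peer::Peer;
    use common_meta::rpc::router::{Region, RegionRoute};
    use store_api::storage::RegionId;
    use uuid::Uuid;

    use crate::procedure::repartition::group::repartition_start::RepartitionStart;
    use crate::procedure::repartition::plan::RegionDescriptor;
    use crate::procedure::repartition::test_util::range_expr;

    #[test]
    fn test_ensure_route_present_success() {
        let source_region = RegionDescriptor {
            region_id: RegionId::new(1024, 1),
            partition_expr: range_expr("x", 0, 100),
        };
        let target_region = RegionDescriptor {
            region_id: RegionId::new(1024, 2),
            partition_expr: range_expr("x", 0, 10),
        };
        // Both the source (with a matching partition expression) and the target are present.
        let region_routes = vec![
            RegionRoute {
                region: Region {
                    id: RegionId::new(1024, 1),
                    partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                ..Default::default()
            },
            RegionRoute {
                region: Region {
                    id: RegionId::new(1024, 2),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                ..Default::default()
            },
        ];
        let result = RepartitionStart::ensure_route_present(
            Uuid::new_v4(),
            &region_routes,
            &[source_region],
            &[target_region],
        );
        assert!(result.is_ok());
    }
}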
@@ -1,80 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub(crate) mod apply_staging_region;
pub(crate) mod rollback_staging_region;

use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use common_telemetry::warn;
use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::procedure::repartition::group::repartition_start::RepartitionStart;
use crate::procedure::repartition::group::{Context, State};

#[derive(Debug, Serialize, Deserialize)]
pub enum UpdateMetadata {
    /// Applies the new partition expressions for staging regions.
    ApplyStaging,
    /// Rolls back the new partition expressions for staging regions.
    RollbackStaging,
}

impl UpdateMetadata {
    #[allow(dead_code)]
    fn next_state() -> (Box<dyn State>, Status) {
        // TODO(weny): change it later.
        (Box::new(RepartitionStart), Status::executing(true))
    }
}

#[async_trait::async_trait]
#[typetag::serde]
impl State for UpdateMetadata {
    async fn next(
        &mut self,
        ctx: &mut Context,
        _procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        match self {
            UpdateMetadata::ApplyStaging => {
                // TODO(weny): If all metadata have already been updated, skip applying staging regions.
                self.apply_staging_regions(ctx).await?;

                if let Err(err) = ctx.invalidate_table_cache().await {
                    warn!(
                        "Failed to broadcast the invalidate table cache message during the apply staging regions, error: {err:?}"
                    );
                };
                Ok(Self::next_state())
            }
            UpdateMetadata::RollbackStaging => {
                self.rollback_staging_regions(ctx).await?;

                if let Err(err) = ctx.invalidate_table_cache().await {
                    warn!(
                        "Failed to broadcast the invalidate table cache message during the rollback staging regions, error: {err:?}"
                    );
                };
                Ok(Self::next_state())
            }
        }
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}
@@ -1,181 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_telemetry::error;
use snafu::{OptionExt, ResultExt};

use crate::error::{self, Result};
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::group::{Context, GroupId, region_routes};
use crate::procedure::repartition::plan::RegionDescriptor;

impl UpdateMetadata {
    /// Applies the new partition expressions for staging regions.
    ///
    /// Abort:
    /// - Target region not found.
    /// - Source region not found.
    fn apply_staging_region_routes(
        group_id: GroupId,
        sources: &[RegionDescriptor],
        targets: &[RegionDescriptor],
        current_region_routes: &[RegionRoute],
    ) -> Result<Vec<RegionRoute>> {
        let mut region_routes = current_region_routes.to_vec();
        let mut region_routes_map = region_routes
            .iter_mut()
            .map(|route| (route.region.id, route))
            .collect::<HashMap<_, _>>();

        for target in targets {
            let region_route = region_routes_map.get_mut(&target.region_id).context(
                error::RepartitionTargetRegionMissingSnafu {
                    group_id,
                    region_id: target.region_id,
                },
            )?;
            region_route.region.partition_expr = target
                .partition_expr
                .as_json_str()
                .context(error::SerializePartitionExprSnafu)?;
            region_route.set_leader_staging();
        }

        for source in sources {
            let region_route = region_routes_map.get_mut(&source.region_id).context(
                error::RepartitionSourceRegionMissingSnafu {
                    group_id,
                    region_id: source.region_id,
                },
            )?;
            region_route.set_leader_staging();
        }

        Ok(region_routes)
    }

    /// Applies the new partition expressions for staging regions.
    ///
    /// Abort:
    /// - Table route is not physical.
    /// - Target region not found.
    /// - Source region not found.
    /// - Failed to update the table route.
    /// - Central region datanode table value not found.
    #[allow(dead_code)]
    pub(crate) async fn apply_staging_regions(&self, ctx: &mut Context) -> Result<()> {
        let table_id = ctx.persistent_ctx.table_id;
        let group_id = ctx.persistent_ctx.group_id;
        let current_table_route_value = ctx.get_table_route_value().await?;
        let region_routes = region_routes(table_id, current_table_route_value.get_inner_ref())?;
        let new_region_routes = Self::apply_staging_region_routes(
            group_id,
            &ctx.persistent_ctx.sources,
            &ctx.persistent_ctx.targets,
            region_routes,
        )?;

        if let Err(err) = ctx
            .update_table_route(&current_table_route_value, new_region_routes)
            .await
        {
            error!(err; "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}");
            return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
                reason: format!(
                    "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}"
                ),
            });
        };

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use common_meta::peer::Peer;
    use common_meta::rpc::router::{Region, RegionRoute};
    use store_api::storage::RegionId;
    use uuid::Uuid;

    use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
    use crate::procedure::repartition::plan::RegionDescriptor;
    use crate::procedure::repartition::test_util::range_expr;

    #[test]
    fn test_generate_region_routes() {
        let group_id = Uuid::new_v4();
        let table_id = 1024;
        let region_routes = vec![
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 1),
                    partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                ..Default::default()
            },
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 2),
                    partition_expr: String::new(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                ..Default::default()
            },
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 3),
                    partition_expr: String::new(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                ..Default::default()
            },
        ];
        let source_region = RegionDescriptor {
            region_id: RegionId::new(table_id, 1),
            partition_expr: range_expr("x", 0, 100),
        };
        let target_region = RegionDescriptor {
            region_id: RegionId::new(table_id, 2),
            partition_expr: range_expr("x", 0, 10),
        };

        let new_region_routes = UpdateMetadata::apply_staging_region_routes(
            group_id,
            &[source_region],
            &[target_region],
            &region_routes,
        )
        .unwrap();
        assert!(new_region_routes[0].is_leader_staging());
        assert_eq!(
            new_region_routes[0].region.partition_expr,
            range_expr("x", 0, 100).as_json_str().unwrap()
        );
        assert_eq!(
            new_region_routes[1].region.partition_expr,
            range_expr("x", 0, 10).as_json_str().unwrap()
        );
        assert!(new_region_routes[1].is_leader_staging());
        assert!(!new_region_routes[2].is_leader_staging());
    }
}
@@ -1,187 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_telemetry::error;
use snafu::{OptionExt, ResultExt};

use crate::error::{self, Result};
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
use crate::procedure::repartition::group::{Context, GroupId, region_routes};

impl UpdateMetadata {
    /// Rolls back the staging regions.
    ///
    /// Abort:
    /// - Source region not found.
    /// - Target region not found.
    #[allow(dead_code)]
    fn rollback_staging_region_routes(
        group_id: GroupId,
        source_routes: &[RegionRoute],
        target_routes: &[RegionRoute],
        current_region_routes: &[RegionRoute],
    ) -> Result<Vec<RegionRoute>> {
        let mut region_routes = current_region_routes.to_vec();
        let mut region_routes_map = region_routes
            .iter_mut()
            .map(|route| (route.region.id, route))
            .collect::<HashMap<_, _>>();

        for source in source_routes {
            let region_route = region_routes_map.get_mut(&source.region.id).context(
                error::RepartitionSourceRegionMissingSnafu {
                    group_id,
                    region_id: source.region.id,
                },
            )?;
            region_route.region.partition_expr = source.region.partition_expr.clone();
            region_route.clear_leader_staging();
        }

        for target in target_routes {
            let region_route = region_routes_map.get_mut(&target.region.id).context(
                error::RepartitionTargetRegionMissingSnafu {
                    group_id,
                    region_id: target.region.id,
                },
            )?;
            region_route.clear_leader_staging();
        }

        Ok(region_routes)
    }

    /// Rolls back the metadata for staging regions.
    ///
    /// Abort:
    /// - Table route is not physical.
    /// - Source region not found.
    /// - Target region not found.
    /// - Failed to update the table route.
    /// - Central region datanode table value not found.
    #[allow(dead_code)]
    pub(crate) async fn rollback_staging_regions(&self, ctx: &mut Context) -> Result<()> {
        let table_id = ctx.persistent_ctx.table_id;
        let group_id = ctx.persistent_ctx.group_id;
        let current_table_route_value = ctx.get_table_route_value().await?;
        let region_routes = region_routes(table_id, current_table_route_value.get_inner_ref())?;
        // Safety: prepare result is set in [RepartitionStart] state.
        let prepare_result = ctx.persistent_ctx.group_prepare_result.as_ref().unwrap();
        let new_region_routes = Self::rollback_staging_region_routes(
            group_id,
            &prepare_result.source_routes,
            &prepare_result.target_routes,
            region_routes,
        )?;

        if let Err(err) = ctx
            .update_table_route(&current_table_route_value, new_region_routes)
            .await
        {
            error!(err; "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}");
            return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu {
                reason: format!(
                    "Failed to update the table route during the updating metadata for repartition: {table_id}, group_id: {group_id}"
                ),
            });
        };

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use common_meta::peer::Peer;
    use common_meta::rpc::router::{LeaderState, Region, RegionRoute};
    use store_api::storage::RegionId;
    use uuid::Uuid;

    use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
    use crate::procedure::repartition::test_util::range_expr;

    #[test]
    fn test_rollback_staging_region_routes() {
        let group_id = Uuid::new_v4();
        let table_id = 1024;
        let region_routes = vec![
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 1),
                    partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                leader_state: Some(LeaderState::Staging),
                ..Default::default()
            },
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 2),
                    partition_expr: String::new(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                leader_state: Some(LeaderState::Staging),
                ..Default::default()
            },
            RegionRoute {
                region: Region {
                    id: RegionId::new(table_id, 3),
                    partition_expr: String::new(),
                    ..Default::default()
                },
                leader_peer: Some(Peer::empty(1)),
                leader_state: Some(LeaderState::Downgrading),
                ..Default::default()
            },
        ];
        let source_routes = vec![RegionRoute {
            region: Region {
                id: RegionId::new(table_id, 1),
                partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
                ..Default::default()
            },
            leader_peer: Some(Peer::empty(1)),
            ..Default::default()
        }];
        let target_routes = vec![RegionRoute {
            region: Region {
                id: RegionId::new(table_id, 2),
                partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
                ..Default::default()
            },
            leader_peer: Some(Peer::empty(1)),
            ..Default::default()
        }];
        let new_region_routes = UpdateMetadata::rollback_staging_region_routes(
            group_id,
            &source_routes,
            &target_routes,
            &region_routes,
        )
        .unwrap();
        assert!(!new_region_routes[0].is_leader_staging());
        assert_eq!(
            new_region_routes[0].region.partition_expr,
            range_expr("x", 0, 20).as_json_str().unwrap(),
        );
        assert!(!new_region_routes[1].is_leader_staging());
        assert!(new_region_routes[2].is_leader_downgrading());
    }
}
@@ -1,91 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::sequence::SequenceBuilder;
use datatypes::value::Value;
use partition::expr::{PartitionExpr, col};
use store_api::storage::TableId;
use uuid::Uuid;

use crate::cache_invalidator::MetasrvCacheInvalidator;
use crate::metasrv::MetasrvInfo;
use crate::procedure::repartition::group::{Context, PersistentContext};
use crate::procedure::repartition::plan::RegionDescriptor;
use crate::procedure::test_util::MailboxContext;

/// `TestingEnv` provides components during the tests.
pub struct TestingEnv {
    pub table_metadata_manager: TableMetadataManagerRef,
    pub mailbox_ctx: MailboxContext,
}

impl Default for TestingEnv {
    fn default() -> Self {
        Self::new()
    }
}

impl TestingEnv {
    pub fn new() -> Self {
        let kv_backend = Arc::new(MemoryKvBackend::new());
        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
        let mailbox_sequence =
            SequenceBuilder::new("test_heartbeat_mailbox", kv_backend.clone()).build();
        let mailbox_ctx = MailboxContext::new(mailbox_sequence);

        Self {
            table_metadata_manager,
            mailbox_ctx,
        }
    }

    pub fn create_context(self, persistent_context: PersistentContext) -> Context {
        let cache_invalidator = Arc::new(MetasrvCacheInvalidator::new(
            self.mailbox_ctx.mailbox().clone(),
            MetasrvInfo {
                server_addr: String::new(),
            },
        ));

        Context {
            persistent_ctx: persistent_context,
            table_metadata_manager: self.table_metadata_manager.clone(),
            cache_invalidator,
        }
    }
}

pub fn range_expr(col_name: &str, start: i64, end: i64) -> PartitionExpr {
    col(col_name)
        .gt_eq(Value::Int64(start))
        .and(col(col_name).lt(Value::Int64(end)))
}

pub fn new_persistent_context(
    table_id: TableId,
    sources: Vec<RegionDescriptor>,
    targets: Vec<RegionDescriptor>,
) -> PersistentContext {
    PersistentContext {
        group_id: Uuid::new_v4(),
        table_id,
        sources,
        targets,
        group_prepare_result: None,
    }
}
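// A minimal usage sketch for the helpers above, assuming only the items defined in this
// file: `range_expr("x", 0, 100)` builds the partition expression `x >= 0 AND x < 100`
// through the `partition::expr` builder, and `new_persistent_context` wires such
// descriptors into a fresh repartition group whose `group_prepare_result` starts empty.
//
//     let sources = vec![RegionDescriptor {
//         region_id: RegionId::new(1024, 1),
//         partition_expr: range_expr("x", 0, 100),
//     }];
//     let targets = vec![RegionDescriptor {
//         region_id: RegionId::new(1024, 2),
//         partition_expr: range_expr("x", 0, 50),
//     }];
//     let persistent_ctx = new_persistent_context(1024, sources, targets);
//     let _ctx = TestingEnv::new().create_context(persistent_ctx);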
@@ -14,7 +14,6 @@ async-stream.workspace = true
async-trait.workspace = true
base64.workspace = true
bytes.workspace = true
fxhash = "0.2"
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
@@ -32,6 +31,7 @@ lazy_static = "1.4"
mito-codec.workspace = true
mito2.workspace = true
moka.workspace = true
mur3 = "0.1"
object-store.workspace = true
prometheus.workspace = true
serde.workspace = true
@@ -47,12 +47,6 @@ common-meta = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
mito2 = { workspace = true, features = ["test"] }
common-wal = { workspace = true }
criterion = { version = "0.4", features = ["async", "async_tokio"] }
mur3 = "0.1"

[[bench]]
name = "bench_tsid_generator"
harness = false

[package.metadata.cargo-udeps.ignore]
normal = ["aquamarine"]
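# A hedged usage note for the `[[bench]]` target declared above: with the `criterion`
# dev-dependency and `harness = false`, the benchmark is expected to run via the
# standard Cargo flag, from this crate's directory:
#
#     cargo bench --bench bench_tsid_generator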
@@ -1,273 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::hash::Hasher;
|
||||
|
||||
use criterion::{Criterion, black_box, criterion_group, criterion_main};
|
||||
use fxhash::FxHasher;
|
||||
use mur3::Hasher128;
|
||||
|
||||
// A random number (from original implementation)
|
||||
const TSID_HASH_SEED: u32 = 846793005;
|
||||
|
||||
/// Original TSID generator using mur3::Hasher128
|
||||
/// Hashes both label name and value for each label pair
|
||||
struct OriginalTsidGenerator {
|
||||
hasher: Hasher128,
|
||||
}
|
||||
|
||||
impl OriginalTsidGenerator {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
hasher: Hasher128::with_seed(TSID_HASH_SEED),
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a label pair (name and value) to the generator.
|
||||
fn write_label(&mut self, name: &str, value: &str) {
|
||||
use std::hash::Hash;
|
||||
name.hash(&mut self.hasher);
|
||||
value.hash(&mut self.hasher);
|
||||
}
|
||||
|
||||
/// Generates a new TSID.
|
||||
fn finish(&mut self) -> u64 {
|
||||
// TSID is 64 bits, simply truncate the 128 bits hash
|
||||
let (hash, _) = self.hasher.finish128();
|
||||
hash
|
||||
}
|
||||
}
|
||||
|
||||
/// Current TSID generator using fxhash::FxHasher
|
||||
/// Fast path: pre-computes label name hash, only hashes values
|
||||
struct CurrentTsidGenerator {
|
||||
hasher: FxHasher,
|
||||
}
|
||||
|
||||
impl CurrentTsidGenerator {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
hasher: FxHasher::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_with_label_name_hash(label_name_hash: u64) -> Self {
|
||||
let mut hasher = FxHasher::default();
|
||||
hasher.write_u64(label_name_hash);
|
||||
Self { hasher }
|
||||
}
|
||||
|
||||
/// Writes a label value to the generator.
|
||||
fn write_str(&mut self, value: &str) {
|
||||
self.hasher.write(value.as_bytes());
|
||||
self.hasher.write_u8(0xff);
|
||||
}
|
||||
|
||||
/// Generates a new TSID.
|
||||
fn finish(&mut self) -> u64 {
|
||||
self.hasher.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Pre-computes label name hash (used in fast path)
|
||||
fn compute_label_name_hash(labels: &[(&str, &str)]) -> u64 {
|
||||
let mut hasher = FxHasher::default();
|
||||
for (name, _) in labels {
|
||||
hasher.write(name.as_bytes());
|
||||
hasher.write_u8(0xff);
|
||||
}
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
fn bench_tsid_generator_small(c: &mut Criterion) {
|
||||
let labels = vec![("namespace", "greptimedb"), ("host", "127.0.0.1")];
|
||||
|
||||
let mut group = c.benchmark_group("tsid_generator_small_2_labels");
|
||||
group.bench_function("original_mur3", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen = OriginalTsidGenerator::new();
|
||||
for (name, value) in &labels {
|
||||
tsid_gen.write_label(black_box(name), black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
let label_name_hash = compute_label_name_hash(&labels);
|
||||
group.bench_function("current_fxhash_fast_path", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen =
|
||||
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
|
||||
for (_, value) in &labels {
|
||||
tsid_gen.write_str(black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tsid_generator_medium(c: &mut Criterion) {
|
||||
let labels = vec![
|
||||
("namespace", "greptimedb"),
|
||||
("host", "127.0.0.1"),
|
||||
("region", "us-west-2"),
|
||||
("env", "production"),
|
||||
("service", "api"),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("tsid_generator_medium_5_labels");
|
||||
group.bench_function("original_mur3", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen = OriginalTsidGenerator::new();
|
||||
for (name, value) in &labels {
|
||||
tsid_gen.write_label(black_box(name), black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
let label_name_hash = compute_label_name_hash(&labels);
|
||||
group.bench_function("current_fxhash_fast_path", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen =
|
||||
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
|
||||
for (_, value) in &labels {
|
||||
tsid_gen.write_str(black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tsid_generator_large(c: &mut Criterion) {
|
||||
let labels = vec![
|
||||
("namespace", "greptimedb"),
|
||||
("host", "127.0.0.1"),
|
||||
("region", "us-west-2"),
|
||||
("env", "production"),
|
||||
("service", "api"),
|
||||
("version", "v1.0.0"),
|
||||
("cluster", "cluster-1"),
|
||||
("dc", "dc1"),
|
||||
("rack", "rack-1"),
|
||||
("pod", "pod-123"),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("tsid_generator_large_10_labels");
|
||||
group.bench_function("original_mur3", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen = OriginalTsidGenerator::new();
|
||||
for (name, value) in &labels {
|
||||
tsid_gen.write_label(black_box(name), black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
let label_name_hash = compute_label_name_hash(&labels);
|
||||
group.bench_function("current_fxhash_fast_path", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen =
|
||||
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
|
||||
for (_, value) in &labels {
|
||||
tsid_gen.write_str(black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_tsid_generator_slow_path(c: &mut Criterion) {
|
||||
// Simulate slow path: some labels have null values (empty strings)
|
||||
let labels_with_nulls = vec![
|
||||
("namespace", "greptimedb"),
|
||||
("host", "127.0.0.1"),
|
||||
("region", ""), // null
|
||||
("env", "production"),
|
||||
];
|
||||
|
||||
let labels_all_non_null = vec![
|
||||
("namespace", "greptimedb"),
|
||||
("host", "127.0.0.1"),
|
||||
("env", "production"),
|
||||
];
|
||||
|
||||
let mut group = c.benchmark_group("tsid_generator_slow_path_with_nulls");
|
||||
|
||||
// Original: always hashes name and value
|
||||
group.bench_function("original_mur3_with_nulls", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen = OriginalTsidGenerator::new();
|
||||
for (name, value) in &labels_with_nulls {
|
||||
if !value.is_empty() {
|
||||
tsid_gen.write_label(black_box(name), black_box(value));
|
||||
}
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
// Current slow path: recomputes label name hash
|
||||
group.bench_function("current_fxhash_slow_path", |b| {
|
||||
b.iter(|| {
|
||||
// Step 1: Compute label name hash for non-null labels
|
||||
let mut name_hasher = CurrentTsidGenerator::new();
|
||||
for (name, value) in &labels_with_nulls {
|
||||
if !value.is_empty() {
|
||||
name_hasher.write_str(black_box(name));
|
||||
}
|
||||
}
|
||||
let label_name_hash = name_hasher.finish();
|
||||
|
||||
// Step 2: Use label name hash and hash values
|
||||
let mut tsid_gen = CurrentTsidGenerator::new_with_label_name_hash(label_name_hash);
|
||||
for (_, value) in &labels_with_nulls {
|
||||
if !value.is_empty() {
|
||||
tsid_gen.write_str(black_box(value));
|
||||
}
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
// Current fast path: pre-computed (for comparison)
|
||||
let label_name_hash = compute_label_name_hash(&labels_all_non_null);
|
||||
group.bench_function("current_fxhash_fast_path_no_nulls", |b| {
|
||||
b.iter(|| {
|
||||
let mut tsid_gen =
|
||||
CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash));
|
||||
for (_, value) in &labels_all_non_null {
|
||||
tsid_gen.write_str(black_box(value));
|
||||
}
|
||||
black_box(tsid_gen.finish())
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_tsid_generator_small,
|
||||
bench_tsid_generator_medium,
|
||||
bench_tsid_generator_large,
|
||||
bench_tsid_generator_slow_path
|
||||
);
|
||||
criterion_main!(benches);
|
||||
@@ -23,7 +23,6 @@ mod options;
|
||||
mod put;
|
||||
mod read;
|
||||
mod region_metadata;
|
||||
mod staging;
|
||||
mod state;
|
||||
mod sync;
|
||||
|
||||
@@ -212,13 +211,6 @@ impl RegionEngine for MetricEngine {
|
||||
let mut extension_return_value = HashMap::new();
|
||||
|
||||
let result = match request {
|
||||
RegionRequest::EnterStaging(_) => {
|
||||
if self.inner.is_physical_region(region_id) {
|
||||
self.handle_enter_staging_request(region_id, request).await
|
||||
} else {
|
||||
UnsupportedRegionRequestSnafu { request }.fail()
|
||||
}
|
||||
}
|
||||
RegionRequest::Put(put) => self.inner.put_region(region_id, put).await,
|
||||
RegionRequest::Create(create) => {
|
||||
self.inner
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
mod extract_new_columns;
|
||||
mod validate;
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use extract_new_columns::extract_new_columns;
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
@@ -143,20 +143,16 @@ impl MetricEngineInner {
|
||||
};
|
||||
let data_region_id = to_data_region_id(physical_region_id);
|
||||
|
||||
// Acquire logical region locks in a deterministic order to avoid deadlocks when multiple
|
||||
// alter operations target overlapping regions concurrently.
|
||||
let region_ids = requests
|
||||
.iter()
|
||||
.map(|(region_id, _)| *region_id)
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
let mut write_guards = Vec::with_capacity(region_ids.len());
|
||||
for region_id in region_ids {
|
||||
write_guards.push(
|
||||
self.metadata_region
|
||||
.write_lock_logical_region(region_id)
|
||||
.await?,
|
||||
);
|
||||
let mut write_guards = HashMap::with_capacity(requests.len());
|
||||
for (region_id, _) in requests.iter() {
|
||||
if write_guards.contains_key(region_id) {
|
||||
continue;
|
||||
}
|
||||
let _write_guard = self
|
||||
.metadata_region
|
||||
.write_lock_logical_region(*region_id)
|
||||
.await?;
|
||||
write_guards.insert(*region_id, _write_guard);
|
||||
}
|
||||
|
||||
self.data_region
|
||||
|
||||
@@ -119,7 +119,7 @@ mod tests {
|
||||
.index_file_path
|
||||
.map(|path| path.replace(&e.file_id, "<file_id>"));
|
||||
e.file_id = "<file_id>".to_string();
|
||||
e.index_version = 0;
|
||||
e.index_file_id = e.index_file_id.map(|_| "<index_file_id>".to_string());
|
||||
format!("\n{:?}", e)
|
||||
})
|
||||
.sorted()
|
||||
@@ -128,12 +128,12 @@ mod tests {
|
||||
assert_eq!(
|
||||
debug_format,
|
||||
r#"
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3487, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#,
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3487, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_file_id: Some("<index_file_id>"), level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_file_id: None, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#
|
||||
);
|
||||
// list from storage
|
||||
let storage_entries = mito
|
||||
|
||||
@@ -272,15 +272,15 @@ mod tests {
|
||||
.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
let expected = "\
|
||||
+-------------------------+----------------+------------+---------------------+-------+
|
||||
| greptime_timestamp | greptime_value | __table_id | __tsid | job |
|
||||
+-------------------------+----------------+------------+---------------------+-------+
|
||||
| 1970-01-01T00:00:00 | 0.0 | 3 | 2955007454552897459 | tag_0 |
|
||||
| 1970-01-01T00:00:00.001 | 1.0 | 3 | 2955007454552897459 | tag_0 |
|
||||
| 1970-01-01T00:00:00.002 | 2.0 | 3 | 2955007454552897459 | tag_0 |
|
||||
| 1970-01-01T00:00:00.003 | 3.0 | 3 | 2955007454552897459 | tag_0 |
|
||||
| 1970-01-01T00:00:00.004 | 4.0 | 3 | 2955007454552897459 | tag_0 |
|
||||
+-------------------------+----------------+------------+---------------------+-------+";
|
||||
+-------------------------+----------------+------------+----------------------+-------+
|
||||
| greptime_timestamp | greptime_value | __table_id | __tsid | job |
|
||||
+-------------------------+----------------+------------+----------------------+-------+
|
||||
| 1970-01-01T00:00:00 | 0.0 | 3 | 12881218023286672757 | tag_0 |
|
||||
| 1970-01-01T00:00:00.001 | 1.0 | 3 | 12881218023286672757 | tag_0 |
|
||||
| 1970-01-01T00:00:00.002 | 2.0 | 3 | 12881218023286672757 | tag_0 |
|
||||
| 1970-01-01T00:00:00.003 | 3.0 | 3 | 12881218023286672757 | tag_0 |
|
||||
| 1970-01-01T00:00:00.004 | 4.0 | 3 | 12881218023286672757 | tag_0 |
|
||||
+-------------------------+----------------+------------+----------------------+-------+";
|
||||
assert_eq!(expected, batches.pretty_print().unwrap(), "physical region");
|
||||
|
||||
// read data from logical region
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::AffectedRows;
|
||||
use snafu::ResultExt;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{EnterStagingRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::engine::MetricEngine;
|
||||
use crate::error::{MitoEnterStagingOperationSnafu, Result};
|
||||
use crate::utils;
|
||||
|
||||
impl MetricEngine {
|
||||
/// Handles the enter staging request for the given region.
|
||||
pub(crate) async fn handle_enter_staging_request(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: RegionRequest,
|
||||
) -> Result<AffectedRows> {
|
||||
let metadata_region_id = utils::to_metadata_region_id(region_id);
|
||||
let data_region_id = utils::to_data_region_id(region_id);
|
||||
|
||||
// For metadata region, it doesn't care about the partition expr, so we can just pass an empty string.
|
||||
self.inner
|
||||
.mito
|
||||
.handle_request(
|
||||
metadata_region_id,
|
||||
RegionRequest::EnterStaging(EnterStagingRequest {
|
||||
partition_expr: String::new(),
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.context(MitoEnterStagingOperationSnafu)?;
|
||||
|
||||
self.inner
|
||||
.mito
|
||||
.handle_request(data_region_id, request)
|
||||
.await
|
||||
.context(MitoEnterStagingOperationSnafu)
|
||||
.map(|response| response.affected_rows)
|
||||
}
|
||||
}
|
||||
@@ -156,13 +156,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Mito enter staging operation fails"))]
|
||||
MitoEnterStagingOperation {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to collect record batch stream"))]
|
||||
CollectRecordBatchStream {
|
||||
source: common_recordbatch::error::Error,
|
||||
@@ -367,7 +360,6 @@ impl ErrorExt for Error {
|
||||
| MitoWriteOperation { source, .. }
|
||||
| MitoFlushOperation { source, .. }
|
||||
| MitoSyncOperation { source, .. }
|
||||
| MitoEnterStagingOperation { source, .. }
|
||||
| BatchOpenMitoRegion { source, .. }
|
||||
| BatchCatchupMitoRegion { source, .. } => source.status_code(),
|
||||
|
||||
|
||||
@@ -13,12 +13,11 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::hash::Hasher;
|
||||
use std::hash::Hash;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value};
|
||||
use datatypes::value::ValueRef;
|
||||
use fxhash::FxHasher;
|
||||
use mito_codec::row_converter::SparsePrimaryKeyCodec;
|
||||
use smallvec::SmallVec;
|
||||
use snafu::ResultExt;
|
||||
@@ -31,6 +30,9 @@ use store_api::storage::{ColumnId, TableId};
|
||||
|
||||
use crate::error::{EncodePrimaryKeySnafu, Result};
|
||||
|
||||
// A random number
|
||||
const TSID_HASH_SEED: u32 = 846793005;
|
||||
|
||||
/// A row modifier modifies [`Rows`].
|
||||
///
|
||||
/// - For [`PrimaryKeyEncoding::Sparse`] encoding,
|
||||
@@ -73,7 +75,6 @@ impl RowModifier {
|
||||
let num_output_column = num_column - num_primary_key_column + 1;
|
||||
|
||||
let mut buffer = vec![];
|
||||
|
||||
for mut iter in iter.iter_mut() {
|
||||
let (table_id, tsid) = Self::fill_internal_columns(table_id, &iter);
|
||||
let mut values = Vec::with_capacity(num_output_column);
|
||||
@@ -146,72 +147,47 @@ impl RowModifier {
|
||||
|
||||
/// Fills internal columns of a row with table name and a hash of tag values.
|
||||
pub fn fill_internal_columns(table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) {
|
||||
let ts_id = if !iter.has_null_labels() {
|
||||
// No null labels in row, we can safely reuse the precomputed label name hash.
|
||||
let mut ts_id_gen = TsidGenerator::new(iter.index.label_name_hash);
|
||||
for (_, value) in iter.primary_keys_with_name() {
|
||||
// The type is checked before. So only null is ignored.
|
||||
if let Some(ValueData::StringValue(string)) = &value.value_data {
|
||||
ts_id_gen.write_str(string);
|
||||
} else {
|
||||
unreachable!(
|
||||
"Should not contain null or non-string value: {:?}, table id: {}",
|
||||
value, table_id
|
||||
);
|
||||
}
|
||||
let mut hasher = TsidGenerator::default();
|
||||
for (name, value) in iter.primary_keys_with_name() {
|
||||
// The type is checked before. So only null is ignored.
|
||||
if let Some(ValueData::StringValue(string)) = &value.value_data {
|
||||
hasher.write_label(name, string);
|
||||
}
|
||||
ts_id_gen.finish()
|
||||
} else {
|
||||
// Slow path: row contains null, recompute label hash
|
||||
let mut hasher = TsidGenerator::default();
|
||||
// 1. Find out label names with non-null values and get the hash.
|
||||
for (name, value) in iter.primary_keys_with_name() {
|
||||
// The type is checked before. So only null is ignored.
|
||||
if let Some(ValueData::StringValue(_)) = &value.value_data {
|
||||
hasher.write_str(name);
|
||||
}
|
||||
}
|
||||
let label_name_hash = hasher.finish();
|
||||
|
||||
// 2. Use label name hash as seed and continue with label values.
|
||||
let mut final_hasher = TsidGenerator::new(label_name_hash);
|
||||
for (_, value) in iter.primary_keys_with_name() {
|
||||
if let Some(ValueData::StringValue(value)) = &value.value_data {
|
||||
final_hasher.write_str(value);
|
||||
}
|
||||
}
|
||||
final_hasher.finish()
|
||||
};
|
||||
}
|
||||
let hash = hasher.finish();
|
||||
|
||||
(
|
||||
ValueData::U32Value(table_id).into(),
|
||||
ValueData::U64Value(ts_id).into(),
|
||||
ValueData::U64Value(hash).into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tsid generator.
|
||||
#[derive(Default)]
|
||||
pub struct TsidGenerator {
|
||||
hasher: FxHasher,
|
||||
hasher: mur3::Hasher128,
|
||||
}
|
||||
|
||||
impl Default for TsidGenerator {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
hasher: mur3::Hasher128::with_seed(TSID_HASH_SEED),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TsidGenerator {
|
||||
pub fn new(label_name_hash: u64) -> Self {
|
||||
let mut hasher = FxHasher::default();
|
||||
hasher.write_u64(label_name_hash);
|
||||
Self { hasher }
|
||||
}
|
||||
|
||||
/// Writes a label pair to the generator.
|
||||
pub fn write_str(&mut self, value: &str) {
|
||||
self.hasher.write(value.as_bytes());
|
||||
self.hasher.write_u8(0xff);
|
||||
pub fn write_label(&mut self, name: &str, value: &str) {
|
||||
name.hash(&mut self.hasher);
|
||||
value.hash(&mut self.hasher);
|
||||
}
|
||||
|
||||
/// Generates a new TSID.
|
||||
pub fn finish(&mut self) -> u64 {
|
||||
self.hasher.finish()
|
||||
// TSID is 64 bits, simply truncate the 128 bits hash
|
||||
let (hash, _) = self.hasher.finish128();
|
||||
hash
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,8 +202,6 @@ struct ValueIndex {
|
||||
struct IterIndex {
|
||||
indices: Vec<ValueIndex>,
|
||||
num_primary_key_column: usize,
|
||||
/// Precomputed hash for label names.
|
||||
label_name_hash: u64,
|
||||
}
|
||||
|
||||
impl IterIndex {
|
||||
@@ -278,22 +252,15 @@ impl IterIndex {
|
||||
}
|
||||
}
|
||||
let num_primary_key_column = primary_key_indices.len() + reserved_indices.len();
|
||||
let mut indices = Vec::with_capacity(num_primary_key_column + 2);
|
||||
indices.extend(reserved_indices);
|
||||
let mut label_name_hasher = TsidGenerator::default();
|
||||
for (pk_name, pk_index) in primary_key_indices {
|
||||
// primary_key_indices already sorted.
|
||||
label_name_hasher.write_str(pk_name);
|
||||
indices.push(pk_index);
|
||||
}
|
||||
let label_name_hash = label_name_hasher.finish();
|
||||
|
||||
indices.extend(ts_index);
|
||||
indices.extend(field_indices);
|
||||
let indices = reserved_indices
|
||||
.into_iter()
|
||||
.chain(primary_key_indices.values().cloned())
|
||||
.chain(ts_index)
|
||||
.chain(field_indices)
|
||||
.collect();
|
||||
IterIndex {
|
||||
indices,
|
||||
num_primary_key_column,
|
||||
label_name_hash,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -347,13 +314,6 @@ impl RowIter<'_> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true if any label in current row is null.
|
||||
fn has_null_labels(&self) -> bool {
|
||||
self.index.indices[..self.index.num_primary_key_column]
|
||||
.iter()
|
||||
.any(|idx| self.row.values[idx.index].value_data.is_none())
|
||||
}
|
||||
|
||||
/// Returns the primary keys.
|
||||
pub fn primary_keys(&self) -> impl Iterator<Item = (ColumnId, ValueRef<'_>)> {
|
||||
self.index.indices[..self.index.num_primary_key_column]
|
||||
@@ -439,9 +399,9 @@ mod tests {
|
||||
let result = encoder.modify_rows_sparse(rows_iter, table_id).unwrap();
|
||||
assert_eq!(result.rows[0].values.len(), 1);
|
||||
let encoded_primary_key = vec![
|
||||
128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 37, 196, 242, 181, 117, 224, 7, 137, 0,
|
||||
0, 0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
1, 1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2,
|
||||
128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 131, 9, 166, 190, 173, 37, 39, 240, 0, 0,
|
||||
0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
|
||||
1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2,
|
||||
];
|
||||
assert_eq!(
|
||||
result.rows[0].values[0],
|
||||
@@ -517,7 +477,7 @@ mod tests {
|
||||
assert_eq!(result.rows[0].values[2], ValueData::U32Value(1025).into());
|
||||
assert_eq!(
|
||||
result.rows[0].values[3],
|
||||
ValueData::U64Value(2721566936019240841).into()
|
||||
ValueData::U64Value(9442261431637846000).into()
|
||||
);
|
||||
assert_eq!(result.schema, expected_dense_schema());
|
||||
}
|
||||
@@ -536,7 +496,7 @@ mod tests {
|
||||
let row_iter = rows_iter.iter_mut().next().unwrap();
|
||||
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
|
||||
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
|
||||
assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into());
|
||||
assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into());
|
||||
|
||||
// Change the column order
|
||||
let schema = vec![
|
||||
@@ -564,264 +524,6 @@ mod tests {
|
||||
let row_iter = rows_iter.iter_mut().next().unwrap();
|
||||
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
|
||||
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
|
||||
assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into());
|
||||
}
|
||||
|
||||
/// Helper function to create a schema with multiple label columns
|
||||
fn create_multi_label_schema(labels: &[&str]) -> Vec<ColumnSchema> {
|
||||
labels
|
||||
.iter()
|
||||
.map(|name| ColumnSchema {
|
||||
column_name: name.to_string(),
|
||||
datatype: ColumnDataType::String as i32,
|
||||
semantic_type: SemanticType::Tag as _,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Helper function to create a name_to_column_id map
|
||||
fn create_name_to_column_id(labels: &[&str]) -> HashMap<String, ColumnId> {
|
||||
labels
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, name)| (name.to_string(), idx as ColumnId + 1))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Helper function to create a row with string values
|
||||
fn create_row_with_values(values: &[&str]) -> Row {
|
||||
Row {
|
||||
values: values
|
||||
.iter()
|
||||
.map(|v| ValueData::StringValue(v.to_string()).into())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create a row with some null values
|
||||
fn create_row_with_nulls(values: &[Option<&str>]) -> Row {
|
||||
Row {
|
||||
values: values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
v.map(|s| ValueData::StringValue(s.to_string()).into())
|
||||
.unwrap_or(Value { value_data: None })
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to extract TSID from a row
|
||||
fn extract_tsid(
|
||||
schema: Vec<ColumnSchema>,
|
||||
row: Row,
|
||||
name_to_column_id: &HashMap<String, ColumnId>,
|
||||
table_id: TableId,
|
||||
) -> u64 {
|
||||
let rows = Rows {
|
||||
schema,
|
||||
rows: vec![row],
|
||||
};
|
||||
let mut rows_iter = RowsIter::new(rows, name_to_column_id);
|
||||
let row_iter = rows_iter.iter_mut().next().unwrap();
|
||||
let (_, tsid_value) = RowModifier::fill_internal_columns(table_id, &row_iter);
|
||||
match tsid_value.value_data {
|
||||
Some(ValueData::U64Value(tsid)) => tsid,
|
||||
_ => panic!("Expected U64Value for TSID"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_same_for_different_label_orders() {
|
||||
// Test that rows with the same label name-value pairs but in different orders
|
||||
// produce the same TSID
|
||||
let table_id = 1025;
|
||||
|
||||
// Schema 1: a, b, c
|
||||
let schema1 = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row1 = create_row_with_values(&["A", "B", "C"]);
|
||||
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
|
||||
|
||||
// Schema 2: b, a, c (different order)
|
||||
let schema2 = create_multi_label_schema(&["b", "a", "c"]);
|
||||
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row2 = create_row_with_values(&["B", "A", "C"]);
|
||||
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
|
||||
|
||||
// Schema 3: c, b, a (another different order)
|
||||
let schema3 = create_multi_label_schema(&["c", "b", "a"]);
|
||||
let name_to_column_id3 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row3 = create_row_with_values(&["C", "B", "A"]);
|
||||
let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id);
|
||||
|
||||
// All should have the same TSID since label names are sorted lexicographically
|
||||
// and we're using the same label name-value pairs
|
||||
assert_eq!(
|
||||
tsid1, tsid2,
|
||||
"TSID should be same for different column orders"
|
||||
);
|
||||
assert_eq!(
|
||||
tsid2, tsid3,
|
||||
"TSID should be same for different column orders"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_same_with_null_labels() {
|
||||
// Test that rows that differ only by null label values produce the same TSID
|
||||
let table_id = 1025;
|
||||
|
||||
// Row 1: a=A, b=B (no nulls, fast path)
|
||||
let schema1 = create_multi_label_schema(&["a", "b"]);
|
||||
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
|
||||
let row1 = create_row_with_values(&["A", "B"]);
|
||||
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
|
||||
|
||||
// Row 2: a=A, b=B, c=null (has null, slow path)
|
||||
let schema2 = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None]);
|
||||
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
|
||||
|
||||
// Both should have the same TSID since null labels are ignored
|
||||
assert_eq!(
|
||||
tsid1, tsid2,
|
||||
"TSID should be same when only difference is null label values"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_same_with_multiple_null_labels() {
|
||||
// Test with multiple null labels
|
||||
let table_id = 1025;
|
||||
|
||||
// Row 1: a=A, b=B (no nulls)
|
||||
let schema1 = create_multi_label_schema(&["a", "b"]);
|
||||
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
|
||||
let row1 = create_row_with_values(&["A", "B"]);
|
||||
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
|
||||
|
||||
// Row 2: a=A, b=B, c=null, d=null (multiple nulls)
|
||||
let schema2 = create_multi_label_schema(&["a", "b", "c", "d"]);
|
||||
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c", "d"]);
|
||||
let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None, None]);
|
||||
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
|
||||
|
||||
assert_eq!(
|
||||
tsid1, tsid2,
|
||||
"TSID should be same when only difference is multiple null label values"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_different_with_different_non_null_values() {
|
||||
// Test that rows with different non-null values produce different TSIDs
|
||||
let table_id = 1025;
|
||||
|
||||
// Row 1: a=A, b=B
|
||||
let schema1 = create_multi_label_schema(&["a", "b"]);
|
||||
let name_to_column_id1 = create_name_to_column_id(&["a", "b"]);
|
||||
let row1 = create_row_with_values(&["A", "B"]);
|
||||
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
|
||||
|
||||
// Row 2: a=A, b=C (different value for b)
|
||||
let schema2 = create_multi_label_schema(&["a", "b"]);
|
||||
let name_to_column_id2 = create_name_to_column_id(&["a", "b"]);
|
||||
let row2 = create_row_with_values(&["A", "C"]);
|
||||
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
|
||||
|
||||
assert_ne!(
|
||||
tsid1, tsid2,
|
||||
"TSID should be different when label values differ"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_fast_path_vs_slow_path_consistency() {
|
||||
// Test that fast path (no nulls) and slow path (with nulls) produce
|
||||
// the same TSID for the same non-null label values
|
||||
let table_id = 1025;
|
||||
|
||||
// Fast path: a=A, b=B (no nulls)
|
||||
let schema_fast = create_multi_label_schema(&["a", "b"]);
|
||||
let name_to_column_id_fast = create_name_to_column_id(&["a", "b"]);
|
||||
let row_fast = create_row_with_values(&["A", "B"]);
|
||||
let tsid_fast = extract_tsid(schema_fast, row_fast, &name_to_column_id_fast, table_id);
|
||||
|
||||
// Slow path: a=A, b=B, c=null (has null, triggers slow path)
|
||||
let schema_slow = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id_slow = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row_slow = create_row_with_nulls(&[Some("A"), Some("B"), None]);
|
||||
let tsid_slow = extract_tsid(schema_slow, row_slow, &name_to_column_id_slow, table_id);
|
||||
|
||||
assert_eq!(
|
||||
tsid_fast, tsid_slow,
|
||||
"Fast path and slow path should produce same TSID for same non-null values"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_with_null_in_middle() {
|
||||
// Test with null in the middle of labels
|
||||
let table_id = 1025;
|
||||
|
||||
// Row 1: a=A, b=B, c=C
|
||||
let schema1 = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row1 = create_row_with_values(&["A", "B", "C"]);
|
||||
let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id);
|
||||
|
||||
// Row 2: a=A, b=null, c=C (null in middle)
|
||||
let schema2 = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row2 = create_row_with_nulls(&[Some("A"), None, Some("C")]);
|
||||
let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id);
|
||||
|
||||
// Row1 has b=B while row2 has b=null. Null labels are ignored, so row2 is
// effectively (a=A, c=C), while row1 is (a=A, b=B, c=C); their TSIDs must differ.
|
||||
assert_ne!(
|
||||
tsid1, tsid2,
|
||||
"TSID should be different when a non-null value becomes null"
|
||||
);
|
||||
|
||||
// Row 3: a=A, c=C (no b at all, equivalent to row2)
|
||||
let schema3 = create_multi_label_schema(&["a", "c"]);
|
||||
let name_to_column_id3 = create_name_to_column_id(&["a", "c"]);
|
||||
let row3 = create_row_with_values(&["A", "C"]);
|
||||
let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id);
|
||||
|
||||
// Row2 (a=A, b=null, c=C) should be same as row3 (a=A, c=C)
|
||||
assert_eq!(
|
||||
tsid2, tsid3,
|
||||
"TSID should be same when null label is ignored"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsid_all_null_labels() {
|
||||
// Test with all labels being null
|
||||
let table_id = 1025;
|
||||
|
||||
// Row with all nulls
|
||||
let schema = create_multi_label_schema(&["a", "b", "c"]);
|
||||
let name_to_column_id = create_name_to_column_id(&["a", "b", "c"]);
|
||||
let row = create_row_with_nulls(&[None, None, None]);
|
||||
let tsid = extract_tsid(schema.clone(), row, &name_to_column_id, table_id);
|
||||
|
||||
// All label values are null, so the TSID is derived from the label names alone.
// The slow path must still be deterministic: the same schema with all nulls
// must always produce the same TSID.
|
||||
let row2 = create_row_with_nulls(&[None, None, None]);
|
||||
let tsid2 = extract_tsid(schema, row2, &name_to_column_id, table_id);
|
||||
assert_eq!(
|
||||
tsid, tsid2,
|
||||
"TSID should be consistent when all label values are null"
|
||||
);
|
||||
assert_eq!(tsid, 9442261431637846000);
|
||||
}
|
||||
}
|
||||
|
||||
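The tests above pin down two properties of the TSID: it must not depend on the order in which labels arrive, and null label values must not contribute to it. Below is a minimal, self-contained sketch of that idea; it is not GreptimeDB's actual RowModifier::fill_internal_columns. DefaultHasher, the u32 column ids, and compute_tsid are stand-ins chosen for illustration (the real code presumably uses a stable hash so TSIDs survive restarts and upgrades).

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical stand-in for the real TSID computation: keep only non-null labels,
/// sort them by column id, then feed the (id, value) pairs into a single hasher.
fn compute_tsid(table_id: u32, labels: &[(u32, Option<&str>)]) -> u64 {
    let mut present: Vec<(u32, &str)> = labels
        .iter()
        .filter_map(|(id, v)| v.map(|s| (*id, s)))
        .collect();
    // Sorting makes the result independent of the order labels arrive in.
    present.sort_by_key(|(id, _)| *id);

    let mut hasher = DefaultHasher::new();
    table_id.hash(&mut hasher);
    for (id, value) in present {
        id.hash(&mut hasher);
        value.hash(&mut hasher);
    }
    hasher.finish()
}

fn main() {
    // Same label pairs in a different order hash identically; the null label is ignored.
    let a = compute_tsid(1025, &[(1, Some("A")), (2, Some("B")), (3, None)]);
    let b = compute_tsid(1025, &[(2, Some("B")), (1, Some("A"))]);
    assert_eq!(a, b);
}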
@@ -55,7 +55,7 @@ lazy_static = "1.4"
|
||||
log-store = { workspace = true }
|
||||
mito-codec.workspace = true
|
||||
moka = { workspace = true, features = ["sync", "future"] }
|
||||
object-store = { workspace = true, features = ["testing"] }
|
||||
object-store.workspace = true
|
||||
parquet = { workspace = true, features = ["async"] }
|
||||
paste.workspace = true
|
||||
pin-project.workspace = true
|
||||
|
||||
@@ -37,7 +37,7 @@ use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu
|
||||
use crate::metrics::{COMPACTION_STAGE_ELAPSED, FLUSH_ELAPSED};
|
||||
use crate::read::{FlatSource, Source};
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::{FileHandle, RegionFileId, RegionIndexId};
|
||||
use crate::sst::file::{FileHandle, RegionFileId};
|
||||
use crate::sst::index::IndexerBuilderImpl;
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::{PuffinManagerFactory, SstPuffinManager};
|
||||
@@ -216,7 +216,7 @@ impl AccessLayer {
|
||||
pub(crate) async fn delete_sst(
|
||||
&self,
|
||||
region_file_id: &RegionFileId,
|
||||
index_file_id: &RegionIndexId,
|
||||
index_file_id: &RegionFileId,
|
||||
) -> Result<()> {
|
||||
let path = location::sst_file_path(&self.table_dir, *region_file_id, self.path_type);
|
||||
self.object_store
|
||||
@@ -226,22 +226,12 @@ impl AccessLayer {
|
||||
file_id: region_file_id.file_id(),
|
||||
})?;
|
||||
|
||||
// Delete all versions of the index file.
|
||||
for version in 0..=index_file_id.version {
|
||||
self.delete_index(&RegionIndexId::new(index_file_id.file_id, version))
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn delete_index(&self, region_index_id: &RegionIndexId) -> Result<()> {
|
||||
let path = location::index_file_path(&self.table_dir, *region_index_id, self.path_type);
|
||||
let path = location::index_file_path(&self.table_dir, *index_file_id, self.path_type);
|
||||
self.object_store
|
||||
.delete(&path)
|
||||
.await
|
||||
.context(DeleteIndexSnafu {
|
||||
file_id: region_index_id.file_id(),
|
||||
file_id: region_file_id.file_id(),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
@@ -301,7 +291,6 @@ impl AccessLayer {
|
||||
puffin_manager: self
|
||||
.puffin_manager_factory
|
||||
.build(store, path_provider.clone()),
|
||||
write_cache_enabled: false,
|
||||
intermediate_manager: self.intermediate_manager.clone(),
|
||||
index_options: request.index_options,
|
||||
inverted_index_config: request.inverted_index_config,
|
||||
@@ -479,10 +468,9 @@ impl TempFileCleaner {
|
||||
}
|
||||
|
||||
/// Removes the SST and index file from the local atomic dir by the file id.
|
||||
/// This only removes the initial index; since the index version is always 0 for a new SST, it is safe to pass 0 here.
|
||||
pub(crate) async fn clean_by_file_id(&self, file_id: FileId) {
|
||||
let sst_key = IndexKey::new(self.region_id, file_id, FileType::Parquet).to_string();
|
||||
let index_key = IndexKey::new(self.region_id, file_id, FileType::Puffin(0)).to_string();
|
||||
let index_key = IndexKey::new(self.region_id, file_id, FileType::Puffin).to_string();
|
||||
|
||||
Self::clean_atomic_dir_files(&self.object_store, &[&sst_key, &index_key]).await;
|
||||
}
|
||||
@@ -565,12 +553,9 @@ async fn clean_dir(dir: &str) -> Result<()> {
|
||||
|
||||
/// Path provider for SST file and index file.
|
||||
pub trait FilePathProvider: Send + Sync {
|
||||
/// Creates index file path of given file id. The version defaults to 0 and is not shown in the path.
|
||||
/// Creates index file path of given file id.
|
||||
fn build_index_file_path(&self, file_id: RegionFileId) -> String;
|
||||
|
||||
/// Creates index file path of given index id (with version support).
|
||||
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String;
|
||||
|
||||
/// Creates SST file path of given file id.
|
||||
fn build_sst_file_path(&self, file_id: RegionFileId) -> String;
|
||||
}
|
||||
@@ -590,16 +575,7 @@ impl WriteCachePathProvider {
|
||||
|
||||
impl FilePathProvider for WriteCachePathProvider {
|
||||
fn build_index_file_path(&self, file_id: RegionFileId) -> String {
|
||||
let puffin_key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Puffin(0));
|
||||
self.file_cache.cache_file_path(puffin_key)
|
||||
}
|
||||
|
||||
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String {
|
||||
let puffin_key = IndexKey::new(
|
||||
index_id.region_id(),
|
||||
index_id.file_id(),
|
||||
FileType::Puffin(index_id.version),
|
||||
);
|
||||
let puffin_key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Puffin);
|
||||
self.file_cache.cache_file_path(puffin_key)
|
||||
}
|
||||
|
||||
@@ -629,11 +605,7 @@ impl RegionFilePathFactory {
|
||||
|
||||
impl FilePathProvider for RegionFilePathFactory {
|
||||
fn build_index_file_path(&self, file_id: RegionFileId) -> String {
|
||||
location::index_file_path_legacy(&self.table_dir, file_id, self.path_type)
|
||||
}
|
||||
|
||||
fn build_index_file_path_with_version(&self, index_id: RegionIndexId) -> String {
|
||||
location::index_file_path(&self.table_dir, index_id, self.path_type)
|
||||
location::index_file_path(&self.table_dir, file_id, self.path_type)
|
||||
}
|
||||
|
||||
fn build_sst_file_path(&self, file_id: RegionFileId) -> String {
|
||||
|
||||
@@ -18,7 +18,6 @@ mod cache_size;
|
||||
|
||||
pub(crate) mod file_cache;
|
||||
pub(crate) mod index;
|
||||
pub(crate) mod manifest_cache;
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_util;
|
||||
pub(crate) mod write_cache;
|
||||
@@ -44,7 +43,7 @@ use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCache
|
||||
use crate::cache::write_cache::WriteCacheRef;
|
||||
use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
|
||||
use crate::read::Batch;
|
||||
use crate::sst::file::{RegionFileId, RegionIndexId};
|
||||
use crate::sst::file::RegionFileId;
|
||||
use crate::sst::parquet::reader::MetadataCacheMetrics;
|
||||
|
||||
/// Metrics type key for sst meta.
|
||||
@@ -76,22 +75,43 @@ pub enum CacheStrategy {
|
||||
}
|
||||
|
||||
impl CacheStrategy {
|
||||
/// Calls [CacheManager::get_parquet_meta_data()].
|
||||
pub async fn get_parquet_meta_data(
|
||||
&self,
|
||||
file_id: RegionFileId,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
match self {
|
||||
CacheStrategy::EnableAll(cache_manager) => {
|
||||
cache_manager.get_parquet_meta_data(file_id).await
|
||||
}
|
||||
CacheStrategy::Compaction(cache_manager) => {
|
||||
cache_manager.get_parquet_meta_data(file_id).await
|
||||
}
|
||||
CacheStrategy::Disabled => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets parquet metadata with cache metrics tracking.
|
||||
/// Returns the metadata and updates the provided metrics.
|
||||
pub(crate) async fn get_parquet_meta_data(
|
||||
pub(crate) async fn get_parquet_meta_data_with_metrics(
|
||||
&self,
|
||||
file_id: RegionFileId,
|
||||
metrics: &mut MetadataCacheMetrics,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
match self {
|
||||
CacheStrategy::EnableAll(cache_manager) => {
|
||||
cache_manager.get_parquet_meta_data(file_id, metrics).await
|
||||
cache_manager
|
||||
.get_parquet_meta_data_with_metrics(file_id, metrics)
|
||||
.await
|
||||
}
|
||||
CacheStrategy::Compaction(cache_manager) => {
|
||||
cache_manager.get_parquet_meta_data(file_id, metrics).await
|
||||
cache_manager
|
||||
.get_parquet_meta_data_with_metrics(file_id, metrics)
|
||||
.await
|
||||
}
|
||||
CacheStrategy::Disabled => {
|
||||
metrics.cache_miss += 1;
|
||||
metrics.mem_cache_miss += 1;
|
||||
metrics.file_cache_miss += 1;
|
||||
None
|
||||
}
|
||||
}
|
||||
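The CacheStrategy change above splits the metadata lookup into a plain variant and a metrics-tracking variant that counts memory-cache and file-cache hits and misses, and that short-circuits with a full miss when caching is disabled. The toy model below shows only that shape; Manager, MetaCacheMetrics, and the String payload are made-up stand-ins for CacheManager, MetadataCacheMetrics, and the parquet metadata.

use std::collections::HashMap;
use std::sync::Arc;

/// Hypothetical metrics holder mirroring the counters used above.
#[derive(Default, Debug)]
struct MetaCacheMetrics {
    mem_cache_hit: u64,
    mem_cache_miss: u64,
    file_cache_miss: u64,
    cache_miss: u64,
}

/// Toy two-tier lookup: an in-memory map first, then a slower "file" map.
struct Manager {
    mem: HashMap<u64, Arc<String>>,
    file: HashMap<u64, Arc<String>>,
}

impl Manager {
    fn get_with_metrics(&self, id: u64, m: &mut MetaCacheMetrics) -> Option<Arc<String>> {
        if let Some(v) = self.mem.get(&id) {
            m.mem_cache_hit += 1;
            return Some(v.clone());
        }
        m.mem_cache_miss += 1;
        if let Some(v) = self.file.get(&id) {
            return Some(v.clone());
        }
        m.cache_miss += 1;
        m.file_cache_miss += 1;
        None
    }
}

/// Strategy enum that either delegates or records a full miss, following the diff above.
enum Strategy {
    EnableAll(Arc<Manager>),
    Disabled,
}

impl Strategy {
    fn get_with_metrics(&self, id: u64, m: &mut MetaCacheMetrics) -> Option<Arc<String>> {
        match self {
            Strategy::EnableAll(inner) => inner.get_with_metrics(id, m),
            Strategy::Disabled => {
                m.cache_miss += 1;
                m.mem_cache_miss += 1;
                m.file_cache_miss += 1;
                None
            }
        }
    }
}

fn main() {
    let manager = Arc::new(Manager {
        mem: HashMap::new(),
        file: HashMap::from([(7u64, Arc::new("metadata".to_string()))]),
    });
    let mut metrics = MetaCacheMetrics::default();
    let strategy = Strategy::EnableAll(manager);
    // Memory miss, file hit: only mem_cache_miss is bumped.
    assert!(strategy.get_with_metrics(7, &mut metrics).is_some());
    assert_eq!(metrics.mem_cache_miss, 1);
    assert_eq!(metrics.cache_miss, 0);
}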
@@ -180,7 +200,7 @@ impl CacheStrategy {
|
||||
}
|
||||
|
||||
/// Calls [CacheManager::evict_puffin_cache()].
|
||||
pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
|
||||
pub async fn evict_puffin_cache(&self, file_id: RegionFileId) {
|
||||
match self {
|
||||
CacheStrategy::EnableAll(cache_manager) => {
|
||||
cache_manager.evict_puffin_cache(file_id).await
|
||||
@@ -298,18 +318,45 @@ impl CacheManager {
|
||||
CacheManagerBuilder::default()
|
||||
}
|
||||
|
||||
/// Gets cached [ParquetMetaData] from in-memory cache first.
|
||||
/// If not found, tries to get it from write cache and fill the in-memory cache.
|
||||
pub async fn get_parquet_meta_data(
|
||||
&self,
|
||||
file_id: RegionFileId,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
// Try to get metadata from sst meta cache
|
||||
let metadata = self.get_parquet_meta_data_from_mem_cache(file_id);
|
||||
if metadata.is_some() {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
// Try to get metadata from write cache
|
||||
let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
|
||||
if let Some(write_cache) = &self.write_cache
|
||||
&& let Some(metadata) = write_cache.file_cache().get_parquet_meta_data(key).await
|
||||
{
|
||||
let metadata = Arc::new(metadata);
|
||||
// Put metadata into sst meta cache
|
||||
self.put_parquet_meta_data(file_id, metadata.clone());
|
||||
return Some(metadata);
|
||||
};
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Gets cached [ParquetMetaData] with metrics tracking.
|
||||
/// Tries in-memory cache first, then file cache, updating metrics accordingly.
|
||||
pub(crate) async fn get_parquet_meta_data(
|
||||
pub(crate) async fn get_parquet_meta_data_with_metrics(
|
||||
&self,
|
||||
file_id: RegionFileId,
|
||||
metrics: &mut MetadataCacheMetrics,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
// Try to get metadata from sst meta cache
|
||||
if let Some(metadata) = self.get_parquet_meta_data_from_mem_cache(file_id) {
|
||||
if let Some(metadata) = self.get_parquet_meta_data_from_mem_cache_inner(file_id) {
|
||||
metrics.mem_cache_hit += 1;
|
||||
return Some(metadata);
|
||||
}
|
||||
metrics.mem_cache_miss += 1;
|
||||
|
||||
// Try to get metadata from write cache
|
||||
let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
|
||||
@@ -322,7 +369,7 @@ impl CacheManager {
|
||||
self.put_parquet_meta_data(file_id, metadata.clone());
|
||||
return Some(metadata);
|
||||
};
|
||||
metrics.cache_miss += 1;
|
||||
metrics.file_cache_miss += 1;
|
||||
|
||||
None
|
||||
}
|
||||
@@ -340,6 +387,17 @@ impl CacheManager {
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets cached [ParquetMetaData] from in-memory cache without updating global metrics.
|
||||
/// This is used by `get_parquet_meta_data_with_metrics` to avoid double counting.
|
||||
fn get_parquet_meta_data_from_mem_cache_inner(
|
||||
&self,
|
||||
file_id: RegionFileId,
|
||||
) -> Option<Arc<ParquetMetaData>> {
|
||||
self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
|
||||
sst_meta_cache.get(&SstMetaKey(file_id.region_id(), file_id.file_id()))
|
||||
})
|
||||
}
|
||||
|
||||
/// Puts [ParquetMetaData] into the cache.
|
||||
pub fn put_parquet_meta_data(&self, file_id: RegionFileId, metadata: Arc<ParquetMetaData>) {
|
||||
if let Some(cache) = &self.sst_meta_cache {
|
||||
@@ -400,7 +458,7 @@ impl CacheManager {
|
||||
}
|
||||
|
||||
/// Evicts every puffin-related cache entry for the given file.
|
||||
pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
|
||||
pub async fn evict_puffin_cache(&self, file_id: RegionFileId) {
|
||||
if let Some(cache) = &self.bloom_filter_index_cache {
|
||||
cache.invalidate_file(file_id.file_id());
|
||||
}
|
||||
@@ -422,7 +480,7 @@ impl CacheManager {
|
||||
.remove(IndexKey::new(
|
||||
file_id.region_id(),
|
||||
file_id.file_id(),
|
||||
FileType::Puffin(file_id.version),
|
||||
FileType::Puffin,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
@@ -835,14 +893,8 @@ mod tests {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let file_id = RegionFileId::new(region_id, FileId::random());
|
||||
let metadata = parquet_meta();
|
||||
let mut metrics = MetadataCacheMetrics::default();
|
||||
cache.put_parquet_meta_data(file_id, metadata);
|
||||
assert!(
|
||||
cache
|
||||
.get_parquet_meta_data(file_id, &mut metrics)
|
||||
.await
|
||||
.is_none()
|
||||
);
|
||||
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
|
||||
|
||||
let value = Value::Int64(10);
|
||||
let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
|
||||
@@ -864,30 +916,14 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_parquet_meta_cache() {
|
||||
let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
|
||||
let mut metrics = MetadataCacheMetrics::default();
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let file_id = RegionFileId::new(region_id, FileId::random());
|
||||
assert!(
|
||||
cache
|
||||
.get_parquet_meta_data(file_id, &mut metrics)
|
||||
.await
|
||||
.is_none()
|
||||
);
|
||||
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
|
||||
let metadata = parquet_meta();
|
||||
cache.put_parquet_meta_data(file_id, metadata);
|
||||
assert!(
|
||||
cache
|
||||
.get_parquet_meta_data(file_id, &mut metrics)
|
||||
.await
|
||||
.is_some()
|
||||
);
|
||||
assert!(cache.get_parquet_meta_data(file_id).await.is_some());
|
||||
cache.remove_parquet_meta_data(file_id);
|
||||
assert!(
|
||||
cache
|
||||
.get_parquet_meta_data(file_id, &mut metrics)
|
||||
.await
|
||||
.is_none()
|
||||
);
|
||||
assert!(cache.get_parquet_meta_data(file_id).await.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -949,7 +985,7 @@ mod tests {
|
||||
let cache = Arc::new(cache);
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
|
||||
let region_file_id = RegionFileId::new(region_id, FileId::random());
|
||||
let column_id: ColumnId = 1;
|
||||
|
||||
let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
|
||||
@@ -957,21 +993,16 @@ mod tests {
|
||||
let result_cache = cache.index_result_cache().unwrap();
|
||||
let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();
|
||||
|
||||
let bloom_key = (
|
||||
index_id.file_id(),
|
||||
index_id.version,
|
||||
column_id,
|
||||
Tag::Skipping,
|
||||
);
|
||||
let bloom_key = (region_file_id.file_id(), column_id, Tag::Skipping);
|
||||
bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
|
||||
inverted_cache.put_metadata(
|
||||
(index_id.file_id(), index_id.version),
|
||||
region_file_id.file_id(),
|
||||
Arc::new(InvertedIndexMetas::default()),
|
||||
);
|
||||
let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
|
||||
let selection = Arc::new(RowGroupSelection::default());
|
||||
result_cache.put(predicate.clone(), index_id.file_id(), selection);
|
||||
let file_id_str = index_id.to_string();
|
||||
result_cache.put(predicate.clone(), region_file_id.file_id(), selection);
|
||||
let file_id_str = region_file_id.to_string();
|
||||
let metadata = Arc::new(FileMetadata {
|
||||
blobs: Vec::new(),
|
||||
properties: HashMap::new(),
|
||||
@@ -981,32 +1012,40 @@ mod tests {
|
||||
assert!(bloom_cache.get_metadata(bloom_key).is_some());
|
||||
assert!(
|
||||
inverted_cache
|
||||
.get_metadata((index_id.file_id(), index_id.version))
|
||||
.get_metadata(region_file_id.file_id())
|
||||
.is_some()
|
||||
);
|
||||
assert!(
|
||||
result_cache
|
||||
.get(&predicate, region_file_id.file_id())
|
||||
.is_some()
|
||||
);
|
||||
assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
|
||||
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());
|
||||
|
||||
cache.evict_puffin_cache(index_id).await;
|
||||
cache.evict_puffin_cache(region_file_id).await;
|
||||
|
||||
assert!(bloom_cache.get_metadata(bloom_key).is_none());
|
||||
assert!(
|
||||
inverted_cache
|
||||
.get_metadata((index_id.file_id(), index_id.version))
|
||||
.get_metadata(region_file_id.file_id())
|
||||
.is_none()
|
||||
);
|
||||
assert!(
|
||||
result_cache
|
||||
.get(&predicate, region_file_id.file_id())
|
||||
.is_none()
|
||||
);
|
||||
assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
|
||||
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
|
||||
|
||||
// Refill caches and evict via CacheStrategy to ensure delegation works.
|
||||
bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
|
||||
inverted_cache.put_metadata(
|
||||
(index_id.file_id(), index_id.version),
|
||||
region_file_id.file_id(),
|
||||
Arc::new(InvertedIndexMetas::default()),
|
||||
);
|
||||
result_cache.put(
|
||||
predicate.clone(),
|
||||
index_id.file_id(),
|
||||
region_file_id.file_id(),
|
||||
Arc::new(RowGroupSelection::default()),
|
||||
);
|
||||
puffin_metadata_cache.put_metadata(
|
||||
@@ -1018,15 +1057,19 @@ mod tests {
|
||||
);
|
||||
|
||||
let strategy = CacheStrategy::EnableAll(cache.clone());
|
||||
strategy.evict_puffin_cache(index_id).await;
|
||||
strategy.evict_puffin_cache(region_file_id).await;
|
||||
|
||||
assert!(bloom_cache.get_metadata(bloom_key).is_none());
|
||||
assert!(
|
||||
inverted_cache
|
||||
.get_metadata((index_id.file_id(), index_id.version))
|
||||
.get_metadata(region_file_id.file_id())
|
||||
.is_none()
|
||||
);
|
||||
assert!(
|
||||
result_cache
|
||||
.get(&predicate, region_file_id.file_id())
|
||||
.is_none()
|
||||
);
|
||||
assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
|
||||
assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
|
||||
}
|
||||
}
|
||||
|
||||
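The test above exercises evict_puffin_cache, which has to fan out over every cache that may hold data derived from one puffin file: bloom filter metadata, inverted index metadata, cached index results, puffin metadata, and the write-cache file entry. A rough sketch of that fan-out follows, with plain maps keyed by a u64 file id standing in for the real moka caches and key types.

use std::collections::{HashMap, HashSet};

struct PuffinCaches {
    bloom_meta: HashMap<u64, Vec<u8>>,
    inverted_meta: HashMap<u64, Vec<u8>>,
    index_results: HashMap<u64, Vec<u8>>,
    puffin_metadata: HashMap<String, Vec<u8>>,
    file_cache_keys: HashSet<String>,
}

impl PuffinCaches {
    /// Clears every cache that can hold data derived from this puffin file.
    fn evict_puffin_cache(&mut self, file_id: u64) {
        self.bloom_meta.remove(&file_id);
        self.inverted_meta.remove(&file_id);
        self.index_results.remove(&file_id);
        let key = file_id.to_string();
        self.puffin_metadata.remove(&key);
        self.file_cache_keys.remove(&key);
    }
}

The important property, and what the test checks, is that a single call clears all of these layers, so a stale index file can never be served from any of them.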
src/mito2/src/cache/file_cache.rs (758 changed lines, vendored)
@@ -55,35 +55,121 @@ pub(crate) const DEFAULT_INDEX_CACHE_PERCENT: u8 = 20;
|
||||
/// Minimum capacity for each cache (512MB).
|
||||
const MIN_CACHE_CAPACITY: u64 = 512 * 1024 * 1024;
|
||||
|
||||
/// Inner struct for FileCache that can be used in spawned tasks.
|
||||
/// A file cache manages files on the local store and evicts files based
/// on size.
|
||||
#[derive(Debug)]
|
||||
struct FileCacheInner {
|
||||
pub(crate) struct FileCache {
|
||||
/// Local store to cache files.
|
||||
local_store: ObjectStore,
|
||||
/// Index to track cached Parquet files.
|
||||
parquet_index: Cache<IndexKey, IndexValue>,
|
||||
/// Index to track cached Puffin files.
|
||||
puffin_index: Cache<IndexKey, IndexValue>,
|
||||
/// Capacity of the puffin (index) cache in bytes.
|
||||
puffin_capacity: u64,
|
||||
}
|
||||
|
||||
impl FileCacheInner {
|
||||
pub(crate) type FileCacheRef = Arc<FileCache>;
|
||||
|
||||
impl FileCache {
|
||||
/// Creates a new file cache.
|
||||
pub(crate) fn new(
|
||||
local_store: ObjectStore,
|
||||
capacity: ReadableSize,
|
||||
ttl: Option<Duration>,
|
||||
index_cache_percent: Option<u8>,
|
||||
) -> FileCache {
|
||||
// Validate and use the provided percent or default
|
||||
let index_percent = index_cache_percent
|
||||
.filter(|&percent| percent > 0 && percent < 100)
|
||||
.unwrap_or(DEFAULT_INDEX_CACHE_PERCENT);
|
||||
let total_capacity = capacity.as_bytes();
|
||||
|
||||
// Convert percent to ratio and calculate capacity for each cache
|
||||
let index_ratio = index_percent as f64 / 100.0;
|
||||
let puffin_capacity = (total_capacity as f64 * index_ratio) as u64;
|
||||
let parquet_capacity = total_capacity - puffin_capacity;
|
||||
|
||||
// Ensure both capacities are at least 512MB
|
||||
let puffin_capacity = puffin_capacity.max(MIN_CACHE_CAPACITY);
|
||||
let parquet_capacity = parquet_capacity.max(MIN_CACHE_CAPACITY);
|
||||
|
||||
info!(
|
||||
"Initializing file cache with index_percent: {}%, total_capacity: {}, parquet_capacity: {}, puffin_capacity: {}",
|
||||
index_percent,
|
||||
ReadableSize(total_capacity),
|
||||
ReadableSize(parquet_capacity),
|
||||
ReadableSize(puffin_capacity)
|
||||
);
|
||||
|
||||
let parquet_index = Self::build_cache(local_store.clone(), parquet_capacity, ttl, "file");
|
||||
let puffin_index = Self::build_cache(local_store.clone(), puffin_capacity, ttl, "index");
|
||||
|
||||
FileCache {
|
||||
local_store,
|
||||
parquet_index,
|
||||
puffin_index,
|
||||
puffin_capacity,
|
||||
}
|
||||
}
|
||||
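FileCache::new above splits the configured capacity between the parquet cache and the puffin (index) cache using index_cache_percent (default 20, matching DEFAULT_INDEX_CACHE_PERCENT), then clamps both sides to a 512MB floor. The arithmetic is easy to check in isolation; the sketch below mirrors it, with the constant renamed so it does not clash with MIN_CACHE_CAPACITY above.

const MIN_CAPACITY_BYTES: u64 = 512 * 1024 * 1024;

/// Returns (parquet_capacity, puffin_capacity) in bytes.
fn split_capacity(total_capacity: u64, index_cache_percent: Option<u8>) -> (u64, u64) {
    // Fall back to the default when the percent is missing or outside (0, 100).
    let index_percent = index_cache_percent
        .filter(|&p| p > 0 && p < 100)
        .unwrap_or(20);
    let puffin_capacity = (total_capacity as f64 * index_percent as f64 / 100.0) as u64;
    let parquet_capacity = total_capacity - puffin_capacity;
    (
        parquet_capacity.max(MIN_CAPACITY_BYTES),
        puffin_capacity.max(MIN_CAPACITY_BYTES),
    )
}

#[cfg(test)]
mod capacity_split_tests {
    use super::*;

    #[test]
    fn default_split_is_80_20() {
        let total = 10u64 * 1024 * 1024 * 1024; // 10GiB
        let (parquet, puffin) = split_capacity(total, None);
        assert_eq!(puffin, 2 * 1024 * 1024 * 1024);
        assert_eq!(parquet, 8 * 1024 * 1024 * 1024);
    }
}

Note that the two floors are applied independently, so for small totals the two capacities can add up to more than the configured size; that matches the behavior of the code above.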
|
||||
/// Builds a cache for a specific file type.
|
||||
fn build_cache(
|
||||
local_store: ObjectStore,
|
||||
capacity: u64,
|
||||
ttl: Option<Duration>,
|
||||
label: &'static str,
|
||||
) -> Cache<IndexKey, IndexValue> {
|
||||
let cache_store = local_store;
|
||||
let mut builder = Cache::builder()
|
||||
.eviction_policy(EvictionPolicy::lru())
|
||||
.weigher(|_key, value: &IndexValue| -> u32 {
|
||||
// We only measure space on local store.
|
||||
value.file_size
|
||||
})
|
||||
.max_capacity(capacity)
|
||||
.async_eviction_listener(move |key, value, cause| {
|
||||
let store = cache_store.clone();
|
||||
// Stores files under FILE_DIR.
|
||||
let file_path = cache_file_path(FILE_DIR, *key);
|
||||
async move {
|
||||
if let RemovalCause::Replaced = cause {
|
||||
// The cache entry was replaced by another file. This is unexpected; we don't delete the
// file, but we still update the metrics since the entry has already been replaced.
|
||||
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
|
||||
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
|
||||
return;
|
||||
}
|
||||
|
||||
match store.delete(&file_path).await {
|
||||
Ok(()) => {
|
||||
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
.boxed()
|
||||
});
|
||||
if let Some(ttl) = ttl {
|
||||
builder = builder.time_to_idle(ttl);
|
||||
}
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Returns the appropriate memory index for the given file type.
|
||||
fn memory_index(&self, file_type: FileType) -> &Cache<IndexKey, IndexValue> {
|
||||
match file_type {
|
||||
FileType::Parquet => &self.parquet_index,
|
||||
FileType::Puffin { .. } => &self.puffin_index,
|
||||
FileType::Puffin => &self.puffin_index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cache file path for the key.
|
||||
fn cache_file_path(&self, key: IndexKey) -> String {
|
||||
cache_file_path(FILE_DIR, key)
|
||||
}
|
||||
|
||||
/// Puts a file into the cache index.
|
||||
///
|
||||
/// The `WriteCache` should ensure the file is in the correct path.
|
||||
async fn put(&self, key: IndexKey, value: IndexValue) {
|
||||
pub(crate) async fn put(&self, key: IndexKey, value: IndexValue) {
|
||||
CACHE_BYTES
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.add(value.file_size.into());
|
||||
@@ -94,8 +180,100 @@ impl FileCacheInner {
|
||||
index.run_pending_tasks().await;
|
||||
}
|
||||
|
||||
/// Recovers the index from local store.
|
||||
async fn recover(&self) -> Result<()> {
|
||||
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
|
||||
self.memory_index(key.file_type).get(&key).await
|
||||
}
|
||||
|
||||
/// Reads a file from the cache.
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
|
||||
// We must use `get()` to update the estimator of the cache.
|
||||
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
|
||||
let index = self.memory_index(key.file_type);
|
||||
if index.get(&key).await.is_none() {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return None;
|
||||
}
|
||||
|
||||
let file_path = self.cache_file_path(key);
|
||||
match self.get_reader(&file_path).await {
|
||||
Ok(Some(reader)) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return Some(reader);
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() != ErrorKind::NotFound {
|
||||
warn!(e; "Failed to get file for key {:?}", key);
|
||||
}
|
||||
}
|
||||
Ok(None) => {}
|
||||
}
|
||||
|
||||
// We remove the file from the index.
|
||||
index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
|
||||
/// Reads ranges from the cache.
|
||||
pub(crate) async fn read_ranges(
|
||||
&self,
|
||||
key: IndexKey,
|
||||
ranges: &[Range<u64>],
|
||||
) -> Option<Vec<Bytes>> {
|
||||
let index = self.memory_index(key.file_type);
|
||||
if index.get(&key).await.is_none() {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return None;
|
||||
}
|
||||
|
||||
let file_path = self.cache_file_path(key);
|
||||
// In most cases, it will use blocking read,
|
||||
// because FileCache is normally based on local file system, which supports blocking read.
|
||||
let bytes_result = fetch_byte_ranges(&file_path, self.local_store.clone(), ranges).await;
|
||||
match bytes_result {
|
||||
Ok(bytes) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
Some(bytes)
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() != ErrorKind::NotFound {
|
||||
warn!(e; "Failed to get file for key {:?}", key);
|
||||
}
|
||||
|
||||
// We remove the file from the index.
|
||||
index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes a file from the cache explicitly.
|
||||
/// It always tries to remove the file from the local store because we may not have the file
|
||||
/// in the memory index if the upload failed.
|
||||
pub(crate) async fn remove(&self, key: IndexKey) {
|
||||
let file_path = self.cache_file_path(key);
|
||||
self.memory_index(key.file_type).remove(&key).await;
|
||||
// Always delete the file from the local store.
|
||||
if let Err(e) = self.local_store.delete(&file_path).await {
|
||||
warn!(e; "Failed to delete a cached file {}", file_path);
|
||||
}
|
||||
}
|
||||
|
||||
async fn recover_inner(&self) -> Result<()> {
|
||||
let now = Instant::now();
|
||||
let mut lister = self
|
||||
.local_store
|
||||
@@ -130,7 +308,7 @@ impl FileCacheInner {
|
||||
// Track sizes separately for each file type
|
||||
match key.file_type {
|
||||
FileType::Parquet => parquet_size += size,
|
||||
FileType::Puffin { .. } => puffin_size += size,
|
||||
FileType::Puffin => puffin_size += size,
|
||||
}
|
||||
}
|
||||
// The metrics is a signed int gauge so we can updates it finally.
|
||||
@@ -163,7 +341,136 @@ impl FileCacheInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Downloads a file without cleaning up on error.
|
||||
/// Recovers the index from local store.
|
||||
///
|
||||
/// If `task_receiver` is provided, spawns a background task after recovery
|
||||
/// to process `RegionLoadCacheTask` messages for loading files into the cache.
|
||||
pub(crate) async fn recover(
|
||||
self: &Arc<Self>,
|
||||
sync: bool,
|
||||
task_receiver: Option<UnboundedReceiver<RegionLoadCacheTask>>,
|
||||
) {
|
||||
let moved_self = self.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
if let Err(err) = moved_self.recover_inner().await {
|
||||
error!(err; "Failed to recover file cache.")
|
||||
}
|
||||
|
||||
// Spawns background task to process region load cache tasks after recovery.
|
||||
// So it won't block the recovery when `sync` is true.
|
||||
if let Some(mut receiver) = task_receiver {
|
||||
let cache_ref = moved_self.clone();
|
||||
info!("Spawning background task for processing region load cache tasks");
|
||||
tokio::spawn(async move {
|
||||
while let Some(task) = receiver.recv().await {
|
||||
let file_cache = cache_ref.clone();
|
||||
task.fill_cache(file_cache).await;
|
||||
}
|
||||
info!("Background task for processing region load cache tasks stopped");
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
if sync {
|
||||
let _ = handle.await;
|
||||
}
|
||||
}
|
||||
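recover above does two things: it rebuilds the cache index from the local store, and, once that is done, it keeps a background task alive to drain RegionLoadCacheTask messages; sync only controls whether the caller waits for the recovery part. The sketch below keeps that shape with stand-in types (LoadTask and CacheState are invented here) and drops the logging and error handling of the real code.

use std::sync::Arc;
use tokio::sync::mpsc;

struct LoadTask(u64);

struct CacheState;

impl CacheState {
    async fn recover(&self) { /* scan the local store and rebuild the in-memory index */ }
    async fn fill(&self, _task: LoadTask) { /* load one file into the cache */ }
}

/// Mirrors the shape of FileCache::recover above.
async fn recover(
    state: Arc<CacheState>,
    sync: bool,
    task_receiver: Option<mpsc::UnboundedReceiver<LoadTask>>,
) {
    let handle = tokio::spawn(async move {
        state.recover().await;

        // Start draining queued load tasks only after recovery has finished,
        // so they never race with the index rebuild.
        if let Some(mut receiver) = task_receiver {
            let state = state.clone();
            tokio::spawn(async move {
                // Runs until every sender is dropped.
                while let Some(task) = receiver.recv().await {
                    state.fill(task).await;
                }
            });
        }
    });

    if sync {
        // Only wait for the initial recovery when the caller asked for it.
        let _ = handle.await;
    }
}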
|
||||
/// Returns the cache file path for the key.
|
||||
pub(crate) fn cache_file_path(&self, key: IndexKey) -> String {
|
||||
cache_file_path(FILE_DIR, key)
|
||||
}
|
||||
|
||||
/// Returns the local store of the file cache.
|
||||
pub(crate) fn local_store(&self) -> ObjectStore {
|
||||
self.local_store.clone()
|
||||
}
|
||||
|
||||
/// Gets the parquet metadata from the file cache.
/// If the file is not in the cache or loading the metadata fails, returns None.
|
||||
pub(crate) async fn get_parquet_meta_data(&self, key: IndexKey) -> Option<ParquetMetaData> {
|
||||
// Check if file cache contains the key
|
||||
if let Some(index_value) = self.parquet_index.get(&key).await {
|
||||
// Load metadata from file cache
|
||||
let local_store = self.local_store();
|
||||
let file_path = self.cache_file_path(key);
|
||||
let file_size = index_value.file_size as u64;
|
||||
let metadata_loader = MetadataLoader::new(local_store, &file_path, file_size);
|
||||
|
||||
match metadata_loader.load().await {
|
||||
Ok(metadata) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
Some(metadata)
|
||||
}
|
||||
Err(e) => {
|
||||
if !e.is_object_not_found() {
|
||||
warn!(
|
||||
e; "Failed to get parquet metadata for key {:?}",
|
||||
key
|
||||
);
|
||||
}
|
||||
// We remove the file from the index.
|
||||
self.parquet_index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
|
||||
if self.local_store.exists(file_path).await? {
|
||||
Ok(Some(self.local_store.reader(file_path).await?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the key is in the file cache.
|
||||
pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
|
||||
self.memory_index(key.file_type).contains_key(key)
|
||||
}
|
||||
|
||||
/// Returns the capacity of the puffin (index) cache in bytes.
|
||||
pub(crate) fn puffin_cache_capacity(&self) -> u64 {
|
||||
self.puffin_capacity
|
||||
}
|
||||
|
||||
/// Returns the current weighted size (used bytes) of the puffin (index) cache.
|
||||
pub(crate) fn puffin_cache_size(&self) -> u64 {
|
||||
self.puffin_index.weighted_size()
|
||||
}
|
||||
|
||||
/// Downloads a file in `remote_path` from the remote object store to the local cache
|
||||
/// (specified by `index_key`).
|
||||
pub(crate) async fn download(
|
||||
&self,
|
||||
index_key: IndexKey,
|
||||
remote_path: &str,
|
||||
remote_store: &ObjectStore,
|
||||
file_size: u64,
|
||||
) -> Result<()> {
|
||||
if let Err(e) = self
|
||||
.download_without_cleaning(index_key, remote_path, remote_store, file_size)
|
||||
.await
|
||||
{
|
||||
let filename = index_key.to_string();
|
||||
TempFileCleaner::clean_atomic_dir_files(&self.local_store, &[&filename]).await;
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
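download above wraps download_without_cleaning so that any partially written files are removed via TempFileCleaner before the error is returned. Reduced to its shape, with the downloader and the cleaner abstracted into futures supplied by the caller (both parameter names here are hypothetical):

use std::future::Future;

async fn download_with_cleanup<E>(
    do_download: impl Future<Output = Result<(), E>>,
    clean_temp_files: impl Future<Output = ()>,
) -> Result<(), E> {
    if let Err(e) = do_download.await {
        // Best-effort removal of partially written files before surfacing the error.
        clean_temp_files.await;
        return Err(e);
    }
    Ok(())
}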
|
||||
async fn download_without_cleaning(
|
||||
&self,
|
||||
index_key: IndexKey,
|
||||
@@ -178,7 +485,7 @@ impl FileCacheInner {
|
||||
let timer = WRITE_CACHE_DOWNLOAD_ELAPSED
|
||||
.with_label_values(&[match file_type {
|
||||
FileType::Parquet => "download_parquet",
|
||||
FileType::Puffin { .. } => "download_puffin",
|
||||
FileType::Puffin => "download_puffin",
|
||||
}])
|
||||
.start_timer();
|
||||
|
||||
@@ -230,360 +537,11 @@ impl FileCacheInner {
|
||||
self.put(index_key, index_value).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Downloads a file from remote store to local cache.
|
||||
async fn download(
|
||||
&self,
|
||||
index_key: IndexKey,
|
||||
remote_path: &str,
|
||||
remote_store: &ObjectStore,
|
||||
file_size: u64,
|
||||
) -> Result<()> {
|
||||
if let Err(e) = self
|
||||
.download_without_cleaning(index_key, remote_path, remote_store, file_size)
|
||||
.await
|
||||
{
|
||||
let filename = index_key.to_string();
|
||||
TempFileCleaner::clean_atomic_dir_files(&self.local_store, &[&filename]).await;
|
||||
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A file cache manages files on the local store and evicts files based
/// on size.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct FileCache {
|
||||
/// Inner cache state shared with background worker.
|
||||
inner: Arc<FileCacheInner>,
|
||||
/// Capacity of the puffin (index) cache in bytes.
|
||||
puffin_capacity: u64,
|
||||
}
|
||||
|
||||
pub(crate) type FileCacheRef = Arc<FileCache>;
|
||||
|
||||
impl FileCache {
|
||||
/// Creates a new file cache.
|
||||
pub(crate) fn new(
|
||||
local_store: ObjectStore,
|
||||
capacity: ReadableSize,
|
||||
ttl: Option<Duration>,
|
||||
index_cache_percent: Option<u8>,
|
||||
) -> FileCache {
|
||||
// Validate and use the provided percent or default
|
||||
let index_percent = index_cache_percent
|
||||
.filter(|&percent| percent > 0 && percent < 100)
|
||||
.unwrap_or(DEFAULT_INDEX_CACHE_PERCENT);
|
||||
let total_capacity = capacity.as_bytes();
|
||||
|
||||
// Convert percent to ratio and calculate capacity for each cache
|
||||
let index_ratio = index_percent as f64 / 100.0;
|
||||
let puffin_capacity = (total_capacity as f64 * index_ratio) as u64;
|
||||
let parquet_capacity = total_capacity - puffin_capacity;
|
||||
|
||||
// Ensure both capacities are at least 512MB
|
||||
let puffin_capacity = puffin_capacity.max(MIN_CACHE_CAPACITY);
|
||||
let parquet_capacity = parquet_capacity.max(MIN_CACHE_CAPACITY);
|
||||
|
||||
info!(
|
||||
"Initializing file cache with index_percent: {}%, total_capacity: {}, parquet_capacity: {}, puffin_capacity: {}",
|
||||
index_percent,
|
||||
ReadableSize(total_capacity),
|
||||
ReadableSize(parquet_capacity),
|
||||
ReadableSize(puffin_capacity)
|
||||
);
|
||||
|
||||
let parquet_index = Self::build_cache(local_store.clone(), parquet_capacity, ttl, "file");
|
||||
let puffin_index = Self::build_cache(local_store.clone(), puffin_capacity, ttl, "index");
|
||||
|
||||
// Create inner cache shared with background worker
|
||||
let inner = Arc::new(FileCacheInner {
|
||||
local_store,
|
||||
parquet_index,
|
||||
puffin_index,
|
||||
});
|
||||
|
||||
FileCache {
|
||||
inner,
|
||||
puffin_capacity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a cache for a specific file type.
|
||||
fn build_cache(
|
||||
local_store: ObjectStore,
|
||||
capacity: u64,
|
||||
ttl: Option<Duration>,
|
||||
label: &'static str,
|
||||
) -> Cache<IndexKey, IndexValue> {
|
||||
let cache_store = local_store;
|
||||
let mut builder = Cache::builder()
|
||||
.eviction_policy(EvictionPolicy::lru())
|
||||
.weigher(|_key, value: &IndexValue| -> u32 {
|
||||
// We only measure space on local store.
|
||||
value.file_size
|
||||
})
|
||||
.max_capacity(capacity)
|
||||
.async_eviction_listener(move |key, value, cause| {
|
||||
let store = cache_store.clone();
|
||||
// Stores files under FILE_DIR.
|
||||
let file_path = cache_file_path(FILE_DIR, *key);
|
||||
async move {
|
||||
if let RemovalCause::Replaced = cause {
|
||||
// The cache entry was replaced by another file. This is unexpected; we don't delete the
// file, but we still update the metrics since the entry has already been replaced.
|
||||
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
|
||||
// TODO(yingwen): Don't log warn later.
|
||||
warn!("Replace existing cache {} for region {} unexpectedly", file_path, key.region_id);
|
||||
return;
|
||||
}
|
||||
|
||||
match store.delete(&file_path).await {
|
||||
Ok(()) => {
|
||||
CACHE_BYTES.with_label_values(&[label]).sub(value.file_size.into());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(e; "Failed to delete cached file {} for region {}", file_path, key.region_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
.boxed()
|
||||
});
|
||||
if let Some(ttl) = ttl {
|
||||
builder = builder.time_to_idle(ttl);
|
||||
}
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Puts a file into the cache index.
|
||||
///
|
||||
/// The `WriteCache` should ensure the file is in the correct path.
|
||||
pub(crate) async fn put(&self, key: IndexKey, value: IndexValue) {
|
||||
self.inner.put(key, value).await
|
||||
}
|
||||
|
||||
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
|
||||
self.inner.memory_index(key.file_type).get(&key).await
|
||||
}
|
||||
|
||||
/// Reads a file from the cache.
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
|
||||
// We must use `get()` to update the estimator of the cache.
|
||||
// See https://docs.rs/moka/latest/moka/future/struct.Cache.html#method.contains_key
|
||||
let index = self.inner.memory_index(key.file_type);
|
||||
if index.get(&key).await.is_none() {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return None;
|
||||
}
|
||||
|
||||
let file_path = self.inner.cache_file_path(key);
|
||||
match self.get_reader(&file_path).await {
|
||||
Ok(Some(reader)) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return Some(reader);
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() != ErrorKind::NotFound {
|
||||
warn!(e; "Failed to get file for key {:?}", key);
|
||||
}
|
||||
}
|
||||
Ok(None) => {}
|
||||
}
|
||||
|
||||
// We remove the file from the index.
|
||||
index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
|
||||
/// Reads ranges from the cache.
|
||||
pub(crate) async fn read_ranges(
|
||||
&self,
|
||||
key: IndexKey,
|
||||
ranges: &[Range<u64>],
|
||||
) -> Option<Vec<Bytes>> {
|
||||
let index = self.inner.memory_index(key.file_type);
|
||||
if index.get(&key).await.is_none() {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
return None;
|
||||
}
|
||||
|
||||
let file_path = self.inner.cache_file_path(key);
|
||||
// In most cases, it will use blocking read,
|
||||
// because FileCache is normally based on local file system, which supports blocking read.
|
||||
let bytes_result =
|
||||
fetch_byte_ranges(&file_path, self.inner.local_store.clone(), ranges).await;
|
||||
match bytes_result {
|
||||
Ok(bytes) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
Some(bytes)
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() != ErrorKind::NotFound {
|
||||
warn!(e; "Failed to get file for key {:?}", key);
|
||||
}
|
||||
|
||||
// We remove the file from the index.
|
||||
index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes a file from the cache explicitly.
|
||||
/// It always tries to remove the file from the local store because we may not have the file
|
||||
/// in the memory index if the upload failed.
|
||||
pub(crate) async fn remove(&self, key: IndexKey) {
|
||||
let file_path = self.inner.cache_file_path(key);
|
||||
self.inner.memory_index(key.file_type).remove(&key).await;
|
||||
// Always delete the file from the local store.
|
||||
if let Err(e) = self.inner.local_store.delete(&file_path).await {
|
||||
warn!(e; "Failed to delete a cached file {}", file_path);
|
||||
}
|
||||
}
|
||||
|
||||
/// Recovers the index from local store.
|
||||
///
|
||||
/// If `task_receiver` is provided, spawns a background task after recovery
|
||||
/// to process `RegionLoadCacheTask` messages for loading files into the cache.
|
||||
pub(crate) async fn recover(
|
||||
&self,
|
||||
sync: bool,
|
||||
task_receiver: Option<UnboundedReceiver<RegionLoadCacheTask>>,
|
||||
) {
|
||||
let moved_self = self.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
if let Err(err) = moved_self.inner.recover().await {
|
||||
error!(err; "Failed to recover file cache.")
|
||||
}
|
||||
|
||||
// Spawns background task to process region load cache tasks after recovery.
|
||||
// So it won't block the recovery when `sync` is true.
|
||||
if let Some(mut receiver) = task_receiver {
|
||||
info!("Spawning background task for processing region load cache tasks");
|
||||
tokio::spawn(async move {
|
||||
while let Some(task) = receiver.recv().await {
|
||||
task.fill_cache(&moved_self).await;
|
||||
}
|
||||
info!("Background task for processing region load cache tasks stopped");
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
if sync {
|
||||
let _ = handle.await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cache file path for the key.
|
||||
pub(crate) fn cache_file_path(&self, key: IndexKey) -> String {
|
||||
self.inner.cache_file_path(key)
|
||||
}
|
||||
|
||||
/// Returns the local store of the file cache.
|
||||
pub(crate) fn local_store(&self) -> ObjectStore {
|
||||
self.inner.local_store.clone()
|
||||
}
|
||||
|
||||
/// Gets the parquet metadata from the file cache.
/// If the file is not in the cache or loading the metadata fails, returns None.
|
||||
pub(crate) async fn get_parquet_meta_data(&self, key: IndexKey) -> Option<ParquetMetaData> {
|
||||
// Check if file cache contains the key
|
||||
if let Some(index_value) = self.inner.parquet_index.get(&key).await {
|
||||
// Load metadata from file cache
|
||||
let local_store = self.local_store();
|
||||
let file_path = self.inner.cache_file_path(key);
|
||||
let file_size = index_value.file_size as u64;
|
||||
let metadata_loader = MetadataLoader::new(local_store, &file_path, file_size);
|
||||
|
||||
match metadata_loader.load().await {
|
||||
Ok(metadata) => {
|
||||
CACHE_HIT
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
Some(metadata)
|
||||
}
|
||||
Err(e) => {
|
||||
if !e.is_object_not_found() {
|
||||
warn!(
|
||||
e; "Failed to get parquet metadata for key {:?}",
|
||||
key
|
||||
);
|
||||
}
|
||||
// We remove the file from the index.
|
||||
self.inner.parquet_index.remove(&key).await;
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CACHE_MISS
|
||||
.with_label_values(&[key.file_type.metric_label()])
|
||||
.inc();
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_reader(&self, file_path: &str) -> object_store::Result<Option<Reader>> {
|
||||
if self.inner.local_store.exists(file_path).await? {
|
||||
Ok(Some(self.inner.local_store.reader(file_path).await?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the key is in the file cache.
|
||||
pub(crate) fn contains_key(&self, key: &IndexKey) -> bool {
|
||||
self.inner.memory_index(key.file_type).contains_key(key)
|
||||
}
|
||||
|
||||
/// Returns the capacity of the puffin (index) cache in bytes.
|
||||
pub(crate) fn puffin_cache_capacity(&self) -> u64 {
|
||||
self.puffin_capacity
|
||||
}
|
||||
|
||||
/// Returns the current weighted size (used bytes) of the puffin (index) cache.
|
||||
pub(crate) fn puffin_cache_size(&self) -> u64 {
|
||||
self.inner.puffin_index.weighted_size()
|
||||
}
|
||||
|
||||
/// Downloads a file in `remote_path` from the remote object store to the local cache
|
||||
/// (specified by `index_key`).
|
||||
pub(crate) async fn download(
|
||||
&self,
|
||||
index_key: IndexKey,
|
||||
remote_path: &str,
|
||||
remote_store: &ObjectStore,
|
||||
file_size: u64,
|
||||
) -> Result<()> {
|
||||
self.inner
|
||||
.download(index_key, remote_path, remote_store, file_size)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// Key of file cache index.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct IndexKey {
|
||||
pub(crate) struct IndexKey {
|
||||
pub region_id: RegionId,
|
||||
pub file_id: FileId,
|
||||
pub file_type: FileType,
|
||||
@@ -607,7 +565,7 @@ impl fmt::Display for IndexKey {
|
||||
"{}.{}.{}",
|
||||
self.region_id.as_u64(),
|
||||
self.file_id,
|
||||
self.file_type
|
||||
self.file_type.as_str()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -618,16 +576,7 @@ pub enum FileType {
|
||||
/// Parquet file.
|
||||
Parquet,
|
||||
/// Puffin file.
|
||||
Puffin(u64),
|
||||
}
|
||||
|
||||
impl fmt::Display for FileType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FileType::Parquet => write!(f, "parquet"),
|
||||
FileType::Puffin(version) => write!(f, "{}.puffin", version),
|
||||
}
|
||||
}
|
||||
Puffin,
|
||||
}
|
||||
|
||||
impl FileType {
|
||||
@@ -635,16 +584,16 @@ impl FileType {
|
||||
fn parse(s: &str) -> Option<FileType> {
|
||||
match s {
|
||||
"parquet" => Some(FileType::Parquet),
|
||||
"puffin" => Some(FileType::Puffin(0)),
|
||||
_ => {
|
||||
// if post-fix with .puffin, try to parse the version
|
||||
if let Some(version_str) = s.strip_suffix(".puffin") {
|
||||
let version = version_str.parse::<u64>().ok()?;
|
||||
Some(FileType::Puffin(version))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
"puffin" => Some(FileType::Puffin),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the file type to string.
|
||||
fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
FileType::Parquet => "parquet",
|
||||
FileType::Puffin => "puffin",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -652,7 +601,7 @@ impl FileType {
|
||||
fn metric_label(&self) -> &'static str {
|
||||
match self {
|
||||
FileType::Parquet => FILE_TYPE,
|
||||
FileType::Puffin(_) => INDEX_TYPE,
|
||||
FileType::Puffin => INDEX_TYPE,
|
||||
}
|
||||
}
|
||||
}
|
||||
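With the puffin version removed, a cache file name is back to the three-part form exercised by parse_index_key in the tests below: region id, file id, and a parquet or puffin suffix, joined by dots. A string-only sketch of that round trip (Kind, format_key, and parse_key are stand-ins for FileType, IndexKey's Display impl, and parse_index_key):

#[derive(Debug, PartialEq)]
enum Kind {
    Parquet,
    Puffin,
}

fn format_key(region_id: u64, file_id: &str, kind: &Kind) -> String {
    let suffix = match kind {
        Kind::Parquet => "parquet",
        Kind::Puffin => "puffin",
    };
    format!("{region_id}.{file_id}.{suffix}")
}

fn parse_key(name: &str) -> Option<(u64, String, Kind)> {
    // Split from the right so dashes inside the file id are untouched.
    let (rest, suffix) = name.rsplit_once('.')?;
    let (region, file_id) = rest.split_once('.')?;
    let kind = match suffix {
        "parquet" => Kind::Parquet,
        "puffin" => Kind::Puffin,
        _ => return None,
    };
    Some((region.parse().ok()?, file_id.to_string(), kind))
}

fn main() {
    let name = format_key(5299989643269, "3368731b-a556-42b8-a5df-9c31ce155095", &Kind::Puffin);
    let parsed = parse_key(&name).unwrap();
    assert_eq!(parsed.0, 5299989643269);
    assert_eq!(parsed.2, Kind::Puffin);
}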
@@ -734,7 +683,7 @@ mod tests {
|
||||
let exist = cache.reader(key).await;
|
||||
assert!(exist.is_some());
|
||||
tokio::time::sleep(Duration::from_millis(15)).await;
|
||||
cache.inner.parquet_index.run_pending_tasks().await;
|
||||
cache.parquet_index.run_pending_tasks().await;
|
||||
let non = cache.reader(key).await;
|
||||
assert!(non.is_none());
|
||||
}
|
||||
@@ -772,19 +721,19 @@ mod tests {
|
||||
assert_eq!("hello", String::from_utf8(buf).unwrap());
|
||||
|
||||
// Get weighted size.
|
||||
cache.inner.parquet_index.run_pending_tasks().await;
|
||||
assert_eq!(5, cache.inner.parquet_index.weighted_size());
|
||||
cache.parquet_index.run_pending_tasks().await;
|
||||
assert_eq!(5, cache.parquet_index.weighted_size());
|
||||
|
||||
// Remove the file.
|
||||
cache.remove(key).await;
|
||||
assert!(cache.reader(key).await.is_none());
|
||||
|
||||
// Ensure all pending tasks of the moka cache are done before assertion.
|
||||
cache.inner.parquet_index.run_pending_tasks().await;
|
||||
cache.parquet_index.run_pending_tasks().await;
|
||||
|
||||
// The file does not exist either.
|
||||
assert!(!local_store.exists(&file_path).await.unwrap());
|
||||
assert_eq!(0, cache.inner.parquet_index.weighted_size());
|
||||
assert_eq!(0, cache.parquet_index.weighted_size());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -817,7 +766,7 @@ mod tests {
|
||||
// Reader is none.
|
||||
assert!(cache.reader(key).await.is_none());
|
||||
// Key is removed.
|
||||
assert!(!cache.inner.parquet_index.contains_key(&key));
|
||||
assert!(!cache.parquet_index.contains_key(&key));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -850,7 +799,12 @@ mod tests {
|
||||
}
|
||||
|
||||
// Recover the cache.
|
||||
let cache = FileCache::new(local_store.clone(), ReadableSize::mb(10), None, None);
|
||||
let cache = Arc::new(FileCache::new(
|
||||
local_store.clone(),
|
||||
ReadableSize::mb(10),
|
||||
None,
|
||||
None,
|
||||
));
|
||||
// No entry before recovery.
|
||||
assert!(
|
||||
cache
|
||||
@@ -861,11 +815,8 @@ mod tests {
|
||||
cache.recover(true, None).await;
|
||||
|
||||
// Check size.
|
||||
cache.inner.parquet_index.run_pending_tasks().await;
|
||||
assert_eq!(
|
||||
total_size,
|
||||
cache.inner.parquet_index.weighted_size() as usize
|
||||
);
|
||||
cache.parquet_index.run_pending_tasks().await;
|
||||
assert_eq!(total_size, cache.parquet_index.weighted_size() as usize);
|
||||
|
||||
for (i, file_id) in file_ids.iter().enumerate() {
|
||||
let key = IndexKey::new(region_id, *file_id, file_type);
|
||||
@@ -930,15 +881,6 @@ mod tests {
|
||||
IndexKey::new(region_id, file_id, FileType::Parquet),
|
||||
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.parquet").unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
IndexKey::new(region_id, file_id, FileType::Puffin(0)),
|
||||
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.puffin").unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
IndexKey::new(region_id, file_id, FileType::Puffin(42)),
|
||||
parse_index_key("5299989643269.3368731b-a556-42b8-a5df-9c31ce155095.42.puffin")
|
||||
.unwrap()
|
||||
);
|
||||
assert!(parse_index_key("").is_none());
|
||||
assert!(parse_index_key(".").is_none());
|
||||
assert!(parse_index_key("5299989643269").is_none());
|
||||
|
||||
src/mito2/src/cache/index/bloom_filter_index.rs (40 changed lines, vendored)
@@ -21,7 +21,7 @@ use async_trait::async_trait;
use bytes::Bytes;
use index::bloom_filter::error::Result;
use index::bloom_filter::reader::{BloomFilterReadMetrics, BloomFilterReader};
use store_api::storage::{ColumnId, FileId, IndexVersion};
use store_api::storage::{ColumnId, FileId};

use crate::cache::index::{INDEX_METADATA_TYPE, IndexCache, PageKey};
use crate::metrics::{CACHE_HIT, CACHE_MISS};
@@ -35,10 +35,8 @@ pub enum Tag {
Fulltext,
}

pub type BloomFilterIndexKey = (FileId, IndexVersion, ColumnId, Tag);

/// Cache for bloom filter index.
pub type BloomFilterIndexCache = IndexCache<BloomFilterIndexKey, BloomFilterMeta>;
pub type BloomFilterIndexCache = IndexCache<(FileId, ColumnId, Tag), BloomFilterMeta>;
pub type BloomFilterIndexCacheRef = Arc<BloomFilterIndexCache>;

impl BloomFilterIndexCache {
@@ -61,9 +59,11 @@ impl BloomFilterIndexCache {
}

/// Calculates weight for bloom filter index metadata.
fn bloom_filter_index_metadata_weight(k: &BloomFilterIndexKey, meta: &Arc<BloomFilterMeta>) -> u32 {
fn bloom_filter_index_metadata_weight(
k: &(FileId, ColumnId, Tag),
meta: &Arc<BloomFilterMeta>,
) -> u32 {
let base = k.0.as_bytes().len()
+ std::mem::size_of::<IndexVersion>()
+ std::mem::size_of::<ColumnId>()
+ std::mem::size_of::<Tag>()
+ std::mem::size_of::<BloomFilterMeta>();
@@ -75,14 +75,16 @@ fn bloom_filter_index_metadata_weight(k: &BloomFilterIndexKey, meta: &Arc<BloomF
}

/// Calculates weight for bloom filter index content.
fn bloom_filter_index_content_weight((k, _): &(BloomFilterIndexKey, PageKey), v: &Bytes) -> u32 {
fn bloom_filter_index_content_weight(
(k, _): &((FileId, ColumnId, Tag), PageKey),
v: &Bytes,
) -> u32 {
(k.0.as_bytes().len() + std::mem::size_of::<ColumnId>() + v.len()) as u32
}

/// Bloom filter index blob reader with cache.
pub struct CachedBloomFilterIndexBlobReader<R> {
file_id: FileId,
index_version: IndexVersion,
column_id: ColumnId,
tag: Tag,
blob_size: u64,
@@ -94,7 +96,6 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
/// Creates a new bloom filter index blob reader with cache.
pub fn new(
file_id: FileId,
index_version: IndexVersion,
column_id: ColumnId,
tag: Tag,
blob_size: u64,
@@ -103,7 +104,6 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
) -> Self {
Self {
file_id,
index_version,
column_id,
tag,
blob_size,
@@ -126,7 +126,7 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
let (result, cache_metrics) = self
.cache
.get_or_load(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
self.blob_size,
offset,
size,
@@ -161,7 +161,7 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
let (page, cache_metrics) = self
.cache
.get_or_load(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
self.blob_size,
range.start,
(range.end - range.start) as u32,
@@ -191,9 +191,9 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
&self,
metrics: Option<&mut BloomFilterReadMetrics>,
) -> Result<BloomFilterMeta> {
if let Some(cached) =
self.cache
.get_metadata((self.file_id, self.index_version, self.column_id, self.tag))
if let Some(cached) = self
.cache
.get_metadata((self.file_id, self.column_id, self.tag))
{
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
if let Some(m) = metrics {
@@ -203,7 +203,7 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
} else {
let meta = self.inner.metadata(metrics).await?;
self.cache.put_metadata(
(self.file_id, self.index_version, self.column_id, self.tag),
(self.file_id, self.column_id, self.tag),
Arc::new(meta.clone()),
);
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
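Aside: the metadata path above is a plain read-through cache: probe the cache, count a hit and reuse the cached Arc, otherwise load from the inner reader, insert, and count a miss. A minimal sketch of that pattern follows, using hypothetical stand-in types and a HashMap in place of the real IndexCache; the hits/misses fields stand in for the CACHE_HIT/CACHE_MISS counters.

use std::collections::HashMap;
use std::sync::Arc;

// Hypothetical metadata value; only its shape matters for the sketch.
struct Meta {
    segment_count: usize,
}

// Tiny stand-in cache keyed by (file id, column id).
struct MetaCache {
    entries: HashMap<(u64, u32), Arc<Meta>>,
    hits: u64,
    misses: u64,
}

impl MetaCache {
    // Return the cached value on a hit; otherwise load it, insert it, and count a miss.
    fn get_or_load(&mut self, key: (u64, u32), load: impl FnOnce() -> Meta) -> Arc<Meta> {
        if let Some(cached) = self.entries.get(&key) {
            self.hits += 1;
            return cached.clone();
        }
        let meta = Arc::new(load());
        self.entries.insert(key, meta.clone());
        self.misses += 1;
        meta
    }
}

fn main() {
    let mut cache = MetaCache { entries: HashMap::new(), hits: 0, misses: 0 };
    let load = || Meta { segment_count: 8 };
    let first = cache.get_or_load((1, 42), load); // miss: loads and inserts
    let second = cache.get_or_load((1, 42), load); // hit: served from the map
    assert_eq!(first.segment_count, second.segment_count);
    assert_eq!((cache.hits, cache.misses), (1, 1));
    println!("hits={}, misses={}", cache.hits, cache.misses);
}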
@@ -223,7 +223,6 @@ mod test {
#[test]
fn bloom_filter_metadata_weight_counts_vec_contents() {
let file_id = FileId::parse_str("00000000-0000-0000-0000-000000000001").unwrap();
let version = 0;
let column_id: ColumnId = 42;
let tag = Tag::Skipping;

@@ -247,13 +246,10 @@
],
};

let weight = bloom_filter_index_metadata_weight(
&(file_id, version, column_id, tag),
&Arc::new(meta.clone()),
);
let weight =
bloom_filter_index_metadata_weight(&(file_id, column_id, tag), &Arc::new(meta.clone()));

let base = file_id.as_bytes().len()
+ std::mem::size_of::<IndexVersion>()
+ std::mem::size_of::<ColumnId>()
+ std::mem::size_of::<Tag>()
+ std::mem::size_of::<BloomFilterMeta>();
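Aside: the weigher charges a fixed base (the file id bytes plus the size_of of the other key components and of the metadata struct) and, per the test name, also counts the vector contents of the metadata. The sketch below reproduces that arithmetic with hypothetical stand-in types; it is not the actual mito2 definitions or weigher.

use std::sync::Arc;

// Hypothetical stand-ins, sized to make the arithmetic visible.
type ColumnId = u32;

#[allow(dead_code)]
enum Tag {
    Skipping,
    Fulltext,
}

struct BloomFilterMeta {
    segment_locs: Vec<(u64, u64)>, // heap-allocated part the weigher must not ignore
}

// Weight = fixed footprint of the key and metadata struct, plus the heap contents
// of the metadata's vector, mirroring the "counts_vec_contents" expectation above.
fn metadata_weight(file_id: &[u8; 16], _k: &(ColumnId, Tag), meta: &Arc<BloomFilterMeta>) -> u32 {
    let base = file_id.len()
        + std::mem::size_of::<ColumnId>()
        + std::mem::size_of::<Tag>()
        + std::mem::size_of::<BloomFilterMeta>();
    let heap = meta.segment_locs.len() * std::mem::size_of::<(u64, u64)>();
    (base + heap) as u32
}

fn main() {
    let meta = Arc::new(BloomFilterMeta {
        segment_locs: vec![(0, 128), (128, 128), (256, 64)],
    });
    let file_id = [0u8; 16]; // a UUID-backed file id is 16 bytes
    let weight = metadata_weight(&file_id, &(42, Tag::Skipping), &meta);
    // 16 + 4 + size_of::<Tag>() + size_of::<BloomFilterMeta>() + 3 entries * 16 bytes each
    println!("weight = {weight}");
    assert!(weight as usize > file_id.len() + std::mem::size_of::<BloomFilterMeta>());
}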