Compare commits

...

14 Commits

Author SHA1 Message Date
Weny Xu
904d560175 feat(promql-planner): introduce vector matching binary operation (#5578)
* feat(promql-planner): support vector matching for binary operation

* test: add sqlness tests
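For context, vector matching controls how samples on the two sides of a binary operation are paired. In standard PromQL (example not taken from this PR), `errors_total{code="500"} / ignoring(code) requests_total` divides the two series after excluding the `code` label from the match.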
2025-02-27 07:39:19 +00:00
Lei, HUANG
765d1277ee fix(metasrv): clean expired nodes in memory (#5592)
* fix/frontend-node-state: Refactor NodeInfoKey and Context Handling in Meta Server

 • Removed unused cluster_id from NodeInfoKey struct.
 • Updated HeartbeatHandlerGroup to return Context alongside HeartbeatResponse.
 • Added current_node_info to Context for tracking node information.
 • Implemented on_node_disconnect in Context to handle node disconnection events, specifically for Frontend roles.
 • Adjusted register_pusher function to return PusherId directly.
 • Updated tests to accommodate changes in Context structure.

* fix/frontend-node-state: Refactor Heartbeat Handler Context Management

Refactored the HeartbeatHandlerGroup::handle method to use a mutable reference for Context instead of passing it by value. This change simplifies context management by eliminating the need to return the context with the response. Updated the Metasrv implementation to align with this new context handling approach, improving code clarity and reducing unnecessary context cloning.
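
A rough sketch of the signature change described above (type and method names come from the commit text; the exact shapes are assumptions, and the final sub-commit below moves back to passing by value with an explicit clone):

    // Before: context moved in, handed back out alongside the response.
    async fn handle(&self, req: HeartbeatRequest, ctx: Context)
        -> Result<(HeartbeatResponse, Context)>;

    // After this sub-commit: mutable borrow, nothing to hand back.
    async fn handle(&self, req: HeartbeatRequest, ctx: &mut Context)
        -> Result<HeartbeatResponse>;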

* revert: clean cluster info on disconnect

* fix/frontend-node-state: Add Frontend Expiry Listener and Update NodeInfoKey Conversion

 • Introduced FrontendExpiryListener to manage the expiration of frontend nodes, including its integration with leadership change notifications.
 • Modified NodeInfoKey conversion to use references, enhancing efficiency and consistency across the codebase.
 • Updated collect_cluster_info_handler and metasrv to incorporate the new listener and conversion changes.
 • Added frontend_expiry module to the project structure for better organization and maintainability.

* chore: add config for node expiry

* add some doc

* fix: clippy

* fix/frontend-node-state:
 ### Refactor Node Expiry Handling
 - **Configuration Update**: Removed `node_expiry_tick` from `metasrv.example.toml` and `MetasrvOptions` in `metasrv.rs`.
 - **Module Renaming**: Renamed `frontend_expiry.rs` to `node_expiry_listener.rs` and updated references in `lib.rs`.
 - **Code Refactoring**: Replaced `FrontendExpiryListener` with `NodeExpiryListener` in `node_expiry_listener.rs` and `metasrv.rs`, removing the tick interval and adjusting logic to use a fixed 60-second interval for node expiry checks.

* fix/frontend-node-state:
 Improve logging in `node_expiry_listener.rs`

 - Enhanced warning message to include peer information when an unrecognized node info key is encountered in `node_expiry_listener.rs`.

* docs: update config docs

* fix/frontend-node-state:
 **Refactor Context Handling in Heartbeat Services**

 - Updated `HeartbeatHandlerGroup` in `handler.rs` to pass `Context` by value instead of by mutable reference, allowing for more flexible context management.
 - Modified the `Metasrv` implementation in `heartbeat.rs` to clone `Context` when passing it to the `handle` method, ensuring thread safety and consistency in asynchronous operations.
2025-02-27 06:16:36 +00:00
discord9
ccf42a9d97 fix: flow heartbeat retry (#5600)
* fix: flow heartbeat retry

* fix?: not sure if fixed

* chore: per review
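
The fix, as the flownode heartbeat diff below shows: stream creation moves out of `start()` into a new `create_streams()` method, and the reconnect loop calls `create_streams()` directly, because `start()` returns early (with only a warning) once the task is already marked as started, so retrying via `start()` never re-established the connection.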
2025-02-27 03:58:21 +00:00
Weny Xu
71e2fb895f feat: introduce prom_round fn (#5604)
* feat: introduce `prom_round` fn

* test: add sqlness tests
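
`prom_round` rounds each sample to the nearest multiple of its second argument, with `0.0` meaning round to the nearest integer. From the tests added later in this diff, in pseudo-call form (the UDF is actually instantiated per `nearest` value):

    prom_round(123.456, 10.0) = 120.0   // (123.456 / 10.0).round() * 10.0
    prom_round(123.456, 0.1)  = 123.5
    prom_round(123.456, 0.0)  = 123.0   // nearest == 0.0 rounds to an integer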
2025-02-27 03:30:15 +00:00
Ruihang Xia
c9671fd669 feat(promql): implement subquery (#5606)
* feat: initial implement for promql subquery

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl and test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
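
For context, a PromQL subquery evaluates an inner range expression at a fixed resolution over an outer range. In standard PromQL (example not taken from this PR), `max_over_time(rate(errors_total[5m])[30m:1m])` computes the 5-minute rate at 1-minute steps across the last 30 minutes and takes the maximum.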
2025-02-27 03:28:04 +00:00
Ruihang Xia
b5efc75aab feat(promql): ignore invalid input in histogram plan (#5607)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-02-27 03:18:20 +00:00
Weny Xu
c1d18d9980 fix(prom): preserve the order of series in PromQueryResult (#5601)
fix(prom): keep the order of tags
2025-02-26 13:40:09 +00:00
Lei, HUANG
5d9faaaf39 fix(metasrv): reject ddl when metasrv is follower (#5599)
* fix/reject-ddl-in-follower-metasrv:
 Add leader check and logging for gRPC requests in `procedure.rs`

 - Implemented leader verification for `query_procedure_state`, `ddl`, and `procedure_details` gRPC requests in `procedure.rs`.
 - Added logging with `warn` for requests reaching a non-leader node.
 - Introduced `ResponseHeader` and `Error::is_not_leader()` to handle non-leader responses.

* fix/reject-ddl-in-follower-metasrv:
 Improve leader address handling in `heartbeat.rs`

 - Refactor leader address retrieval by renaming `leader` to `leader_addr` for clarity.
 - Update `make_client` function to use a reference to `leader_addr`.
 - Enhance logging to include the leader address in the success message for creating a heartbeat stream.

* fmt

* fix/reject-ddl-in-follower-metasrv:
 **Enhance Leader Check in `procedure.rs`**

 - Updated the leader verification logic in `procedure.rs` to return a failed `MigrateRegionResponse` when the server is not the leader.
 - Added logging to warn when a migrate request is received by a non-leader server.
2025-02-26 08:10:40 +00:00
ZonaHe
538875abee feat: update dashboard to v0.7.11 (#5597)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2025-02-26 07:57:59 +00:00
jeremyhi
5ed09c4584 fix: all heartbeat channels need to check leader (#5593) 2025-02-25 10:45:30 +00:00
Yingwen
3f6a41eac5 fix: update show create table output for fulltext index (#5591)
* fix: update fulltext index syntax in show create table

* test: update fulltext sqlness result
2025-02-25 09:36:27 +00:00
yihong
ff0dcf12c5 perf: close issue 4974 by not deleting columns when dropping a logical region, about 100 times faster (#5561)
* perf: do not delete columns when drop logical region in drop database
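
The mechanism, as the DDL and metric engine diffs below show: `DropDatabaseExecutor` passes `fast_path: true`, so dropping a logical region only detaches it from the engine state and skips deleting its columns from the metadata region; the drop database procedure then drops the physical region, which removes the metadata and data regions wholesale.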

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: make ci happy

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address review comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address some comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: drop stupid comments by copilot

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* chore: minor refactor

* chore: minor refactor

* chore: update greptime-proto

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: WenyXu <wenymedia@gmail.com>
2025-02-25 09:00:49 +00:00
Yingwen
5b1fca825a fix: remove cached and uploaded files on failure (#5590) 2025-02-25 08:51:37 +00:00
Ruihang Xia
7bd108e2be feat: impl hll_state, hll_merge and hll_calc for incremental distinct counting (#5579)
* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update with more test and logs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl merge fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename function names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
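
Taken together, the three functions form a state/merge/calc pipeline: `hll` folds raw strings into a serialized HyperLogLogPlus state, `hll_merge` unions such states, and `hll_count` turns a state into a cardinality estimate. A minimal sketch using the types introduced in this diff, mirroring `test_hll_basic` in `hll.rs`:

    let mut state = HllState::new();
    state.update("a");
    state.update("b");
    // `evaluate` serializes the sketch with bincode into a ScalarValue::Binary;
    // `hll_count` later deserializes it and reads off the estimate.
    if let ScalarValue::Binary(Some(bytes)) = state.evaluate().unwrap() {
        let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
        assert_eq!(hll.count().trunc() as u32, 2);
    }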
2025-02-24 19:07:37 +00:00
57 changed files with 1865 additions and 180 deletions

Cargo.lock generated
View File

@@ -432,7 +432,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
"indexmap 2.6.0",
"indexmap 2.7.1",
"lexical-core",
"num",
"serde",
@@ -1475,7 +1475,7 @@ version = "0.13.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6026d8cd82ada8bbcfe337805dd1eb6afdc9e80fa4d57e977b3a36315e0c5525"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
"lazy_static",
"num-traits",
"regex",
@@ -2009,6 +2009,7 @@ dependencies = [
name = "common-function"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
"approx 0.5.1",
"arc-swap",
@@ -2031,6 +2032,7 @@ dependencies = [
"geo-types",
"geohash",
"h3o",
"hyperloglogplus",
"jsonb",
"nalgebra 0.33.2",
"num",
@@ -2974,7 +2976,7 @@ dependencies = [
"chrono",
"half",
"hashbrown 0.14.5",
"indexmap 2.6.0",
"indexmap 2.7.1",
"libc",
"object_store",
"parquet",
@@ -3034,7 +3036,7 @@ dependencies = [
"datafusion-functions-aggregate-common",
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
"indexmap 2.6.0",
"indexmap 2.7.1",
"paste",
"recursive",
"serde_json",
@@ -3156,7 +3158,7 @@ dependencies = [
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"half",
"indexmap 2.6.0",
"indexmap 2.7.1",
"log",
"parking_lot 0.12.3",
"paste",
@@ -3207,7 +3209,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"indexmap 2.6.0",
"indexmap 2.7.1",
"itertools 0.13.0",
"log",
"recursive",
@@ -3232,7 +3234,7 @@ dependencies = [
"datafusion-physical-expr-common",
"half",
"hashbrown 0.14.5",
"indexmap 2.6.0",
"indexmap 2.7.1",
"itertools 0.13.0",
"log",
"paste",
@@ -3291,7 +3293,7 @@ dependencies = [
"futures",
"half",
"hashbrown 0.14.5",
"indexmap 2.6.0",
"indexmap 2.7.1",
"itertools 0.13.0",
"log",
"once_cell",
@@ -3311,7 +3313,7 @@ dependencies = [
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"indexmap 2.6.0",
"indexmap 2.7.1",
"log",
"recursive",
"regex",
@@ -4697,7 +4699,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a25adc8a01340231121646d8f0a29d0e92f45461#a25adc8a01340231121646d8f0a29d0e92f45461"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=072ce580502e015df1a6b03a185b60309a7c2a7a#072ce580502e015df1a6b03a185b60309a7c2a7a"
dependencies = [
"prost 0.13.3",
"serde",
@@ -4720,7 +4722,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.12",
"indexmap 2.6.0",
"indexmap 2.7.1",
"slab",
"tokio",
"tokio-util",
@@ -4739,7 +4741,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.1.0",
"indexmap 2.6.0",
"indexmap 2.7.1",
"slab",
"tokio",
"tokio-util",
@@ -5289,6 +5291,15 @@ dependencies = [
"tracing",
]
[[package]]
name = "hyperloglogplus"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3"
dependencies = [
"serde",
]
[[package]]
name = "i_float"
version = "1.3.1"
@@ -5577,9 +5588,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.6.0"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
@@ -5593,7 +5604,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash 0.8.11",
"indexmap 2.6.0",
"indexmap 2.7.1",
"is-terminal",
"itoa",
"log",
@@ -5940,7 +5951,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee7893dab2e44ae5f9d0173f26ff4aa327c10b01b06a72b52dd9405b628640d"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
]
[[package]]
@@ -6423,7 +6434,7 @@ dependencies = [
"cactus",
"cfgrammar",
"filetime",
"indexmap 2.6.0",
"indexmap 2.7.1",
"lazy_static",
"lrtable",
"num-traits",
@@ -7664,7 +7675,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a"
dependencies = [
"futures-core",
"futures-sink",
"indexmap 2.6.0",
"indexmap 2.7.1",
"js-sys",
"once_cell",
"pin-project-lite",
@@ -8236,7 +8247,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.6.0",
"indexmap 2.7.1",
]
[[package]]
@@ -10327,7 +10338,7 @@ version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "930cfb6e6abf99298aaad7d29abbef7a9999a9a8806a40088f55f0dcec03146b"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
"itoa",
"memchr",
"ryu",
@@ -10398,7 +10409,7 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.6.0",
"indexmap 2.7.1",
"serde",
"serde_derive",
"serde_json",
@@ -10424,7 +10435,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
"itoa",
"ryu",
"serde",
@@ -10485,6 +10496,7 @@ dependencies = [
"humantime",
"humantime-serde",
"hyper 1.4.1",
"indexmap 2.7.1",
"influxdb_line_protocol",
"itertools 0.10.5",
"json5",
@@ -11026,7 +11038,7 @@ dependencies = [
"futures-util",
"hashbrown 0.15.2",
"hashlink",
"indexmap 2.6.0",
"indexmap 2.7.1",
"log",
"memchr",
"once_cell",
@@ -12322,7 +12334,7 @@ version = "0.19.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
"toml_datetime",
"winnow 0.5.40",
]
@@ -12333,7 +12345,7 @@ version = "0.22.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
dependencies = [
"indexmap 2.6.0",
"indexmap 2.7.1",
"serde",
"serde_spanned",
"toml_datetime",
@@ -12471,7 +12483,7 @@ dependencies = [
"futures-core",
"futures-util",
"hdrhistogram",
"indexmap 2.6.0",
"indexmap 2.7.1",
"pin-project-lite",
"slab",
"sync_wrapper 1.0.1",

View File

@@ -129,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a25adc8a01340231121646d8f0a29d0e92f45461" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
hex = "0.4"
http = "1"
humantime = "2.1"

View File

@@ -319,6 +319,7 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |

View File

@@ -50,6 +50,9 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true

View File

@@ -12,6 +12,7 @@ default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
[dependencies]
ahash = "0.8"
api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
@@ -33,6 +34,7 @@ geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
nalgebra.workspace = true
num = "0.4"

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod hll;
mod uddsketch_state;
pub(crate) use hll::HllStateType;
pub use hll::{HllState, HLL_MERGE_NAME, HLL_NAME};
pub use uddsketch_state::{UddSketchState, UDDSKETCH_STATE_NAME};

View File

@@ -0,0 +1,319 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::arrow::array::ArrayRef;
use datafusion::common::cast::{as_binary_array, as_string_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::prelude::create_udaf;
use datatypes::arrow::datatypes::DataType;
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use crate::utils::FixedRandomState;
pub const HLL_NAME: &str = "hll";
pub const HLL_MERGE_NAME: &str = "hll_merge";
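// Precision 14 allocates 2^14 = 16384 registers; the standard HyperLogLog
// relative error is roughly 1.04 / sqrt(2^14), i.e. about 0.8%.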
const DEFAULT_PRECISION: u8 = 14;
pub(crate) type HllStateType = HyperLogLogPlus<String, FixedRandomState>;
pub struct HllState {
hll: HllStateType,
}
impl std::fmt::Debug for HllState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "HllState<Opaque>")
}
}
impl Default for HllState {
fn default() -> Self {
Self::new()
}
}
impl HllState {
pub fn new() -> Self {
Self {
// Safety: the DEFAULT_PRECISION is fixed and valid
hll: HllStateType::new(DEFAULT_PRECISION, FixedRandomState::new()).unwrap(),
}
}
/// Create a UDF for the `hll` function.
///
/// `hll` accepts a string column and aggregates the
/// values into a HyperLogLog state.
pub fn state_udf_impl() -> AggregateUDF {
create_udaf(
HLL_NAME,
vec![DataType::Utf8],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
/// Create a UDF for the `hll_merge` function.
///
/// `hll_merge` accepts a binary column of states generated by `hll`
/// and merges them into a single state.
pub fn merge_udf_impl() -> AggregateUDF {
create_udaf(
HLL_MERGE_NAME,
vec![DataType::Binary],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_merge_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: &str) {
self.hll.insert(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(serialized) = bincode::deserialize::<HllStateType>(raw) {
if let Ok(()) = self.hll.merge(&serialized) {
return;
}
}
trace!("Warning: Failed to merge HyperLogLog from {:?}", raw);
}
fn create_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Utf8 => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_NAME} does not support data type: {other}"),
}
}
fn create_merge_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Binary => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_MERGE_NAME} does not support data type: {other}"),
}
}
}
impl DfAccumulator for HllState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[0];
match array.data_type() {
DataType::Utf8 => {
let string_array = as_string_array(array)?;
for value in string_array.iter().flatten() {
self.update(value);
}
}
DataType::Binary => {
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
}
_ => {
return not_impl_err!(
"HLL functions do not support data type: {}",
array.data_type()
)
}
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
)))
}
fn size(&self) -> usize {
std::mem::size_of_val(&self.hll)
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, StringArray};
use super::*;
#[test]
fn test_hll_basic() {
let mut state = HllState::new();
state.update("1");
state.update("2");
state.update("3");
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_roundtrip() {
let mut state = HllState::new();
state.update("1");
state.update("2");
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = HllState::new();
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original
let result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = result {
let mut original: HllStateType = bincode::deserialize(bytes).unwrap();
let mut merged: HllStateType = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original.count(), merged.count());
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_batch_update() {
let mut state = HllState::new();
// Test string values
let str_values = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i"];
let str_array = Arc::new(StringArray::from(str_values)) as ArrayRef;
state.update_batch(&[str_array]).unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 9);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_merge_batch() {
let mut state1 = HllState::new();
state1.update("1");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_hll_merge_function() {
// Create two HLL states with different values
let mut state1 = HllState::new();
state1.update("1");
state1.update("2");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
state2.update("3");
let state2_binary = state2.evaluate().unwrap();
// Create a merge state and merge both states
let mut merge_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merge_state.update_batch(&[binary_array]).unwrap();
let result = merge_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
// Should have 3 unique values: "1", "2", "3"
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
}

View File

@@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
use crate::scalars::hll_count::HllCalcFunction;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
@@ -107,6 +108,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry);
UddSketchCalcFunction::register(&function_registry);
HllCalcFunction::register(&function_registry);
// Aggregate functions
AggregateFunctions::register(&function_registry);

View File

@@ -22,6 +22,7 @@ pub mod matches;
pub mod math;
pub mod vector;
pub(crate) mod hll_count;
#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;

View File

@@ -0,0 +1,175 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the scalar function `hll_count`.
use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::Vector;
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::vectors::{BinaryVector, MutableVector, UInt64VectorBuilder, VectorRef};
use hyperloglogplus::HyperLogLog;
use snafu::OptionExt;
use crate::aggr::HllStateType;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
const NAME: &str = "hll_count";
/// HllCalcFunction implements the scalar function `hll_count`.
///
/// It accepts one argument:
/// 1. The serialized HyperLogLogPlus state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the estimated cardinality.
#[derive(Debug, Default)]
pub struct HllCalcFunction;
impl HllCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(HllCalcFunction));
}
}
impl Display for HllCalcFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for HllCalcFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
// Only argument: HyperLogLogPlus state (binary)
Signature::exact(
vec![ConcreteDataType::binary_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.len() != 1 {
return InvalidFuncArgsSnafu {
err_msg: format!("hll_count expects 1 argument, got {}", columns.len()),
}
.fail();
}
let hll_vec = columns[0]
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!("expect BinaryVector, got {}", columns[0].vector_type_name()),
})?;
let len = hll_vec.len();
let mut builder = UInt64VectorBuilder::with_capacity(len);
for i in 0..len {
let hll_opt = hll_vec.get_data(i);
if hll_opt.is_none() {
builder.push_null();
continue;
}
let hll_bytes = hll_opt.unwrap();
// Deserialize the HyperLogLogPlus from its bincode representation
let mut hll: HllStateType = match bincode::deserialize(hll_bytes) {
Ok(h) => h,
Err(e) => {
common_telemetry::trace!("Failed to deserialize HyperLogLogPlus: {}", e);
builder.push_null();
continue;
}
};
builder.push(Some(hll.count().round() as u64));
}
Ok(builder.to_vector())
}
}
#[cfg(test)]
mod tests {
use datatypes::vectors::BinaryVector;
use super::*;
use crate::utils::FixedRandomState;
#[test]
fn test_hll_count_function() {
let function = HllCalcFunction;
assert_eq!("hll_count", function.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
function
.return_type(&[ConcreteDataType::uint64_datatype()])
.unwrap()
);
// Create a test HLL
let mut hll = HllStateType::new(14, FixedRandomState::new()).unwrap();
for i in 1..=10 {
hll.insert(&i.to_string());
}
let serialized_bytes = bincode::serialize(&hll).unwrap();
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(serialized_bytes)]))];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
// Test cardinality estimate
if let datatypes::value::Value::UInt64(v) = result.get(0) {
assert_eq!(v, 10);
} else {
panic!("Expected uint64 value");
}
}
#[test]
fn test_hll_count_function_errors() {
let function = HllCalcFunction;
// Test with invalid number of arguments
let args: Vec<VectorRef> = vec![];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("hll_count expects 1 argument"));
// Test with invalid binary data
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])]))]; // Invalid binary data
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
assert!(matches!(result.get(0), datatypes::value::Value::Null));
}
}

View File

@@ -12,6 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::BuildHasher;
use ahash::RandomState;
use serde::{Deserialize, Serialize};
/// Escapes special characters in the provided pattern string for `LIKE`.
///
/// Specifically, it prefixes the backslash (`\`), percent (`%`), and underscore (`_`)
@@ -32,6 +37,71 @@ pub fn escape_like_pattern(pattern: &str) -> String {
})
.collect::<String>()
}
/// A random state with fixed seeds.
///
/// This is used to ensure that the hash values are consistent across
/// different processes, and easy to serialize and deserialize.
#[derive(Debug)]
pub struct FixedRandomState {
state: RandomState,
}
impl FixedRandomState {
// some random seeds
const RANDOM_SEED_0: u64 = 0x517cc1b727220a95;
const RANDOM_SEED_1: u64 = 0x428a2f98d728ae22;
const RANDOM_SEED_2: u64 = 0x7137449123ef65cd;
const RANDOM_SEED_3: u64 = 0xb5c0fbcfec4d3b2f;
pub fn new() -> Self {
Self {
state: ahash::RandomState::with_seeds(
Self::RANDOM_SEED_0,
Self::RANDOM_SEED_1,
Self::RANDOM_SEED_2,
Self::RANDOM_SEED_3,
),
}
}
}
impl Default for FixedRandomState {
fn default() -> Self {
Self::new()
}
}
impl BuildHasher for FixedRandomState {
type Hasher = ahash::AHasher;
fn build_hasher(&self) -> Self::Hasher {
self.state.build_hasher()
}
fn hash_one<T: std::hash::Hash>(&self, x: T) -> u64 {
self.state.hash_one(x)
}
}
impl Serialize for FixedRandomState {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_unit()
}
}
impl<'de> Deserialize<'de> for FixedRandomState {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
Ok(Self::new())
}
}
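// Because the seeds above are fixed, independent processes hash the same key
// to the same value, which keeps serialized HLL sketches mergeable across
// restarts and across nodes. Assumed usage sketch:
//
//     assert_eq!(
//         FixedRandomState::new().hash_one("key"),
//         FixedRandomState::new().hash_one("key"),
//     );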
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -57,12 +57,10 @@ pub trait ClusterInfo {
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
///
/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
/// a `cluster_id`, it serves multiple clusters.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct NodeInfoKey {
/// The cluster id.
// todo(hl): remove cluster_id as it is not assigned anywhere.
pub cluster_id: ClusterId,
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
pub role: Role,
@@ -232,8 +230,8 @@ impl TryFrom<Vec<u8>> for NodeInfoKey {
}
}
impl From<NodeInfoKey> for Vec<u8> {
fn from(key: NodeInfoKey) -> Self {
impl From<&NodeInfoKey> for Vec<u8> {
fn from(key: &NodeInfoKey) -> Self {
format!(
"{}-{}-{}-{}",
CLUSTER_NODE_INFO_PREFIX,
@@ -315,7 +313,7 @@ mod tests {
node_id: 2,
};
let key_bytes: Vec<u8> = key.into();
let key_bytes: Vec<u8> = (&key).into();
let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
assert_eq!(1, new_key.cluster_id);

View File

@@ -128,7 +128,7 @@ impl State for DropDatabaseExecutor {
.await?;
executor.invalidate_table_cache(ddl_ctx).await?;
executor
.on_drop_regions(ddl_ctx, &self.physical_region_routes)
.on_drop_regions(ddl_ctx, &self.physical_region_routes, true)
.await?;
info!("Table: {}({}) is dropped", self.table_name, self.table_id);

View File

@@ -156,7 +156,7 @@ impl DropTableProcedure {
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
self.executor
.on_drop_regions(&self.context, &self.data.physical_region_routes)
.on_drop_regions(&self.context, &self.data.physical_region_routes, false)
.await?;
self.data.state = DropTableState::DeleteTombstone;
Ok(Status::executing(true))

View File

@@ -214,6 +214,7 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
region_routes: &[RegionRoute],
fast_path: bool,
) -> Result<()> {
let leaders = find_leaders(region_routes);
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
@@ -236,6 +237,7 @@ impl DropTableExecutor {
}),
body: Some(region_request::Body::Drop(PbDropRegionRequest {
region_id: region_id.as_u64(),
fast_path,
})),
};
let datanode = datanode.clone();

View File

@@ -34,6 +34,7 @@ pub mod kv_backend;
pub mod leadership_notifier;
pub mod lock_key;
pub mod metrics;
pub mod node_expiry_listener;
pub mod node_manager;
pub mod peer;
pub mod range_stream;

View File

@@ -0,0 +1,152 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Mutex;
use std::time::Duration;
use common_telemetry::{debug, error, info, warn};
use tokio::task::JoinHandle;
use tokio::time::{interval, MissedTickBehavior};
use crate::cluster::{NodeInfo, NodeInfoKey};
use crate::error;
use crate::kv_backend::ResettableKvBackendRef;
use crate::leadership_notifier::LeadershipChangeListener;
use crate::rpc::store::RangeRequest;
use crate::rpc::KeyValue;
/// [NodeExpiryListener] periodically checks all node info in memory and removes
/// expired node info to prevent memory leak.
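/// The cleanup runs on a fixed 60-second interval (see `start` below), and only
/// while this metasrv node is the leader, via the [LeadershipChangeListener] impl.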
pub struct NodeExpiryListener {
handle: Mutex<Option<JoinHandle<()>>>,
max_idle_time: Duration,
in_memory: ResettableKvBackendRef,
}
impl Drop for NodeExpiryListener {
fn drop(&mut self) {
self.stop();
}
}
impl NodeExpiryListener {
pub fn new(max_idle_time: Duration, in_memory: ResettableKvBackendRef) -> Self {
Self {
handle: Mutex::new(None),
max_idle_time,
in_memory,
}
}
async fn start(&self) {
let mut handle = self.handle.lock().unwrap();
if handle.is_none() {
let in_memory = self.in_memory.clone();
let max_idle_time = self.max_idle_time;
let ticker_loop = tokio::spawn(async move {
// Run clean task every minute.
let mut interval = interval(Duration::from_secs(60));
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
loop {
interval.tick().await;
if let Err(e) = Self::clean_expired_nodes(&in_memory, max_idle_time).await {
error!(e; "Failed to clean expired node");
}
}
});
*handle = Some(ticker_loop);
}
}
fn stop(&self) {
if let Some(handle) = self.handle.lock().unwrap().take() {
handle.abort();
info!("Node expiry listener stopped")
}
}
/// Cleans expired nodes from memory.
async fn clean_expired_nodes(
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<()> {
let node_keys = Self::list_expired_nodes(in_memory, max_idle_time).await?;
for key in node_keys {
let key_bytes: Vec<u8> = (&key).into();
if let Err(e) = in_memory.delete(&key_bytes, false).await {
warn!(e; "Failed to delete expired node: {:?}", key_bytes);
} else {
debug!("Deleted expired node key: {:?}", key);
}
}
Ok(())
}
/// Lists expired nodes that have been inactive more than `max_idle_time`.
async fn list_expired_nodes(
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<impl Iterator<Item = NodeInfoKey>> {
let prefix = NodeInfoKey::key_prefix_with_cluster_id(0);
let req = RangeRequest::new().with_prefix(prefix);
let current_time_millis = common_time::util::current_time_millis();
let resp = in_memory.range(req).await?;
Ok(resp
.kvs
.into_iter()
.filter_map(move |KeyValue { key, value }| {
let Ok(info) = NodeInfo::try_from(value).inspect_err(|e| {
warn!(e; "Unrecognized node info value");
}) else {
return None;
};
if (current_time_millis - info.last_activity_ts) > max_idle_time.as_millis() as i64
{
NodeInfoKey::try_from(key)
.inspect_err(|e| {
warn!(e; "Unrecognized node info key: {:?}", info.peer);
})
.ok()
.inspect(|node_key| {
debug!("Found expired node: {:?}", node_key);
})
} else {
None
}
}))
}
}
#[async_trait::async_trait]
impl LeadershipChangeListener for NodeExpiryListener {
fn name(&self) -> &str {
"NodeExpiryListener"
}
async fn on_leader_start(&self) -> error::Result<()> {
self.start().await;
info!(
"On leader start, node expiry listener started with max idle time: {:?}",
self.max_idle_time
);
Ok(())
}
async fn on_leader_stop(&self) -> error::Result<()> {
self.stop();
info!("On leader stop, node expiry listener stopped");
Ok(())
}
}

View File

@@ -1218,7 +1218,10 @@ mod tests {
);
let response = mock_region_server
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.unwrap();
assert_eq!(response.affected_rows, 0);
@@ -1310,7 +1313,10 @@ mod tests {
.insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
mock_region_server
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.unwrap_err();

View File

@@ -32,5 +32,5 @@ pub mod types;
pub mod value;
pub mod vectors;
pub use arrow;
pub use arrow::{self, compute};
pub use error::{Error, Result};

View File

@@ -103,6 +103,11 @@ impl HeartbeatTask {
warn!("Heartbeat task started multiple times");
return Ok(());
}
self.create_streams().await
}
async fn create_streams(&self) -> Result<(), Error> {
info!("Start to establish the heartbeat connection to metasrv.");
let (req_sender, resp_stream) = self
.meta_client
@@ -231,6 +236,8 @@ impl HeartbeatTask {
// set the timeout to half of the report interval so that it wouldn't delay heartbeat if something went horribly wrong
latest_report = query_flow_state(&query_stat_size, report_interval / 2).await;
}
info!("flownode heartbeat task stopped.");
});
}
@@ -274,7 +281,7 @@ impl HeartbeatTask {
info!("Try to re-establish the heartbeat connection to metasrv.");
if self.start().await.is_ok() {
if self.create_streams().await.is_ok() {
break;
}
}

View File

@@ -198,13 +198,13 @@ impl Inner {
}
);
let leader = self
let leader_addr = self
.ask_leader
.as_ref()
.unwrap()
.get_leader()
.context(error::NoLeaderSnafu)?;
let mut leader = self.make_client(leader)?;
let mut leader = self.make_client(&leader_addr)?;
let (sender, receiver) = mpsc::channel::<HeartbeatRequest>(128);
@@ -236,7 +236,11 @@ impl Inner {
.await
.map_err(error::Error::from)?
.context(error::CreateHeartbeatStreamSnafu)?;
info!("Success to create heartbeat stream to server: {:#?}", res);
info!(
"Successfully created heartbeat stream to server: {}, response: {:#?}",
leader_addr, res
);
Ok((
HeartbeatSender::new(self.id, self.role, sender),

View File

@@ -23,8 +23,8 @@ pub struct CheckLeaderHandler;
#[async_trait::async_trait]
impl HeartbeatHandler for CheckLeaderHandler {
fn is_acceptable(&self, role: Role) -> bool {
role == Role::Datanode
fn is_acceptable(&self, _role: Role) -> bool {
true
}
async fn handle(

View File

@@ -157,7 +157,7 @@ fn extract_base_info(request: &HeartbeatRequest) -> Option<(NodeInfoKey, Peer, P
}
async fn put_into_memory_store(ctx: &mut Context, key: NodeInfoKey, value: NodeInfo) -> Result<()> {
let key = key.into();
let key = (&key).into();
let value = value.try_into().context(InvalidClusterInfoFormatSnafu)?;
let put_req = PutRequest {
key,

View File

@@ -32,6 +32,7 @@ use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBac
use common_meta::leadership_notifier::{
LeadershipChangeNotifier, LeadershipChangeNotifierCustomizerRef,
};
use common_meta::node_expiry_listener::NodeExpiryListener;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeperRef;
use common_meta::wal_options_allocator::WalOptionsAllocatorRef;
@@ -151,6 +152,8 @@ pub struct MetasrvOptions {
#[cfg(feature = "pg_kvbackend")]
/// Lock id for meta kv election. Only effect when using pg_kvbackend.
pub meta_election_lock_id: u64,
#[serde(with = "humantime_serde")]
pub node_max_idle_time: Duration,
}
const DEFAULT_METASRV_ADDR_PORT: &str = "3002";
@@ -192,6 +195,7 @@ impl Default for MetasrvOptions {
meta_table_name: DEFAULT_META_TABLE_NAME.to_string(),
#[cfg(feature = "pg_kvbackend")]
meta_election_lock_id: DEFAULT_META_ELECTION_LOCK_ID,
node_max_idle_time: Duration::from_secs(24 * 60 * 60),
}
}
}
@@ -442,6 +446,10 @@ impl Metasrv {
leadership_change_notifier.add_listener(self.wal_options_allocator.clone());
leadership_change_notifier
.add_listener(Arc::new(ProcedureManagerListenerAdapter(procedure_manager)));
leadership_change_notifier.add_listener(Arc::new(NodeExpiryListener::new(
self.options.node_max_idle_time,
self.in_memory.clone(),
)));
if let Some(region_supervisor_ticker) = &self.region_supervisor_ticker {
leadership_change_notifier.add_listener(region_supervisor_ticker.clone() as _);
}

View File

@@ -68,13 +68,15 @@ impl heartbeat_server::Heartbeat for Metasrv {
};
if pusher_id.is_none() {
pusher_id = register_pusher(&handler_group, header, tx.clone()).await;
pusher_id =
Some(register_pusher(&handler_group, header, tx.clone()).await);
}
if let Some(k) = &pusher_id {
METRIC_META_HEARTBEAT_RECV.with_label_values(&[&k.to_string()]);
} else {
METRIC_META_HEARTBEAT_RECV.with_label_values(&["none"]);
}
let res = handler_group
.handle(req, ctx.clone())
.await
@@ -173,13 +175,13 @@ async fn register_pusher(
handler_group: &HeartbeatHandlerGroup,
header: &RequestHeader,
sender: Sender<std::result::Result<HeartbeatResponse, tonic::Status>>,
) -> Option<PusherId> {
) -> PusherId {
let role = header.role();
let id = get_node_id(header);
let pusher_id = PusherId::new(role, id);
let pusher = Pusher::new(sender, header);
handler_group.register_pusher(pusher_id, pusher).await;
Some(pusher_id)
pusher_id
}
#[cfg(test)]

View File

@@ -17,13 +17,15 @@ use std::time::Duration;
use api::v1::meta::{
procedure_service_server, DdlTaskRequest as PbDdlTaskRequest,
DdlTaskResponse as PbDdlTaskResponse, MigrateRegionRequest, MigrateRegionResponse,
DdlTaskResponse as PbDdlTaskResponse, Error, MigrateRegionRequest, MigrateRegionResponse,
ProcedureDetailRequest, ProcedureDetailResponse, ProcedureStateResponse, QueryProcedureRequest,
ResponseHeader,
};
use common_meta::ddl::ExecutorContext;
use common_meta::rpc::ddl::{DdlTask, SubmitDdlTaskRequest};
use common_meta::rpc::procedure;
use snafu::{ensure, OptionExt, ResultExt};
use common_telemetry::warn;
use snafu::{OptionExt, ResultExt};
use tonic::{Request, Response};
use super::GrpcResult;
@@ -37,6 +39,16 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<QueryProcedureRequest>,
) -> GrpcResult<ProcedureStateResponse> {
if !self.is_leader() {
let resp = ProcedureStateResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `query procedure state` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let QueryProcedureRequest { header, pid, .. } = request.into_inner();
let _header = header.context(error::MissingRequestHeaderSnafu)?;
let pid = pid.context(error::MissingRequiredParameterSnafu { param: "pid" })?;
@@ -57,6 +69,16 @@ impl procedure_service_server::ProcedureService for Metasrv {
}
async fn ddl(&self, request: Request<PbDdlTaskRequest>) -> GrpcResult<PbDdlTaskResponse> {
if !self.is_leader() {
let resp = PbDdlTaskResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `ddl` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let PbDdlTaskRequest {
header,
query_context,
@@ -99,12 +121,15 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<MigrateRegionRequest>,
) -> GrpcResult<MigrateRegionResponse> {
ensure!(
self.meta_peer_client().is_leader(),
error::UnexpectedSnafu {
violated: "Trying to submit a region migration procedure to non-leader meta server"
}
);
if !self.is_leader() {
let resp = MigrateRegionResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `migrate` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let MigrateRegionRequest {
header,
@@ -150,6 +175,16 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<ProcedureDetailRequest>,
) -> GrpcResult<ProcedureDetailResponse> {
if !self.is_leader() {
let resp = ProcedureDetailResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `procedure details` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let ProcedureDetailRequest { header } = request.into_inner();
let _header = header.context(error::MissingRequestHeaderSnafu)?;
let metas = self

View File

@@ -30,9 +30,10 @@ impl MetricEngineInner {
pub async fn drop_region(
&self,
region_id: RegionId,
_req: RegionDropRequest,
req: RegionDropRequest,
) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);
let fast_path = req.fast_path;
// enclose the guard in a block to prevent the guard from polluting the async context
let (is_physical_region, is_physical_region_busy) = {
@@ -52,7 +53,7 @@ impl MetricEngineInner {
if is_physical_region {
// check if there is no logical region relates to this physical region
if is_physical_region_busy {
if is_physical_region_busy && !fast_path {
// reject if there is any present logical region
return Err(PhysicalRegionBusySnafu {
region_id: data_region_id,
@@ -60,9 +61,21 @@ impl MetricEngineInner {
.build());
}
self.drop_physical_region(data_region_id).await
return self.drop_physical_region(data_region_id).await;
}
if fast_path {
// For the fast path, we don't delete the metadata in the metadata region;
// we only remove the logical region from the engine state.
//
// The drop database procedure will ensure the metadata region and data region are dropped eventually.
self.state
.write()
.unwrap()
.remove_logical_region(region_id)?;
Ok(0)
} else {
// cannot merge these two `if` otherwise the stupid type checker will complain
let metadata_region_id = self
.state
.read()
@@ -87,13 +100,16 @@ impl MetricEngineInner {
// Since the physical regions are going to be dropped, we don't need to
// update the contents in metadata region.
self.mito
.handle_request(data_region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
data_region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;
self.mito
.handle_request(
metadata_region_id,
RegionRequest::Drop(RegionDropRequest {}),
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;

View File

@@ -187,9 +187,12 @@ impl FileCache {
}
/// Removes a file from the cache explicitly.
/// It always tries to remove the file from the local store because we may not have the file
/// in the memory index if the upload failed.
pub(crate) async fn remove(&self, key: IndexKey) {
let file_path = self.cache_file_path(key);
self.memory_index.remove(&key).await;
// Always delete the file from the local store.
if let Err(e) = self.local_store.delete(&file_path).await {
warn!(e; "Failed to delete a cached file {}", file_path);
}

View File

@@ -22,6 +22,7 @@ use common_telemetry::{debug, info};
use futures::AsyncWriteExt;
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::access_layer::{
new_fs_cache_store, FilePathProvider, RegionFilePathFactory, SstInfoArray, SstWriteRequest,
@@ -149,24 +150,41 @@ impl WriteCache {
return Ok(sst_info);
}
let mut upload_tracker = UploadTracker::new(region_id);
let mut err = None;
let remote_store = &upload_request.remote_store;
for sst in &sst_info {
let parquet_key = IndexKey::new(region_id, sst.file_id, FileType::Parquet);
let parquet_path = upload_request
.dest_path_provider
.build_sst_file_path(sst.file_id);
self.upload(parquet_key, &parquet_path, remote_store)
.await?;
if let Err(e) = self.upload(parquet_key, &parquet_path, remote_store).await {
err = Some(e);
break;
}
upload_tracker.push_uploaded_file(parquet_path);
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(region_id, sst.file_id, FileType::Puffin);
let puffin_path = &upload_request
let puffin_path = upload_request
.dest_path_provider
.build_index_file_path(sst.file_id);
self.upload(puffin_key, puffin_path, remote_store).await?;
if let Err(e) = self.upload(puffin_key, &puffin_path, remote_store).await {
err = Some(e);
break;
}
upload_tracker.push_uploaded_file(puffin_path);
}
}
if let Some(err) = err {
// Cleans files on failure.
upload_tracker
.clean(&sst_info, &self.file_cache, remote_store)
.await;
return Err(err);
}
Ok(sst_info)
}
@@ -332,6 +350,61 @@ pub struct SstUploadRequest {
pub remote_store: ObjectStore,
}
/// A struct to track files to upload and clean them up if the upload fails.
struct UploadTracker {
/// Id of the region to track.
region_id: RegionId,
/// Paths of files uploaded successfully.
files_uploaded: Vec<String>,
}
impl UploadTracker {
/// Creates a new instance of `UploadTracker` for a given region.
fn new(region_id: RegionId) -> Self {
Self {
region_id,
files_uploaded: Vec::new(),
}
}
/// Add a file path to the list of uploaded files.
fn push_uploaded_file(&mut self, path: String) {
self.files_uploaded.push(path);
}
/// Cleans uploaded files and files in the file cache at best effort.
async fn clean(
&self,
sst_info: &SstInfoArray,
file_cache: &FileCacheRef,
remote_store: &ObjectStore,
) {
common_telemetry::info!(
"Start cleaning files on upload failure, region: {}, num_ssts: {}",
self.region_id,
sst_info.len()
);
// Cleans files in the file cache first.
for sst in sst_info {
let parquet_key = IndexKey::new(self.region_id, sst.file_id, FileType::Parquet);
file_cache.remove(parquet_key).await;
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(self.region_id, sst.file_id, FileType::Puffin);
file_cache.remove(puffin_key).await;
}
}
// Cleans uploaded files.
for file_path in &self.files_uploaded {
if let Err(e) = remote_store.delete(file_path).await {
common_telemetry::error!(e; "Failed to delete file {}", file_path);
}
}
}
}
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;

View File

@@ -56,7 +56,10 @@ async fn test_engine_drop_region() {
// It's okay to drop a region doesn't exist.
engine
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.unwrap_err();
@@ -86,7 +89,10 @@ async fn test_engine_drop_region() {
// drop the created region.
engine
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.unwrap();
assert!(!engine.is_region_exists(region_id));
@@ -192,7 +198,10 @@ async fn test_engine_drop_region_for_custom_store() {
// Drop the custom region.
engine
.handle_request(custom_region_id, RegionRequest::Drop(RegionDropRequest {}))
.handle_request(
custom_region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.await
.unwrap();
assert!(!engine.is_region_exists(custom_region_id));

View File

@@ -35,8 +35,8 @@ use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState};
use store_api::region_request::{
AffectedRows, RegionAlterRequest, RegionCatchupRequest, RegionCloseRequest,
RegionCompactRequest, RegionCreateRequest, RegionDropRequest, RegionFlushRequest,
RegionOpenRequest, RegionRequest, RegionTruncateRequest,
RegionCompactRequest, RegionCreateRequest, RegionFlushRequest, RegionOpenRequest,
RegionRequest, RegionTruncateRequest,
};
use store_api::storage::{RegionId, SequenceNumber};
use tokio::sync::oneshot::{self, Receiver, Sender};
@@ -624,10 +624,10 @@ impl WorkerRequest {
sender: sender.into(),
request: DdlRequest::Create(v),
}),
RegionRequest::Drop(v) => WorkerRequest::Ddl(SenderDdlRequest {
RegionRequest::Drop(_) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
sender: sender.into(),
request: DdlRequest::Drop(v),
request: DdlRequest::Drop,
}),
RegionRequest::Open(v) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
@@ -690,7 +690,7 @@ impl WorkerRequest {
#[derive(Debug)]
pub(crate) enum DdlRequest {
Create(RegionCreateRequest),
Drop(RegionDropRequest),
Drop,
Open((RegionOpenRequest, Option<WalEntryReceiver>)),
Close(RegionCloseRequest),
Alter(RegionAlterRequest),

View File

@@ -836,7 +836,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
for ddl in ddl_requests.drain(..) {
let res = match ddl.request {
DdlRequest::Create(req) => self.handle_create_request(ddl.region_id, req).await,
DdlRequest::Drop(_) => self.handle_drop_request(ddl.region_id).await,
DdlRequest::Drop => self.handle_drop_request(ddl.region_id).await,
DdlRequest::Open((req, wal_entry_receiver)) => {
self.handle_open_request(ddl.region_id, req, wal_entry_receiver, ddl.sender)
.await;

View File

@@ -583,7 +583,8 @@ impl HistogramFoldStream {
.expect("field column should not be nullable");
counters.push(counter);
}
let result = Self::evaluate_row(self.quantile, &bucket, &counters)?;
// ignore invalid data
let result = Self::evaluate_row(self.quantile, &bucket, &counters).unwrap_or(f64::NAN);
self.output_buffer[self.field_column_index].push_value_ref(ValueRef::from(result));
cursor += bucket_num;
remaining_rows -= bucket_num;
@@ -672,7 +673,7 @@ impl HistogramFoldStream {
if bucket.len() <= 1 {
return Ok(f64::NAN);
}
if *bucket.last().unwrap() != f64::INFINITY {
if bucket.last().unwrap().is_finite() {
return Err(DataFusionError::Execution(
"last bucket should be +Inf".to_string(),
));
@@ -692,8 +693,8 @@ impl HistogramFoldStream {
}
// check input value
debug_assert!(bucket.windows(2).all(|w| w[0] <= w[1]));
debug_assert!(counter.windows(2).all(|w| w[0] <= w[1]));
debug_assert!(bucket.windows(2).all(|w| w[0] <= w[1]), "{bucket:?}");
debug_assert!(counter.windows(2).all(|w| w[0] <= w[1]), "{counter:?}");
let total = *counter.last().unwrap();
let expected_pos = total * quantile;

View File

@@ -21,6 +21,7 @@ mod idelta;
mod predict_linear;
mod quantile;
mod resets;
mod round;
#[cfg(test)]
mod test_util;
@@ -39,6 +40,7 @@ pub use idelta::IDelta;
pub use predict_linear::PredictLinear;
pub use quantile::QuantileOverTime;
pub use resets::Resets;
pub use round::Round;
pub(crate) fn extract_array(columnar_value: &ColumnarValue) -> Result<ArrayRef, DataFusionError> {
if let ColumnarValue::Array(array) = columnar_value {

View File

@@ -0,0 +1,105 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use datafusion::error::DataFusionError;
use datafusion_expr::{create_udf, ColumnarValue, ScalarUDF, Volatility};
use datatypes::arrow::array::AsArray;
use datatypes::arrow::datatypes::{DataType, Float64Type};
use datatypes::compute;
use crate::functions::extract_array;
pub struct Round {
nearest: f64,
}
impl Round {
fn new(nearest: f64) -> Self {
Self { nearest }
}
pub const fn name() -> &'static str {
"prom_round"
}
fn input_type() -> Vec<DataType> {
vec![DataType::Float64]
}
pub fn return_type() -> DataType {
DataType::Float64
}
pub fn scalar_udf(nearest: f64) -> ScalarUDF {
create_udf(
Self::name(),
Self::input_type(),
Self::return_type(),
Volatility::Immutable,
Arc::new(move |input: &_| Self::new(nearest).calc(input)) as _,
)
}
fn calc(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), 1);
let value_array = extract_array(&input[0])?;
if self.nearest == 0.0 {
let values = value_array.as_primitive::<Float64Type>();
let result = compute::unary::<_, _, Float64Type>(values, |a| a.round());
Ok(ColumnarValue::Array(Arc::new(result) as _))
} else {
let values = value_array.as_primitive::<Float64Type>();
let nearest = self.nearest;
let result =
compute::unary::<_, _, Float64Type>(values, |a| ((a / nearest).round() * nearest));
Ok(ColumnarValue::Array(Arc::new(result) as _))
}
}
}
#[cfg(test)]
mod tests {
use datatypes::arrow::array::Float64Array;
use super::*;
fn test_round_f64(value: Vec<f64>, nearest: f64, expected: Vec<f64>) {
let round_udf = Round::scalar_udf(nearest);
let input = vec![ColumnarValue::Array(Arc::new(Float64Array::from(value)))];
let result = round_udf.invoke_batch(&input, 1).unwrap();
let result_array = extract_array(&result).unwrap();
assert_eq!(result_array.len(), 1);
assert_eq!(
result_array.as_primitive::<Float64Type>().values(),
&expected
);
}
#[test]
fn test_round() {
test_round_f64(vec![123.456], 0.001, vec![123.456]);
test_round_f64(vec![123.456], 0.01, vec![123.46000000000001]);
test_round_f64(vec![123.456], 0.1, vec![123.5]);
test_round_f64(vec![123.456], 0.0, vec![123.0]);
test_round_f64(vec![123.456], 1.0, vec![123.0]);
test_round_f64(vec![123.456], 10.0, vec![120.0]);
test_round_f64(vec![123.456], 100.0, vec![100.0]);
test_round_f64(vec![123.456], 105.0, vec![105.0]);
test_round_f64(vec![123.456], 1000.0, vec![0.0]);
}
}
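For reference, the rounding rule implemented by Round::calc above is (value / nearest).round() * nearest, with nearest == 0.0 falling back to a plain round(). A minimal sketch (the free function round_to_nearest is ours, introduced only for illustration):
// Round `value` to the nearest multiple of `nearest`, mirroring Round::calc.
fn round_to_nearest(value: f64, nearest: f64) -> f64 {
    if nearest == 0.0 {
        value.round()
    } else {
        (value / nearest).round() * nearest
    }
}
fn main() {
    assert_eq!(round_to_nearest(123.456, 0.1), 123.5);
    assert_eq!(round_to_nearest(123.456, 10.0), 120.0);
    // Floating-point artifact: rounding to 0.01 yields 123.46000000000001,
    // which is exactly what the unit test above expects.
    assert_eq!(round_to_nearest(123.456, 0.01), 123.46000000000001);
}
The same caveat shows up in the sqlness results for round(cache_hit, 0.1) further down, e.g. 234.60000000000002.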

View File

@@ -18,7 +18,9 @@ use std::sync::Arc;
use arrow_schema::DataType;
use catalog::table_source::DfTableSourceProvider;
use common_function::aggr::{UddSketchState, UDDSKETCH_STATE_NAME};
use common_function::aggr::{
HllState, UddSketchState, HLL_MERGE_NAME, HLL_NAME, UDDSKETCH_STATE_NAME,
};
use common_function::scalars::udf::create_udf;
use common_query::logical_plan::create_aggregate_function;
use datafusion::common::TableReference;
@@ -169,6 +171,12 @@ impl ContextProvider for DfContextProviderAdapter {
if name == UDDSKETCH_STATE_NAME {
return Some(Arc::new(UddSketchState::udf_impl()));
}
if name == HLL_NAME {
return Some(Arc::new(HllState::state_udf_impl()));
}
if name == HLL_MERGE_NAME {
return Some(Arc::new(HllState::merge_udf_impl()));
}
self.engine_state.aggregate_function(name).map_or_else(
|| self.session_state.aggregate_functions().get(name).cloned(),

View File

@@ -52,7 +52,7 @@ use promql::extension_plan::{
use promql::functions::{
AbsentOverTime, AvgOverTime, Changes, CountOverTime, Delta, Deriv, HoltWinters, IDelta,
Increase, LastOverTime, MaxOverTime, MinOverTime, PredictLinear, PresentOverTime,
QuantileOverTime, Rate, Resets, StddevOverTime, StdvarOverTime, SumOverTime,
QuantileOverTime, Rate, Resets, Round, StddevOverTime, StdvarOverTime, SumOverTime,
};
use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME};
use promql_parser::parser::token::TokenType;
@@ -200,10 +200,9 @@ impl PromPlanner {
PromExpr::Paren(ParenExpr { expr }) => {
self.prom_expr_to_plan(expr, session_state).await?
}
PromExpr::Subquery(SubqueryExpr { .. }) => UnsupportedExprSnafu {
name: "Prom Subquery",
PromExpr::Subquery(expr) => {
self.prom_subquery_expr_to_plan(session_state, expr).await?
}
.fail()?,
PromExpr::NumberLiteral(lit) => self.prom_number_lit_to_plan(lit)?,
PromExpr::StringLiteral(lit) => self.prom_string_lit_to_plan(lit)?,
PromExpr::VectorSelector(selector) => {
@@ -218,6 +217,48 @@ impl PromPlanner {
Ok(res)
}
async fn prom_subquery_expr_to_plan(
&mut self,
session_state: &SessionState,
subquery_expr: &SubqueryExpr,
) -> Result<LogicalPlan> {
let SubqueryExpr {
expr, range, step, ..
} = subquery_expr;
let current_interval = self.ctx.interval;
if let Some(step) = step {
self.ctx.interval = step.as_millis() as _;
}
let current_start = self.ctx.start;
self.ctx.start -= range.as_millis() as i64 - self.ctx.interval;
let input = self.prom_expr_to_plan(expr, session_state).await?;
self.ctx.interval = current_interval;
self.ctx.start = current_start;
ensure!(!range.is_zero(), ZeroRangeSelectorSnafu);
let range_ms = range.as_millis() as _;
self.ctx.range = Some(range_ms);
let manipulate = RangeManipulate::new(
self.ctx.start,
self.ctx.end,
self.ctx.interval,
range_ms,
self.ctx
.time_index_column
.clone()
.expect("time index should be set in `setup_context`"),
self.ctx.field_columns.clone(),
input,
)
.context(DataFusionPlanningSnafu)?;
Ok(LogicalPlan::Extension(Extension {
node: Arc::new(manipulate),
}))
}
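// Worked example (editorial illustration, not part of this change): for
// `sum_over_time(metric_total[50s:10s])` evaluated at t = 10s with a 1s outer step,
// the inner expression is planned with interval = step = 10_000 ms and
// start = 10_000 - (50_000 - 10_000) = -30_000 ms, so it produces points at
// -30s, -20s, ..., 10s. The outer start/interval are then restored, and the
// RangeManipulate above folds the trailing 50s window back into one range vector
// per output step. This matches the sqlness case
// `tql eval (10, 10, '1s') sum_over_time(metric_total[50s:10s])` later in this
// change set, which returns 3.0 (the points at 0s and 10s).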
async fn prom_aggr_expr_to_plan(
&mut self,
session_state: &SessionState,
@@ -441,6 +482,7 @@ impl PromPlanner {
// if left plan or right plan tag is empty, means case like `scalar(...) + host` or `host + scalar(...)`
// under this case we only join on time index
left_context.tag_columns.is_empty() || right_context.tag_columns.is_empty(),
modifier,
)?;
let join_plan_schema = join_plan.schema().clone();
@@ -1468,6 +1510,20 @@ impl PromPlanner {
ScalarFunc::GeneratedExpr
}
"round" => {
let nearest = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t,
Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t as f64,
None => 0.0,
other => UnexpectedPlanExprSnafu {
desc: format!("expected f64 literal as t, but found {:?}", other),
}
.fail()?,
};
ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf(nearest)))
}
_ => {
if let Some(f) = session_state.scalar_functions().get(func.name) {
ScalarFunc::DataFusionBuiltin(f.clone())
@@ -1674,7 +1730,7 @@ impl PromPlanner {
ensure!(
!src_labels.is_empty(),
FunctionInvalidArgumentSnafu {
fn_name: "label_join",
fn_name: "label_join"
}
);
@@ -2121,24 +2177,49 @@ impl PromPlanner {
left_time_index_column: Option<String>,
right_time_index_column: Option<String>,
only_join_time_index: bool,
modifier: &Option<BinModifier>,
) -> Result<LogicalPlan> {
let mut left_tag_columns = if only_join_time_index {
vec![]
BTreeSet::new()
} else {
self.ctx
.tag_columns
.iter()
.map(Column::from_name)
.collect::<Vec<_>>()
.cloned()
.collect::<BTreeSet<_>>()
};
let mut right_tag_columns = left_tag_columns.clone();
// apply modifier
if let Some(modifier) = modifier {
// apply label modifier
if let Some(matching) = &modifier.matching {
match matching {
// keeps columns mentioned in `on`
LabelModifier::Include(on) => {
let mask = on.labels.iter().cloned().collect::<BTreeSet<_>>();
left_tag_columns = left_tag_columns.intersection(&mask).cloned().collect();
right_tag_columns =
right_tag_columns.intersection(&mask).cloned().collect();
}
// removes columns mentioned in `ignoring`
LabelModifier::Exclude(ignoring) => {
// doesn't check existence of label
for label in &ignoring.labels {
let _ = left_tag_columns.remove(label);
let _ = right_tag_columns.remove(label);
}
}
}
}
}
// push time index column if it exists
if let (Some(left_time_index_column), Some(right_time_index_column)) =
(left_time_index_column, right_time_index_column)
{
left_tag_columns.push(Column::from_name(left_time_index_column));
right_tag_columns.push(Column::from_name(right_time_index_column));
left_tag_columns.insert(left_time_index_column);
right_tag_columns.insert(right_time_index_column);
}
let right = LogicalPlanBuilder::from(right)
@@ -2154,7 +2235,16 @@ impl PromPlanner {
.join(
right,
JoinType::Inner,
(left_tag_columns, right_tag_columns),
(
left_tag_columns
.into_iter()
.map(Column::from_name)
.collect::<Vec<_>>(),
right_tag_columns
.into_iter()
.map(Column::from_name)
.collect::<Vec<_>>(),
),
None,
)
.context(DataFusionPlanningSnafu)?
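A minimal standalone sketch of the join-key selection above, under the simplifying assumption that the matching modifier is passed as either an on() list or an ignoring() list (the helper join_keys and its signature are ours, for illustration only): on(...) intersects the shared tag set with the listed labels, ignoring(...) removes them, and the time index column is always appended as a join key.
use std::collections::BTreeSet;
// Sketch: derive the equi-join keys for vector matching from the shared tag columns.
fn join_keys(tags: &[&str], on: Option<&[&str]>, ignoring: &[&str], time_index: &str) -> Vec<String> {
    let mut keys: BTreeSet<String> = tags.iter().map(|s| s.to_string()).collect();
    match on {
        // on(...): keep only the labels mentioned in the modifier.
        Some(on) => {
            let mask: BTreeSet<String> = on.iter().map(|s| s.to_string()).collect();
            keys = keys.intersection(&mask).cloned().collect();
        }
        // ignoring(...): drop the mentioned labels; a missing label is not an error.
        None => {
            for label in ignoring {
                keys.remove(*label);
            }
        }
    }
    let mut keys: Vec<String> = keys.into_iter().collect();
    keys.push(time_index.to_string());
    keys
}
fn main() {
    let tags = ["job", "instance", "path"];
    // `a / on(job) b` joins on `job` plus the time index.
    assert_eq!(join_keys(&tags, Some(&["job"][..]), &[], "ts"), vec!["job", "ts"]);
    // `a / ignoring(path) b` joins on every shared tag except `path`.
    assert_eq!(join_keys(&tags, None, &["path"], "ts"), vec!["instance", "job", "ts"]);
}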
@@ -3340,6 +3430,59 @@ mod test {
indie_query_plan_compare(query, expected).await;
}
#[tokio::test]
async fn test_hash_join() {
let mut eval_stmt = EvalStmt {
expr: PromExpr::NumberLiteral(NumberLiteral { val: 1.0 }),
start: UNIX_EPOCH,
end: UNIX_EPOCH
.checked_add(Duration::from_secs(100_000))
.unwrap(),
interval: Duration::from_secs(5),
lookback_delta: Duration::from_secs(1),
};
let case = r#"http_server_requests_seconds_sum{uri="/accounts/login"} / ignoring(kubernetes_pod_name,kubernetes_namespace) http_server_requests_seconds_count{uri="/accounts/login"}"#;
let prom_expr = parser::parse(case).unwrap();
eval_stmt.expr = prom_expr;
let table_provider = build_test_table_provider_with_fields(
&[
(
DEFAULT_SCHEMA_NAME.to_string(),
"http_server_requests_seconds_sum".to_string(),
),
(
DEFAULT_SCHEMA_NAME.to_string(),
"http_server_requests_seconds_count".to_string(),
),
],
&["uri", "kubernetes_namespace", "kubernetes_pod_name"],
)
.await;
// Should be ok
let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state())
.await
.unwrap();
let expected = r#"Projection: http_server_requests_seconds_count.uri, http_server_requests_seconds_count.kubernetes_namespace, http_server_requests_seconds_count.kubernetes_pod_name, http_server_requests_seconds_count.greptime_timestamp, http_server_requests_seconds_sum.greptime_value / http_server_requests_seconds_count.greptime_value AS http_server_requests_seconds_sum.greptime_value / http_server_requests_seconds_count.greptime_value
Inner Join: http_server_requests_seconds_sum.greptime_timestamp = http_server_requests_seconds_count.greptime_timestamp, http_server_requests_seconds_sum.uri = http_server_requests_seconds_count.uri
SubqueryAlias: http_server_requests_seconds_sum
PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp]
PromSeriesNormalize: offset=[0], time index=[greptime_timestamp], filter NaN: [false]
PromSeriesDivide: tags=["uri", "kubernetes_namespace", "kubernetes_pod_name"]
Sort: http_server_requests_seconds_sum.uri DESC NULLS LAST, http_server_requests_seconds_sum.kubernetes_namespace DESC NULLS LAST, http_server_requests_seconds_sum.kubernetes_pod_name DESC NULLS LAST, http_server_requests_seconds_sum.greptime_timestamp DESC NULLS LAST
Filter: http_server_requests_seconds_sum.uri = Utf8("/accounts/login") AND http_server_requests_seconds_sum.greptime_timestamp >= TimestampMillisecond(-1000, None) AND http_server_requests_seconds_sum.greptime_timestamp <= TimestampMillisecond(100001000, None)
TableScan: http_server_requests_seconds_sum
SubqueryAlias: http_server_requests_seconds_count
PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp]
PromSeriesNormalize: offset=[0], time index=[greptime_timestamp], filter NaN: [false]
PromSeriesDivide: tags=["uri", "kubernetes_namespace", "kubernetes_pod_name"]
Sort: http_server_requests_seconds_count.uri DESC NULLS LAST, http_server_requests_seconds_count.kubernetes_namespace DESC NULLS LAST, http_server_requests_seconds_count.kubernetes_pod_name DESC NULLS LAST, http_server_requests_seconds_count.greptime_timestamp DESC NULLS LAST
Filter: http_server_requests_seconds_count.uri = Utf8("/accounts/login") AND http_server_requests_seconds_count.greptime_timestamp >= TimestampMillisecond(-1000, None) AND http_server_requests_seconds_count.greptime_timestamp <= TimestampMillisecond(100001000, None)
TableScan: http_server_requests_seconds_count"#;
assert_eq!(plan.to_string(), expected);
}
#[tokio::test]
async fn test_nested_histogram_quantile() {
let mut eval_stmt = EvalStmt {

View File

@@ -15,7 +15,7 @@
use std::sync::Arc;
use common_error::ext::BoxedError;
use common_function::aggr::UddSketchState;
use common_function::aggr::{HllState, UddSketchState};
use common_function::function_registry::FUNCTION_REGISTRY;
use common_function::scalars::udf::create_udf;
use common_query::error::RegisterUdfSnafu;
@@ -127,6 +127,8 @@ impl SubstraitPlanDecoder for DefaultPlanDecoder {
.register_udf(udf)
.context(RegisterUdfSnafu { name: func.name() })?;
let _ = session_state.register_udaf(Arc::new(UddSketchState::udf_impl()));
let _ = session_state.register_udaf(Arc::new(HllState::state_udf_impl()));
let _ = session_state.register_udaf(Arc::new(HllState::merge_udf_impl()));
}
let logical_plan = DFLogicalSubstraitConvertor
.decode(message, session_state)

View File

@@ -327,7 +327,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
"host" STRING NULL INVERTED INDEX,
"cpu" DOUBLE NULL,
"disk" FLOAT NULL,
"msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'),
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'),
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
TIME INDEX ("ts"),
PRIMARY KEY ("id", "host")

View File

@@ -65,6 +65,7 @@ http-body = "1"
humantime.workspace = true
humantime-serde.workspace = true
hyper = { workspace = true, features = ["full"] }
indexmap = "2.7"
influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" }
itertools.workspace = true
jsonb.workspace = true

View File

@@ -1 +1 @@
v0.7.10
v0.7.11

View File

@@ -13,7 +13,7 @@
// limitations under the License.
//! prom supply the prometheus HTTP API Server compliance
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;
use axum::http::HeaderValue;
use axum::response::{IntoResponse, Response};
@@ -25,6 +25,7 @@ use common_recordbatch::RecordBatches;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{Float64Vector, StringVector, TimestampMillisecondVector};
use indexmap::IndexMap;
use promql_parser::label::METRIC_NAME;
use promql_parser::parser::value::ValueType;
use serde::{Deserialize, Serialize};
@@ -229,7 +230,9 @@ impl PrometheusJsonResponse {
})?;
let metric_name = (METRIC_NAME, metric_name.as_str());
let mut buffer = BTreeMap::<Vec<(&str, &str)>, Vec<(f64, String)>>::new();
// Preserves the order of output tags.
// Tag order matters, e.g., after sort and sort_desc, the output order must be kept.
let mut buffer = IndexMap::<Vec<(&str, &str)>, Vec<(f64, String)>>::new();
let schema = batches.schema();
for batch in batches.iter() {
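A tiny illustration of the BTreeMap-to-IndexMap swap above: IndexMap keeps insertion order, so series that arrive already ordered (e.g. by sort_desc) are emitted in that same order, while a BTreeMap would re-sort them by key. The series keys below are made up for the example.
use std::collections::BTreeMap;
use indexmap::IndexMap;
fn main() {
    // Pretend these rows arrive already sorted descending by value, as sort_desc would produce.
    let series = [("job=write", 4.0), ("job=read", 3.0)];
    let reordered: BTreeMap<_, _> = series.iter().cloned().collect();
    // BTreeMap sorts by key, discarding the incoming order.
    assert_eq!(reordered.keys().copied().collect::<Vec<_>>(), ["job=read", "job=write"]);
    let preserved: IndexMap<_, _> = series.iter().cloned().collect();
    // IndexMap keeps insertion order, which is what the response buffer needs.
    assert_eq!(preserved.keys().copied().collect::<Vec<_>>(), ["job=write", "job=read"]);
}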

View File

@@ -156,9 +156,9 @@ impl Display for Column {
if let Some(fulltext_options) = &self.extensions.fulltext_index_options {
if !fulltext_options.is_empty() {
let options = fulltext_options.kv_pairs();
write!(f, " FULLTEXT WITH({})", format_list_comma!(options))?;
write!(f, " FULLTEXT INDEX WITH({})", format_list_comma!(options))?;
} else {
write!(f, " FULLTEXT")?;
write!(f, " FULLTEXT INDEX")?;
}
}

View File

@@ -222,7 +222,12 @@ fn make_region_creates(creates: CreateRequests) -> Result<Vec<(RegionId, RegionR
fn parse_region_drop(drop: DropRequest) -> Result<(RegionId, RegionDropRequest)> {
let region_id = drop.region_id.into();
Ok((region_id, RegionDropRequest {}))
Ok((
region_id,
RegionDropRequest {
fast_path: drop.fast_path,
},
))
}
fn make_region_drop(drop: DropRequest) -> Result<Vec<(RegionId, RegionRequest)>> {
@@ -397,8 +402,10 @@ impl RegionCreateRequest {
}
}
#[derive(Debug, Clone, Default)]
pub struct RegionDropRequest {}
#[derive(Debug, Clone)]
pub struct RegionDropRequest {
pub fast_path: bool,
}
/// Open region request.
#[derive(Debug, Clone)]

View File

@@ -0,0 +1,84 @@
CREATE TABLE test_hll (
`id` INT PRIMARY KEY,
`value` STRING,
`ts` timestamp time index default now()
);
Affected Rows: 0
INSERT INTO test_hll (`id`, `value`) VALUES
(1, "a"),
(2, "b"),
(5, "e"),
(6, "f"),
(7, "g"),
(8, "h"),
(9, "i"),
(10, "j"),
(11, "i"),
(12, "j"),
(13, "i"),
(14, "n"),
(15, "o");
Affected Rows: 13
select hll_count(hll(`value`)) from test_hll;
+--------------------------------+
| hll_count(hll(test_hll.value)) |
+--------------------------------+
| 10 |
+--------------------------------+
INSERT INTO test_hll (`id`, `value`) VALUES
(16, "b"),
(17, "i"),
(18, "j"),
(19, "s"),
(20, "t");
Affected Rows: 5
select hll_count(hll(`value`)) from test_hll;
+--------------------------------+
| hll_count(hll(test_hll.value)) |
+--------------------------------+
| 12 |
+--------------------------------+
create table test_hll_merge (
`id` INT PRIMARY KEY,
`state` BINARY,
`ts` timestamp time index default now()
);
Affected Rows: 0
insert into test_hll_merge (`id`, `state`)
select 1, hll(`value`) from test_hll;
Affected Rows: 1
insert into test_hll_merge (`id`, `state`)
select 2, hll(`value`) from test_hll;
Affected Rows: 1
select hll_count(hll_merge(`state`)) from test_hll_merge;
+--------------------------------------------+
| hll_count(hll_merge(test_hll_merge.state)) |
+--------------------------------------------+
| 12 |
+--------------------------------------------+
drop table test_hll;
Affected Rows: 0
drop table test_hll_merge;
Affected Rows: 0

View File

@@ -0,0 +1,49 @@
CREATE TABLE test_hll (
`id` INT PRIMARY KEY,
`value` STRING,
`ts` timestamp time index default now()
);
INSERT INTO test_hll (`id`, `value`) VALUES
(1, "a"),
(2, "b"),
(5, "e"),
(6, "f"),
(7, "g"),
(8, "h"),
(9, "i"),
(10, "j"),
(11, "i"),
(12, "j"),
(13, "i"),
(14, "n"),
(15, "o");
select hll_count(hll(`value`)) from test_hll;
INSERT INTO test_hll (`id`, `value`) VALUES
(16, "b"),
(17, "i"),
(18, "j"),
(19, "s"),
(20, "t");
select hll_count(hll(`value`)) from test_hll;
create table test_hll_merge (
`id` INT PRIMARY KEY,
`state` BINARY,
`ts` timestamp time index default now()
);
insert into test_hll_merge (`id`, `state`)
select 1, hll(`value`) from test_hll;
insert into test_hll_merge (`id`, `state`)
select 2, hll(`value`) from test_hll;
select hll_count(hll_merge(`state`)) from test_hll_merge;
drop table test_hll;
drop table test_hll_merge;

View File

@@ -79,20 +79,20 @@ SELECT * FROM test WHERE MATCHES(message, 'hello') ORDER BY message;
-- SQLNESS ARG restart=true
SHOW CREATE TABLE test;
+-------+---------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+---------------------------------------------------------------------------------------+
| test | CREATE TABLE IF NOT EXISTS "test" ( |
| | "message" STRING NULL FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'true'), |
| | "time" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("time") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | append_mode = 'true' |
| | ) |
+-------+---------------------------------------------------------------------------------------+
+-------+---------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+---------------------------------------------------------------------------------------------+
| test | CREATE TABLE IF NOT EXISTS "test" ( |
| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true'), |
| | "time" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("time") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | append_mode = 'true' |
| | ) |
+-------+---------------------------------------------------------------------------------------------+
SHOW INDEX FROM test;
@@ -138,20 +138,20 @@ Affected Rows: 0
SHOW CREATE TABLE test;
+-------+---------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+---------------------------------------------------------------------------------------+
| test | CREATE TABLE IF NOT EXISTS "test" ( |
| | "message" STRING NULL FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'true'), |
| | "time" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("time") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | append_mode = 'true' |
| | ) |
+-------+---------------------------------------------------------------------------------------+
+-------+---------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+---------------------------------------------------------------------------------------------+
| test | CREATE TABLE IF NOT EXISTS "test" ( |
| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true'), |
| | "time" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("time") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | append_mode = 'true' |
| | ) |
+-------+---------------------------------------------------------------------------------------------+
SHOW INDEX FROM test;

View File

@@ -7,18 +7,18 @@ Affected Rows: 0
SHOW CREATE TABLE log;
+-------+------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+------------------------------------------------------------------------------------+
| log | CREATE TABLE IF NOT EXISTS "log" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-------+------------------------------------------------------------------------------------+
+-------+------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+------------------------------------------------------------------------------------------+
| log | CREATE TABLE IF NOT EXISTS "log" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-------+------------------------------------------------------------------------------------------+
DROP TABLE log;
@@ -33,18 +33,18 @@ Affected Rows: 0
SHOW CREATE TABLE log_with_opts;
+---------------+-----------------------------------------------------------------------------------+
| Table | Create Table |
+---------------+-----------------------------------------------------------------------------------+
| log_with_opts | CREATE TABLE IF NOT EXISTS "log_with_opts" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'true'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+---------------+-----------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------+
| Table | Create Table |
+---------------+-----------------------------------------------------------------------------------------+
| log_with_opts | CREATE TABLE IF NOT EXISTS "log_with_opts" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'true'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+---------------+-----------------------------------------------------------------------------------------+
DROP TABLE log_with_opts;
@@ -60,19 +60,19 @@ Affected Rows: 0
SHOW CREATE TABLE log_multi_fulltext_cols;
+-------------------------+-------------------------------------------------------------------------------------+
| Table | Create Table |
+-------------------------+-------------------------------------------------------------------------------------+
| log_multi_fulltext_cols | CREATE TABLE IF NOT EXISTS "log_multi_fulltext_cols" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), |
| | "msg2" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-------------------------+-------------------------------------------------------------------------------------+
+-------------------------+-------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------------------------+-------------------------------------------------------------------------------------------+
| log_multi_fulltext_cols | CREATE TABLE IF NOT EXISTS "log_multi_fulltext_cols" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'), |
| | "msg2" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'), |
| | TIME INDEX ("ts") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-------------------------+-------------------------------------------------------------------------------------------+
DROP TABLE log_multi_fulltext_cols;

View File

@@ -0,0 +1,81 @@
create table cache_hit (
ts timestamp time index,
job string,
greptime_value double,
primary key (job)
);
Affected Rows: 0
insert into cache_hit values
(3000, "read", 123.45),
(3000, "write", 234.567),
(4000, "read", 345.678),
(4000, "write", 456.789);
Affected Rows: 4
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.01);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.45 | read |
| 1970-01-01T00:00:03 | 234.57 | write |
| 1970-01-01T00:00:04 | 345.68 | read |
| 1970-01-01T00:00:04 | 456.79 | write |
+---------------------+----------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.1);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.5 | read |
| 1970-01-01T00:00:03 | 234.60000000000002 | write |
| 1970-01-01T00:00:04 | 345.70000000000005 | read |
| 1970-01-01T00:00:04 | 456.8 | write |
+---------------------+----------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 1.0);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+----------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+----------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 10.0);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 120.0 | read |
| 1970-01-01T00:00:03 | 230.0 | write |
| 1970-01-01T00:00:04 | 350.0 | read |
| 1970-01-01T00:00:04 | 460.0 | write |
+---------------------+----------------------------+-------+
drop table cache_hit;
Affected Rows: 0

View File

@@ -0,0 +1,30 @@
create table cache_hit (
ts timestamp time index,
job string,
greptime_value double,
primary key (job)
);
insert into cache_hit values
(3000, "read", 123.45),
(3000, "write", 234.567),
(4000, "read", 345.678),
(4000, "write", 456.789);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.01);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.1);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 1.0);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 10.0);
drop table cache_hit;

View File

@@ -638,3 +638,78 @@ drop table cache_miss;
Affected Rows: 0
create table cache_hit_with_null_label (
ts timestamp time index,
job string,
null_label string null,
greptime_value double,
primary key (job, null_label)
);
Affected Rows: 0
create table cache_miss_with_null_label (
ts timestamp time index,
job string,
null_label string null,
greptime_value double,
primary key (job, null_label)
);
Affected Rows: 0
insert into cache_hit_with_null_label values
(3000, "read", null, 1.0),
(3000, "write", null, 2.0),
(4000, "read", null, 3.0),
(4000, "write", null, 4.0);
Affected Rows: 4
insert into cache_miss_with_null_label values
(3000, "read", null, 1.0),
(3000, "write", null, 2.0),
(4000, "read", null, 1.0),
(4000, "write", null, 2.0);
Affected Rows: 4
-- SQLNESS SORT_RESULT 3 1
-- null!=null, so it returns the empty set.
tql eval (3, 4, '1s') cache_hit_with_null_label / (cache_miss_with_null_label + cache_hit_with_null_label);
++
++
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') cache_hit_with_null_label / ignoring(null_label) (cache_miss_with_null_label + ignoring(null_label) cache_hit_with_null_label);
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
| job | null_label | ts | lhs.greptime_value / rhs.cache_miss_with_null_label.greptime_value + cache_hit_with_null_label.greptime_value |
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
| read | | 1970-01-01T00:00:03 | 0.5 |
| read | | 1970-01-01T00:00:04 | 0.75 |
| write | | 1970-01-01T00:00:03 | 0.5 |
| write | | 1970-01-01T00:00:04 | 0.6666666666666666 |
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') cache_hit_with_null_label / on(job) (cache_miss_with_null_label + on(job) cache_hit_with_null_label);
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
| job | null_label | ts | lhs.greptime_value / rhs.cache_miss_with_null_label.greptime_value + cache_hit_with_null_label.greptime_value |
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
| read | | 1970-01-01T00:00:03 | 0.5 |
| read | | 1970-01-01T00:00:04 | 0.75 |
| write | | 1970-01-01T00:00:03 | 0.5 |
| write | | 1970-01-01T00:00:04 | 0.6666666666666666 |
+-------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+
drop table cache_hit_with_null_label;
Affected Rows: 0
drop table cache_miss_with_null_label;
Affected Rows: 0

View File

@@ -295,3 +295,45 @@ tql eval (3, 4, '1s') cache_hit / (cache_miss + cache_hit);
drop table cache_hit;
drop table cache_miss;
create table cache_hit_with_null_label (
ts timestamp time index,
job string,
null_label string null,
greptime_value double,
primary key (job, null_label)
);
create table cache_miss_with_null_label (
ts timestamp time index,
job string,
null_label string null,
greptime_value double,
primary key (job, null_label)
);
insert into cache_hit_with_null_label values
(3000, "read", null, 1.0),
(3000, "write", null, 2.0),
(4000, "read", null, 3.0),
(4000, "write", null, 4.0);
insert into cache_miss_with_null_label values
(3000, "read", null, 1.0),
(3000, "write", null, 2.0),
(4000, "read", null, 1.0),
(4000, "write", null, 2.0);
-- SQLNESS SORT_RESULT 3 1
-- null!=null, so it returns the empty set.
tql eval (3, 4, '1s') cache_hit_with_null_label / (cache_miss_with_null_label + cache_hit_with_null_label);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') cache_hit_with_null_label / ignoring(null_label) (cache_miss_with_null_label + ignoring(null_label) cache_hit_with_null_label);
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') cache_hit_with_null_label / on(job) (cache_miss_with_null_label + on(job) cache_hit_with_null_label);
drop table cache_hit_with_null_label;
drop table cache_miss_with_null_label;

View File

@@ -295,3 +295,40 @@ drop table histogram3_bucket;
Affected Rows: 0
-- test with invalid data (unaligned buckets)
create table histogram4_bucket (
ts timestamp time index,
le string,
s string,
val double,
primary key (s, le),
);
Affected Rows: 0
insert into histogram4_bucket values
(2900000, "0.1", "a", 0),
(2900000, "1", "a", 10),
(2900000, "5", "a", 20),
(2900000, "+Inf", "a", 150),
(3000000, "0.1", "a", 50),
(3000000, "1", "a", 70),
(3000000, "5", "a", 120),
-- INF here is missing
;
Affected Rows: 7
tql eval (2900, 3000, '100s') histogram_quantile(0.9, histogram4_bucket);
+---------------------+---+-----+
| ts | s | val |
+---------------------+---+-----+
| 1970-01-01T00:48:20 | a | 5.0 |
| 1970-01-01T00:50:00 | a | 5.0 |
+---------------------+---+-----+
drop table histogram4_bucket;
Affected Rows: 0

View File

@@ -163,3 +163,27 @@ insert into histogram3_bucket values
tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m])));
drop table histogram3_bucket;
-- test with invalid data (unaligned buckets)
create table histogram4_bucket (
ts timestamp time index,
le string,
s string,
val double,
primary key (s, le),
);
insert into histogram4_bucket values
(2900000, "0.1", "a", 0),
(2900000, "1", "a", 10),
(2900000, "5", "a", 20),
(2900000, "+Inf", "a", 150),
(3000000, "0.1", "a", 50),
(3000000, "1", "a", 70),
(3000000, "5", "a", 120),
-- INF here is missing
;
tql eval (2900, 3000, '100s') histogram_quantile(0.9, histogram4_bucket);
drop table histogram4_bucket;

View File

@@ -0,0 +1,65 @@
create table metric_total (
ts timestamp time index,
val double,
);
Affected Rows: 0
insert into metric_total values
(0, 1),
(10000, 2);
Affected Rows: 2
tql eval (10, 10, '1s') sum_over_time(metric_total[50s:10s]);
+---------------------+----------------------------------+
| ts | prom_sum_over_time(ts_range,val) |
+---------------------+----------------------------------+
| 1970-01-01T00:00:10 | 3.0 |
+---------------------+----------------------------------+
tql eval (10, 10, '1s') sum_over_time(metric_total[50s:5s]);
+---------------------+----------------------------------+
| ts | prom_sum_over_time(ts_range,val) |
+---------------------+----------------------------------+
| 1970-01-01T00:00:10 | 4.0 |
+---------------------+----------------------------------+
tql eval (300, 300, '1s') sum_over_time(metric_total[50s:10s]);
+---------------------+----------------------------------+
| ts | prom_sum_over_time(ts_range,val) |
+---------------------+----------------------------------+
| 1970-01-01T00:05:00 | 10.0 |
+---------------------+----------------------------------+
tql eval (359, 359, '1s') sum_over_time(metric_total[60s:10s]);
+---------------------+----------------------------------+
| ts | prom_sum_over_time(ts_range,val) |
+---------------------+----------------------------------+
| 1970-01-01T00:05:59 | 2.0 |
+---------------------+----------------------------------+
tql eval (10, 10, '1s') rate(metric_total[20s:10s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:10 | 0.1 |
+---------------------+----------------------------+
tql eval (20, 20, '1s') rate(metric_total[20s:5s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:20 | 0.06666666666666667 |
+---------------------+----------------------------+
drop table metric_total;
Affected Rows: 0

View File

@@ -0,0 +1,22 @@
create table metric_total (
ts timestamp time index,
val double,
);
insert into metric_total values
(0, 1),
(10000, 2);
tql eval (10, 10, '1s') sum_over_time(metric_total[50s:10s]);
tql eval (10, 10, '1s') sum_over_time(metric_total[50s:5s]);
tql eval (300, 300, '1s') sum_over_time(metric_total[50s:10s]);
tql eval (359, 359, '1s') sum_over_time(metric_total[60s:10s]);
tql eval (10, 10, '1s') rate(metric_total[20s:10s]);
tql eval (20, 20, '1s') rate(metric_total[20s:5s]);
drop table metric_total;

View File

@@ -373,20 +373,20 @@ Affected Rows: 0
show create table test_column_constrain_composite_indexes;
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
| test_column_constrain_composite_indexes | CREATE TABLE IF NOT EXISTS "test_column_constrain_composite_indexes" ( |
| | "id" INT NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, |
| | "host" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false') SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| test_column_constrain_composite_indexes | CREATE TABLE IF NOT EXISTS "test_column_constrain_composite_indexes" ( |
| | "id" INT NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, |
| | "host" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false') SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
drop table test_column_constrain_composite_indexes;