From e1e39993f7847821da113b0102357ec6b07ec0f0 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Wed, 11 Dec 2024 17:25:56 +0800 Subject: [PATCH 01/46] feat(vector): add scalar add function (#5119) * refactor: extract implicit conversion helper functions of vector Signed-off-by: Zhenchi * feat(vector): add scalar add function Signed-off-by: Zhenchi * fix fmt Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- src/common/function/src/scalars/vector.rs | 4 + .../function/src/scalars/vector/impl_conv.rs | 1 - .../function/src/scalars/vector/scalar_add.rs | 173 ++++++++++++++++++ .../function/vector/vector_scalar.result | 48 +++++ .../common/function/vector/vector_scalar.sql | 11 ++ 5 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 src/common/function/src/scalars/vector/scalar_add.rs create mode 100644 tests/cases/standalone/common/function/vector/vector_scalar.result create mode 100644 tests/cases/standalone/common/function/vector/vector_scalar.sql diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 7c8cf5550e..0c0428ce9a 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -15,6 +15,7 @@ mod convert; mod distance; pub(crate) mod impl_conv; +mod scalar_add; use std::sync::Arc; @@ -32,5 +33,8 @@ impl VectorFunction { registry.register(Arc::new(distance::CosDistanceFunction)); registry.register(Arc::new(distance::DotProductFunction)); registry.register(Arc::new(distance::L2SqDistanceFunction)); + + // scalar calculation + registry.register(Arc::new(scalar_add::ScalarAddFunction)); } } diff --git a/src/common/function/src/scalars/vector/impl_conv.rs b/src/common/function/src/scalars/vector/impl_conv.rs index 903bfb2a03..70a142c290 100644 --- a/src/common/function/src/scalars/vector/impl_conv.rs +++ b/src/common/function/src/scalars/vector/impl_conv.rs @@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result> { }) } -#[allow(unused)] /// Convert a vector literal to a binary literal. pub fn veclit_to_binlit(vec: &[f32]) -> Vec { if cfg!(target_endian = "little") { diff --git a/src/common/function/src/scalars/vector/scalar_add.rs b/src/common/function/src/scalars/vector/scalar_add.rs new file mode 100644 index 0000000000..ef016eff4b --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_add.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
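As a quick orientation for this new file: the core of `vec_scalar_add` is a plain element-wise addition done through nalgebra, using the same `DVectorView::from_slice` / `add_scalar` calls that appear in `eval` further down. A minimal sketch of that computation follows, assuming nalgebra is available; the helper name is illustrative and not part of the patch, and the string/binary literal conversions handled by `impl_conv` are left out.

    use nalgebra::DVectorView;

    // Add `scalar` to every element of `values`; mirrors the eval loop below,
    // minus the vector-literal parsing and binary encoding steps.
    fn scalar_add(scalar: f32, values: &[f32]) -> Vec<f32> {
        let view = DVectorView::from_slice(values, values.len());
        view.add_scalar(scalar).as_slice().to_vec()
    }

    // scalar_add(1.0, &[1.0, 2.0, 3.0]) yields [2.0, 3.0, 4.0], matching the SQL
    // example in the doc comment below.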
+ +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_add"; + +/// Adds a scalar to each element of a vector. +/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,3,4] | +/// +---------+ +/// +/// -- Negative scalar to simulate subtraction +/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [0,1,2] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarAddFunction; + +impl Function for ScalarAddFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.add_scalar(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarAddFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_add() { + let func = ScalarAddFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(1.0), + Some(-1.0), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice()) + ); + assert_eq!( + 
result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.result b/tests/cases/standalone/common/function/vector/vector_scalar.result new file mode 100644 index 0000000000..5750a0adfd --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.result @@ -0,0 +1,48 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [2,3] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + ++---------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),Utf8("[1.0, 2.0]"))) | ++---------------------------------------------------------------+ +| [0,1] | ++---------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [2,3] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + ++--------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),parse_vec(Utf8("[1.0, 2.0]")))) | ++--------------------------------------------------------------------------+ +| [0,1] | ++--------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + ++------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [2,3] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + ++-------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(-1),Utf8("[1.0, 2.0]"))) | ++-------------------------------------------------------------+ +| [0,1] | ++-------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.sql b/tests/cases/standalone/common/function/vector/vector_scalar.sql new file mode 100644 index 0000000000..e438ac6a40 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.sql @@ -0,0 +1,11 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); From 1a8e77a480cdd0b4d625c919b3594b27ddf76207 Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Wed, 11 Dec 2024 17:28:13 +0800 Subject: [PATCH 02/46] test: part of parser test migrated from duckdb (#5125) * test: update test * fix: fix test --- 
.../standalone/common/parser/parser.result | 50 +++++++++++++++++++ .../cases/standalone/common/parser/parser.sql | 35 +++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 tests/cases/standalone/common/parser/parser.result create mode 100644 tests/cases/standalone/common/parser/parser.sql diff --git a/tests/cases/standalone/common/parser/parser.result b/tests/cases/standalone/common/parser/parser.result new file mode 100644 index 0000000000..7e6dce85b7 --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.result @@ -0,0 +1,50 @@ +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +Affected Rows: 0 + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +Affected Rows: 2 + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Invalid function 'columns'. +Did you mean 'COUNT'? + +SELECT i, j FROM (SELECT * FROM integers); + ++----+----+ +| i | j | ++----+----+ +| 42 | 84 | +| 13 | 14 | ++----+----+ + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + ++-------+-------+-------+-------+ +| min_i | min_j | max_i | max_j | ++-------+-------+-------+-------+ +| 13 | 14 | 42 | 84 | ++-------+-------+-------+-------+ + +DROP TABLE integers; + +Affected Rows: 0 + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + ++---------------------------------------------------------------------------------------+ +| Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) | ++---------------------------------------------------------------------------------------+ +| 8 | ++---------------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/parser/parser.sql b/tests/cases/standalone/common/parser/parser.sql new file mode 100644 index 0000000000..bd7dcbf400 --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.sql @@ -0,0 +1,35 @@ + +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test + +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +SELECT i, j FROM (SELECT * FROM integers); + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + +DROP TABLE integers; + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; + +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; + +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + +-- skipped, unsupported feature: dollar quotes +-- SELECT 
$$$$ = ''; + +-- skipped, unsupported feature: from_first, see also: +-- https://github.com/GreptimeTeam/greptimedb/issues/5012 +-- FROM integers; + +-- skipped, unsupported feature: function chaining +-- SELECT "abcd".upper().lower(); From 9da2e17d0e0a6302e243f8fefe1c636b0497d45d Mon Sep 17 00:00:00 2001 From: ZonaHe Date: Wed, 11 Dec 2024 20:47:59 +0800 Subject: [PATCH 03/46] feat: update dashboard to v0.7.2 (#5141) Co-authored-by: sunchanglong --- src/servers/dashboard/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 63f2359f64..2c0a9c7b77 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.7.1 +v0.7.2 From 60f8dbf7f01dc08e43b1145f7444ff467d741e38 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 11 Dec 2024 21:33:54 +0800 Subject: [PATCH 04/46] feat: implement `v1/sql/parse` endpoint to parse GreptimeDB's SQL dialect (#5144) * derive ser/de Signed-off-by: Ruihang Xia * impl method Signed-off-by: Ruihang Xia * fix typo Signed-off-by: Ruihang Xia * remove deserialize Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 2 ++ Cargo.toml | 1 + src/servers/src/error.rs | 10 ++++++++- src/servers/src/http.rs | 4 ++++ src/servers/src/http/handler.rs | 28 +++++++++++++++++++++++- src/sql/Cargo.toml | 1 + src/sql/src/statements/admin.rs | 3 ++- src/sql/src/statements/alter.rs | 11 +++++----- src/sql/src/statements/copy.rs | 11 +++++----- src/sql/src/statements/create.rs | 21 +++++++++--------- src/sql/src/statements/cursor.rs | 7 +++--- src/sql/src/statements/delete.rs | 3 ++- src/sql/src/statements/describe.rs | 3 ++- src/sql/src/statements/drop.rs | 9 ++++---- src/sql/src/statements/explain.rs | 3 ++- src/sql/src/statements/insert.rs | 3 ++- src/sql/src/statements/option_map.rs | 4 +++- src/sql/src/statements/query.rs | 3 ++- src/sql/src/statements/set_variables.rs | 3 ++- src/sql/src/statements/show.rs | 29 +++++++++++++------------ src/sql/src/statements/statement.rs | 7 ++++-- src/sql/src/statements/tql.rs | 9 ++++---- src/sql/src/statements/truncate.rs | 3 ++- tests-integration/tests/http.rs | 8 +++++++ 24 files changed, 128 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 628c6a5824..311caafcb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11295,6 +11295,7 @@ dependencies = [ "jsonb", "lazy_static", "regex", + "serde", "serde_json", "snafu 0.8.5", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", @@ -11371,6 +11372,7 @@ dependencies = [ "lazy_static", "log", "regex", + "serde", "sqlparser 0.45.0 (registry+https://github.com/rust-lang/crates.io-index)", "sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", ] diff --git a/Cargo.toml b/Cargo.toml index d1d360850e..990bc71a90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -180,6 +180,7 @@ sysinfo = "0.30" # on branch v0.44.x sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [ "visitor", + "serde", ] } strum = { version = "0.25", features = ["derive"] } tempfile = "3" diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 6682a1c789..071de93683 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -189,6 +189,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to parse query"))] + 
FailedToParseQuery { + #[snafu(implicit)] + location: Location, + source: sql::error::Error, + }, + #[snafu(display("Failed to parse InfluxDB line protocol"))] InfluxdbLineProtocol { #[snafu(implicit)] @@ -651,7 +658,8 @@ impl ErrorExt for Error { | OpenTelemetryLog { .. } | UnsupportedJsonDataTypeForTag { .. } | InvalidTableName { .. } - | PrepareStatementNotFound { .. } => StatusCode::InvalidArguments, + | PrepareStatementNotFound { .. } + | FailedToParseQuery { .. } => StatusCode::InvalidArguments, Catalog { source, .. } => source.status_code(), RowWriter { source, .. } => source.status_code(), diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index d8d07ed31f..1107870c9a 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -755,6 +755,10 @@ impl HttpServer { fn route_sql(api_state: ApiState) -> Router { Router::new() .route("/sql", routing::get(handler::sql).post(handler::sql)) + .route( + "/sql/parse", + routing::get(handler::sql_parse).post(handler::sql_parse), + ) .route( "/promql", routing::get(handler::promql).post(handler::promql), diff --git a/src/servers/src/http/handler.rs b/src/servers/src/http/handler.rs index 15a1a0e16c..153b824d6e 100644 --- a/src/servers/src/http/handler.rs +++ b/src/servers/src/http/handler.rs @@ -30,8 +30,13 @@ use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING}; use serde::{Deserialize, Serialize}; use serde_json::Value; use session::context::{Channel, QueryContext, QueryContextRef}; +use snafu::ResultExt; +use sql::dialect::GreptimeDbDialect; +use sql::parser::{ParseOptions, ParserContext}; +use sql::statements::statement::Statement; use super::header::collect_plan_metrics; +use crate::error::{FailedToParseQuerySnafu, InvalidQuerySnafu, Result}; use crate::http::result::arrow_result::ArrowResponse; use crate::http::result::csv_result::CsvResponse; use crate::http::result::error_result::ErrorResponse; @@ -146,10 +151,31 @@ pub async fn sql( resp.with_execution_time(start.elapsed().as_millis() as u64) } +/// Handler to parse sql +#[axum_macros::debug_handler] +#[tracing::instrument(skip_all, fields(protocol = "http", request_type = "sql"))] +pub async fn sql_parse( + Query(query_params): Query, + Form(form_params): Form, +) -> Result>> { + let Some(sql) = query_params.sql.or(form_params.sql) else { + return InvalidQuerySnafu { + reason: "sql parameter is required.", + } + .fail(); + }; + + let stmts = + ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default()) + .context(FailedToParseQuerySnafu)?; + + Ok(stmts.into()) +} + /// Create a response from query result pub async fn from_output( outputs: Vec>, -) -> Result<(Vec, HashMap), ErrorResponse> { +) -> std::result::Result<(Vec, HashMap), ErrorResponse> { // TODO(sunng87): this api response structure cannot represent error well. 
// It hides successful execution results from error response let mut results = Vec::with_capacity(outputs.len()); diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index e3340a8f6c..3cb81d6dd4 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -30,6 +30,7 @@ itertools.workspace = true jsonb.workspace = true lazy_static.workspace = true regex.workspace = true +serde.workspace = true serde_json.workspace = true snafu.workspace = true sqlparser.workspace = true diff --git a/src/sql/src/statements/admin.rs b/src/sql/src/statements/admin.rs index bbe805a4c1..ed068ea475 100644 --- a/src/sql/src/statements/admin.rs +++ b/src/sql/src/statements/admin.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::Function; /// `ADMIN` statement to execute some administration commands. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Admin { /// Run a admin function. Func(Function), diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index cf59257e89..174bdbbdc3 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -18,10 +18,11 @@ use api::v1; use common_query::AddColumnLocation; use datatypes::schema::FulltextOptions; use itertools::Itertools; +use serde::Serialize; use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint}; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterTable { pub table_name: ObjectName, pub alter_operation: AlterTableOperation, @@ -56,7 +57,7 @@ impl Display for AlterTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), @@ -151,7 +152,7 @@ impl Display for AlterTableOperation { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct KeyValueOption { pub key: String, pub value: String, @@ -166,7 +167,7 @@ impl From for v1::Option { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterDatabase { pub database_name: ObjectName, pub alter_operation: AlterDatabaseOperation, @@ -197,7 +198,7 @@ impl Display for AlterDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterDatabaseOperation { SetDatabaseOption { options: Vec }, UnsetDatabaseOption { keys: Vec }, diff --git a/src/sql/src/statements/copy.rs b/src/sql/src/statements/copy.rs index c68b9d8c03..436d86d3ab 100644 --- a/src/sql/src/statements/copy.rs +++ b/src/sql/src/statements/copy.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; use crate::statements::OptionMap; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Copy { CopyTable(CopyTable), CopyDatabase(CopyDatabase), @@ -34,7 +35,7 @@ impl Display for Copy { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum CopyTable { 
To(CopyTableArgument), From(CopyTableArgument), @@ -65,7 +66,7 @@ impl Display for CopyTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum CopyDatabase { To(CopyDatabaseArgument), From(CopyDatabaseArgument), @@ -96,7 +97,7 @@ impl Display for CopyDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyDatabaseArgument { pub database_name: ObjectName, pub with: OptionMap, @@ -104,7 +105,7 @@ pub struct CopyDatabaseArgument { pub location: String, } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyTableArgument { pub table_name: ObjectName, pub with: OptionMap, diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 20ed7b5559..e4ea46572e 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -18,6 +18,7 @@ use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; use datatypes::schema::FulltextOptions; use itertools::Itertools; +use serde::Serialize; use snafu::ResultExt; use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; @@ -58,7 +59,7 @@ fn format_table_constraint(constraints: &[TableConstraint]) -> String { } /// Table constraint for create table statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub enum TableConstraint { /// Primary key constraint. PrimaryKey { columns: Vec }, @@ -84,7 +85,7 @@ impl Display for TableConstraint { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTable { /// Create if not exists pub if_not_exists: bool, @@ -100,7 +101,7 @@ pub struct CreateTable { } /// Column definition in `CREATE TABLE` statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Column { /// `ColumnDef` from `sqlparser::ast` pub column_def: ColumnDef, @@ -109,7 +110,7 @@ pub struct Column { } /// Column extensions for greptimedb dialect. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default, Serialize)] pub struct ColumnExtensions { /// Fulltext options. 
pub fulltext_options: Option, @@ -172,7 +173,7 @@ impl ColumnExtensions { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Partitions { pub column_list: Vec, pub exprs: Vec, @@ -244,7 +245,7 @@ impl Display for CreateTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateDatabase { pub name: ObjectName, /// Create if not exists @@ -278,7 +279,7 @@ impl Display for CreateDatabase { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateExternalTable { /// Table name pub name: ObjectName, @@ -309,7 +310,7 @@ impl Display for CreateExternalTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTableLike { /// Table name pub table_name: ObjectName, @@ -325,7 +326,7 @@ impl Display for CreateTableLike { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateFlow { /// Flow name pub flow_name: ObjectName, @@ -367,7 +368,7 @@ impl Display for CreateFlow { } /// Create SQL view statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateView { /// View name pub name: ObjectName, diff --git a/src/sql/src/statements/cursor.rs b/src/sql/src/statements/cursor.rs index 72ef4cdcae..4381cc5e7b 100644 --- a/src/sql/src/statements/cursor.rs +++ b/src/sql/src/statements/cursor.rs @@ -14,6 +14,7 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; @@ -22,7 +23,7 @@ use super::query::Query; /// Represents a DECLARE CURSOR statement /// /// This statement will carry a SQL query -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DeclareCursor { pub cursor_name: ObjectName, pub query: Box, @@ -35,7 +36,7 @@ impl Display for DeclareCursor { } /// Represents a FETCH FROM cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct FetchCursor { pub cursor_name: ObjectName, pub fetch_size: u64, @@ -48,7 +49,7 @@ impl Display for FetchCursor { } /// Represents a CLOSE cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CloseCursor { pub cursor_name: ObjectName, } diff --git a/src/sql/src/statements/delete.rs b/src/sql/src/statements/delete.rs index 4346610b7d..dc8f5d6901 100644 --- a/src/sql/src/statements/delete.rs +++ b/src/sql/src/statements/delete.rs @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use serde::Serialize; use sqlparser::ast::Statement; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Delete { pub inner: Statement, } diff --git a/src/sql/src/statements/describe.rs b/src/sql/src/statements/describe.rs index 743f2b0123..1a7bba24e5 100644 --- a/src/sql/src/statements/describe.rs +++ b/src/sql/src/statements/describe.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// SQL structure for `DESCRIBE TABLE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DescribeTable { name: ObjectName, } diff --git a/src/sql/src/statements/drop.rs b/src/sql/src/statements/drop.rs index a46450db78..799722904d 100644 --- a/src/sql/src/statements/drop.rs +++ b/src/sql/src/statements/drop.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// DROP TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropTable { table_names: Vec, @@ -62,7 +63,7 @@ impl Display for DropTable { } /// DROP DATABASE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropDatabase { name: ObjectName, /// drop table if exists @@ -99,7 +100,7 @@ impl Display for DropDatabase { } /// DROP FLOW statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropFlow { flow_name: ObjectName, /// drop flow if exists @@ -138,7 +139,7 @@ impl Display for DropFlow { } /// `DROP VIEW` statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropView { // The view name pub view_name: ObjectName, diff --git a/src/sql/src/statements/explain.rs b/src/sql/src/statements/explain.rs index 5b3a2671f9..96a12c7a41 100644 --- a/src/sql/src/statements/explain.rs +++ b/src/sql/src/statements/explain.rs @@ -14,13 +14,14 @@ use std::fmt::{Display, Formatter}; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Explain statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Explain { pub inner: SpStatement, } diff --git a/src/sql/src/statements/insert.rs b/src/sql/src/statements/insert.rs index 4eae7f1e18..f1c0b71444 100644 --- a/src/sql/src/statements/insert.rs +++ b/src/sql/src/statements/insert.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use serde::Serialize; use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values}; use sqlparser::parser::ParserError; use sqlparser_derive::{Visit, VisitMut}; @@ -20,7 +21,7 @@ use crate::ast::{Expr, Value}; use crate::error::Result; use crate::statements::query::Query as GtQuery; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Insert { // Can only be sqlparser::ast::Statement::Insert variant pub inner: Statement, diff --git a/src/sql/src/statements/option_map.rs b/src/sql/src/statements/option_map.rs index 9ff8d94312..d66cadf164 100644 --- a/src/sql/src/statements/option_map.rs +++ b/src/sql/src/statements/option_map.rs @@ -16,14 +16,16 @@ use std::collections::{BTreeMap, HashMap}; use std::ops::ControlFlow; use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString}; +use serde::Serialize; use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut}; const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"]; /// Options hashmap. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Serialize)] pub struct OptionMap { options: BTreeMap, + #[serde(skip_serializing)] secrets: BTreeMap, } diff --git a/src/sql/src/statements/query.rs b/src/sql/src/statements/query.rs index 3b571a1a0b..b5221a2263 100644 --- a/src/sql/src/statements/query.rs +++ b/src/sql/src/statements/query.rs @@ -14,13 +14,14 @@ use std::fmt; +use serde::Serialize; use sqlparser::ast::Query as SpQuery; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Query statement instance. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Query { pub inner: SpQuery, } diff --git a/src/sql/src/statements/set_variables.rs b/src/sql/src/statements/set_variables.rs index 7a2a94a531..748d077d84 100644 --- a/src/sql/src/statements/set_variables.rs +++ b/src/sql/src/statements/set_variables.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::{Expr, ObjectName}; use sqlparser_derive::{Visit, VisitMut}; /// SET variables statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct SetVariables { pub variable: ObjectName, pub value: Vec, diff --git a/src/sql/src/statements/show.rs b/src/sql/src/statements/show.rs index f6a8dab728..055cd7768f 100644 --- a/src/sql/src/statements/show.rs +++ b/src/sql/src/statements/show.rs @@ -14,12 +14,13 @@ use std::fmt::{self, Display}; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{Expr, Ident, ObjectName}; /// Show kind for SQL expressions like `SHOW DATABASE` or `SHOW TABLE` -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum ShowKind { All, Like(Ident), @@ -46,14 +47,14 @@ macro_rules! format_kind { } /// SQL structure for `SHOW DATABASES`. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowDatabases { pub kind: ShowKind, pub full: bool, } /// The SQL `SHOW COLUMNS` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowColumns { pub kind: ShowKind, pub table: String, @@ -77,7 +78,7 @@ impl Display for ShowColumns { } /// The SQL `SHOW INDEX` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowIndex { pub kind: ShowKind, pub table: String, @@ -118,7 +119,7 @@ impl Display for ShowDatabases { } /// SQL structure for `SHOW TABLES`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTables { pub kind: ShowKind, pub database: Option, @@ -142,7 +143,7 @@ impl Display for ShowTables { } /// SQL structure for `SHOW TABLE STATUS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTableStatus { pub kind: ShowKind, pub database: Option, @@ -162,7 +163,7 @@ impl Display for ShowTableStatus { } /// SQL structure for `SHOW CREATE DATABASE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateDatabase { pub database_name: ObjectName, } @@ -175,7 +176,7 @@ impl Display for ShowCreateDatabase { } /// SQL structure for `SHOW CREATE TABLE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateTable { pub table_name: ObjectName, } @@ -188,7 +189,7 @@ impl Display for ShowCreateTable { } /// SQL structure for `SHOW CREATE FLOW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateFlow { pub flow_name: ObjectName, } @@ -201,7 +202,7 @@ impl Display for ShowCreateFlow { } /// SQL structure for `SHOW FLOWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowFlows { pub kind: ShowKind, pub database: Option, @@ -220,7 +221,7 @@ impl Display for ShowFlows { } /// SQL structure for `SHOW CREATE VIEW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateView { pub view_name: ObjectName, } @@ -233,7 +234,7 @@ impl Display for ShowCreateView { } /// SQL structure for `SHOW VIEWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowViews { pub kind: ShowKind, pub database: Option, @@ -252,7 +253,7 @@ impl Display for ShowViews { } /// SQL structure for `SHOW VARIABLES xxx`. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowVariables { pub variable: ObjectName, } @@ -265,7 +266,7 @@ impl Display for ShowVariables { } /// SQL structure for "SHOW STATUS" -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowStatus {} impl Display for ShowStatus { diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs index 8ad391a00d..2870f2b64a 100644 --- a/src/sql/src/statements/statement.rs +++ b/src/sql/src/statements/statement.rs @@ -15,12 +15,14 @@ use std::fmt::Display; use datafusion_sql::parser::Statement as DfStatement; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::{ConvertToDfStatementSnafu, Error}; use crate::statements::admin::Admin; use crate::statements::alter::{AlterDatabase, AlterTable}; +use crate::statements::copy::Copy; use crate::statements::create::{ CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, }; @@ -42,7 +44,7 @@ use crate::statements::truncate::TruncateTable; /// Tokens parsed by `DFParser` are converted into these values. #[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Statement { // Query Query(Box), @@ -107,7 +109,8 @@ pub enum Statement { // EXPLAIN QUERY Explain(Explain), // COPY - Copy(crate::statements::copy::Copy), + Copy(Copy), + // Telemetry Query Language Tql(Tql), // TRUNCATE TABLE TruncateTable(TruncateTable), diff --git a/src/sql/src/statements/tql.rs b/src/sql/src/statements/tql.rs index 0f7a85f95a..7980103431 100644 --- a/src/sql/src/statements/tql.rs +++ b/src/sql/src/statements/tql.rs @@ -14,9 +14,10 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Tql { Eval(TqlEval), Explain(TqlExplain), @@ -49,7 +50,7 @@ fn format_tql( } /// TQL EVAL (, , , [lookback]) -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlEval { pub start: String, pub end: String, @@ -74,7 +75,7 @@ impl Display for TqlEval { /// TQL EXPLAIN [VERBOSE] [, , , [lookback]] /// doesn't execute the query but tells how the query would be executed (similar to SQL EXPLAIN). -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlExplain { pub start: String, pub end: String, @@ -103,7 +104,7 @@ impl Display for TqlExplain { /// TQL ANALYZE [VERBOSE] (, , , [lookback]) /// executes the plan and tells the detailed per-step execution time (similar to SQL ANALYZE). 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlAnalyze { pub start: String, pub end: String, diff --git a/src/sql/src/statements/truncate.rs b/src/sql/src/statements/truncate.rs index c1a063f959..710b5f72df 100644 --- a/src/sql/src/statements/truncate.rs +++ b/src/sql/src/statements/truncate.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// TRUNCATE TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TruncateTable { table_name: ObjectName, } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 4da65f0b21..5a48fef39e 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -361,6 +361,14 @@ pub async fn test_sql_api(store_type: StorageType) { let body = serde_json::from_str::(&res.text().await).unwrap(); assert_eq!(body.code(), ErrorCode::DatabaseNotFound as u32); + // test parse method + let res = client.get("/v1/sql/parse?sql=desc table t").send().await; + assert_eq!(res.status(), StatusCode::OK); + assert_eq!( + res.text().await, + "[{\"DescribeTable\":{\"name\":[{\"value\":\"t\",\"quote_style\":null}]}}]" + ); + // test timezone header let res = client .get("/v1/sql?&sql=show variables system_time_zone") From a8012147ab52f43513580f17ae210a2dbb439318 Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Wed, 11 Dec 2024 22:46:23 +0900 Subject: [PATCH 05/46] feat: support push down IN filter (#5129) * feat: support push down IN filter * chore: move tests to prune.sql --- src/query/src/dist_plan/commutativity.rs | 2 +- .../standalone/common/select/prune.result | 26 +++++++++++++++++++ .../cases/standalone/common/select/prune.sql | 10 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 8166400b8f..45378e532c 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -146,6 +146,7 @@ impl Categorizer { | Expr::Between(_) | Expr::Sort(_) | Expr::Exists(_) + | Expr::InList(_) | Expr::ScalarFunction(_) => Commutativity::Commutative, Expr::Like(_) @@ -157,7 +158,6 @@ impl Categorizer { | Expr::TryCast(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) - | Expr::InList(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::Wildcard { .. 
} => Commutativity::Unimplemented, diff --git a/tests/cases/standalone/common/select/prune.result b/tests/cases/standalone/common/select/prune.result index 13ddee5510..04282b6035 100644 --- a/tests/cases/standalone/common/select/prune.result +++ b/tests/cases/standalone/common/select/prune.result @@ -94,6 +94,32 @@ explain analyze select * from demo where idc='idc1'; |_|_| Total rows: 2_| +-+-+-+ +SELECT * FROM demo where host in ('test1'); + ++-------------------------+-------+-------+------+-----------+ +| ts | value | host | idc | collector | ++-------------------------+-------+-------+------+-----------+ +| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk | ++-------------------------+-------+-------+------+-----------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + drop table demo; Affected Rows: 0 diff --git a/tests/cases/standalone/common/select/prune.sql b/tests/cases/standalone/common/select/prune.sql index e7fd643537..4b976cdb1c 100644 --- a/tests/cases/standalone/common/select/prune.sql +++ b/tests/cases/standalone/common/select/prune.sql @@ -27,4 +27,14 @@ select * from demo where collector='disk' order by ts; -- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED explain analyze select * from demo where idc='idc1'; +SELECT * FROM demo where host in ('test1'); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + drop table demo; From e2a41ccaec9976641dbaeeb4b1e6cec6f3d37783 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 11:13:36 +0800 Subject: [PATCH 06/46] feat: add prefetch support to `PuffinFileFooterReader` for reduced I/O time (#5145) * feat: introduce `PuffinFileFooterReader` * refactor: remove `SyncReader` trait and impl * refactor: replace `FooterParser` with `PuffinFileFooterReader` * chore: remove unused errors --- src/index/src/inverted_index/error.rs | 11 +- src/puffin/src/error.rs | 52 +-- src/puffin/src/file_format/reader.rs | 14 +- src/puffin/src/file_format/reader/file.rs | 73 +---- src/puffin/src/file_format/reader/footer.rs | 333 +++++--------------- src/puffin/src/tests.rs | 180 ++--------- 6 files changed, 135 insertions(+), 528 deletions(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 07a42b8b87..49816e63c4 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -215,8 +207,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - Seek 
{ .. } - | Read { .. } + Read { .. } | Write { .. } | Flush { .. } | Close { .. } diff --git a/src/puffin/src/error.rs b/src/puffin/src/error.rs index 57aec44d1f..634ede5b13 100644 --- a/src/puffin/src/error.rs +++ b/src/puffin/src/error.rs @@ -25,14 +25,6 @@ use snafu::{Location, Snafu}; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -119,14 +111,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to convert bytes to integer"))] - BytesToInteger { - #[snafu(source)] - error: std::array::TryFromSliceError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unsupported decompression: {}", decompression))] UnsupportedDecompression { decompression: String, @@ -150,17 +134,15 @@ pub enum Error { location: Location, }, - #[snafu(display("Parse stage not match, expected: {}, actual: {}", expected, actual))] - ParseStageNotMatch { - expected: String, - actual: String, + #[snafu(display("Unexpected footer payload size: {}", size))] + UnexpectedFooterPayloadSize { + size: i32, #[snafu(implicit)] location: Location, }, - #[snafu(display("Unexpected footer payload size: {}", size))] - UnexpectedFooterPayloadSize { - size: i32, + #[snafu(display("Invalid puffin footer"))] + InvalidPuffinFooter { #[snafu(implicit)] location: Location, }, @@ -177,20 +159,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid blob offset: {}, location: {:?}", offset, location))] - InvalidBlobOffset { - offset: i64, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid blob area end: {}, location: {:?}", offset, location))] - InvalidBlobAreaEnd { - offset: u64, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to compress lz4"))] Lz4Compression { #[snafu(source)] @@ -262,8 +230,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - Seek { .. } - | Read { .. } + Read { .. } | MagicNotMatched { .. } | DeserializeJson { .. } | Write { .. } @@ -275,18 +242,15 @@ impl ErrorExt for Error { | Remove { .. } | Rename { .. } | SerializeJson { .. } - | BytesToInteger { .. } - | ParseStageNotMatch { .. } | UnexpectedFooterPayloadSize { .. } | UnexpectedPuffinFileSize { .. } - | InvalidBlobOffset { .. } - | InvalidBlobAreaEnd { .. } | Lz4Compression { .. } | Lz4Decompression { .. } | BlobNotFound { .. } | BlobIndexOutOfBound { .. } | FileKeyNotMatch { .. } - | WalkDir { .. } => StatusCode::Unexpected, + | WalkDir { .. } + | InvalidPuffinFooter { .. } => StatusCode::Unexpected, UnsupportedCompression { .. } | UnsupportedDecompression { .. } => { StatusCode::Unsupported diff --git a/src/puffin/src/file_format/reader.rs b/src/puffin/src/file_format/reader.rs index 3f48bf4b10..162d7116a5 100644 --- a/src/puffin/src/file_format/reader.rs +++ b/src/puffin/src/file_format/reader.rs @@ -21,21 +21,9 @@ use common_base::range_read::RangeReader; use crate::blob_metadata::BlobMetadata; use crate::error::Result; pub use crate::file_format::reader::file::PuffinFileReader; +pub use crate::file_format::reader::footer::PuffinFileFooterReader; use crate::file_metadata::FileMetadata; -/// `SyncReader` defines a synchronous reader for puffin data. -pub trait SyncReader<'a> { - type Reader: std::io::Read + std::io::Seek; - - /// Fetches the FileMetadata. 
- fn metadata(&'a mut self) -> Result; - - /// Reads particular blob data based on given metadata. - /// - /// Data read from the reader is compressed leaving the caller to decompress the data. - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result; -} - /// `AsyncReader` defines an asynchronous reader for puffin data. #[async_trait] pub trait AsyncReader<'a> { diff --git a/src/puffin/src/file_format/reader/file.rs b/src/puffin/src/file_format/reader/file.rs index 3736ed5d2d..31e8e10bc4 100644 --- a/src/puffin/src/file_format/reader/file.rs +++ b/src/puffin/src/file_format/reader/file.rs @@ -12,20 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, SeekFrom}; - use async_trait::async_trait; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::blob_metadata::BlobMetadata; -use crate::error::{ - MagicNotMatchedSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedPuffinFileSizeSnafu, - UnsupportedDecompressionSnafu, -}; -use crate::file_format::reader::footer::FooterParser; -use crate::file_format::reader::{AsyncReader, SyncReader}; -use crate::file_format::{MAGIC, MAGIC_SIZE, MIN_FILE_SIZE}; +use crate::error::{ReadSnafu, Result, UnexpectedPuffinFileSizeSnafu}; +use crate::file_format::reader::footer::DEFAULT_PREFETCH_SIZE; +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader}; +use crate::file_format::MIN_FILE_SIZE; use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; @@ -72,45 +67,6 @@ impl PuffinFileReader { } } -impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader { - type Reader = PartialReader<&'a mut R>; - - fn metadata(&mut self) -> Result { - if let Some(metadata) = &self.metadata { - return Ok(metadata.clone()); - } - - // check the magic - let mut magic = [0; MAGIC_SIZE as usize]; - self.source.read_exact(&mut magic).context(ReadSnafu)?; - ensure!(magic == MAGIC, MagicNotMatchedSnafu); - - let file_size = self.get_file_size_sync()?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size).parse_sync()?; - self.metadata = Some(metadata.clone()); - Ok(metadata) - } - - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result { - // TODO(zhongzc): support decompression - let compression = blob_metadata.compression_codec.as_ref(); - ensure!( - compression.is_none(), - UnsupportedDecompressionSnafu { - decompression: compression.unwrap().to_string() - } - ); - - Ok(PartialReader::new( - &mut self.source, - blob_metadata.offset as _, - blob_metadata.length as _, - )) - } -} - #[async_trait] impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { type Reader = PartialReader<&'a mut R>; @@ -119,17 +75,10 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { if let Some(metadata) = &self.metadata { return Ok(metadata.clone()); } - - // check the magic - let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?; - ensure!(*magic == MAGIC, MagicNotMatchedSnafu); - let file_size = self.get_file_size_async().await?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size) - .parse_async() - .await?; + let mut reader = PuffinFileFooterReader::new(&mut self.source, file_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); + let metadata = reader.metadata().await?; self.metadata = Some(metadata.clone()); Ok(metadata) } @@ -143,14 +92,6 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for 
PuffinFileReader { } } -impl PuffinFileReader { - fn get_file_size_sync(&mut self) -> Result { - let file_size = self.source.seek(SeekFrom::End(0)).context(SeekSnafu)?; - Self::validate_file_size(file_size)?; - Ok(file_size) - } -} - impl PuffinFileReader { async fn get_file_size_async(&mut self) -> Result { let file_size = self diff --git a/src/puffin/src/file_format/reader/footer.rs b/src/puffin/src/file_format/reader/footer.rs index aa764fd32a..d0cd1e8ed4 100644 --- a/src/puffin/src/file_format/reader/footer.rs +++ b/src/puffin/src/file_format/reader/footer.rs @@ -12,240 +12,98 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, Cursor, SeekFrom}; +use std::io::Cursor; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::error::{ - BytesToIntegerSnafu, DeserializeJsonSnafu, InvalidBlobAreaEndSnafu, InvalidBlobOffsetSnafu, - Lz4DecompressionSnafu, MagicNotMatchedSnafu, ParseStageNotMatchSnafu, ReadSnafu, Result, - SeekSnafu, UnexpectedFooterPayloadSizeSnafu, + DeserializeJsonSnafu, InvalidPuffinFooterSnafu, Lz4DecompressionSnafu, MagicNotMatchedSnafu, + ReadSnafu, Result, UnexpectedFooterPayloadSizeSnafu, }; use crate::file_format::{Flags, FLAGS_SIZE, MAGIC, MAGIC_SIZE, MIN_FILE_SIZE, PAYLOAD_SIZE_SIZE}; use crate::file_metadata::FileMetadata; -/// Parser for the footer of a Puffin data file +/// The default prefetch size for the footer reader. +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// Reader for the footer of a Puffin data file /// /// The footer has a specific layout that needs to be read and parsed to /// extract metadata about the file, which is encapsulated in the [`FileMetadata`] type. /// +/// This reader supports prefetching, allowing for more efficient reading +/// of the footer by fetching additional data ahead of time. +/// /// ```text /// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic /// [4] [?] [4] [4] [4] /// ``` -pub struct FooterParser { - // The underlying IO source +pub struct PuffinFileFooterReader { + /// The source of the puffin file source: R, - - // The size of the file, used for calculating offsets to read from + /// The content length of the puffin file file_size: u64, + /// The prefetch footer size + prefetch_size: Option, } -impl FooterParser { - pub fn new(source: R, file_size: u64) -> Self { - Self { source, file_size } - } -} - -impl FooterParser { - /// Parses the footer from the IO source in a synchronous manner. - pub fn parse_sync(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); - - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - self.source - .seek(SeekFrom::Start(byte_to_read.offset)) - .context(SeekSnafu)?; - let size = byte_to_read.size as usize; - - buf.resize(size, 0); - let buf = &mut buf[..size]; - - self.source.read_exact(buf).context(ReadSnafu)?; - - parser.consume_bytes(buf)?; +impl<'a, R: RangeReader + 'a> PuffinFileFooterReader { + pub fn new(source: R, content_len: u64) -> Self { + Self { + source, + file_size: content_len, + prefetch_size: None, } - - parser.finish() } -} -impl FooterParser { - /// Parses the footer from the IO source in a asynchronous manner. 
- pub async fn parse_async(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); + fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(MIN_FILE_SIZE) + } - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - buf.clear(); - let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size; - self.source - .read_into(range, &mut buf) + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(MIN_FILE_SIZE)); + self + } + + pub async fn metadata(&'a mut self) -> Result { + // Note: prefetch > content_len is allowed, since we're using saturating_sub. + let footer_start = self.file_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.file_size) + .await + .context(ReadSnafu)?; + let suffix_len = suffix.len(); + + // check the magic + let magic = Self::read_tailing_four_bytes(&suffix)?; + ensure!(magic == MAGIC, MagicNotMatchedSnafu); + + let flags = self.decode_flags(&suffix[..suffix_len - MAGIC_SIZE as usize])?; + let length = self.decode_payload_size( + &suffix[..suffix_len - MAGIC_SIZE as usize - FLAGS_SIZE as usize], + )?; + let footer_size = PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE; + + // Did not fetch the entire file metadata in the initial read, need to make a second request. + if length > suffix_len as u64 - footer_size { + let metadata_start = self.file_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.file_size - footer_size) .await .context(ReadSnafu)?; - parser.consume_bytes(&buf)?; - } - - parser.finish() - } -} - -/// The internal stages of parsing the footer. -/// This enum allows the StageParser to keep track of which part -/// of the footer needs to be parsed next. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ParseStage { - FootMagic, - Flags, - PayloadSize, - Payload, - HeadMagic, - Done, -} - -/// Manages the parsing process of the file's footer. -struct StageParser { - /// Current stage in the parsing sequence of the footer. - stage: ParseStage, - - /// Total file size; used for calculating offsets to read from. - file_size: u64, - - /// Flags from the footer, set when the `Flags` field is parsed. - flags: Flags, - - /// Size of the footer's payload, set when the `PayloadSize` is parsed. - payload_size: u64, - - /// Metadata from the footer's payload, set when the `Payload` is parsed. - metadata: Option, -} - -/// Represents a read operation that needs to be performed, including the -/// offset from the start of the file and the number of bytes to read. -struct BytesToRead { - offset: u64, - size: u64, -} - -impl StageParser { - fn new(file_size: u64) -> Self { - Self { - stage: ParseStage::FootMagic, - file_size, - payload_size: 0, - flags: Flags::empty(), - metadata: None, + self.parse_payload(&flags, &meta) + } else { + let metadata_start = self.file_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(&flags, meta) } } - /// Determines the next segment of bytes to read based on the current parsing stage. - /// This method returns information like the offset and size of the next read, - /// or None if parsing is complete. 
- fn next_to_read(&self) -> Option { - if self.stage == ParseStage::Done { - return None; - } - - let btr = match self.stage { - ParseStage::FootMagic => BytesToRead { - offset: self.foot_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Flags => BytesToRead { - offset: self.flags_offset(), - size: FLAGS_SIZE, - }, - ParseStage::PayloadSize => BytesToRead { - offset: self.payload_size_offset(), - size: PAYLOAD_SIZE_SIZE, - }, - ParseStage::Payload => BytesToRead { - offset: self.payload_offset(), - size: self.payload_size, - }, - ParseStage::HeadMagic => BytesToRead { - offset: self.head_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Done => unreachable!(), - }; - - Some(btr) - } - - /// Processes the bytes that have been read according to the current parsing stage - /// and advances the parsing stage. It ensures the correct sequence of bytes is - /// encountered and stores the necessary information in the `StageParser`. - fn consume_bytes(&mut self, bytes: &[u8]) -> Result<()> { - match self.stage { - ParseStage::FootMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Flags; - } - ParseStage::Flags => { - self.flags = Self::parse_flags(bytes)?; - self.stage = ParseStage::PayloadSize; - } - ParseStage::PayloadSize => { - self.payload_size = Self::parse_payload_size(bytes)?; - self.validate_payload_size()?; - self.stage = ParseStage::Payload; - } - ParseStage::Payload => { - self.metadata = Some(self.parse_payload(bytes)?); - self.validate_metadata()?; - self.stage = ParseStage::HeadMagic; - } - ParseStage::HeadMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Done; - } - ParseStage::Done => unreachable!(), - } - - Ok(()) - } - - /// Finalizes the parsing process, ensuring all stages are complete, and returns - /// the parsed `FileMetadata`. It converts the raw footer payload into structured data. 
- fn finish(self) -> Result { - ensure!( - self.stage == ParseStage::Done, - ParseStageNotMatchSnafu { - expected: format!("{:?}", ParseStage::Done), - actual: format!("{:?}", self.stage), - } - ); - - Ok(self.metadata.unwrap()) - } - - fn parse_flags(bytes: &[u8]) -> Result { - let n = u32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - Ok(Flags::from_bits_truncate(n)) - } - - fn parse_payload_size(bytes: &[u8]) -> Result { - let n = i32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - ensure!(n >= 0, UnexpectedFooterPayloadSizeSnafu { size: n }); - Ok(n as u64) - } - - fn validate_payload_size(&self) -> Result<()> { - ensure!( - self.payload_size <= self.file_size - MIN_FILE_SIZE, - UnexpectedFooterPayloadSizeSnafu { - size: self.payload_size as i32 - } - ); - Ok(()) - } - - fn parse_payload(&self, bytes: &[u8]) -> Result { - if self.flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { + fn parse_payload(&self, flags: &Flags, bytes: &[u8]) -> Result { + if flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { let decoder = lz4_flex::frame::FrameDecoder::new(Cursor::new(bytes)); let res = serde_json::from_reader(decoder).context(Lz4DecompressionSnafu)?; Ok(res) @@ -254,54 +112,35 @@ impl StageParser { } } - fn validate_metadata(&self) -> Result<()> { - let metadata = self.metadata.as_ref().expect("metadata is not set"); + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidPuffinFooterSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - let mut next_blob_offset = MAGIC_SIZE; - // check blob offsets - for blob in &metadata.blobs { - ensure!( - blob.offset as u64 == next_blob_offset, - InvalidBlobOffsetSnafu { - offset: blob.offset - } - ); - next_blob_offset += blob.length as u64; - } + Ok(bytes) + } + + fn decode_flags(&self, suffix: &[u8]) -> Result { + let flags = u32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); + Ok(Flags::from_bits_truncate(flags)) + } + + fn decode_payload_size(&self, suffix: &[u8]) -> Result { + let payload_size = i32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); - let blob_area_end = metadata - .blobs - .last() - .map_or(MAGIC_SIZE, |b| (b.offset + b.length) as u64); ensure!( - blob_area_end == self.head_magic_offset(), - InvalidBlobAreaEndSnafu { - offset: blob_area_end + payload_size >= 0, + UnexpectedFooterPayloadSizeSnafu { size: payload_size } + ); + let payload_size = payload_size as u64; + ensure!( + payload_size <= self.file_size - MIN_FILE_SIZE, + UnexpectedFooterPayloadSizeSnafu { + size: self.file_size as i32 } ); - Ok(()) - } - - fn foot_magic_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - } - - fn flags_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE - } - - fn payload_size_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - } - - fn payload_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size - } - - fn head_magic_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE * 2 - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size + Ok(payload_size) } } diff --git a/src/puffin/src/tests.rs b/src/puffin/src/tests.rs index a152d4124b..a3bb485879 100644 --- a/src/puffin/src/tests.rs +++ b/src/puffin/src/tests.rs @@ 
-13,26 +13,14 @@ // limitations under the License. use std::collections::HashMap; -use std::fs::File; -use std::io::{Cursor, Read}; use std::vec; use common_base::range_read::{FileReader, RangeReader}; use futures::io::Cursor as AsyncCursor; -use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader}; -use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter}; - -#[test] -fn test_read_empty_puffin_sync() { - let path = "src/tests/resources/empty-puffin-uncompressed.puffin"; - - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); - assert_eq!(metadata.properties.len(), 0); - assert_eq!(metadata.blobs.len(), 0); -} +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader, PuffinFileReader}; +use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter}; +use crate::file_metadata::FileMetadata; #[tokio::test] async fn test_read_empty_puffin_async() { @@ -45,39 +33,37 @@ async fn test_read_empty_puffin_async() { assert_eq!(metadata.blobs.len(), 0); } -#[test] -fn test_sample_metric_data_puffin_sync() { - let path = "src/tests/resources/sample-metric-data-uncompressed.puffin"; +async fn test_read_puffin_file_metadata( + path: &str, + file_size: u64, + expeccted_metadata: FileMetadata, +) { + for prefetch_size in [0, file_size / 2, file_size, file_size + 10] { + let reader = FileReader::new(path).await.unwrap(); + let mut footer_reader = PuffinFileFooterReader::new(reader, file_size); + if prefetch_size > 0 { + footer_reader = footer_reader.with_prefetch_size(prefetch_size); + } + let metadata = footer_reader.metadata().await.unwrap(); + assert_eq!(metadata.properties, expeccted_metadata.properties,); + assert_eq!(metadata.blobs, expeccted_metadata.blobs); + } +} - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); +#[tokio::test] +async fn test_read_puffin_file_metadata_async() { + let paths = vec![ + "src/tests/resources/empty-puffin-uncompressed.puffin", + "src/tests/resources/sample-metric-data-uncompressed.puffin", + ]; + for path in paths { + let mut reader = FileReader::new(path).await.unwrap(); + let file_size = reader.metadata().await.unwrap().content_length; + let mut reader = PuffinFileReader::new(reader); + let metadata = reader.metadata().await.unwrap(); - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); - - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, "abcdefghi"); - - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - let expected = include_bytes!("tests/resources/sample-metric-data.blob"); - assert_eq!(buf, expected); + test_read_puffin_file_metadata(path, file_size, metadata).await; + } } #[tokio::test] @@ -113,38 +99,6 @@ async fn test_sample_metric_data_puffin_async() { assert_eq!(buf, expected); } -#[test] -fn 
test_writer_reader_with_empty_sync() { - fn test_writer_reader_with_empty_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 0); - } - - test_writer_reader_with_empty_sync(false); - test_writer_reader_with_empty_sync(true); -} - #[tokio::test] async fn test_writer_reader_empty_async() { async fn test_writer_reader_empty_async(footer_compressed: bool) { @@ -176,76 +130,6 @@ async fn test_writer_reader_empty_async() { test_writer_reader_empty_async(true).await; } -#[test] -fn test_writer_reader_sync() { - fn test_writer_reader_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - - let blob1 = "abcdefghi"; - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob1), - blob_type: "some-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - let blob2 = include_bytes!("tests/resources/sample-metric-data.blob"); - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob2), - blob_type: "some-other-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); - - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, blob1); - - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - assert_eq!(buf, blob2); - } - - test_writer_reader_sync(false); - test_writer_reader_sync(true); -} - #[tokio::test] async fn test_writer_reader_async() { async fn test_writer_reader_async(footer_compressed: bool) { From 8c1959c580fdb3c5ecafdb6bc4fb6395a80ebedf Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 11:49:54 +0800 Subject: [PATCH 07/46] feat: add prefetch support to `InvertedIndexFooterReader` for reduced I/O time (#5146) * feat: add prefetch support to `InvertedIndeFooterReader` * chore: correct struct name * chore: apply suggestions from CR --- 
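Both this change and the `PuffinFileFooterReader` rework in the previous patch follow the same read pattern: fetch one window from the tail of the file, decode the fixed-size footer fields from it, and fall back to a second ranged read only when the footer payload does not fit in that window. A minimal, self-contained sketch of the idea over an in-memory buffer (illustrative only; the simplified layout and helper names are assumptions, not the crates' `RangeReader` API):

    // Illustrative layout: [payload bytes][payload_size: u32 LE].
    // The real Puffin footer also carries flags and magic; the inverted index
    // footer ends with just the size field, but the two-phase read is the same.

    /// Simulates one ranged read against an in-memory "file".
    fn read_range(file: &[u8], start: u64, end: u64) -> Vec<u8> {
        file[start as usize..end as usize].to_vec()
    }

    /// Reads the footer payload with at most two ranged reads.
    fn read_footer_payload(file: &[u8], prefetch: u64) -> Vec<u8> {
        const SIZE_FIELD: u64 = 4;
        let file_size = file.len() as u64;

        // First request: a tail window, at least big enough for the size field.
        let window = prefetch.max(SIZE_FIELD).min(file_size);
        let suffix = read_range(file, file_size - window, file_size);
        let n = suffix.len();

        // Decode the trailing size field.
        let payload_size = u32::from_le_bytes(suffix[n - 4..].try_into().unwrap()) as u64;

        if payload_size + SIZE_FIELD <= window {
            // Fast path: the payload already sits inside the prefetched window.
            let start = (n as u64 - SIZE_FIELD - payload_size) as usize;
            suffix[start..n - SIZE_FIELD as usize].to_vec()
        } else {
            // Slow path: one more ranged read for exactly the payload bytes.
            read_range(file, file_size - SIZE_FIELD - payload_size, file_size - SIZE_FIELD)
        }
    }

    fn main() {
        let mut file = b"metadata-payload".to_vec();
        file.extend_from_slice(&16u32.to_le_bytes());
        // A tiny prefetch needs the second read; a generous one needs a single request.
        assert_eq!(read_footer_payload(&file, 8), b"metadata-payload".to_vec());
        assert_eq!(read_footer_payload(&file, 1024), b"metadata-payload".to_vec());
    }

With the 1 KiB `DEFAULT_PREFETCH_SIZE` both readers use, a typical footer is fetched in one request instead of the multiple smaller reads the old readers issued.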
src/index/src/inverted_index/error.rs | 16 ++- .../src/inverted_index/format/reader/blob.rs | 6 +- .../inverted_index/format/reader/footer.rs | 133 ++++++++++++------ src/index/src/lib.rs | 1 + 4 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 49816e63c4..7e861beda6 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -68,6 +68,18 @@ pub enum Error { location: Location, }, + #[snafu(display("Blob size too small"))] + BlobSizeTooSmall { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid footer payload size"))] + InvalidFooterPayloadSize { + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))] UnexpectedFooterPayloadSize { max_payload_size: u64, @@ -220,7 +232,9 @@ impl ErrorExt for Error { | KeysApplierUnexpectedPredicates { .. } | CommonIo { .. } | UnknownIntermediateCodecMagic { .. } - | FstCompile { .. } => StatusCode::Unexpected, + | FstCompile { .. } + | InvalidFooterPayloadSize { .. } + | BlobSizeTooSmall { .. } => StatusCode::Unexpected, ParseRegex { .. } | ParseDFA { .. } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index ace0e5c485..de34cd36f8 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -19,8 +19,9 @@ use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; +use super::footer::DEFAULT_PREFETCH_SIZE; use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu}; -use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader; +use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::format::MIN_BLOB_SIZE; @@ -72,7 +73,8 @@ impl InvertedIndexReader for InvertedIndexBlobReader { let blob_size = metadata.content_length; Self::validate_blob_size(blob_size)?; - let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size); + let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); footer_reader.metadata().await.map(Arc::new) } } diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 1f35237711..c025ecf52e 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -18,53 +18,88 @@ use prost::Message; use snafu::{ensure, ResultExt}; use crate::inverted_index::error::{ - CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu, - UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu, + BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result, + UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu, + UnexpectedZeroSegmentRowCountSnafu, }; use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE; -/// InvertedIndeFooterReader is for reading the footer section of the blob. -pub struct InvertedIndeFooterReader { +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// InvertedIndexFooterReader is for reading the footer section of the blob. 
+pub struct InvertedIndexFooterReader { source: R, blob_size: u64, + prefetch_size: Option, } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub fn new(source: R, blob_size: u64) -> Self { - Self { source, blob_size } + Self { + source, + blob_size, + prefetch_size: None, + } + } + + /// Set the prefetch size for the footer reader. + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE)); + self + } + + pub fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE) } } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub async fn metadata(&mut self) -> Result { - let payload_size = self.read_payload_size().await?; - let metas = self.read_payload(payload_size).await?; - Ok(metas) - } + ensure!( + self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE, + BlobSizeTooSmallSnafu + ); - async fn read_payload_size(&mut self) -> Result { - let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize]; - let end = self.blob_size; - let start = end - FOOTER_PAYLOAD_SIZE_SIZE; - self.source - .read_into(start..end, &mut &mut size_buf[..]) + let footer_start = self.blob_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.blob_size) .await .context(CommonIoSnafu)?; + let suffix_len = suffix.len(); + let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64; + self.validate_payload_size(length)?; - let payload_size = u32::from_le_bytes(size_buf) as u64; - self.validate_payload_size(payload_size)?; + let footer_size = FOOTER_PAYLOAD_SIZE_SIZE; - Ok(payload_size) + // Did not fetch the entire file metadata in the initial read, need to make a second request. + if length > suffix_len as u64 - footer_size { + let metadata_start = self.blob_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.blob_size - footer_size) + .await + .context(CommonIoSnafu)?; + self.parse_payload(&meta, length) + } else { + let metadata_start = self.blob_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(meta, length) + } } - async fn read_payload(&mut self, payload_size: u64) -> Result { - let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE; - let start = end - payload_size; - let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?; + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?; + Ok(bytes) + } + + fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result { + let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?; self.validate_metas(&metas, payload_size)?; - Ok(metas) } @@ -113,9 +148,12 @@ impl InvertedIndeFooterReader { #[cfg(test)] mod tests { + use std::assert_matches::assert_matches; + use prost::Message; use super::*; + use crate::inverted_index::error::Error; fn create_test_payload(meta: InvertedIndexMeta) -> Vec { let mut metas = InvertedIndexMetas { @@ -141,14 +179,18 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = 
reader.read_payload_size().await.unwrap(); - let metas = reader.read_payload(payload_size).await.unwrap(); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } - assert_eq!(metas.metas.len(), 1); - let index_meta = &metas.metas.get("test").unwrap(); - assert_eq!(index_meta.name, "test"); + let metas = reader.metadata().await.unwrap(); + assert_eq!(metas.metas.len(), 1); + let index_meta = &metas.metas.get("test").unwrap(); + assert_eq!(index_meta.name, "test"); + } } #[tokio::test] @@ -157,14 +199,20 @@ mod tests { name: "test".to_string(), ..Default::default() }; - let mut payload_buf = create_test_payload(meta); payload_buf.push(0xff); // Add an extra byte to corrupt the footer let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size_result = reader.read_payload_size().await; - assert!(payload_size_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let blob_size = payload_buf.len() as u64; + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. })); + } } #[tokio::test] @@ -178,10 +226,15 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = reader.read_payload_size().await.unwrap(); - let payload_result = reader.read_payload(payload_size).await; - assert!(payload_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. })); + } } } diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 197fc01818..5e2e411668 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -13,6 +13,7 @@ // limitations under the License. 
#![feature(iter_partition_in_place)] +#![feature(assert_matches)] pub mod fulltext_index; pub mod inverted_index; From d53fbcb9362892623da9a8d6475c82a4ac250faa Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 12:09:36 +0800 Subject: [PATCH 08/46] feat: introduce `PuffinMetadataCache` (#5148) * feat: introduce `PuffinMetadataCache` * refactor: remove too_many_arguments * chore: fmt toml --- Cargo.lock | 1 + src/mito2/src/cache.rs | 17 ++++++ src/mito2/src/config.rs | 4 ++ src/mito2/src/read/scan_region.rs | 11 +++- src/mito2/src/sst/file.rs | 1 + .../src/sst/index/inverted_index/applier.rs | 42 ++++++++++--- .../index/inverted_index/applier/builder.rs | 55 ++++++++++++----- .../inverted_index/applier/builder/between.rs | 10 ---- .../applier/builder/comparison.rs | 8 --- .../inverted_index/applier/builder/eq_list.rs | 14 ----- .../inverted_index/applier/builder/in_list.rs | 10 ---- .../applier/builder/regex_match.rs | 8 --- .../src/sst/index/inverted_index/creator.rs | 7 ++- src/mito2/src/worker.rs | 1 + src/puffin/Cargo.toml | 1 + src/puffin/src/blob_metadata.rs | 14 +++++ src/puffin/src/file_format/reader/file.rs | 5 ++ src/puffin/src/file_metadata.rs | 16 +++++ src/puffin/src/puffin_manager.rs | 1 + src/puffin/src/puffin_manager/cache.rs | 60 +++++++++++++++++++ .../src/puffin_manager/fs_puffin_manager.rs | 17 +++++- .../fs_puffin_manager/reader.rs | 39 ++++++++++-- 22 files changed, 258 insertions(+), 84 deletions(-) create mode 100644 src/puffin/src/puffin_manager/cache.rs diff --git a/Cargo.lock b/Cargo.lock index 311caafcb2..e57a6542af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8883,6 +8883,7 @@ dependencies = [ "lz4_flex 0.11.3", "moka", "pin-project", + "prometheus", "serde", "serde_json", "sha2", diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 7d977a328c..7018b039d6 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -32,6 +32,7 @@ use moka::notification::RemovalCause; use moka::sync::Cache; use parquet::column::page::Page; use parquet::file::metadata::ParquetMetaData; +use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef}; use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector}; use crate::cache::cache_size::parquet_meta_size; @@ -68,6 +69,8 @@ pub struct CacheManager { write_cache: Option, /// Cache for inverted index. index_cache: Option, + /// Puffin metadata cache. + puffin_metadata_cache: Option, /// Cache for time series selectors. selector_result_cache: Option, } @@ -217,6 +220,10 @@ impl CacheManager { pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> { self.index_cache.as_ref() } + + pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> { + self.puffin_metadata_cache.as_ref() + } } /// Increases selector cache miss metrics. @@ -237,6 +244,7 @@ pub struct CacheManagerBuilder { page_cache_size: u64, index_metadata_size: u64, index_content_size: u64, + puffin_metadata_size: u64, write_cache: Option, selector_result_cache_size: u64, } @@ -278,6 +286,12 @@ impl CacheManagerBuilder { self } + /// Sets cache size for puffin metadata. + pub fn puffin_metadata_size(mut self, bytes: u64) -> Self { + self.puffin_metadata_size = bytes; + self + } + /// Sets selector result cache size. 
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self { self.selector_result_cache_size = bytes; @@ -340,6 +354,8 @@ impl CacheManagerBuilder { }); let inverted_index_cache = InvertedIndexCache::new(self.index_metadata_size, self.index_content_size); + let puffin_metadata_cache = + PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES); let selector_result_cache = (self.selector_result_cache_size != 0).then(|| { Cache::builder() .max_capacity(self.selector_result_cache_size) @@ -361,6 +377,7 @@ impl CacheManagerBuilder { page_cache, write_cache: self.write_cache, index_cache: Some(Arc::new(inverted_index_cache)), + puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)), selector_result_cache, } } diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index 9b113027a4..dda3f42710 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -304,6 +304,9 @@ pub struct IndexConfig { /// Write buffer size for creating the index. pub write_buffer_size: ReadableSize, + + /// Cache size for metadata of puffin files. Setting it to 0 to disable the cache. + pub metadata_cache_size: ReadableSize, } impl Default for IndexConfig { @@ -312,6 +315,7 @@ impl Default for IndexConfig { aux_path: String::new(), staging_size: ReadableSize::gb(2), write_buffer_size: ReadableSize::mb(8), + metadata_cache_size: ReadableSize::mb(64), } } } diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 19324f119f..32b8c90cda 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -413,11 +413,15 @@ impl ScanRegion { .and_then(|c| c.index_cache()) .cloned(); + let puffin_metadata_cache = self + .cache_manager + .as_ref() + .and_then(|c| c.puffin_metadata_cache()) + .cloned(); + InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), self.access_layer.object_store().clone(), - file_cache, - index_cache, self.version.metadata.as_ref(), self.version.metadata.inverted_indexed_column_ids( self.version @@ -429,6 +433,9 @@ impl ScanRegion { ), self.access_layer.puffin_manager_factory().clone(), ) + .with_file_cache(file_cache) + .with_index_cache(index_cache) + .with_puffin_metadata_cache(puffin_metadata_cache) .build(&self.request.filters) .inspect_err(|err| warn!(err; "Failed to build invereted index applier")) .ok() diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 451ec44f1c..4353ae55e3 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -149,6 +149,7 @@ impl FileMeta { pub fn inverted_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::InvertedIndex) } + pub fn fulltext_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::FulltextIndex) } diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index cac3ffedd7..bf5206ef44 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -22,6 +22,7 @@ use index::inverted_index::search::index_apply::{ ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext, }; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader}; use snafu::ResultExt; use store_api::storage::RegionId; @@ -60,6 +61,9 @@ pub(crate) struct InvertedIndexApplier { /// In-memory cache for inverted index. inverted_index_cache: Option, + + /// Puffin metadata cache. 
+ puffin_metadata_cache: Option, } pub(crate) type InvertedIndexApplierRef = Arc; @@ -70,8 +74,6 @@ impl InvertedIndexApplier { region_dir: String, region_id: RegionId, store: ObjectStore, - file_cache: Option, - index_cache: Option, index_applier: Box, puffin_manager_factory: PuffinManagerFactory, ) -> Self { @@ -81,13 +83,35 @@ impl InvertedIndexApplier { region_dir, region_id, store, - file_cache, + file_cache: None, index_applier, puffin_manager_factory, - inverted_index_cache: index_cache, + inverted_index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.inverted_index_cache = index_cache; + self + } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + /// Applies predicates to the provided SST file id and returns the relevant row group ids pub async fn apply(&self, file_id: FileId) -> Result { let _timer = INDEX_APPLY_ELAPSED @@ -105,6 +129,7 @@ impl InvertedIndexApplier { if let Err(err) = other { warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.") } + self.remote_blob_reader(file_id).await? } }; @@ -157,7 +182,10 @@ impl InvertedIndexApplier { /// Creates a blob reader from the remote index file. async fn remote_blob_reader(&self, file_id: FileId) -> Result { - let puffin_manager = self.puffin_manager_factory.build(self.store.clone()); + let puffin_manager = self + .puffin_manager_factory + .build(self.store.clone()) + .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); let file_path = location::index_file_path(&self.region_dir, file_id); puffin_manager .reader(&file_path) @@ -219,8 +247,6 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); @@ -261,8 +287,6 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder.rs b/src/mito2/src/sst/index/inverted_index/applier/builder.rs index 603cf5aa23..653679b9fc 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder.rs @@ -28,6 +28,7 @@ use datatypes::value::Value; use index::inverted_index::search::index_apply::PredicatesIndexApplier; use index::inverted_index::search::predicate::Predicate; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use snafu::{OptionExt, ResultExt}; use store_api::metadata::RegionMetadata; use store_api::storage::ColumnId; @@ -65,6 +66,9 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> { /// Cache for inverted index. index_cache: Option, + + /// Cache for puffin metadata. 
+ puffin_metadata_cache: Option, } impl<'a> InvertedIndexApplierBuilder<'a> { @@ -72,8 +76,6 @@ impl<'a> InvertedIndexApplierBuilder<'a> { pub fn new( region_dir: String, object_store: ObjectStore, - file_cache: Option, - index_cache: Option, metadata: &'a RegionMetadata, indexed_column_ids: HashSet, puffin_manager_factory: PuffinManagerFactory, @@ -81,15 +83,37 @@ impl<'a> InvertedIndexApplierBuilder<'a> { Self { region_dir, object_store, - file_cache, metadata, indexed_column_ids, output: HashMap::default(), - index_cache, puffin_manager_factory, + file_cache: None, + index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.index_cache = index_cache; + self + } + /// Consumes the builder to construct an [`InvertedIndexApplier`], optionally returned based on /// the expressions provided. If no predicates match, returns `None`. pub fn build(mut self, exprs: &[Expr]) -> Result> { @@ -108,15 +132,18 @@ impl<'a> InvertedIndexApplierBuilder<'a> { .collect(); let applier = PredicatesIndexApplier::try_from(predicates); - Ok(Some(InvertedIndexApplier::new( - self.region_dir, - self.metadata.region_id, - self.object_store, - self.file_cache, - self.index_cache, - Box::new(applier.context(BuildIndexApplierSnafu)?), - self.puffin_manager_factory, - ))) + Ok(Some( + InvertedIndexApplier::new( + self.region_dir, + self.metadata.region_id, + self.object_store, + Box::new(applier.context(BuildIndexApplierSnafu)?), + self.puffin_manager_factory, + ) + .with_file_cache(self.file_cache) + .with_puffin_metadata_cache(self.puffin_metadata_cache) + .with_index_cache(self.index_cache), + )) } /// Recursively traverses expressions to collect predicates. 
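Replacing the positional `Option` arguments of `InvertedIndexApplier::new` and `InvertedIndexApplierBuilder::new` with chainable `with_*` setters is the "remove too_many_arguments" refactor mentioned in the commit message: call sites now name only the caches they actually supply, and adding another optional cache later does not touch existing callers. A stand-alone sketch of that API shape (the `String` fields below are placeholders for the real cache handles, not mito2 types):

    #[derive(Default, Debug)]
    struct Applier {
        region_dir: String,
        file_cache: Option<String>,
        index_cache: Option<String>,
        metadata_cache: Option<String>,
    }

    impl Applier {
        /// Only the required arguments stay in the constructor.
        fn new(region_dir: String) -> Self {
            Self { region_dir, ..Default::default() }
        }

        /// Optional dependencies are attached with chainable setters.
        fn with_file_cache(mut self, cache: Option<String>) -> Self {
            self.file_cache = cache;
            self
        }

        fn with_index_cache(mut self, cache: Option<String>) -> Self {
            self.index_cache = cache;
            self
        }

        fn with_metadata_cache(mut self, cache: Option<String>) -> Self {
            self.metadata_cache = cache;
            self
        }
    }

    fn main() {
        // A caller that only has a metadata cache configured stays short.
        let applier = Applier::new("region/1/".to_string())
            .with_metadata_cache(Some("puffin_metadata".to_string()));
        println!("{applier:?}");
    }

Taking `Option<_>` in the setters, rather than a bare handle, lets the scan path in `scan_region.rs` forward possibly-absent caches without unwrapping them first.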
@@ -322,8 +349,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs index 0a196e6f1a..51f7f001e2 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs @@ -75,8 +75,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -118,8 +116,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -144,8 +140,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -187,8 +181,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -214,8 +206,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs index cdaec9f94e..138b15b82e 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs @@ -231,8 +231,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -260,8 +258,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -280,8 +276,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -315,8 +309,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs index 1d07cca487..35a5caad56 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs @@ -137,8 +137,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -175,8 +173,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -204,8 +200,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -224,8 +218,6 @@ mod tests { let mut builder = 
InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -244,8 +236,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -303,8 +293,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -341,8 +329,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs index 6a520ba401..224e10c452 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs @@ -68,8 +68,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -101,8 +99,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -126,8 +122,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -159,8 +153,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -186,8 +178,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs index 7fdf7f3de5..7148986e6d 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs @@ -62,8 +62,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -91,8 +89,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -120,8 +116,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -142,8 +136,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 6db1ef6e0b..029a0da848 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -310,12 +310,14 @@ mod tests { use futures::future::BoxFuture; use object_store::services::Memory; use object_store::ObjectStore; + use 
puffin::puffin_manager::cache::PuffinMetadataCache; use puffin::puffin_manager::PuffinManager; use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder}; use store_api::storage::RegionId; use super::*; use crate::cache::index::InvertedIndexCache; + use crate::metrics::CACHE_BYTES; use crate::read::BatchColumn; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder; @@ -447,15 +449,16 @@ mod tests { move |expr| { let _d = &d; let cache = Arc::new(InvertedIndexCache::new(10, 10)); + let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES)); let applier = InvertedIndexApplierBuilder::new( region_dir.clone(), object_store.clone(), - None, - Some(cache), ®ion_metadata, indexed_column_ids.clone(), factory.clone(), ) + .with_index_cache(Some(cache)) + .with_puffin_metadata_cache(Some(puffin_metadata_cache)) .build(&[expr]) .unwrap() .unwrap(); diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index 33d26c8196..f8ab9c3f4e 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -170,6 +170,7 @@ impl WorkerGroup { .selector_result_cache_size(config.selector_result_cache_size.as_bytes()) .index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes()) .index_content_size(config.inverted_index.content_cache_size.as_bytes()) + .puffin_metadata_size(config.index.metadata_cache_size.as_bytes()) .write_cache(write_cache) .build(), ); diff --git a/src/puffin/Cargo.toml b/src/puffin/Cargo.toml index e4e6c74a5c..31c92ba4f9 100644 --- a/src/puffin/Cargo.toml +++ b/src/puffin/Cargo.toml @@ -25,6 +25,7 @@ futures.workspace = true lz4_flex = "0.11" moka = { workspace = true, features = ["future", "sync"] } pin-project.workspace = true +prometheus.workspace = true serde.workspace = true serde_json.workspace = true sha2 = "0.10.8" diff --git a/src/puffin/src/blob_metadata.rs b/src/puffin/src/blob_metadata.rs index bb2475bfa3..67eb62c5ff 100644 --- a/src/puffin/src/blob_metadata.rs +++ b/src/puffin/src/blob_metadata.rs @@ -68,6 +68,20 @@ pub struct BlobMetadata { pub properties: HashMap, } +impl BlobMetadata { + /// Calculates the memory usage of the blob metadata in bytes. + pub fn memory_usage(&self) -> usize { + self.blob_type.len() + + self.input_fields.len() * std::mem::size_of::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + /// Compression codec used to compress the blob #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] diff --git a/src/puffin/src/file_format/reader/file.rs b/src/puffin/src/file_format/reader/file.rs index 31e8e10bc4..9ed40a7f18 100644 --- a/src/puffin/src/file_format/reader/file.rs +++ b/src/puffin/src/file_format/reader/file.rs @@ -46,6 +46,11 @@ impl PuffinFileReader { } } + pub fn with_metadata(mut self, metadata: Option) -> Self { + self.metadata = metadata; + self + } + fn validate_file_size(file_size: u64) -> Result<()> { ensure!( file_size >= MIN_FILE_SIZE, diff --git a/src/puffin/src/file_metadata.rs b/src/puffin/src/file_metadata.rs index 74eea3aa08..4804c65be4 100644 --- a/src/puffin/src/file_metadata.rs +++ b/src/puffin/src/file_metadata.rs @@ -33,6 +33,22 @@ pub struct FileMetadata { pub properties: HashMap, } +impl FileMetadata { + /// Calculates the memory usage of the file metadata in bytes. 
+ pub fn memory_usage(&self) -> usize { + self.blobs + .iter() + .map(|blob| blob.memory_usage()) + .sum::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/puffin/src/puffin_manager.rs b/src/puffin/src/puffin_manager.rs index 7bd5e9039d..17101b1662 100644 --- a/src/puffin/src/puffin_manager.rs +++ b/src/puffin/src/puffin_manager.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod cache; pub mod file_accessor; pub mod fs_puffin_manager; pub mod stager; diff --git a/src/puffin/src/puffin_manager/cache.rs b/src/puffin/src/puffin_manager/cache.rs new file mode 100644 index 0000000000..66fcb36bf9 --- /dev/null +++ b/src/puffin/src/puffin_manager/cache.rs @@ -0,0 +1,60 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use prometheus::IntGaugeVec; + +use crate::file_metadata::FileMetadata; +/// Metrics for index metadata. +const PUFFIN_METADATA_TYPE: &str = "puffin_metadata"; + +pub type PuffinMetadataCacheRef = Arc; + +/// A cache for storing the metadata of the index files. +pub struct PuffinMetadataCache { + cache: moka::sync::Cache>, +} + +fn puffin_metadata_weight(k: &String, v: &Arc) -> u32 { + (k.as_bytes().len() + v.memory_usage()) as u32 +} + +impl PuffinMetadataCache { + pub fn new(capacity: u64, cache_bytes: &'static IntGaugeVec) -> Self { + common_telemetry::debug!("Building PuffinMetadataCache with capacity: {capacity}"); + Self { + cache: moka::sync::CacheBuilder::new(capacity) + .name("puffin_metadata") + .weigher(puffin_metadata_weight) + .eviction_listener(|k, v, _cause| { + let size = puffin_metadata_weight(&k, &v); + cache_bytes + .with_label_values(&[PUFFIN_METADATA_TYPE]) + .sub(size.into()); + }) + .build(), + } + } + + /// Gets the metadata from the cache. + pub fn get_metadata(&self, file_id: &str) -> Option> { + self.cache.get(file_id) + } + + /// Puts the metadata into the cache. + pub fn put_metadata(&self, file_id: String, metadata: Arc) { + self.cache.insert(file_id, metadata); + } +} diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager.rs b/src/puffin/src/puffin_manager/fs_puffin_manager.rs index 976eb23997..52190f92fb 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager.rs @@ -21,6 +21,7 @@ pub use reader::FsPuffinReader; pub use writer::FsPuffinWriter; use crate::error::Result; +use crate::puffin_manager::cache::PuffinMetadataCacheRef; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::stager::Stager; use crate::puffin_manager::PuffinManager; @@ -31,16 +32,29 @@ pub struct FsPuffinManager { stager: S, /// The puffin file accessor. puffin_file_accessor: F, + /// The puffin metadata cache. 
+ puffin_metadata_cache: Option, } impl FsPuffinManager { - /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`. + /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`, + /// and optionally with a `puffin_metadata_cache`. pub fn new(stager: S, puffin_file_accessor: F) -> Self { Self { stager, puffin_file_accessor, + puffin_metadata_cache: None, } } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } } #[async_trait] @@ -57,6 +71,7 @@ where puffin_file_name.to_string(), self.stager.clone(), self.puffin_file_accessor.clone(), + self.puffin_metadata_cache.clone(), )) } diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs index 3de27fdb77..2e1ae594ad 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs @@ -14,6 +14,7 @@ use std::io; use std::ops::Range; +use std::sync::Arc; use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; @@ -23,12 +24,14 @@ use futures::io::BufReader; use futures::{AsyncRead, AsyncWrite}; use snafu::{ensure, OptionExt, ResultExt}; +use super::PuffinMetadataCacheRef; use crate::blob_metadata::{BlobMetadata, CompressionCodec}; use crate::error::{ BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu, MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu, }; use crate::file_format::reader::{AsyncReader, PuffinFileReader}; +use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::fs_puffin_manager::dir_meta::DirMetadata; @@ -45,14 +48,23 @@ pub struct FsPuffinReader { /// The puffin file accessor. puffin_file_accessor: F, + + /// The puffin file metadata cache. + puffin_file_metadata_cache: Option, } impl FsPuffinReader { - pub(crate) fn new(puffin_file_name: String, stager: S, puffin_file_accessor: F) -> Self { + pub(crate) fn new( + puffin_file_name: String, + stager: S, + puffin_file_accessor: F, + puffin_file_metadata_cache: Option, + ) -> Self { Self { puffin_file_name, stager, puffin_file_accessor, + puffin_file_metadata_cache, } } } @@ -73,13 +85,13 @@ where .await?; let mut file = PuffinFileReader::new(reader); - // TODO(zhongzc): cache the metadata. - let metadata = file.metadata().await?; + let metadata = self.get_puffin_file_metadata(&mut file).await?; let blob_metadata = metadata .blobs - .into_iter() + .iter() .find(|m| m.blob_type == key) - .context(BlobNotFoundSnafu { blob: key })?; + .context(BlobNotFoundSnafu { blob: key })? + .clone(); let blob = if blob_metadata.compression_codec.is_none() { // If the blob is not compressed, we can directly read it from the puffin file. 
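`PuffinMetadataCache` introduced above is a weight-bounded `moka` cache: the weigher charges each entry for its key length plus the metadata's `memory_usage()`, and the eviction listener debits the shared `CACHE_BYTES` gauge when an entry is dropped. A cut-down sketch of the same wiring, with a plain `AtomicI64` standing in for the prometheus gauge and a placeholder `Metadata` type (assumes the `moka` crate with its `sync` feature; this is an illustration, not the crate's actual accounting):

    use std::sync::atomic::{AtomicI64, Ordering};
    use std::sync::Arc;

    /// Stand-in for the decoded file metadata; only its size matters here.
    #[derive(Clone)]
    struct Metadata {
        payload: Vec<u8>,
    }

    fn entry_weight(key: &String, value: &Arc<Metadata>) -> u32 {
        (key.len() + value.payload.len()) as u32
    }

    struct MetadataCache {
        cache: moka::sync::Cache<String, Arc<Metadata>>,
        bytes: Arc<AtomicI64>,
    }

    impl MetadataCache {
        fn new(capacity_bytes: u64) -> Self {
            let bytes = Arc::new(AtomicI64::new(0));
            let bytes_in_listener = bytes.clone();
            let cache = moka::sync::CacheBuilder::new(capacity_bytes)
                .weigher(entry_weight)
                // Runs when an entry is evicted, replaced or invalidated: undo its charge.
                .eviction_listener(move |k, v, _cause| {
                    bytes_in_listener.fetch_sub(entry_weight(&k, &v) as i64, Ordering::Relaxed);
                })
                .build();
            Self { cache, bytes }
        }

        fn put(&self, file: String, meta: Arc<Metadata>) {
            // This sketch charges the counter on insert and relies on the listener to debit it.
            self.bytes
                .fetch_add(entry_weight(&file, &meta) as i64, Ordering::Relaxed);
            self.cache.insert(file, meta);
        }

        fn get(&self, file: &str) -> Option<Arc<Metadata>> {
            self.cache.get(file)
        }
    }

    fn main() {
        let cache = MetadataCache::new(64 * 1024);
        cache.put("a.puffin".to_string(), Arc::new(Metadata { payload: vec![0; 128] }));
        assert!(cache.get("a.puffin").is_some());
        println!("cached bytes ~ {}", cache.bytes.load(Ordering::Relaxed));
    }

Because `moka` may process evictions and their notifications lazily, such a counter tracks real usage approximately rather than instantaneously, which is acceptable for a metrics gauge.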
@@ -133,6 +145,23 @@ where S: Stager, F: PuffinFileAccessor + Clone, { + async fn get_puffin_file_metadata( + &self, + reader: &mut PuffinFileReader, + ) -> Result> { + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) { + return Ok(metadata); + } + } + + let metadata = Arc::new(reader.metadata().await?); + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone()); + } + Ok(metadata) + } + async fn init_blob_to_stager( reader: PuffinFileReader, blob_metadata: BlobMetadata, From 03ad6e2a8dd8cc5632e433b94bb935fdd286c94c Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Thu, 12 Dec 2024 12:21:38 +0800 Subject: [PATCH 09/46] feat(fuzz): add alter table options for alter fuzzer (#5074) * feat(fuzz): add set table options to alter fuzzer * chore: clippy is happy, I'm sad * chore: happy ci happy * fix: unit test * feat(fuzz): add unset table options to alter fuzzer * fix: unit test * feat(fuzz): add table option validator * fix: make clippy happy * chore: add comments * chore: apply review comments * fix: unit test * feat(fuzz): add more ttl options * fix: #5108 * chore: add comments * chore: add comments --- Cargo.lock | 1 + src/common/base/src/readable_size.rs | 2 +- src/sql/src/statements/alter.rs | 21 +- tests-fuzz/Cargo.toml | 11 +- tests-fuzz/src/context.rs | 59 ++++- tests-fuzz/src/generator/alter_expr.rs | 143 +++++++++++- tests-fuzz/src/ir.rs | 2 +- tests-fuzz/src/ir/alter_expr.rs | 206 +++++++++++++++++- tests-fuzz/src/test_utils.rs | 1 + tests-fuzz/src/translator.rs | 1 + tests-fuzz/src/translator/common.rs | 67 ++++++ tests-fuzz/src/translator/mysql/alter_expr.rs | 67 +++++- .../src/translator/postgres/alter_expr.rs | 67 +++++- tests-fuzz/src/validator.rs | 1 + tests-fuzz/src/validator/table.rs | 103 +++++++++ .../{ => ddl}/fuzz_alter_logical_table.rs | 0 .../targets/{ => ddl}/fuzz_alter_table.rs | 58 ++++- .../targets/{ => ddl}/fuzz_create_database.rs | 0 .../{ => ddl}/fuzz_create_logical_table.rs | 0 .../targets/{ => ddl}/fuzz_create_table.rs | 0 20 files changed, 742 insertions(+), 68 deletions(-) create mode 100644 tests-fuzz/src/translator/common.rs create mode 100644 tests-fuzz/src/validator/table.rs rename tests-fuzz/targets/{ => ddl}/fuzz_alter_logical_table.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_alter_table.rs (72%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_database.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_logical_table.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_table.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index e57a6542af..534b8c465a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12197,6 +12197,7 @@ dependencies = [ "arbitrary", "async-trait", "chrono", + "common-base", "common-error", "common-macro", "common-query", diff --git a/src/common/base/src/readable_size.rs b/src/common/base/src/readable_size.rs index 21908526c7..4298989291 100644 --- a/src/common/base/src/readable_size.rs +++ b/src/common/base/src/readable_size.rs @@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE; pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE; pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE; -#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)] pub struct ReadableSize(pub u64); impl ReadableSize { diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index 
174bdbbdc3..df148ae5b6 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -72,29 +72,20 @@ pub enum AlterTableOperation { target_type: DataType, }, /// `SET =
` - SetTableOptions { - options: Vec, - }, - UnsetTableOptions { - keys: Vec, - }, + SetTableOptions { options: Vec }, + /// `UNSET
` + UnsetTableOptions { keys: Vec }, /// `DROP COLUMN ` - DropColumn { - name: Ident, - }, + DropColumn { name: Ident }, /// `RENAME ` - RenameTable { - new_table_name: String, - }, + RenameTable { new_table_name: String }, /// `MODIFY COLUMN SET FULLTEXT [WITH ]` SetColumnFulltext { column_name: Ident, options: FulltextOptions, }, /// `MODIFY COLUMN UNSET FULLTEXT` - UnsetColumnFulltext { - column_name: Ident, - }, + UnsetColumnFulltext { column_name: Ident }, } impl Display for AlterTableOperation { diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index cbac9df713..c408992bd5 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -18,6 +18,7 @@ unstable = ["nix"] arbitrary = { version = "1.3.0", features = ["derive"] } async-trait = { workspace = true } chrono = { workspace = true } +common-base = { workspace = true } common-error = { workspace = true } common-macro = { workspace = true } common-query = { workspace = true } @@ -67,14 +68,14 @@ dotenv.workspace = true [[bin]] name = "fuzz_create_table" -path = "targets/fuzz_create_table.rs" +path = "targets/ddl/fuzz_create_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_logical_table" -path = "targets/fuzz_create_logical_table.rs" +path = "targets/ddl/fuzz_create_logical_table.rs" test = false bench = false doc = false @@ -95,21 +96,21 @@ doc = false [[bin]] name = "fuzz_alter_table" -path = "targets/fuzz_alter_table.rs" +path = "targets/ddl/fuzz_alter_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_alter_logical_table" -path = "targets/fuzz_alter_logical_table.rs" +path = "targets/ddl/fuzz_alter_logical_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_database" -path = "targets/fuzz_create_database.rs" +path = "targets/ddl/fuzz_create_database.rs" test = false bench = false doc = false diff --git a/tests-fuzz/src/context.rs b/tests-fuzz/src/context.rs index 8cfd0ca9fa..d0d5dee72d 100644 --- a/tests-fuzz/src/context.rs +++ b/tests-fuzz/src/context.rs @@ -21,7 +21,7 @@ use snafu::{ensure, OptionExt}; use crate::error::{self, Result}; use crate::generator::Random; -use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption}; use crate::ir::{AlterTableExpr, Column, CreateTableExpr, Ident}; pub type TableContextRef = Arc; @@ -35,6 +35,7 @@ pub struct TableContext { // GreptimeDB specific options pub partition: Option, pub primary_keys: Vec, + pub table_options: Vec, } impl From<&CreateTableExpr> for TableContext { @@ -52,6 +53,7 @@ impl From<&CreateTableExpr> for TableContext { columns: columns.clone(), partition: partition.clone(), primary_keys: primary_keys.clone(), + table_options: vec![], } } } @@ -64,7 +66,7 @@ impl TableContext { /// Applies the [AlterTableExpr]. 
pub fn alter(mut self, expr: AlterTableExpr) -> Result { - match expr.alter_options { + match expr.alter_kinds { AlterTableOperation::AddColumn { column, location } => { ensure!( !self.columns.iter().any(|col| col.name == column.name), @@ -140,6 +142,25 @@ impl TableContext { } Ok(self) } + AlterTableOperation::SetTableOptions { options } => { + for option in options { + if let Some(idx) = self + .table_options + .iter() + .position(|opt| opt.key() == option.key()) + { + self.table_options[idx] = option; + } else { + self.table_options.push(option); + } + } + Ok(self) + } + AlterTableOperation::UnsetTableOptions { keys } => { + self.table_options + .retain(|opt| !keys.contains(&opt.key().to_string())); + Ok(self) + } } } @@ -171,10 +192,11 @@ impl TableContext { #[cfg(test)] mod tests { use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::TableContext; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column, Ident}; @@ -185,11 +207,12 @@ mod tests { columns: vec![], partition: None, primary_keys: vec![], + table_options: vec![], }; // Add a column let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "a".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -205,7 +228,7 @@ mod tests { // Add a column at first let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "b".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -221,7 +244,7 @@ mod tests { // Add a column after "b" let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "c".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -239,10 +262,32 @@ mod tests { // Drop the column "b" let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::DropColumn { name: "b".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "b".into() }, }; let table_ctx = table_ctx.alter(expr).unwrap(); assert_eq!(table_ctx.columns[1].name, Ident::new("a")); assert_eq!(table_ctx.primary_keys, vec![0, 1]); + + // Set table options + let ttl_option = AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))); + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ttl_option.clone()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 1); + assert_eq!(table_ctx.table_options[0], ttl_option); + + // Unset table options + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec![ttl_option.key().to_string()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 0); } } diff --git a/tests-fuzz/src/generator/alter_expr.rs b/tests-fuzz/src/generator/alter_expr.rs index 03aed702fb..0c5a628999 100644 --- a/tests-fuzz/src/generator/alter_expr.rs +++ b/tests-fuzz/src/generator/alter_expr.rs @@ -14,17 +14,19 @@ use std::marker::PhantomData; +use 
common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; use datatypes::data_type::ConcreteDataType; use derive_builder::Builder; use rand::Rng; use snafu::ensure; +use strum::IntoEnumIterator; use crate::context::TableContextRef; use crate::error::{self, Error, Result}; use crate::fake::WordGenerator; use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Generator, Random}; -use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation}; +use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{ droppable_columns, generate_columns, generate_random_value, modifiable_columns, Column, @@ -107,7 +109,7 @@ impl Generator for AlterExprAddColumnGenera .remove(0); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::AddColumn { column, location }, + alter_kinds: AlterTableOperation::AddColumn { column, location }, }) } } @@ -130,7 +132,7 @@ impl Generator for AlterExprDropColumnGenerator { let name = droppable[rng.gen_range(0..droppable.len())].name.clone(); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::DropColumn { name }, + alter_kinds: AlterTableOperation::DropColumn { name }, }) } } @@ -153,7 +155,7 @@ impl Generator for AlterExprRenameGenerator { .generate_unique_table_name(rng, self.name_generator.as_ref()); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::RenameTable { new_table_name }, + alter_kinds: AlterTableOperation::RenameTable { new_table_name }, }) } } @@ -180,7 +182,7 @@ impl Generator for AlterExprModifyDataTypeGenerator Generator for AlterExprModifyDataTypeGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprSetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| match all_options[*idx] { + AlterTableOption::Ttl(_) => { + let ttl_type = rng.gen_range(0..3); + match ttl_type { + 0 => { + let duration: u32 = rng.gen(); + AlterTableOption::Ttl(Ttl::Duration((duration as i64).into())) + } + 1 => AlterTableOption::Ttl(Ttl::Instant), + 2 => AlterTableOption::Ttl(Ttl::Forever), + _ => unreachable!(), + } + } + AlterTableOption::TwcsTimeWindow(_) => { + let time_window: u32 = rng.gen(); + AlterTableOption::TwcsTimeWindow((time_window as i64).into()) + } + AlterTableOption::TwcsMaxOutputFileSize(_) => { + let max_output_file_size: u64 = rng.gen(); + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize(max_output_file_size)) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => { + let max_inactive_window_runs: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowRuns(max_inactive_window_runs) + } + AlterTableOption::TwcsMaxActiveWindowFiles(_) => { + let max_active_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxActiveWindowFiles(max_active_window_files) + } + AlterTableOption::TwcsMaxActiveWindowRuns(_) => { + let max_active_window_runs: u64 = rng.gen(); + 
AlterTableOption::TwcsMaxActiveWindowRuns(max_active_window_runs) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => { + let max_inactive_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowFiles(max_inactive_window_files) + } + }) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::SetTableOptions { options }, + }) + } +} + +/// Generates the [AlterTableOperation::UnsetTableOptions] of [AlterTableExpr]. +#[derive(Builder)] +#[builder(pattern = "owned")] +pub struct AlterExprUnsetTableOptionsGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprUnsetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| all_options[*idx].key().to_string()) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::UnsetTableOptions { keys: options }, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -220,7 +325,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; assert_eq!(expected, serialized); let expr = AlterExprRenameGeneratorBuilder::default() @@ -230,7 +335,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprDropColumnGeneratorBuilder::default() @@ -240,17 +345,37 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprModifyDataTypeGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = 
r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"SetTableOptions":{"options":[{"TwcsMaxActiveWindowRuns":14908016120444947142},{"TwcsMaxActiveWindowFiles":5840340123887173415},{"TwcsMaxOutputFileSize":17740311466571102265}]}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprUnsetTableOptionsGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"UnsetTableOptions":{"keys":["compaction.twcs.max_active_window_runs"]}}}"#; assert_eq!(expected, serialized); } } diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index b9d13ca9fb..ae6edd595c 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -24,7 +24,7 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::time::Duration; -pub use alter_expr::AlterTableExpr; +pub use alter_expr::{AlterTableExpr, AlterTableOption}; use common_time::timestamp::TimeUnit; use common_time::{Date, DateTime, Timestamp}; pub use create_expr::{CreateDatabaseExpr, CreateTableExpr}; diff --git a/tests-fuzz/src/ir/alter_expr.rs b/tests-fuzz/src/ir/alter_expr.rs index a9fdc18c22..1d637ff660 100644 --- a/tests-fuzz/src/ir/alter_expr.rs +++ b/tests-fuzz/src/ir/alter_expr.rs @@ -12,16 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Display; +use std::str::FromStr; + +use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; +use common_time::{Duration, FOREVER, INSTANT}; use derive_builder::Builder; use serde::{Deserialize, Serialize}; +use store_api::mito_engine_options::{ + APPEND_MODE_KEY, COMPACTION_TYPE, TTL_KEY, TWCS_MAX_ACTIVE_WINDOW_FILES, + TWCS_MAX_ACTIVE_WINDOW_RUNS, TWCS_MAX_INACTIVE_WINDOW_FILES, TWCS_MAX_INACTIVE_WINDOW_RUNS, + TWCS_MAX_OUTPUT_FILE_SIZE, TWCS_TIME_WINDOW, +}; +use strum::EnumIter; +use crate::error::{self, Result}; use crate::ir::{Column, Ident}; #[derive(Debug, Builder, Clone, Serialize, Deserialize)] pub struct AlterTableExpr { pub table_name: Ident, - pub alter_options: AlterTableOperation, + pub alter_kinds: AlterTableOperation, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -37,4 +49,196 @@ pub enum AlterTableOperation { RenameTable { new_table_name: Ident }, /// `MODIFY COLUMN ` ModifyDataType { column: Column }, + /// `SET
<key> = <value>` + SetTableOptions { options: Vec<AlterTableOption> }, + /// `UNSET <key>
` + UnsetTableOptions { keys: Vec }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub enum Ttl { + Duration(Duration), + Instant, + #[default] + Forever, +} + +impl Display for Ttl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Ttl::Duration(d) => write!(f, "{}", d), + Ttl::Instant => write!(f, "{}", INSTANT), + Ttl::Forever => write!(f, "{}", FOREVER), + } + } +} + +#[derive(Debug, EnumIter, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum AlterTableOption { + Ttl(Ttl), + TwcsTimeWindow(Duration), + TwcsMaxOutputFileSize(ReadableSize), + TwcsMaxInactiveWindowFiles(u64), + TwcsMaxActiveWindowFiles(u64), + TwcsMaxInactiveWindowRuns(u64), + TwcsMaxActiveWindowRuns(u64), +} + +impl AlterTableOption { + pub fn key(&self) -> &str { + match self { + AlterTableOption::Ttl(_) => TTL_KEY, + AlterTableOption::TwcsTimeWindow(_) => TWCS_TIME_WINDOW, + AlterTableOption::TwcsMaxOutputFileSize(_) => TWCS_MAX_OUTPUT_FILE_SIZE, + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => TWCS_MAX_INACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxActiveWindowFiles(_) => TWCS_MAX_ACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => TWCS_MAX_INACTIVE_WINDOW_RUNS, + AlterTableOption::TwcsMaxActiveWindowRuns(_) => TWCS_MAX_ACTIVE_WINDOW_RUNS, + } + } + + /// Parses the AlterTableOption from a key-value pair + fn parse_kv(key: &str, value: &str) -> Result { + match key { + TTL_KEY => { + let ttl = if value.to_lowercase() == INSTANT { + Ttl::Instant + } else if value.to_lowercase() == FOREVER { + Ttl::Forever + } else { + let duration = humantime::parse_duration(value).unwrap(); + Ttl::Duration(duration.into()) + }; + Ok(AlterTableOption::Ttl(ttl)) + } + TWCS_MAX_ACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowRuns(runs)) + } + TWCS_MAX_ACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowFiles(files)) + } + TWCS_MAX_INACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowRuns(runs)) + } + TWCS_MAX_INACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowFiles(files)) + } + TWCS_MAX_OUTPUT_FILE_SIZE => { + // may be "1M" instead of "1 MiB" + let value = if value.ends_with("B") { + value.to_string() + } else { + format!("{}B", value) + }; + let size = ReadableSize::from_str(&value).unwrap(); + Ok(AlterTableOption::TwcsMaxOutputFileSize(size)) + } + TWCS_TIME_WINDOW => { + let time = humantime::parse_duration(value).unwrap(); + Ok(AlterTableOption::TwcsTimeWindow(time.into())) + } + _ => error::UnexpectedSnafu { + violated: format!("Unknown table option key: {}", key), + } + .fail(), + } + } + + /// Parses the AlterTableOption from comma-separated string + pub fn parse_kv_pairs(option_string: &str) -> Result> { + let mut options = vec![]; + for pair in option_string.split(',') { + let pair = pair.trim(); + let (key, value) = pair.split_once('=').unwrap(); + let key = key.trim().replace("\'", ""); + let value = value.trim().replace('\'', ""); + // Currently we have only one compaction type, so we ignore it + // Cautious: COMPACTION_TYPE may be kept even if there are no compaction options enabled + if key == COMPACTION_TYPE || key == APPEND_MODE_KEY { + continue; + } else { + let option = AlterTableOption::parse_kv(&key, &value)?; + options.push(option); + } + } + Ok(options) + } +} + +impl Display for 
AlterTableOption { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AlterTableOption::Ttl(d) => write!(f, "'{}' = '{}'", TTL_KEY, d), + AlterTableOption::TwcsTimeWindow(d) => write!(f, "'{}' = '{}'", TWCS_TIME_WINDOW, d), + AlterTableOption::TwcsMaxOutputFileSize(s) => { + // Caution: to_string loses precision for ReadableSize + write!(f, "'{}' = '{}'", TWCS_MAX_OUTPUT_FILE_SIZE, s) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxActiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_RUNS, u) + } + AlterTableOption::TwcsMaxActiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_RUNS, u) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_kv_pairs() { + let option_string = + "compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = 'forever'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options, + vec![ + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()), + AlterTableOption::Ttl(Ttl::Forever), + ] + ); + + let option_string = "compaction.twcs.max_active_window_files = '5030469694939972912', + compaction.twcs.max_active_window_runs = '8361168990283879099', + compaction.twcs.max_inactive_window_files = '6028716566907830876', + compaction.twcs.max_inactive_window_runs = '10622283085591494074', + compaction.twcs.max_output_file_size = '15686.4PiB', + compaction.twcs.time_window = '2061999256ms', + compaction.type = 'twcs', + ttl = '1month 3days 15h 49m 8s 279ms'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 7); + let expected = vec![ + AlterTableOption::TwcsMaxActiveWindowFiles(5030469694939972912), + AlterTableOption::TwcsMaxActiveWindowRuns(8361168990283879099), + AlterTableOption::TwcsMaxInactiveWindowFiles(6028716566907830876), + AlterTableOption::TwcsMaxInactiveWindowRuns(10622283085591494074), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("15686.4PiB").unwrap()), + AlterTableOption::TwcsTimeWindow(Duration::new_nanosecond(2_061_999_256_000_000)), + AlterTableOption::Ttl(Ttl::Duration(Duration::new_millisecond( + // A month is 2_630_016 seconds + 2_630_016 * 1000 + + 3 * 24 * 60 * 60 * 1000 + + 15 * 60 * 60 * 1000 + + 49 * 60 * 1000 + + 8 * 1000 + + 279, + ))), + ]; + assert_eq!(options, expected); + } } diff --git a/tests-fuzz/src/test_utils.rs b/tests-fuzz/src/test_utils.rs index e65548969a..bef96a1fd7 100644 --- a/tests-fuzz/src/test_utils.rs +++ b/tests-fuzz/src/test_utils.rs @@ -55,5 +55,6 @@ pub fn new_test_ctx() -> TableContext { ], partition: None, primary_keys: vec![], + table_options: vec![], } } diff --git a/tests-fuzz/src/translator.rs b/tests-fuzz/src/translator.rs index 1745aa9336..673b543f2c 100644 --- a/tests-fuzz/src/translator.rs +++ b/tests-fuzz/src/translator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+mod common; pub mod mysql; pub mod postgres; diff --git a/tests-fuzz/src/translator/common.rs b/tests-fuzz/src/translator/common.rs new file mode 100644 index 0000000000..2b968ed439 --- /dev/null +++ b/tests-fuzz/src/translator/common.rs @@ -0,0 +1,67 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use super::DslTranslator; +use crate::error::{Error, Result}; +use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::{AlterTableExpr, AlterTableOption}; + +/// Shared translator for `ALTER TABLE` operations. +pub(crate) struct CommonAlterTableTranslator; + +impl DslTranslator for CommonAlterTableTranslator { + type Error = Error; + + fn translate(&self, input: &AlterTableExpr) -> Result { + Ok(match &input.alter_kinds { + AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), + AlterTableOperation::SetTableOptions { options } => { + Self::format_set_table_options(&input.table_name, options) + } + AlterTableOperation::UnsetTableOptions { keys } => { + Self::format_unset_table_options(&input.table_name, keys) + } + _ => unimplemented!(), + }) + } +} + +impl CommonAlterTableTranslator { + fn format_drop(name: impl Display, column: impl Display) -> String { + format!("ALTER TABLE {name} DROP COLUMN {column};") + } + + fn format_set_table_options(name: impl Display, options: &[AlterTableOption]) -> String { + format!( + "ALTER TABLE {name} SET {};", + options + .iter() + .map(|option| option.to_string()) + .collect::>() + .join(", ") + ) + } + + fn format_unset_table_options(name: impl Display, keys: &[String]) -> String { + format!( + "ALTER TABLE {name} UNSET {};", + keys.iter() + .map(|key| format!("'{}'", key)) + .collect::>() + .join(", ") + ) + } +} diff --git a/tests-fuzz/src/translator/mysql/alter_expr.rs b/tests-fuzz/src/translator/mysql/alter_expr.rs index c973d7cb4b..3bf30b09a3 100644 --- a/tests-fuzz/src/translator/mysql/alter_expr.rs +++ b/tests-fuzz/src/translator/mysql/alter_expr.rs @@ -22,6 +22,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::DslTranslator; pub struct AlterTableExprTranslator; @@ -30,26 +31,22 @@ impl DslTranslator for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, location } => { Self::format_add_column(&input.table_name, column, location) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => 
CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME {new_name};") } @@ -119,11 +116,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -132,7 +133,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -150,7 +151,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -160,7 +161,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -168,7 +169,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -180,4 +181,48 @@ mod tests { let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), 
"compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/translator/postgres/alter_expr.rs b/tests-fuzz/src/translator/postgres/alter_expr.rs index 42db202efe..f66ce0db92 100644 --- a/tests-fuzz/src/translator/postgres/alter_expr.rs +++ b/tests-fuzz/src/translator/postgres/alter_expr.rs @@ -21,6 +21,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::postgres::sql_data_type_to_postgres_data_type; use crate::translator::DslTranslator; @@ -30,26 +31,22 @@ impl DslTranslator for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, .. } => { Self::format_add_column(&input.table_name, column) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME TO {new_name};") } @@ -116,11 +113,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -129,7 +130,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -145,7 +146,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -155,7 +156,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -163,7 +164,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -176,4 +177,48 @@ mod tests { 
// Ignores the location and primary key option. assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), "compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/validator.rs b/tests-fuzz/src/validator.rs index cf2df9af22..406dd66041 100644 --- a/tests-fuzz/src/validator.rs +++ b/tests-fuzz/src/validator.rs @@ -14,3 +14,4 @@ pub mod column; pub mod row; +pub mod table; diff --git a/tests-fuzz/src/validator/table.rs b/tests-fuzz/src/validator/table.rs new file mode 100644 index 0000000000..406719b2d6 --- /dev/null +++ b/tests-fuzz/src/validator/table.rs @@ -0,0 +1,103 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use snafu::{ensure, ResultExt}; +use sqlx::database::HasArguments; +use sqlx::{ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Row, Type}; + +use crate::error::{self, Result, UnexpectedSnafu}; +use crate::ir::alter_expr::AlterTableOption; + +/// Parses table options from the result of `SHOW CREATE TABLE` +/// An example of the result of `SHOW CREATE TABLE`: +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | Table | Create Table | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | json | CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day') | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +fn parse_show_create(show_create: &str) -> Result> { + if let Some(option_start) = show_create.find("WITH(") { + let option_end = { + let remain_str = &show_create[option_start..]; + if let Some(end) = remain_str.find(')') { + end + option_start + } else { + return UnexpectedSnafu { + violated: format!("Cannot find the end of the options in: {}", show_create), + } + .fail(); + } + }; + let options = &show_create[option_start + 5..option_end]; + Ok(AlterTableOption::parse_kv_pairs(options)?) + } else { + Ok(vec![]) + } +} + +/// Fetches table options from the context +pub async fn fetch_table_options<'a, DB, E>(e: E, sql: &'a str) -> Result> +where + DB: Database, + >::Arguments: IntoArguments<'a, DB>, + for<'c> E: 'a + Executor<'c, Database = DB>, + for<'c> String: Decode<'c, DB> + Type, + for<'c> String: Encode<'c, DB> + Type, + usize: ColumnIndex<::Row>, +{ + let fetched_rows = sqlx::query(sql) + .fetch_all(e) + .await + .context(error::ExecuteQuerySnafu { sql })?; + ensure!( + fetched_rows.len() == 1, + error::AssertSnafu { + reason: format!( + "Expected fetched row length: 1, got: {}", + fetched_rows.len(), + ) + } + ); + + let row = fetched_rows.first().unwrap(); + let show_create = row.try_get::(1).unwrap(); + parse_show_create(&show_create) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; + use common_time::Duration; + + use super::*; + use crate::ir::alter_expr::Ttl; + use crate::ir::AlterTableOption; + + #[test] + fn test_parse_show_create() { + let show_create = "CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day')"; + let options = parse_show_create(show_create).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options[0], + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()) + ); + assert_eq!( + options[1], + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(24 * 60 * 60))) + ); + } +} diff --git a/tests-fuzz/targets/fuzz_alter_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_alter_logical_table.rs rename to 
tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_alter_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_table.rs similarity index 72% rename from tests-fuzz/targets/fuzz_alter_table.rs rename to tests-fuzz/targets/ddl/fuzz_alter_table.rs index 7f2a809c9e..247d7632ee 100644 --- a/tests-fuzz/targets/fuzz_alter_table.rs +++ b/tests-fuzz/targets/ddl/fuzz_alter_table.rs @@ -34,10 +34,13 @@ use tests_fuzz::fake::{ use tests_fuzz::generator::alter_expr::{ AlterExprAddColumnGeneratorBuilder, AlterExprDropColumnGeneratorBuilder, AlterExprModifyDataTypeGeneratorBuilder, AlterExprRenameGeneratorBuilder, + AlterExprSetTableOptionsGeneratorBuilder, AlterExprUnsetTableOptionsGeneratorBuilder, }; use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; use tests_fuzz::generator::Generator; -use tests_fuzz::ir::{droppable_columns, modifiable_columns, AlterTableExpr, CreateTableExpr}; +use tests_fuzz::ir::{ + droppable_columns, modifiable_columns, AlterTableExpr, AlterTableOption, CreateTableExpr, +}; use tests_fuzz::translator::mysql::alter_expr::AlterTableExprTranslator; use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; use tests_fuzz::translator::DslTranslator; @@ -62,11 +65,13 @@ struct FuzzInput { } #[derive(Debug, EnumIter)] -enum AlterTableOption { +enum AlterTableKind { AddColumn, DropColumn, RenameTable, ModifyDataType, + SetTableOptions, + UnsetTableOptions, } fn generate_create_table_expr(rng: &mut R) -> Result { @@ -93,23 +98,23 @@ fn generate_alter_table_expr( table_ctx: TableContextRef, rng: &mut R, ) -> Result { - let options = AlterTableOption::iter().collect::>(); - match options[rng.gen_range(0..options.len())] { - AlterTableOption::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { + let kinds = AlterTableKind::iter().collect::>(); + match kinds[rng.gen_range(0..kinds.len())] { + AlterTableKind::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { AlterExprDropColumnGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { + AlterTableKind::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { AlterExprModifyDataTypeGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::RenameTable => AlterExprRenameGeneratorBuilder::default() + AlterTableKind::RenameTable => AlterExprRenameGeneratorBuilder::default() .table_ctx(table_ctx) .name_generator(Box::new(MappedGenerator::new( WordGenerator, @@ -118,6 +123,20 @@ fn generate_alter_table_expr( .build() .unwrap() .generate(rng), + AlterTableKind::SetTableOptions => { + let expr_generator = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } + AlterTableKind::UnsetTableOptions => { + let expr_generator = AlterExprUnsetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } _ => { let location = rng.gen_bool(0.5); let expr_generator = AlterExprAddColumnGeneratorBuilder::default() @@ -179,6 +198,31 @@ async fn execute_alter_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut columns = table_ctx.columns.clone(); columns.sort_by(|a, b| a.name.value.cmp(&b.name.value)); validator::column::assert_eq(&column_entries, &columns)?; + + // Validates table options + let sql = format!("SHOW CREATE TABLE 
{}", table_ctx.name); + let mut table_options = validator::table::fetch_table_options(&ctx.greptime, &sql).await?; + table_options.sort_by(|a, b| a.key().cmp(b.key())); + let mut expected_table_options = table_ctx.table_options.clone(); + expected_table_options.sort_by(|a, b| a.key().cmp(b.key())); + table_options + .iter() + .zip(expected_table_options.iter()) + .for_each(|(a, b)| { + if let ( + AlterTableOption::TwcsMaxOutputFileSize(a), + AlterTableOption::TwcsMaxOutputFileSize(b), + ) = (a, b) + { + // to_string loses precision for ReadableSize, so the size in generated SQL is not the same as the size in the table context, + // but the string representation should be the same. For example: + // to_string() from_str() + // ReadableSize(13001360408898724524) ------------> "11547.5PiB" -----------> ReadableSize(13001329174265200640) + assert_eq!(a.to_string(), b.to_string()); + } else { + assert_eq!(a, b); + } + }); } // Cleans up diff --git a/tests-fuzz/targets/fuzz_create_database.rs b/tests-fuzz/targets/ddl/fuzz_create_database.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_database.rs rename to tests-fuzz/targets/ddl/fuzz_create_database.rs diff --git a/tests-fuzz/targets/fuzz_create_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_create_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_logical_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_create_table.rs b/tests-fuzz/targets/ddl/fuzz_create_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_table.rs From 2137c53274d162f4a4131ca0d9b1d5a7bb9f155b Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 12:45:40 +0800 Subject: [PATCH 10/46] feat(index): add `file_size_hint` for remote blob reader (#5147) feat(index): add file_size_hint for remote blob reader --- src/common/base/src/range_read.rs | 17 +++++++++++++++ src/mito2/src/sst/file.rs | 20 ++++++++++++++++++ .../src/sst/index/inverted_index/applier.rs | 17 +++++++++------ .../src/sst/index/inverted_index/creator.rs | 2 +- src/mito2/src/sst/index/store.rs | 21 +++++++++++++++---- src/mito2/src/sst/parquet/reader.rs | 7 +++++-- src/puffin/src/partial_reader/async.rs | 4 ++++ src/puffin/src/puffin_manager.rs | 3 ++- .../fs_puffin_manager/reader.rs | 21 ++++++++++++++++++- 9 files changed, 97 insertions(+), 15 deletions(-) diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs index 91f865d17e..61f28cb629 100644 --- a/src/common/base/src/range_read.rs +++ b/src/common/base/src/range_read.rs @@ -36,6 +36,11 @@ pub struct Metadata { /// `RangeReader` reads a range of bytes from a source. #[async_trait] pub trait RangeReader: Send + Unpin { + /// Sets the file size hint for the reader. + /// + /// It's used to optimize the reading process by reducing the number of remote requests. + fn with_file_size_hint(&mut self, file_size_hint: u64); + /// Returns the metadata of the source. 
async fn metadata(&mut self) -> io::Result; @@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin { #[async_trait] impl RangeReader for &mut R { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + (*self).with_file_size_hint(file_size_hint) + } + async fn metadata(&mut self) -> io::Result { (*self).metadata().await } @@ -186,6 +195,10 @@ impl AsyncRead for AsyncReadAdapter { #[async_trait] impl RangeReader for Vec { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.len() as u64, @@ -222,6 +235,10 @@ impl FileReader { #[async_trait] impl RangeReader for FileReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.content_length, diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 4353ae55e3..5a9932ab43 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -146,13 +146,33 @@ pub enum IndexType { } impl FileMeta { + /// Returns true if the file has an inverted index pub fn inverted_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::InvertedIndex) } + /// Returns true if the file has a fulltext index pub fn fulltext_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::FulltextIndex) } + + /// Returns the size of the inverted index file + pub fn inverted_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.inverted_index_available() { + Some(self.index_file_size) + } else { + None + } + } + + /// Returns the size of the fulltext index file + pub fn fulltext_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.fulltext_index_available() { + Some(self.index_file_size) + } else { + None + } + } } /// Handle to a SST file. diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index bf5206ef44..d060d4bec1 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -113,7 +113,7 @@ impl InvertedIndexApplier { } /// Applies predicates to the provided SST file id and returns the relevant row group ids - pub async fn apply(&self, file_id: FileId) -> Result { + pub async fn apply(&self, file_id: FileId, file_size_hint: Option) -> Result { let _timer = INDEX_APPLY_ELAPSED .with_label_values(&[TYPE_INVERTED_INDEX]) .start_timer(); @@ -129,8 +129,7 @@ impl InvertedIndexApplier { if let Err(err) = other { warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.") } - - self.remote_blob_reader(file_id).await? + self.remote_blob_reader(file_id, file_size_hint).await? } }; @@ -181,16 +180,22 @@ impl InvertedIndexApplier { } /// Creates a blob reader from the remote index file. - async fn remote_blob_reader(&self, file_id: FileId) -> Result { + async fn remote_blob_reader( + &self, + file_id: FileId, + file_size_hint: Option, + ) -> Result { let puffin_manager = self .puffin_manager_factory .build(self.store.clone()) .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); + let file_path = location::index_file_path(&self.region_dir, file_id); puffin_manager .reader(&file_path) .await .context(PuffinBuildReaderSnafu)? + .with_file_size_hint(file_size_hint) .blob(INDEX_BLOB_TYPE) .await .context(PuffinReadBlobSnafu)? 
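To summarize the data flow of the new `file_size_hint`: the parquet reader derives the hint from the SST's `FileMeta` and threads it through the inverted index applier down to the remote blob reader, which can then report its metadata without an extra remote `stat`. A condensed sketch using only the APIs added in this patch (`file_handle` and `index_applier` mirror the names used in the parquet reader changes further below):

```rust
// The hint is only meaningful when the puffin file carries a single index,
// because `index_file_size` covers the whole file.
let file_size_hint = file_handle.meta_ref().inverted_index_size();

// The applier forwards the hint to the remote blob reader via
// `with_file_size_hint`, so `metadata()` can answer from the hint alone.
let output = index_applier
    .apply(file_handle.file_id(), file_size_hint)
    .await?;
```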
@@ -250,7 +255,7 @@ mod tests { Box::new(mock_index_applier), puffin_manager_factory, ); - let output = sst_index_applier.apply(file_id).await.unwrap(); + let output = sst_index_applier.apply(file_id, None).await.unwrap(); assert_eq!( output, ApplyOutput { @@ -290,7 +295,7 @@ mod tests { Box::new(mock_index_applier), puffin_manager_factory, ); - let res = sst_index_applier.apply(file_id).await; + let res = sst_index_applier.apply(file_id, None).await; assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found")); } } diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 029a0da848..43cf54fa28 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -464,7 +464,7 @@ mod tests { .unwrap(); Box::pin(async move { applier - .apply(sst_file_id) + .apply(sst_file_id, None) .await .unwrap() .matched_segment_ids diff --git a/src/mito2/src/sst/index/store.rs b/src/mito2/src/sst/index/store.rs index 2750c69fc2..7322bd4db4 100644 --- a/src/mito2/src/sst/index/store.rs +++ b/src/mito2/src/sst/index/store.rs @@ -68,6 +68,7 @@ impl InstrumentedStore { path: path.to_string(), read_byte_count, read_count, + file_size_hint: None, }) } @@ -262,15 +263,27 @@ pub(crate) struct InstrumentedRangeReader<'a> { path: String, read_byte_count: &'a IntCounter, read_count: &'a IntCounter, + file_size_hint: Option, } #[async_trait] impl RangeReader for InstrumentedRangeReader<'_> { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + self.file_size_hint = Some(file_size_hint); + } + async fn metadata(&mut self) -> io::Result { - let stat = self.store.stat(&self.path).await?; - Ok(Metadata { - content_length: stat.content_length(), - }) + match self.file_size_hint { + Some(file_size_hint) => Ok(Metadata { + content_length: file_size_hint, + }), + None => { + let stat = self.store.stat(&self.path).await?; + Ok(Metadata { + content_length: stat.content_length(), + }) + } + } } async fn read(&mut self, range: Range) -> io::Result { diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index b73026a7a6..02c5c2cf3c 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -475,8 +475,11 @@ impl ParquetReaderBuilder { if !self.file_handle.meta_ref().inverted_index_available() { return false; } - - let apply_output = match index_applier.apply(self.file_handle.file_id()).await { + let file_size_hint = self.file_handle.meta_ref().inverted_index_size(); + let apply_output = match index_applier + .apply(self.file_handle.file_id(), file_size_hint) + .await + { Ok(output) => output, Err(err) => { if cfg!(any(test, feature = "test")) { diff --git a/src/puffin/src/partial_reader/async.rs b/src/puffin/src/partial_reader/async.rs index 3de40cb3a1..4eedd1ee31 100644 --- a/src/puffin/src/partial_reader/async.rs +++ b/src/puffin/src/partial_reader/async.rs @@ -23,6 +23,10 @@ use crate::partial_reader::PartialReader; #[async_trait] impl RangeReader for PartialReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.size, diff --git a/src/puffin/src/puffin_manager.rs b/src/puffin/src/puffin_manager.rs index 17101b1662..204bc2c66e 100644 --- a/src/puffin/src/puffin_manager.rs +++ b/src/puffin/src/puffin_manager.rs @@ -73,11 +73,12 @@ pub struct PutOptions { /// The `PuffinReader` trait provides methods for reading 
blobs and directories from a Puffin file. #[async_trait] -#[auto_impl::auto_impl(Arc)] pub trait PuffinReader { type Blob: BlobGuard; type Dir: DirGuard; + fn with_file_size_hint(self, file_size_hint: Option) -> Self; + /// Reads a blob from the Puffin file. /// /// The returned `BlobGuard` is used to access the blob data. diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs index 2e1ae594ad..a5da2f75f8 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs @@ -43,6 +43,9 @@ pub struct FsPuffinReader { /// The name of the puffin file. puffin_file_name: String, + /// The file size hint. + file_size_hint: Option, + /// The stager. stager: S, @@ -62,6 +65,7 @@ impl FsPuffinReader { ) -> Self { Self { puffin_file_name, + file_size_hint: None, stager, puffin_file_accessor, puffin_file_metadata_cache, @@ -78,11 +82,19 @@ where type Blob = Either, S::Blob>; type Dir = S::Dir; + fn with_file_size_hint(mut self, file_size_hint: Option) -> Self { + self.file_size_hint = file_size_hint; + self + } + async fn blob(&self, key: &str) -> Result { - let reader = self + let mut reader = self .puffin_file_accessor .reader(&self.puffin_file_name) .await?; + if let Some(file_size_hint) = self.file_size_hint { + reader.with_file_size_hint(file_size_hint); + } let mut file = PuffinFileReader::new(reader); let metadata = self.get_puffin_file_metadata(&mut file).await?; @@ -303,6 +315,13 @@ where A: RangeReader, B: RangeReader, { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + match self { + Either::L(a) => a.with_file_size_hint(file_size_hint), + Either::R(b) => b.with_file_size_hint(file_size_hint), + } + } + async fn metadata(&mut self) -> io::Result { match self { Either::L(a) => a.metadata().await, From b8a78b78389ae9edd6b3e4a05ee8697ad0c578a3 Mon Sep 17 00:00:00 2001 From: localhost Date: Thu, 12 Dec 2024 17:01:21 +0800 Subject: [PATCH 11/46] chore: decide tag column in log api follow table schema if table exists (#5138) * chore: decide tag column in log api follow table schema if table exists * chore: add more test for greptime_identity pipeline * chore: change pipeline get_table function signature * chore: change identity_pipeline_inner tag_column_names type --- src/frontend/src/instance/log_handler.rs | 15 ++- .../src/etl/transform/transformer/greptime.rs | 117 +++++++++++++++--- src/servers/src/http/event.rs | 13 +- src/servers/src/query_handler.rs | 8 +- 4 files changed, 130 insertions(+), 23 deletions(-) diff --git a/src/frontend/src/instance/log_handler.rs b/src/frontend/src/instance/log_handler.rs index c3422066a3..9ae782c7d4 100644 --- a/src/frontend/src/instance/log_handler.rs +++ b/src/frontend/src/instance/log_handler.rs @@ -25,8 +25,9 @@ use servers::error::{ }; use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef}; use servers::query_handler::PipelineHandler; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use snafu::ResultExt; +use table::Table; use crate::instance::Instance; @@ -84,6 +85,18 @@ impl PipelineHandler for Instance { .await .context(PipelineSnafu) } + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error> { + let catalog = query_ctx.current_catalog(); + let schema = query_ctx.current_schema(); + self.catalog_manager + .table(catalog, &schema, table, None) + .await + } } impl 
Instance { diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs index 3b43696b5a..5d69a03ea2 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime.rs @@ -15,6 +15,7 @@ pub mod coerce; use std::collections::HashSet; +use std::sync::Arc; use ahash::HashMap; use api::helper::proto_value_type; @@ -367,20 +368,15 @@ fn json_value_to_row( Ok(Row { values: row }) } -/// Identity pipeline for Greptime -/// This pipeline will convert the input JSON array to Greptime Rows -/// 1. The pipeline will add a default timestamp column to the schema -/// 2. The pipeline not resolve NULL value -/// 3. The pipeline assumes that the json format is fixed -/// 4. The pipeline will return an error if the same column datatype is mismatched -/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. -pub fn identity_pipeline(array: Vec) -> Result { +fn identity_pipeline_inner<'a>( + array: Vec, + tag_column_names: Option>, +) -> Result { let mut rows = Vec::with_capacity(array.len()); - - let mut schema = SchemaInfo::default(); + let mut schema_info = SchemaInfo::default(); for value in array { if let serde_json::Value::Object(map) = value { - let row = json_value_to_row(&mut schema, map)?; + let row = json_value_to_row(&mut schema_info, map)?; rows.push(row); } } @@ -395,7 +391,7 @@ pub fn identity_pipeline(array: Vec) -> Result { let ts = GreptimeValue { value_data: Some(ValueData::TimestampNanosecondValue(ns)), }; - let column_count = schema.schema.len(); + let column_count = schema_info.schema.len(); for row in rows.iter_mut() { let diff = column_count - row.values.len(); for _ in 0..diff { @@ -403,15 +399,49 @@ pub fn identity_pipeline(array: Vec) -> Result { } row.values.push(ts.clone()); } - schema.schema.push(greptime_timestamp_schema); + schema_info.schema.push(greptime_timestamp_schema); + + // set the semantic type of the row key column to Tag + if let Some(tag_column_names) = tag_column_names { + tag_column_names.for_each(|tag_column_name| { + if let Some(index) = schema_info.index.get(tag_column_name) { + schema_info.schema[*index].semantic_type = SemanticType::Tag as i32; + } + }); + } Ok(Rows { - schema: schema.schema, + schema: schema_info.schema, rows, }) } +/// Identity pipeline for Greptime +/// This pipeline will convert the input JSON array to Greptime Rows +/// params table is used to set the semantic type of the row key column to Tag +/// 1. The pipeline will add a default timestamp column to the schema +/// 2. The pipeline not resolve NULL value +/// 3. The pipeline assumes that the json format is fixed +/// 4. The pipeline will return an error if the same column datatype is mismatched +/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. 
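+///
+/// A minimal sketch of the call shape, mirroring the unit tests below (the JSON value is
+/// illustrative; pass `Some(table)` to mark the existing table's row-key columns as tags):
+///
+/// ```ignore
+/// let rows = identity_pipeline(
+///     vec![serde_json::json!({"name": "Alice", "age": 20})],
+///     None,
+/// )?;
+/// ```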
+pub fn identity_pipeline( + array: Vec, + table: Option>, +) -> Result { + match table { + Some(table) => { + let table_info = table.table_info(); + let tag_column_names = table_info.meta.row_key_column_names(); + identity_pipeline_inner(array, Some(tag_column_names)) + } + None => identity_pipeline_inner(array, None::>), + } +} + #[cfg(test)] mod tests { + use api::v1::SemanticType; + + use crate::etl::transform::transformer::greptime::identity_pipeline_inner; use crate::identity_pipeline; #[test] @@ -437,7 +467,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -465,7 +495,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -493,7 +523,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_ok()); let rows = rows.unwrap(); assert_eq!(rows.schema.len(), 8); @@ -501,5 +531,58 @@ mod tests { assert_eq!(8, rows.rows[0].values.len()); assert_eq!(8, rows.rows[1].values.len()); } + { + let array = vec![ + serde_json::json!({ + "woshinull": null, + "name": "Alice", + "age": 20, + "is_student": true, + "score": 99.5, + "hobbies": "reading", + "address": "Beijing", + }), + serde_json::json!({ + "name": "Bob", + "age": 21, + "is_student": false, + "score": 88.5, + "hobbies": "swimming", + "address": "Shanghai", + "gaga": "gaga" + }), + ]; + let tag_column_names = ["name".to_string(), "address".to_string()]; + let rows = identity_pipeline_inner(array, Some(tag_column_names.iter())); + assert!(rows.is_ok()); + let rows = rows.unwrap(); + assert_eq!(rows.schema.len(), 8); + assert_eq!(rows.rows.len(), 2); + assert_eq!(8, rows.rows[0].values.len()); + assert_eq!(8, rows.rows[1].values.len()); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "name") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "address") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .filter(|x| x.semantic_type == SemanticType::Tag as i32) + .count(), + 2 + ); + } } } diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index 69498c209a..5069db5197 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -46,8 +46,8 @@ use session::context::{Channel, QueryContext, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; use crate::error::{ - DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, ParseJsonSnafu, - PipelineSnafu, Result, UnsupportedContentTypeSnafu, + CatalogSnafu, DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, + ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu, }; use crate::http::extractor::LogTableName; use crate::http::header::CONTENT_TYPE_PROTOBUF_STR; @@ -612,10 +612,15 @@ async fn ingest_logs_inner( let mut results = Vec::with_capacity(pipeline_data.len()); let transformed_data: Rows; if pipeline_name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME { - let rows = pipeline::identity_pipeline(pipeline_data) + let table = state + .get_table(&table_name, &query_ctx) + .await + .context(CatalogSnafu)?; + let rows = pipeline::identity_pipeline(pipeline_data, table) .context(PipelineTransformSnafu) 
.context(PipelineSnafu)?; - transformed_data = rows; + + transformed_data = rows } else { let pipeline = state .get_pipeline(&pipeline_name, version, query_ctx.clone()) diff --git a/src/servers/src/query_handler.rs b/src/servers/src/query_handler.rs index 58812e9350..96a01593a8 100644 --- a/src/servers/src/query_handler.rs +++ b/src/servers/src/query_handler.rs @@ -39,7 +39,7 @@ use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequ use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion, PipelineWay}; use serde_json::Value; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use crate::error::Result; use crate::influxdb::InfluxdbRequest; @@ -164,4 +164,10 @@ pub trait PipelineHandler { version: PipelineVersion, query_ctx: QueryContextRef, ) -> Result>; + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error>; } From fee75a1fadfda2f98a496090158e99e4b93915f4 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Thu, 12 Dec 2024 19:27:22 +0800 Subject: [PATCH 12/46] feat: collect reader metrics from prune reader (#5152) --- src/mito2/src/read/last_row.rs | 14 +++++++++++++- src/mito2/src/read/prune.rs | 16 +++++++++++++--- src/mito2/src/read/scan_util.rs | 5 +++-- src/mito2/src/sst/parquet/reader.rs | 4 ++-- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/mito2/src/read/last_row.rs b/src/mito2/src/read/last_row.rs index 79d035e032..1e2a6a5844 100644 --- a/src/mito2/src/read/last_row.rs +++ b/src/mito2/src/read/last_row.rs @@ -27,7 +27,7 @@ use crate::cache::{ use crate::error::Result; use crate::read::{Batch, BatchReader, BoxedBatchReader}; use crate::sst::file::FileId; -use crate::sst::parquet::reader::RowGroupReader; +use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader}; /// Reader to keep the last row for each time series. /// It assumes that batches from the input reader are @@ -115,6 +115,14 @@ impl RowGroupLastRowCachedReader { } } + /// Gets the underlying reader metrics if uncached. + pub(crate) fn metrics(&self) -> Option<&ReaderMetrics> { + match self { + RowGroupLastRowCachedReader::Hit(_) => None, + RowGroupLastRowCachedReader::Miss(reader) => Some(reader.metrics()), + } + } + /// Creates new Hit variant and updates metrics. fn new_hit(value: Arc) -> Self { selector_result_cache_hit(); @@ -234,6 +242,10 @@ impl RowGroupLastRowReader { }); cache.put_selector_result(self.key, value); } + + fn metrics(&self) -> &ReaderMetrics { + self.reader.metrics() + } } /// Push last row into `yielded_batches`. diff --git a/src/mito2/src/read/prune.rs b/src/mito2/src/read/prune.rs index cb0066e734..500cd14302 100644 --- a/src/mito2/src/read/prune.rs +++ b/src/mito2/src/read/prune.rs @@ -72,11 +72,21 @@ impl PruneReader { self.source = source; } - pub(crate) fn metrics(&mut self) -> &ReaderMetrics { + /// Merge metrics with the inner reader and return the merged metrics. 
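+    /// The merge clones `self.metrics` and folds in the metrics of the current source
+    /// (when it exposes any), so neither the reader nor its source is mutated.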
+ pub(crate) fn metrics(&self) -> ReaderMetrics { + let mut metrics = self.metrics.clone(); match &self.source { - Source::RowGroup(r) => r.metrics(), - Source::LastRow(_) => &self.metrics, + Source::RowGroup(r) => { + metrics.merge_from(r.metrics()); + } + Source::LastRow(r) => { + if let Some(inner_metrics) = r.metrics() { + metrics.merge_from(inner_metrics); + } + } } + + metrics } pub(crate) async fn next_batch(&mut self) -> Result> { diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index df790d191a..0bdf62e77e 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -181,8 +181,9 @@ pub(crate) fn scan_file_ranges( } yield batch; } - if let Source::PruneReader(mut reader) = source { - reader_metrics.merge_from(reader.metrics()); + if let Source::PruneReader(reader) = source { + let prune_metrics = reader.metrics(); + reader_metrics.merge_from(&prune_metrics); } } diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 02c5c2cf3c..335b09426e 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -918,10 +918,10 @@ enum ReaderState { impl ReaderState { /// Returns the metrics of the reader. - fn metrics(&mut self) -> &ReaderMetrics { + fn metrics(&self) -> ReaderMetrics { match self { ReaderState::Readable(reader) => reader.metrics(), - ReaderState::Exhausted(m) => m, + ReaderState::Exhausted(m) => m.clone(), } } } From e8e95267389148fefb8422a61e33bd593a0359c3 Mon Sep 17 00:00:00 2001 From: localhost Date: Thu, 12 Dec 2024 19:47:21 +0800 Subject: [PATCH 13/46] chore: pipeline dryrun api can currently receives pipeline raw content (#5142) * chore: pipeline dryrun api can currently receives pipeline raw content * chore: remove dryrun v1 and add test * chore: change dryrun pipeline api body schema * chore: remove useless struct PipelineInfo * chore: update PipelineDryrunParams doc * chore: increase code readability * chore: add some comment for pipeline dryrun test * Apply suggestions from code review Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> * chore: format code --------- Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> --- src/frontend/src/instance/log_handler.rs | 5 + src/pipeline/benches/processor.rs | 2 +- src/pipeline/src/etl.rs | 18 +- src/pipeline/src/manager/pipeline_operator.rs | 5 + src/pipeline/src/manager/table.rs | 2 +- src/pipeline/tests/common.rs | 2 +- src/pipeline/tests/dissect.rs | 2 +- src/pipeline/tests/pipeline.rs | 10 +- src/servers/src/http/event.rs | 142 +++++++--- src/servers/src/query_handler.rs | 3 + tests-integration/tests/http.rs | 267 ++++++++++++------ 11 files changed, 311 insertions(+), 147 deletions(-) diff --git a/src/frontend/src/instance/log_handler.rs b/src/frontend/src/instance/log_handler.rs index 9ae782c7d4..2da2d6717d 100644 --- a/src/frontend/src/instance/log_handler.rs +++ b/src/frontend/src/instance/log_handler.rs @@ -19,6 +19,7 @@ use async_trait::async_trait; use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq}; use client::Output; use common_error::ext::BoxedError; +use pipeline::pipeline_operator::PipelineOperator; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion}; use servers::error::{ AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult, @@ -97,6 +98,10 @@ impl PipelineHandler for Instance { .table(catalog, &schema, table, None) .await } + + fn build_pipeline(&self, 
pipeline: &str) -> ServerResult> { + PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu) + } } impl Instance { diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs index 09462753d8..8cf221af5b 100644 --- a/src/pipeline/benches/processor.rs +++ b/src/pipeline/benches/processor.rs @@ -223,7 +223,7 @@ transform: type: uint32 "#; - parse(&Content::Yaml(pipeline_yaml.into())).unwrap() + parse(&Content::Yaml(pipeline_yaml)).unwrap() } fn criterion_benchmark(c: &mut Criterion) { diff --git a/src/pipeline/src/etl.rs b/src/pipeline/src/etl.rs index 9bd47a899e..45feb4b02f 100644 --- a/src/pipeline/src/etl.rs +++ b/src/pipeline/src/etl.rs @@ -37,9 +37,9 @@ const PROCESSORS: &str = "processors"; const TRANSFORM: &str = "transform"; const TRANSFORMS: &str = "transforms"; -pub enum Content { - Json(String), - Yaml(String), +pub enum Content<'a> { + Json(&'a str), + Yaml(&'a str), } pub fn parse(input: &Content) -> Result> @@ -379,8 +379,7 @@ transform: - field: field2 type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -432,8 +431,7 @@ transform: - field: ts type: timestamp, ns index: time"#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_str.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_str)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline .prepare(serde_json::Value::String(message), &mut payload) @@ -509,8 +507,7 @@ transform: type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -554,8 +551,7 @@ transform: index: time "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let schema = pipeline.schemas().clone(); let mut result = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut result).unwrap(); diff --git a/src/pipeline/src/manager/pipeline_operator.rs b/src/pipeline/src/manager/pipeline_operator.rs index 2e838144a4..4f43b89e2e 100644 --- a/src/pipeline/src/manager/pipeline_operator.rs +++ b/src/pipeline/src/manager/pipeline_operator.rs @@ -243,4 +243,9 @@ impl PipelineOperator { }) .await } + + /// Compile a pipeline. + pub fn build_pipeline(pipeline: &str) -> Result> { + PipelineTable::compile_pipeline(pipeline) + } } diff --git a/src/pipeline/src/manager/table.rs b/src/pipeline/src/manager/table.rs index 7b3719b667..c2a36c63ec 100644 --- a/src/pipeline/src/manager/table.rs +++ b/src/pipeline/src/manager/table.rs @@ -203,7 +203,7 @@ impl PipelineTable { /// Compile a pipeline from a string. 
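+    /// A sketch of the call shape (`yaml` is an illustrative pipeline definition string):
+    ///
+    /// ```ignore
+    /// let pipeline = PipelineTable::compile_pipeline(yaml)?;
+    /// ```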
pub fn compile_pipeline(pipeline: &str) -> Result> { - let yaml_content = Content::Yaml(pipeline.into()); + let yaml_content = Content::Yaml(pipeline); parse::(&yaml_content).context(CompilePipelineSnafu) } diff --git a/src/pipeline/tests/common.rs b/src/pipeline/tests/common.rs index aa96d14d55..d825c91e4c 100644 --- a/src/pipeline/tests/common.rs +++ b/src/pipeline/tests/common.rs @@ -19,7 +19,7 @@ use pipeline::{parse, Content, GreptimeTransformer, Pipeline}; pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows { let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/dissect.rs b/src/pipeline/tests/dissect.rs index 7577d58080..56386d0e86 100644 --- a/src/pipeline/tests/dissect.rs +++ b/src/pipeline/tests/dissect.rs @@ -270,7 +270,7 @@ transform: let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = pipeline::Content::Yaml(pipeline_yaml.into()); + let yaml_content = pipeline::Content::Yaml(pipeline_yaml); let pipeline: pipeline::Pipeline = pipeline::parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/pipeline.rs b/src/pipeline/tests/pipeline.rs index e68c7b9e6a..de724e1a27 100644 --- a/src/pipeline/tests/pipeline.rs +++ b/src/pipeline/tests/pipeline.rs @@ -417,7 +417,7 @@ transform: .map(|(_, d)| GreptimeValue { value_data: d }) .collect::>(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut stats = pipeline.init_intermediate_state(); @@ -487,7 +487,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -592,7 +592,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -655,7 +655,7 @@ transform: index: timestamp "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -691,7 +691,7 @@ transform: - message type: string "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index 5069db5197..b6b520627d 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -38,7 +38,7 @@ use lazy_static::lazy_static; use loki_api::prost_types::Timestamp; use pipeline::error::PipelineTransformSnafu; use pipeline::util::to_pipeline_version; -use pipeline::PipelineVersion; +use pipeline::{GreptimeTransformer, PipelineVersion}; use prost::Message; use serde::{Deserialize, Serialize}; use serde_json::{Deserializer, Map, Value}; @@ -276,39 
+276,11 @@ fn transform_ndjson_array_factory( }) } -#[axum_macros::debug_handler] -pub async fn pipeline_dryrun( - State(log_state): State, - Query(query_params): Query, - Extension(mut query_ctx): Extension, - TypedHeader(content_type): TypedHeader, - payload: String, +/// Dryrun pipeline with given data +fn dryrun_pipeline_inner( + value: Vec, + pipeline: &pipeline::Pipeline, ) -> Result { - let handler = log_state.log_handler; - let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu { - reason: "pipeline_name is required", - })?; - - let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; - - let ignore_errors = query_params.ignore_errors.unwrap_or(false); - - let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; - - ensure!( - value.len() <= 10, - InvalidParameterSnafu { - reason: "too many rows for dryrun", - } - ); - - query_ctx.set_channel(Channel::Http); - let query_ctx = Arc::new(query_ctx); - - let pipeline = handler - .get_pipeline(&pipeline_name, version, query_ctx.clone()) - .await?; - let mut intermediate_state = pipeline.init_intermediate_state(); let mut results = Vec::with_capacity(value.len()); @@ -387,6 +359,110 @@ pub async fn pipeline_dryrun( Ok(Json(result).into_response()) } +/// Dryrun pipeline with given data +/// pipeline_name and pipeline_version to specify pipeline stored in db +/// pipeline to specify pipeline raw content +/// data to specify data +/// data maght be list of string or list of object +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct PipelineDryrunParams { + pub pipeline_name: Option, + pub pipeline_version: Option, + pub pipeline: Option, + pub data: Vec, +} + +/// Check if the payload is valid json +/// Check if the payload contains pipeline or pipeline_name and data +/// Return Some if valid, None if invalid +fn check_pipeline_dryrun_params_valid(payload: &str) -> Option { + match serde_json::from_str::(payload) { + // payload with pipeline or pipeline_name and data is array + Ok(params) if params.pipeline.is_some() || params.pipeline_name.is_some() => Some(params), + // because of the pipeline_name or pipeline is required + Ok(_) => None, + // invalid json + Err(_) => None, + } +} + +/// Check if the pipeline_name exists +fn check_pipeline_name_exists(pipeline_name: Option) -> Result { + pipeline_name.context(InvalidParameterSnafu { + reason: "pipeline_name is required", + }) +} + +/// Check if the data length less than 10 +fn check_data_valid(data_len: usize) -> Result<()> { + ensure!( + data_len <= 10, + InvalidParameterSnafu { + reason: "data is required", + } + ); + Ok(()) +} + +#[axum_macros::debug_handler] +pub async fn pipeline_dryrun( + State(log_state): State, + Query(query_params): Query, + Extension(mut query_ctx): Extension, + TypedHeader(content_type): TypedHeader, + payload: String, +) -> Result { + let handler = log_state.log_handler; + + match check_pipeline_dryrun_params_valid(&payload) { + Some(params) => { + let data = params.data; + + check_data_valid(data.len())?; + + match params.pipeline { + None => { + let version = + to_pipeline_version(params.pipeline_version).context(PipelineSnafu)?; + let pipeline_name = check_pipeline_name_exists(params.pipeline_name)?; + let pipeline = handler + .get_pipeline(&pipeline_name, version, Arc::new(query_ctx)) + .await?; + dryrun_pipeline_inner(data, &pipeline) + } + Some(pipeline) => { + let pipeline = handler.build_pipeline(&pipeline)?; + dryrun_pipeline_inner(data, &pipeline) + } + } + } 
+ None => { + // This path is for back compatibility with the previous dry run code + // where the payload is just data (JSON or plain text) and the pipeline name + // is specified using query param. + let pipeline_name = check_pipeline_name_exists(query_params.pipeline_name)?; + + let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; + + let ignore_errors = query_params.ignore_errors.unwrap_or(false); + + let value = + extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; + + check_data_valid(value.len())?; + + query_ctx.set_channel(Channel::Http); + let query_ctx = Arc::new(query_ctx); + + let pipeline = handler + .get_pipeline(&pipeline_name, version, query_ctx.clone()) + .await?; + + dryrun_pipeline_inner(value, &pipeline) + } + } +} + #[axum_macros::debug_handler] pub async fn loki_ingest( State(log_state): State, diff --git a/src/servers/src/query_handler.rs b/src/servers/src/query_handler.rs index 96a01593a8..ff92d3c5d1 100644 --- a/src/servers/src/query_handler.rs +++ b/src/servers/src/query_handler.rs @@ -170,4 +170,7 @@ pub trait PipelineHandler { table: &str, query_ctx: &QueryContext, ) -> std::result::Result>, catalog::error::Error>; + + //// Build a pipeline from a string. + fn build_pipeline(&self, pipeline: &str) -> Result>; } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 5a48fef39e..ab2ec4ea67 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1319,7 +1319,7 @@ pub async fn test_test_pipeline_api(store_type: StorageType) { // handshake let client = TestClient::new(app); - let body = r#" + let pipeline_content = r#" processors: - date: field: time @@ -1346,7 +1346,7 @@ transform: let res = client .post("/v1/events/pipelines/test") .header("Content-Type", "application/x-yaml") - .body(body) + .body(pipeline_content) .send() .await; @@ -1367,8 +1367,87 @@ transform: let pipeline = pipelines.first().unwrap(); assert_eq!(pipeline.get("name").unwrap(), "test"); - // 2. 
write data - let data_body = r#" + let dryrun_schema = json!([ + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id1" + }, + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id2" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "type" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "log" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "logger" + }, + { + "colume_type": "TIMESTAMP", + "data_type": "TIMESTAMP_NANOSECOND", + "fulltext": false, + "name": "time" + } + ]); + let dryrun_rows = json!([ + [ + { + "data_type": "INT32", + "key": "id1", + "semantic_type": "FIELD", + "value": 2436 + }, + { + "data_type": "INT32", + "key": "id2", + "semantic_type": "FIELD", + "value": 2528 + }, + { + "data_type": "STRING", + "key": "type", + "semantic_type": "FIELD", + "value": "I" + }, + { + "data_type": "STRING", + "key": "log", + "semantic_type": "FIELD", + "value": "ClusterAdapter:enter sendTextDataToCluster\\n" + }, + { + "data_type": "STRING", + "key": "logger", + "semantic_type": "FIELD", + "value": "INTERACT.MANAGER" + }, + { + "data_type": "TIMESTAMP_NANOSECOND", + "key": "time", + "semantic_type": "TIMESTAMP", + "value": "2024-05-25 20:16:37.217+0000" + } + ] + ]); + { + // test original api + let data_body = r#" [ { "id1": "2436", @@ -1380,100 +1459,100 @@ transform: } ] "#; - let res = client - .post("/v1/events/pipelines/dryrun?pipeline_name=test") - .header("Content-Type", "application/json") - .body(data_body) - .send() - .await; - assert_eq!(res.status(), StatusCode::OK); - let body: Value = res.json().await; - let schema = &body["schema"]; - let rows = &body["rows"]; - assert_eq!( - schema, - &json!([ + let res = client + .post("/v1/events/pipelines/dryrun?pipeline_name=test") + .header("Content-Type", "application/json") + .body(data_body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline_name + let body = r#" { - "colume_type": "FIELD", - "data_type": "INT32", - "fulltext": false, - "name": "id1" - }, - { - "colume_type": "FIELD", - "data_type": "INT32", - "fulltext": false, - "name": "id2" - }, - { - "colume_type": "FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "type" - }, - { - "colume_type": "FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "log" - }, - { - "colume_type": "FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "logger" - }, - { - "colume_type": "TIMESTAMP", - "data_type": "TIMESTAMP_NANOSECOND", - "fulltext": false, - "name": "time" - } - ]) - ); - assert_eq!( - rows, - &json!([ - [ + "pipeline_name": "test", + "data": [ { - "data_type": "INT32", - "key": "id1", - "semantic_type": "FIELD", - "value": 2436 - }, - { - "data_type": "INT32", - "key": "id2", - "semantic_type": "FIELD", - "value": 2528 - }, - { - "data_type": "STRING", - "key": "type", - "semantic_type": "FIELD", - "value": "I" - }, - { - "data_type": "STRING", - "key": "log", - "semantic_type": "FIELD", - "value": "ClusterAdapter:enter sendTextDataToCluster\\n" - }, - { - "data_type": "STRING", - "key": "logger", - "semantic_type": "FIELD", - "value": "INTERACT.MANAGER" - }, - { - "data_type": "TIMESTAMP_NANOSECOND", - "key": 
"time", - "semantic_type": "TIMESTAMP", - "value": "2024-05-25 20:16:37.217+0000" + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" } ] - ]) - ); + } + "#; + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline raw data + let mut body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + body["pipeline"] = json!(pipeline_content); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // failback to old version api + // not pipeline and pipeline_name in the body + let body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::BAD_REQUEST); + } guard.remove_all().await; } From 4b4c6dbb66497e48e9573509dd1d9ab76e57097e Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Fri, 13 Dec 2024 15:34:24 +0800 Subject: [PATCH 14/46] refactor: cache inverted index with fixed-size page (#5114) * feat: cache inverted index by page instead of file * fix: add unit test and fix bugs * chore: typo * chore: ci * fix: math * chore: apply review comments * chore: renames * test: add unit test for index key calculation * refactor: use ReadableSize * feat: add config for inverted index page size * chore: update config file * refactor: handle multiple range read and fix some related bugs * fix: add config * test: turn to a fs reader to match behaviors of object store --- Cargo.lock | 1 + config/config.md | 4 + config/datanode.example.toml | 9 + config/standalone.example.toml | 3 + src/common/base/src/range_read.rs | 4 +- src/index/src/inverted_index/format/reader.rs | 13 +- .../src/inverted_index/format/reader/blob.rs | 17 +- src/mito2/Cargo.toml | 1 + src/mito2/src/cache.rs | 14 +- src/mito2/src/cache/index.rs | 399 ++++++++++++++++-- src/mito2/src/config.rs | 3 + src/mito2/src/error.rs | 11 +- src/mito2/src/sst/index.rs | 2 +- .../src/sst/index/inverted_index/applier.rs | 13 +- .../src/sst/index/inverted_index/creator.rs | 2 +- src/mito2/src/test_util.rs | 5 +- src/mito2/src/worker.rs | 1 + tests-integration/tests/http.rs | 1 + 18 files changed, 434 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 534b8c465a..b60615c8e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6643,6 +6643,7 @@ dependencies = [ "async-channel 1.9.0", "async-stream", "async-trait", + "bytemuck", "bytes", "common-base", "common-config", 
diff --git a/config/config.md b/config/config.md index 6a500a5b4a..d3353930b1 100644 --- a/config/config.md +++ b/config/config.md @@ -150,6 +150,7 @@ | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | | `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | | `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | @@ -475,6 +476,9 @@ | `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.
- `auto`: automatically determine the threshold based on the system memory size (default)
- `unlimited`: no memory limit
- `[size]` e.g. `64MB`: fixed memory threshold | | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | +| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | +| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 0ba80a9f7d..90a4d69b2e 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -543,6 +543,15 @@ mem_threshold_on_create = "auto" ## Deprecated, use `region_engine.mito.index.aux_path` instead. intermediate_path = "" +## Cache size for inverted index metadata. +metadata_cache_size = "64MiB" + +## Cache size for inverted index content. +content_cache_size = "128MiB" + +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 8eae532d61..b73246d37f 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -588,6 +588,9 @@ metadata_cache_size = "64MiB" ## Cache size for inverted index content. content_cache_size = "128MiB" +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs index 61f28cb629..53c26eeebd 100644 --- a/src/common/base/src/range_read.rs +++ b/src/common/base/src/range_read.rs @@ -205,9 +205,7 @@ impl RangeReader for Vec { }) } - async fn read(&mut self, mut range: Range) -> io::Result { - range.end = range.end.min(self.len() as u64); - + async fn read(&mut self, range: Range) -> io::Result { let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]); Ok(bytes) } diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index a6fb0cecbf..904681d5f4 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; @@ -30,23 +31,23 @@ mod footer; #[mockall::automock] #[async_trait] pub trait InvertedIndexReader: Send { - /// Reads all data to dest. - async fn read_all(&mut self, dest: &mut Vec) -> Result; - /// Seeks to given offset and reads data with exact size as provided. - async fn seek_read(&mut self, offset: u64, size: u32) -> Result>; + async fn range_read(&mut self, offset: u64, size: u32) -> Result>; + + /// Reads the bytes in the given ranges. + async fn read_vec(&mut self, ranges: &[Range]) -> Result>>; /// Retrieves metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result>; /// Retrieves the finite state transducer (FST) map from the given offset and size. async fn fst(&mut self, offset: u64, size: u32) -> Result { - let fst_data = self.seek_read(offset, size).await?; + let fst_data = self.range_read(offset, size).await?; FstMap::new(fst_data).context(DecodeFstSnafu) } /// Retrieves the bitmap from the given offset and size. 
async fn bitmap(&mut self, offset: u64, size: u32) -> Result { - self.seek_read(offset, size).await.map(BitVec::from_vec) + self.range_read(offset, size).await.map(BitVec::from_vec) } } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index de34cd36f8..371655d535 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; @@ -50,16 +51,7 @@ impl InvertedIndexBlobReader { #[async_trait] impl InvertedIndexReader for InvertedIndexBlobReader { - async fn read_all(&mut self, dest: &mut Vec) -> Result { - let metadata = self.source.metadata().await.context(CommonIoSnafu)?; - self.source - .read_into(0..metadata.content_length, dest) - .await - .context(CommonIoSnafu)?; - Ok(metadata.content_length as usize) - } - - async fn seek_read(&mut self, offset: u64, size: u32) -> Result> { + async fn range_read(&mut self, offset: u64, size: u32) -> Result> { let buf = self .source .read(offset..offset + size as u64) @@ -68,6 +60,11 @@ impl InvertedIndexReader for InvertedIndexBlobReader { Ok(buf.into()) } + async fn read_vec(&mut self, ranges: &[Range]) -> Result>> { + let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?; + Ok(bufs.into_iter().map(|buf| buf.into()).collect()) + } + async fn metadata(&mut self) -> Result> { let metadata = self.source.metadata().await.context(CommonIoSnafu)?; let blob_size = metadata.content_length; diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index eedf6ae636..eecb79440a 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -17,6 +17,7 @@ aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" +bytemuck.workspace = true bytes.workspace = true common-base.workspace = true common-config.workspace = true diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 7018b039d6..03cf913624 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -244,6 +244,7 @@ pub struct CacheManagerBuilder { page_cache_size: u64, index_metadata_size: u64, index_content_size: u64, + index_content_page_size: u64, puffin_metadata_size: u64, write_cache: Option, selector_result_cache_size: u64, @@ -286,6 +287,12 @@ impl CacheManagerBuilder { self } + /// Sets page size for index content. + pub fn index_content_page_size(mut self, bytes: u64) -> Self { + self.index_content_page_size = bytes; + self + } + /// Sets cache size for puffin metadata. 
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self { self.puffin_metadata_size = bytes; @@ -352,8 +359,11 @@ impl CacheManagerBuilder { }) .build() }); - let inverted_index_cache = - InvertedIndexCache::new(self.index_metadata_size, self.index_content_size); + let inverted_index_cache = InvertedIndexCache::new( + self.index_metadata_size, + self.index_content_size, + self.index_content_page_size, + ); let puffin_metadata_cache = PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES); let selector_result_cache = (self.selector_result_cache_size != 0).then(|| { diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index 4e6e4deee2..e25fb22dcb 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use api::v1::index::InvertedIndexMetas; @@ -34,14 +35,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content"; /// Inverted index blob reader with cache. pub struct CachedInvertedIndexBlobReader { file_id: FileId, + file_size: u64, inner: R, cache: InvertedIndexCacheRef, } impl CachedInvertedIndexBlobReader { - pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self { + pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self { Self { file_id, + file_size, inner, cache, } @@ -59,43 +62,77 @@ where offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - let range = offset as usize..(offset + size as u64) as usize; - if let Some(cached) = self.cache.get_index(IndexKey { - file_id: self.file_id, - }) { - CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(cached[range].to_vec()) - } else { - let mut all_data = Vec::with_capacity(1024 * 1024); - self.inner.read_all(&mut all_data).await?; - let result = all_data[range].to_vec(); - self.cache.put_index( - IndexKey { - file_id: self.file_id, - }, - Arc::new(all_data), - ); - CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(result) + let keys = + IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size); + // Size is 0, return empty data. + if keys.is_empty() { + return Ok(Vec::new()); } + // TODO: Can be replaced by an uncontinuous structure like opendal::Buffer. + let mut data = Vec::with_capacity(keys.len()); + data.resize(keys.len(), Arc::new(Vec::new())); + let mut cache_miss_range = vec![]; + let mut cache_miss_idx = vec![]; + let last_index = keys.len() - 1; + // TODO: Avoid copy as much as possible. 
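+        // For every page key, serve the page from the cache when present; otherwise record
+        // its byte range (the last page is clamped to the file size) so that all misses can
+        // be fetched with a single `read_vec` call and written back to the cache afterwards.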
+ for (i, index) in keys.clone().into_iter().enumerate() { + match self.cache.get_index(&index) { + Some(page) => { + CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + data[i] = page; + } + None => { + CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + let base_offset = index.page_id * self.cache.page_size; + let pruned_size = if i == last_index { + prune_size(&keys, self.file_size, self.cache.page_size) + } else { + self.cache.page_size + }; + cache_miss_range.push(base_offset..base_offset + pruned_size); + cache_miss_idx.push(i); + } + } + } + if !cache_miss_range.is_empty() { + let pages = self.inner.read_vec(&cache_miss_range).await?; + for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) { + let page = Arc::new(page); + let key = keys[i].clone(); + data[i] = page.clone(); + self.cache.put_index(key, page.clone()); + } + } + let mut result = Vec::with_capacity(size as usize); + data.iter().enumerate().for_each(|(i, page)| { + let range = if i == 0 { + IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size) + } else if i == last_index { + IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size) + } else { + 0..self.cache.page_size as usize + }; + result.extend_from_slice(&page[range]); + }); + Ok(result) } } #[async_trait] impl InvertedIndexReader for CachedInvertedIndexBlobReader { - async fn read_all( - &mut self, - dest: &mut Vec, - ) -> index::inverted_index::error::Result { - self.inner.read_all(dest).await - } - - async fn seek_read( + async fn range_read( &mut self, offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - self.inner.seek_read(offset, size).await + self.inner.range_read(offset, size).await + } + + async fn read_vec( + &mut self, + ranges: &[Range], + ) -> index::inverted_index::error::Result>> { + self.inner.read_vec(ranges).await } async fn metadata(&mut self) -> index::inverted_index::error::Result> { @@ -130,22 +167,81 @@ impl InvertedIndexReader for CachedInvertedIndexBlobRead } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct IndexKey { +pub struct IndexMetadataKey { file_id: FileId, } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IndexDataPageKey { + file_id: FileId, + page_id: u64, +} + +impl IndexDataPageKey { + /// Converts an offset to a page ID based on the page size. + fn calculate_page_id(offset: u64, page_size: u64) -> u64 { + offset / page_size + } + + /// Calculates the total number of pages that a given size spans, starting from a specific offset. + fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 { + let start_page = Self::calculate_page_id(offset, page_size); + let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size); + (end_page + 1 - start_page) as u32 + } + + /// Computes the byte range in the first page based on the offset and size. + /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096. + fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range { + let start = (offset % page_size) as usize; + let end = if size > page_size as u32 - start as u32 { + page_size as usize + } else { + start + size as usize + }; + start..end + } + + /// Computes the byte range in the last page based on the offset and size. + /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904. 
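+    /// When `offset + size` is an exact multiple of the page size, the whole last page is
+    /// covered and the range is `0..page_size`.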
+ fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range { + let offset = offset as usize; + let size = size as usize; + let page_size = page_size as usize; + if (offset + size) % page_size == 0 { + 0..page_size + } else { + 0..((offset + size) % page_size) + } + } + + /// Generates a vector of IndexKey instances for the pages that a given offset and size span. + fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec { + let start_page = Self::calculate_page_id(offset, page_size); + let total_pages = Self::calculate_page_count(offset, size, page_size); + (0..total_pages) + .map(|i| Self { + file_id, + page_id: start_page + i as u64, + }) + .collect() + } +} + pub type InvertedIndexCacheRef = Arc; pub struct InvertedIndexCache { /// Cache for inverted index metadata - index_metadata: moka::sync::Cache>, + index_metadata: moka::sync::Cache>, /// Cache for inverted index content. - index: moka::sync::Cache>>, + index: moka::sync::Cache>>, + // Page size for index content. + page_size: u64, } impl InvertedIndexCache { /// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`. - pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self { + pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self { common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}"); let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap) .name("inverted_index_metadata") @@ -170,29 +266,29 @@ impl InvertedIndexCache { Self { index_metadata, index: index_cache, + page_size, } } } impl InvertedIndexCache { pub fn get_index_metadata(&self, file_id: FileId) -> Option> { - self.index_metadata.get(&IndexKey { file_id }) + self.index_metadata.get(&IndexMetadataKey { file_id }) } pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc) { - let key = IndexKey { file_id }; + let key = IndexMetadataKey { file_id }; CACHE_BYTES .with_label_values(&[INDEX_METADATA_TYPE]) .add(index_metadata_weight(&key, &metadata).into()); self.index_metadata.insert(key, metadata) } - // todo(hl): align index file content to pages with size like 4096 bytes. - pub fn get_index(&self, key: IndexKey) -> Option>> { - self.index.get(&key) + pub fn get_index(&self, key: &IndexDataPageKey) -> Option>> { + self.index.get(key) } - pub fn put_index(&self, key: IndexKey, value: Arc>) { + pub fn put_index(&self, key: IndexDataPageKey, value: Arc>) { CACHE_BYTES .with_label_values(&[INDEX_CONTENT_TYPE]) .add(index_content_weight(&key, &value).into()); @@ -201,11 +297,234 @@ impl InvertedIndexCache { } /// Calculates weight for index metadata. -fn index_metadata_weight(k: &IndexKey, v: &Arc) -> u32 { +fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc) -> u32 { (k.file_id.as_bytes().len() + v.encoded_len()) as u32 } /// Calculates weight for index content. -fn index_content_weight(k: &IndexKey, v: &Arc>) -> u32 { +fn index_content_weight(k: &IndexDataPageKey, v: &Arc>) -> u32 { (k.file_id.as_bytes().len() + v.len()) as u32 } + +/// Prunes the size of the last page based on the indexes. +/// We have following cases: +/// 1. The rest file size is less than the page size, read to the end of the file. +/// 2. Otherwise, read the page size. 
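+///
+/// For example, with a 4096-byte page size and a 10000-byte file, the last page starting at
+/// offset 8192 is pruned to `10000 - 8192 = 1808` bytes.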
+fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 { + let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0); + page_size.min(file_size - last_page_start) +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use common_base::BitVec; + use futures::stream; + use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader}; + use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter}; + use index::inverted_index::Bytes; + use prometheus::register_int_counter_vec; + use rand::{Rng, RngCore}; + + use super::*; + use crate::sst::index::store::InstrumentedStore; + use crate::test_util::TestEnv; + + // Fuzz test for index data page key + #[test] + fn fuzz_index_calculation() { + // randomly generate a large u8 array + let mut rng = rand::thread_rng(); + let mut data = vec![0u8; 1024 * 1024]; + rng.fill_bytes(&mut data); + let file_id = FileId::random(); + + for _ in 0..100 { + let offset = rng.gen_range(0..data.len() as u64); + let size = rng.gen_range(0..data.len() as u32 - offset as u32); + let page_size: usize = rng.gen_range(1..1024); + + let indexes = + IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64); + let page_num = indexes.len(); + let mut read = Vec::with_capacity(size as usize); + let last_index = indexes.len() - 1; + for (i, key) in indexes.into_iter().enumerate() { + let start = key.page_id as usize * page_size; + let page = if start + page_size < data.len() { + &data[start..start + page_size] + } else { + &data[start..] + }; + let range = if i == 0 { + // first page range + IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64) + } else if i == last_index { + // last page range. when the first page is the last page, the range is not used. 
+ IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64) + } else { + 0..page_size + }; + read.extend_from_slice(&page[range]); + } + let expected_range = offset as usize..(offset + size as u64 as u64) as usize; + if read != data.get(expected_range).unwrap() { + panic!( + "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}", + offset, size, page_size, read.len(), size as usize, + IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64), + IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num + ); + } + } + } + + fn unpack(fst_value: u64) -> [u32; 2] { + bytemuck::cast::(fst_value) + } + + async fn create_inverted_index_blob() -> Vec { + let mut blob = Vec::new(); + let mut writer = InvertedIndexBlobWriter::new(&mut blob); + writer + .add_index( + "tag0".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .add_index( + "tag1".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .finish(8, NonZeroUsize::new(1).unwrap()) + .await + .unwrap(); + + blob + } + + #[tokio::test] + async fn test_inverted_index_cache() { + let blob = create_inverted_index_blob().await; + + // Init a test range reader in local fs. 
+ let mut env = TestEnv::new(); + let file_size = blob.len() as u64; + let store = env.init_object_store_manager(); + let temp_path = "data"; + store.write(temp_path, blob).await.unwrap(); + let store = InstrumentedStore::new(store); + let metric = + register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap(); + let counter = metric.with_label_values(&["test"]); + let range_reader = store + .range_reader("data", &counter, &counter) + .await + .unwrap(); + + let reader = InvertedIndexBlobReader::new(range_reader); + let mut cached_reader = CachedInvertedIndexBlobReader::new( + FileId::random(), + file_size, + reader, + Arc::new(InvertedIndexCache::new(8192, 8192, 50)), + ); + let metadata = cached_reader.metadata().await.unwrap(); + assert_eq!(metadata.total_row_count, 8); + assert_eq!(metadata.segment_row_count, 1); + assert_eq!(metadata.metas.len(), 2); + // tag0 + let tag0 = metadata.metas.get("tag0").unwrap(); + let stats0 = tag0.stats.as_ref().unwrap(); + assert_eq!(stats0.distinct_count, 3); + assert_eq!(stats0.null_count, 1); + assert_eq!(stats0.min_value, Bytes::from("a")); + assert_eq!(stats0.max_value, Bytes::from("c")); + let fst0 = cached_reader + .fst( + tag0.base_offset + tag0.relative_fst_offset as u64, + tag0.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst0.len(), 3); + let [offset, size] = unpack(fst0.get(b"a").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst0.get(b"b").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst0.get(b"c").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // tag1 + let tag1 = metadata.metas.get("tag1").unwrap(); + let stats1 = tag1.stats.as_ref().unwrap(); + assert_eq!(stats1.distinct_count, 3); + assert_eq!(stats1.null_count, 1); + assert_eq!(stats1.min_value, Bytes::from("x")); + assert_eq!(stats1.max_value, Bytes::from("z")); + let fst1 = cached_reader + .fst( + tag1.base_offset + tag1.relative_fst_offset as u64, + tag1.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst1.len(), 3); + let [offset, size] = unpack(fst1.get(b"x").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst1.get(b"y").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst1.get(b"z").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // fuzz test + let mut rng = rand::thread_rng(); + for _ in 0..100 { + let offset = rng.gen_range(0..file_size); + let size = rng.gen_range(0..file_size as u32 - offset as u32); + let expected = cached_reader.range_read(offset, size).await.unwrap(); + let read = cached_reader.get_or_load(offset, size).await.unwrap(); + assert_eq!(read, expected); + } + } +} diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index dda3f42710..963089c60a 100644 --- 
a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -416,6 +416,8 @@ pub struct InvertedIndexConfig { pub metadata_cache_size: ReadableSize, /// Cache size for inverted index content. Setting it to 0 to disable the cache. pub content_cache_size: ReadableSize, + /// Page size for inverted index content. + pub content_cache_page_size: ReadableSize, } impl InvertedIndexConfig { @@ -441,6 +443,7 @@ impl Default for InvertedIndexConfig { intermediate_path: String::new(), metadata_cache_size: ReadableSize::mb(64), content_cache_size: ReadableSize::mb(128), + content_cache_page_size: ReadableSize::mb(8), }; if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index d5e47d2136..f6d1dbafee 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -893,6 +893,14 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to read file metadata"))] + Metadata { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -965,7 +973,8 @@ impl ErrorExt for Error { | CreateDir { .. } | ReadDataPart { .. } | CorruptedEntry { .. } - | BuildEntry { .. } => StatusCode::Internal, + | BuildEntry { .. } + | Metadata { .. } => StatusCode::Internal, OpenRegion { source, .. } => source.status_code(), diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index a4f4ab9e44..1972f3d7ab 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -18,7 +18,7 @@ pub(crate) mod intermediate; pub(crate) mod inverted_index; pub(crate) mod puffin_manager; mod statistics; -mod store; +pub(crate) mod store; use std::num::NonZeroUsize; diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index d060d4bec1..0542fd7a59 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -16,6 +16,7 @@ pub mod builder; use std::sync::Arc; +use common_base::range_read::RangeReader; use common_telemetry::warn; use index::inverted_index::format::reader::InvertedIndexBlobReader; use index::inverted_index::search::index_apply::{ @@ -29,7 +30,9 @@ use store_api::storage::RegionId; use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey}; use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef}; -use crate::error::{ApplyInvertedIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result}; +use crate::error::{ + ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result, +}; use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE}; use crate::sst::file::FileId; use crate::sst::index::inverted_index::INDEX_BLOB_TYPE; @@ -123,7 +126,7 @@ impl InvertedIndexApplier { index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, }; - let blob = match self.cached_blob_reader(file_id).await { + let mut blob = match self.cached_blob_reader(file_id).await { Ok(Some(puffin_reader)) => puffin_reader, other => { if let Err(err) = other { @@ -134,8 +137,14 @@ impl InvertedIndexApplier { }; if let Some(index_cache) = &self.inverted_index_cache { + let file_size = if let Some(file_size) = file_size_hint { + file_size + } else { + blob.metadata().await.context(MetadataSnafu)?.content_length + }; let mut index_reader = CachedInvertedIndexBlobReader::new( file_id, + file_size, InvertedIndexBlobReader::new(blob), index_cache.clone(), ); 
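Note on the `file_size` plumbing above: the content cache stores fixed-size pages (`content_cache_page_size`, defaulting to 8 MiB in this patch), so the cached reader presumably needs the blob's total length to know where its final, possibly short, page ends; when the caller supplies no `file_size_hint`, the applier now falls back to `blob.metadata()` and surfaces failures as the new `Metadata` error. Below is a minimal, illustrative sketch of the page-range math that the fuzz test above exercises. The helper names mirror `IndexDataPageKey::calculate_first_page_range` / `calculate_last_page_range`, but the bodies are assumptions, not the crate's actual implementation:

```rust
use std::ops::Range;

/// Illustrative only: byte range to copy out of the *first* page touched by a
/// read of `size` bytes starting at `offset`, with fixed `page_size` pages.
fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
    let start = (offset % page_size) as usize;
    // If the whole request fits inside this page, stop at the request end;
    // otherwise consume the page up to its boundary.
    let end = (start + size as usize).min(page_size as usize);
    start..end
}

/// Illustrative only: byte range to copy out of the *last* page touched by the read.
fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range<usize> {
    // Position of the request's end inside its final page; a zero remainder
    // means the request ends exactly on a page boundary.
    let rem = ((offset + size as u64) % page_size) as usize;
    let end = if rem == 0 { page_size as usize } else { rem };
    0..end
}

fn main() {
    // A 100-byte read at offset 4000 with 4096-byte pages touches two pages:
    // bytes 4000..4096 of the first page and bytes 0..4 of the second.
    assert_eq!(calculate_first_page_range(4000, 100, 4096), 4000..4096);
    assert_eq!(calculate_last_page_range(4000, 100, 4096), 0..4);
}
```

Pages in between, if any, are copied whole (`0..page_size`), which matches the branch visible in the fuzz test.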
diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 43cf54fa28..15cba55c44 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -448,7 +448,7 @@ mod tests { move |expr| { let _d = &d; - let cache = Arc::new(InvertedIndexCache::new(10, 10)); + let cache = Arc::new(InvertedIndexCache::new(10, 10, 100)); let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES)); let applier = InvertedIndexApplierBuilder::new( region_dir.clone(), diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index dec175e76f..314e886ba9 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -35,8 +35,7 @@ use api::v1::{OpType, Row, Rows, SemanticType}; use common_base::readable_size::ReadableSize; use common_base::Plugins; use common_datasource::compression::CompressionType; -use common_meta::cache::{new_schema_cache, new_table_info_cache, new_table_schema_cache}; -use common_meta::key::schema_name::{SchemaName, SchemaNameValue}; +use common_meta::cache::{new_schema_cache, new_table_schema_cache}; use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef}; use common_meta::kv_backend::memory::MemoryKvBackend; use common_meta::kv_backend::KvBackendRef; @@ -49,7 +48,7 @@ use datatypes::schema::ColumnSchema; use log_store::kafka::log_store::KafkaLogStore; use log_store::raft_engine::log_store::RaftEngineLogStore; use log_store::test_util::log_store_util; -use moka::future::{Cache, CacheBuilder}; +use moka::future::CacheBuilder; use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef}; use object_store::services::Fs; use object_store::ObjectStore; diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index f8ab9c3f4e..233ab9f056 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -170,6 +170,7 @@ impl WorkerGroup { .selector_result_cache_size(config.selector_result_cache_size.as_bytes()) .index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes()) .index_content_size(config.inverted_index.content_cache_size.as_bytes()) + .index_content_page_size(config.inverted_index.content_cache_page_size.as_bytes()) .puffin_metadata_size(config.index.metadata_cache_size.as_bytes()) .write_cache(write_cache) .build(), diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index ab2ec4ea67..4843b81e91 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -946,6 +946,7 @@ create_on_flush = "auto" create_on_compaction = "auto" apply_on_query = "auto" mem_threshold_on_create = "auto" +content_cache_page_size = "8MiB" [region_engine.mito.fulltext_index] create_on_flush = "auto" From bef6896280a4dd5833617df04378667bce13a634 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 13 Dec 2024 16:17:49 +0800 Subject: [PATCH 15/46] docs: Add index panels to standalone grafana dashboard (#5140) * docs: Add index panels to standalnoe grafana dashboard * docs: fix flush/compaction op --- grafana/greptimedb.json | 3233 ++++++++++++++++++++++++--------------- 1 file changed, 1996 insertions(+), 1237 deletions(-) diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index 7c6dfb0751..86925d5342 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -145,7 +145,9 @@ "countRows": false, "enablePagination": false, "fields": [], - "reducer": ["sum"], + "reducer": [ + "sum" + ], "show": false }, "showHeader": true, @@ -223,7 
+225,9 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -298,7 +302,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -595,7 +601,7 @@ "type": "timeseries" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -603,776 +609,772 @@ "y": 10 }, "id": 24, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 34, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p95", + "range": true, + "refId": "PromQL P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p99", + "range": true, + "refId": "PromQL P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p95", + "range": true, + "refId": "SQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p99", + "range": true, + "refId": "SQL P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) 
(rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p95", + "range": true, + "refId": "PromStore Read P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p99", + "range": true, + "refId": "PromStore Read P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p95", + "range": true, + "refId": "Prometheus PromQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p99", + "range": true, + "refId": "Prometheus PromQL P99" + } + ], + "title": "HTTP query elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 35, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p95", + "range": true, + "refId": "InfluxDB Line Protocol P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p99", + 
"range": true, + "refId": "InfluxDB Line Protocol P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p95", + "range": true, + "refId": "PromStore Write P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p99", + "range": true, + "refId": "PromStore Write P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p95", + "range": true, + "refId": "OTLP Metric P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p99", + "range": true, + "refId": "OTLP Metric P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p95", + "range": true, + "refId": "OTLP Trace P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p99", + "range": true, + "refId": "OTLP Trace P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p95", + "range": true, + "refId": "Log Transform P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p99", + "range": true, + "refId": "Log Transform P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum 
by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P99" + } + ], + "title": "HTTP write elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 38, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(path) (rate(greptime_servers_http_requests_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "HTTP request rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 36, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}", + "range": true, + "refId": 
"A", + "useBackend": false + } + ], + "title": "Logs ingest rate (number of lines)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 13, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p99", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "gRPC insert elapsed", + "type": "timeseries" + } + ], "title": "Protocol", "type": "row" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 11 - }, - "id": 34, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - 
"targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p95", - "range": true, - "refId": "PromQL P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p99", - "range": true, - "refId": "PromQL P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p95", - "range": true, - "refId": "SQL P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p99", - "range": true, - "refId": "SQL P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p95", - "range": true, - "refId": "PromStore Read P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p99", - "range": true, - "refId": "PromStore Read P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p95", - "range": true, - "refId": "Prometheus PromQL P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p99", - "range": true, - "refId": "Prometheus PromQL P99" - } - ], - "title": "HTTP query elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 11 - }, - "id": 35, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p95", - "range": true, - "refId": "InfluxDB Line Protocol P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p99", - "range": true, - "refId": "InfluxDB Line Protocol P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p95", - "range": true, - "refId": "PromStore Write P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p99", - "range": true, - "refId": "PromStore Write P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p95", - "range": true, - "refId": "OTLP Metric P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p99", - "range": true, - "refId": "OTLP Metric P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) 
(rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p95", - "range": true, - "refId": "OTLP Trace P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p99", - "range": true, - "refId": "OTLP Trace P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p95", - "range": true, - "refId": "Log Transform P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p99", - "range": true, - "refId": "Log Transform P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P99" - } - ], - "title": "HTTP write elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 38, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(path) 
(rate(greptime_servers_http_requests_total[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "HTTP request rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 36, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Logs ingest rate (number of lines)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 13, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - 
"includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p95", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p99", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "gRPC insert elapsed", - "type": "timeseries" - }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 11 }, "id": 25, "panels": [], @@ -1426,7 +1428,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1441,7 +1444,7 @@ "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 12 }, "id": 1, "interval": "1s", @@ -1542,7 +1545,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1557,7 +1561,7 @@ "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 12 }, "id": 7, "interval": "1s", @@ -1641,7 +1645,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1656,7 +1661,7 @@ "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 19 }, "id": 3, "interval": "1s", @@ -1740,7 +1745,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1755,7 +1761,7 @@ "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 19 }, "id": 11, "interval": "1s", @@ -1856,7 +1862,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1871,7 +1878,7 @@ "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 26 }, "id": 15, "interval": "1s", @@ -1968,7 +1975,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1983,10 +1991,9 @@ "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 26 }, "id": 39, - "interval": "1s", "options": { "legend": { "calcs": [], @@ -2006,8 +2013,8 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[5m])", + "editorMode": "builder", + "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[$__interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -2021,13 +2028,17 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_mito_flush_elapsed_bucket[$__rate_interval])))", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, type) (idelta(greptime_mito_flush_elapsed_bucket[$__interval])))", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": true, "instant": false, "legendFormat": "flush-{{type}}", "range": true, - "refId": "B" + "refId": "B", + "useBackend": false } ], "title": "Flush / compaction count", @@ -2080,7 +2091,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2096,7 +2108,7 @@ "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 33 }, "id": 9, "interval": "1s", @@ -2193,7 +2205,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2208,7 
+2221,7 @@ "h": 7, "w": 12, "x": 12, - "y": 54 + "y": 33 }, "id": 40, "interval": "1s", @@ -2231,8 +2244,8 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_mito_write_stall_total", + "editorMode": "builder", + "expr": "rate(greptime_mito_write_stall_total[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -2292,7 +2305,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2308,7 +2322,7 @@ "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 40 }, "id": 41, "interval": "1s", @@ -2392,7 +2406,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2408,7 +2423,7 @@ "h": 7, "w": 12, "x": 12, - "y": 61 + "y": 40 }, "id": 42, "interval": "1s", @@ -2446,241 +2461,998 @@ "type": "timeseries" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 68 + "y": 47 }, "id": 26, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 22, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{operation}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito R/W duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", 
+ "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 33, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{label_name}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito DDL duration", + "type": "timeseries" + } + ], "title": "Metric Engine", "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, + "collapsed": true, "gridPos": { - "h": 7, - "w": 12, + "h": 1, + "w": 24, "x": 0, - "y": 69 + "y": 48 }, - "id": 22, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ + "id": 21, + "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "p95-{{operation}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{operation}}", - "range": true, - "refId": "B" - } - ], - "title": "Metric engine to mito R/W duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + 
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 69 - }, - "id": 33, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + }, + "overrides": [] }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "p95-{{operation}}", - "range": true, - "refId": "A", - "useBackend": false + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + 
"editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{label_name}}", - "range": true, - "refId": "B" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" } ], - "title": "Metric engine to mito DDL duration", - "type": "timeseries" + "title": "Storage Components", + "type": "row" }, { "collapsed": false, @@ -2688,11 +3460,11 @@ "h": 1, "w": 24, "x": 0, - "y": 76 + "y": 49 }, - "id": 21, + "id": 46, "panels": [], - "title": "Storage Components", + "title": "Index", "type": "row" }, { @@ -2742,7 +3514,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2758,9 +3531,9 @@ "h": 7, "w": 12, "x": 0, - "y": 77 + "y": 50 }, - "id": 18, + "id": 45, "interval": "1s", "options": { "legend": { @@ -2782,116 +3555,30 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 77 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": 
"histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "expr": "greptime_index_create_memory_usage", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "__auto", + "legendFormat": "{{instance}}-{{type}}", "range": true, "refId": "A", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_index_apply_memory_usage", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" } ], - "title": "OpenDAL operation duration", + "title": "Index memory usage", "type": "timeseries" }, { @@ -2941,116 +3628,8 @@ "mode": "absolute", "steps": [ { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 84 - }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3066,7 +3645,7 @@ "h": 7, "w": 12, "x": 12, - "y": 84 + "y": 50 }, "id": 19, "interval": "1s", @@ -3201,7 +3780,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3216,9 +3796,9 @@ "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 57 }, - "id": 37, + "id": 47, "interval": "1s", "options": { "legend": { @@ -3238,16 +3818,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, + "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, + "expr": 
"rate(greptime_index_create_rows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", + "legendFormat": "{{type}}", "range": true, - "refId": "Log Store P95" + "refId": "A", + "useBackend": false } ], - "title": "WAL sync duration seconds", + "title": "Index create rows total", "type": "timeseries" }, { @@ -3297,7 +3880,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3313,9 +3897,9 @@ "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 57 }, - "id": 12, + "id": 48, "interval": "1s", "options": { "legend": { @@ -3336,48 +3920,223 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "editorMode": "builder", + "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", "fullMetaSearch": false, + "hide": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "req-size-p95", + "legendFormat": "{{instance}}-{{type}}", "range": true, - "refId": "A", + "refId": "B", "useBackend": false + } + ], + "title": "Index create bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 49, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "req-size-p99", + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", "range": true, - "refId": "C", + "refId": "B", "useBackend": false + } + ], + "title": "Index IO bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 50, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": false, "instant": false, - "legendFormat": "throughput", + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", "range": true, - "refId": "B" + "refId": "B", + "useBackend": false } ], - "title": "WAL write size", + "title": "Index IO op", "type": "timeseries" } ], From 53d55c0b6bb9ae9b4c479904ae966bab0ea4f950 Mon Sep 17 00:00:00 2001 From: localhost Date: Fri, 13 Dec 2024 18:10:59 +0800 Subject: [PATCH 16/46] fix: loki write row len error (#5161) --- src/servers/src/http/event.rs | 4 ++-- tests-integration/tests/http.rs | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index b6b520627d..c0926af833 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -514,8 +514,8 @@ pub async fn loki_ingest( let line = entry.line; // create and init row - let mut row = Vec::with_capacity(schemas.capacity()); - for _ in 0..row.capacity() { + let mut row = Vec::with_capacity(schemas.len()); + for _ in 0..schemas.len() { row.push(GreptimeValue { value_data: None }); } // insert ts and line diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 4843b81e91..fb28247908 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1816,11 +1816,17 @@ pub async fn test_loki_logs(store_type: StorageType) { // init loki request let req: PushRequest = PushRequest { streams: vec![StreamAdapter { - labels: "{service=\"test\",source=\"integration\"}".to_string(), - entries: vec![EntryAdapter { - timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), - line: "this is a log message".to_string(), - }], + labels: r#"{service="test",source="integration","wadaxi"="do anything"}"#.to_string(), + entries: vec![ + EntryAdapter { + timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + EntryAdapter { + timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + ], hash: rand::random(), }], }; @@ -1848,7 +1854,7 @@ pub async fn test_loki_logs(store_type: StorageType) { assert_eq!(StatusCode::OK, 
res.status()); // test schema - let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; + let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n \\\"wadaxi\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\", \\\"wadaxi\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; validate_data( "loki_schema", &client, @@ -1858,7 +1864,7 @@ pub async fn test_loki_logs(store_type: StorageType) { .await; // test content - let expected = r#"[[1730976830000000000,"this is a log message","test","integration"]]"#; + let expected = r#"[[1730976830000000000,"this is a log message","test","integration","do anything"],[1730976830000000000,"this is a log message","test","integration","do anything"]]"#; validate_data( "loki_content", &client, From 579059d99f485f31e242f089ffccf6c88ce6520b Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 13 Dec 2024 20:53:11 +0800 Subject: [PATCH 17/46] ci: use 4xlarge for nightly build (#5158) --- .github/workflows/nightly-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 09fcc5c26e..afe01f11ec 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -12,7 +12,7 @@ on: linux_amd64_runner: type: choice description: The runner uses to build linux-amd64 artifacts - default: ec2-c6i.2xlarge-amd64 + default: ec2-c6i.4xlarge-amd64 options: - ubuntu-20.04 - ubuntu-20.04-8-cores @@ -27,7 +27,7 @@ on: linux_arm64_runner: type: choice description: The runner uses to build linux-arm64 artifacts - default: ec2-c6g.2xlarge-arm64 + default: ec2-c6g.4xlarge-arm64 options: - ec2-c6g.xlarge-arm64 # 4C8G - ec2-c6g.2xlarge-arm64 # 8C16G From 358d5e1d63cc28cee30fe3b213fef26f5fb71aac Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Sun, 15 Dec 2024 18:05:29 +0900 Subject: [PATCH 18/46] fix: support alter table ~ add ~ custom_type (#5165) --- .../src/statements/transform/type_alias.rs | 4 ++++ .../common/alter/alter_table.result | 19 +++++++++++++------ .../standalone/common/alter/alter_table.sql | 7 ++++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index 9e51ca9180..d670a63b47 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -57,6 +57,10 @@ impl TransformRule for TypeAliasTransformRule { alter_table.alter_operation_mut() { replace_type_alias(target_type) + } else if let AlterTableOperation::AddColumn { column_def, .. 
} = + alter_table.alter_operation_mut() + { + replace_type_alias(&mut column_def.data_type); } } _ => {} diff --git a/tests/cases/standalone/common/alter/alter_table.result b/tests/cases/standalone/common/alter/alter_table.result index 120e7695d0..5c1dbfca77 100644 --- a/tests/cases/standalone/common/alter/alter_table.result +++ b/tests/cases/standalone/common/alter/alter_table.result @@ -140,10 +140,17 @@ ADD Affected Rows: 0 +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + +Affected Rows: 0 + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); Affected Rows: 1 @@ -152,11 +159,11 @@ SELECT FROM t2; -+-------+-------+-------+------+---------------------+-----+ -| at | at2 | at3 | job | ts | val | -+-------+-------+-------+------+---------------------+-----+ -| loc_1 | loc_2 | loc_3 | job1 | 1970-01-01T00:00:00 | 1.0 | -+-------+-------+-------+------+---------------------+-----+ ++-------+-------+-------+-----+------+---------------------+-----+ +| at | at2 | at3 | at4 | job | ts | val | ++-------+-------+-------+-----+------+---------------------+-----+ +| loc_1 | loc_2 | loc_3 | 2 | job1 | 1970-01-01T00:00:00 | 1.0 | ++-------+-------+-------+-----+------+---------------------+-----+ DROP TABLE t1; diff --git a/tests/cases/standalone/common/alter/alter_table.sql b/tests/cases/standalone/common/alter/alter_table.sql index 7f3e0b6640..c52a2445db 100644 --- a/tests/cases/standalone/common/alter/alter_table.sql +++ b/tests/cases/standalone/common/alter/alter_table.sql @@ -67,10 +67,15 @@ ALTER TABLE ADD COLUMN at2 STRING; +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); SELECT * From 9d7fea902e6a87d0decf3b96f51e81c2c1569e73 Mon Sep 17 00:00:00 2001 From: shuiyisong <113876041+shuiyisong@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:17:27 +0800 Subject: [PATCH 19/46] chore: remove unused dep (#5163) * chore: remove unused dep * chore: remove more unused dep --- Cargo.lock | 179 +------------------------------ src/cache/Cargo.toml | 1 - src/catalog/Cargo.toml | 3 - src/cli/Cargo.toml | 2 - src/client/Cargo.toml | 2 - src/common/catalog/Cargo.toml | 5 - src/common/datasource/Cargo.toml | 1 - src/common/frontend/Cargo.toml | 5 - src/common/function/Cargo.toml | 1 - src/common/runtime/Cargo.toml | 2 - src/file-engine/Cargo.toml | 1 - src/flow/Cargo.toml | 1 - src/frontend/Cargo.toml | 2 - src/metric-engine/Cargo.toml | 1 - src/mito2/Cargo.toml | 2 - src/pipeline/Cargo.toml | 1 - src/promql/Cargo.toml | 3 - src/query/Cargo.toml | 4 - src/script/Cargo.toml | 2 - src/servers/Cargo.toml | 1 - src/store-api/Cargo.toml | 1 - 21 files changed, 2 insertions(+), 218 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b60615c8e5..df817dc201 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,26 +222,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "approx_eq" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3f9eb837c6a783fbf002e3e5cc7925a3aa6893d6d42f9169517528983777590" - -[[package]] -name = "aquamarine" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1da02abba9f9063d786eab1509833ebb2fac0f966862ca59439c76b9c566760" -dependencies = [ - "include_dir", - "itertools 0.10.5", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "arbitrary" version = "1.3.2" @@ -1310,7 +1290,6 @@ 
dependencies = [ "common-meta", "moka", "snafu 0.8.5", - "substrait 0.12.0", ] [[package]] @@ -1349,7 +1328,6 @@ dependencies = [ "catalog", "chrono", "common-catalog", - "common-config", "common-error", "common-macro", "common-meta", @@ -1358,7 +1336,6 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-telemetry", - "common-test-util", "common-time", "common-version", "dashmap", @@ -1369,7 +1346,6 @@ dependencies = [ "humantime", "itertools 0.10.5", "lazy_static", - "log-store", "meta-client", "moka", "object-store", @@ -1693,7 +1669,6 @@ dependencies = [ "common-grpc", "common-macro", "common-meta", - "common-options", "common-procedure", "common-query", "common-recordbatch", @@ -1722,7 +1697,6 @@ dependencies = [ "store-api", "substrait 0.12.0", "table", - "temp-env", "tempfile", "tokio", "tracing-appender", @@ -1746,8 +1720,6 @@ dependencies = [ "common-query", "common-recordbatch", "common-telemetry", - "datanode", - "derive-new 0.5.9", "enum_dispatch", "futures-util", "lazy_static", @@ -1928,13 +1900,6 @@ dependencies = [ [[package]] name = "common-catalog" version = "0.12.0" -dependencies = [ - "chrono", - "common-error", - "common-macro", - "snafu 0.8.5", - "tokio", -] [[package]] name = "common-config" @@ -1978,7 +1943,6 @@ dependencies = [ "datafusion", "datatypes", "derive_builder 0.12.0", - "dotenv", "futures", "lazy_static", "object-store", @@ -2022,15 +1986,10 @@ dependencies = [ name = "common-frontend" version = "0.12.0" dependencies = [ - "api", "async-trait", - "common-base", "common-error", "common-macro", - "common-query", - "session", "snafu 0.8.5", - "sql", ] [[package]] @@ -2064,7 +2023,6 @@ dependencies = [ "num-traits", "once_cell", "paste", - "ron", "s2", "serde", "serde_json", @@ -2353,8 +2311,6 @@ dependencies = [ "snafu 0.8.5", "tempfile", "tokio", - "tokio-metrics", - "tokio-metrics-collector", "tokio-test", "tokio-util", ] @@ -2834,16 +2790,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ctor" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" -dependencies = [ - "quote", - "syn 1.0.109", -] - [[package]] name = "darling" version = "0.14.4" @@ -3386,17 +3332,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "derive-new" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "derive-new" version = "0.7.0" @@ -3919,7 +3854,6 @@ dependencies = [ "common-error", "common-macro", "common-procedure", - "common-procedure-test", "common-query", "common-recordbatch", "common-telemetry", @@ -4067,7 +4001,6 @@ dependencies = [ "itertools 0.10.5", "lazy_static", "meta-client", - "minstant", "nom", "num-traits", "operator", @@ -4114,15 +4047,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "format_num" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ac05eb8d2eb4ed1eeff847911deae077b0b53332465de9d6a26b0ea9961bc8" -dependencies = [ - "regex", -] - [[package]] name = "fragile" version = "2.0.0" @@ -4145,7 +4069,6 @@ dependencies = [ "common-config", "common-datasource", "common-error", - "common-frontend", "common-function", "common-grpc", "common-macro", @@ -4167,7 +4090,6 @@ dependencies = [ "lazy_static", "log-store", "meta-client", - "meta-srv", "opentelemetry-proto 
0.5.0", "operator", "partition", @@ -5244,25 +5166,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" -[[package]] -name = "include_dir" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" -dependencies = [ - "include_dir_macros", -] - -[[package]] -name = "include_dir_macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" -dependencies = [ - "proc-macro2", - "quote", -] - [[package]] name = "index" version = "0.12.0" @@ -6535,7 +6438,6 @@ name = "metric-engine" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-trait", "base64 0.21.7", "common-base", @@ -6600,16 +6502,6 @@ dependencies = [ "adler2", ] -[[package]] -name = "minstant" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb9b5c752f145ac5046bccc3c4f62892e3c950c1d1eab80c5949cd68a2078db" -dependencies = [ - "ctor", - "web-time 1.1.0", -] - [[package]] name = "mio" version = "0.8.11" @@ -6639,7 +6531,6 @@ name = "mito2" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-channel 1.9.0", "async-stream", "async-trait", @@ -6653,7 +6544,6 @@ dependencies = [ "common-function", "common-macro", "common-meta", - "common-procedure-test", "common-query", "common-recordbatch", "common-runtime", @@ -8090,7 +7980,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "derive-new 0.7.0", + "derive-new", "futures", "hex", "lazy-regex", @@ -8230,7 +8120,6 @@ dependencies = [ "query", "rayon", "regex", - "ron", "serde", "serde_json", "session", @@ -8642,10 +8531,7 @@ dependencies = [ "greptime-proto", "lazy_static", "prometheus", - "promql-parser", "prost 0.12.6", - "query", - "session", "snafu 0.8.5", "tokio", ] @@ -8993,7 +8879,6 @@ version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", - "approx_eq", "arc-swap", "arrow", "arrow-schema", @@ -9025,7 +8910,6 @@ dependencies = [ "datafusion-sql", "datatypes", "fastrand", - "format_num", "futures", "futures-util", "greptime-proto", @@ -9053,9 +8937,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "statrs", - "stats-cli", "store-api", - "streaming-stats", "substrait 0.12.0", "table", "tokio", @@ -10546,7 +10428,6 @@ dependencies = [ "datatypes", "futures", "lazy_static", - "log-store", "once_cell", "operator", "paste", @@ -10569,7 +10450,6 @@ dependencies = [ "sql", "table", "tokio", - "tokio-test", ] [[package]] @@ -10911,7 +10791,6 @@ dependencies = [ "tokio-postgres-rustls", "tokio-rustls 0.26.0", "tokio-stream", - "tokio-test", "tokio-util", "tonic 0.11.0", "tonic-reflection", @@ -11545,22 +11424,11 @@ dependencies = [ "rand", ] -[[package]] -name = "stats-cli" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8786c4fc8a91bc4fcd90aed33413f79e4dc9811f24ba14d1d59adf57cf1c871" -dependencies = [ - "clap 2.34.0", - "num-traits", -] - [[package]] name = "store-api" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-stream", "async-trait", "common-base", @@ -11596,15 +11464,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" -[[package]] -name = "streaming-stats" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d670ce4e348a2081843569e0f79b21c99c91bb9028b3b3ecb0f050306de547" -dependencies = [ - "num-traits", -] - [[package]] name = "strfmt" version = "0.2.4" @@ -12562,30 +12421,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "tokio-metrics" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - -[[package]] -name = "tokio-metrics-collector" -version = "0.2.1" -source = "git+https://github.com/MichaelScofield/tokio-metrics-collector.git?rev=89d692d5753d28564a7aac73c6ac5aba22243ba0#89d692d5753d28564a7aac73c6ac5aba22243ba0" -dependencies = [ - "lazy_static", - "parking_lot 0.12.3", - "prometheus", - "tokio", - "tokio-metrics", -] - [[package]] name = "tokio-postgres" version = "0.7.12" @@ -13012,7 +12847,7 @@ dependencies = [ "tracing-core", "tracing-log 0.2.0", "tracing-subscriber", - "web-time 0.2.4", + "web-time", ] [[package]] @@ -13783,16 +13618,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "webbrowser" version = "0.8.15" diff --git a/src/cache/Cargo.toml b/src/cache/Cargo.toml index 9a2888e5fc..07870fa904 100644 --- a/src/cache/Cargo.toml +++ b/src/cache/Cargo.toml @@ -11,4 +11,3 @@ common-macro.workspace = true common-meta.workspace = true moka.workspace = true snafu.workspace = true -substrait.workspace = true diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml index a5ad92e891..b7e19a44b9 100644 --- a/src/catalog/Cargo.toml +++ b/src/catalog/Cargo.toml @@ -18,7 +18,6 @@ async-stream.workspace = true async-trait = "0.1" bytes.workspace = true common-catalog.workspace = true -common-config.workspace = true common-error.workspace = true common-macro.workspace = true common-meta.workspace = true @@ -58,7 +57,5 @@ catalog = { workspace = true, features = ["testing"] } chrono.workspace = true common-meta = { workspace = true, features = ["testing"] } common-query = { workspace = true, features = ["testing"] } -common-test-util.workspace = true -log-store.workspace = true object-store.workspace = true tokio.workspace = true diff --git a/src/cli/Cargo.toml b/src/cli/Cargo.toml index b49aa00ee2..de2abc15f1 100644 --- a/src/cli/Cargo.toml +++ b/src/cli/Cargo.toml @@ -23,7 +23,6 @@ common-error.workspace = true common-grpc.workspace = true common-macro.workspace = true common-meta.workspace = true -common-options.workspace = true common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true @@ -61,5 +60,4 @@ client = { workspace = true, features = ["testing"] } common-test-util.workspace = true common-version.workspace = true serde.workspace = true -temp-env = "0.3" tempfile.workspace = true diff --git a/src/client/Cargo.toml b/src/client/Cargo.toml index 9d198ab9fb..f8702fe6ac 100644 --- a/src/client/Cargo.toml +++ b/src/client/Cargo.toml @@ -42,8 +42,6 @@ tonic.workspace = true [dev-dependencies] common-grpc-expr.workspace = true -datanode.workspace = true -derive-new = "0.5" tracing = 
"0.1" [dev-dependencies.substrait_proto] diff --git a/src/common/catalog/Cargo.toml b/src/common/catalog/Cargo.toml index 61f49ab0e4..051675fe93 100644 --- a/src/common/catalog/Cargo.toml +++ b/src/common/catalog/Cargo.toml @@ -8,10 +8,5 @@ license.workspace = true workspace = true [dependencies] -common-error.workspace = true -common-macro.workspace = true -snafu.workspace = true [dev-dependencies] -chrono.workspace = true -tokio.workspace = true diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 65f1d18a66..16137e6b3e 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -48,5 +48,4 @@ url = "2.3" [dev-dependencies] common-telemetry.workspace = true common-test-util.workspace = true -dotenv.workspace = true uuid.workspace = true diff --git a/src/common/frontend/Cargo.toml b/src/common/frontend/Cargo.toml index 2aa111fa1a..7c3b705bdd 100644 --- a/src/common/frontend/Cargo.toml +++ b/src/common/frontend/Cargo.toml @@ -5,12 +5,7 @@ edition.workspace = true license.workspace = true [dependencies] -api.workspace = true async-trait.workspace = true -common-base.workspace = true common-error.workspace = true common-macro.workspace = true -common-query.workspace = true -session.workspace = true snafu.workspace = true -sql.workspace = true diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 29cefb1e75..e7cc25ca13 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -51,6 +51,5 @@ wkt = { version = "0.11", optional = true } [dev-dependencies] approx = "0.5" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } tokio.workspace = true diff --git a/src/common/runtime/Cargo.toml b/src/common/runtime/Cargo.toml index c249ba221e..7a12a03ba9 100644 --- a/src/common/runtime/Cargo.toml +++ b/src/common/runtime/Cargo.toml @@ -35,8 +35,6 @@ serde_json.workspace = true snafu.workspace = true tempfile.workspace = true tokio.workspace = true -tokio-metrics = "0.3" -tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" } tokio-util.workspace = true [dev-dependencies] diff --git a/src/file-engine/Cargo.toml b/src/file-engine/Cargo.toml index f9cd1113f5..1a665d6676 100644 --- a/src/file-engine/Cargo.toml +++ b/src/file-engine/Cargo.toml @@ -38,5 +38,4 @@ tokio.workspace = true [dev-dependencies] api.workspace = true -common-procedure-test.workspace = true common-test-util.workspace = true diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index ed2a1dc1c4..ffba0618da 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -47,7 +47,6 @@ hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "m itertools.workspace = true lazy_static.workspace = true meta-client.workspace = true -minstant = "0.1.7" nom = "7.1.3" num-traits = "0.2" operator.workspace = true diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 01f06eb033..e21819c568 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -25,7 +25,6 @@ common-catalog.workspace = true common-config.workspace = true common-datasource.workspace = true common-error.workspace = true -common-frontend.workspace = true common-function.workspace = true common-grpc.workspace = true common-macro.workspace = true @@ -71,7 +70,6 @@ common-test-util.workspace = true datanode.workspace = true datatypes.workspace = true futures = "0.3" -meta-srv = { workspace = true, features = ["mock"] } 
serde_json.workspace = true strfmt = "0.2" tower.workspace = true diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 85aa371594..666ac09faa 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -9,7 +9,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-trait.workspace = true base64.workspace = true common-base.workspace = true diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index eecb79440a..181ba0f434 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -13,7 +13,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" @@ -77,7 +76,6 @@ uuid.workspace = true [dev-dependencies] common-function.workspace = true common-meta = { workspace = true, features = ["testing"] } -common-procedure-test.workspace = true common-test-util.workspace = true criterion = "0.4" dotenv.workspace = true diff --git a/src/pipeline/Cargo.toml b/src/pipeline/Cargo.toml index 4657f39a68..9c26d1a52f 100644 --- a/src/pipeline/Cargo.toml +++ b/src/pipeline/Cargo.toml @@ -63,7 +63,6 @@ yaml-rust = "0.4" catalog = { workspace = true, features = ["testing"] } criterion = { version = "0.4", features = ["html_reports"] } rayon = "1.0" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } session = { workspace = true, features = ["testing"] } diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index 4039328528..7b51651a7c 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -22,11 +22,8 @@ futures = "0.3" greptime-proto.workspace = true lazy_static.workspace = true prometheus.workspace = true -promql-parser.workspace = true prost.workspace = true snafu.workspace = true [dev-dependencies] -query.workspace = true -session = { workspace = true, features = ["testing"] } tokio.workspace = true diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 863a5a1c33..8139ea3aaf 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -67,13 +67,11 @@ tokio.workspace = true uuid.workspace = true [dev-dependencies] -approx_eq = "0.1" arrow.workspace = true catalog = { workspace = true, features = ["testing"] } common-macro.workspace = true common-query = { workspace = true, features = ["testing"] } fastrand = "2.0" -format_num = "0.1" num = "0.4" num-traits = "0.2" paste = "1.0" @@ -83,8 +81,6 @@ serde.workspace = true serde_json.workspace = true session = { workspace = true, features = ["testing"] } statrs = "0.16" -stats-cli = "3.0" store-api.workspace = true -streaming-stats = "0.2" table = { workspace = true, features = ["testing"] } tokio-stream.workspace = true diff --git a/src/script/Cargo.toml b/src/script/Cargo.toml index 88d10c9509..136eb3c4fc 100644 --- a/src/script/Cargo.toml +++ b/src/script/Cargo.toml @@ -80,13 +80,11 @@ tokio.workspace = true catalog = { workspace = true, features = ["testing"] } common-test-util.workspace = true criterion = { version = "0.4", features = ["html_reports", "async_tokio"] } -log-store.workspace = true operator.workspace = true rayon = "1.0" ron = "0.7" serde = { version = "1.0", features = ["derive"] } session = { workspace = true, features = ["testing"] } -tokio-test = "0.4" [[bench]] name = "py_benchmark" diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index ddfeaf27bd..a90fb880e2 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -134,7 +134,6 @@ table.workspace = true tempfile = "3.0.0" tokio-postgres = 
"0.7" tokio-postgres-rustls = "0.12" -tokio-test = "0.4" [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["criterion", "flamegraph"] } diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 7c974661e3..1214ae3d40 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -9,7 +9,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-trait.workspace = true common-base.workspace = true common-error.workspace = true From f82af15eba627bfc175b4ccb23c3a5790d016905 Mon Sep 17 00:00:00 2001 From: Lin Yihai Date: Mon, 16 Dec 2024 14:46:38 +0800 Subject: [PATCH 20/46] feat: Add `vector_scalar_mul` function. (#5166) --- src/common/function/src/scalars/vector.rs | 2 + .../function/src/scalars/vector/scalar_mul.rs | 173 ++++++++++++++++++ .../function/vector/vector_scalar.result | 48 +++++ .../common/function/vector/vector_scalar.sql | 12 ++ 4 files changed, 235 insertions(+) create mode 100644 src/common/function/src/scalars/vector/scalar_mul.rs diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 0c0428ce9a..d462b917af 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -16,6 +16,7 @@ mod convert; mod distance; pub(crate) mod impl_conv; mod scalar_add; +mod scalar_mul; use std::sync::Arc; @@ -36,5 +37,6 @@ impl VectorFunction { // scalar calculation registry.register(Arc::new(scalar_add::ScalarAddFunction)); + registry.register(Arc::new(scalar_mul::ScalarMulFunction)); } } diff --git a/src/common/function/src/scalars/vector/scalar_mul.rs b/src/common/function/src/scalars/vector/scalar_mul.rs new file mode 100644 index 0000000000..3c7fe4c070 --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_mul.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_mul"; + +/// Multiples a scalar to each element of a vector. 
+/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_mul(2, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,4,6] | +/// +---------+ +/// +/// -- 1/scalar to simulate division +/// SELECT vec_to_string(vec_scalar_mul(0.5, "[2, 4, 6]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [1,2,3] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarMulFunction; + +impl Function for ScalarMulFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.scale(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarMulFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_mul() { + let func = ScalarMulFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(2.0), + Some(-0.5), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[8.0,10.0,12.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 4.0, 6.0]).as_slice()) + ); + assert_eq!( + result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[-4.0, -5.0, -6.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.result b/tests/cases/standalone/common/function/vector/vector_scalar.result index 5750a0adfd..a379c385fa 100644 --- a/tests/cases/standalone/common/function/vector/vector_scalar.result +++ b/tests/cases/standalone/common/function/vector/vector_scalar.result @@ -46,3 +46,51 @@ SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); | [0,1] | 
+-------------------------------------------------------------+ +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [1,2] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [1,2] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + ++----------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),parse_vec(Utf8("[2.0, 4.0]")))) | ++----------------------------------------------------------------------------+ +| [-1,-2] | ++----------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + ++------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [1,2] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.sql b/tests/cases/standalone/common/function/vector/vector_scalar.sql index e438ac6a40..2727f29705 100644 --- a/tests/cases/standalone/common/function/vector/vector_scalar.sql +++ b/tests/cases/standalone/common/function/vector/vector_scalar.sql @@ -9,3 +9,15 @@ SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); \ No newline at end of file From 5ffda7e97130c449455ee83f2afc80ee76d4b168 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:08:07 +0800 Subject: [PATCH 21/46] chore: gauge for flush compaction (#5156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add metrics * chore/bench-metrics: Add INFLIGHT_FLUSH_COUNT Metric 
to Flush Process • Introduced INFLIGHT_FLUSH_COUNT metric to track the number of ongoing flush operations. • Incremented INFLIGHT_FLUSH_COUNT in FlushScheduler to monitor active flushes. • Removed redundant increment of INFLIGHT_FLUSH_COUNT in RegionWorkerLoop to prevent double counting. * chore/bench-metrics: Add Metrics for Compaction and Flush Operations • Introduced INFLIGHT_COMPACTION_COUNT and INFLIGHT_FLUSH_COUNT metrics to track the number of ongoing compaction and flush operations. • Incremented INFLIGHT_COMPACTION_COUNT when scheduling remote and local compaction jobs, and decremented it upon completion. • Added INFLIGHT_FLUSH_COUNT increment and decrement logic around flush tasks to monitor active flush operations. • Removed redundant metric updates in worker.rs and handle_compaction.rs to streamline metric handling. * chore: add metrics for remote compaction jobs * chore: format * chore: also add dashbaord --- grafana/greptimedb.json | 1497 ++++++++--------- src/mito2/src/compaction.rs | 5 +- src/mito2/src/flush.rs | 8 +- src/mito2/src/metrics.rs | 13 + .../src/schedule/remote_job_scheduler.rs | 3 +- 5 files changed, 774 insertions(+), 752 deletions(-) diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index 86925d5342..c526373874 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -2014,11 +2014,11 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[$__interval])", + "expr": "greptime_mito_inflight_compaction_count", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "compaction-{{stage}}", + "legendFormat": "compaction-{{instance}}", "range": true, "refId": "A", "useBackend": false @@ -2030,12 +2030,12 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, type) (idelta(greptime_mito_flush_elapsed_bucket[$__interval])))", + "expr": "greptime_mito_inflight_flush_count", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "flush-{{type}}", + "legendFormat": "flush-{{instance}}", "range": true, "refId": "B", "useBackend": false @@ -2707,753 +2707,752 @@ "y": 48 }, "id": 21, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 18, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { 
- "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "OpenDAL operation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 43, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - 
"disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_object_store_lru_cache_bytes", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 44, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache hit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 63 - }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": 
"prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 63 - }, - "id": 12, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p99", - "range": true, - "refId": "C", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" - } - ], - "title": "WAL write size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 70 - }, - "id": 37, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", - "range": true, - "refId": "Log Store P95" - } - ], - "title": "WAL sync duration seconds", - "type": "timeseries" - } - ], + "panels": [], "title": "Storage Components", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + 
} + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" + }, { "collapsed": false, "gridPos": { @@ -4154,6 +4153,6 @@ "timezone": "", "title": "GreptimeDB", "uid": "e7097237-669b-4f8d-b751-13067afbfb68", - "version": 16, + "version": 17, "weekStart": "" } diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 2b70f455d8..5236e0d616 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -53,7 +53,7 @@ use crate::error::{ RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu, TimeoutSnafu, }; -use crate::metrics::COMPACTION_STAGE_ELAPSED; +use crate::metrics::{COMPACTION_STAGE_ELAPSED, INFLIGHT_COMPACTION_COUNT}; use crate::read::projection::ProjectionMapper; use crate::read::scan_region::ScanInput; use crate::read::seq_scan::SeqScan; @@ -340,6 +340,7 @@ impl CompactionScheduler { "Scheduled remote compaction job {} for region {}", job_id, region_id ); + INFLIGHT_COMPACTION_COUNT.inc(); return Ok(()); } Err(e) => { @@ -384,7 +385,9 @@ impl CompactionScheduler { // Submit the compaction task. self.scheduler .schedule(Box::pin(async move { + INFLIGHT_COMPACTION_COUNT.inc(); local_compaction_task.run().await; + INFLIGHT_COMPACTION_COUNT.dec(); })) .map_err(|e| { error!(e; "Failed to submit compaction request for region {}", region_id); diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index 9606e92d04..09f45ca4f7 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -32,7 +32,10 @@ use crate::error::{ Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result, }; use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; -use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL}; +use crate::metrics::{ + FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL, + INFLIGHT_FLUSH_COUNT, +}; use crate::read::Source; use crate::region::options::IndexOptions; use crate::region::version::{VersionControlData, VersionControlRef}; @@ -261,7 +264,9 @@ impl RegionFlushTask { let version_data = version_control.current(); Box::pin(async move { + INFLIGHT_FLUSH_COUNT.inc(); self.do_flush(version_data).await; + INFLIGHT_FLUSH_COUNT.dec(); }) } @@ -530,6 +535,7 @@ impl FlushScheduler { self.region_status.remove(®ion_id); return Err(e); } + flush_status.flushing = true; Ok(()) diff --git a/src/mito2/src/metrics.rs b/src/mito2/src/metrics.rs index e7c1c7272e..5a5d76da4c 100644 --- a/src/mito2/src/metrics.rs +++ b/src/mito2/src/metrics.rs @@ -75,6 +75,12 @@ lazy_static! { /// Histogram of flushed bytes. pub static ref FLUSH_BYTES_TOTAL: IntCounter = register_int_counter!("greptime_mito_flush_bytes_total", "mito flush bytes total").unwrap(); + /// Gauge for inflight compaction tasks. + pub static ref INFLIGHT_FLUSH_COUNT: IntGauge = + register_int_gauge!( + "greptime_mito_inflight_flush_count", + "inflight flush count", + ).unwrap(); // ------ End of flush related metrics @@ -124,6 +130,13 @@ lazy_static! { /// Counter of failed compaction task. 
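// Editor's note: the following sketch is not part of this patch. The patch pairs
// INFLIGHT_FLUSH_COUNT / INFLIGHT_COMPACTION_COUNT .inc() and .dec() calls by hand
// around each flush and compaction task; one way to keep such gauges balanced on
// every exit path (errors, early drops, panics) is a small RAII guard. The guard
// type below is hypothetical; `prometheus::IntGauge` is the gauge type registered
// by the metrics above.
struct InflightGuard {
    gauge: prometheus::IntGauge,
}

impl InflightGuard {
    fn track(gauge: prometheus::IntGauge) -> Self {
        gauge.inc();
        Self { gauge }
    }
}

impl Drop for InflightGuard {
    fn drop(&mut self) {
        // Runs whenever the guard goes out of scope, so the gauge is decremented
        // even if the task returns early or the future is dropped.
        self.gauge.dec();
    }
}

// Hypothetical usage inside a scheduled task body:
//     let _inflight = InflightGuard::track(INFLIGHT_FLUSH_COUNT.clone());
//     self.do_flush(version_data).await;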
pub static ref COMPACTION_FAILURE_COUNT: IntCounter = register_int_counter!("greptime_mito_compaction_failure_total", "mito compaction failure total").unwrap(); + + /// Gauge for inflight compaction tasks. + pub static ref INFLIGHT_COMPACTION_COUNT: IntGauge = + register_int_gauge!( + "greptime_mito_inflight_compaction_count", + "inflight compaction count", + ).unwrap(); // ------- End of compaction metrics. // Query metrics. diff --git a/src/mito2/src/schedule/remote_job_scheduler.rs b/src/mito2/src/schedule/remote_job_scheduler.rs index 8f51a774d5..bfe31ef041 100644 --- a/src/mito2/src/schedule/remote_job_scheduler.rs +++ b/src/mito2/src/schedule/remote_job_scheduler.rs @@ -27,7 +27,7 @@ use crate::compaction::compactor::CompactionRegion; use crate::compaction::picker::PickerOutput; use crate::error::{CompactRegionSnafu, Error, ParseJobIdSnafu, Result}; use crate::manifest::action::RegionEdit; -use crate::metrics::COMPACTION_FAILURE_COUNT; +use crate::metrics::{COMPACTION_FAILURE_COUNT, INFLIGHT_COMPACTION_COUNT}; use crate::request::{ BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest, }; @@ -145,6 +145,7 @@ impl DefaultNotifier { #[async_trait::async_trait] impl Notifier for DefaultNotifier { async fn notify(&self, result: RemoteJobResult, waiters: Vec) { + INFLIGHT_COMPACTION_COUNT.dec(); match result { RemoteJobResult::CompactionJobResult(result) => { let notify = { From 54698325b6453f87dda5b46779f2ebb621e25ae2 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Mon, 16 Dec 2024 17:21:00 +0800 Subject: [PATCH 22/46] feat: introduce SKIPPING index (part 1) (#5155) * skip index parser Signed-off-by: Ruihang Xia * wip: sqlness Signed-off-by: Ruihang Xia * impl show create part Signed-off-by: Ruihang Xia * add empty line Signed-off-by: Ruihang Xia * change keyword to SKIPPING INDEX Signed-off-by: Ruihang Xia * rename local variables Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/api/src/v1/column_def.rs | 12 +- src/datatypes/src/error.rs | 9 +- src/datatypes/src/schema.rs | 7 +- src/datatypes/src/schema/column_schema.rs | 106 ++++++++++++++++++ src/operator/src/statement/ddl.rs | 3 +- src/query/src/error.rs | 11 +- src/query/src/sql/show_create_table.rs | 34 +++++- src/sql/src/error.rs | 9 +- src/sql/src/parsers/create_parser.rs | 105 ++++++++++++++++- src/sql/src/parsers/utils.rs | 13 ++- src/sql/src/statements.rs | 10 +- src/sql/src/statements/create.rs | 26 ++++- .../create/create_with_skip_index.result | 33 ++++++ .../common/create/create_with_skip_index.sql | 14 +++ 14 files changed, 371 insertions(+), 21 deletions(-) create mode 100644 tests/cases/standalone/common/create/create_with_skip_index.result create mode 100644 tests/cases/standalone/common/create/create_with_skip_index.sql diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index f026d3f6f9..77dcd2c621 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY, - FULLTEXT_KEY, INVERTED_INDEX_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, }; use greptime_proto::v1::Analyzer; use snafu::ResultExt; @@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; const FULLTEXT_GRPC_KEY: &str = "fulltext"; /// Key used to store inverted index options in gRPC column options. 
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; +/// Key used to store skip index options in gRPC column options. +const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index"; /// Tries to construct a `ColumnSchema` from the given `ColumnDef`. pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { @@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) { metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone()); } + if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { + metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option StatusCode::InvalidArguments, + | InvalidFulltextOption { .. } + | InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments, ValueExceedsPrecision { .. } | CastType { .. } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 2eaa0254fb..c537a4608b 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::ConcreteDataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, + ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, - TIME_INDEX_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + SKIPPING_INDEX_KEY, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index c1e2df8469..aee9efd962 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Key used to store whether the column has inverted index in arrow field's metadata. pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; +/// Key used to store skip options in arrow field's metadata. +pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; /// Keys used in fulltext options pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; +/// Keys used in SKIPPING index options +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; + +pub const DEFAULT_GRANULARITY: u32 = 10240; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -298,6 +306,34 @@ impl ColumnSchema { ); Ok(()) } + + /// Retrieves the skipping index options for the column. 
+ pub fn skipping_index_options(&self) -> Result> { + match self.metadata.get(SKIPPING_INDEX_KEY) { + None => Ok(None), + Some(json) => { + let options = + serde_json::from_str(json).context(error::DeserializeSnafu { json })?; + Ok(Some(options)) + } + } + } + + pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(&options).context(error::SerializeSnafu)?, + ); + Ok(self) + } + + pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(options).context(error::SerializeSnafu)?, + ); + Ok(()) + } } /// Column extended type set in column schema's metadata. @@ -495,6 +531,76 @@ impl fmt::Display for FulltextAnalyzer { } } +/// Skipping options for a column. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[serde(rename_all = "kebab-case")] +pub struct SkippingIndexOptions { + /// The granularity of the skip index. + pub granularity: u32, + /// The type of the skip index. + #[serde(default)] + pub index_type: SkipIndexType, +} + +impl fmt::Display for SkippingIndexOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "granularity={}", self.granularity)?; + write!(f, ", index_type={}", self.index_type)?; + Ok(()) + } +} + +/// Skip index types. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] +pub enum SkipIndexType { + #[default] + BloomFilter, +} + +impl fmt::Display for SkipIndexType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SkipIndexType::BloomFilter => write!(f, "BLOOM"), + } + } +} + +impl TryFrom> for SkippingIndexOptions { + type Error = Error; + + fn try_from(options: HashMap) -> Result { + // Parse granularity with default value 1 + let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) { + Some(value) => value.parse::().map_err(|_| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {value}, expected: positive integer"), + } + .build() + })?, + None => DEFAULT_GRANULARITY, + }; + + // Parse index type with default value BloomFilter + let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) { + Some(typ) => match typ.to_ascii_uppercase().as_str() { + "BLOOM" => SkipIndexType::BloomFilter, + _ => { + return error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"), + } + .fail(); + } + }, + None => SkipIndexType::default(), + }; + + Ok(SkippingIndexOptions { + granularity, + index_type, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index ed96ca6f18..eba88ee44d 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -271,7 +271,8 @@ impl StatementExecutor { table_info.ident.table_id = table_id; - let table_info = Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); + let table_info: Arc = + Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); create_table.table_id = Some(api::v1::TableId { id: table_id }); let table = DistTable::table(table_info); diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 7e246d11c3..e696008cf5 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -316,6 +316,13 @@ pub enum Error { #[snafu(implicit)] 
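// Editor's note: the following sketch is not part of this patch. It illustrates,
// under the API introduced in column_schema.rs above, how the "granularity" and
// "type" options are expected to flow into a column's metadata. The column name
// and data type are made up for illustration; unknown option keys are rejected
// earlier, in the SQL parser layer.
use std::collections::HashMap;

use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SkippingIndexOptions};

fn skipping_column_example() -> datatypes::error::Result<ColumnSchema> {
    // Both keys are optional: granularity falls back to DEFAULT_GRANULARITY and
    // the index type to SkipIndexType::BloomFilter.
    let opts: HashMap<String, String> = HashMap::from([
        ("granularity".to_string(), "8192".to_string()),
        ("type".to_string(), "BLOOM".to_string()),
    ]);
    let options = SkippingIndexOptions::try_from(opts)?;

    // Serializes the options into the column metadata under SKIPPING_INDEX_KEY.
    ColumnSchema::new("msg", ConcreteDataType::string_datatype(), true)
        .with_skipping_options(options)
}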
location: Location, }, + + #[snafu(display("Failed to get SKIPPING index options"))] + GetSkippingIndexOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -366,7 +373,9 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::RegionNotReady, TableReadOnly { .. } => StatusCode::Unsupported, - GetFulltextOptions { source, .. } => source.status_code(), + GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } => { + source.status_code() + } } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index ca69dfc5e6..b903509d22 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -19,7 +19,8 @@ use std::collections::HashMap; use common_meta::SchemaOptions; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, }; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -32,7 +33,8 @@ use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; use crate::error::{ - ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu, + ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, + GetSkippingIndexOptionsSnafu, Result, SqlSnafu, }; /// Generates CREATE TABLE options from given table metadata and schema-level options. @@ -115,6 +117,23 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result StatusCode::Unsupported, PermissionDenied { .. } => StatusCode::PermissionDenied, - SetFulltextOption { .. } => StatusCode::Unexpected, + SetFulltextOption { .. } | SetSkippingIndexOption { .. 
} => StatusCode::Unexpected, } } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index bb9aadadb7..f40ecb7b6e 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -36,7 +36,9 @@ use crate::error::{ SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; -use crate::parsers::utils::validate_column_fulltext_create_option; +use crate::parsers::utils::{ + validate_column_fulltext_create_option, validate_column_skipping_index_create_option, +}; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM, @@ -53,6 +55,7 @@ pub const SINK: &str = "SINK"; pub const EXPIRE: &str = "EXPIRE"; pub const AFTER: &str = "AFTER"; pub const INVERTED: &str = "INVERTED"; +pub const SKIPPING: &str = "SKIPPING"; const DB_OPT_KEY_TTL: &str = "ttl"; @@ -701,6 +704,49 @@ impl<'a> ParserContext<'a> { column_extensions.vector_options = Some(options.into()); } + let mut is_index_declared = false; + + if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(SKIPPING) + { + parser.next_token(); + // Consume `INDEX` keyword + ensure!( + parser.parse_keyword(Keyword::INDEX), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "expect INDEX after SKIPPING keyword", + } + ); + ensure!( + column_extensions.skipping_index_options.is_none(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "duplicated SKIPPING index option", + } + ); + + let options = parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? + .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_skipping_index_create_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid SKIP option: {key}"), + } + ); + } + + column_extensions.skipping_index_options = Some(options.into()); + is_index_declared |= true; + } + if parser.parse_keyword(Keyword::FULLTEXT) { ensure!( column_extensions.fulltext_options.is_none(), @@ -738,10 +784,10 @@ impl<'a> ParserContext<'a> { } column_extensions.fulltext_options = Some(options.into()); - Ok(true) - } else { - Ok(false) + is_index_declared |= true; } + + Ok(is_index_declared) } fn parse_optional_table_constraint(&mut self) -> Result> { @@ -2103,6 +2149,57 @@ CREATE TABLE log ( .contains("invalid FULLTEXT option")); } + #[test] + fn test_parse_create_table_skip_options() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX WITH (granularity='8192', type='bloom'), +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(!col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql = r" + CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX, + )"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(col + .extensions + .skipping_index_options + 
.as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + } + #[test] fn test_parse_create_view_with_columns() { let sql = "CREATE VIEW test () AS SELECT * FROM NUMBERS"; diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index ae5146d7ee..f7eefc4b95 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -26,7 +26,10 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; -use datatypes::schema::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; +use datatypes::schema::{ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, +}; use snafu::ResultExt; use crate::error::{ @@ -119,3 +122,11 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { ] .contains(&key) } + +pub fn validate_column_skipping_index_create_option(key: &str) -> bool { + [ + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + ] + .contains(&key) +} diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 25cc3bf7e5..00196ed531 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -58,7 +58,8 @@ use crate::error::{ self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, - TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu, + SetSkippingIndexOptionSnafu, TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, + UnsupportedUnaryOpSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -513,6 +514,12 @@ pub fn column_to_schema( .context(SetFulltextOptionSnafu)?; } + if let Some(options) = column.extensions.build_skipping_index_options()? { + column_schema = column_schema + .with_skipping_options(options) + .context(SetSkippingIndexOptionSnafu)?; + } + Ok(column_schema) } @@ -1519,6 +1526,7 @@ mod tests { .into(), ), vector_options: None, + skipping_index_options: None, }, }; diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index e4ea46572e..3ea265fb7f 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; -use datatypes::schema::FulltextOptions; +use datatypes::schema::{FulltextOptions, SkippingIndexOptions}; use itertools::Itertools; use serde::Serialize; use snafu::ResultExt; @@ -24,7 +24,7 @@ use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; -use crate::error::{Result, SetFulltextOptionSnafu}; +use crate::error::{Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu}; use crate::statements::statement::Statement; use crate::statements::OptionMap; @@ -116,6 +116,8 @@ pub struct ColumnExtensions { pub fulltext_options: Option, /// Vector options. pub vector_options: Option, + /// Skipping index options. 
+ pub skipping_index_options: Option, } impl Column { @@ -158,6 +160,15 @@ impl Display for Column { write!(f, " FULLTEXT")?; } } + + if let Some(skipping_index_options) = &self.extensions.skipping_index_options { + if !skipping_index_options.is_empty() { + let options = skipping_index_options.kv_pairs(); + write!(f, " SKIPPING INDEX WITH({})", format_list_comma!(options))?; + } else { + write!(f, " SKIPPING INDEX")?; + } + } Ok(()) } } @@ -171,6 +182,17 @@ impl ColumnExtensions { let options: HashMap = options.clone().into_map(); Ok(Some(options.try_into().context(SetFulltextOptionSnafu)?)) } + + pub fn build_skipping_index_options(&self) -> Result> { + let Some(options) = self.skipping_index_options.as_ref() else { + return Ok(None); + }; + + let options: HashMap = options.clone().into_map(); + Ok(Some( + options.try_into().context(SetSkippingIndexOptionSnafu)?, + )) + } } #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] diff --git a/tests/cases/standalone/common/create/create_with_skip_index.result b/tests/cases/standalone/common/create/create_with_skip_index.result new file mode 100644 index 0000000000..00dd24dc6c --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.result @@ -0,0 +1,33 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +Affected Rows: 0 + +show +create table + skipping_table; + ++----------------+---------------------------------------------------------------------------------+ +| Table | Create Table | ++----------------+---------------------------------------------------------------------------------+ +| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "id" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | +| | "name" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+---------------------------------------------------------------------------------+ + +drop table skipping_table; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/create/create_with_skip_index.sql b/tests/cases/standalone/common/create/create_with_skip_index.sql new file mode 100644 index 0000000000..0558936699 --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.sql @@ -0,0 +1,14 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +show +create table + skipping_table; + +drop table skipping_table; From 88f7075a2a09b0cf3abcff7407ba7737e73b4861 Mon Sep 17 00:00:00 2001 From: ZonaHe Date: Mon, 16 Dec 2024 18:56:41 +0800 Subject: [PATCH 23/46] feat: update dashboard to v0.7.3 (#5172) Co-authored-by: sunchanglong --- src/servers/dashboard/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 2c0a9c7b77..3d105a6fd8 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.7.2 +v0.7.3 From acedff030b7f827706e8cbe52123b94bf5167663 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Mon, 16 Dec 2024 19:47:18 +0800 Subject: [PATCH 24/46] chore: add nix-shell configure for a minimal environment for development (#5175) * chore: add nix-shell development environment * chore: add rust-analyzer * chore: use .envrc as 
a private file --- .gitignore | 6 +++++- shell.nix | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 shell.nix diff --git a/.gitignore b/.gitignore index c1b0a89618..5823287889 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,10 @@ benchmarks/data venv/ -# Fuzz tests +# Fuzz tests tests-fuzz/artifacts/ tests-fuzz/corpus/ + +# Nix +.direnv +.envrc diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000..b255fe845c --- /dev/null +++ b/shell.nix @@ -0,0 +1,22 @@ +let + nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable"; + fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {}; + pkgs = import nixpkgs { config = {}; overlays = []; }; +in + +pkgs.mkShellNoCC { + packages = with pkgs; [ + git + clang + gcc + mold + libgit2 + protobuf + (fenix.fromToolchainFile { + dir = ./.; + }) + fenix.rust-analyzer + cargo-nextest + ]; + +} From 043d0bd7c23bd49f5ed614eedadb06090f974530 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Mon, 16 Dec 2024 20:25:23 +0800 Subject: [PATCH 25/46] test: flow rebuild (#5162) * tests: rebuild flow * tests: more rebuild * tests: restart * chore: drop clean --- .../common/flow/flow_rebuild.result | 578 ++++++++++++++++++ .../standalone/common/flow/flow_rebuild.sql | 319 ++++++++++ 2 files changed, 897 insertions(+) create mode 100644 tests/cases/standalone/common/flow/flow_rebuild.result create mode 100644 tests/cases/standalone/common/flow/flow_rebuild.sql diff --git a/tests/cases/standalone/common/flow/flow_rebuild.result b/tests/cases/standalone/common/flow/flow_rebuild.result new file mode 100644 index 0000000000..67fd43a032 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.result @@ -0,0 +1,578 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | 
++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + 
++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +-- test again, this time with db restart +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink 
TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_rebuild.sql b/tests/cases/standalone/common/flow/flow_rebuild.sql new file mode 100644 index 0000000000..288d6f1f03 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.sql @@ -0,0 +1,319 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN 
FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW test_wildcard_basic; + +-- combination of different order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +-- test again, this time with db restart +DROP TABLE input_basic; +DROP TABLE out_basic; +DROP FLOW test_wildcard_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE 
(ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW test_wildcard_basic; + +-- combination of different order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; From d0245473a916e968b2cba827b0f755e946845570 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Mon, 16 Dec 2024 22:01:40 +0800 Subject: [PATCH 26/46] fix: correct `set_region_role_state_gracefully` behaviors (#5171) * fix: 
reduce default max rows for fuzz testing * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): increase resource limits for GreptimeDB cluster * chore(fuzz): increase resource limits for kafka * fix: correct `set_region_role_state_gracefully` behaviors * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): redue resource limits for GreptimeDB & kafka --- .github/actions/setup-kafka-cluster/action.yml | 2 ++ .github/workflows/develop.yml | 4 ---- src/metric-engine/src/engine.rs | 8 +++++++- src/metric-engine/src/engine/catchup.rs | 3 +++ src/mito2/src/worker/handle_catchup.rs | 3 ++- tests-fuzz/src/utils.rs | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index b8a7339423..22b4389957 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,6 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ + --set controller.resources.limits.cpu=2000m \ + --set controller.resources.limits.memory=2Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \ diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6eccbe65b8..8939453f9d 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -323,8 +323,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: @@ -474,8 +472,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 86b64ddfae..15b9470113 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine { for x in [ utils::to_metadata_region_id(region_id), utils::to_data_region_id(region_id), - region_id, ] { if let Err(e) = self.inner.mito.set_region_role(x, role) && e.status_code() != StatusCode::RegionNotFound @@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine { region_id: RegionId, region_role_state: SettableRegionRoleState, ) -> std::result::Result { + self.inner + .mito + .set_region_role_state_gracefully( + utils::to_metadata_region_id(region_id), + region_role_state, + ) + .await?; self.inner .mito .set_region_role_state_gracefully(region_id, region_role_state) diff --git a/src/metric-engine/src/engine/catchup.rs b/src/metric-engine/src/engine/catchup.rs index 4b1268c049..783e1f009c 100644 --- a/src/metric-engine/src/engine/catchup.rs +++ b/src/metric-engine/src/engine/catchup.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use common_telemetry::debug; use snafu::ResultExt; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest}; @@ -35,6 +36,7 @@ impl MetricEngineInner { } let metadata_region_id = utils::to_metadata_region_id(region_id); // TODO(weny): improve the catchup, we can read the wal entries only once. + debug!("Catchup metadata region {metadata_region_id}"); self.mito .handle_request( metadata_region_id, @@ -48,6 +50,7 @@ impl MetricEngineInner { .context(MitoCatchupOperationSnafu)?; let data_region_id = utils::to_data_region_id(region_id); + debug!("Catchup data region {data_region_id}"); self.mito .handle_request( data_region_id, diff --git a/src/mito2/src/worker/handle_catchup.rs b/src/mito2/src/worker/handle_catchup.rs index f0fd6b0550..8992621dd7 100644 --- a/src/mito2/src/worker/handle_catchup.rs +++ b/src/mito2/src/worker/handle_catchup.rs @@ -16,8 +16,8 @@ use std::sync::Arc; -use common_telemetry::info; use common_telemetry::tracing::warn; +use common_telemetry::{debug, info}; use snafu::ensure; use store_api::logstore::LogStore; use store_api::region_engine::RegionRole; @@ -40,6 +40,7 @@ impl RegionWorkerLoop { }; if region.is_writable() { + debug!("Region {region_id} is writable, skip catchup"); return Ok(0); } // Note: Currently, We protect the split brain by ensuring the mutable table is empty. diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index 7433479789..84222f6d5a 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -142,7 +142,7 @@ macro_rules! make_get_from_env_helper { make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ALTER_ACTIONS, 256); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_INSERT_ACTIONS, 8); -make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 2048); +make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 512); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_TABLES, 64); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_COLUMNS, 32); From 8a5384697b7ae3f1ef1c988a27179ce0eee89a35 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 17 Dec 2024 09:45:50 +0800 Subject: [PATCH 27/46] chore: add aquamarine to dep lists (#5181) --- Cargo.lock | 36 ++++++++++++++++++++++++++++++++++++ src/metric-engine/Cargo.toml | 1 + src/mito2/Cargo.toml | 1 + src/store-api/Cargo.toml | 1 + 4 files changed, 39 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index df817dc201..1fa61c8c6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,6 +222,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "aquamarine" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1da02abba9f9063d786eab1509833ebb2fac0f966862ca59439c76b9c566760" +dependencies = [ + "include_dir", + "itertools 0.10.5", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "arbitrary" version = "1.3.2" @@ -5166,6 +5180,25 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + "proc-macro2", + 
"quote", +] + [[package]] name = "index" version = "0.12.0" @@ -6438,6 +6471,7 @@ name = "metric-engine" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-trait", "base64 0.21.7", "common-base", @@ -6531,6 +6565,7 @@ name = "mito2" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-channel 1.9.0", "async-stream", "async-trait", @@ -11429,6 +11464,7 @@ name = "store-api" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-stream", "async-trait", "common-base", diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 666ac09faa..85aa371594 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-trait.workspace = true base64.workspace = true common-base.workspace = true diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 181ba0f434..56d480df5a 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -13,6 +13,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 1214ae3d40..7c974661e3 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-trait.workspace = true common-base.workspace = true common-error.workspace = true From bfc777e6ac1d1389aeae480241e22f9ea2c4621f Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 17 Dec 2024 12:01:32 +0800 Subject: [PATCH 28/46] fix: deletion between two put may not work in `last_non_null` mode (#5168) * fix: deletion between rows with the same key may not work * test: add sqlness test case * chore: comments --- src/mito2/src/read/dedup.rs | 45 ++++++++++++- .../common/insert/merge_mode.result | 65 +++++++++++++++++++ .../standalone/common/insert/merge_mode.sql | 27 ++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read/dedup.rs b/src/mito2/src/read/dedup.rs index c77d0c3fab..a29781b947 100644 --- a/src/mito2/src/read/dedup.rs +++ b/src/mito2/src/read/dedup.rs @@ -224,6 +224,12 @@ pub(crate) struct DedupMetrics { } /// Buffer to store fields in the last row to merge. +/// +/// Usage: +/// We should call `maybe_init()` to initialize the builder and then call `push_first_row()` +/// to push the first row of batches that the timestamp is the same as the row in this builder. +/// Finally we should call `merge_last_non_null()` to merge the last non-null fields and +/// return the merged batch. struct LastFieldsBuilder { /// Filter deleted rows. filter_deleted: bool, @@ -311,6 +317,16 @@ impl LastFieldsBuilder { return; } + // Both `maybe_init()` and `push_first_row()` can update the builder. If the delete + // op is not in the latest row, then we can't set the deletion flag in the `maybe_init()`. + // We must check the batch and update the deletion flag here to prevent + // the builder from merging non-null fields in rows that insert before the deleted row. + self.contains_deletion = batch.op_types().get_data(0).unwrap() == OpType::Delete as u8; + if self.contains_deletion { + // Deletes this row. 
+ return; + } + let fields = batch.fields(); for (idx, value) in self.last_fields.iter_mut().enumerate() { if value.is_null() && !fields[idx].data.is_null(0) { @@ -323,7 +339,8 @@ impl LastFieldsBuilder { } /// Merges last non-null fields, builds a new batch and resets the builder. - /// It may overwrites the last row of the `buffer`. + /// It may overwrites the last row of the `buffer`. The `buffer` is the batch + /// that initialized the builder. fn merge_last_non_null( &mut self, buffer: Batch, @@ -1082,6 +1099,32 @@ mod tests { ); } + #[test] + fn test_last_non_null_strategy_delete_middle() { + let input = [ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[1], &[4], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[1], &[1], &[OpType::Put], &[(Some(12), Some(1))]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[2], &[5], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[2], &[2], &[OpType::Put], &[(Some(22), Some(2))]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + new_batch_multi_fields(b"k1", &[3], &[6], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[3], &[3], &[OpType::Put], &[(Some(32), Some(3))]), + ]; + + let mut strategy = LastNonNull::new(true); + check_dedup_strategy( + &input, + &mut strategy, + &[ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + ], + ); + } + #[test] fn test_last_non_null_iter_on_batch() { let input = [new_batch_multi_fields( diff --git a/tests/cases/standalone/common/insert/merge_mode.result b/tests/cases/standalone/common/insert/merge_mode.result index f96ad2c8bc..a98f6b6e38 100644 --- a/tests/cases/standalone/common/insert/merge_mode.result +++ b/tests/cases/standalone/common/insert/merge_mode.result @@ -92,6 +92,71 @@ DROP TABLE last_row_table; Affected Rows: 0 +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +Affected Rows: 0 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + ++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | 0 | +| 2024-11-26T10:01:00 | achn | 2.png | 0 | +| 2024-11-26T10:02:00 | achn | 3.png | 1 | ++---------------------+------+-------+--------+ + +DELETE FROM `delete_between`; + +Affected Rows: 3 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, 
`name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + ++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | | +| 2024-11-26T10:01:00 | achn | 2.png | | +| 2024-11-26T10:02:00 | achn | 3.png | | ++---------------------+------+-------+--------+ + +DROP TABLE `delete_between`; + +Affected Rows: 0 + create table if not exists invalid_merge_mode( host string, ts timestamp, diff --git a/tests/cases/standalone/common/insert/merge_mode.sql b/tests/cases/standalone/common/insert/merge_mode.sql index 967f949333..9d22cc13d6 100644 --- a/tests/cases/standalone/common/insert/merge_mode.sql +++ b/tests/cases/standalone/common/insert/merge_mode.sql @@ -44,6 +44,33 @@ SELECT * from last_row_table ORDER BY host, ts; DROP TABLE last_row_table; +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +SELECT * FROM `delete_between`; + +DELETE FROM `delete_between`; + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +SELECT * FROM `delete_between`; + +DROP TABLE `delete_between`; + create table if not exists invalid_merge_mode( host string, ts timestamp, From d821dc5a3eaa6af7cb7fa36939916dd4ac57a4a9 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 17 Dec 2024 14:55:42 +0800 Subject: [PATCH 29/46] feat(bloom-filter): add basic bloom filter creator (Part 1) (#5177) * feat(bloom-filter): add a simple bloom filter creator (Part 1) Signed-off-by: Zhenchi * fix: clippy Signed-off-by: Zhenchi * fix: header Signed-off-by: Zhenchi * docs: add format comment Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- Cargo.lock | 26 ++- src/index/Cargo.toml | 2 + src/index/src/bloom_filter.rs | 53 +++++ src/index/src/bloom_filter/creator.rs | 294 ++++++++++++++++++++++++++ src/index/src/bloom_filter/error.rs | 66 ++++++ src/index/src/lib.rs | 1 + 6 files changed, 439 insertions(+), 3 deletions(-) create mode 100644 src/index/src/bloom_filter.rs create mode 100644 src/index/src/bloom_filter/creator.rs create mode 100644 src/index/src/bloom_filter/error.rs diff --git a/Cargo.lock b/Cargo.lock index 1fa61c8c6f..b86134a3ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3834,6 +3834,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" +[[package]] +name = "fastbloom" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b679f25009b51b71506296f95fb6362ba7d0151172fa7373a8d1611b8bc5d10f" +dependencies = [ + "getrandom", + "rand", + "siphasher 1.0.1", + "wide", +] + [[package]] name = "fastdivide" 
version = "0.4.1" @@ -5213,6 +5225,7 @@ dependencies = [ "common-runtime", "common-telemetry", "common-test-util", + "fastbloom", "fst", "futures", "greptime-proto", @@ -5223,6 +5236,7 @@ dependencies = [ "regex", "regex-automata 0.4.8", "serde", + "serde_json", "snafu 0.8.5", "tantivy", "tantivy-jieba", @@ -8065,7 +8079,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -8074,7 +8088,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -10005,7 +10019,7 @@ dependencies = [ "once_cell", "radium", "rand", - "siphasher", + "siphasher 0.3.11", "unic-ucd-category", "volatile", "widestring", @@ -11016,6 +11030,12 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "sketches-ddsketch" version = "0.2.2" diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 772177147a..f46c64a176 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -17,6 +17,7 @@ common-error.workspace = true common-macro.workspace = true common-runtime.workspace = true common-telemetry.workspace = true +fastbloom = "0.8" fst.workspace = true futures.workspace = true greptime-proto.workspace = true @@ -26,6 +27,7 @@ prost.workspace = true regex.workspace = true regex-automata.workspace = true serde.workspace = true +serde_json.workspace = true snafu.workspace = true tantivy = { version = "0.22", features = ["zstd-compression"] } tantivy-jieba = "0.11.0" diff --git a/src/index/src/bloom_filter.rs b/src/index/src/bloom_filter.rs new file mode 100644 index 0000000000..e68acc698a --- /dev/null +++ b/src/index/src/bloom_filter.rs @@ -0,0 +1,53 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::{Deserialize, Serialize}; + +pub mod creator; +mod error; + +pub type Bytes = Vec; +pub type BytesRef<'a> = &'a [u8]; + +/// The Meta information of the bloom filter stored in the file. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct BloomFilterMeta { + /// The number of rows per segment. + pub rows_per_segment: usize, + + /// The number of segments. + pub seg_count: usize, + + /// The number of total rows. + pub row_count: usize, + + /// The size of the bloom filter excluding the meta information. + pub bloom_filter_segments_size: usize, + + /// Offset and size of bloom filters in the file. 
+ pub bloom_filter_segments: Vec, +} + +/// The location of the bloom filter segment in the file. +#[derive(Debug, Serialize, Deserialize)] +pub struct BloomFilterSegmentLocation { + /// The offset of the bloom filter segment in the file. + pub offset: u64, + + /// The size of the bloom filter segment in the file. + pub size: u64, + + /// The number of elements in the bloom filter segment. + pub elem_count: usize, +} diff --git a/src/index/src/bloom_filter/creator.rs b/src/index/src/bloom_filter/creator.rs new file mode 100644 index 0000000000..b3c95d3a76 --- /dev/null +++ b/src/index/src/bloom_filter/creator.rs @@ -0,0 +1,294 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use fastbloom::BloomFilter; +use futures::{AsyncWrite, AsyncWriteExt}; +use snafu::ResultExt; + +use super::error::{IoSnafu, SerdeJsonSnafu}; +use crate::bloom_filter::error::Result; +use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes}; + +/// The seed used for the Bloom filter. +const SEED: u128 = 42; + +/// The false positive rate of the Bloom filter. +const FALSE_POSITIVE_RATE: f64 = 0.01; + +/// `BloomFilterCreator` is responsible for creating and managing bloom filters +/// for a set of elements. It divides the rows into segments and creates +/// bloom filters for each segment. +/// +/// # Format +/// +/// The bloom filter creator writes the following format to the writer: +/// +/// ```text +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// | Bloom filter 0 | Bloom filter 1 | ... | BloomFilterMeta | Meta size | +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// |<- bytes (size 0) ->|<- bytes (size 1) ->| ... |<- json (meta size) ->|<- u32 LE (4 bytes) ->| +/// ``` +/// +pub struct BloomFilterCreator { + /// The number of rows per segment set by the user. + rows_per_segment: usize, + + /// Row count that added to the bloom filter so far. + accumulated_row_count: usize, + + /// A set of distinct elements in the current segment. + cur_seg_distinct_elems: HashSet, + + /// The memory usage of the current segment's distinct elements. + cur_seg_distinct_elems_mem_usage: usize, + + /// Storage for finalized Bloom filters. + finalized_bloom_filters: FinalizedBloomFilterStorage, +} + +impl BloomFilterCreator { + /// Creates a new `BloomFilterCreator` with the specified number of rows per segment. + /// + /// # PANICS + /// + /// `rows_per_segment` <= 0 + pub fn new(rows_per_segment: usize) -> Self { + assert!( + rows_per_segment > 0, + "rows_per_segment must be greater than 0" + ); + + Self { + rows_per_segment, + accumulated_row_count: 0, + cur_seg_distinct_elems: HashSet::default(), + cur_seg_distinct_elems_mem_usage: 0, + finalized_bloom_filters: FinalizedBloomFilterStorage::default(), + } + } + + /// Adds a row of elements to the bloom filter. 
If the number of accumulated rows + /// reaches `rows_per_segment`, it finalizes the current segment. + pub fn push_row_elems(&mut self, elems: impl IntoIterator) { + self.accumulated_row_count += 1; + for elem in elems.into_iter() { + let len = elem.len(); + let is_new = self.cur_seg_distinct_elems.insert(elem); + if is_new { + self.cur_seg_distinct_elems_mem_usage += len; + } + } + + if self.accumulated_row_count % self.rows_per_segment == 0 { + self.finalize_segment(); + } + } + + /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer. + pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> { + if !self.cur_seg_distinct_elems.is_empty() { + self.finalize_segment(); + } + + let mut meta = BloomFilterMeta { + rows_per_segment: self.rows_per_segment, + seg_count: self.finalized_bloom_filters.len(), + row_count: self.accumulated_row_count, + ..Default::default() + }; + + let mut buf = Vec::new(); + for segment in self.finalized_bloom_filters.drain() { + let slice = segment.bloom_filter.as_slice(); + buf.clear(); + write_u64_slice(&mut buf, slice); + writer.write_all(&buf).await.context(IoSnafu)?; + + let size = buf.len(); + meta.bloom_filter_segments.push(BloomFilterSegmentLocation { + offset: meta.bloom_filter_segments_size as _, + size: size as _, + elem_count: segment.element_count, + }); + meta.bloom_filter_segments_size += size; + } + + let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?; + writer.write_all(&meta_bytes).await.context(IoSnafu)?; + + let meta_size = meta_bytes.len() as u32; + writer + .write_all(&meta_size.to_le_bytes()) + .await + .context(IoSnafu)?; + writer.flush().await.unwrap(); + + Ok(()) + } + + /// Returns the memory usage of the creating bloom filter. + pub fn memory_usage(&self) -> usize { + self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage() + } + + fn finalize_segment(&mut self) { + let elem_count = self.cur_seg_distinct_elems.len(); + self.finalized_bloom_filters + .add(self.cur_seg_distinct_elems.drain(), elem_count); + self.cur_seg_distinct_elems_mem_usage = 0; + } +} + +/// Storage for finalized Bloom filters. +/// +/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage. +#[derive(Debug, Default)] +struct FinalizedBloomFilterStorage { + /// Bloom filters that are stored in memory. + in_memory: Vec, +} + +impl FinalizedBloomFilterStorage { + fn memory_usage(&self) -> usize { + self.in_memory.iter().map(|s| s.size).sum() + } + + /// Adds a new finalized Bloom filter to the storage. + /// + /// TODO(zhongzc): Add support for flushing to disk. + fn add(&mut self, elems: impl IntoIterator, elem_count: usize) { + let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE) + .seed(&SEED) + .expected_items(elem_count); + for elem in elems.into_iter() { + bf.insert(&elem); + } + + let cbf = FinalizedBloomFilterSegment::new(bf, elem_count); + self.in_memory.push(cbf); + } + + fn len(&self) -> usize { + self.in_memory.len() + } + + fn drain(&mut self) -> impl Iterator + '_ { + self.in_memory.drain(..) + } +} + +/// A finalized Bloom filter segment. +#[derive(Debug)] +struct FinalizedBloomFilterSegment { + /// The underlying Bloom filter. + bloom_filter: BloomFilter, + + /// The number of elements in the Bloom filter. + element_count: usize, + + /// The occupied memory size of the Bloom filter. 
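+    /// (the byte length of the filter's underlying `u64` block slice)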
+ size: usize, +} + +impl FinalizedBloomFilterSegment { + fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self { + let memory_usage = std::mem::size_of_val(bloom_filter.as_slice()); + Self { + bloom_filter, + element_count: elem_count, + size: memory_usage, + } + } +} + +/// Writes a slice of `u64` to the buffer in little-endian order. +fn write_u64_slice(buf: &mut Vec, slice: &[u64]) { + buf.reserve(std::mem::size_of_val(slice)); + for &x in slice { + buf.extend_from_slice(&x.to_le_bytes()); + } +} + +#[cfg(test)] +mod tests { + use futures::io::Cursor; + + use super::*; + + fn u64_vec_from_bytes(bytes: &[u8]) -> Vec { + bytes + .chunks_exact(std::mem::size_of::()) + .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap())) + .collect() + } + + #[tokio::test] + async fn test_bloom_filter_creator() { + let mut writer = Cursor::new(Vec::new()); + let mut creator = BloomFilterCreator::new(2); + + creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]); + // Finalize the first segment + assert!(creator.cur_seg_distinct_elems_mem_usage == 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.finish(&mut writer).await.unwrap(); + + let bytes = writer.into_inner(); + let total_size = bytes.len(); + let meta_size_offset = total_size - 4; + let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap()); + + let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4]; + let meta: BloomFilterMeta = serde_json::from_slice(meta_bytes).unwrap(); + + assert_eq!(meta.rows_per_segment, 2); + assert_eq!(meta.seg_count, 2); + assert_eq!(meta.row_count, 3); + assert_eq!( + meta.bloom_filter_segments_size + meta_bytes.len() + 4, + total_size + ); + + let mut bfs = Vec::new(); + for segment in meta.bloom_filter_segments { + let bloom_filter_bytes = + &bytes[segment.offset as usize..(segment.offset + segment.size) as usize]; + let v = u64_vec_from_bytes(bloom_filter_bytes); + let bloom_filter = BloomFilter::from_vec(v) + .seed(&SEED) + .expected_items(segment.elem_count); + bfs.push(bloom_filter); + } + + assert_eq!(bfs.len(), 2); + assert!(bfs[0].contains(&b"a")); + assert!(bfs[0].contains(&b"b")); + assert!(bfs[0].contains(&b"c")); + assert!(bfs[0].contains(&b"d")); + assert!(bfs[1].contains(&b"e")); + assert!(bfs[1].contains(&b"f")); + } +} diff --git a/src/index/src/bloom_filter/error.rs b/src/index/src/bloom_filter/error.rs new file mode 100644 index 0000000000..8e95dc5225 --- /dev/null +++ b/src/index/src/bloom_filter/error.rs @@ -0,0 +1,66 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::any::Any; + +use common_error::ext::{BoxedError, ErrorExt}; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use snafu::{Location, Snafu}; + +#[derive(Snafu)] +#[snafu(visibility(pub))] +#[stack_trace_debug] +pub enum Error { + #[snafu(display("IO error"))] + Io { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to serde json"))] + SerdeJson { + #[snafu(source)] + error: serde_json::error::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("External error"))] + External { + source: BoxedError, + #[snafu(implicit)] + location: Location, + }, +} + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + use Error::*; + + match self { + Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected, + + External { source, .. } => source.status_code(), + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +pub type Result = std::result::Result; diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 5e2e411668..e52a93138f 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -15,5 +15,6 @@ #![feature(iter_partition_in_place)] #![feature(assert_matches)] +pub mod bloom_filter; pub mod fulltext_index; pub mod inverted_index; From 421088a868821245119703614252dff1e9b33158 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:00:02 +0800 Subject: [PATCH 30/46] test: sqlness upgrade compatibility tests (#5126) * feat: simple version switch * chore: remove debug print * chore: add common folder * tests: add drop table * feat: pull versioned binary * chore: don't use native-tls * chore: rm outdated docs * chore: new line * fix: save old bin dir * fix: switch version restart all node * feat: use etcd * fix: wait for election * fix: normal sqlness * refactor: hashmap for bin dir * test: past 3 major version compat crate table * refactor: allow using without setup etcd --- Cargo.lock | 70 ++++- src/common/meta/src/kv_backend/etcd.rs | 2 + tests/conf/metasrv-test.toml.template | 10 + tests/runner/Cargo.toml | 12 +- tests/runner/src/env.rs | 190 ++++++++++-- tests/runner/src/main.rs | 30 ++ tests/runner/src/util.rs | 283 +++++++++++++++++- tests/upgrade-compat/distributed/common | 1 + .../common/table_engine_0_10_2.result | 137 +++++++++ .../standalone/common/table_engine_0_10_2.sql | 60 ++++ .../common/table_engine_v0_11_0.result | 137 +++++++++ .../common/table_engine_v0_11_0.sql | 60 ++++ .../common/table_engine_v0_9_5.result | 137 +++++++++ .../standalone/common/table_engine_v0_9_5.sql | 60 ++++ .../standalone/common/test_simple.result | 47 +++ .../standalone/common/test_simple.sql | 22 ++ .../standalone/common/test_ttl.result | 153 ++++++++++ .../standalone/common/test_ttl.sql | 42 +++ 18 files changed, 1420 insertions(+), 33 deletions(-) create mode 120000 tests/upgrade-compat/distributed/common create mode 100644 tests/upgrade-compat/standalone/common/table_engine_0_10_2.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql create mode 100644 tests/upgrade-compat/standalone/common/test_simple.result create 
mode 100644 tests/upgrade-compat/standalone/common/test_simple.sql create mode 100644 tests/upgrade-compat/standalone/common/test_ttl.result create mode 100644 tests/upgrade-compat/standalone/common/test_ttl.sql diff --git a/Cargo.lock b/Cargo.lock index b86134a3ed..ea2931f098 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6026,6 +6026,18 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "local-ip-address" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782" +dependencies = [ + "libc", + "neli", + "thiserror 1.0.64", + "windows-sys 0.59.0", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -6992,6 +7004,31 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" +[[package]] +name = "neli" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1100229e06604150b3becd61a4965d5c70f3be1759544ea7274166f4be41ef43" +dependencies = [ + "byteorder", + "libc", + "log", + "neli-proc-macros", +] + +[[package]] +name = "neli-proc-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c168194d373b1e134786274020dae7fc5513d565ea2ebb9bc9ff17ffb69106d4" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 1.0.109", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -9380,9 +9417,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -11280,14 +11317,21 @@ dependencies = [ "common-recordbatch", "common-time", "datatypes", + "flate2", + "hex", + "local-ip-address", "mysql", + "reqwest", "serde", "serde_json", + "sha2", "sqlness", + "tar", "tempfile", "tinytemplate", "tokio", "tokio-postgres", + "tokio-stream", ] [[package]] @@ -12043,6 +12087,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -14168,6 +14223,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + [[package]] name = "xml-rs" version = "0.8.22" diff --git a/src/common/meta/src/kv_backend/etcd.rs b/src/common/meta/src/kv_backend/etcd.rs index 1cdd45bc5c..a787940b6d 100644 --- a/src/common/meta/src/kv_backend/etcd.rs +++ b/src/common/meta/src/kv_backend/etcd.rs @@ -15,6 +15,7 @@ use std::any::Any; use std::sync::Arc; +use common_telemetry::info; use etcd_client::{ Client, DeleteOptions, GetOptions, PutOptions, 
Txn, TxnOp, TxnOpResponse, TxnResponse, }; @@ -55,6 +56,7 @@ impl EtcdStore { } pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef { + info!("Connected to etcd"); Arc::new(Self { client, max_txn_ops, diff --git a/tests/conf/metasrv-test.toml.template b/tests/conf/metasrv-test.toml.template index 8d27aad3c4..1196403a26 100644 --- a/tests/conf/metasrv-test.toml.template +++ b/tests/conf/metasrv-test.toml.template @@ -1,4 +1,14 @@ flush_stats_factor = 1 +{{ if use_etcd }} +## Store server address default to etcd store. +store_addrs = [{store_addrs | unescaped}] + +## Store data in memory. +use_memory_store = false + +## The datastore for meta server. +backend = "EtcdStore" +{{ endif }} [wal] {{ if is_raft_engine }} provider = "raft_engine" diff --git a/tests/runner/Cargo.toml b/tests/runner/Cargo.toml index 71312c39de..3ea403e862 100644 --- a/tests/runner/Cargo.toml +++ b/tests/runner/Cargo.toml @@ -16,12 +16,18 @@ common-query.workspace = true common-recordbatch.workspace = true common-time.workspace = true datatypes = { workspace = true } +flate2 = "1.0" +hex = "0.4" +local-ip-address = "0.6" mysql = { version = "25.0.1", default-features = false, features = ["minimal", "rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } serde.workspace = true serde_json.workspace = true -tokio-postgres = { workspace = true } -# sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 -sqlness = "0.6.1" +sha2 = "0.10" +sqlness = "0.6.1" # sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 +tar = "0.4" tempfile.workspace = true tinytemplate = "1.2" tokio.workspace = true +tokio-postgres = { workspace = true } +tokio-stream.workspace = true diff --git a/tests/runner/src/env.rs b/tests/runner/src/env.rs index bb5d74a267..81bbe2fb0b 100644 --- a/tests/runner/src/env.rs +++ b/tests/runner/src/env.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::borrow::Cow; +use std::collections::HashMap; use std::fmt::Display; use std::fs::OpenOptions; use std::io; @@ -45,6 +46,7 @@ use tokio::sync::Mutex as TokioMutex; use tokio_postgres::{Client as PgClient, SimpleQueryMessage as PgRow}; use crate::protocol_interceptor::{MYSQL, PROTOCOL_KEY}; +use crate::util::{get_workspace_root, maybe_pull_binary, PROGRAM}; use crate::{util, ServerAddr}; const METASRV_ADDR: &str = "127.0.0.1:29302"; @@ -64,6 +66,12 @@ pub enum WalConfig { }, } +#[derive(Clone)] +pub struct StoreConfig { + pub store_addrs: Vec, + pub setup_etcd: bool, +} + #[derive(Clone)] pub struct Env { sqlness_home: PathBuf, @@ -74,6 +82,12 @@ pub struct Env { /// When running in CI, this is expected to be set. /// If not set, this runner will build the GreptimeDB binary itself when needed, and set this field by then. bins_dir: Arc>>, + /// The path to the directory that contains the old pre-built GreptimeDB binaries. + versioned_bins_dirs: Arc>>, + /// Pull different versions of GreptimeDB on need. 
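+    /// When `true`, a missing versioned binary may be pulled on demand (see `maybe_pull_binary`)
+    /// before switching to that version.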
+ pull_version_on_need: bool, + /// Store address for metasrv metadata + store_config: StoreConfig, } #[async_trait] @@ -100,13 +114,21 @@ impl Env { data_home: PathBuf, server_addrs: ServerAddr, wal: WalConfig, + pull_version_on_need: bool, bins_dir: Option, + store_config: StoreConfig, ) -> Self { Self { sqlness_home: data_home, server_addrs, wal, - bins_dir: Arc::new(Mutex::new(bins_dir)), + pull_version_on_need, + bins_dir: Arc::new(Mutex::new(bins_dir.clone())), + versioned_bins_dirs: Arc::new(Mutex::new(HashMap::from_iter([( + "latest".to_string(), + bins_dir.clone().unwrap_or(util::get_binary_dir("debug")), + )]))), + store_config, } } @@ -117,7 +139,7 @@ impl Env { self.build_db(); self.setup_wal(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); let server_process = self.start_server("standalone", &db_ctx, true).await; @@ -136,8 +158,9 @@ impl Env { } else { self.build_db(); self.setup_wal(); + self.setup_etcd(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); // start a distributed GreptimeDB let meta_server = self.start_server("metasrv", &db_ctx, true).await; @@ -152,12 +175,12 @@ impl Env { let mut greptimedb = self.connect_db(&Default::default()).await; - greptimedb.metasrv_process = Some(meta_server); + greptimedb.metasrv_process = Some(meta_server).into(); greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![ datanode_1, datanode_2, datanode_3, ]))); - greptimedb.frontend_process = Some(frontend); - greptimedb.flownode_process = Some(flownode); + greptimedb.frontend_process = Some(frontend).into(); + greptimedb.flownode_process = Some(flownode).into(); greptimedb.is_standalone = false; greptimedb.ctx = db_ctx; @@ -237,13 +260,14 @@ impl Env { pg_client: TokioMutex::new(pg_client), mysql_client: TokioMutex::new(mysql_client), server_processes: None, - metasrv_process: None, - frontend_process: None, - flownode_process: None, + metasrv_process: None.into(), + frontend_process: None.into(), + flownode_process: None.into(), ctx: GreptimeDBContext { time: 0, datanode_id: Default::default(), wal: self.wal.clone(), + store_config: self.store_config.clone(), }, is_standalone: false, env: self.clone(), @@ -341,7 +365,7 @@ impl Env { ) } "metasrv" => { - let args = vec![ + let mut args = vec![ DEFAULT_LOG_LEVEL.to_string(), subcommand.to_string(), "start".to_string(), @@ -349,8 +373,6 @@ impl Env { "127.0.0.1:29302".to_string(), "--server-addr".to_string(), "127.0.0.1:29302".to_string(), - "--backend".to_string(), - "memory-store".to_string(), "--enable-region-failover".to_string(), "false".to_string(), "--http-addr=127.0.0.1:29502".to_string(), @@ -361,6 +383,9 @@ impl Env { "-c".to_string(), self.generate_config_file(subcommand, db_ctx), ]; + if db_ctx.store_config().store_addrs.is_empty() { + args.extend(vec!["--backend".to_string(), "memory-store".to_string()]) + } (args, vec![METASRV_ADDR.to_string()]) } _ => panic!("Unexpected subcommand: {subcommand}"), @@ -375,23 +400,20 @@ impl Env { } } - #[cfg(not(windows))] - let program = "./greptime"; - #[cfg(windows)] - let program = "greptime.exe"; + let program = PROGRAM; let bins_dir = self.bins_dir.lock().unwrap().clone().expect( "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. 
Or you may call `self.build_db()` beforehand.", ); let mut process = Command::new(program) - .current_dir(bins_dir) + .current_dir(bins_dir.clone()) .env("TZ", "UTC") .args(args) .stdout(stdout_file) .spawn() .unwrap_or_else(|error| { - panic!("Failed to start the DB with subcommand {subcommand},Error: {error}") + panic!("Failed to start the DB with subcommand {subcommand},Error: {error}, path: {:?}", bins_dir.join(program)); }); for check_ip_addr in &check_ip_addrs { @@ -452,7 +474,7 @@ impl Env { } /// stop and restart the server process - async fn restart_server(&self, db: &GreptimeDB) { + async fn restart_server(&self, db: &GreptimeDB, is_full_restart: bool) { { if let Some(server_process) = db.server_processes.clone() { let mut server_processes = server_process.lock().unwrap(); @@ -460,6 +482,23 @@ impl Env { Env::stop_server(server_process); } } + if is_full_restart { + if let Some(mut metasrv_process) = + db.metasrv_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut metasrv_process); + } + if let Some(mut frontend_process) = + db.frontend_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut frontend_process); + } + if let Some(mut flownode_process) = + db.flownode_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut flownode_process); + } + } } // check if the server is distributed or standalone @@ -468,12 +507,37 @@ impl Env { vec![new_server_process] } else { db.ctx.reset_datanode_id(); + if is_full_restart { + let metasrv = self.start_server("metasrv", &db.ctx, false).await; + db.metasrv_process + .lock() + .expect("lock poisoned") + .replace(metasrv); + + // wait for metasrv to start + // since it seems older version of db might take longer to complete election + tokio::time::sleep(Duration::from_secs(5)).await; + } let mut processes = vec![]; for _ in 0..3 { let new_server_process = self.start_server("datanode", &db.ctx, false).await; processes.push(new_server_process); } + + if is_full_restart { + let frontend = self.start_server("frontend", &db.ctx, false).await; + db.frontend_process + .lock() + .expect("lock poisoned") + .replace(frontend); + + let flownode = self.start_server("flownode", &db.ctx, false).await; + db.flownode_process + .lock() + .expect("lock poisoned") + .replace(flownode); + } processes }; @@ -493,6 +557,19 @@ impl Env { } } + /// Setup etcd if needed. 
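+    /// Only runs when `setup_etcd` is enabled: extracts the client ports from
+    /// `store_addrs` (e.g. "127.0.0.1:2379" -> 2379) and starts a local etcd
+    /// cluster to back the metasrv metadata store.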
+ fn setup_etcd(&self) { + if self.store_config.setup_etcd { + let client_ports = self + .store_config + .store_addrs + .iter() + .map(|s| s.split(':').nth(1).unwrap().parse::().unwrap()) + .collect::>(); + util::setup_etcd(client_ports, None, None); + } + } + /// Generate config file to `/tmp/{subcommand}-{current_time}.toml` fn generate_config_file(&self, subcommand: &str, db_ctx: &GreptimeDBContext) -> String { let mut tt = TinyTemplate::new(); @@ -509,6 +586,8 @@ impl Env { procedure_dir: String, is_raft_engine: bool, kafka_wal_broker_endpoints: String, + use_etcd: bool, + store_addrs: String, } let data_home = self.sqlness_home.join(format!("greptimedb-{subcommand}")); @@ -522,6 +601,15 @@ impl Env { procedure_dir, is_raft_engine: db_ctx.is_raft_engine(), kafka_wal_broker_endpoints: db_ctx.kafka_wal_broker_endpoints(), + use_etcd: !self.store_config.store_addrs.is_empty(), + store_addrs: self + .store_config + .store_addrs + .clone() + .iter() + .map(|p| format!("\"{p}\"")) + .collect::>() + .join(","), }; let rendered = tt.render(subcommand, &ctx).unwrap(); @@ -580,9 +668,9 @@ impl Env { pub struct GreptimeDB { server_processes: Option>>>, - metasrv_process: Option, - frontend_process: Option, - flownode_process: Option, + metasrv_process: Mutex>, + frontend_process: Mutex>, + flownode_process: Mutex>, grpc_client: TokioMutex, pg_client: TokioMutex, mysql_client: TokioMutex, @@ -693,8 +781,35 @@ impl GreptimeDB { impl Database for GreptimeDB { async fn query(&self, ctx: QueryContext, query: String) -> Box { if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() { - self.env.restart_server(self).await; + self.env.restart_server(self, false).await; + } else if let Some(version) = ctx.context.get("version") { + let version_bin_dir = self + .env + .versioned_bins_dirs + .lock() + .expect("lock poison") + .get(version.as_str()) + .cloned(); + + match version_bin_dir { + Some(path) if path.clone().join(PROGRAM).is_file() => { + // use version in versioned_bins_dirs + *self.env.bins_dir.lock().unwrap() = Some(path.clone()); + } + _ => { + // use version in dir files + maybe_pull_binary(version, self.env.pull_version_on_need).await; + let root = get_workspace_root(); + let new_path = PathBuf::from_iter([&root, version]); + *self.env.bins_dir.lock().unwrap() = Some(new_path); + } + } + + self.env.restart_server(self, true).await; + // sleep for a while to wait for the server to fully boot up + tokio::time::sleep(Duration::from_secs(5)).await; } + if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) { // protocol is bound to be either "mysql" or "postgres" if protocol == MYSQL { @@ -720,15 +835,30 @@ impl GreptimeDB { ); } } - if let Some(mut metasrv) = self.metasrv_process.take() { + if let Some(mut metasrv) = self + .metasrv_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut metasrv); println!("Metasrv (pid = {}) is stopped", metasrv.id()); } - if let Some(mut frontend) = self.frontend_process.take() { + if let Some(mut frontend) = self + .frontend_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut frontend); println!("Frontend (pid = {}) is stopped", frontend.id()); } - if let Some(mut flownode) = self.flownode_process.take() { + if let Some(mut flownode) = self + .flownode_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut flownode); println!("Flownode (pid = {}) is stopped", flownode.id()); } @@ 
-752,14 +882,16 @@ struct GreptimeDBContext { time: i64, datanode_id: AtomicU32, wal: WalConfig, + store_config: StoreConfig, } impl GreptimeDBContext { - pub fn new(wal: WalConfig) -> Self { + pub fn new(wal: WalConfig, store_config: StoreConfig) -> Self { Self { time: common_time::util::current_time_millis(), datanode_id: AtomicU32::new(0), wal, + store_config, } } @@ -787,6 +919,10 @@ impl GreptimeDBContext { fn reset_datanode_id(&self) { self.datanode_id.store(0, Ordering::Relaxed); } + + fn store_config(&self) -> StoreConfig { + self.store_config.clone() + } } struct ResultDisplayer { diff --git a/tests/runner/src/main.rs b/tests/runner/src/main.rs index eca72f280e..2e3158e195 100644 --- a/tests/runner/src/main.rs +++ b/tests/runner/src/main.rs @@ -22,6 +22,8 @@ use env::{Env, WalConfig}; use sqlness::interceptor::Registry; use sqlness::{ConfigBuilder, Runner}; +use crate::env::StoreConfig; + mod env; mod protocol_interceptor; mod util; @@ -92,6 +94,18 @@ struct Args { /// This may affect future test runs. #[clap(long)] preserve_state: bool, + + /// Pull Different versions of GreptimeDB on need. + #[clap(long, default_value = "true")] + pull_version_on_need: bool, + + /// The store addresses for metadata, if empty, will use memory store. + #[clap(long)] + store_addrs: Vec, + + /// Whether to setup etcd, by default it is false. + #[clap(long, default_value = "false")] + setup_etcd: bool, } #[tokio::main] @@ -110,6 +124,11 @@ async fn main() { Arc::new(protocol_interceptor::ProtocolInterceptorFactory), ); + if let Some(d) = &args.case_dir { + if !d.is_dir() { + panic!("{} is not a directory", d.display()); + } + } let config = ConfigBuilder::default() .case_dir(util::get_case_dir(args.case_dir)) .fail_fast(args.fail_fast) @@ -132,19 +151,30 @@ async fn main() { }, }; + let store = StoreConfig { + store_addrs: args.store_addrs.clone(), + setup_etcd: args.setup_etcd, + }; + let runner = Runner::new( config, Env::new( sqlness_home.clone(), args.server_addr.clone(), wal, + args.pull_version_on_need, args.bins_dir, + store, ), ); runner.run().await.unwrap(); // clean up and exit if !args.preserve_state { + if args.setup_etcd { + println!("Stopping etcd"); + util::stop_rm_etcd(); + } println!("Removing state in {:?}", sqlness_home); tokio::fs::remove_dir_all(sqlness_home).await.unwrap(); } diff --git a/tests/runner/src/util.rs b/tests/runner/src/util.rs index 04c336e148..4bcd482a26 100644 --- a/tests/runner/src/util.rs +++ b/tests/runner/src/util.rs @@ -12,18 +12,299 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::io::Read; use std::net::SocketAddr; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Duration; +use sha2::{Digest, Sha256}; use tokio::io::AsyncWriteExt; use tokio::net::TcpSocket; use tokio::time; +use tokio_stream::StreamExt; /// Check port every 0.1 second. 
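The new runner flags above are easiest to follow end to end with a small sketch. This is not part of the patch: the `StoreConfig` struct below only mirrors the fields used in `tests/runner/src/main.rs`, the port parsing mirrors the `setup_etcd` helpers added in this change, and the address value is illustrative.

```rust
// Sketch only, not part of the patch: how --store-addrs / --setup-etcd flow through the runner.
struct StoreConfig {
    store_addrs: Vec<String>,
    setup_etcd: bool,
}

fn main() {
    let store_config = StoreConfig {
        store_addrs: vec!["127.0.0.1:2379".to_string()],
        setup_etcd: true,
    };
    // Metasrv only falls back to "--backend memory-store" when no store address is given.
    let use_etcd = !store_config.store_addrs.is_empty();
    // setup_etcd() extracts the client port from each "host:port" address before
    // starting the etcd container via docker.
    let client_ports: Vec<u16> = store_config
        .store_addrs
        .iter()
        .map(|addr| addr.split(':').nth(1).unwrap().parse::<u16>().unwrap())
        .collect();
    assert!(use_etcd && store_config.setup_etcd);
    assert_eq!(client_ports, vec![2379]);
}
```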
const PORT_CHECK_INTERVAL: Duration = Duration::from_millis(100); +#[cfg(not(windows))] +pub const PROGRAM: &str = "./greptime"; +#[cfg(windows)] +pub const PROGRAM: &str = "greptime.exe"; + +fn http_proxy() -> Option { + for proxy in ["http_proxy", "HTTP_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +fn https_proxy() -> Option { + for proxy in ["https_proxy", "HTTPS_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +async fn download_files(url: &str, path: &str) { + let proxy = if url.starts_with("http://") { + http_proxy().map(|proxy| reqwest::Proxy::http(proxy).unwrap()) + } else if url.starts_with("https://") { + https_proxy().map(|proxy| reqwest::Proxy::https(proxy).unwrap()) + } else { + None + }; + + let client = proxy + .map(|proxy| { + reqwest::Client::builder() + .proxy(proxy) + .build() + .expect("Failed to build client") + }) + .unwrap_or(reqwest::Client::new()); + + let mut file = tokio::fs::File::create(path) + .await + .unwrap_or_else(|_| panic!("Failed to create file in {path}")); + println!("Downloading {}...", url); + + let resp = client + .get(url) + .send() + .await + .expect("Failed to send download request"); + let len = resp.content_length(); + let mut stream = resp.bytes_stream(); + let mut size_downloaded = 0; + + while let Some(chunk_result) = stream.next().await { + let chunk = chunk_result.unwrap(); + size_downloaded += chunk.len(); + if let Some(len) = len { + print!("\rDownloading {}/{} bytes", size_downloaded, len); + } else { + print!("\rDownloaded {} bytes", size_downloaded); + } + + file.write_all(&chunk).await.unwrap(); + } + + file.flush().await.unwrap(); + + println!("\nDownloaded {}", url); +} + +fn decompress(archive: &str, dest: &str) { + let tar = std::fs::File::open(archive).unwrap(); + let dec = flate2::read::GzDecoder::new(tar); + let mut a = tar::Archive::new(dec); + a.unpack(dest).unwrap(); +} + +/// Use curl to download the binary from the release page. +/// +/// # Arguments +/// +/// * `version` - The version of the binary to download. i.e. 
"v0.9.5" +pub async fn pull_binary(version: &str) { + let os = std::env::consts::OS; + let arch = match std::env::consts::ARCH { + "x86_64" => "amd64", + "aarch64" => "arm64", + _ => panic!("Unsupported arch: {}", std::env::consts::ARCH), + }; + let triple = format!("greptime-{}-{}-{}", os, arch, version); + let filename = format!("{triple}.tar.gz"); + + let url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{filename}" + ); + println!("Downloading {version} binary from {}", url); + + // mkdir {version} + let _ = std::fs::create_dir(version); + + let archive = Path::new(version).join(filename); + let folder_path = Path::new(version); + + // download the binary to the version directory + download_files(&url, &archive.to_string_lossy()).await; + + let checksum_file = format!("{triple}.sha256sum"); + let checksum_url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{checksum_file}" + ); + download_files( + &checksum_url, + &PathBuf::from_iter([version, &checksum_file]).to_string_lossy(), + ) + .await; + + // verify the checksum + let mut file = std::fs::File::open(&archive).unwrap(); + let mut sha256 = Sha256::new(); + std::io::copy(&mut file, &mut sha256).unwrap(); + let checksum: Vec = sha256.finalize().to_vec(); + + let mut expected_checksum = + std::fs::File::open(PathBuf::from_iter([version, &checksum_file])).unwrap(); + let mut buf = String::new(); + expected_checksum.read_to_string(&mut buf).unwrap(); + let expected_checksum = hex::decode(buf.lines().next().unwrap()).unwrap(); + + assert_eq!( + checksum, expected_checksum, + "Checksum mismatched, downloaded file is corrupted" + ); + + decompress(&archive.to_string_lossy(), &folder_path.to_string_lossy()); + println!("Downloaded and extracted {version} binary to {folder_path:?}"); + + // move the binary to the version directory + std::fs::rename( + PathBuf::from_iter([version, &triple, "greptime"]), + PathBuf::from_iter([version, "greptime"]), + ) + .unwrap(); + + // remove the archive and inner folder + std::fs::remove_file(&archive).unwrap(); + std::fs::remove_dir(PathBuf::from_iter([version, &triple])).unwrap(); +} + +/// Pull the binary if it does not exist and `pull_version_on_need` is true. +pub async fn maybe_pull_binary(version: &str, pull_version_on_need: bool) { + let exist = Path::new(version).join(PROGRAM).is_file(); + match (exist, pull_version_on_need){ + (true, _) => println!("Binary {version} exists"), + (false, false) => panic!("Binary {version} does not exist, please run with --pull-version-on-need or manually download it"), + (false, true) => { pull_binary(version).await; }, + } +} + +/// Set up a standalone etcd in docker. 
+pub fn setup_etcd(client_ports: Vec, peer_port: Option, etcd_version: Option<&str>) { + if std::process::Command::new("docker") + .args(["-v"]) + .status() + .is_err() + { + panic!("Docker is not installed"); + } + let peer_port = peer_port.unwrap_or(2380); + let exposed_port: Vec<_> = client_ports.iter().chain(Some(&peer_port)).collect(); + let exposed_port_str = exposed_port + .iter() + .flat_map(|p| ["-p".to_string(), format!("{p}:{p}")]) + .collect::>(); + let etcd_version = etcd_version.unwrap_or("v3.5.17"); + let etcd_image = format!("quay.io/coreos/etcd:{etcd_version}"); + let peer_url = format!("http://0.0.0.0:{peer_port}"); + let my_local_ip = local_ip_address::local_ip().unwrap(); + + let my_local_ip_str = my_local_ip.to_string(); + + let mut arg_list = vec![]; + arg_list.extend([ + "run", + "-d", + "-v", + "/usr/share/ca-certificates/:/etc/ssl/certs", + ]); + arg_list.extend(exposed_port_str.iter().map(std::ops::Deref::deref)); + arg_list.extend([ + "--name", + "etcd", + &etcd_image, + "etcd", + "-name", + "etcd0", + "-advertise-client-urls", + ]); + + let adv_client_urls = client_ports + .iter() + .map(|p| format!("http://{my_local_ip_str}:{p}")) + .collect::>() + .join(","); + + arg_list.push(&adv_client_urls); + + arg_list.extend(["-listen-client-urls"]); + + let client_ports_fmt = client_ports + .iter() + .map(|p| format!("http://0.0.0.0:{p}")) + .collect::>() + .join(","); + + arg_list.push(&client_ports_fmt); + + arg_list.push("-initial-advertise-peer-urls"); + let advertise_peer_url = format!("http://{my_local_ip_str}:{peer_port}"); + arg_list.push(&advertise_peer_url); + + arg_list.extend(["-listen-peer-urls", &peer_url]); + + arg_list.extend(["-initial-cluster-token", "etcd-cluster-1"]); + + arg_list.push("-initial-cluster"); + + let init_cluster_url = format!("etcd0=http://{my_local_ip_str}:{peer_port}"); + + arg_list.push(&init_cluster_url); + + arg_list.extend(["-initial-cluster-state", "new"]); + + let mut cmd = std::process::Command::new("docker"); + + cmd.args(arg_list); + + println!("Starting etcd with command: {:?}", cmd); + + let status = cmd.status(); + if status.is_err() { + panic!("Failed to start etcd: {:?}", status); + } else if let Ok(status) = status { + if status.success() { + println!( + "Started etcd with client ports {:?} and peer port {}, statues:{status:?}", + client_ports, peer_port + ); + } else { + panic!("Failed to start etcd: {:?}", status); + } + } +} + +/// Stop and remove the etcd container +pub fn stop_rm_etcd() { + let status = std::process::Command::new("docker") + .args(["container", "stop", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to stop etcd: {:?}", status); + } else { + println!("Stopped etcd"); + } + // rm the container + let status = std::process::Command::new("docker") + .args(["container", "rm", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to remove etcd container: {:?}", status); + } else { + println!("Removed etcd container"); + } +} + /// Get the dir of test cases. This function only works when the runner is run /// under the project's dir because it depends on some envs set by cargo. 
pub fn get_case_dir(case_dir: Option) -> String { diff --git a/tests/upgrade-compat/distributed/common b/tests/upgrade-compat/distributed/common new file mode 120000 index 0000000000..2b0920287d --- /dev/null +++ b/tests/upgrade-compat/distributed/common @@ -0,0 +1 @@ +../standalone/common \ No newline at end of file diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result new file mode 100644 index 0000000000..046255a641 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + 
++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql new file mode 100644 index 0000000000..1907533b15 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result new file mode 100644 index 0000000000..7ce230a688 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util 
DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + 
++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql new file mode 100644 index 0000000000..963170fdf5 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result new file mode 100644 index 0000000000..41b81f01c0 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + 
+Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql new file mode 100644 index 0000000000..9908085213 --- /dev/null +++ 
b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/test_simple.result b/tests/upgrade-compat/standalone/common/test_simple.result new file mode 100644 index 0000000000..ff2c340598 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.result @@ -0,0 +1,47 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+-----------------------------------------------------------+ + +DROP TABLE system_metrics; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_simple.sql b/tests/upgrade-compat/standalone/common/test_simple.sql new file mode 100644 index 0000000000..0f8daa0985 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.sql @@ -0,0 +1,22 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc STRING, + 
cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + +DROP TABLE system_metrics; diff --git a/tests/upgrade-compat/standalone/common/test_ttl.result b/tests/upgrade-compat/standalone/common/test_ttl.result new file mode 100644 index 0000000000..d06bc629b6 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.result @@ -0,0 +1,153 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +Affected Rows: 0 + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_none; + +Affected Rows: 1 + +-- SQLNESS ARG version=latest +SHOW TABLES; + ++---------------+ +| Tables | ++---------------+ +| numbers | +| test_ttl_0s | +| test_ttl_1s | +| test_ttl_none | ++---------------+ + +SHOW CREATE TABLE test_ttl_1s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_1s | CREATE TABLE IF NOT EXISTS "test_ttl_1s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_0s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_0s | CREATE TABLE IF NOT EXISTS "test_ttl_0s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_none; + ++---------------+----------------------------------------------+ +| Table | Create Table | ++---------------+----------------------------------------------+ +| test_ttl_none | CREATE TABLE IF NOT EXISTS "test_ttl_none" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------+----------------------------------------------+ + +DROP TABLE test_ttl_1s; + +Affected Rows: 0 + +DROP TABLE test_ttl_0s; + +Affected Rows: 0 + +DROP TABLE test_ttl_none; + +Affected Rows: 0 + +SHOW DATABASES; + ++--------------------+ +| Database | ++--------------------+ +| greptime_private | +| information_schema | +| public | +| ttl_db_0s | +| ttl_db_1s | +| ttl_db_none | ++--------------------+ + +SHOW CREATE DATABASE ttl_db_1s; + ++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_1s | CREATE DATABASE IF NOT EXISTS ttl_db_1s | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE ttl_db_0s; + 
++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_0s | CREATE DATABASE IF NOT EXISTS ttl_db_0s | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE ttl_db_none; + ++-------------+-------------------------------------------+ +| Database | Create Database | ++-------------+-------------------------------------------+ +| ttl_db_none | CREATE DATABASE IF NOT EXISTS ttl_db_none | ++-------------+-------------------------------------------+ + +DROP DATABASE ttl_db_1s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_0s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_none; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_ttl.sql b/tests/upgrade-compat/standalone/common/test_ttl.sql new file mode 100644 index 0000000000..3462fd2244 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.sql @@ -0,0 +1,42 @@ + +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +CREATE DATABASE ttl_db_none; + +-- SQLNESS ARG version=latest +SHOW TABLES; + +SHOW CREATE TABLE test_ttl_1s; + +SHOW CREATE TABLE test_ttl_0s; + +SHOW CREATE TABLE test_ttl_none; + +DROP TABLE test_ttl_1s; + +DROP TABLE test_ttl_0s; + +DROP TABLE test_ttl_none; + +SHOW DATABASES; + +SHOW CREATE DATABASE ttl_db_1s; + +SHOW CREATE DATABASE ttl_db_0s; + +SHOW CREATE DATABASE ttl_db_none; + +DROP DATABASE ttl_db_1s; + +DROP DATABASE ttl_db_0s; + +DROP DATABASE ttl_db_none; From c33cf593983b9ea653709e3ab4778571ef18129b Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:06:07 +0800 Subject: [PATCH 31/46] perf: avoid holding memtable during compaction (#5157) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * perf/avoid-holding-memtable-during-compaction: Refactor Compaction Version Handling • Introduced CompactionVersion struct to encapsulate region version details for compaction, removing dependency on VersionRef. • Updated CompactionRequest and CompactionRegion to use CompactionVersion. • Modified open_compaction_region to construct CompactionVersion without memtables. • Adjusted WindowedCompactionPicker to work with CompactionVersion. • Enhanced flush logic in WriteBufferManager to improve memory usage checks and logging. 
* reformat code * chore: change log level * reformat code --------- Co-authored-by: Yingwen --- src/mito2/src/compaction.rs | 8 ++-- src/mito2/src/compaction/compactor.rs | 66 +++++++++++++++------------ src/mito2/src/compaction/window.rs | 40 ++++++---------- src/mito2/src/flush.rs | 21 +++++---- 4 files changed, 70 insertions(+), 65 deletions(-) diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 5236e0d616..7fdd32aa27 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -44,7 +44,7 @@ use tokio::sync::mpsc::{self, Sender}; use crate::access_layer::AccessLayerRef; use crate::cache::CacheManagerRef; -use crate::compaction::compactor::{CompactionRegion, DefaultCompactor}; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion, DefaultCompactor}; use crate::compaction::picker::{new_picker, CompactionTask}; use crate::compaction::task::CompactionTaskImpl; use crate::config::MitoConfig; @@ -59,7 +59,7 @@ use crate::read::scan_region::ScanInput; use crate::read::seq_scan::SeqScan; use crate::read::BoxedBatchReader; use crate::region::options::MergeMode; -use crate::region::version::{VersionControlRef, VersionRef}; +use crate::region::version::VersionControlRef; use crate::region::ManifestContextRef; use crate::request::{OptionOutputTx, OutputTx, WorkerRequest}; use crate::schedule::remote_job_scheduler::{ @@ -73,7 +73,7 @@ use crate::worker::WorkerListener; /// Region compaction request. pub struct CompactionRequest { pub(crate) engine_config: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) access_layer: AccessLayerRef, /// Sender to send notification to the region worker. pub(crate) request_sender: mpsc::Sender, @@ -522,7 +522,7 @@ impl CompactionStatus { listener: WorkerListener, schema_metadata_manager: SchemaMetadataManagerRef, ) -> CompactionRequest { - let current_version = self.version_control.current().version; + let current_version = CompactionVersion::from(self.version_control.current().version); let start_time = Instant::now(); let mut req = CompactionRequest { engine_config, diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs index 91ab34c961..e2499140fd 100644 --- a/src/mito2/src/compaction/compactor.rs +++ b/src/mito2/src/compaction/compactor.rs @@ -35,12 +35,10 @@ use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Res use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; use crate::manifest::storage::manifest_compress_type; -use crate::memtable::time_partition::TimePartitions; -use crate::memtable::MemtableBuilderProvider; use crate::read::Source; use crate::region::opener::new_manifest_dir; use crate::region::options::RegionOptions; -use crate::region::version::{VersionBuilder, VersionRef}; +use crate::region::version::VersionRef; use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState}; use crate::schedule::scheduler::LocalScheduler; use crate::sst::file::{FileMeta, IndexType}; @@ -48,6 +46,34 @@ use crate::sst::file_purger::LocalFilePurger; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; use crate::sst::parquet::WriteOptions; +use crate::sst::version::{SstVersion, SstVersionRef}; + +/// Region version for compaction that does not hold memtables. 
+#[derive(Clone)] +pub struct CompactionVersion { + /// Metadata of the region. + /// + /// Altering metadata isn't frequent, storing metadata in Arc to allow sharing + /// metadata and reuse metadata when creating a new `Version`. + pub(crate) metadata: RegionMetadataRef, + /// Options of the region. + pub(crate) options: RegionOptions, + /// SSTs of the region. + pub(crate) ssts: SstVersionRef, + /// Inferred compaction time window. + pub(crate) compaction_time_window: Option, +} + +impl From for CompactionVersion { + fn from(value: VersionRef) -> Self { + Self { + metadata: value.metadata.clone(), + options: value.options.clone(), + ssts: value.ssts.clone(), + compaction_time_window: value.compaction_time_window, + } + } +} /// CompactionRegion represents a region that needs to be compacted. /// It's the subset of MitoRegion. @@ -62,7 +88,7 @@ pub struct CompactionRegion { pub(crate) cache_manager: CacheManagerRef, pub(crate) access_layer: AccessLayerRef, pub(crate) manifest_ctx: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) file_purger: Option>, pub(crate) ttl: Option, } @@ -147,30 +173,14 @@ pub async fn open_compaction_region( }; let current_version = { - let memtable_builder = MemtableBuilderProvider::new(None, Arc::new(mito_config.clone())) - .builder_for_options( - req.region_options.memtable.as_ref(), - req.region_options.need_dedup(), - req.region_options.merge_mode(), - ); - - // Initial memtable id is 0. - let mutable = Arc::new(TimePartitions::new( - region_metadata.clone(), - memtable_builder.clone(), - 0, - req.region_options.compaction.time_window(), - )); - - let version = VersionBuilder::new(region_metadata.clone(), mutable) - .add_files(file_purger.clone(), manifest.files.values().cloned()) - .flushed_entry_id(manifest.flushed_entry_id) - .flushed_sequence(manifest.flushed_sequence) - .truncated_entry_id(manifest.truncated_entry_id) - .compaction_time_window(manifest.compaction_time_window) - .options(req.region_options.clone()) - .build(); - Arc::new(version) + let mut ssts = SstVersion::new(); + ssts.add_files(file_purger.clone(), manifest.files.values().cloned()); + CompactionVersion { + metadata: region_metadata.clone(), + options: req.region_options.clone(), + ssts: Arc::new(ssts), + compaction_time_window: manifest.compaction_time_window, + } }; let ttl = find_ttl( diff --git a/src/mito2/src/compaction/window.rs b/src/mito2/src/compaction/window.rs index f16b8e4c95..10bdb47297 100644 --- a/src/mito2/src/compaction/window.rs +++ b/src/mito2/src/compaction/window.rs @@ -23,10 +23,9 @@ use common_time::Timestamp; use store_api::storage::RegionId; use crate::compaction::buckets::infer_time_bucket; -use crate::compaction::compactor::CompactionRegion; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion}; use crate::compaction::picker::{Picker, PickerOutput}; use crate::compaction::{get_expired_ssts, CompactionOutput}; -use crate::region::version::VersionRef; use crate::sst::file::{FileHandle, FileId}; /// Compaction picker that splits the time range of all involved files to windows, and merges @@ -48,7 +47,11 @@ impl WindowedCompactionPicker { // use persisted window. If persist window is not present, we check the time window // provided while creating table. If all of those are absent, we infer the window // from files in level0. 
- fn calculate_time_window(&self, region_id: RegionId, current_version: &VersionRef) -> i64 { + fn calculate_time_window( + &self, + region_id: RegionId, + current_version: &CompactionVersion, + ) -> i64 { self.compaction_time_window_seconds .or(current_version .compaction_time_window @@ -67,7 +70,7 @@ impl WindowedCompactionPicker { fn pick_inner( &self, region_id: RegionId, - current_version: &VersionRef, + current_version: &CompactionVersion, current_time: Timestamp, ) -> (Vec, Vec, i64) { let time_window = self.calculate_time_window(region_id, current_version); @@ -205,28 +208,19 @@ mod tests { use common_time::Timestamp; use store_api::storage::RegionId; + use crate::compaction::compactor::CompactionVersion; use crate::compaction::window::{file_time_bucket_span, WindowedCompactionPicker}; - use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder}; - use crate::memtable::time_partition::TimePartitions; - use crate::memtable::version::MemtableVersion; use crate::region::options::RegionOptions; - use crate::region::version::{Version, VersionRef}; use crate::sst::file::{FileId, FileMeta, Level}; use crate::sst::version::SstVersion; use crate::test_util::memtable_util::metadata_for_test; use crate::test_util::NoopFilePurger; - fn build_version(files: &[(FileId, i64, i64, Level)], ttl: Option) -> VersionRef { + fn build_version( + files: &[(FileId, i64, i64, Level)], + ttl: Option, + ) -> CompactionVersion { let metadata = metadata_for_test(); - let memtables = Arc::new(MemtableVersion::new(Arc::new(TimePartitions::new( - metadata.clone(), - Arc::new(PartitionTreeMemtableBuilder::new( - PartitionTreeConfig::default(), - None, - )), - 0, - None, - )))); let file_purger_ref = Arc::new(NoopFilePurger); let mut ssts = SstVersion::new(); @@ -244,14 +238,9 @@ mod tests { }), ); - Arc::new(Version { + CompactionVersion { metadata, - memtables, ssts: Arc::new(ssts), - flushed_entry_id: 0, - flushed_sequence: 0, - truncated_entry_id: None, - compaction_time_window: None, options: RegionOptions { ttl: ttl.map(|t| t.into()), compaction: Default::default(), @@ -262,7 +251,8 @@ mod tests { memtable: None, merge_mode: None, }, - }) + compaction_time_window: None, + } } #[test] diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index 09f45ca4f7..b522f225f9 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use common_telemetry::{debug, error, info}; +use common_telemetry::{debug, error, info, trace}; use smallvec::SmallVec; use snafu::ResultExt; use store_api::storage::RegionId; @@ -141,17 +141,22 @@ impl WriteBufferManager for WriteBufferManagerImpl { // If the memory exceeds the buffer size, we trigger more aggressive // flush. But if already more than half memory is being flushed, // triggering more flush may not help. We will hold it instead. 
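The comment above describes the flush policy in words; a minimal sketch with concrete numbers makes the two outcomes explicit. This is not part of the patch and models only the "over total limit" branch of `should_flush_engine`, with an assumed 8 MiB global write buffer:

```rust
// Sketch only, not part of the patch: the "over total limit" branch described above.
fn over_limit_branch(memory_usage: usize, mutable_usage: usize, global: usize) -> bool {
    // Over the global budget *and* mutable memtables still hold at least half of it.
    memory_usage >= global && mutable_usage >= global / 2
}

fn main() {
    let global = 8 * 1024 * 1024; // assumed global_write_buffer_size
    // Over the limit, but most memory is already being flushed -> hold (the new trace! path).
    assert!(!over_limit_branch(9 << 20, 3 << 20, global));
    // Over the limit with plenty of mutable memtable data -> trigger a flush (the debug! path).
    assert!(over_limit_branch(9 << 20, 5 << 20, global));
}
```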
- if memory_usage >= self.global_write_buffer_size - && mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 - { - debug!( + if memory_usage >= self.global_write_buffer_size { + if mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 { + debug!( "Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \ mutable_usage: {}.", memory_usage, self.global_write_buffer_size, - mutable_memtable_memory_usage, - ); - return true; + mutable_memtable_memory_usage); + return true; + } else { + trace!( + "Engine won't flush, memory_usage: {}, global_write_buffer_size: {}, mutable_usage: {}.", + memory_usage, + self.global_write_buffer_size, + mutable_memtable_memory_usage); + } } false From 18e8c45384e506cdea9a3c4bddc010fca1e7f10d Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:42:33 +0800 Subject: [PATCH 32/46] refactor: produce BatchBuilder from a Batch to modify it again (#5186) chore: pub some mods --- src/mito2/src/read.rs | 12 ++++++++++++ src/mito2/src/sst/parquet.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read.rs b/src/mito2/src/read.rs index d8ac5ce46b..c4de103f10 100644 --- a/src/mito2/src/read.rs +++ b/src/mito2/src/read.rs @@ -861,6 +861,18 @@ impl BatchBuilder { } } +impl From for BatchBuilder { + fn from(batch: Batch) -> Self { + Self { + primary_key: batch.primary_key, + timestamps: Some(batch.timestamps), + sequences: Some(batch.sequences), + op_types: Some(batch.op_types), + fields: batch.fields, + } + } +} + /// Async [Batch] reader and iterator wrapper. /// /// This is the data source for SST writers or internal readers. diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index ae51a0d37c..3dd53ba645 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -24,7 +24,7 @@ use crate::sst::index::IndexOutput; use crate::sst::DEFAULT_WRITE_BUFFER_SIZE; pub(crate) mod file_range; -pub(crate) mod format; +pub mod format; pub(crate) mod helper; pub(crate) mod metadata; mod page_reader; From 7d1bcc9d494c2f3f0bedea23ffab331f8a8df88a Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Wed, 18 Dec 2024 11:45:38 +0800 Subject: [PATCH 33/46] feat: introduce Buffer for non-continuous bytes (#5164) * feat: introduce Buffer for non-continuous bytes * Update src/mito2/src/cache/index.rs Co-authored-by: Weny Xu * chore: apply review comments * refactor: use opendal::Buffer --------- Co-authored-by: Weny Xu --- Cargo.lock | 1 + src/common/base/Cargo.toml | 1 + src/index/src/inverted_index/format/reader.rs | 3 +- .../src/inverted_index/format/reader/blob.rs | 6 +- src/mito2/src/cache/index.rs | 99 ++++++++----------- 5 files changed, 46 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea2931f098..a0225cf27d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1904,6 +1904,7 @@ dependencies = [ "futures", "paste", "pin-project", + "rand", "serde", "snafu 0.8.5", "tokio", diff --git a/src/common/base/Cargo.toml b/src/common/base/Cargo.toml index 465599974d..2d35ad5d31 100644 --- a/src/common/base/Cargo.toml +++ b/src/common/base/Cargo.toml @@ -17,6 +17,7 @@ common-macro.workspace = true futures.workspace = true paste = "1.0" pin-project.workspace = true +rand.workspace = true serde = { version = "1.0", features = ["derive"] } snafu.workspace = true tokio.workspace = true diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index 
904681d5f4..21e5487d1e 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -16,6 +16,7 @@ use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::ResultExt; @@ -35,7 +36,7 @@ pub trait InvertedIndexReader: Send { async fn range_read(&mut self, offset: u64, size: u32) -> Result>; /// Reads the bytes in the given ranges. - async fn read_vec(&mut self, ranges: &[Range]) -> Result>>; + async fn read_vec(&mut self, ranges: &[Range]) -> Result>; /// Retrieves metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result>; diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index 371655d535..fcaa63773d 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -16,6 +16,7 @@ use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; @@ -60,9 +61,8 @@ impl InvertedIndexReader for InvertedIndexBlobReader { Ok(buf.into()) } - async fn read_vec(&mut self, ranges: &[Range]) -> Result>> { - let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?; - Ok(bufs.into_iter().map(|buf| buf.into()).collect()) + async fn read_vec(&mut self, ranges: &[Range]) -> Result> { + self.source.read_vec(ranges).await.context(CommonIoSnafu) } async fn metadata(&mut self) -> Result> { diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index e25fb22dcb..de39ea3784 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -17,10 +17,12 @@ use std::sync::Arc; use api::v1::index::InvertedIndexMetas; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use index::inverted_index::error::DecodeFstSnafu; use index::inverted_index::format::reader::InvertedIndexReader; use index::inverted_index::FstMap; +use object_store::Buffer; use prost::Message; use snafu::ResultExt; @@ -68,15 +70,14 @@ where if keys.is_empty() { return Ok(Vec::new()); } - // TODO: Can be replaced by an uncontinuous structure like opendal::Buffer. let mut data = Vec::with_capacity(keys.len()); - data.resize(keys.len(), Arc::new(Vec::new())); + data.resize(keys.len(), Bytes::new()); let mut cache_miss_range = vec![]; let mut cache_miss_idx = vec![]; let last_index = keys.len() - 1; // TODO: Avoid copy as much as possible. 
- for (i, index) in keys.clone().into_iter().enumerate() { - match self.cache.get_index(&index) { + for (i, index) in keys.iter().enumerate() { + match self.cache.get_index(index) { Some(page) => { CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); data[i] = page; @@ -97,24 +98,19 @@ where if !cache_miss_range.is_empty() { let pages = self.inner.read_vec(&cache_miss_range).await?; for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) { - let page = Arc::new(page); let key = keys[i].clone(); data[i] = page.clone(); self.cache.put_index(key, page.clone()); } } - let mut result = Vec::with_capacity(size as usize); - data.iter().enumerate().for_each(|(i, page)| { - let range = if i == 0 { - IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size) - } else if i == last_index { - IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size) - } else { - 0..self.cache.page_size as usize - }; - result.extend_from_slice(&page[range]); - }); - Ok(result) + let buffer = Buffer::from_iter(data.into_iter()); + Ok(buffer + .slice(IndexDataPageKey::calculate_range( + offset, + size, + self.cache.page_size, + )) + .to_vec()) } } @@ -131,7 +127,7 @@ impl InvertedIndexReader for CachedInvertedIndexBlobRead async fn read_vec( &mut self, ranges: &[Range], - ) -> index::inverted_index::error::Result>> { + ) -> index::inverted_index::error::Result> { self.inner.read_vec(ranges).await } @@ -190,31 +186,19 @@ impl IndexDataPageKey { (end_page + 1 - start_page) as u32 } - /// Computes the byte range in the first page based on the offset and size. - /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096. - fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range { + /// Calculates the byte range for data retrieval based on the specified offset and size. + /// + /// This function determines the starting and ending byte positions required for reading data. + /// For example, with an offset of 5000 and a size of 5000, using a PAGE_SIZE of 4096, + /// the resulting byte range will be 904..5904. This indicates that: + /// - The reader will first access fixed-size pages [4096, 8192) and [8192, 12288). + /// - To read the range [5000..10000), it only needs to fetch bytes within the range [904, 5904) across two pages. + fn calculate_range(offset: u64, size: u32, page_size: u64) -> Range { let start = (offset % page_size) as usize; - let end = if size > page_size as u32 - start as u32 { - page_size as usize - } else { - start + size as usize - }; + let end = start + size as usize; start..end } - /// Computes the byte range in the last page based on the offset and size. - /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904. - fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range { - let offset = offset as usize; - let size = size as usize; - let page_size = page_size as usize; - if (offset + size) % page_size == 0 { - 0..page_size - } else { - 0..((offset + size) % page_size) - } - } - /// Generates a vector of IndexKey instances for the pages that a given offset and size span. fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec { let start_page = Self::calculate_page_id(offset, page_size); @@ -234,7 +218,7 @@ pub struct InvertedIndexCache { /// Cache for inverted index metadata index_metadata: moka::sync::Cache>, /// Cache for inverted index content. 
- index: moka::sync::Cache>>, + index: moka::sync::Cache, // Page size for index content. page_size: u64, } @@ -284,11 +268,11 @@ impl InvertedIndexCache { self.index_metadata.insert(key, metadata) } - pub fn get_index(&self, key: &IndexDataPageKey) -> Option>> { + pub fn get_index(&self, key: &IndexDataPageKey) -> Option { self.index.get(key) } - pub fn put_index(&self, key: IndexDataPageKey, value: Arc>) { + pub fn put_index(&self, key: IndexDataPageKey, value: Bytes) { CACHE_BYTES .with_label_values(&[INDEX_CONTENT_TYPE]) .add(index_content_weight(&key, &value).into()); @@ -302,7 +286,7 @@ fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc) -> u } /// Calculates weight for index content. -fn index_content_weight(k: &IndexDataPageKey, v: &Arc>) -> u32 { +fn index_content_weight(k: &IndexDataPageKey, v: &Bytes) -> u32 { (k.file_id.as_bytes().len() + v.len()) as u32 } @@ -331,6 +315,9 @@ mod test { use crate::sst::index::store::InstrumentedStore; use crate::test_util::TestEnv; + // Repeat times for following little fuzz tests. + const FUZZ_REPEAT_TIMES: usize = 100; + // Fuzz test for index data page key #[test] fn fuzz_index_calculation() { @@ -340,7 +327,7 @@ mod test { rng.fill_bytes(&mut data); let file_id = FileId::random(); - for _ in 0..100 { + for _ in 0..FUZZ_REPEAT_TIMES { let offset = rng.gen_range(0..data.len() as u64); let size = rng.gen_range(0..data.len() as u32 - offset as u32); let page_size: usize = rng.gen_range(1..1024); @@ -349,32 +336,24 @@ mod test { IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64); let page_num = indexes.len(); let mut read = Vec::with_capacity(size as usize); - let last_index = indexes.len() - 1; - for (i, key) in indexes.into_iter().enumerate() { + for key in indexes.into_iter() { let start = key.page_id as usize * page_size; let page = if start + page_size < data.len() { &data[start..start + page_size] } else { &data[start..] }; - let range = if i == 0 { - // first page range - IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64) - } else if i == last_index { - // last page range. when the first page is the last page, the range is not used. 
- IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64) - } else { - 0..page_size - }; - read.extend_from_slice(&page[range]); + read.extend_from_slice(page); } let expected_range = offset as usize..(offset + size as u64 as u64) as usize; + let read = + read[IndexDataPageKey::calculate_range(offset, size, page_size as u64)].to_vec(); if read != data.get(expected_range).unwrap() { panic!( - "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}", + "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}", offset, size, page_size, read.len(), size as usize, - IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64), - IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num + IndexDataPageKey::calculate_range(offset, size, page_size as u64), + page_num ); } } @@ -519,7 +498,7 @@ mod test { // fuzz test let mut rng = rand::thread_rng(); - for _ in 0..100 { + for _ in 0..FUZZ_REPEAT_TIMES { let offset = rng.gen_range(0..file_size); let size = rng.gen_range(0..file_size as u32 - offset as u32); let expected = cached_reader.range_read(offset, size).await.unwrap(); From 266919c226f4da5296c75797169843094b221f4f Mon Sep 17 00:00:00 2001 From: Lanqing Yang Date: Tue, 17 Dec 2024 22:10:59 -0800 Subject: [PATCH 34/46] fix: display inverted and fulltext index in show index (#5169) --- .../information_schema/key_column_usage.rs | 52 ++++++++++--------- src/datatypes/src/schema/column_schema.rs | 4 ++ src/query/src/sql.rs | 23 ++++++-- .../standalone/common/show/show_index.result | 52 +++++++++++-------- .../standalone/common/show/show_index.sql | 6 ++- 5 files changed, 88 insertions(+), 49 deletions(-) diff --git a/src/catalog/src/system_schema/information_schema/key_column_usage.rs b/src/catalog/src/system_schema/information_schema/key_column_usage.rs index 56713dabba..42cfa53fdb 100644 --- a/src/catalog/src/system_schema/information_schema/key_column_usage.rs +++ b/src/catalog/src/system_schema/information_schema/key_column_usage.rs @@ -54,6 +54,10 @@ const INIT_CAPACITY: usize = 42; pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY"; /// Time index constraint name pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX"; +/// Inverted index constraint name +pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX"; +/// Fulltext index constraint name +pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX"; /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`. pub(super) struct InformationSchemaKeyColumnUsage { @@ -216,14 +220,13 @@ impl InformationSchemaKeyColumnUsageBuilder { let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? 
{ - let mut primary_constraints = vec![]; - let table_info = table.table_info(); let table_name = &table_info.name; let keys = &table_info.meta.primary_key_indices; let schema = table.schema(); for (idx, column) in schema.column_schemas().iter().enumerate() { + let mut constraints = vec![]; if column.is_time_index() { self.add_key_column_usage( &predicates, @@ -236,30 +239,31 @@ impl InformationSchemaKeyColumnUsageBuilder { 1, //always 1 for time index ); } - if keys.contains(&idx) { - primary_constraints.push(( - catalog_name.clone(), - schema_name.clone(), - table_name.to_string(), - column.name.clone(), - )); - } // TODO(dimbtp): foreign key constraint not supported yet - } + if keys.contains(&idx) { + constraints.push(PRI_CONSTRAINT_NAME); + } + if column.is_inverted_indexed() { + constraints.push(INVERTED_INDEX_CONSTRAINT_NAME); + } - for (i, (catalog_name, schema_name, table_name, column_name)) in - primary_constraints.into_iter().enumerate() - { - self.add_key_column_usage( - &predicates, - &schema_name, - PRI_CONSTRAINT_NAME, - &catalog_name, - &schema_name, - &table_name, - &column_name, - i as u32 + 1, - ); + if column.has_fulltext_index_key() { + constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME); + } + + if !constraints.is_empty() { + let aggregated_constraints = constraints.join(", "); + self.add_key_column_usage( + &predicates, + &schema_name, + &aggregated_constraints, + &catalog_name, + &schema_name, + table_name, + &column.name, + idx as u32 + 1, + ); + } } } } diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index aee9efd962..7a96ab5e2b 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -164,6 +164,10 @@ impl ColumnSchema { .unwrap_or(false) } + pub fn has_fulltext_index_key(&self) -> bool { + self.metadata.contains_key(FULLTEXT_KEY) + } + pub fn has_inverted_index_key(&self) -> bool { self.metadata.contains_key(INVERTED_INDEX_KEY) } diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 062bd8e14e..3337503d09 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -40,7 +40,7 @@ use common_recordbatch::RecordBatches; use common_time::timezone::get_timezone; use common_time::Timestamp; use datafusion::common::ScalarValue; -use datafusion::prelude::SessionContext; +use datafusion::prelude::{concat_ws, SessionContext}; use datafusion_expr::{case, col, lit, Expr}; use datatypes::prelude::*; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema}; @@ -400,6 +400,20 @@ pub async fn show_index( query_ctx.current_schema() }; + let fulltext_index_expr = case(col("constraint_name").like(lit("%FULLTEXT INDEX%"))) + .when(lit(true), lit("greptime-fulltext-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + + let inverted_index_expr = case( + col("constraint_name") + .like(lit("%INVERTED INDEX%")) + .or(col("constraint_name").like(lit("%PRIMARY%"))), + ) + .when(lit(true), lit("greptime-inverted-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + let select = vec![ // 1 as `Non_unique`: contain duplicates lit(1).alias(INDEX_NONT_UNIQUE_COLUMN), @@ -417,8 +431,11 @@ pub async fn show_index( .otherwise(lit(YES_STR)) .context(error::PlanSqlSnafu)? .alias(COLUMN_NULLABLE_COLUMN), - // TODO(dennis): maybe 'BTREE'? 
- lit("greptime-inverted-index-v1").alias(INDEX_INDEX_TYPE_COLUMN), + concat_ws( + lit(", "), + vec![inverted_index_expr.clone(), fulltext_index_expr.clone()], + ) + .alias(INDEX_INDEX_TYPE_COLUMN), lit("").alias(COLUMN_COMMENT_COLUMN), lit("").alias(INDEX_COMMENT_COLUMN), lit(YES_STR).alias(INDEX_VISIBLE_COLUMN), diff --git a/tests/cases/standalone/common/show/show_index.result b/tests/cases/standalone/common/show/show_index.result index 995da87c13..6f179687db 100644 --- a/tests/cases/standalone/common/show/show_index.result +++ b/tests/cases/standalone/common/show/show_index.result @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); @@ -33,28 +37,34 @@ SHOW INDEX FROM test; +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ | test | 1 | PRIMARY | 1 | a | A | | | | YES | greptime-inverted-index-v1 | | | YES | | | test | 1 | PRIMARY | 2 | b | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| 
system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics in public; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | 
++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics like '%util%'; @@ -62,11 +72,11 @@ Error: 1001(Unsupported), SQL statement is not supported, keyword: like SHOW INDEX FROM system_metrics WHERE Key_name = 'TIME INDEX'; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ DROP TABLE system_metrics; diff --git a/tests/cases/standalone/common/show/show_index.sql b/tests/cases/standalone/common/show/show_index.sql index 3f804db384..f0c5894a0a 100644 --- a/tests/cases/standalone/common/show/show_index.sql +++ b/tests/cases/standalone/common/show/show_index.sql @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); From e662c241e62dcd88b5a51e28fbea969d2d7b5fb8 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Wed, 18 Dec 2024 14:35:45 +0800 Subject: [PATCH 35/46] feat: impl label_join and label_replace for promql (#5153) * feat: impl label_join and label_replace for promql * chore: style * fix: dst_label is eqauls to src_label * fix: forgot to sort the results * fix: processing empty source label --- src/query/src/promql/planner.rs | 258 +++++++++++++++++- .../standalone/common/promql/label.result | 199 ++++++++++++++ .../cases/standalone/common/promql/label.sql | 55 ++++ 3 files changed, 508 insertions(+), 4 deletions(-) create mode 100644 tests/cases/standalone/common/promql/label.result create mode 100644 
tests/cases/standalone/common/promql/label.sql diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 001e41ca99..1e7bc27dab 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -1213,7 +1213,7 @@ impl PromPlanner { let quantile_expr = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => quantile, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as quantile, but found {:?}", other), + desc: format!("expected f64 literal as quantile, but found {:?}", other), } .fail()?, }; @@ -1224,7 +1224,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t as i64, Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t, other => UnexpectedPlanExprSnafu { - desc: format!("expect i64 literal as t, but found {:?}", other), + desc: format!("expected i64 literal as t, but found {:?}", other), } .fail()?, }; @@ -1235,7 +1235,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(sf)))) => sf, other => UnexpectedPlanExprSnafu { desc: format!( - "expect f64 literal as smoothing factor, but found {:?}", + "expected f64 literal as smoothing factor, but found {:?}", other ), } @@ -1244,7 +1244,10 @@ impl PromPlanner { let tf_exp = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(tf)))) => tf, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as trend factor, but found {:?}", other), + desc: format!( + "expected f64 literal as trend factor, but found {:?}", + other + ), } .fail()?, }; @@ -1331,6 +1334,47 @@ impl PromPlanner { exprs.push(date_part_expr); ScalarFunc::GeneratedExpr } + + "label_join" => { + let (concat_expr, dst_label) = + Self::build_concat_labels_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. + for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(concat_expr); + + ScalarFunc::GeneratedExpr + } + "label_replace" => { + let (replace_expr, dst_label) = + Self::build_regexp_replace_label_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. 
+ for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(replace_expr); + + ScalarFunc::GeneratedExpr + } _ => { if let Some(f) = session_state.scalar_functions().get(func.name) { ScalarFunc::DataFusionBuiltin(f.clone()) @@ -1411,6 +1455,7 @@ impl PromPlanner { // update value columns' name, and alias them to remove qualifiers let mut new_field_columns = Vec::with_capacity(exprs.len()); + exprs = exprs .into_iter() .map(|expr| { @@ -1420,11 +1465,146 @@ impl PromPlanner { }) .collect::, _>>() .context(DataFusionPlanningSnafu)?; + self.ctx.field_columns = new_field_columns; Ok(exprs) } + /// Build expr for `label_replace` function + fn build_regexp_replace_label_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_replace(vector, dst_label, replacement, src_label, regex) + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let replacement = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected replacement string literal, but found {:?}", other), + } + .fail()?, + }; + let src_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(s)))) => s, + other => UnexpectedPlanExprSnafu { + desc: format!("expected src_label string literal, but found {:?}", other), + } + .fail()?, + }; + let regex = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected regex string literal, but found {:?}", other), + } + .fail()?, + }; + + let func = session_state + .scalar_functions() + .get("regexp_replace") + .context(UnsupportedExprSnafu { + name: "regexp_replace", + })?; + + // regexp_replace(src_label, regex, replacement) + let args = vec![ + if src_label.is_empty() { + DfExpr::Literal(ScalarValue::Null) + } else { + DfExpr::Column(Column::from_name(src_label)) + }, + DfExpr::Literal(ScalarValue::Utf8(Some(regex))), + DfExpr::Literal(ScalarValue::Utf8(Some(replacement))), + ]; + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + + /// Build expr for `label_join` function + fn build_concat_labels_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_join(vector, dst_label, separator, src_label_1, src_label_2, ...) 
+ + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let separator = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected separator string literal, but found {:?}", other), + } + .fail()?, + }; + let src_labels = other_input_exprs + .clone() + .into_iter() + .map(|expr| { + // Cast source label into column + match expr { + DfExpr::Literal(ScalarValue::Utf8(Some(label))) => { + if label.is_empty() { + Ok(DfExpr::Literal(ScalarValue::Null)) + } else { + Ok(DfExpr::Column(Column::from_name(label))) + } + } + other => UnexpectedPlanExprSnafu { + desc: format!( + "expected source label string literal, but found {:?}", + other + ), + } + .fail(), + } + }) + .collect::>>()?; + ensure!( + !src_labels.is_empty(), + FunctionInvalidArgumentSnafu { + fn_name: "label_join", + } + ); + + let func = session_state + .scalar_functions() + .get("concat_ws") + .context(UnsupportedExprSnafu { name: "concat_ws" })?; + + // concat_ws(separator, src_label_1, src_label_2, ...) as dst_label + let mut args = Vec::with_capacity(1 + src_labels.len()); + args.push(DfExpr::Literal(ScalarValue::Utf8(Some(separator)))); + args.extend(src_labels); + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + fn create_time_index_column_expr(&self) -> Result { Ok(DfExpr::Column(Column::from_name( self.ctx @@ -3267,4 +3447,74 @@ mod test { \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]" ); } + + #[tokio::test] + async fn test_label_join() { + let prom_expr = parser::parse( + "label_join(up{tag_0='api-server'}, 'foo', ',', 'tag_1', 'tag_2', 'tag_3')", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 4, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, concat_ws(Utf8(","), up.tag_1, up.tag_2, up.tag_3) AS foo AS foo, up.tag_0, up.tag_1, up.tag_2, up.tag_3 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0", "tag_1", "tag_2", "tag_3"] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.tag_1 DESC NULLS LAST, up.tag_2 DESC 
NULLS LAST, up.tag_3 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("api-server") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } + + #[tokio::test] + async fn test_label_replace() { + let prom_expr = parser::parse( + "label_replace(up{tag_0=\"a:c\"}, \"foo\", \"$1\", \"tag_0\", \"(.*):.*\")", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 1, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, regexp_replace(up.tag_0, Utf8("(.*):.*"), Utf8("$1")) AS foo AS foo, up.tag_0 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("a:c") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } } diff --git a/tests/cases/standalone/common/promql/label.result b/tests/cases/standalone/common/promql/label.result new file mode 100644 index 0000000000..42ba33ca92 --- /dev/null +++ b/tests/cases/standalone/common/promql/label.result @@ -0,0 +1,199 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +Affected Rows: 0 + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +Affected Rows: 8 + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +Error: 1004(InvalidArguments), Invalid function argument for label_join + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 
+TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + ++---------------------+-----+-------+------------+ +| ts | val | host | idc | ++---------------------+-----+-------+------------+ +| 1970-01-01T00:00:00 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | host1 | idc4:zone3 | ++---------------------+-----+-------+------------+ + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + ++---------------------+-----+------------------+------------+ +| ts | val | host | idc | ++---------------------+-----+------------------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | idc4:zone3 | ++---------------------+-----+------------------+------------+ + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + ++---------------------+-----+------+------------+ +| ts | val | host | idc | ++---------------------+-----+------+------------+ +| 1970-01-01T00:00:00 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | | idc1 | +| 1970-01-01T00:00:10 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | | idc1 | +| 1970-01-01T00:00:15 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | | idc4:zone3 | ++---------------------+-----+------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + ++---------------------+-----+------------------+-------+------------+ +| ts | val | new_host | host | idc | ++---------------------+-----+------------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | host1 | idc4:zone3 | ++---------------------+-----+------------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + 
++---------------------+-----+---------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | zone3 | host1 | idc4:zone3 | ++---------------------+-----+---------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + ++---------------------+-----+------------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3 | host1 | idc4:zone3 | ++---------------------+-----+------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+---------+-------+------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | idc4 | ++---------------------+-----+---------+-------+------+ + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+------+-------+ +| ts | val | idc | host | ++---------------------+-----+------+-------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | ++---------------------+-----+------+-------+ + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); + 
++---------------------+-----+-----+-------+ +| ts | val | idc | host | ++---------------------+-----+-----+-------+ +| 1970-01-01T00:00:00 | 2 | | host2 | +| 1970-01-01T00:00:05 | 2 | | host2 | +| 1970-01-01T00:00:05 | 4 | | host2 | +| 1970-01-01T00:00:10 | 2 | | host2 | +| 1970-01-01T00:00:10 | 4 | | host2 | +| 1970-01-01T00:00:10 | 6 | | host2 | +| 1970-01-01T00:00:15 | 2 | | host2 | +| 1970-01-01T00:00:15 | 4 | | host2 | +| 1970-01-01T00:00:15 | 6 | | host2 | +| 1970-01-01T00:00:15 | 8 | | host2 | ++---------------------+-----+-----+-------+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/label.sql b/tests/cases/standalone/common/promql/label.sql new file mode 100644 index 0000000000..3b9058c27e --- /dev/null +++ b/tests/cases/standalone/common/promql/label.sql @@ -0,0 +1,55 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); + +DROP TABLE test; From 58d6982c939b0b85e932ce9f1e4b879a4d2f288f Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 14:37:22 +0800 Subject: [PATCH 36/46] feat: do not keep MemtableRefs in ScanInput (#5184) --- src/mito2/src/memtable.rs | 11 +++++- src/mito2/src/memtable/bulk.rs | 5 +-- src/mito2/src/memtable/partition_tree.rs | 10 +++-- src/mito2/src/memtable/time_series.rs | 9 +++-- src/mito2/src/read/range.rs | 50 +++++++++--------------- src/mito2/src/read/scan_region.rs | 30 +++++++++----- src/mito2/src/read/scan_util.rs | 3 +- src/mito2/src/read/seq_scan.rs | 1 - src/mito2/src/read/unordered_scan.rs | 1 - src/mito2/src/test_util/memtable_util.rs | 6 +-- 10 files changed, 66 insertions(+), 60 deletions(-) diff --git a/src/mito2/src/memtable.rs b/src/mito2/src/memtable.rs index f7d05c621f..6adc6eb96a 100644 --- a/src/mito2/src/memtable.rs +++ b/src/mito2/src/memtable.rs @@ -110,6 
+110,15 @@ impl MemtableStats { pub type BoxedBatchIterator = Box> + Send>; +/// Ranges in a memtable. +#[derive(Default)] +pub struct MemtableRanges { + /// Range IDs and ranges. + pub ranges: BTreeMap, + /// Statistics of the memtable at the query time. + pub stats: MemtableStats, +} + /// In memory write buffer. pub trait Memtable: Send + Sync + fmt::Debug { /// Returns the id of this memtable. @@ -139,7 +148,7 @@ pub trait Memtable: Send + Sync + fmt::Debug { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap; + ) -> MemtableRanges; /// Returns true if the memtable is empty. fn is_empty(&self) -> bool; diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs index 46e757f3df..96e6c70acd 100644 --- a/src/mito2/src/memtable/bulk.rs +++ b/src/mito2/src/memtable/bulk.rs @@ -14,7 +14,6 @@ //! Memtable implementation for bulk load -use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use store_api::metadata::RegionMetadataRef; @@ -25,7 +24,7 @@ use crate::error::Result; use crate::memtable::bulk::part::BulkPart; use crate::memtable::key_values::KeyValue; use crate::memtable::{ - BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRange, MemtableRef, MemtableStats, + BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRanges, MemtableRef, MemtableStats, }; #[allow(unused)] @@ -68,7 +67,7 @@ impl Memtable for BulkMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { todo!() } diff --git a/src/mito2/src/memtable/partition_tree.rs b/src/mito2/src/memtable/partition_tree.rs index 4c4b471643..1376f92331 100644 --- a/src/mito2/src/memtable/partition_tree.rs +++ b/src/mito2/src/memtable/partition_tree.rs @@ -23,7 +23,6 @@ mod shard; mod shard_builder; mod tree; -use std::collections::BTreeMap; use std::fmt; use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering}; use std::sync::Arc; @@ -41,7 +40,7 @@ use crate::memtable::partition_tree::tree::PartitionTree; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::region::options::MergeMode; @@ -176,7 +175,7 @@ impl Memtable for PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = projection.map(|ids| ids.to_vec()); let builder = Box::new(PartitionTreeIterBuilder { tree: self.tree.clone(), @@ -185,7 +184,10 @@ impl Memtable for PartitionTreeMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 4959c468b6..8ef6f44121 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -45,7 +45,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, 
MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED}; use crate::read::dedup::LastNonNullIter; @@ -250,7 +250,7 @@ impl Memtable for TimeSeriesMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = if let Some(projection) = projection { projection.iter().copied().collect() } else { @@ -268,7 +268,10 @@ impl Memtable for TimeSeriesMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/read/range.rs b/src/mito2/src/read/range.rs index bdad5f8fef..1b29e196a2 100644 --- a/src/mito2/src/read/range.rs +++ b/src/mito2/src/read/range.rs @@ -24,7 +24,7 @@ use store_api::region_engine::PartitionRange; use crate::cache::CacheManager; use crate::error::Result; -use crate::memtable::{MemtableRange, MemtableRef}; +use crate::memtable::{MemtableRange, MemtableRanges, MemtableStats}; use crate::read::scan_region::ScanInput; use crate::sst::file::{overlaps, FileHandle, FileTimeRange}; use crate::sst::parquet::file_range::{FileRange, FileRangeContextRef}; @@ -175,7 +175,7 @@ impl RangeMeta { } } - fn push_unordered_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_unordered_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For append mode, we can parallelize reading memtables. for (memtable_index, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -270,7 +270,7 @@ impl RangeMeta { } } - fn push_seq_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_seq_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For non append-only mode, each range only contains one memtable by default. for (i, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -421,29 +421,38 @@ impl FileRangeBuilder { /// Builder to create mem ranges. pub(crate) struct MemRangeBuilder { /// Ranges of a memtable. - row_groups: BTreeMap, + ranges: MemtableRanges, } impl MemRangeBuilder { /// Builds a mem range builder from row groups. - pub(crate) fn new(row_groups: BTreeMap) -> Self { - Self { row_groups } + pub(crate) fn new(ranges: MemtableRanges) -> Self { + Self { ranges } } /// Builds mem ranges to read in the memtable. /// Negative `row_group_index` indicates all row groups. - fn build_ranges(&self, row_group_index: i64, ranges: &mut SmallVec<[MemtableRange; 2]>) { + pub(crate) fn build_ranges( + &self, + row_group_index: i64, + ranges: &mut SmallVec<[MemtableRange; 2]>, + ) { if row_group_index >= 0 { let row_group_index = row_group_index as usize; // Scans one row group. - let Some(range) = self.row_groups.get(&row_group_index) else { + let Some(range) = self.ranges.ranges.get(&row_group_index) else { return; }; ranges.push(range.clone()); } else { - ranges.extend(self.row_groups.values().cloned()); + ranges.extend(self.ranges.ranges.values().cloned()); } } + + /// Returns the statistics of the memtable. + pub(crate) fn stats(&self) -> &MemtableStats { + &self.ranges.stats + } } /// List to manages the builders to create file ranges. @@ -451,18 +460,15 @@ impl MemRangeBuilder { /// the list to different streams in the same partition. 
pub(crate) struct RangeBuilderList { num_memtables: usize, - mem_builders: Mutex>>, file_builders: Mutex>>>, } impl RangeBuilderList { /// Creates a new [ReaderBuilderList] with the given number of memtables and files. pub(crate) fn new(num_memtables: usize, num_files: usize) -> Self { - let mem_builders = (0..num_memtables).map(|_| None).collect(); let file_builders = (0..num_files).map(|_| None).collect(); Self { num_memtables, - mem_builders: Mutex::new(mem_builders), file_builders: Mutex::new(file_builders), } } @@ -488,26 +494,6 @@ impl RangeBuilderList { Ok(ranges) } - /// Builds mem ranges to read the row group at `index`. - pub(crate) fn build_mem_ranges( - &self, - input: &ScanInput, - index: RowGroupIndex, - ) -> SmallVec<[MemtableRange; 2]> { - let mut ranges = SmallVec::new(); - let mut mem_builders = self.mem_builders.lock().unwrap(); - match &mut mem_builders[index.index] { - Some(builder) => builder.build_ranges(index.row_group_index, &mut ranges), - None => { - let builder = input.prune_memtable(index.index); - builder.build_ranges(index.row_group_index, &mut ranges); - mem_builders[index.index] = Some(builder); - } - } - - ranges - } - fn get_file_builder(&self, index: usize) -> Option> { let file_builders = self.file_builders.lock().unwrap(); file_builders[index].clone() diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 32b8c90cda..946ef28841 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -24,6 +24,7 @@ use common_recordbatch::SendableRecordBatchStream; use common_telemetry::{debug, error, tracing, warn}; use common_time::range::TimestampRange; use datafusion_expr::utils::expr_to_columns; +use smallvec::SmallVec; use store_api::region_engine::{PartitionRange, RegionScannerRef}; use store_api::storage::{ScanRequest, TimeSeriesRowSelector}; use table::predicate::{build_time_range_predicate, Predicate}; @@ -35,7 +36,7 @@ use crate::cache::file_cache::FileCacheRef; use crate::cache::CacheManagerRef; use crate::config::DEFAULT_SCAN_CHANNEL_SIZE; use crate::error::Result; -use crate::memtable::MemtableRef; +use crate::memtable::MemtableRange; use crate::metrics::READ_SST_COUNT; use crate::read::compat::{self, CompatBatch}; use crate::read::projection::ProjectionMapper; @@ -328,6 +329,14 @@ impl ScanRegion { Some(p) => ProjectionMapper::new(&self.version.metadata, p.iter().copied())?, None => ProjectionMapper::all(&self.version.metadata)?, }; + // Get memtable ranges to scan. + let memtables = memtables + .into_iter() + .map(|mem| { + let ranges = mem.ranges(Some(mapper.column_ids()), Some(predicate.clone())); + MemRangeBuilder::new(ranges) + }) + .collect(); let input = ScanInput::new(self.access_layer, mapper) .with_time_range(Some(time_range)) @@ -484,8 +493,8 @@ pub(crate) struct ScanInput { time_range: Option, /// Predicate to push down. pub(crate) predicate: Option, - /// Memtables to scan. - pub(crate) memtables: Vec, + /// Memtable range builders for memtables in the time range.. + pub(crate) memtables: Vec, /// Handles to SST files to scan. pub(crate) files: Vec, /// Cache. @@ -547,9 +556,9 @@ impl ScanInput { self } - /// Sets memtables to read. + /// Sets memtable range builders. #[must_use] - pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { + pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { self.memtables = memtables; self } @@ -667,11 +676,12 @@ impl ScanInput { Ok(sources) } - /// Prunes a memtable to scan and returns the builder to build readers. 
- pub(crate) fn prune_memtable(&self, mem_index: usize) -> MemRangeBuilder { - let memtable = &self.memtables[mem_index]; - let row_groups = memtable.ranges(Some(self.mapper.column_ids()), self.predicate.clone()); - MemRangeBuilder::new(row_groups) + /// Builds memtable ranges to scan by `index`. + pub(crate) fn build_mem_ranges(&self, index: RowGroupIndex) -> SmallVec<[MemtableRange; 2]> { + let memtable = &self.memtables[index.index]; + let mut ranges = SmallVec::new(); + memtable.build_ranges(index.row_group_index, &mut ranges); + ranges } /// Prunes a file to scan and returns the builder to build readers. diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index 0bdf62e77e..77a9bb1612 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -137,10 +137,9 @@ pub(crate) fn scan_mem_ranges( part_metrics: PartitionMetrics, index: RowGroupIndex, time_range: FileTimeRange, - range_builder_list: Arc, ) -> impl Stream> { try_stream! { - let ranges = range_builder_list.build_mem_ranges(&stream_ctx.input, index); + let ranges = stream_ctx.input.build_mem_ranges(index); part_metrics.inc_num_mem_ranges(ranges.len()); for range in ranges { let build_reader_start = Instant::now(); diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index bdf3a7d6b8..ca9291c0f6 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -403,7 +403,6 @@ fn build_sources( part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); Box::pin(stream) as _ } else { diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 60e5ca5c7c..28e7d64add 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -97,7 +97,6 @@ impl UnorderedScan { part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); for await batch in stream { yield batch; diff --git a/src/mito2/src/test_util/memtable_util.rs b/src/mito2/src/test_util/memtable_util.rs index f1cc57aa3b..1a0eacecf8 100644 --- a/src/mito2/src/test_util/memtable_util.rs +++ b/src/mito2/src/test_util/memtable_util.rs @@ -35,7 +35,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer}; use crate::memtable::{ BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange, - MemtableRef, MemtableStats, + MemtableRanges, MemtableRef, MemtableStats, }; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; @@ -93,8 +93,8 @@ impl Memtable for EmptyMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { - BTreeMap::new() + ) -> MemtableRanges { + MemtableRanges::default() } fn is_empty(&self) -> bool { From c6b7caa2ec246a59260e900710fde289e86f1021 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 14:39:49 +0800 Subject: [PATCH 37/46] feat: do not remove time filters in ScanRegion (#5180) * feat: do not remove time filters * chore: remove `time_range` from parquet reader * chore: print more message in the check script * chore: fix unused error --- scripts/check-snafu.py | 6 +- src/mito2/src/error.rs | 8 -- src/mito2/src/read/scan_region.rs | 11 +-- src/mito2/src/sst/parquet/reader.rs | 90 +------------------ src/query/src/tests/time_range_filter_test.rs | 4 +- src/table/src/predicate.rs | 16 ++-- 6 files changed, 17 insertions(+), 118 deletions(-) diff --git a/scripts/check-snafu.py 
b/scripts/check-snafu.py index d44edfeb8c..b91950692b 100644 --- a/scripts/check-snafu.py +++ b/scripts/check-snafu.py @@ -58,8 +58,10 @@ def main(): if not check_snafu_in_files(branch_name, other_rust_files) ] - for name in unused_snafu: - print(name) + if unused_snafu: + print("Unused error variants:") + for name in unused_snafu: + print(name) if unused_snafu: raise SystemExit(1) diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index f6d1dbafee..82b86a2155 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -756,13 +756,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to build time range filters for value: {:?}", timestamp))] - BuildTimeRangeFilter { - timestamp: Timestamp, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to open region"))] OpenRegion { #[snafu(implicit)] @@ -1023,7 +1016,6 @@ impl ErrorExt for Error { ChecksumMismatch { .. } => StatusCode::Unexpected, RegionStopped { .. } => StatusCode::RegionNotReady, TimeRangePredicateOverflow { .. } => StatusCode::InvalidArguments, - BuildTimeRangeFilter { .. } => StatusCode::Unexpected, UnsupportedOperation { .. } => StatusCode::Unsupported, RemoteCompaction { .. } => StatusCode::Unexpected, diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 946ef28841..091b9bc48c 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -355,8 +355,8 @@ impl ScanRegion { Ok(input) } - /// Build time range predicate from filters, also remove time filters from request. - fn build_time_range_predicate(&mut self) -> TimestampRange { + /// Build time range predicate from filters. + fn build_time_range_predicate(&self) -> TimestampRange { let time_index = self.version.metadata.time_index_column(); let unit = time_index .column_schema @@ -364,11 +364,7 @@ impl ScanRegion { .as_timestamp() .expect("Time index must have timestamp-compatible type") .unit(); - build_time_range_predicate( - &time_index.column_schema.name, - unit, - &mut self.request.filters, - ) + build_time_range_predicate(&time_index.column_schema.name, unit, &self.request.filters) } /// Remove field filters if the merge mode is [MergeMode::LastNonNull]. 
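For context, the helper touched in the hunk above now only borrows the request filters instead of draining them, so time filters stay in the request and remain available for ordinary predicate pushdown to the readers. Below is a minimal, self-contained sketch of the new read-only calling convention; the `ts` column name, the millisecond unit, and the literal value are illustrative only, and the module paths follow the imports visible elsewhere in this patch.

use common_time::range::TimestampRange;
use common_time::timestamp::TimeUnit;
use datafusion_common::ScalarValue;
use datafusion_expr::{col, lit, Expr};
use table::predicate::build_time_range_predicate;

fn extract_scan_range(filters: &[Expr]) -> TimestampRange {
    // Filters are only read here; time filters are no longer removed from `filters`.
    build_time_range_predicate("ts", TimeUnit::Millisecond, filters)
}

fn main() {
    // Hypothetical filter: ts >= 1000 (milliseconds).
    let filters = vec![col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(1_000), None)))];
    let range = extract_scan_range(&filters);
    // The time filter should be reflected in the extracted range...
    assert_ne!(range, TimestampRange::min_to_max());
    // ...while still being present in `filters` for later pushdown.
    assert_eq!(filters.len(), 1);
}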
@@ -695,7 +691,6 @@ impl ScanInput { .access_layer .read_sst(file.clone()) .predicate(self.predicate.clone()) - .time_range(self.time_range) .projection(Some(self.mapper.column_ids().to_vec())) .cache(self.cache_manager.clone()) .inverted_index_applier(self.inverted_index_applier.clone()) diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 335b09426e..39153fce8d 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -23,11 +23,7 @@ use api::v1::SemanticType; use async_trait::async_trait; use common_recordbatch::filter::SimpleFilterEvaluator; use common_telemetry::{debug, warn}; -use common_time::range::TimestampRange; -use common_time::timestamp::TimeUnit; -use common_time::Timestamp; -use datafusion_common::ScalarValue; -use datafusion_expr::{Expr, Operator}; +use datafusion_expr::Expr; use datatypes::arrow::record_batch::RecordBatch; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; @@ -42,7 +38,6 @@ use store_api::storage::ColumnId; use table::predicate::Predicate; use crate::cache::CacheManagerRef; -use crate::error; use crate::error::{ ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadParquetSnafu, Result, }; @@ -74,8 +69,6 @@ pub struct ParquetReaderBuilder { object_store: ObjectStore, /// Predicate to push down. predicate: Option, - /// Time range to filter. - time_range: Option, /// Metadata of columns to read. /// /// `None` reads all columns. Due to schema change, the projection @@ -104,7 +97,6 @@ impl ParquetReaderBuilder { file_handle, object_store, predicate: None, - time_range: None, projection: None, cache_manager: None, inverted_index_applier: None, @@ -120,13 +112,6 @@ impl ParquetReaderBuilder { self } - /// Attaches the time range to the builder. - #[must_use] - pub fn time_range(mut self, time_range: Option) -> ParquetReaderBuilder { - self.time_range = time_range; - self - } - /// Attaches the projection to the builder. /// /// The reader only applies the projection to fields. @@ -238,7 +223,7 @@ impl ParquetReaderBuilder { cache_manager: self.cache_manager.clone(), }; - let mut filters = if let Some(predicate) = &self.predicate { + let filters = if let Some(predicate) = &self.predicate { predicate .exprs() .iter() @@ -254,10 +239,6 @@ impl ParquetReaderBuilder { vec![] }; - if let Some(time_range) = &self.time_range { - filters.extend(time_range_to_predicate(*time_range, ®ion_meta)?); - } - let codec = McmpRowCodec::new( read_format .metadata() @@ -678,59 +659,6 @@ impl ParquetReaderBuilder { } } -/// Transforms time range into [SimpleFilterEvaluator]. 
-fn time_range_to_predicate( - time_range: TimestampRange, - metadata: &RegionMetadataRef, -) -> Result> { - let ts_col = metadata.time_index_column(); - let ts_col_id = ts_col.column_id; - - let ts_to_filter = |op: Operator, timestamp: &Timestamp| { - let value = match timestamp.unit() { - TimeUnit::Second => ScalarValue::TimestampSecond(Some(timestamp.value()), None), - TimeUnit::Millisecond => { - ScalarValue::TimestampMillisecond(Some(timestamp.value()), None) - } - TimeUnit::Microsecond => { - ScalarValue::TimestampMicrosecond(Some(timestamp.value()), None) - } - TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(timestamp.value()), None), - }; - let evaluator = SimpleFilterEvaluator::new(ts_col.column_schema.name.clone(), value, op) - .context(error::BuildTimeRangeFilterSnafu { - timestamp: *timestamp, - })?; - Ok(SimpleFilterContext::new( - evaluator, - ts_col_id, - SemanticType::Timestamp, - ts_col.column_schema.data_type.clone(), - )) - }; - - let predicates = match (time_range.start(), time_range.end()) { - (Some(start), Some(end)) => { - vec![ - ts_to_filter(Operator::GtEq, start)?, - ts_to_filter(Operator::Lt, end)?, - ] - } - - (Some(start), None) => { - vec![ts_to_filter(Operator::GtEq, start)?] - } - - (None, Some(end)) => { - vec![ts_to_filter(Operator::Lt, end)?] - } - (None, None) => { - vec![] - } - }; - Ok(predicates) -} - /// Metrics of filtering rows groups and rows. #[derive(Debug, Default, Clone, Copy)] pub(crate) struct ReaderFilterMetrics { @@ -939,20 +867,6 @@ pub(crate) struct SimpleFilterContext { } impl SimpleFilterContext { - fn new( - filter: SimpleFilterEvaluator, - column_id: ColumnId, - semantic_type: SemanticType, - data_type: ConcreteDataType, - ) -> Self { - Self { - filter, - column_id, - semantic_type, - data_type, - } - } - /// Creates a context for the `expr`. /// /// Returns None if the column to filter doesn't exist in the SST metadata or the diff --git a/src/query/src/tests/time_range_filter_test.rs b/src/query/src/tests/time_range_filter_test.rs index edb4042209..e141c99fa5 100644 --- a/src/query/src/tests/time_range_filter_test.rs +++ b/src/query/src/tests/time_range_filter_test.rs @@ -115,9 +115,9 @@ struct TimeRangeTester { impl TimeRangeTester { async fn check(&self, sql: &str, expect: TimestampRange) { let _ = exec_selection(self.engine.clone(), sql).await; - let mut filters = self.take_filters(); + let filters = self.take_filters(); - let range = build_time_range_predicate("ts", TimeUnit::Millisecond, &mut filters); + let range = build_time_range_predicate("ts", TimeUnit::Millisecond, &filters); assert_eq!(expect, range); } diff --git a/src/table/src/predicate.rs b/src/table/src/predicate.rs index 267f60b108..1fd5cdcbd3 100644 --- a/src/table/src/predicate.rs +++ b/src/table/src/predicate.rs @@ -135,21 +135,17 @@ impl Predicate { // since it requires query engine to convert sql to filters. /// `build_time_range_predicate` extracts time range from logical exprs to facilitate fast /// time range pruning. 
-pub fn build_time_range_predicate<'a>( - ts_col_name: &'a str, +pub fn build_time_range_predicate( + ts_col_name: &str, ts_col_unit: TimeUnit, - filters: &'a mut Vec, + filters: &[Expr], ) -> TimestampRange { let mut res = TimestampRange::min_to_max(); - let mut filters_remain = vec![]; - for expr in std::mem::take(filters) { - if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, &expr) { + for expr in filters { + if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, expr) { res = res.and(&range); - } else { - filters_remain.push(expr); } } - *filters = filters_remain; res } @@ -392,7 +388,7 @@ mod tests { fn check_build_predicate(expr: Expr, expect: TimestampRange) { assert_eq!( expect, - build_time_range_predicate("ts", TimeUnit::Millisecond, &mut vec![expr]) + build_time_range_predicate("ts", TimeUnit::Millisecond, &[expr]) ); } From 9b4e8555e256eedf1bf7f2b6c56b077a7ac4e405 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Wed, 18 Dec 2024 16:17:34 +0800 Subject: [PATCH 38/46] feat: extract hints from http header (#5128) * feat: extract hints from http header * Update src/servers/src/http/hints.rs Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> * chore: by comment * refactor: get instead of loop --------- Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> --- src/servers/src/grpc/database.rs | 57 +---------- src/servers/src/hint_headers.rs | 170 +++++++++++++++++++++++++++++++ src/servers/src/http.rs | 4 +- src/servers/src/http/hints.rs | 30 ++++++ src/servers/src/lib.rs | 1 + 5 files changed, 207 insertions(+), 55 deletions(-) create mode 100644 src/servers/src/hint_headers.rs create mode 100644 src/servers/src/http/hints.rs diff --git a/src/servers/src/grpc/database.rs b/src/servers/src/grpc/database.rs index 572f3c66f4..121d8c6c85 100644 --- a/src/servers/src/grpc/database.rs +++ b/src/servers/src/grpc/database.rs @@ -20,13 +20,11 @@ use common_error::status_code::StatusCode; use common_query::OutputData; use common_telemetry::{debug, warn}; use futures::StreamExt; -use tonic::metadata::{KeyAndValueRef, MetadataMap}; use tonic::{Request, Response, Status, Streaming}; use crate::grpc::greptime_handler::GreptimeRequestHandler; use crate::grpc::{cancellation, TonicResult}; - -pub const GREPTIME_DB_HEADER_HINT_PREFIX: &str = "x-greptime-hint-"; +use crate::hint_headers; pub(crate) struct DatabaseService { handler: GreptimeRequestHandler, @@ -45,7 +43,7 @@ impl GreptimeDatabase for DatabaseService { request: Request, ) -> TonicResult> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::Handle: request from {:?} with hints: {:?}", remote_addr, hints @@ -91,7 +89,7 @@ impl GreptimeDatabase for DatabaseService { request: Request>, ) -> Result, Status> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::HandleRequests: request from {:?} with hints: {:?}", remote_addr, hints @@ -142,52 +140,3 @@ impl GreptimeDatabase for DatabaseService { cancellation::with_cancellation_handler(request_future, cancellation_future).await } } - -fn extract_hints(metadata: &MetadataMap) -> Vec<(String, String)> { - metadata - .iter() - .filter_map(|kv| { - let KeyAndValueRef::Ascii(key, value) = kv else { - return None; - }; - let key = key.as_str(); - let new_key = 
key.strip_prefix(GREPTIME_DB_HEADER_HINT_PREFIX)?; - let Ok(value) = value.to_str() else { - // Simply return None for non-string values. - return None; - }; - Some((new_key.to_string(), value.trim().to_string())) - }) - .collect() -} - -#[cfg(test)] -mod tests { - use tonic::metadata::MetadataValue; - - use super::*; - - #[test] - fn test_extract_hints() { - let mut metadata = MetadataMap::new(); - let prev = metadata.insert( - "x-greptime-hint-append_mode", - MetadataValue::from_static("true"), - ); - metadata.insert("test-key", MetadataValue::from_static("test-value")); - assert!(prev.is_none()); - let hints = extract_hints(&metadata); - assert_eq!(hints, vec![("append_mode".to_string(), "true".to_string())]); - } - - #[test] - fn extract_hints_ignores_non_ascii_metadata() { - let mut metadata = MetadataMap::new(); - metadata.insert_bin( - "x-greptime-hint-merge_mode-bin", - MetadataValue::from_bytes(b"last_non_null"), - ); - let hints = extract_hints(&metadata); - assert!(hints.is_empty()); - } -} diff --git a/src/servers/src/hint_headers.rs b/src/servers/src/hint_headers.rs new file mode 100644 index 0000000000..6dafd45196 --- /dev/null +++ b/src/servers/src/hint_headers.rs @@ -0,0 +1,170 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use http::HeaderMap; +use tonic::metadata::MetadataMap; + +pub const HINT_KEYS: [&str; 5] = [ + "x-greptime-hint-auto_create_table", + "x-greptime-hint-ttl", + "x-greptime-hint-append_mode", + "x-greptime-hint-merge_mode", + "x-greptime-hint-physical_table", +]; + +pub(crate) fn extract_hints(headers: &T) -> Vec<(String, String)> { + let mut hints = Vec::new(); + for key in HINT_KEYS.iter() { + if let Some(value) = headers.get(key) { + let new_key = key.replace("x-greptime-hint-", ""); + hints.push((new_key, value.trim().to_string())); + } + } + hints +} + +pub(crate) trait ToHeaderMap { + fn get(&self, key: &str) -> Option<&str>; +} + +impl ToHeaderMap for MetadataMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} + +impl ToHeaderMap for HeaderMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} +#[cfg(test)] +mod tests { + use http::header::{HeaderMap, HeaderValue}; + use tonic::metadata::{MetadataMap, MetadataValue}; + + use super::*; + + #[test] + fn test_extract_hints_with_full_header_map() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + headers.insert( + "x-greptime-hint-append_mode", + HeaderValue::from_static("true"), + ); + headers.insert( + "x-greptime-hint-merge_mode", + HeaderValue::from_static("false"), + ); + headers.insert( + "x-greptime-hint-physical_table", + HeaderValue::from_static("table1"), + ); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + ("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_missing_keys() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } + + #[test] + fn test_extract_hints_with_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + metadata.insert( + "x-greptime-hint-append_mode", + MetadataValue::from_static("true"), + ); + metadata.insert( + "x-greptime-hint-merge_mode", + MetadataValue::from_static("false"), + ); + metadata.insert( + "x-greptime-hint-physical_table", + MetadataValue::from_static("table1"), + ); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + 
("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_partial_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 1107870c9a..9841f02d6e 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -92,6 +92,7 @@ mod timeout; pub(crate) use timeout::DynamicTimeoutLayer; +mod hints; #[cfg(any(test, feature = "testing"))] pub mod test_helpers; @@ -703,7 +704,8 @@ impl HttpServer { .layer(middleware::from_fn_with_state( AuthState::new(self.user_provider.clone()), authorize::check_http_auth, - )), + )) + .layer(middleware::from_fn(hints::extract_hints)), ) // Handlers for debug, we don't expect a timeout. .nest( diff --git a/src/servers/src/http/hints.rs b/src/servers/src/http/hints.rs new file mode 100644 index 0000000000..4612201880 --- /dev/null +++ b/src/servers/src/http/hints.rs @@ -0,0 +1,30 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axum::http::Request; +use axum::middleware::Next; +use axum::response::Response; +use session::context::QueryContext; + +use crate::hint_headers; + +pub async fn extract_hints(mut request: Request, next: Next) -> Response { + let hints = hint_headers::extract_hints(request.headers()); + if let Some(query_ctx) = request.extensions_mut().get_mut::() { + for (key, value) in hints { + query_ctx.set_extension(key, value); + } + } + next.run(request).await +} diff --git a/src/servers/src/lib.rs b/src/servers/src/lib.rs index ce6857c6d2..92f2b8b9d0 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -27,6 +27,7 @@ pub mod error; pub mod export_metrics; pub mod grpc; pub mod heartbeat_options; +mod hint_headers; pub mod http; pub mod influxdb; pub mod interceptor; From fa773cf48031e4da6df6997cb8aa572dad3f896b Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:24:15 +0800 Subject: [PATCH 39/46] fix(sqlness): enforce order in union tests (#5190) Add ORDER BY clause to subquery union tests Updated the SQL and result files for subquery union tests to include an ORDER BY clause, ensuring consistent result ordering. This change aligns with the test case from the DuckDB repository. 
--- tests/cases/standalone/common/subquery/table.result | 2 +- tests/cases/standalone/common/subquery/table.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cases/standalone/common/subquery/table.result b/tests/cases/standalone/common/subquery/table.result index 8cea3aed13..549a387549 100644 --- a/tests/cases/standalone/common/subquery/table.result +++ b/tests/cases/standalone/common/subquery/table.result @@ -63,7 +63,7 @@ Affected Rows: 0 -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43) ORDER BY 1; +-----------+ | Int64(42) | diff --git a/tests/cases/standalone/common/subquery/table.sql b/tests/cases/standalone/common/subquery/table.sql index 9f53aef301..d49f3af739 100644 --- a/tests/cases/standalone/common/subquery/table.sql +++ b/tests/cases/standalone/common/subquery/table.sql @@ -24,7 +24,7 @@ DROP TABLE test; -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43) ORDER BY 1; -- table subquery, from: -- https://github.com/duckdb/duckdb/blob/8704c7d0807d6ce1e2ebcdf6398e1b6cc050e507/test/sql/subquery/table/test_table_subquery.test From f04d3802598aa049fdd93900e180df2dececf7e4 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 18 Dec 2024 16:51:46 +0800 Subject: [PATCH 40/46] fix: validate matcher op for __name__ in promql (#5191) Signed-off-by: Ruihang Xia --- src/query/src/promql/planner.rs | 7 +++++++ tests/cases/standalone/common/tql/basic.result | 4 ++++ tests/cases/standalone/common/tql/basic.sql | 2 ++ 3 files changed, 13 insertions(+) diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 1e7bc27dab..bfdfb5981a 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -689,6 +689,13 @@ impl PromPlanner { let mut matches = label_matchers.find_matchers(METRIC_NAME); ensure!(!matches.is_empty(), NoMetricMatcherSnafu); ensure!(matches.len() == 1, MultipleMetricMatchersSnafu); + ensure!( + matches[0].op == MatchOp::Equal, + UnsupportedMatcherOpSnafu { + matcher_op: matches[0].op.to_string(), + matcher: METRIC_NAME + } + ); metric_name = matches.pop().map(|m| m.value); } diff --git a/tests/cases/standalone/common/tql/basic.result b/tests/cases/standalone/common/tql/basic.result index 5c6725dbcd..3015101a55 100644 --- a/tests/cases/standalone/common/tql/basic.result +++ b/tests/cases/standalone/common/tql/basic.result @@ -66,6 +66,10 @@ TQL EVAL (0, 10, '5s') {__name__!="test"}; Error: 2000(InvalidSyntax), vector selector must contain at least one non-empty matcher +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + +Error: 1004(InvalidArguments), Matcher operator =~ is not supported for __name__ + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; diff --git a/tests/cases/standalone/common/tql/basic.sql b/tests/cases/standalone/common/tql/basic.sql index 85f2948148..afca586ed8 100644 --- a/tests/cases/standalone/common/tql/basic.sql +++ b/tests/cases/standalone/common/tql/basic.sql @@ -22,6 +22,8 @@ TQL EVAL (0, 10, '5s') {__name__="test", __field__="i"}; -- NOT SUPPORTED: `__name__` matcher without equal 
condition TQL EVAL (0, 10, '5s') {__name__!="test"}; +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; From 218236cc5b2b444346431263d35629715df4b155 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 17:10:56 +0800 Subject: [PATCH 41/46] docs: fix grafana dashboard row (#5192) --- grafana/greptimedb.json | 2838 ++++++++++++++++++++------------------- 1 file changed, 1420 insertions(+), 1418 deletions(-) diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index c526373874..9657565c27 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -2707,754 +2707,755 @@ "y": 48 }, "id": 21, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, 
+ "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" + } + ], "title": "Storage Components", "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - 
}, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 18, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "OpenDAL operation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 43, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_object_store_lru_cache_bytes", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 44, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache hit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - 
"custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 63 - }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 63 - }, - "id": 12, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": 
false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p99", - "range": true, - "refId": "C", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" - } - ], - "title": "WAL write size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 70 - }, - "id": 37, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", - "range": true, - "refId": "Log Store P95" - } - ], - "title": "WAL sync duration seconds", - "type": "timeseries" - }, - { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -3462,681 +3463,682 @@ "y": 49 }, "id": 46, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 45, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_index_create_memory_usage", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_index_apply_memory_usage", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "Index memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 19, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + 
"range": true, + "refId": "Create P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P99", + "useBackend": false + } + ], + "title": "Index elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 47, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Index create rows total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 48, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index create bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 49, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 50, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO op", + "type": "timeseries" + } + ], "title": "Index", "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 45, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_index_create_memory_usage", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "greptime_index_apply_memory_usage", - "hide": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "B" - } - ], - "title": "Index memory usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 19, - "interval": "1s", - "options": { - "legend": { - "calcs": 
[], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P99", - "useBackend": false - } - ], - "title": "Index elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 47, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - 
"legendFormat": "{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Index create rows total", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 48, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Index create bytes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 49, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": 
"{{instance}}-{{type}}-{{file_type}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Index IO bytes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 64 - }, - "id": 50, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}-{{file_type}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Index IO op", - "type": "timeseries" } ], "refresh": "10s", @@ -4153,6 +4155,6 @@ "timezone": "", "title": "GreptimeDB", "uid": "e7097237-669b-4f8d-b751-13067afbfb68", - "version": 17, + "version": 18, "weekStart": "" } From 548e1988ab4256fb6443e9e800f36603ec29a4fb Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 18 Dec 2024 19:24:43 +0800 Subject: [PATCH 42/46] refactor: remove unused symbols (#5193) chore: remove unused symbols Signed-off-by: Ruihang Xia --- src/cmd/src/datanode.rs | 4 -- src/cmd/src/flownode.rs | 4 -- .../meta/src/cache/table/table_route.rs | 8 --- src/common/meta/src/key/table_route.rs | 53 ------------------- src/common/meta/src/rpc/router.rs | 46 ---------------- src/common/recordbatch/src/lib.rs | 14 ----- src/common/time/src/util.rs | 4 -- src/flow/src/adapter.rs | 22 -------- src/flow/src/compute/render/src_sink.rs | 44 +-------------- src/flow/src/compute/types.rs | 16 ------ src/flow/src/expr/error.rs | 5 -- src/flow/src/expr/linear.rs | 28 ---------- src/flow/src/plan.rs | 46 +--------------- src/flow/src/repr/relation.rs | 8 --- src/meta-srv/src/metasrv.rs | 4 -- src/meta-srv/src/mocks.rs | 5 -- .../src/procedure/region_migration.rs | 6 --- src/script/src/python/ffi_types/copr.rs | 20 ------- src/session/src/lib.rs | 4 -- src/sql/src/statements.rs | 20 +------ 20 files changed, 4 insertions(+), 357 deletions(-) diff --git a/src/cmd/src/datanode.rs b/src/cmd/src/datanode.rs index 811ed826ad..be2aedf57e 100644 --- a/src/cmd/src/datanode.rs +++ b/src/cmd/src/datanode.rs @@ -59,10 +59,6 @@ impl Instance { } } - pub fn datanode_mut(&mut self) -> &mut Datanode { - &mut self.datanode - } - pub fn datanode(&self) 
-> &Datanode { &self.datanode } diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index a9ad12bfbc..b399bf37f7 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -63,10 +63,6 @@ impl Instance { } } - pub fn flownode_mut(&mut self) -> &mut FlownodeInstance { - &mut self.flownode - } - pub fn flownode(&self) -> &FlownodeInstance { &self.flownode } diff --git a/src/common/meta/src/cache/table/table_route.rs b/src/common/meta/src/cache/table/table_route.rs index 2383a1ea13..840e52f8ae 100644 --- a/src/common/meta/src/cache/table/table_route.rs +++ b/src/common/meta/src/cache/table/table_route.rs @@ -49,14 +49,6 @@ impl TableRoute { TableRoute::Logical(_) => None, } } - - /// Returns [LogicalTableRouteValue] reference if it's [TableRoute::Logical]; Otherwise it returns [None]. - pub fn as_logical_table_route_ref(&self) -> Option<&Arc> { - match self { - TableRoute::Physical(_) => None, - TableRoute::Logical(table_route) => Some(table_route), - } - } } /// [TableRouteCache] caches the [TableId] to [TableRoute] mapping. diff --git a/src/common/meta/src/key/table_route.rs b/src/common/meta/src/key/table_route.rs index 96949d2b9f..b5ebf0b4b1 100644 --- a/src/common/meta/src/key/table_route.rs +++ b/src/common/meta/src/key/table_route.rs @@ -290,28 +290,6 @@ impl TableRouteManager { } } - /// Returns the [`PhysicalTableRouteValue`] in the first level, - /// It won't follow the [`LogicalTableRouteValue`] to find the next level [`PhysicalTableRouteValue`]. - /// - /// Returns an error if the first level value is not a [`PhysicalTableRouteValue`]. - pub async fn try_get_physical_table_route( - &self, - table_id: TableId, - ) -> Result> { - match self.storage.get(table_id).await? { - Some(route) => { - ensure!( - route.is_physical(), - UnexpectedLogicalRouteTableSnafu { - err_msg: format!("{route:?} is a non-physical TableRouteValue.") - } - ); - Ok(Some(route.into_physical_table_route())) - } - None => Ok(None), - } - } - /// Returns the [TableId] recursively. /// /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: @@ -569,37 +547,6 @@ impl TableRouteStorage { .transpose() } - /// Returns the physical `DeserializedValueWithBytes` recursively. - /// - /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: - /// - the physical table(`logical_or_physical_table_id`) does not exist - /// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist. - pub async fn get_physical_table_route_with_raw_bytes( - &self, - logical_or_physical_table_id: TableId, - ) -> Result<(TableId, DeserializedValueWithBytes)> { - let table_route = self - .get_with_raw_bytes(logical_or_physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: logical_or_physical_table_id, - })?; - - match table_route.get_inner_ref() { - TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)), - TableRouteValue::Logical(x) => { - let physical_table_id = x.physical_table_id(); - let physical_table_route = self - .get_with_raw_bytes(physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: physical_table_id, - })?; - Ok((physical_table_id, physical_table_route)) - } - } - } - /// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`. 
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result>> { let mut table_routes = self.batch_get_inner(table_ids).await?; diff --git a/src/common/meta/src/rpc/router.rs b/src/common/meta/src/rpc/router.rs index dd7349ae8f..0e700cc6da 100644 --- a/src/common/meta/src/rpc/router.rs +++ b/src/common/meta/src/rpc/router.rs @@ -89,39 +89,6 @@ pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap>() } -/// Returns the HashMap<[RegionNumber], HashSet> -pub fn convert_to_region_peer_map( - region_routes: &[RegionRoute], -) -> HashMap> { - region_routes - .iter() - .map(|x| { - let set = x - .follower_peers - .iter() - .map(|p| p.id) - .chain(x.leader_peer.as_ref().map(|p| p.id)) - .collect::>(); - - (x.region.id.region_number(), set) - }) - .collect::>() -} - -/// Returns the HashMap<[RegionNumber], [LeaderState]>; -pub fn convert_to_region_leader_state_map( - region_routes: &[RegionRoute], -) -> HashMap { - region_routes - .iter() - .filter_map(|x| { - x.leader_state - .as_ref() - .map(|state| (x.region.id.region_number(), *state)) - }) - .collect::>() -} - pub fn find_region_leader( region_routes: &[RegionRoute], region_number: RegionNumber, @@ -147,19 +114,6 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve .collect() } -pub fn extract_all_peers(region_routes: &[RegionRoute]) -> Vec { - let mut peers = region_routes - .iter() - .flat_map(|x| x.leader_peer.iter().chain(x.follower_peers.iter())) - .collect::>() - .into_iter() - .cloned() - .collect::>(); - peers.sort_by_key(|x| x.id); - - peers -} - impl TableRoute { pub fn new(table: Table, region_routes: Vec) -> Self { let region_leaders = region_routes diff --git a/src/common/recordbatch/src/lib.rs b/src/common/recordbatch/src/lib.rs index 257b6f0973..0281b45749 100644 --- a/src/common/recordbatch/src/lib.rs +++ b/src/common/recordbatch/src/lib.rs @@ -26,7 +26,6 @@ use std::sync::Arc; use adapter::RecordBatchMetrics; use arc_swap::ArcSwapOption; -use datafusion::physical_plan::memory::MemoryStream; pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datatypes::arrow::compute::SortOptions; pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch; @@ -170,19 +169,6 @@ impl RecordBatches { index: 0, }) } - - pub fn into_df_stream(self) -> DfSendableRecordBatchStream { - let df_record_batches = self - .batches - .into_iter() - .map(|batch| batch.into_df_record_batch()) - .collect(); - // unwrap safety: `MemoryStream::try_new` won't fail - Box::pin( - MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None) - .unwrap(), - ) - } } impl IntoIterator for RecordBatches { diff --git a/src/common/time/src/util.rs b/src/common/time/src/util.rs index 19fe3bc911..ccb9e1bdd0 100644 --- a/src/common/time/src/util.rs +++ b/src/common/time/src/util.rs @@ -29,10 +29,6 @@ pub fn format_utc_datetime(utc: &NaiveDateTime, pattern: &str) -> String { } } -pub fn system_datetime_to_utc(local: &NaiveDateTime) -> LocalResult { - datetime_to_utc(local, get_timezone(None)) -} - /// Cast a [`NaiveDateTime`] with the given timezone. 
pub fn datetime_to_utc( datetime: &NaiveDateTime, diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 80d03e2770..7d9ae5e422 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -206,28 +206,6 @@ impl DiffRequest { } } -/// iterate through the diff row and form continuous diff row with same diff type -pub fn diff_row_to_request(rows: Vec) -> Vec { - let mut reqs = Vec::new(); - for (row, ts, diff) in rows { - let last = reqs.last_mut(); - match (last, diff) { - (Some(DiffRequest::Insert(rows)), 1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - (Some(DiffRequest::Delete(rows)), -1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - _ => {} - } - } - reqs -} - pub fn batches_to_rows_req(batches: Vec) -> Result, Error> { let mut reqs = Vec::new(); for batch in batches { diff --git a/src/flow/src/compute/render/src_sink.rs b/src/flow/src/compute/render/src_sink.rs index 62e733420b..cc8cf01ff7 100644 --- a/src/flow/src/compute/render/src_sink.rs +++ b/src/flow/src/compute/render/src_sink.rs @@ -14,7 +14,7 @@ //! Source and Sink for the dataflow -use std::collections::{BTreeMap, VecDeque}; +use std::collections::BTreeMap; use common_telemetry::{debug, trace}; use hydroflow::scheduled::graph_ext::GraphExt; @@ -28,7 +28,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff}; use crate::error::{Error, PlanSnafu}; use crate::expr::error::InternalSnafu; use crate::expr::{Batch, EvalError}; -use crate::repr::{DiffRow, Row, BROADCAST_CAP}; +use crate::repr::{DiffRow, Row}; #[allow(clippy::mutable_key_type)] impl Context<'_, '_> { @@ -242,44 +242,4 @@ impl Context<'_, '_> { }, ); } - - /// Render a sink which send updates to broadcast channel, have internal buffer in case broadcast channel is full - pub fn render_sink(&mut self, bundle: CollectionBundle, sender: broadcast::Sender) { - let CollectionBundle { - collection, - arranged: _, - } = bundle; - let mut buf = VecDeque::with_capacity(1000); - - let schd = self.compute_state.get_scheduler(); - let inner_schd = schd.clone(); - let now = self.compute_state.current_time_ref(); - - let sink = self - .df - .add_subgraph_sink("Sink", collection.into_inner(), move |_ctx, recv| { - let data = recv.take_inner(); - buf.extend(data.into_iter().flat_map(|i| i.into_iter())); - if sender.len() >= BROADCAST_CAP { - return; - } else { - while let Some(row) = buf.pop_front() { - // if the sender is full, stop sending - if sender.len() >= BROADCAST_CAP { - break; - } - // TODO(discord9): handling tokio broadcast error - let _ = sender.send(row); - } - } - - // if buffer is not empty, schedule the next run at next tick - // so the buffer can be drained as soon as possible - if !buf.is_empty() { - inner_schd.schedule_at(*now.borrow() + 1); - } - }); - - schd.set_cur_subgraph(sink); - } } diff --git a/src/flow/src/compute/types.rs b/src/flow/src/compute/types.rs index 00ed660a6e..e125a2d272 100644 --- a/src/flow/src/compute/types.rs +++ b/src/flow/src/compute/types.rs @@ -82,22 +82,6 @@ impl Arranged { writer: self.writer.clone(), }) } - - /// Copy the full arrangement, including the future and the current updates. 
- /// - /// Internally `Rc-ed` so it's cheap to copy - pub fn try_copy_full(&self) -> Option { - self.arrangement - .clone_full_arrange() - .map(|arrangement| Arranged { - arrangement, - readers: self.readers.clone(), - writer: self.writer.clone(), - }) - } - pub fn add_reader(&self, id: SubgraphId) { - self.readers.borrow_mut().push(id) - } } /// A bundle of the various ways a collection can be represented. diff --git a/src/flow/src/expr/error.rs b/src/flow/src/expr/error.rs index 4b69b3df23..992d5c5921 100644 --- a/src/flow/src/expr/error.rs +++ b/src/flow/src/expr/error.rs @@ -21,11 +21,6 @@ use datafusion_common::DataFusionError; use datatypes::data_type::ConcreteDataType; use snafu::{Location, Snafu}; -fn is_send_sync() { - fn check() {} - check::(); -} - /// EvalError is about errors happen on columnar evaluation /// /// TODO(discord9): add detailed location of column/operator(instead of code) to errors tp help identify related column diff --git a/src/flow/src/expr/linear.rs b/src/flow/src/expr/linear.rs index 8e220f7d86..373e467aba 100644 --- a/src/flow/src/expr/linear.rs +++ b/src/flow/src/expr/linear.rs @@ -359,14 +359,6 @@ impl MapFilterProject { ) } - /// Convert the `MapFilterProject` into a staged evaluation plan. - /// - /// The main behavior is extract temporal predicates, which cannot be evaluated - /// using the standard machinery. - pub fn into_plan(self) -> Result { - MfpPlan::create_from(self) - } - /// Lists input columns whose values are used in outputs. /// /// It is entirely appropriate to determine the demand of an instance @@ -602,26 +594,6 @@ impl SafeMfpPlan { } } - /// A version of `evaluate` which produces an iterator over `Datum` - /// as output. - /// - /// This version can be useful when one wants to capture the resulting - /// datums without packing and then unpacking a row. - #[inline(always)] - pub fn evaluate_iter<'a>( - &'a self, - datums: &'a mut Vec, - ) -> Result + 'a>, EvalError> { - let passed_predicates = self.evaluate_inner(datums)?; - if !passed_predicates { - Ok(None) - } else { - Ok(Some( - self.mfp.projection.iter().map(move |i| datums[*i].clone()), - )) - } - } - /// Populates `values` with `self.expressions` and tests `self.predicates`. /// /// This does not apply `self.projection`, which is up to the calling method. diff --git a/src/flow/src/plan.rs b/src/flow/src/plan.rs index dc86b984ed..e1cf22e621 100644 --- a/src/flow/src/plan.rs +++ b/src/flow/src/plan.rs @@ -18,10 +18,8 @@ mod join; mod reduce; -use std::collections::BTreeSet; - use crate::error::Error; -use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; +use crate::expr::{Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; use crate::plan::join::JoinPlan; pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan}; use crate::repr::{DiffRow, RelationDesc}; @@ -186,48 +184,6 @@ pub enum Plan { }, } -impl Plan { - /// Find all the used collection in the plan - pub fn find_used_collection(&self) -> BTreeSet { - fn recur_find_use(plan: &Plan, used: &mut BTreeSet) { - match plan { - Plan::Get { id } => { - match id { - Id::Local(_) => (), - Id::Global(g) => { - used.insert(*g); - } - }; - } - Plan::Let { value, body, .. } => { - recur_find_use(&value.plan, used); - recur_find_use(&body.plan, used); - } - Plan::Mfp { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Reduce { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Join { inputs, .. 
} => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - Plan::Union { inputs, .. } => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - _ => {} - } - } - let mut ret = Default::default(); - recur_find_use(self, &mut ret); - ret - } -} - impl Plan { pub fn with_types(self, schema: RelationDesc) -> TypedPlan { TypedPlan { schema, plan: self } diff --git a/src/flow/src/repr/relation.rs b/src/flow/src/repr/relation.rs index 54ad1c5e8e..d0fbb861eb 100644 --- a/src/flow/src/repr/relation.rs +++ b/src/flow/src/repr/relation.rs @@ -46,14 +46,6 @@ impl Key { self.column_indices.push(col); } - /// Add columns to Key - pub fn add_cols(&mut self, cols: I) - where - I: IntoIterator, - { - self.column_indices.extend(cols); - } - /// Remove a column from Key pub fn remove_col(&mut self, col: usize) { self.column_indices.retain(|&r| r != col); diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index da614ac9b9..c7dcd81e9f 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -204,10 +204,6 @@ impl Context { pub fn reset_in_memory(&self) { self.in_memory.reset(); } - - pub fn reset_leader_cached_kv_backend(&self) { - self.leader_cached_kv_backend.reset(); - } } /// The value of the leader. It is used to store the leader's address. diff --git a/src/meta-srv/src/mocks.rs b/src/meta-srv/src/mocks.rs index cf9144dc39..9611fcdd13 100644 --- a/src/meta-srv/src/mocks.rs +++ b/src/meta-srv/src/mocks.rs @@ -52,11 +52,6 @@ pub async fn mock_with_etcdstore(addr: &str) -> MockInfo { mock(Default::default(), kv_backend, None, None, None).await } -pub async fn mock_with_memstore_and_selector(selector: SelectorRef) -> MockInfo { - let kv_backend = Arc::new(MemoryKvBackend::new()); - mock(Default::default(), kv_backend, Some(selector), None, None).await -} - pub async fn mock( opts: MetasrvOptions, kv_backend: KvBackendRef, diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 40df9401cb..1baa0c04d4 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -364,12 +364,6 @@ impl Context { Ok(datanode_value.as_ref().unwrap()) } - /// Removes the `table_info` of [VolatileContext], returns true if any. - pub fn remove_table_info_value(&mut self) -> bool { - let value = self.volatile_ctx.table_info.take(); - value.is_some() - } - /// Returns the [RegionId]. 
pub fn region_id(&self) -> RegionId { self.persistent_ctx.region_id diff --git a/src/script/src/python/ffi_types/copr.rs b/src/script/src/python/ffi_types/copr.rs index 1a9a88466b..e0037550a6 100644 --- a/src/script/src/python/ffi_types/copr.rs +++ b/src/script/src/python/ffi_types/copr.rs @@ -499,26 +499,6 @@ pub fn exec_parsed( } } -/// execute script just like [`exec_coprocessor`] do, -/// but instead of return a internal [`Error`] type, -/// return a friendly String format of error -/// -/// use `ln_offset` and `filename` to offset line number and mark file name in error prompt -#[cfg(test)] -#[allow(dead_code)] -pub fn exec_copr_print( - script: &str, - rb: &Option, - ln_offset: usize, - filename: &str, - eval_ctx: &EvalContext, -) -> StdResult { - let res = exec_coprocessor(script, rb, eval_ctx); - res.map_err(|e| { - crate::python::error::pretty_print_error_in_src(script, &e, ln_offset, filename) - }) -} - #[cfg(test)] mod tests { use crate::python::ffi_types::copr::parse::parse_and_compile_copr; diff --git a/src/session/src/lib.rs b/src/session/src/lib.rs index f553fef58c..c018d47ebc 100644 --- a/src/session/src/lib.rs +++ b/src/session/src/lib.rs @@ -97,10 +97,6 @@ impl Session { &self.conn_info } - pub fn mut_conn_info(&mut self) -> &mut ConnInfo { - &mut self.conn_info - } - pub fn timezone(&self) -> Timezone { self.mutable_inner.read().unwrap().timezone.clone() } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 00196ed531..90db401cba 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -34,10 +34,8 @@ pub mod truncate; use std::str::FromStr; use api::helper::ColumnDataTypeWrapper; -use api::v1::add_column_location::LocationType; -use api::v1::{AddColumnLocation as Location, SemanticType}; +use api::v1::SemanticType; use common_base::bytes::Bytes; -use common_query::AddColumnLocation; use common_time::timezone::Timezone; use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; @@ -688,22 +686,6 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu } } -pub fn sql_location_to_grpc_add_column_location( - location: &Option, -) -> Option { - match location { - Some(AddColumnLocation::First) => Some(Location { - location_type: LocationType::First.into(), - after_column_name: String::default(), - }), - Some(AddColumnLocation::After { column_name }) => Some(Location { - location_type: LocationType::After.into(), - after_column_name: column_name.to_string(), - }), - None => None, - } -} - #[cfg(test)] mod tests { use std::assert_matches::assert_matches; From 2107737db196561b6453ac148c7afb6a1550eaf7 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Wed, 18 Dec 2024 20:41:24 +0800 Subject: [PATCH 43/46] chore: make nix compilation environment config more robust (#5183) * chore: improve nix-shell support * fix: add pkg-config * ci: add a github action to ensure build on clean system * ci: optimise dependencies of task * ci: move clean build to nightly --- .github/workflows/nightly-ci.yml | 11 +++++++++++ rust-toolchain.toml | 1 + shell.nix | 15 ++++++++++----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index b6ff247ffb..285fb61a7c 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -114,6 +114,17 @@ jobs: GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }} UNITTEST_LOG_DIR: "__unittest_logs" + cleanbuild-linux-nix: + runs-on: ubuntu-latest-8-cores + timeout-minutes: 60 + needs: 
[coverage, fmt, clippy, check] + steps: + - uses: actions/checkout@v4 + - uses: cachix/install-nix-action@v27 + with: + nix_path: nixpkgs=channel:nixos-unstable + - run: nix-shell --pure --run "cargo build" + check-status: name: Check status needs: [sqlness-test, sqlness-windows, test-on-windows] diff --git a/rust-toolchain.toml b/rust-toolchain.toml index c986eedd97..d12222a5d3 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,3 @@ [toolchain] channel = "nightly-2024-10-19" +components = ["rust-analyzer"] diff --git a/shell.nix b/shell.nix index b255fe845c..ce84a03276 100644 --- a/shell.nix +++ b/shell.nix @@ -4,19 +4,24 @@ let pkgs = import nixpkgs { config = {}; overlays = []; }; in -pkgs.mkShellNoCC { - packages = with pkgs; [ +pkgs.mkShell rec { + nativeBuildInputs = with pkgs; [ + pkg-config git clang gcc - mold - libgit2 protobuf + mold (fenix.fromToolchainFile { dir = ./.; }) - fenix.rust-analyzer cargo-nextest + taplo ]; + buildInputs = with pkgs; [ + libgit2 + ]; + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs; } From c9ad8c7101a182d79b5f255e28c3886d6609d12c Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Wed, 18 Dec 2024 23:15:55 +0800 Subject: [PATCH 44/46] feat: show create postgresql foreign table (#5143) * feat: add show create table for pg in parser * feat: implement show create table operation * fix: adopt upstream changes --- src/datatypes/src/data_type.rs | 45 +++++++++++++++++ src/operator/src/statement.rs | 13 ++++- src/operator/src/statement/show.rs | 20 ++++++++ src/query/src/sql.rs | 48 +++++++++++++++++++ src/sql/src/parsers/show_parser.rs | 20 ++++++-- src/sql/src/statements/show.rs | 46 +++++++++++++++++- .../standalone/common/show/show_create.result | 30 ++++++++++++ .../standalone/common/show/show_create.sql | 4 ++ 8 files changed, 219 insertions(+), 7 deletions(-) diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 8f81a0c86f..b3342cc6f5 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -370,6 +370,51 @@ impl ConcreteDataType { _ => None, } } + + /// Return the datatype name in postgres type system + pub fn postgres_datatype_name(&self) -> &'static str { + match self { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4", + &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8", + &ConcreteDataType::Float32(_) => "FLOAT4", + &ConcreteDataType::Float64(_) => "FLOAT8", + &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA", + &ConcreteDataType::String(_) => "VARCHAR", + &ConcreteDataType::Date(_) => "DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP", + &ConcreteDataType::Time(_) => "TIME", + &ConcreteDataType::Interval(_) => "INTERVAL", + &ConcreteDataType::Decimal128(_) => "NUMERIC", + &ConcreteDataType::Json(_) => "JSON", + ConcreteDataType::List(list) => match list.item_type() { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "_BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4", + &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8", + 
&ConcreteDataType::Float32(_) => "_FLOAT4", + &ConcreteDataType::Float64(_) => "_FLOAT8", + &ConcreteDataType::Binary(_) => "_BYTEA", + &ConcreteDataType::String(_) => "_VARCHAR", + &ConcreteDataType::Date(_) => "_DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP", + &ConcreteDataType::Time(_) => "_TIME", + &ConcreteDataType::Interval(_) => "_INTERVAL", + &ConcreteDataType::Decimal128(_) => "_NUMERIC", + &ConcreteDataType::Json(_) => "_JSON", + &ConcreteDataType::Duration(_) + | &ConcreteDataType::Dictionary(_) + | &ConcreteDataType::Vector(_) + | &ConcreteDataType::List(_) => "UNKNOWN", + }, + &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN", + } + } } impl From<&ConcreteDataType> for ConcreteDataType { diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index b3251ca6bf..ad842a40fe 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -59,6 +59,7 @@ use set::set_query_timeout; use snafu::{ensure, OptionExt, ResultExt}; use sql::statements::copy::{CopyDatabase, CopyDatabaseArgument, CopyTable, CopyTableArgument}; use sql::statements::set_variables::SetVariables; +use sql::statements::show::ShowCreateTableVariant; use sql::statements::statement::Statement; use sql::statements::OptionMap; use sql::util::format_raw_object_name; @@ -317,8 +318,16 @@ impl StatementExecutor { .context(TableNotFoundSnafu { table_name: &table })?; let table_name = TableName::new(catalog, schema, table); - self.show_create_table(table_name, table_ref, query_ctx) - .await + match show.variant { + ShowCreateTableVariant::Original => { + self.show_create_table(table_name, table_ref, query_ctx) + .await + } + ShowCreateTableVariant::PostgresForeignTable => { + self.show_create_table_for_pg(table_name, table_ref, query_ctx) + .await + } + } } Statement::ShowCreateFlow(show) => self.show_create_flow(show, query_ctx).await, Statement::ShowCreateView(show) => self.show_create_view(show, query_ctx).await, diff --git a/src/operator/src/statement/show.rs b/src/operator/src/statement/show.rs index 210ec4e7f2..fe91c71abe 100644 --- a/src/operator/src/statement/show.rs +++ b/src/operator/src/statement/show.rs @@ -144,6 +144,26 @@ impl StatementExecutor { .context(ExecuteStatementSnafu) } + #[tracing::instrument(skip_all)] + pub async fn show_create_table_for_pg( + &self, + table_name: TableName, + table: TableRef, + query_ctx: QueryContextRef, + ) -> Result { + let table_info = table.table_info(); + if table_info.table_type != TableType::Base { + return error::ShowCreateTableBaseOnlySnafu { + table_name: table_name.to_string(), + table_type: table_info.table_type, + } + .fail(); + } + + query::sql::show_create_foreign_table_for_pg(table, query_ctx) + .context(ExecuteStatementSnafu) + } + #[tracing::instrument(skip_all)] pub async fn show_create_view( &self, diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 3337503d09..7525bb904b 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -45,6 +45,7 @@ use datafusion_expr::{case, col, lit, Expr}; use datatypes::prelude::*; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema}; use datatypes::vectors::StringVector; +use itertools::Itertools; use object_store::ObjectStore; use once_cell::sync::Lazy; use regex::Regex; @@ -61,6 +62,7 @@ use sql::statements::show::{ use sql::statements::statement::Statement; use sql::statements::OptionMap; use sqlparser::ast::ObjectName; +use 
store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::requests::{FILE_TABLE_LOCATION_KEY, FILE_TABLE_PATTERN_KEY}; use table::TableRef; @@ -763,6 +765,52 @@ pub fn show_create_table( Ok(Output::new_with_record_batches(records)) } +pub fn show_create_foreign_table_for_pg( + table: TableRef, + _query_ctx: QueryContextRef, +) -> Result { + let table_info = table.table_info(); + + let table_meta = &table_info.meta; + let table_name = &table_info.name; + let schema = &table_info.meta.schema; + let is_metric_engine = is_metric_engine(&table_meta.engine); + + let columns = schema + .column_schemas() + .iter() + .filter_map(|c| { + if is_metric_engine && is_metric_engine_internal_column(&c.name) { + None + } else { + Some(format!( + "\"{}\" {}", + c.name, + c.data_type.postgres_datatype_name() + )) + } + }) + .join(",\n "); + + let sql = format!( + r#"CREATE FOREIGN TABLE ft_{} ( + {} +) +SERVER greptimedb +OPTIONS (table_name '{}')"#, + table_name, columns, table_name + ); + + let columns = vec![ + Arc::new(StringVector::from(vec![table_name.clone()])) as _, + Arc::new(StringVector::from(vec![sql])) as _, + ]; + let records = RecordBatches::try_from_columns(SHOW_CREATE_TABLE_OUTPUT_SCHEMA.clone(), columns) + .context(error::CreateRecordBatchSnafu)?; + + Ok(Output::new_with_record_batches(records)) +} + pub fn show_create_view( view_name: ObjectName, definition: &str, diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index d1530c1fcb..fa31e813f3 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -21,9 +21,9 @@ use crate::error::{ }; use crate::parser::ParserContext; use crate::statements::show::{ - ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateView, - ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, ShowTables, - ShowVariables, ShowViews, + ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateTableVariant, + ShowCreateView, ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, + ShowTables, ShowVariables, ShowViews, }; use crate::statements::statement::Statement; @@ -146,7 +146,19 @@ impl ParserContext<'_> { name: table_name.to_string(), } ); - Ok(Statement::ShowCreateTable(ShowCreateTable { table_name })) + let mut variant = ShowCreateTableVariant::Original; + if self.consume_token("FOR") { + if self.consume_token("POSTGRES_FOREIGN_TABLE") { + variant = ShowCreateTableVariant::PostgresForeignTable; + } else { + self.unsupported(self.peek_token_as_string())?; + } + } + + Ok(Statement::ShowCreateTable(ShowCreateTable { + table_name, + variant, + })) } fn parse_show_create_flow(&mut self) -> Result { diff --git a/src/sql/src/statements/show.rs b/src/sql/src/statements/show.rs index 055cd7768f..92f13422e6 100644 --- a/src/sql/src/statements/show.rs +++ b/src/sql/src/statements/show.rs @@ -179,12 +179,26 @@ impl Display for ShowCreateDatabase { #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateTable { pub table_name: ObjectName, + pub variant: ShowCreateTableVariant, +} + +/// Variant of a show create table +#[derive(Default, Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub enum ShowCreateTableVariant { + #[default] + Original, + PostgresForeignTable, } impl Display for ShowCreateTable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let table_name = &self.table_name; - write!(f, r#"SHOW CREATE TABLE {table_name}"#) + 
write!(f, r#"SHOW CREATE TABLE {table_name}"#)?; + if let ShowCreateTableVariant::PostgresForeignTable = self.variant { + write!(f, " FOR POSTGRES_FOREIGN_TABLE")?; + } + + Ok(()) } } @@ -344,12 +358,31 @@ mod tests { Statement::ShowCreateTable(show) => { let table_name = show.table_name.to_string(); assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::Original); + } + _ => { + unreachable!(); + } + } + + let sql = "SHOW CREATE TABLE test FOR POSTGRES_FOREIGN_TABLE"; + let stmts: Vec = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, stmts.len()); + assert_matches!(&stmts[0], Statement::ShowCreateTable { .. }); + match &stmts[0] { + Statement::ShowCreateTable(show) => { + let table_name = show.table_name.to_string(); + assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::PostgresForeignTable); } _ => { unreachable!(); } } } + #[test] pub fn test_show_create_missing_table_name() { let sql = "SHOW CREATE TABLE"; @@ -361,6 +394,17 @@ mod tests { .is_err()); } + #[test] + pub fn test_show_create_unknown_for() { + let sql = "SHOW CREATE TABLE t FOR UNKNOWN"; + assert!(ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default() + ) + .is_err()); + } + #[test] pub fn test_show_create_flow() { let sql = "SHOW CREATE FLOW test"; diff --git a/tests/cases/standalone/common/show/show_create.result b/tests/cases/standalone/common/show/show_create.result index ec692c0f29..85536954d4 100644 --- a/tests/cases/standalone/common/show/show_create.result +++ b/tests/cases/standalone/common/show/show_create.result @@ -46,6 +46,22 @@ SHOW CREATE TABLE system_metrics; | | ) | +----------------+-----------------------------------------------------------+ +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + ++----------------+------------------------------------------+ +| Table | Create Table | ++----------------+------------------------------------------+ +| system_metrics | CREATE FOREIGN TABLE ft_system_metrics ( | +| | "id" INT4, | +| | "host" VARCHAR, | +| | "cpu" FLOAT8, | +| | "disk" FLOAT4, | +| | "ts" TIMESTAMP | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 'system_metrics') | ++----------------+------------------------------------------+ + DROP TABLE system_metrics; Affected Rows: 0 @@ -141,6 +157,20 @@ show create table t1; | | ) | +-------+-----------------------------------+ +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + ++-------+------------------------------+ +| Table | Create Table | ++-------+------------------------------+ +| t1 | CREATE FOREIGN TABLE ft_t1 ( | +| | "host" VARCHAR, | +| | "ts" TIMESTAMP, | +| | "val" FLOAT8 | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 't1') | ++-------+------------------------------+ + drop table t1; Affected Rows: 0 diff --git a/tests/cases/standalone/common/show/show_create.sql b/tests/cases/standalone/common/show/show_create.sql index 45c8f7a3ef..5289df6e76 100644 --- a/tests/cases/standalone/common/show/show_create.sql +++ b/tests/cases/standalone/common/show/show_create.sql @@ -20,6 +20,8 @@ WITH( SHOW CREATE TABLE system_metrics; +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + DROP TABLE system_metrics; create table table_without_partition ( @@ -57,6 +59,8 @@ show create table phy; show create table t1; +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + drop table t1; drop table phy; From 66f0581f5b42780fd89fc53928e838600b9f8400 Mon 
Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 19 Dec 2024 11:29:34 +0800 Subject: [PATCH 45/46] fix: ensure table route metadata is eventually rolled back on failure (#5174) * fix: ensure table route metadata is eventually rolled back on procedure failure * fix(fuzz): enhance procedure condition checking * chore: add logs * feat: close downgraded leader region actively * chore: apply suggestions from CR --- .../src/procedure/region_migration.rs | 76 +++++++++- .../close_downgraded_region.rs | 138 ++++++++++++++++++ .../region_migration/migration_start.rs | 8 +- .../region_migration/open_candidate_region.rs | 7 +- .../procedure/region_migration/test_util.rs | 20 ++- .../region_migration/update_metadata.rs | 6 +- .../upgrade_candidate_region.rs | 9 +- .../upgrade_candidate_region.rs | 4 +- .../migration/fuzz_migrate_metric_regions.rs | 71 ++++----- .../migration/fuzz_migrate_mito_regions.rs | 6 +- 10 files changed, 274 insertions(+), 71 deletions(-) create mode 100644 src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 1baa0c04d4..3b27d33f22 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub(crate) mod close_downgraded_region; pub(crate) mod downgrade_leader_region; pub(crate) mod manager; pub(crate) mod migration_abort; @@ -43,6 +44,7 @@ use common_procedure::error::{ Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu, }; use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey}; +use common_telemetry::info; use manager::RegionMigrationProcedureGuard; pub use manager::{ RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker, @@ -91,7 +93,9 @@ impl PersistentContext { let lock_key = vec![ CatalogLock::Read(&self.catalog).into(), SchemaLock::read(&self.catalog, &self.schema).into(), - TableLock::Read(region_id.table_id()).into(), + // The optimistic updating of table route is not working very well, + // so we need to use the write lock here. + TableLock::Write(region_id.table_id()).into(), RegionLock::Write(region_id).into(), ]; @@ -253,7 +257,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableRoute: {table_id}"), })? .context(error::TableRouteNotFoundSnafu { table_id })?; @@ -317,7 +321,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableInfo: {table_id}"), })? .context(error::TableInfoNotFoundSnafu { table_id })?; @@ -350,7 +354,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"), })? 
.context(error::DatanodeTableNotFoundSnafu { @@ -468,6 +472,48 @@ impl RegionMigrationProcedure { _guard: guard, }) } + + async fn rollback_inner(&mut self) -> Result<()> { + let _timer = METRIC_META_REGION_MIGRATION_EXECUTE + .with_label_values(&["rollback"]) + .start_timer(); + + let table_id = self.context.region_id().table_id(); + let region_id = self.context.region_id(); + self.context.remove_table_route_value(); + let table_metadata_manager = self.context.table_metadata_manager.clone(); + let table_route = self.context.get_table_route_value().await?; + + // Safety: It must be a physical table route. + let downgraded = table_route + .region_routes() + .unwrap() + .iter() + .filter(|route| route.region.id == region_id) + .any(|route| route.is_leader_downgrading()); + + if downgraded { + info!("Rollbacking downgraded region leader table route, region: {region_id}"); + table_metadata_manager + .update_leader_region_status(table_id, table_route, |route| { + if route.region.id == region_id { + Some(None) + } else { + None + } + }) + .await + .context(error::TableMetadataManagerSnafu) + .map_err(BoxedError::new) + .with_context(|_| error::RetryLaterWithSourceSnafu { + reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"), + })?; + } + + self.context.register_failure_detectors().await; + + Ok(()) + } } #[async_trait::async_trait] @@ -476,6 +522,16 @@ impl Procedure for RegionMigrationProcedure { Self::TYPE_NAME } + async fn rollback(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<()> { + self.rollback_inner() + .await + .map_err(ProcedureError::external) + } + + fn rollback_supported(&self) -> bool { + true + } + async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult { let state = &mut self.state; @@ -701,6 +757,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, @@ -1071,6 +1133,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, diff --git a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs new file mode 100644 index 0000000000..9113607681 --- /dev/null +++ b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs @@ -0,0 +1,138 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::any::Any; +use std::time::Duration; + +use api::v1::meta::MailboxMessage; +use common_meta::distributed_time_constants::MAILBOX_RTT_SECS; +use common_meta::instruction::{Instruction, InstructionReply, SimpleReply}; +use common_meta::key::datanode_table::RegionInfo; +use common_meta::RegionIdent; +use common_procedure::Status; +use common_telemetry::{info, warn}; +use serde::{Deserialize, Serialize}; +use snafu::ResultExt; + +use crate::error::{self, Result}; +use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::{Context, State}; +use crate::service::mailbox::Channel; + +const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS); + +#[derive(Debug, Serialize, Deserialize)] +pub struct CloseDowngradedRegion; + +#[async_trait::async_trait] +#[typetag::serde] +impl State for CloseDowngradedRegion { + async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + if let Err(err) = self.close_downgraded_leader_region(ctx).await { + let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer; + let region_id = ctx.region_id(); + warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode); + } + + Ok((Box::new(RegionMigrationEnd), Status::done())) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl CloseDowngradedRegion { + /// Builds close region instruction. + /// + /// Abort(non-retry): + /// - Datanode Table is not found. + async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result { + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode_id = pc.from_peer.id; + let cluster_id = pc.cluster_id; + let table_id = pc.region_id.table_id(); + let region_number = pc.region_id.region_number(); + let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?; + + let RegionInfo { engine, .. } = datanode_table_value.region_info.clone(); + + Ok(Instruction::CloseRegion(RegionIdent { + cluster_id, + datanode_id: downgrade_leader_datanode_id, + table_id, + region_number, + engine, + })) + } + + /// Closes the downgraded leader region. + async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> { + let close_instruction = self.build_close_region_instruction(ctx).await?; + let region_id = ctx.region_id(); + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode = &pc.from_peer; + let msg = MailboxMessage::json_message( + &format!("Close downgraded region: {}", region_id), + &format!("Meta@{}", ctx.server_addr()), + &format!( + "Datanode-{}@{}", + downgrade_leader_datanode.id, downgrade_leader_datanode.addr + ), + common_time::util::current_time_millis(), + &close_instruction, + ) + .with_context(|_| error::SerializeToJsonSnafu { + input: close_instruction.to_string(), + })?; + + let ch = Channel::Datanode(downgrade_leader_datanode.id); + let receiver = ctx + .mailbox + .send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT) + .await?; + + match receiver.await? 
{ Ok(msg) => { let reply = HeartbeatMailbox::json_reply(&msg)?; info!( "Received close downgraded leader region reply: {:?}, region: {}", reply, region_id ); let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else { return error::UnexpectedInstructionReplySnafu { mailbox_message: msg.to_string(), reason: "expect close region reply", } .fail(); }; if result { Ok(()) } else { error::UnexpectedSnafu { violated: format!( "Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}", downgrade_leader_datanode, ), } .fail() } } Err(e) => Err(e), } } } diff --git a/src/meta-srv/src/procedure/region_migration/migration_start.rs b/src/meta-srv/src/procedure/region_migration/migration_start.rs index 3f81033410..4c097631d3 100644 --- a/src/meta-srv/src/procedure/region_migration/migration_start.rs +++ b/src/meta-srv/src/procedure/region_migration/migration_start.rs @@ -21,11 +21,11 @@ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; -use super::open_candidate_region::OpenCandidateRegion; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; /// The behaviors: diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 22b64b0142..6a96540b82 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -25,9 +25,9 @@ use common_telemetry::info; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -145,7 +145,10 @@ impl OpenCandidateRegion { match receiver.await?
{ Ok(msg) => { let reply = HeartbeatMailbox::json_reply(&msg)?; - info!("Received open region reply: {:?}", reply); + info!( + "Received open region reply: {:?}, region: {}", + reply, region_id + ); let InstructionReply::OpenRegion(SimpleReply { result, error }) = reply else { return error::UnexpectedInstructionReplySnafu { mailbox_message: msg.to_string(), diff --git a/src/meta-srv/src/procedure/region_migration/test_util.rs b/src/meta-srv/src/procedure/region_migration/test_util.rs index 2058782396..2fe55edcab 100644 --- a/src/meta-srv/src/procedure/region_migration/test_util.rs +++ b/src/meta-srv/src/procedure/region_migration/test_util.rs @@ -44,19 +44,21 @@ use store_api::storage::RegionId; use table::metadata::RawTableInfo; use tokio::sync::mpsc::{Receiver, Sender}; -use super::manager::RegionMigrationProcedureTracker; -use super::migration_abort::RegionMigrationAbort; -use super::upgrade_candidate_region::UpgradeCandidateRegion; -use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext}; use crate::cache_invalidator::MetasrvCacheInvalidator; use crate::error::{self, Error, Result}; use crate::handler::{HeartbeatMailbox, Pusher, Pushers}; use crate::metasrv::MetasrvInfo; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::manager::RegionMigrationProcedureTracker; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::migration_end::RegionMigrationEnd; use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; use crate::procedure::region_migration::update_metadata::UpdateMetadata; -use crate::procedure::region_migration::PersistentContext; +use crate::procedure::region_migration::upgrade_candidate_region::UpgradeCandidateRegion; +use crate::procedure::region_migration::{ + Context, ContextFactory, DefaultContextFactory, PersistentContext, State, VolatileContext, +}; use crate::service::mailbox::{Channel, MailboxRef}; pub type MockHeartbeatReceiver = Receiver>; @@ -569,6 +571,14 @@ pub(crate) fn assert_region_migration_end(next: &dyn State) { let _ = next.as_any().downcast_ref::().unwrap(); } +/// Asserts the [State] should be [CloseDowngradedRegion]. +pub(crate) fn assert_close_downgraded_region(next: &dyn State) { + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); +} + /// Asserts the [State] should be [RegionMigrationAbort]. 
pub(crate) fn assert_region_migration_abort(next: &dyn State) { let _ = next diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata.rs b/src/meta-srv/src/procedure/region_migration/update_metadata.rs index 180cf31fe1..858669ea21 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata.rs @@ -22,10 +22,10 @@ use common_procedure::Status; use common_telemetry::warn; use serde::{Deserialize, Serialize}; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; use crate::error::Result; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::{Context, State}; #[derive(Debug, Serialize, Deserialize)] @@ -58,7 +58,7 @@ impl State for UpdateMetadata { if let Err(err) = ctx.invalidate_table_cache().await { warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}"); }; - Ok((Box::new(RegionMigrationEnd), Status::done())) + Ok((Box::new(CloseDowngradedRegion), Status::executing(false))) } UpdateMetadata::Rollback => { self.rollback_downgraded_region(ctx).await?; diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs index b710a0e1f3..c180456bd4 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs @@ -195,7 +195,7 @@ mod tests { use store_api::storage::RegionId; use crate::error::Error; - use crate::procedure::region_migration::migration_end::RegionMigrationEnd; + use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::test_util::{self, TestingEnv}; use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{ContextFactory, PersistentContext, State}; @@ -443,7 +443,7 @@ mod tests { } #[tokio::test] - async fn test_next_migration_end_state() { + async fn test_next_close_downgraded_region_state() { let mut state = Box::new(UpdateMetadata::Upgrade); let env = TestingEnv::new(); let persistent_context = new_persistent_context(); @@ -471,7 +471,10 @@ mod tests { let (next, _) = state.next(&mut ctx).await.unwrap(); - let _ = next.as_any().downcast_ref::().unwrap(); + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); let table_route = table_metadata_manager .table_route_manager() diff --git a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs index 49100e92f3..fa989274b4 100644 --- a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs @@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; use tokio::time::{sleep, Instant}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use 
crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -155,7 +155,7 @@ impl UpgradeCandidateRegion { exists, error::UnexpectedSnafu { violated: format!( - "Expected region {} doesn't exist on datanode {:?}", + "Candidate region {} doesn't exist on datanode {:?}", region_id, candidate ) } diff --git a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs index d4fa4d08fd..5bcddea53a 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs @@ -229,6 +229,29 @@ async fn create_logical_table_and_insert_values( Ok(()) } +async fn wait_for_migration(ctx: &FuzzContext, migration: &Migration, procedure_id: &str) { + info!("Waits for migration: {migration:?}"); + let region_id = migration.region_id.as_u64(); + wait_condition_fn( + Duration::from_secs(120), + || { + let greptime = ctx.greptime.clone(); + let procedure_id = procedure_id.to_string(); + Box::pin(async move { + let output = procedure_state(&greptime, &procedure_id).await; + info!("Checking procedure: {procedure_id}, output: {output}"); + (fetch_partition(&greptime, region_id).await.unwrap(), output) + }) + }, + |(partition, output)| { + info!("Region: {region_id}, datanode: {}", partition.datanode_id); + partition.datanode_id == migration.to_peer && output.contains("Done") + }, + Duration::from_secs(1), + ) + .await; +} + async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut rng = ChaCha20Rng::seed_from_u64(input.seed); // Creates a physical table. @@ -297,28 +320,7 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { } info!("Excepted new region distribution: {new_distribution:?}"); for (migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Validates value rows @@ -388,29 +390,8 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { procedure_ids.push(procedure_id); } info!("Excepted new region distribution: {new_distribution:?}"); - for (migration, procedure_id) in migrations.into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + for 
(migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Creates more logical tables and inserts values diff --git a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs index 3f15e859c4..12c4cdae49 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs @@ -248,13 +248,13 @@ async fn migrate_regions(ctx: &FuzzContext, migrations: &[Migration]) -> Result< { let output = procedure_state(&greptime, &procedure_id).await; info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() + (fetch_partition(&greptime, region_id).await.unwrap(), output) } }) }, - |partition| { + |(partition, output)| { info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer + partition.datanode_id == migration.to_peer && output.contains("Done") }, Duration::from_secs(5), ) From 422d18da8bbdaba3b3a9b93bea6ef9bc3b76ab2f Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 19 Dec 2024 11:42:05 +0800 Subject: [PATCH 46/46] feat: bump opendal and switch prometheus layer to the upstream impl (#5179) * feat: bump opendal and switch prometheus layer to the upstream impl Signed-off-by: Ruihang Xia * remove unused files Signed-off-by: Ruihang Xia * fix tests Signed-off-by: Ruihang Xia * remove unused things Signed-off-by: Ruihang Xia * remove root dir on recovering cache Signed-off-by: Ruihang Xia * filter out non-files entry in test Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 25 +- src/common/datasource/src/object_store/fs.rs | 2 +- src/common/datasource/src/object_store/s3.rs | 2 +- src/common/procedure/src/local/runner.rs | 8 +- src/datanode/src/error.rs | 15 +- src/datanode/src/store.rs | 5 +- src/file-engine/src/manifest.rs | 2 +- src/file-engine/src/region.rs | 6 +- src/metric-engine/src/test_util.rs | 4 +- src/mito2/src/cache/file_cache.rs | 4 +- src/mito2/src/engine/create_test.rs | 4 +- src/mito2/src/engine/drop_test.rs | 12 +- src/mito2/src/engine/open_test.rs | 4 +- src/mito2/src/manifest/tests/checkpoint.rs | 2 + src/mito2/src/sst/file_purger.rs | 6 +- src/mito2/src/worker/handle_open.rs | 2 +- src/object-store/Cargo.toml | 3 +- src/object-store/src/layers.rs | 33 +- .../src/layers/lru_cache/read_cache.rs | 9 +- src/object-store/src/layers/prometheus.rs | 584 ------------------ src/object-store/src/util.rs | 49 +- src/object-store/tests/object_store_test.rs | 61 +- 22 files changed, 134 insertions(+), 708 deletions(-) delete mode 100644 src/object-store/src/layers/prometheus.rs diff --git a/Cargo.lock b/Cargo.lock index a0225cf27d..fa8ba34d1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -866,18 +866,6 @@ dependencies = [ "rand", ] -[[package]] -name = "backon" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" -dependencies = [ - "fastrand", - "futures-core", - "pin-project", - "tokio", -] - [[package]] name = "backon" version = "1.2.0" @@ -2228,7 +2216,7 @@ version = "0.12.0" dependencies = [ "async-stream", "async-trait", - "backon 1.2.0", + "backon", "common-base", "common-error", "common-macro", @@ -7386,13 +7374,13 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "opendal" 
-version = "0.49.2" +version = "0.50.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b04d09b9822c2f75a1d2fc513a2c1279c70e91e7407936fffdf6a6976ec530a" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" dependencies = [ "anyhow", "async-trait", - "backon 0.4.4", + "backon", "base64 0.22.1", "bytes", "chrono", @@ -7405,6 +7393,7 @@ dependencies = [ "md-5", "once_cell", "percent-encoding", + "prometheus", "quick-xml 0.36.2", "reqsign", "reqwest", @@ -9387,9 +9376,9 @@ dependencies = [ [[package]] name = "reqsign" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" +checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" dependencies = [ "anyhow", "async-trait", diff --git a/src/common/datasource/src/object_store/fs.rs b/src/common/datasource/src/object_store/fs.rs index f87311f517..5ffbbfa314 100644 --- a/src/common/datasource/src/object_store/fs.rs +++ b/src/common/datasource/src/object_store/fs.rs @@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result { DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish(); Ok(object_store) } diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index e141621b89..0d83eb7a98 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -89,7 +89,7 @@ pub fn build_s3_backend( DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish()) } diff --git a/src/common/procedure/src/local/runner.rs b/src/common/procedure/src/local/runner.rs index c2d15001fb..bf277a0e72 100644 --- a/src/common/procedure/src/local/runner.rs +++ b/src/common/procedure/src/local/runner.rs @@ -544,7 +544,7 @@ mod tests { use common_test_util::temp_dir::create_temp_dir; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use object_store::ObjectStore; + use object_store::{EntryMode, ObjectStore}; use tokio::sync::mpsc; use super::*; @@ -578,7 +578,11 @@ mod tests { ) { let dir = proc_path!(procedure_store, "{procedure_id}/"); let lister = object_store.list(&dir).await.unwrap(); - let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect(); + let mut files_in_dir: Vec<_> = lister + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .map(|de| de.name().to_string()) + .collect(); files_in_dir.sort_unstable(); assert_eq!(files, files_in_dir); } diff --git a/src/datanode/src/error.rs b/src/datanode/src/error.rs index 9fbd46e160..61a4eae128 100644 --- a/src/datanode/src/error.rs +++ b/src/datanode/src/error.rs @@ -193,6 +193,14 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to build http client"))] + BuildHttpClient { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: reqwest::Error, + }, + #[snafu(display("Missing required field: {}", name))] MissingRequiredField { name: String, @@ -406,9 +414,10 @@ impl ErrorExt for Error { | MissingKvBackend { .. } | TomlFormat { .. } => StatusCode::InvalidArguments, - PayloadNotExist { .. } | Unexpected { .. 
} | WatchAsyncTaskChange { .. } => { - StatusCode::Unexpected - } + PayloadNotExist { .. } + | Unexpected { .. } + | WatchAsyncTaskChange { .. } + | BuildHttpClient { .. } => StatusCode::Unexpected, AsyncTaskExecute { source, .. } => source.status_code(), diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs index c78afe448e..52a1cba982 100644 --- a/src/datanode/src/store.rs +++ b/src/datanode/src/store.rs @@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O use snafu::prelude::*; use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE}; -use crate::error::{self, CreateDirSnafu, Result}; +use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result}; pub(crate) async fn new_raw_object_store( store: &ObjectStoreConfig, @@ -236,7 +236,8 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result builder.timeout(config.timeout) }; - HttpClient::build(http_builder).context(error::InitBackendSnafu) + let client = http_builder.build().context(BuildHttpClientSnafu)?; + Ok(HttpClient::with(client)) } struct PrintDetailedError; diff --git a/src/file-engine/src/manifest.rs b/src/file-engine/src/manifest.rs index 6310c3ccb9..6bf5ee104b 100644 --- a/src/file-engine/src/manifest.rs +++ b/src/file-engine/src/manifest.rs @@ -46,7 +46,7 @@ impl FileRegionManifest { pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> { let path = ®ion_manifest_path(region_dir); let exist = object_store - .is_exist(path) + .exists(path) .await .context(CheckObjectSnafu { path })?; ensure!(!exist, ManifestExistsSnafu { path }); diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs index a5af682228..673d352b1e 100644 --- a/src/file-engine/src/region.rs +++ b/src/file-engine/src/region.rs @@ -130,7 +130,7 @@ mod tests { assert_eq!(region.metadata.primary_key, vec![1]); assert!(object_store - .is_exist("create_region_dir/manifest/_file_manifest") + .exists("create_region_dir/manifest/_file_manifest") .await .unwrap()); @@ -198,13 +198,13 @@ mod tests { .unwrap(); assert!(object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); FileRegion::drop(®ion, &object_store).await.unwrap(); assert!(!object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index c5f7a2b4a3..d0f8cf5028 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -313,12 +313,12 @@ mod test { let region_dir = "test_metric_region"; // assert metadata region's dir let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR); - let exist = object_store.is_exist(&metadata_region_dir).await.unwrap(); + let exist = object_store.exists(&metadata_region_dir).await.unwrap(); assert!(exist); // assert data region's dir let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR); - let exist = object_store.is_exist(&data_region_dir).await.unwrap(); + let exist = object_store.exists(&data_region_dir).await.unwrap(); assert!(exist); // check mito engine diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index 9e5742ca04..eb112530ca 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -286,7 +286,7 @@ impl FileCache { } async fn get_reader(&self, 
file_path: &str) -> object_store::Result> { - if self.local_store.is_exist(file_path).await? { + if self.local_store.exists(file_path).await? { Ok(Some(self.local_store.reader(file_path).await?)) } else { Ok(None) @@ -480,7 +480,7 @@ mod tests { cache.memory_index.run_pending_tasks().await; // The file also not exists. - assert!(!local_store.is_exist(&file_path).await.unwrap()); + assert!(!local_store.exists(&file_path).await.unwrap()); assert_eq!(0, cache.memory_index.weighted_size()); } diff --git a/src/mito2/src/engine/create_test.rs b/src/mito2/src/engine/create_test.rs index 48b04dc86d..4bcc559340 100644 --- a/src/mito2/src/engine/create_test.rs +++ b/src/mito2/src/engine/create_test.rs @@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); assert!(!object_store_manager .default_object_store() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/drop_test.rs b/src/mito2/src/engine/drop_test.rs index 7d719f778b..5d0c5afbf0 100644 --- a/src/mito2/src/engine/drop_test.rs +++ b/src/mito2/src/engine/drop_test.rs @@ -71,7 +71,7 @@ async fn test_engine_drop_region() { assert!(!env .get_object_store() .unwrap() - .is_exist(&join_path(®ion_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(®ion_dir, DROPPING_MARKER_FILE)) .await .unwrap()); @@ -93,7 +93,7 @@ async fn test_engine_drop_region() { listener.wait().await; let object_store = env.get_object_store().unwrap(); - assert!(!object_store.is_exist(®ion_dir).await.unwrap()); + assert!(!object_store.exists(®ion_dir).await.unwrap()); } #[tokio::test] @@ -167,13 +167,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); @@ -190,13 +190,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(!object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs index 6752bbd04b..a3b51514c2 100644 --- a/src/mito2/src/engine/open_test.rs +++ b/src/mito2/src/engine/open_test.rs @@ -228,13 +228,13 @@ async fn test_engine_region_open_with_custom_store() { let object_store_manager = env.get_object_store_manager().unwrap(); assert!(!object_store_manager .default_object_store() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); } diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index 692f40422b..6f2c92bc5e 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -84,6 +84,7 @@ async fn manager_without_checkpoint() { // check files let mut expected = vec![ + "/", "00000000000000000010.json", "00000000000000000009.json", "00000000000000000008.json", @@ -130,6 +131,7 @@ async fn 
manager_with_checkpoint_distance_1() { // check files let mut expected = vec![ + "/", "00000000000000000009.checkpoint", "00000000000000000010.checkpoint", "00000000000000000010.json", diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 76c7a71503..81251c91a5 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -185,7 +185,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); } #[tokio::test] @@ -247,7 +247,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); - assert!(!object_store.is_exist(&index_path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); + assert!(!object_store.exists(&index_path).await.unwrap()); } } diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index d4a13a1345..01eaf17652 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -51,7 +51,7 @@ impl RegionWorkerLoop { // Check if this region is pending drop. And clean the entire dir if so. if !self.dropping_regions.is_region_exists(region_id) && object_store - .is_exist(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) .await .context(OpenDalSnafu)? { diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index 72e0e2bfbe..b82be7376a 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -17,8 +17,9 @@ futures.workspace = true lazy_static.workspace = true md5 = "0.7" moka = { workspace = true, features = ["future"] } -opendal = { version = "0.49", features = [ +opendal = { version = "0.50", features = [ "layers-tracing", + "layers-prometheus", "services-azblob", "services-fs", "services-gcs", diff --git a/src/object-store/src/layers.rs b/src/object-store/src/layers.rs index b2145aa6b0..20108ab63c 100644 --- a/src/object-store/src/layers.rs +++ b/src/object-store/src/layers.rs @@ -13,8 +13,37 @@ // limitations under the License. mod lru_cache; -mod prometheus; pub use lru_cache::*; pub use opendal::layers::*; -pub use prometheus::PrometheusMetricsLayer; +pub use prometheus::build_prometheus_metrics_layer; + +mod prometheus { + use std::sync::{Mutex, OnceLock}; + + use opendal::layers::PrometheusLayer; + + static PROMETHEUS_LAYER: OnceLock> = OnceLock::new(); + + pub fn build_prometheus_metrics_layer(with_path_label: bool) -> PrometheusLayer { + PROMETHEUS_LAYER + .get_or_init(|| { + // This logical tries to extract parent path from the object storage operation + // the function also relies on assumption that the region path is built from + // pattern `/catalog/schema/table_id/....` + // + // We'll get the data/catalog/schema from path. 
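// For example, an operation under `data/greptime/public/1024/1024_0000000000/` should be
// labelled `data/greptime/public` (the first three path segments), matching what the
// removed `extract_parent_path` helper produced; when `with_path_label` is false the
// level is 0 and no path label is recorded.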
+ let path_level = if with_path_label { 3 } else { 0 }; + + let layer = PrometheusLayer::builder() + .path_label(path_level) + .register_default() + .unwrap(); + + Mutex::new(layer) + }) + .lock() + .unwrap() + .clone() + } +} diff --git a/src/object-store/src/layers/lru_cache/read_cache.rs b/src/object-store/src/layers/lru_cache/read_cache.rs index f88b36784d..874b17280d 100644 --- a/src/object-store/src/layers/lru_cache/read_cache.rs +++ b/src/object-store/src/layers/lru_cache/read_cache.rs @@ -156,9 +156,12 @@ impl ReadCache { let size = entry.metadata().content_length(); OBJECT_STORE_LRU_CACHE_ENTRIES.inc(); OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64); - self.mem_cache - .insert(read_key.to_string(), ReadResult::Success(size as u32)) - .await; + // ignore root path + if entry.path() != "/" { + self.mem_cache + .insert(read_key.to_string(), ReadResult::Success(size as u32)) + .await; + } } Ok(self.cache_stat().await) diff --git a/src/object-store/src/layers/prometheus.rs b/src/object-store/src/layers/prometheus.rs deleted file mode 100644 index fef83a9146..0000000000 --- a/src/object-store/src/layers/prometheus.rs +++ /dev/null @@ -1,584 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! code originally from , make a tiny change to avoid crash in multi thread env - -use std::fmt::{Debug, Formatter}; - -use common_telemetry::debug; -use lazy_static::lazy_static; -use opendal::raw::*; -use opendal::{Buffer, ErrorKind}; -use prometheus::{ - exponential_buckets, histogram_opts, register_histogram_vec, register_int_counter_vec, - Histogram, HistogramTimer, HistogramVec, IntCounterVec, -}; - -use crate::util::extract_parent_path; - -type Result = std::result::Result; - -lazy_static! { - static ref REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( - "opendal_requests_total", - "Total times of all kinds of operation being called", - &["scheme", "operation", "path"], - ) - .unwrap(); - static ref REQUESTS_DURATION_SECONDS: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_requests_duration_seconds", - "Histogram of the time spent on specific operation", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); - static ref BYTES_TOTAL: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_bytes_total", - "Total size of sync or async Read/Write", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); -} - -#[inline] -fn increment_errors_total(op: Operation, kind: ErrorKind) { - debug!( - "Prometheus statistics metrics error, operation {} error {}", - op.into_static(), - kind.into_static() - ); -} - -/// Please refer to [prometheus](https://docs.rs/prometheus) for every operation. -/// -/// # Prometheus Metrics -/// -/// In this section, we will introduce three metrics that are currently being exported by opendal. These metrics are essential for understanding the behavior and performance of opendal. 
-/// -/// -/// | Metric Name | Type | Description | Labels | -/// |-----------------------------------|-----------|------------------------------------------------------|---------------------| -/// | opendal_requests_total | Counter | Total times of all kinds of operation being called | scheme, operation | -/// | opendal_requests_duration_seconds | Histogram | Histogram of the time spent on specific operation | scheme, operation | -/// | opendal_bytes_total | Histogram | Total size of sync or async Read/Write | scheme, operation | -/// -/// For a more detailed explanation of these metrics and how they are used, please refer to the [Prometheus documentation](https://prometheus.io/docs/introduction/overview/). -/// -/// # Histogram Configuration -/// -/// The metric buckets for these histograms are automatically generated based on the `exponential_buckets(0.01, 2.0, 16)` configuration. -#[derive(Default, Debug, Clone)] -pub struct PrometheusMetricsLayer { - pub path_label: bool, -} - -impl PrometheusMetricsLayer { - pub fn new(path_label: bool) -> Self { - Self { path_label } - } -} - -impl Layer for PrometheusMetricsLayer { - type LayeredAccess = PrometheusAccess; - - fn layer(&self, inner: A) -> Self::LayeredAccess { - let meta = inner.info(); - let scheme = meta.scheme(); - - PrometheusAccess { - inner, - scheme: scheme.to_string(), - path_label: self.path_label, - } - } -} - -#[derive(Clone)] -pub struct PrometheusAccess { - inner: A, - scheme: String, - path_label: bool, -} - -impl PrometheusAccess { - fn get_path_label<'a>(&self, path: &'a str) -> &'a str { - if self.path_label { - extract_parent_path(path) - } else { - "" - } - } -} - -impl Debug for PrometheusAccess { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PrometheusAccessor") - .field("inner", &self.inner) - .finish_non_exhaustive() - } -} - -impl LayeredAccess for PrometheusAccess { - type Inner = A; - type Reader = PrometheusMetricWrapper; - type BlockingReader = PrometheusMetricWrapper; - type Writer = PrometheusMetricWrapper; - type BlockingWriter = PrometheusMetricWrapper; - type Lister = A::Lister; - type BlockingLister = A::BlockingLister; - - fn inner(&self) -> &Self::Inner { - &self.inner - } - - async fn create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .start_timer(); - let create_res = self.inner.create_dir(path, args).await; - - timer.observe_duration(); - create_res.inspect_err(|e| { - increment_errors_total(Operation::CreateDir, e.kind()); - }) - } - - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Read, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Read, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Read.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn write(&self, path: &str, 
args: OpWrite) -> Result<(RpWrite, Self::Writer)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Write, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Write, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Write.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .inc(); - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .start_timer(); - - let stat_res = self.inner.stat(path, args).await; - timer.observe_duration(); - stat_res.inspect_err(|e| { - increment_errors_total(Operation::Stat, e.kind()); - }) - } - - async fn delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .start_timer(); - - let delete_res = self.inner.delete(path, args).await; - timer.observe_duration(); - delete_res.inspect_err(|e| { - increment_errors_total(Operation::Delete, e.kind()); - }) - } - - async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .start_timer(); - - let list_res = self.inner.list(path, args).await; - - timer.observe_duration(); - list_res.inspect_err(|e| { - increment_errors_total(Operation::List, e.kind()); - }) - } - - async fn batch(&self, args: OpBatch) -> Result { - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .start_timer(); - let result = self.inner.batch(args).await; - - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::Batch, e.kind()); - }) - } - - async fn presign(&self, path: &str, args: OpPresign) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .start_timer(); - let result = self.inner.presign(path, args).await; - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::Presign, e.kind()); - }) - } - - fn blocking_create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - 
]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_create_dir(path, args); - - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingCreateDir, e.kind()); - }) - } - - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_read(path, args) - .map(|(rp, r)| { - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::BlockingRead, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingRead, e.kind()); - }) - } - - fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_write(path, args) - .map(|(rp, r)| { - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::BlockingWrite, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingWrite, e.kind()); - }) - } - - fn blocking_stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_stat(path, args); - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingStat, e.kind()); - }) - } - - fn blocking_delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_delete(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingDelete, e.kind()); - }) - } - - fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - 
.start_timer(); - let result = self.inner.blocking_list(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingList, e.kind()); - }) - } -} - -pub struct PrometheusMetricWrapper { - inner: R, - - op: Operation, - bytes_counter: Histogram, - _requests_duration_timer: HistogramTimer, - bytes: u64, -} - -impl Drop for PrometheusMetricWrapper { - fn drop(&mut self) { - self.bytes_counter.observe(self.bytes as f64); - } -} - -impl PrometheusMetricWrapper { - fn new( - inner: R, - op: Operation, - bytes_counter: Histogram, - requests_duration_timer: HistogramTimer, - ) -> Self { - Self { - inner, - op, - bytes_counter, - _requests_duration_timer: requests_duration_timer, - bytes: 0, - } - } -} - -impl oio::Read for PrometheusMetricWrapper { - async fn read(&mut self) -> Result { - self.inner.read().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingRead for PrometheusMetricWrapper { - fn read(&mut self) -> opendal::Result { - self.inner.read().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::Write for PrometheusMetricWrapper { - async fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - match self.inner.write(bs).await { - Ok(_) => { - self.bytes += bytes as u64; - Ok(()) - } - Err(err) => { - increment_errors_total(self.op, err.kind()); - Err(err) - } - } - } - - async fn close(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - async fn abort(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingWrite for PrometheusMetricWrapper { - fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - self.inner - .write(bs) - .map(|_| { - self.bytes += bytes as u64; - }) - .inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - fn close(&mut self) -> Result<()> { - self.inner.close().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs index fc0a031ab9..271da33e85 100644 --- a/src/object-store/src/util.rs +++ b/src/object-store/src/util.rs @@ -15,19 +15,12 @@ use std::fmt::Display; use common_telemetry::{debug, error, trace}; -use futures::TryStreamExt; use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer}; use opendal::raw::{AccessorInfo, Operation}; -use opendal::{Entry, ErrorKind, Lister}; +use opendal::ErrorKind; -use crate::layers::PrometheusMetricsLayer; use crate::ObjectStore; -/// Collect all entries from the [Lister]. -pub async fn collect(stream: Lister) -> Result, opendal::Error> { - stream.try_collect::>().await -} - /// Join two paths and normalize the output dir. /// /// The output dir is always ends with `/`. e.g. 
@@ -127,26 +120,12 @@ pub fn normalize_path(path: &str) -> String { p } -// This logical tries to extract parent path from the object storage operation -// the function also relies on assumption that the region path is built from -// pattern `/catalog/schema/table_id/....` -// -// this implementation tries to extract at most 3 levels of parent path -pub(crate) fn extract_parent_path(path: &str) -> &str { - // split the path into `catalog`, `schema` and others - path.char_indices() - .filter(|&(_, c)| c == '/') - // we get the data/catalog/schema from path, split at the 3rd / - .nth(2) - .map_or(path, |(i, _)| &path[..i]) -} - /// Attaches instrument layers to the object store. pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore { object_store .layer(LoggingLayer::new(DefaultLoggingInterceptor)) .layer(TracingLayer) - .layer(PrometheusMetricsLayer::new(path_label)) + .layer(crate::layers::build_prometheus_metrics_layer(path_label)) } static LOGGING_TARGET: &str = "opendal::services"; @@ -263,28 +242,4 @@ mod tests { assert_eq!("/abc", join_path("//", "/abc")); assert_eq!("abc/def", join_path("abc/", "//def")); } - - #[test] - fn test_path_extraction() { - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1024/1024_0000000000/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public") - ); - - assert_eq!("data/greptime/", extract_parent_path("data/greptime/")); - - assert_eq!("data/", extract_parent_path("data/")); - - assert_eq!("/", extract_parent_path("/")); - } } diff --git a/src/object-store/tests/object_store_test.rs b/src/object-store/tests/object_store_test.rs index 497decffab..7e81b965fb 100644 --- a/src/object-store/tests/object_store_test.rs +++ b/src/object-store/tests/object_store_test.rs @@ -65,23 +65,38 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> { store.write(p3, "Hello, object3!").await?; // List objects - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(3, entries.len()); store.delete(p1).await?; store.delete(p3).await?; // List objects again - // Only o2 is exists - let entries = store.list("/").await?; + // Only o2 and root exist + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(1, entries.len()); - assert_eq!(p2, entries.first().unwrap().path()); + assert_eq!(p2, entries[0].path()); let content = store.read(p2).await?; assert_eq!("Hello, object2!", String::from_utf8(content.to_vec())?); store.delete(p2).await?; - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert!(entries.is_empty()); assert!(store.read(p1).await.is_err()); @@ -252,7 +267,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { async fn assert_lru_cache(cache_layer: &LruCacheLayer, file_names: &[&str]) { for file_name in file_names { - assert!(cache_layer.contains_file(file_name).await); + assert!(cache_layer.contains_file(file_name).await, "{file_name}"); } } @@ -264,7 +279,9 @@ async fn assert_cache_files( let (_, mut lister) = store.list("/", OpList::default()).await?; let mut objects = vec![]; while let Some(e) = lister.next().await? 
{ - objects.push(e); + if e.mode() == EntryMode::FILE { + objects.push(e); + } } // compare the cache file with the expected cache file; ignore orders @@ -332,9 +349,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], &["Hello, object1!", "object2!", "Hello, object2!"], ) @@ -342,9 +359,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], ) .await; @@ -355,13 +372,13 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_eq!(cache_layer.read_cache_stat().await, (1, 15)); assert_cache_files( &cache_store, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], &["Hello, object1!"], ) .await?; assert_lru_cache( &cache_layer, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], ) .await; @@ -388,8 +405,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["Hello, object1!", "Hello, object3!", "Hello"], @@ -398,8 +415,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -416,7 +433,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_store, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["ello, object1!", "Hello, object3!", "Hello"], @@ -426,7 +443,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -448,7 +465,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], )