Compare commits


1 Commit

Author SHA1 Message Date
discord9  3d17d195a3  feat: flownode to frontend load balance with guess  2025-06-08 14:17:32 +08:00
90 changed files with 742 additions and 2751 deletions

View File

@@ -64,11 +64,11 @@ inputs:
upload-max-retry-times:
description: Max retry times for uploading artifacts to S3
required: false
default: "30"
default: "20"
upload-retry-timeout:
description: Timeout for uploading artifacts to S3
required: false
default: "120" # minutes
default: "30" # minutes
runs:
using: composite
steps:

Cargo.lock (generated, 778 changes): file diff suppressed because it is too large

View File

@@ -30,7 +30,6 @@ members = [
"src/common/recordbatch",
"src/common/runtime",
"src/common/session",
"src/common/stat",
"src/common/substrait",
"src/common/telemetry",
"src/common/test-util",
@@ -149,7 +148,6 @@ meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev =
mockall = "0.13"
moka = "0.12"
nalgebra = "0.33"
nix = { version = "0.30.1", default-features = false, features = ["event", "fs", "process"] }
notify = "8.0"
num_cpus = "1.16"
object_store_opendal = "0.50"
@@ -289,7 +287,6 @@ query = { path = "src/query" }
servers = { path = "src/servers" }
session = { path = "src/session" }
sql = { path = "src/sql" }
stat = { path = "src/common/stat" }
store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" }

View File

@@ -19,11 +19,9 @@ use std::time::Duration;
use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use common_base::secrets::{ExposeSecret, SecretString};
use common_error::ext::BoxedError;
use common_telemetry::{debug, error, info};
use object_store::layers::LoggingLayer;
use object_store::services::Oss;
use object_store::{services, ObjectStore};
use serde_json::Value;
use snafu::{OptionExt, ResultExt};
@@ -112,15 +110,15 @@ pub struct ExportCommand {
#[clap(long)]
s3: bool,
/// if both `ddl_local_dir` and remote storage (s3/oss) are set, `ddl_local_dir` will only be used for
/// exported SQL files, and the data will be exported to remote storage.
/// if both `s3_ddl_local_dir` and `s3` are set, `s3_ddl_local_dir` will only be used for
/// exported SQL files, and the data will be exported to s3.
///
/// Note that `ddl_local_dir` exports SQL files to the **LOCAL** file system; this is useful if the export client doesn't have
/// direct access to remote storage.
/// Note that `s3_ddl_local_dir` exports SQL files to the **LOCAL** file system; this is useful if the export client doesn't have
/// direct access to s3.
///
/// if remote storage is set but `ddl_local_dir` is not set, both SQL and data will be exported to remote storage.
/// if `s3` is set but `s3_ddl_local_dir` is not set, both SQL and data will be exported to s3.
#[clap(long)]
ddl_local_dir: Option<String>,
s3_ddl_local_dir: Option<String>,
/// The s3 bucket name
/// if s3 is set, this is required
@@ -151,30 +149,6 @@ pub struct ExportCommand {
/// if s3 is set, this is required
#[clap(long)]
s3_region: Option<String>,
/// if export data to oss
#[clap(long)]
oss: bool,
/// The oss bucket name
/// if oss is set, this is required
#[clap(long)]
oss_bucket: Option<String>,
/// The oss endpoint
/// if oss is set, this is required
#[clap(long)]
oss_endpoint: Option<String>,
/// The oss access key id
/// if oss is set, this is required
#[clap(long)]
oss_access_key_id: Option<String>,
/// The oss access key secret
/// if oss is set, this is required
#[clap(long)]
oss_access_key_secret: Option<String>,
}
impl ExportCommand {
@@ -188,7 +162,7 @@ impl ExportCommand {
{
return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
}
if !self.s3 && !self.oss && self.output_dir.is_none() {
if !self.s3 && self.output_dir.is_none() {
return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
}
let (catalog, schema) =
@@ -213,32 +187,13 @@ impl ExportCommand {
start_time: self.start_time.clone(),
end_time: self.end_time.clone(),
s3: self.s3,
ddl_local_dir: self.ddl_local_dir.clone(),
s3_ddl_local_dir: self.s3_ddl_local_dir.clone(),
s3_bucket: self.s3_bucket.clone(),
s3_root: self.s3_root.clone(),
s3_endpoint: self.s3_endpoint.clone(),
// Wrap sensitive values in SecretString
s3_access_key: self
.s3_access_key
.as_ref()
.map(|k| SecretString::from(k.clone())),
s3_secret_key: self
.s3_secret_key
.as_ref()
.map(|k| SecretString::from(k.clone())),
s3_access_key: self.s3_access_key.clone(),
s3_secret_key: self.s3_secret_key.clone(),
s3_region: self.s3_region.clone(),
oss: self.oss,
oss_bucket: self.oss_bucket.clone(),
oss_endpoint: self.oss_endpoint.clone(),
// Wrap sensitive values in SecretString
oss_access_key_id: self
.oss_access_key_id
.as_ref()
.map(|k| SecretString::from(k.clone())),
oss_access_key_secret: self
.oss_access_key_secret
.as_ref()
.map(|k| SecretString::from(k.clone())),
}))
}
}
@@ -254,30 +209,23 @@ pub struct Export {
start_time: Option<String>,
end_time: Option<String>,
s3: bool,
ddl_local_dir: Option<String>,
s3_ddl_local_dir: Option<String>,
s3_bucket: Option<String>,
s3_root: Option<String>,
s3_endpoint: Option<String>,
// Changed to SecretString for sensitive data
s3_access_key: Option<SecretString>,
s3_secret_key: Option<SecretString>,
s3_access_key: Option<String>,
s3_secret_key: Option<String>,
s3_region: Option<String>,
oss: bool,
oss_bucket: Option<String>,
oss_endpoint: Option<String>,
// Changed to SecretString for sensitive data
oss_access_key_id: Option<SecretString>,
oss_access_key_secret: Option<SecretString>,
}
impl Export {
fn catalog_path(&self) -> PathBuf {
if self.s3 || self.oss {
if self.s3 {
PathBuf::from(&self.catalog)
} else if let Some(dir) = &self.output_dir {
PathBuf::from(dir).join(&self.catalog)
} else {
unreachable!("catalog_path: output_dir must be set when not using remote storage")
unreachable!("catalog_path: output_dir must be set when not using s3")
}
}
@@ -479,7 +427,7 @@ impl Export {
.await?;
// Create directory if needed for file system storage
if !export_self.s3 && !export_self.oss {
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
}
@@ -525,8 +473,6 @@ impl Export {
async fn build_operator(&self) -> Result<ObjectStore> {
if self.s3 {
self.build_s3_operator().await
} else if self.oss {
self.build_oss_operator().await
} else {
self.build_fs_operator().await
}
@@ -534,8 +480,9 @@ impl Export {
/// build operator with preference for file system
async fn build_prefer_fs_operator(&self) -> Result<ObjectStore> {
if (self.s3 || self.oss) && self.ddl_local_dir.is_some() {
let root = self.ddl_local_dir.as_ref().unwrap().clone();
// Under s3 mode, if `s3_ddl_local_dir` is set, use it as the local root
if self.s3 && self.s3_ddl_local_dir.is_some() {
let root = self.s3_ddl_local_dir.as_ref().unwrap().clone();
let op = ObjectStore::new(services::Fs::default().root(&root))
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
@@ -543,8 +490,6 @@ impl Export {
Ok(op)
} else if self.s3 {
self.build_s3_operator().await
} else if self.oss {
self.build_oss_operator().await
} else {
self.build_fs_operator().await
}
@@ -570,35 +515,11 @@ impl Export {
}
if let Some(key_id) = self.s3_access_key.as_ref() {
builder = builder.access_key_id(key_id.expose_secret());
builder = builder.access_key_id(key_id);
}
if let Some(secret_key) = self.s3_secret_key.as_ref() {
builder = builder.secret_access_key(secret_key.expose_secret());
}
let op = ObjectStore::new(builder)
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn build_oss_operator(&self) -> Result<ObjectStore> {
let mut builder = Oss::default()
.bucket(self.oss_bucket.as_ref().expect("oss_bucket must be set"))
.endpoint(
self.oss_endpoint
.as_ref()
.expect("oss_endpoint must be set"),
);
// Use expose_secret() to access the actual secret value
if let Some(key_id) = self.oss_access_key_id.as_ref() {
builder = builder.access_key_id(key_id.expose_secret());
}
if let Some(secret_key) = self.oss_access_key_secret.as_ref() {
builder = builder.access_key_secret(secret_key.expose_secret());
builder = builder.secret_access_key(secret_key);
}
let op = ObjectStore::new(builder)
@@ -641,8 +562,8 @@ impl Export {
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
// Create directory if not using remote storage
if !export_self.s3 && !export_self.oss {
// Create directory if not using S3
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
}
@@ -654,11 +575,7 @@ impl Export {
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
// Log SQL command but mask sensitive information
let safe_sql = export_self.mask_sensitive_sql(&sql);
info!("Executing sql: {}", safe_sql);
info!("Executing sql: {sql}");
export_self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{} data to {}",
@@ -698,29 +615,6 @@ impl Export {
Ok(())
}
/// Mask sensitive information in SQL commands for safe logging
fn mask_sensitive_sql(&self, sql: &str) -> String {
let mut masked_sql = sql.to_string();
// Mask S3 credentials
if let Some(access_key) = &self.s3_access_key {
masked_sql = masked_sql.replace(access_key.expose_secret(), "[REDACTED]");
}
if let Some(secret_key) = &self.s3_secret_key {
masked_sql = masked_sql.replace(secret_key.expose_secret(), "[REDACTED]");
}
// Mask OSS credentials
if let Some(access_key_id) = &self.oss_access_key_id {
masked_sql = masked_sql.replace(access_key_id.expose_secret(), "[REDACTED]");
}
if let Some(access_key_secret) = &self.oss_access_key_secret {
masked_sql = masked_sql.replace(access_key_secret.expose_secret(), "[REDACTED]");
}
masked_sql
}
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
format!("{}/{}/{}", self.catalog, schema, file_name)
}
@@ -737,13 +631,6 @@ impl Export {
},
file_path
)
} else if self.oss {
format!(
"oss://{}/{}/{}",
self.oss_bucket.as_ref().unwrap_or(&String::new()),
self.catalog,
file_path
)
} else {
format!(
"{}/{}",
@@ -788,36 +675,15 @@ impl Export {
};
// Safety: All s3 options are required
// Use expose_secret() to access the actual secret values
let connection_options = format!(
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
self.s3_access_key.as_ref().unwrap().expose_secret(),
self.s3_secret_key.as_ref().unwrap().expose_secret(),
self.s3_access_key.as_ref().unwrap(),
self.s3_secret_key.as_ref().unwrap(),
self.s3_region.as_ref().unwrap(),
endpoint_option
);
(s3_path, format!(" CONNECTION ({})", connection_options))
} else if self.oss {
let oss_path = format!(
"oss://{}/{}/{}/",
self.oss_bucket.as_ref().unwrap(),
self.catalog,
schema
);
let endpoint_option = if let Some(endpoint) = self.oss_endpoint.as_ref() {
format!(", ENDPOINT='{}'", endpoint)
} else {
String::new()
};
let connection_options = format!(
"ACCESS_KEY_ID='{}', ACCESS_KEY_SECRET='{}'{}",
self.oss_access_key_id.as_ref().unwrap().expose_secret(),
self.oss_access_key_secret.as_ref().unwrap().expose_secret(),
endpoint_option
);
(oss_path, format!(" CONNECTION ({})", connection_options))
} else {
(
self.catalog_path()

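To make the option interplay above concrete, here is a minimal standalone Rust sketch of how the destination for exported SQL (DDL) files is chosen: a local directory wins for DDL files when it is set alongside `--s3`, otherwise everything follows the `s3` flag or `output_dir`. The `Backend` enum and `ddl_backend` function are hypothetical illustrations, not the actual ExportCommand/ObjectStore code.

// Sketch only; assumes simplified types instead of OpenDAL operators.
#[derive(Debug, PartialEq)]
enum Backend {
    LocalFs { root: String },
    S3,
}

/// Picks the backend used for exported SQL files, preferring the local
/// `s3_ddl_local_dir` when it is set together with `--s3`.
fn ddl_backend(s3: bool, s3_ddl_local_dir: Option<&str>, output_dir: Option<&str>) -> Option<Backend> {
    match (s3, s3_ddl_local_dir, output_dir) {
        (true, Some(dir), _) => Some(Backend::LocalFs { root: dir.to_string() }),
        (true, None, _) => Some(Backend::S3),
        (false, _, Some(dir)) => Some(Backend::LocalFs { root: dir.to_string() }),
        (false, _, None) => None, // rejected earlier as OutputDirNotSet
    }
}

fn main() {
    // SQL files land on the local file system, data still goes to S3.
    assert_eq!(
        ddl_backend(true, Some("/tmp/ddl"), None),
        Some(Backend::LocalFs { root: "/tmp/ddl".to_string() })
    );
    // Without `s3_ddl_local_dir`, both SQL and data go to S3.
    assert_eq!(ddl_backend(true, None, None), Some(Backend::S3));
}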
View File

@@ -80,7 +80,6 @@ servers.workspace = true
session.workspace = true
similar-asserts.workspace = true
snafu.workspace = true
stat.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true

View File

@@ -28,7 +28,7 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::datanode::{DatanodeOptions, Instance, APP_NAME};
use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu};
use crate::{create_resource_limit_metrics, log_versions};
use crate::log_versions;
/// Builder for Datanode instance.
pub struct InstanceBuilder {
@@ -68,7 +68,6 @@ impl InstanceBuilder {
);
log_versions(version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);
plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
.await

View File

@@ -45,7 +45,7 @@ use crate::error::{
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{create_resource_limit_metrics, log_versions, App};
use crate::{log_versions, App};
pub const APP_NAME: &str = "greptime-flownode";
@@ -246,9 +246,7 @@ impl StartCommand {
opts.component.node_id.map(|x| x.to_string()),
None,
);
log_versions(version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);
info!("Flownode start command: {:#?}", self);
info!("Flownode options: {:#?}", opts);

View File

@@ -44,7 +44,7 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{self, Result};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{create_resource_limit_metrics, log_versions, App};
use crate::{log_versions, App};
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
@@ -270,9 +270,7 @@ impl StartCommand {
opts.component.node_id.clone(),
opts.component.slow_query.as_ref(),
);
log_versions(version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);

View File

@@ -16,7 +16,6 @@
use async_trait::async_trait;
use common_telemetry::{error, info};
use stat::{get_cpu_limit, get_memory_limit};
use crate::error::Result;
@@ -32,12 +31,6 @@ pub mod standalone;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["version", "short_version", "app"]).unwrap();
static ref CPU_LIMIT: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("greptime_cpu_limit_in_millicores", "cpu limit in millicores", &["app"]).unwrap();
static ref MEMORY_LIMIT: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("greptime_memory_limit_in_bytes", "memory limit in bytes", &["app"]).unwrap();
}
/// wait for the close signal, for unix platform it's SIGINT or SIGTERM
@@ -121,24 +114,6 @@ pub fn log_versions(version: &str, short_version: &str, app: &str) {
log_env_flags();
}
pub fn create_resource_limit_metrics(app: &str) {
if let Some(cpu_limit) = get_cpu_limit() {
info!(
"GreptimeDB start with cpu limit in millicores: {}",
cpu_limit
);
CPU_LIMIT.with_label_values(&[app]).set(cpu_limit);
}
if let Some(memory_limit) = get_memory_limit() {
info!(
"GreptimeDB start with memory limit in bytes: {}",
memory_limit
);
MEMORY_LIMIT.with_label_values(&[app]).set(memory_limit);
}
}
fn log_env_flags() {
info!("command line arguments");
for argument in std::env::args() {

View File

@@ -29,7 +29,7 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{create_resource_limit_metrics, log_versions, App};
use crate::{log_versions, App};
type MetasrvOptions = GreptimeOptions<meta_srv::metasrv::MetasrvOptions>;
@@ -302,9 +302,7 @@ impl StartCommand {
None,
None,
);
log_versions(version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);
info!("Metasrv start command: {:#?}", self);

View File

@@ -83,7 +83,7 @@ use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{Result, StartFlownodeSnafu};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{create_resource_limit_metrics, error, log_versions, App};
use crate::{error, log_versions, App};
pub const APP_NAME: &str = "greptime-standalone";
@@ -457,9 +457,7 @@ impl StartCommand {
None,
opts.component.slow_query.as_ref(),
);
log_versions(version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);
info!("Standalone start command: {:#?}", self);
info!("Standalone options: {opts:#?}");

View File

@@ -13,9 +13,7 @@
// limitations under the License.
pub mod fs;
pub mod oss;
pub mod s3;
use std::collections::HashMap;
use lazy_static::lazy_static;
@@ -27,12 +25,10 @@ use url::{ParseError, Url};
use self::fs::build_fs_backend;
use self::s3::build_s3_backend;
use crate::error::{self, Result};
use crate::object_store::oss::build_oss_backend;
use crate::util::find_dir_and_filename;
pub const FS_SCHEMA: &str = "FS";
pub const S3_SCHEMA: &str = "S3";
pub const OSS_SCHEMA: &str = "OSS";
/// Returns `(schema, Option<host>, path)`
pub fn parse_url(url: &str) -> Result<(String, Option<String>, String)> {
@@ -68,12 +64,6 @@ pub fn build_backend(url: &str, connection: &HashMap<String, String>) -> Result<
})?;
Ok(build_s3_backend(&host, &root, connection)?)
}
OSS_SCHEMA => {
let host = host.context(error::EmptyHostPathSnafu {
url: url.to_string(),
})?;
Ok(build_oss_backend(&host, &root, connection)?)
}
FS_SCHEMA => Ok(build_fs_backend(&root)?),
_ => error::UnsupportedBackendProtocolSnafu {

View File

@@ -1,118 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use object_store::services::Oss;
use object_store::ObjectStore;
use snafu::ResultExt;
use crate::error::{self, Result};
const BUCKET: &str = "bucket";
const ENDPOINT: &str = "endpoint";
const ACCESS_KEY_ID: &str = "access_key_id";
const ACCESS_KEY_SECRET: &str = "access_key_secret";
const ROOT: &str = "root";
const ALLOW_ANONYMOUS: &str = "allow_anonymous";
/// Check if the key is supported in OSS configuration.
pub fn is_supported_in_oss(key: &str) -> bool {
[
ROOT,
ALLOW_ANONYMOUS,
BUCKET,
ENDPOINT,
ACCESS_KEY_ID,
ACCESS_KEY_SECRET,
]
.contains(&key)
}
/// Build an OSS backend using the provided bucket, root, and connection parameters.
pub fn build_oss_backend(
bucket: &str,
root: &str,
connection: &HashMap<String, String>,
) -> Result<ObjectStore> {
let mut builder = Oss::default().bucket(bucket).root(root);
if let Some(endpoint) = connection.get(ENDPOINT) {
builder = builder.endpoint(endpoint);
}
if let Some(access_key_id) = connection.get(ACCESS_KEY_ID) {
builder = builder.access_key_id(access_key_id);
}
if let Some(access_key_secret) = connection.get(ACCESS_KEY_SECRET) {
builder = builder.access_key_secret(access_key_secret);
}
if let Some(allow_anonymous) = connection.get(ALLOW_ANONYMOUS) {
let allow = allow_anonymous.as_str().parse::<bool>().map_err(|e| {
error::InvalidConnectionSnafu {
msg: format!(
"failed to parse the option {}={}, {}",
ALLOW_ANONYMOUS, allow_anonymous, e
),
}
.build()
})?;
if allow {
builder = builder.allow_anonymous();
}
}
let op = ObjectStore::new(builder)
.context(error::BuildBackendSnafu)?
.layer(object_store::layers::LoggingLayer::default())
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::build_prometheus_metrics_layer(true))
.finish();
Ok(op)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_supported_in_oss() {
assert!(is_supported_in_oss(ROOT));
assert!(is_supported_in_oss(ALLOW_ANONYMOUS));
assert!(is_supported_in_oss(BUCKET));
assert!(is_supported_in_oss(ENDPOINT));
assert!(is_supported_in_oss(ACCESS_KEY_ID));
assert!(is_supported_in_oss(ACCESS_KEY_SECRET));
assert!(!is_supported_in_oss("foo"));
assert!(!is_supported_in_oss("BAR"));
}
#[test]
fn test_build_oss_backend_all_fields_valid() {
let mut connection = HashMap::new();
connection.insert(
ENDPOINT.to_string(),
"http://oss-ap-southeast-1.aliyuncs.com".to_string(),
);
connection.insert(ACCESS_KEY_ID.to_string(), "key_id".to_string());
connection.insert(ACCESS_KEY_SECRET.to_string(), "key_secret".to_string());
connection.insert(ALLOW_ANONYMOUS.to_string(), "true".to_string());
let result = build_oss_backend("my-bucket", "my-root", &connection);
assert!(result.is_ok());
}
}

View File

@@ -188,71 +188,7 @@ pub const CACHE_KEY_PREFIXES: [&str; 5] = [
NODE_ADDRESS_PREFIX,
];
/// A set of regions with the same role.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
pub struct RegionRoleSet {
/// Leader regions.
pub leader_regions: Vec<RegionNumber>,
/// Follower regions.
pub follower_regions: Vec<RegionNumber>,
}
impl<'de> Deserialize<'de> for RegionRoleSet {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum RegionRoleSetOrLeaderOnly {
Full {
leader_regions: Vec<RegionNumber>,
follower_regions: Vec<RegionNumber>,
},
LeaderOnly(Vec<RegionNumber>),
}
match RegionRoleSetOrLeaderOnly::deserialize(deserializer)? {
RegionRoleSetOrLeaderOnly::Full {
leader_regions,
follower_regions,
} => Ok(RegionRoleSet::new(leader_regions, follower_regions)),
RegionRoleSetOrLeaderOnly::LeaderOnly(leader_regions) => {
Ok(RegionRoleSet::new(leader_regions, vec![]))
}
}
}
}
impl RegionRoleSet {
/// Create a new region role set.
pub fn new(leader_regions: Vec<RegionNumber>, follower_regions: Vec<RegionNumber>) -> Self {
Self {
leader_regions,
follower_regions,
}
}
/// Add a leader region to the set.
pub fn add_leader_region(&mut self, region_number: RegionNumber) {
self.leader_regions.push(region_number);
}
/// Add a follower region to the set.
pub fn add_follower_region(&mut self, region_number: RegionNumber) {
self.follower_regions.push(region_number);
}
/// Sort the regions.
pub fn sort(&mut self) {
self.follower_regions.sort();
self.leader_regions.sort();
}
}
/// The distribution of regions.
///
/// The key is the datanode id, the value is the region role set.
pub type RegionDistribution = BTreeMap<DatanodeId, RegionRoleSet>;
pub type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;
/// The id of flow.
pub type FlowId = u32;
@@ -1432,8 +1368,7 @@ mod tests {
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::{
DeserializedValueWithBytes, RegionDistribution, RegionRoleSet, TableMetadataManager,
ViewInfoValue, TOPIC_REGION_PREFIX,
DeserializedValueWithBytes, TableMetadataManager, ViewInfoValue, TOPIC_REGION_PREFIX,
};
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::KvBackend;
@@ -2060,8 +1995,7 @@ mod tests {
.unwrap()
.unwrap();
assert_eq!(got.regions, regions.leader_regions);
assert_eq!(got.follower_regions, regions.follower_regions);
assert_eq!(got.regions, regions)
}
}
@@ -2478,28 +2412,4 @@ mod tests {
assert_eq!(current_view_info.columns, new_columns);
assert_eq!(current_view_info.plan_columns, new_plan_columns);
}
#[test]
fn test_region_role_set_deserialize() {
let s = r#"{"leader_regions": [1, 2, 3], "follower_regions": [4, 5, 6]}"#;
let region_role_set: RegionRoleSet = serde_json::from_str(s).unwrap();
assert_eq!(region_role_set.leader_regions, vec![1, 2, 3]);
assert_eq!(region_role_set.follower_regions, vec![4, 5, 6]);
let s = r#"[1, 2, 3]"#;
let region_role_set: RegionRoleSet = serde_json::from_str(s).unwrap();
assert_eq!(region_role_set.leader_regions, vec![1, 2, 3]);
assert!(region_role_set.follower_regions.is_empty());
}
#[test]
fn test_region_distribution_deserialize() {
let s = r#"{"1": [1,2,3], "2": {"leader_regions": [7, 8, 9], "follower_regions": [10, 11, 12]}}"#;
let region_distribution: RegionDistribution = serde_json::from_str(s).unwrap();
assert_eq!(region_distribution.len(), 2);
assert_eq!(region_distribution[&1].leader_regions, vec![1, 2, 3]);
assert!(region_distribution[&1].follower_regions.is_empty());
assert_eq!(region_distribution[&2].leader_regions, vec![7, 8, 9]);
assert_eq!(region_distribution[&2].follower_regions, vec![10, 11, 12]);
}
}

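For reference, a minimal sketch of the simplified RegionDistribution shape restored by this change, assuming only std plus the serde_json crate; the type aliases mirror the ones above, but the snippet is illustrative rather than the crate's code.

use std::collections::BTreeMap;

type DatanodeId = u64;
type RegionNumber = u32;
type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;

fn main() {
    let mut distribution = RegionDistribution::new();
    distribution.insert(1, vec![1, 2, 3]);
    distribution.insert(2, vec![7, 8, 9]);

    // Serializes to the plain per-datanode list format, which is also the
    // legacy format the removed untagged deserializer had to keep accepting.
    let json = serde_json::to_string(&distribution).unwrap();
    assert_eq!(json, r#"{"1":[1,2,3],"2":[7,8,9]}"#);
}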
View File

@@ -24,7 +24,7 @@ use table::metadata::TableId;
use crate::error::{DatanodeTableInfoNotFoundSnafu, InvalidMetadataSnafu, Result};
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::{
MetadataKey, MetadataValue, RegionDistribution, RegionRoleSet, DATANODE_TABLE_KEY_PATTERN,
MetadataKey, MetadataValue, RegionDistribution, DATANODE_TABLE_KEY_PATTERN,
DATANODE_TABLE_KEY_PREFIX,
};
use crate::kv_backend::txn::{Txn, TxnOp};
@@ -118,31 +118,23 @@ impl Display for DatanodeTableKey {
pub struct DatanodeTableValue {
pub table_id: TableId,
pub regions: Vec<RegionNumber>,
#[serde(default)]
pub follower_regions: Vec<RegionNumber>,
#[serde(flatten)]
pub region_info: RegionInfo,
version: u64,
}
impl DatanodeTableValue {
pub fn new(table_id: TableId, region_role_set: RegionRoleSet, region_info: RegionInfo) -> Self {
let RegionRoleSet {
leader_regions,
follower_regions,
} = region_role_set;
pub fn new(table_id: TableId, regions: Vec<RegionNumber>, region_info: RegionInfo) -> Self {
Self {
table_id,
regions: leader_regions,
follower_regions,
regions,
region_info,
version: 0,
}
}
}
/// Decodes [`KeyValue`] to [`DatanodeTableValue`].
/// Decodes `KeyValue` to ((),`DatanodeTableValue`)
pub fn datanode_table_value_decoder(kv: KeyValue) -> Result<DatanodeTableValue> {
DatanodeTableValue::try_from_raw_value(&kv.value)
}
@@ -381,11 +373,10 @@ mod tests {
let value = DatanodeTableValue {
table_id: 42,
regions: vec![1, 2, 3],
follower_regions: vec![],
region_info: RegionInfo::default(),
version: 1,
};
let literal = br#"{"table_id":42,"regions":[1,2,3],"follower_regions":[],"engine":"","region_storage_path":"","region_options":{},"region_wal_options":{},"version":1}"#;
let literal = br#"{"table_id":42,"regions":[1,2,3],"engine":"","region_storage_path":"","region_options":{},"region_wal_options":{},"version":1}"#;
let raw_value = value.try_as_raw_value().unwrap();
assert_eq!(raw_value, literal);
@@ -476,7 +467,6 @@ mod tests {
let table_value = DatanodeTableValue {
table_id: 1,
regions: vec![],
follower_regions: vec![],
region_info,
version: 1,
};

View File

@@ -40,23 +40,17 @@ pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution
let mut regions_id_map = RegionDistribution::new();
for route in region_routes.iter() {
if let Some(peer) = route.leader_peer.as_ref() {
let region_number = route.region.id.region_number();
regions_id_map
.entry(peer.id)
.or_default()
.add_leader_region(region_number);
let region_id = route.region.id.region_number();
regions_id_map.entry(peer.id).or_default().push(region_id);
}
for peer in route.follower_peers.iter() {
let region_number = route.region.id.region_number();
regions_id_map
.entry(peer.id)
.or_default()
.add_follower_region(region_number);
let region_id = route.region.id.region_number();
regions_id_map.entry(peer.id).or_default().push(region_id);
}
}
for (_, region_role_set) in regions_id_map.iter_mut() {
// Sort the regions in ascending order.
region_role_set.sort()
for (_, regions) in regions_id_map.iter_mut() {
// id asc
regions.sort()
}
regions_id_map
}
@@ -461,7 +455,6 @@ impl From<PbPartition> for Partition {
#[cfg(test)]
mod tests {
use super::*;
use crate::key::RegionRoleSet;
#[test]
fn test_leader_is_downgraded() {
@@ -618,8 +611,8 @@ mod tests {
let distribution = region_distribution(&region_routes);
assert_eq!(distribution.len(), 3);
assert_eq!(distribution[&1], RegionRoleSet::new(vec![1], vec![2]));
assert_eq!(distribution[&2], RegionRoleSet::new(vec![2], vec![1]));
assert_eq!(distribution[&3], RegionRoleSet::new(vec![], vec![1, 2]));
assert_eq!(distribution[&1], vec![1, 2]);
assert_eq!(distribution[&2], vec![1, 2]);
assert_eq!(distribution[&3], vec![1, 2]);
}
}

View File

@@ -1,11 +0,0 @@
[package]
name = "stat"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
nix.workspace = true
[lints]
workspace = true

View File

@@ -1,183 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(dead_code)]
use std::fs::read_to_string;
use std::path::Path;
#[cfg(target_os = "linux")]
use nix::sys::{statfs, statfs::statfs};
/// `MAX_VALUE` is used to indicate that the resource is unlimited.
pub const MAX_VALUE: i64 = -1;
const CGROUP_UNIFIED_MOUNTPOINT: &str = "/sys/fs/cgroup";
const MEMORY_MAX_FILE_CGROUP_V2: &str = "memory.max";
const MEMORY_MAX_FILE_CGROUP_V1: &str = "memory.limit_in_bytes";
const CPU_MAX_FILE_CGROUP_V2: &str = "cpu.max";
const CPU_QUOTA_FILE_CGROUP_V1: &str = "cpu.cfs_quota_us";
const CPU_PERIOD_FILE_CGROUP_V1: &str = "cpu.cfs_period_us";
// `MAX_VALUE_CGROUP_V2` string in `/sys/fs/cgroup/cpu.max` and `/sys/fs/cgroup/memory.max` to indicate that the resource is unlimited.
const MAX_VALUE_CGROUP_V2: &str = "max";
// For cgroup v1, if the memory is unlimited, it will return a very large value(different from platform) that close to 2^63.
// For easier comparison, if the memory limit is larger than 1PB we consider it as unlimited.
const MAX_MEMORY_IN_BYTES: i64 = 1125899906842624; // 1PB
/// Get the limit of memory in bytes.
///
/// - If the memory is unlimited, return `-1`.
/// - Return `None` if it fails to read the memory limit or not on linux.
pub fn get_memory_limit() -> Option<i64> {
#[cfg(target_os = "linux")]
{
let memory_max_file = if is_cgroup_v2()? {
// Read `/sys/fs/cgroup/memory.max` to get the memory limit.
MEMORY_MAX_FILE_CGROUP_V2
} else {
// Read `/sys/fs/cgroup/memory.limit_in_bytes` to get the memory limit.
MEMORY_MAX_FILE_CGROUP_V1
};
// For cgroup v1, it will return a very large value(different from platform) if the memory is unlimited.
let memory_limit =
read_value_from_file(Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(memory_max_file))?;
// If memory limit exceeds 1PB(cgroup v1), consider it as unlimited.
if memory_limit > MAX_MEMORY_IN_BYTES {
return Some(MAX_VALUE);
}
Some(memory_limit)
}
#[cfg(not(target_os = "linux"))]
None
}
/// Get the limit of cpu in millicores.
///
/// - If the cpu is unlimited, return `-1`.
/// - Return `None` if it fails to read the cpu limit or not on linux.
pub fn get_cpu_limit() -> Option<i64> {
#[cfg(target_os = "linux")]
if is_cgroup_v2()? {
// Read `/sys/fs/cgroup/cpu.max` to get the cpu limit.
get_cgroup_v2_cpu_limit(Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_MAX_FILE_CGROUP_V2))
} else {
// Read `/sys/fs/cgroup/cpu.cfs_quota_us` and `/sys/fs/cgroup/cpu.cfs_period_us` to get the cpu limit.
let quota = read_value_from_file(
Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_QUOTA_FILE_CGROUP_V1),
)?;
if quota == MAX_VALUE {
return Some(MAX_VALUE);
}
let period = read_value_from_file(
Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_PERIOD_FILE_CGROUP_V1),
)?;
// Return the cpu limit in millicores.
Some(quota * 1000 / period)
}
#[cfg(not(target_os = "linux"))]
None
}
// Check whether the cgroup is v2.
// - Return `true` if the cgroup is v2, otherwise return `false`.
// - Return `None` if the detection fails or not on linux.
fn is_cgroup_v2() -> Option<bool> {
#[cfg(target_os = "linux")]
{
let path = Path::new(CGROUP_UNIFIED_MOUNTPOINT);
let fs_stat = statfs(path).ok()?;
Some(fs_stat.filesystem_type() == statfs::CGROUP2_SUPER_MAGIC)
}
#[cfg(not(target_os = "linux"))]
None
}
fn read_value_from_file<P: AsRef<Path>>(path: P) -> Option<i64> {
let content = read_to_string(&path).ok()?;
// If the content starts with "max", return `MAX_VALUE`.
if content.starts_with(MAX_VALUE_CGROUP_V2) {
return Some(MAX_VALUE);
}
content.trim().parse::<i64>().ok()
}
fn get_cgroup_v2_cpu_limit<P: AsRef<Path>>(path: P) -> Option<i64> {
let content = read_to_string(&path).ok()?;
let fields = content.trim().split(' ').collect::<Vec<&str>>();
if fields.len() != 2 {
return None;
}
// If the cpu is unlimited, it will be `-1`.
let quota = fields[0].trim();
if quota == MAX_VALUE_CGROUP_V2 {
return Some(MAX_VALUE);
}
let quota = quota.parse::<i64>().ok()?;
let period = fields[1].trim();
let period = period.parse::<i64>().ok()?;
// Return the cpu limit in millicores.
Some(quota * 1000 / period)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_value_from_file() {
assert_eq!(
read_value_from_file(Path::new("testdata").join("memory.max")).unwrap(),
100000
);
assert_eq!(
read_value_from_file(Path::new("testdata").join("memory.max.unlimited")).unwrap(),
MAX_VALUE
);
assert_eq!(read_value_from_file(Path::new("non_existent_file")), None);
}
#[test]
fn test_get_cgroup_v2_cpu_limit() {
assert_eq!(
get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max")).unwrap(),
1500
);
assert_eq!(
get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max.unlimited")).unwrap(),
MAX_VALUE
);
assert_eq!(
get_cgroup_v2_cpu_limit(Path::new("non_existent_file")),
None
);
}
}

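As a reminder of what the removed stat crate computed, here is a minimal standalone sketch of the cgroup v2 cpu.max parsing: a quota and period in microseconds become a millicore limit, and the literal "max" means unlimited. `parse_cpu_max` is a hypothetical helper name; the expected values match the deleted testdata files.

const MAX_VALUE: i64 = -1;

/// Parses the contents of `/sys/fs/cgroup/cpu.max` ("<quota> <period>" or "max <period>").
fn parse_cpu_max(content: &str) -> Option<i64> {
    let fields: Vec<&str> = content.trim().split(' ').collect();
    if fields.len() != 2 {
        return None;
    }
    if fields[0] == "max" {
        return Some(MAX_VALUE);
    }
    let quota: i64 = fields[0].parse().ok()?;
    let period: i64 = fields[1].parse().ok()?;
    // 150000 / 100000 of a CPU is 1.5 cores, i.e. 1500 millicores.
    Some(quota * 1000 / period)
}

fn main() {
    assert_eq!(parse_cpu_max("150000 100000"), Some(1500));
    assert_eq!(parse_cpu_max("max 100000"), Some(MAX_VALUE));
    assert_eq!(parse_cpu_max("garbage"), None);
}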
View File

@@ -1,17 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod cgroups;
pub use cgroups::*;

View File

@@ -1 +0,0 @@
150000 100000

View File

@@ -1 +0,0 @@
max 100000

View File

@@ -1 +0,0 @@
100000

View File

@@ -1 +0,0 @@
max

View File

@@ -6,7 +6,6 @@ license.workspace = true
[features]
testing = []
enterprise = []
[lints]
workspace = true

View File

@@ -559,8 +559,6 @@ async fn open_all_regions(
init_regions_parallelism: usize,
) -> Result<()> {
let mut regions = vec![];
#[cfg(feature = "enterprise")]
let mut follower_regions = vec![];
for table_value in table_values {
for region_number in table_value.regions {
// Augments region options with wal options if a wal options is provided.
@@ -578,24 +576,6 @@ async fn open_all_regions(
region_options,
));
}
#[cfg(feature = "enterprise")]
for region_number in table_value.follower_regions {
// Augments region options with wal options if a wal options is provided.
let mut region_options = table_value.region_info.region_options.clone();
prepare_wal_options(
&mut region_options,
RegionId::new(table_value.table_id, region_number),
&table_value.region_info.region_wal_options,
);
follower_regions.push((
RegionId::new(table_value.table_id, region_number),
table_value.region_info.engine.clone(),
table_value.region_info.region_storage_path.clone(),
region_options,
));
}
}
let num_regions = regions.len();
info!("going to open {} region(s)", num_regions);
@@ -637,43 +617,6 @@ async fn open_all_regions(
}
}
}
#[cfg(feature = "enterprise")]
if !follower_regions.is_empty() {
info!(
"going to open {} follower region(s)",
follower_regions.len()
);
let mut region_requests = Vec::with_capacity(follower_regions.len());
for (region_id, engine, store_path, options) in follower_regions {
let region_dir = region_dir(&store_path, region_id);
region_requests.push((
region_id,
RegionOpenRequest {
engine,
region_dir,
options,
skip_wal_replay: true,
},
));
}
let open_regions = region_server
.handle_batch_open_requests(init_regions_parallelism, region_requests)
.await?;
ensure!(
open_regions.len() == num_regions,
error::UnexpectedSnafu {
violated: format!(
"Expected to open {} of follower regions, only {} of regions has opened",
num_regions,
open_regions.len()
)
}
);
}
info!("all regions are opened");
Ok(())
@@ -689,7 +632,6 @@ mod tests {
use common_base::Plugins;
use common_meta::cache::LayeredCacheRegistryBuilder;
use common_meta::key::datanode_table::DatanodeTableManager;
use common_meta::key::RegionRoleSet;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackendRef;
use mito2::engine::MITO_ENGINE_NAME;
@@ -709,7 +651,7 @@ mod tests {
"foo/bar/weny",
HashMap::from([("foo".to_string(), "bar".to_string())]),
HashMap::default(),
BTreeMap::from([(0, RegionRoleSet::new(vec![0, 1, 2], vec![]))]),
BTreeMap::from([(0, vec![0, 1, 2])]),
)
.unwrap();

View File

@@ -14,8 +14,9 @@
//! Frontend client to run flow as batching task which is time-window-aware normal query triggered every tick set by user
use std::sync::{Arc, Weak};
use std::time::SystemTime;
use std::collections::HashMap;
use std::sync::{Arc, Mutex, Weak};
use std::time::{Duration, Instant, SystemTime};
use api::v1::greptime_request::Request;
use api::v1::CreateTableExpr;
@@ -26,20 +27,21 @@ use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use common_query::Output;
use common_telemetry::warn;
use common_telemetry::{debug, warn};
use itertools::Itertools;
use meta_client::client::MetaClient;
use rand::rng;
use rand::seq::SliceRandom;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use crate::batching_mode::task::BatchingTask;
use crate::batching_mode::{
DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, FRONTEND_ACTIVITY_TIMEOUT, GRPC_CONN_TIMEOUT,
GRPC_MAX_RETRIES,
};
use crate::error::{ExternalSnafu, InvalidRequestSnafu, NoAvailableFrontendSnafu, UnexpectedSnafu};
use crate::{Error, FlowAuthHeader};
use crate::metrics::METRIC_FLOW_BATCHING_ENGINE_GUESS_FE_LOAD;
use crate::{Error, FlowAuthHeader, FlowId};
/// Just like [`GrpcQueryHandler`] but use BoxedError
///
@@ -74,6 +76,105 @@ impl<
type HandlerMutable = Arc<std::sync::Mutex<Option<Weak<dyn GrpcQueryHandlerWithBoxedError>>>>;
/// Statistics about running query on this frontend from flownode
#[derive(Debug, Default, Clone)]
struct FrontendStat {
/// The query for flow id has been running since this timestamp
since: HashMap<FlowId, Instant>,
/// The average query time for each flow id
/// This is used to calculate the average query time for each flow id
past_query_avg: HashMap<FlowId, (usize, Duration)>,
}
#[derive(Debug, Default, Clone)]
pub struct FrontendStats {
/// The statistics for each flow id
stats: Arc<Mutex<HashMap<String, FrontendStat>>>,
}
impl FrontendStats {
pub fn observe(&self, frontend_addr: &str, flow_id: FlowId) -> FrontendStatsGuard {
let mut stats = self.stats.lock().expect("Failed to lock frontend stats");
let stat = stats.entry(frontend_addr.to_string()).or_default();
stat.since.insert(flow_id, Instant::now());
FrontendStatsGuard {
stats: self.stats.clone(),
frontend_addr: frontend_addr.to_string(),
cur: flow_id,
}
}
/// return frontend addrs sorted by load, from lightest to heaviest
/// The load is calculated as the total average query time for each flow id plus running query's total running time elapsed
pub fn sort_by_load(&self) -> Vec<String> {
let stats = self.stats.lock().expect("Failed to lock frontend stats");
let fe_load_factor = stats
.iter()
.map(|(node_addr, stat)| {
// total expected avg running time for all currently running queries
let total_expect_avg_run_time = stat
.since
.keys()
.map(|f| {
let (count, total_duration) =
stat.past_query_avg.get(f).unwrap_or(&(0, Duration::ZERO));
if *count == 0 {
0.0
} else {
total_duration.as_secs_f64() / *count as f64
}
})
.sum::<f64>();
let total_cur_running_time = stat
.since
.values()
.map(|since| since.elapsed().as_secs_f64())
.sum::<f64>();
(
node_addr.to_string(),
total_expect_avg_run_time + total_cur_running_time,
)
})
.sorted_by(|(_, load_a), (_, load_b)| {
load_a
.partial_cmp(load_b)
.unwrap_or(std::cmp::Ordering::Equal)
})
.collect::<Vec<_>>();
debug!("Frontend load factor: {:?}", fe_load_factor);
for (node_addr, load) in &fe_load_factor {
METRIC_FLOW_BATCHING_ENGINE_GUESS_FE_LOAD
.with_label_values(&[&node_addr.to_string()])
.observe(*load);
}
fe_load_factor
.into_iter()
.map(|(addr, _)| addr)
.collect::<Vec<_>>()
}
}
pub struct FrontendStatsGuard {
stats: Arc<Mutex<HashMap<String, FrontendStat>>>,
frontend_addr: String,
cur: FlowId,
}
impl Drop for FrontendStatsGuard {
fn drop(&mut self) {
let mut stats = self.stats.lock().expect("Failed to lock frontend stats");
if let Some(stat) = stats.get_mut(&self.frontend_addr) {
if let Some(since) = stat.since.remove(&self.cur) {
let elapsed = since.elapsed();
let (count, total_duration) = stat.past_query_avg.entry(self.cur).or_default();
*count += 1;
*total_duration += elapsed;
}
}
}
}
/// A simple frontend client able to execute sql using grpc protocol
///
/// This is for computation-heavy query which need to offload computation to frontend, lifting the load from flownode
@@ -83,6 +184,7 @@ pub enum FrontendClient {
meta_client: Arc<MetaClient>,
chnl_mgr: ChannelManager,
auth: Option<FlowAuthHeader>,
fe_stats: FrontendStats,
},
Standalone {
/// for the sake of simplicity still use grpc even in standalone mode
@@ -114,6 +216,7 @@ impl FrontendClient {
ChannelManager::with_config(cfg)
},
auth,
fe_stats: Default::default(),
}
}
@@ -192,6 +295,7 @@ impl FrontendClient {
meta_client: _,
chnl_mgr,
auth,
fe_stats,
} = self
else {
return UnexpectedSnafu {
@@ -208,8 +312,21 @@ impl FrontendClient {
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_millis() as i64;
// shuffle the frontends to avoid always pick the same one
frontends.shuffle(&mut rng());
let node_addrs_by_load = fe_stats.sort_by_load();
// index+1 to load order asc, so that the lightest node has load 1 and non-existent node has load 0
let addr2load = node_addrs_by_load
.iter()
.enumerate()
.map(|(i, id)| (id.clone(), i + 1))
.collect::<HashMap<_, _>>();
// sort frontends by load, from lightest to heaviest
frontends.sort_by(|(_, a), (_, b)| {
// if not even in stats, treat as 0 load since never been queried
let load_a = addr2load.get(&a.peer.addr).unwrap_or(&0);
let load_b = addr2load.get(&b.peer.addr).unwrap_or(&0);
load_a.cmp(load_b)
});
debug!("Frontend nodes sorted by load: {:?}", frontends);
// found node with maximum last_activity_ts
for (_, node_info) in frontends
@@ -257,6 +374,7 @@ impl FrontendClient {
create: CreateTableExpr,
catalog: &str,
schema: &str,
task: Option<&BatchingTask>,
) -> Result<u32, Error> {
self.handle(
Request::Ddl(api::v1::DdlRequest {
@@ -265,6 +383,7 @@ impl FrontendClient {
catalog,
schema,
&mut None,
task,
)
.await
}
@@ -276,15 +395,19 @@ impl FrontendClient {
catalog: &str,
schema: &str,
peer_desc: &mut Option<PeerDesc>,
task: Option<&BatchingTask>,
) -> Result<u32, Error> {
match self {
FrontendClient::Distributed { .. } => {
FrontendClient::Distributed { fe_stats, .. } => {
let db = self.get_random_active_frontend(catalog, schema).await?;
*peer_desc = Some(PeerDesc::Dist {
peer: db.peer.clone(),
});
let flow_id = task.map(|t| t.config.flow_id).unwrap_or_default();
let _guard = fe_stats.observe(&db.peer.addr, flow_id);
db.database
.handle_with_retry(req.clone(), GRPC_MAX_RETRIES)
.await

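A minimal standalone sketch of the load guess introduced above, under simplifying assumptions: the per-flow (count, total duration) bookkeeping is collapsed into a precomputed average, `RunningQuery` and `sort_by_guessed_load` are hypothetical names, and the frontend addresses are placeholders. Each frontend's load is the sum of the historical average runtime of its in-flight flows plus their elapsed running time, and frontends are ordered from lightest to heaviest so the lightest one receives the next query.

use std::collections::HashMap;
use std::time::Duration;

struct RunningQuery {
    elapsed: Duration,          // how long this query has been running
    past_avg: Option<Duration>, // average duration of past runs of the same flow
}

fn sort_by_guessed_load(stats: &HashMap<String, Vec<RunningQuery>>) -> Vec<String> {
    let mut loads: Vec<(String, f64)> = stats
        .iter()
        .map(|(addr, running)| {
            // Expected remaining work: past average plus time already spent.
            let load: f64 = running
                .iter()
                .map(|q| q.past_avg.unwrap_or(Duration::ZERO).as_secs_f64() + q.elapsed.as_secs_f64())
                .sum();
            (addr.clone(), load)
        })
        .collect();
    loads.sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    loads.into_iter().map(|(addr, _)| addr).collect()
}

fn main() {
    let mut stats = HashMap::new();
    stats.insert(
        "fe-a:4001".to_string(),
        vec![RunningQuery { elapsed: Duration::from_secs(30), past_avg: Some(Duration::from_secs(10)) }],
    );
    stats.insert("fe-b:4001".to_string(), vec![]); // idle frontend

    // The idle frontend sorts first, so it is picked for the next flow query.
    assert_eq!(
        sort_by_guessed_load(&stats),
        vec!["fe-b:4001".to_string(), "fe-a:4001".to_string()]
    );
}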
View File

@@ -280,7 +280,7 @@ impl BatchingTask {
let catalog = &self.config.sink_table_name[0];
let schema = &self.config.sink_table_name[1];
frontend_client
.create(expr.clone(), catalog, schema)
.create(expr.clone(), catalog, schema, Some(self))
.await?;
Ok(())
}
@@ -361,7 +361,7 @@ impl BatchingTask {
};
frontend_client
.handle(req, catalog, schema, &mut peer_desc)
.handle(req, catalog, schema, &mut peer_desc, Some(self))
.await
};

View File

@@ -58,6 +58,14 @@ lazy_static! {
vec![60., 4. * 60., 16. * 60., 64. * 60., 256. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_BATCHING_ENGINE_GUESS_FE_LOAD: HistogramVec =
register_histogram_vec!(
"greptime_flow_batching_engine_guess_fe_load",
"flow batching engine guessed frontend load",
&["fe_addr"],
vec![60., 4. * 60., 16. * 60., 64. * 60., 256. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -40,7 +40,7 @@ pub(crate) fn infer_time_bucket<'a>(files: impl Iterator<Item = &'a FileHandle>)
.unwrap_or_else(|| TIME_BUCKETS.max()) // safety: TIME_BUCKETS cannot be empty.
}
pub(crate) struct TimeBuckets([i64; 5]);
pub(crate) struct TimeBuckets([i64; 7]);
impl TimeBuckets {
/// Fits a given time span into time bucket by find the minimum bucket that can cover the span.
@@ -71,11 +71,13 @@ impl TimeBuckets {
/// A set of predefined time buckets.
pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([
60 * 60, // one hour
2 * 60 * 60, // two hours
12 * 60 * 60, // twelve hours
24 * 60 * 60, // one day
7 * 24 * 60 * 60, // one week
60 * 60, // one hour
2 * 60 * 60, // two hours
12 * 60 * 60, // twelve hours
24 * 60 * 60, // one day
7 * 24 * 60 * 60, // one week
365 * 24 * 60 * 60, // one year
10 * 365 * 24 * 60 * 60, // ten years
]);
#[cfg(test)]
@@ -105,7 +107,7 @@ mod tests {
TIME_BUCKETS.get(3),
TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(3) - 1)
);
assert_eq!(TIME_BUCKETS.get(4), TIME_BUCKETS.fit_time_bucket(i64::MAX));
assert_eq!(TIME_BUCKETS.get(6), TIME_BUCKETS.fit_time_bucket(i64::MAX));
}
#[test]

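A minimal sketch of the bucket-fitting rule with the two new buckets (one year and ten years): pick the smallest predefined bucket that covers the span, falling back to the largest one. This mirrors what the updated test asserts for i64::MAX; the exact comparison in the real implementation may differ slightly.

const TIME_BUCKETS: [i64; 7] = [
    60 * 60,                 // one hour
    2 * 60 * 60,             // two hours
    12 * 60 * 60,            // twelve hours
    24 * 60 * 60,            // one day
    7 * 24 * 60 * 60,        // one week
    365 * 24 * 60 * 60,      // one year
    10 * 365 * 24 * 60 * 60, // ten years
];

/// Returns the smallest bucket that covers `span_sec`, or the largest bucket.
fn fit_time_bucket(span_sec: i64) -> i64 {
    *TIME_BUCKETS
        .iter()
        .find(|bucket| **bucket >= span_sec)
        .unwrap_or(&TIME_BUCKETS[TIME_BUCKETS.len() - 1])
}

fn main() {
    assert_eq!(fit_time_bucket(30 * 60), 60 * 60); // half an hour fits the 1h bucket
    assert_eq!(fit_time_bucket(3 * 24 * 60 * 60), 7 * 24 * 60 * 60); // 3 days fit the 1w bucket
    assert_eq!(fit_time_bucket(i64::MAX), 10 * 365 * 24 * 60 * 60); // falls back to the 10y bucket
}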
View File

@@ -61,30 +61,6 @@ async fn put_and_flush(
assert_eq!(0, result.affected_rows);
}
async fn flush(engine: &MitoEngine, region_id: RegionId) {
let result = engine
.handle_request(
region_id,
RegionRequest::Flush(RegionFlushRequest {
row_group_size: None,
}),
)
.await
.unwrap();
assert_eq!(0, result.affected_rows);
}
async fn compact(engine: &MitoEngine, region_id: RegionId) {
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
}
async fn delete_and_flush(
engine: &MitoEngine,
region_id: RegionId,
@@ -171,7 +147,14 @@ async fn test_compaction_region() {
delete_and_flush(&engine, region_id, &column_schemas, 15..30).await;
put_and_flush(&engine, region_id, &column_schemas, 15..25).await;
compact(&engine, region_id).await;
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
// Input:
@@ -196,136 +179,6 @@ async fn test_compaction_region() {
assert_eq!((0..25).map(|v| v * 1000).collect::<Vec<_>>(), vec);
}
#[tokio::test]
async fn test_infer_compaction_time_window() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
env.get_schema_metadata_manager()
.register_region_table_info(
region_id.table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.build();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// time window should be absent
assert!(engine
.get_region(region_id)
.unwrap()
.version_control
.current()
.version
.compaction_time_window
.is_none());
put_and_flush(&engine, region_id, &column_schemas, 1..2).await;
put_and_flush(&engine, region_id, &column_schemas, 2..3).await;
put_and_flush(&engine, region_id, &column_schemas, 3..4).await;
put_and_flush(&engine, region_id, &column_schemas, 4..5).await;
compact(&engine, region_id).await;
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
1,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
assert_eq!(
Duration::from_secs(3600),
engine
.get_region(region_id)
.unwrap()
.version_control
.current()
.version
.compaction_time_window
.unwrap()
);
// write two rows to trigger another flush.
// note: this two rows still use the original part_duration (1day by default), so they are written
// to the same time partition and flushed to one file.
put_rows(
&engine,
region_id,
Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 3601, 3602, 0),
},
)
.await;
put_rows(
&engine,
region_id,
Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 7201, 7202, 0),
},
)
.await;
// this flush should update part_duration in TimePartitions.
flush(&engine, region_id).await;
compact(&engine, region_id).await;
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
2,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
// These data should use new part_duration in TimePartitions and get written to two different
// time partitions so we end up with 4 ssts.
put_rows(
&engine,
region_id,
Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 3601, 3602, 0),
},
)
.await;
put_rows(
&engine,
region_id,
Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 7201, 7202, 0),
},
)
.await;
flush(&engine, region_id).await;
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
4,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
}
#[tokio::test]
async fn test_compaction_overlapping_files() {
common_telemetry::init_default_ut_logging();
@@ -363,7 +216,14 @@ async fn test_compaction_overlapping_files() {
put_and_flush(&engine, region_id, &column_schemas, 20..30).await;
delete_and_flush(&engine, region_id, &column_schemas, 30..40).await;
compact(&engine, region_id).await;
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
@@ -422,7 +282,15 @@ async fn test_compaction_region_with_overlapping() {
put_and_flush(&engine, region_id, &column_schemas, 3600..10800).await; // window 10800
delete_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
compact(&engine, region_id).await;
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
let stream = scanner.scan().await.unwrap();
let vec = collect_stream_ts(stream).await;
@@ -468,7 +336,15 @@ async fn test_compaction_region_with_overlapping_delete_all() {
put_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
delete_and_flush(&engine, region_id, &column_schemas, 0..10800).await; // window 10800
compact(&engine, region_id).await;
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(
2,
@@ -601,7 +477,15 @@ async fn test_compaction_update_time_window() {
put_and_flush(&engine, region_id, &column_schemas, 1800..2700).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 2700..3600).await; // window 3600
compact(&engine, region_id).await;
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
assert_eq!(
engine
.get_region(region_id)
@@ -688,7 +572,13 @@ async fn test_change_region_compaction_window() {
put_and_flush(&engine, region_id, &column_schemas, 1200..1800).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 1800..2400).await; // window 3600
compact(&engine, region_id).await;
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Put window 7200
put_and_flush(&engine, region_id, &column_schemas, 4000..5000).await;
@@ -733,7 +623,13 @@ async fn test_change_region_compaction_window() {
// Compaction again. It should compacts window 3600 and 7200
// into 7200.
compact(&engine, region_id).await;
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Check compaction window.
{
let region = engine.get_region(region_id).unwrap();
@@ -813,7 +709,13 @@ async fn test_open_overwrite_compaction_window() {
put_and_flush(&engine, region_id, &column_schemas, 1200..1800).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 1800..2400).await; // window 3600
compact(&engine, region_id).await;
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Check compaction window.
{

View File

@@ -21,7 +21,6 @@
#![feature(result_flattening)]
#![feature(int_roundings)]
#![feature(debug_closure_helpers)]
#![feature(duration_constructors)]
#[cfg(any(test, feature = "test"))]
#[cfg_attr(feature = "test", allow(unused))]

View File

@@ -40,22 +40,25 @@ use crate::memtable::key_values::KeyValue;
use crate::memtable::version::SmallMemtableVec;
use crate::memtable::{KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};
/// Initial time window if not specified.
const INITIAL_TIME_WINDOW: Duration = Duration::from_days(1);
/// A partition holds rows with timestamps between `[min, max)`.
#[derive(Debug, Clone)]
pub struct TimePartition {
/// Memtable of the partition.
memtable: MemtableRef,
/// Time range of the partition. `min` is inclusive and `max` is exclusive.
time_range: PartTimeRange,
/// `None` means there is no time range. The time
/// range is `None` if and only if the [TimePartitions::part_duration] is `None`.
time_range: Option<PartTimeRange>,
}
impl TimePartition {
/// Returns whether the `ts` belongs to the partition.
fn contains_timestamp(&self, ts: Timestamp) -> bool {
self.time_range.contains_timestamp(ts)
let Some(range) = self.time_range else {
return true;
};
range.contains_timestamp(ts)
}
/// Write rows to the part.
@@ -69,11 +72,14 @@ impl TimePartition {
}
/// Write a partial [BulkPart] according to [TimePartition::time_range].
fn write_record_batch_partial(&self, part: &BulkPart) -> Result<()> {
fn write_record_batch_partial(&self, part: &BulkPart) -> error::Result<()> {
let Some(range) = self.time_range else {
unreachable!("TimePartition must have explicit time range when a bulk request involves multiple time partition")
};
let Some(filtered) = filter_record_batch(
part,
self.time_range.min_timestamp.value(),
self.time_range.max_timestamp.value(),
range.min_timestamp.value(),
range.max_timestamp.value(),
)?
else {
return Ok(());
@@ -203,7 +209,10 @@ pub struct TimePartitions {
/// Mutable data of partitions.
inner: Mutex<PartitionsInner>,
/// Duration of a partition.
part_duration: Duration,
///
/// `None` means there is only one partition and the [TimePartition::time_range] is
/// also `None`.
part_duration: Option<Duration>,
/// Metadata of the region.
metadata: RegionMetadataRef,
/// Builder of memtables.
@@ -220,10 +229,26 @@ impl TimePartitions {
next_memtable_id: MemtableId,
part_duration: Option<Duration>,
) -> Self {
let inner = PartitionsInner::new(next_memtable_id);
let mut inner = PartitionsInner::new(next_memtable_id);
if part_duration.is_none() {
// If `part_duration` is None, then we create a partition with `None` time
// range so we will write all rows to that partition.
let memtable = builder.build(inner.alloc_memtable_id(), &metadata);
debug!(
"Creates a time partition for all timestamps, region: {}, memtable_id: {}",
metadata.region_id,
memtable.id(),
);
let part = TimePartition {
memtable,
time_range: None,
};
inner.parts.push(part);
}
Self {
inner: Mutex::new(inner),
part_duration: part_duration.unwrap_or(INITIAL_TIME_WINDOW),
part_duration,
metadata,
builder,
}
@@ -304,18 +329,19 @@ impl TimePartitions {
part_start: Timestamp,
inner: &mut MutexGuard<PartitionsInner>,
) -> Result<TimePartition> {
let part_duration = self.part_duration.unwrap();
let part_pos = match inner
.parts
.iter()
.position(|part| part.time_range.min_timestamp == part_start)
.position(|part| part.time_range.unwrap().min_timestamp == part_start)
{
Some(pos) => pos,
None => {
let range = PartTimeRange::from_start_duration(part_start, self.part_duration)
let range = PartTimeRange::from_start_duration(part_start, part_duration)
.with_context(|| InvalidRequestSnafu {
region_id: self.metadata.region_id,
reason: format!(
"Partition time range for {part_start:?} is out of bound, bucket size: {:?}", self.part_duration
"Partition time range for {part_start:?} is out of bound, bucket size: {part_duration:?}",
),
})?;
let memtable = self
@@ -325,14 +351,14 @@ impl TimePartitions {
"Create time partition {:?} for region {}, duration: {:?}, memtable_id: {}, parts_total: {}",
range,
self.metadata.region_id,
self.part_duration,
part_duration,
memtable.id(),
inner.parts.len() + 1
);
let pos = inner.parts.len();
inner.parts.push(TimePartition {
memtable,
time_range: range,
time_range: Some(range),
});
pos
}
@@ -370,13 +396,13 @@ impl TimePartitions {
/// Forks latest partition and updates the partition duration if `part_duration` is Some.
pub fn fork(&self, metadata: &RegionMetadataRef, part_duration: Option<Duration>) -> Self {
// Fall back to the existing partition duration.
let part_duration = part_duration.unwrap_or(self.part_duration);
let part_duration = part_duration.or(self.part_duration);
let mut inner = self.inner.lock().unwrap();
let latest_part = inner
.parts
.iter()
.max_by_key(|part| part.time_range.min_timestamp)
.max_by_key(|part| part.time_range.map(|range| range.min_timestamp))
.cloned();
let Some(old_part) = latest_part else {
@@ -385,31 +411,33 @@ impl TimePartitions {
metadata.clone(),
self.builder.clone(),
inner.next_memtable_id,
Some(part_duration),
part_duration,
);
};
let old_stats = old_part.memtable.stats();
// Use the max timestamp to compute the new time range for the memtable.
let partitions_inner = old_stats
.time_range()
.and_then(|(_, old_stats_end_timestamp)| {
partition_start_timestamp(old_stats_end_timestamp, part_duration)
.and_then(|start| PartTimeRange::from_start_duration(start, part_duration))
})
.map(|part_time_range| {
// Forks the latest partition, but compute the time range based on the new duration.
let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
let part = TimePartition {
memtable,
time_range: part_time_range,
};
PartitionsInner::with_partition(part, inner.next_memtable_id)
})
.unwrap_or_else(|| PartitionsInner::new(inner.next_memtable_id));
// If `part_duration` is None, the new range will be None.
let new_time_range =
old_stats
.time_range()
.zip(part_duration)
.and_then(|(range, bucket)| {
partition_start_timestamp(range.1, bucket)
.and_then(|start| PartTimeRange::from_start_duration(start, bucket))
});
// Forks the latest partition, but computes the time range based on the new duration.
let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
let new_part = TimePartition {
memtable,
time_range: new_time_range,
};
Self {
inner: Mutex::new(partitions_inner),
inner: Mutex::new(PartitionsInner::with_partition(
new_part,
inner.next_memtable_id,
)),
part_duration,
metadata: metadata.clone(),
builder: self.builder.clone(),
@@ -417,7 +445,7 @@ impl TimePartitions {
}
/// Returns partition duration.
pub(crate) fn part_duration(&self) -> Duration {
pub(crate) fn part_duration(&self) -> Option<Duration> {
self.part_duration
}
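The `fork` change above is essentially an `Option` pipeline: a concrete time range exists only when the old memtable reports a time range and a partition duration is configured; otherwise the forked partition stays a catch-all. A hedged sketch of that computation, with `i64` millisecond values standing in for the real timestamp types:
// Sketch of the `zip` + `and_then` logic used to compute the forked range.
fn partition_start(ts: i64, bucket_ms: i64) -> Option<i64> {
    // Align `ts` down to the start of its bucket; the real code also guards
    // against overflow, which is why it returns Option.
    Some(ts.div_euclid(bucket_ms) * bucket_ms)
}
fn new_time_range(
    old_time_range: Option<(i64, i64)>, // (min, max) from the old memtable stats
    part_duration_ms: Option<i64>,
) -> Option<(i64, i64)> {
    old_time_range
        .zip(part_duration_ms)
        .and_then(|((_, old_max), bucket)| {
            partition_start(old_max, bucket).map(|start| (start, start + bucket))
        })
}
fn main() {
    // No duration configured: the forked partition keeps a `None` range.
    assert_eq!(new_time_range(Some((0, 7_000)), None), None);
    // A 5s bucket: the new range is the bucket containing the old max timestamp.
    assert_eq!(
        new_time_range(Some((0, 7_000)), Some(5_000)),
        Some((5_000, 10_000))
    );
}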
@@ -462,7 +490,7 @@ impl TimePartitions {
self.metadata.clone(),
self.builder.clone(),
self.next_memtable_id(),
Some(part_duration.unwrap_or(self.part_duration)),
part_duration.or(self.part_duration),
)
}
@@ -486,7 +514,11 @@ impl TimePartitions {
let mut present = HashSet::new();
// First find any existing partitions that overlap
for part in existing_parts {
let part_time_range = &part.time_range;
let Some(part_time_range) = part.time_range.as_ref() else {
matching.push(part);
return Ok((matching, Vec::new()));
};
if !(max < part_time_range.min_timestamp || min >= part_time_range.max_timestamp) {
matching.push(part);
present.insert(part_time_range.min_timestamp.value());
@@ -494,7 +526,7 @@ impl TimePartitions {
}
// Safety: we can only reach here when self.part_duration is present.
let part_duration = self.part_duration_or_default();
let part_duration = self.part_duration.unwrap();
let timestamp_unit = self.metadata.time_index_type().unit();
let part_duration_sec = part_duration.as_secs() as i64;
@@ -589,13 +621,12 @@ impl TimePartitions {
Ok((matching, missing))
}
/// Returns partition duration, or use default 1day duration is not present.
fn part_duration_or_default(&self) -> Duration {
self.part_duration
}
/// Write to multiple partitions.
fn write_multi_parts(&self, kvs: &KeyValues, parts: &PartitionVec) -> Result<()> {
// If part duration is `None` then there is always one partition and all rows
// will be put in that partition before invoking this method.
debug_assert!(self.part_duration.is_some());
let mut parts_to_write = HashMap::new();
let mut missing_parts = HashMap::new();
for kv in kvs.iter() {
@@ -604,8 +635,9 @@ impl TimePartitions {
let ts = kv.timestamp().as_timestamp().unwrap().unwrap();
for part in parts {
if part.contains_timestamp(ts) {
// Safety: the part duration is `Some`, so all time ranges must be `Some`.
parts_to_write
.entry(part.time_range.min_timestamp)
.entry(part.time_range.unwrap().min_timestamp)
.or_insert_with(|| PartitionToWrite {
partition: part.clone(),
key_values: Vec::new(),
@@ -620,7 +652,7 @@ impl TimePartitions {
if !part_found {
// We need to write it to a new part.
// Safety: `new()` ensures duration is always Some if we get to this method.
let part_duration = self.part_duration_or_default();
let part_duration = self.part_duration.unwrap();
let part_start =
partition_start_timestamp(ts, part_duration).with_context(|| {
InvalidRequestSnafu {
@@ -755,7 +787,7 @@ mod tests {
let metadata = memtable_util::metadata_for_test();
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions = TimePartitions::new(metadata.clone(), builder, 0, None);
assert_eq!(0, partitions.num_partitions());
assert_eq!(1, partitions.num_partitions());
assert!(partitions.is_empty());
let kvs = memtable_util::build_key_values(
@@ -817,15 +849,14 @@ mod tests {
let parts = partitions.list_partitions();
assert_eq!(
Timestamp::new_millisecond(0),
parts[0].time_range.min_timestamp
parts[0].time_range.unwrap().min_timestamp
);
assert_eq!(
Timestamp::new_millisecond(10000),
parts[0].time_range.max_timestamp
parts[0].time_range.unwrap().max_timestamp
);
}
#[cfg(test)]
fn new_multi_partitions(metadata: &RegionMetadataRef) -> TimePartitions {
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions =
@@ -869,11 +900,11 @@ mod tests {
assert_eq!(0, parts[0].memtable.id());
assert_eq!(
Timestamp::new_millisecond(0),
parts[0].time_range.min_timestamp
parts[0].time_range.unwrap().min_timestamp
);
assert_eq!(
Timestamp::new_millisecond(5000),
parts[0].time_range.max_timestamp
parts[0].time_range.unwrap().max_timestamp
);
assert_eq!(&[0, 2000, 3000, 4000], &timestamps[..]);
let iter = parts[1].memtable.iter(None, None, None).unwrap();
@@ -882,11 +913,11 @@ mod tests {
assert_eq!(&[5000, 7000], &timestamps[..]);
assert_eq!(
Timestamp::new_millisecond(5000),
parts[1].time_range.min_timestamp
parts[1].time_range.unwrap().min_timestamp
);
assert_eq!(
Timestamp::new_millisecond(10000),
parts[1].time_range.max_timestamp
parts[1].time_range.unwrap().max_timestamp
);
}
@@ -897,26 +928,26 @@ mod tests {
let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
let new_parts = partitions.new_with_part_duration(Some(Duration::from_secs(5)));
assert_eq!(Duration::from_secs(5), new_parts.part_duration());
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(1, new_parts.next_memtable_id());
// Won't update the duration if it's None.
let new_parts = new_parts.new_with_part_duration(None);
assert_eq!(Duration::from_secs(5), new_parts.part_duration());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
// Don't need to create new memtables.
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(1, new_parts.next_memtable_id());
let new_parts = new_parts.new_with_part_duration(Some(Duration::from_secs(10)));
assert_eq!(Duration::from_secs(10), new_parts.part_duration());
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
// Don't need to create new memtables.
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(1, new_parts.next_memtable_id());
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
// Need to build a new memtable as duration is still None.
let new_parts = partitions.new_with_part_duration(None);
assert_eq!(INITIAL_TIME_WINDOW, new_parts.part_duration());
assert_eq!(0, new_parts.next_memtable_id());
assert!(new_parts.part_duration().is_none());
assert_eq!(2, new_parts.next_memtable_id());
}
#[test]
@@ -926,28 +957,28 @@ mod tests {
let partitions = TimePartitions::new(metadata.clone(), builder, 0, None);
partitions.freeze().unwrap();
let new_parts = partitions.fork(&metadata, None);
assert_eq!(INITIAL_TIME_WINDOW, new_parts.part_duration());
assert!(new_parts.list_partitions().is_empty());
assert_eq!(0, new_parts.next_memtable_id());
assert!(new_parts.part_duration().is_none());
assert_eq!(1, new_parts.list_partitions()[0].memtable.id());
assert_eq!(2, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(5)));
assert_eq!(Duration::from_secs(5), new_parts.part_duration());
assert!(new_parts.list_partitions().is_empty());
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(2, new_parts.list_partitions()[0].memtable.id());
assert_eq!(3, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, None);
// Won't update the duration.
assert_eq!(Duration::from_secs(5), new_parts.part_duration());
assert!(new_parts.list_partitions().is_empty());
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(3, new_parts.list_partitions()[0].memtable.id());
assert_eq!(4, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(10)));
assert_eq!(Duration::from_secs(10), new_parts.part_duration());
assert!(new_parts.list_partitions().is_empty());
assert_eq!(0, new_parts.next_memtable_id());
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
assert_eq!(4, new_parts.list_partitions()[0].memtable.id());
assert_eq!(5, new_parts.next_memtable_id());
}
#[test]
@@ -959,14 +990,14 @@ mod tests {
// Won't update the duration.
let new_parts = partitions.fork(&metadata, None);
assert!(new_parts.is_empty());
assert_eq!(Duration::from_secs(5), new_parts.part_duration());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(2, new_parts.list_partitions()[0].memtable.id());
assert_eq!(3, new_parts.next_memtable_id());
// Although we don't fork a memtable multiple times, we still add a test for it.
let new_parts = partitions.fork(&metadata, Some(Duration::from_secs(10)));
assert!(new_parts.is_empty());
assert_eq!(Duration::from_secs(10), new_parts.part_duration());
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
assert_eq!(3, new_parts.list_partitions()[0].memtable.id());
assert_eq!(4, new_parts.next_memtable_id());
}
@@ -987,9 +1018,9 @@ mod tests {
Timestamp::new_millisecond(2000),
)
.unwrap();
assert_eq!(matching.len(), 0);
assert_eq!(missing.len(), 1);
assert_eq!(missing[0], Timestamp::new_millisecond(0));
assert_eq!(matching.len(), 1);
assert!(missing.is_empty());
assert!(matching[0].time_range.is_none());
// Case 2: With time range partitioning
let partitions = TimePartitions::new(
@@ -1021,7 +1052,7 @@ mod tests {
.unwrap();
assert_eq!(matching.len(), 1);
assert!(missing.is_empty());
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
// Test case 2b: Query spanning multiple existing partitions
let (matching, missing) = partitions
@@ -1034,8 +1065,8 @@ mod tests {
.unwrap();
assert_eq!(matching.len(), 2);
assert!(missing.is_empty());
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.min_timestamp.value(), 5000);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000);
// Test case 2c: Query requiring new partition
let (matching, missing) = partitions
@@ -1061,8 +1092,8 @@ mod tests {
.unwrap();
assert_eq!(matching.len(), 2);
assert!(missing.is_empty());
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.min_timestamp.value(), 5000);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000);
// Test case 2e: Corner case
let (matching, missing) = partitions
@@ -1075,8 +1106,8 @@ mod tests {
.unwrap();
assert_eq!(matching.len(), 2);
assert!(missing.is_empty());
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.min_timestamp.value(), 5000);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000);
// Test case 2f: Corner case with
let (matching, missing) = partitions
@@ -1089,7 +1120,7 @@ mod tests {
.unwrap();
assert_eq!(matching.len(), 1);
assert_eq!(1, missing.len());
assert_eq!(matching[0].time_range.min_timestamp.value(), 5000);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 5000);
assert_eq!(missing[0].value(), 10000);
// Test case 2g: Cross 0
@@ -1102,7 +1133,7 @@ mod tests {
)
.unwrap();
assert_eq!(matching.len(), 1);
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
assert_eq!(1, missing.len());
assert_eq!(missing[0].value(), -5000);
@@ -1120,8 +1151,8 @@ mod tests {
)
.unwrap();
assert_eq!(2, matching.len());
assert_eq!(matching[0].time_range.min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.min_timestamp.value(), 5000);
assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0);
assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000);
assert_eq!(2, missing.len());
assert_eq!(missing[0].value(), -100000000000);
assert_eq!(missing[1].value(), 100000000000);
@@ -1131,7 +1162,10 @@ mod tests {
let schema = Arc::new(Schema::new(vec![
Field::new(
"ts",
DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None),
arrow::datatypes::DataType::Timestamp(
arrow::datatypes::TimeUnit::Millisecond,
None,
),
false,
),
Field::new("val", DataType::Utf8, true),

View File

@@ -76,7 +76,7 @@ impl MemtableVersion {
) -> Result<Option<MemtableVersion>> {
if self.mutable.is_empty() {
// No need to freeze the mutable memtable, but we need to check the time window.
if Some(self.mutable.part_duration()) == time_window {
if self.mutable.part_duration() == time_window {
// If the time window is the same, we don't need to update it.
return Ok(None);
}
@@ -98,7 +98,7 @@ impl MemtableVersion {
// soft limit.
self.mutable.freeze()?;
// Fork the memtable.
if Some(self.mutable.part_duration()) != time_window {
if self.mutable.part_duration() != time_window {
common_telemetry::debug!(
"Fork memtable, update partition duration from {:?}, to {:?}",
self.mutable.part_duration(),

View File

@@ -142,7 +142,7 @@ impl VersionControl {
/// Mark all opened files as deleted and set the delete marker in [VersionControlData]
pub(crate) fn mark_dropped(&self, memtable_builder: &MemtableBuilderRef) {
let version = self.current().version;
let part_duration = Some(version.memtables.mutable.part_duration());
let part_duration = version.memtables.mutable.part_duration();
let next_memtable_id = version.memtables.mutable.next_memtable_id();
let new_mutable = Arc::new(TimePartitions::new(
version.metadata.clone(),
@@ -166,7 +166,7 @@ impl VersionControl {
/// new schema. Memtables of the version must be empty.
pub(crate) fn alter_schema(&self, metadata: RegionMetadataRef, builder: &MemtableBuilderRef) {
let version = self.current().version;
let part_duration = Some(version.memtables.mutable.part_duration());
let part_duration = version.memtables.mutable.part_duration();
let next_memtable_id = version.memtables.mutable.next_memtable_id();
let new_mutable = Arc::new(TimePartitions::new(
metadata.clone(),
@@ -202,7 +202,7 @@ impl VersionControl {
version.metadata.clone(),
memtable_builder.clone(),
next_memtable_id,
Some(part_duration),
part_duration,
));
let new_version = Arc::new(
VersionBuilder::new(version.metadata.clone(), new_mutable)

View File

@@ -59,7 +59,6 @@ sql.workspace = true
table.workspace = true
tokio.workspace = true
urlencoding = "2.1"
vrl = "0.24"
yaml-rust = "0.4"
[dev-dependencies]

View File

@@ -24,9 +24,9 @@ fn processor_mut(
let mut result = Vec::with_capacity(input_values.len());
for v in input_values {
let payload = json_to_map(v).unwrap();
let mut payload = json_to_map(v).unwrap();
let r = pipeline
.exec_mut(payload)?
.exec_mut(&mut payload)?
.into_transformed()
.expect("expect transformed result ");
result.push(r.0);

View File

@@ -20,10 +20,10 @@ use crate::error::{
Error, FieldRequiredForDispatcherSnafu, Result, TableSuffixRequiredForDispatcherRuleSnafu,
ValueRequiredForDispatcherRuleSnafu,
};
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
use crate::{PipelineMap, Value};
const FIELD: &str = "field";
const TABLE_SUFFIX: &str = "table_suffix";
const PIPELINE: &str = "pipeline";
const VALUE: &str = "value";
const RULES: &str = "rules";
@@ -80,7 +80,7 @@ impl TryFrom<&Yaml> for Dispatcher {
rules
.iter()
.map(|rule| {
let table_part = rule[TABLE_SUFFIX_KEY]
let table_part = rule[TABLE_SUFFIX]
.as_str()
.map(|s| s.to_string())
.context(TableSuffixRequiredForDispatcherRuleSnafu)?;

View File

@@ -411,6 +411,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"At least one timestamp-related processor is required to use auto transform"
))]
TransformNoTimestampProcessor {
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Illegal to set multiple timestamp Index columns, please set only one: {columns}"
))]
@@ -426,7 +433,7 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Exactly one time-related processor and one timestamp value is required to use auto transform"))]
#[snafu(display("Exactly one timestamp value is required to use auto transform"))]
AutoTransformOneTimestamp {
#[snafu(implicit)]
location: Location,
@@ -679,54 +686,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to compile VRL, {}", msg))]
CompileVrl {
msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to execute VRL, {}", msg))]
ExecuteVrl {
msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Float is not a number: {}", input_float))]
FloatNaN {
input_float: f64,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid timestamp value: {}", input))]
InvalidTimestamp {
input: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to convert bytes to utf8"))]
BytesToUtf8 {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Please don't use regex in Vrl script"))]
VrlRegexValue {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Vrl script should return `.` in the end"))]
VrlReturnValue {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to cast type, msg: {}", msg))]
CastType {
msg: String,
@@ -873,6 +832,7 @@ impl ErrorExt for Error {
| TransformTypeMustBeSet { .. }
| TransformColumnNameMustBeUnique { .. }
| TransformMultipleTimestampIndex { .. }
| TransformNoTimestampProcessor { .. }
| TransformTimestampIndexCount { .. }
| AutoTransformOneTimestamp { .. }
| CoerceUnsupportedNullType { .. }
@@ -906,13 +866,6 @@ impl ErrorExt for Error {
| ReachedMaxNestedLevels { .. }
| RequiredTableSuffixTemplate
| InvalidTableSuffixTemplate { .. }
| CompileVrl { .. }
| ExecuteVrl { .. }
| FloatNaN { .. }
| BytesToUtf8 { .. }
| InvalidTimestamp { .. }
| VrlRegexValue { .. }
| VrlReturnValue { .. }
| PipelineMissing { .. } => StatusCode::InvalidArguments,
}
}

View File

@@ -30,13 +30,12 @@ use yaml_rust::YamlLoader;
use crate::dispatcher::{Dispatcher, Rule};
use crate::error::{
AutoTransformOneTimestampSnafu, InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu, Result,
YamlLoadSnafu, YamlParseSnafu,
InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu, Result,
TransformNoTimestampProcessorSnafu, YamlLoadSnafu, YamlParseSnafu,
};
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
use crate::etl::processor::ProcessorKind;
use crate::tablesuffix::TableSuffixTemplate;
use crate::{ContextOpt, GreptimeTransformer};
use crate::GreptimeTransformer;
const DESCRIPTION: &str = "description";
const PROCESSORS: &str = "processors";
@@ -81,14 +80,16 @@ pub fn parse(input: &Content) -> Result<Pipeline> {
// check that the processors include at least one timestamp-related processor
let cnt = processors
.iter()
.filter_map(|p| match p {
ProcessorKind::Date(d) => Some(d.target_count()),
ProcessorKind::Timestamp(t) => Some(t.target_count()),
ProcessorKind::Epoch(e) => Some(e.target_count()),
_ => None,
.filter(|p| {
matches!(
p,
ProcessorKind::Date(_)
| ProcessorKind::Timestamp(_)
| ProcessorKind::Epoch(_)
)
})
.sum::<usize>();
ensure!(cnt == 1, AutoTransformOneTimestampSnafu);
.count();
ensure!(cnt > 0, TransformNoTimestampProcessorSnafu);
None
} else {
Some(GreptimeTransformer::new(transformers)?)
@@ -155,15 +156,14 @@ impl DispatchedTo {
pub enum PipelineExecOutput {
Transformed(TransformedOutput),
AutoTransform(AutoTransformOutput),
DispatchedTo(DispatchedTo, PipelineMap),
DispatchedTo(DispatchedTo),
}
#[derive(Debug)]
pub struct TransformedOutput {
pub opt: ContextOpt,
pub opt: String,
pub row: Row,
pub table_suffix: Option<String>,
pub pipeline_map: PipelineMap,
}
#[derive(Debug)]
@@ -171,7 +171,6 @@ pub struct AutoTransformOutput {
pub table_suffix: Option<String>,
// ts_column_name -> unit
pub ts_unit_map: HashMap<String, TimeUnit>,
pub pipeline_map: PipelineMap,
}
impl PipelineExecOutput {
@@ -189,7 +188,7 @@ impl PipelineExecOutput {
// Note: This is a test only function, do not use it in production.
pub fn into_dispatched(self) -> Option<DispatchedTo> {
if let Self::DispatchedTo(d, _) = self {
if let Self::DispatchedTo(d) = self {
Some(d)
} else {
None
@@ -232,38 +231,30 @@ pub fn simd_json_array_to_map(val: Vec<simd_json::OwnedValue>) -> Result<Vec<Pip
}
impl Pipeline {
pub fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineExecOutput> {
pub fn exec_mut(&self, val: &mut PipelineMap) -> Result<PipelineExecOutput> {
// process
for processor in self.processors.iter() {
val = processor.exec_mut(val)?;
processor.exec_mut(val)?;
}
// dispatch, fast return if matched
if let Some(rule) = self.dispatcher.as_ref().and_then(|d| d.exec(&val)) {
return Ok(PipelineExecOutput::DispatchedTo(rule.into(), val));
if let Some(rule) = self.dispatcher.as_ref().and_then(|d| d.exec(val)) {
return Ok(PipelineExecOutput::DispatchedTo(rule.into()));
}
// do transform
if let Some(transformer) = self.transformer() {
let (mut opt, row) = transformer.transform_mut(&mut val)?;
let table_suffix = opt.resolve_table_suffix(self.tablesuffix.as_ref(), &val);
let (opt, row) = transformer.transform_mut(val)?;
let table_suffix = self.tablesuffix.as_ref().and_then(|t| t.apply(val));
Ok(PipelineExecOutput::Transformed(TransformedOutput {
opt,
row,
table_suffix,
pipeline_map: val,
}))
} else {
// check table suffix var
let table_suffix = val
.remove(TABLE_SUFFIX_KEY)
.map(|f| f.to_str_value())
.or_else(|| self.tablesuffix.as_ref().and_then(|t| t.apply(&val)));
let table_suffix = self.tablesuffix.as_ref().and_then(|t| t.apply(val));
let mut ts_unit_map = HashMap::with_capacity(4);
// get all ts values
for (k, v) in val.iter() {
for (k, v) in val {
if let Value::Timestamp(ts) = v {
if !ts_unit_map.contains_key(k) {
ts_unit_map.insert(k.clone(), ts.get_unit());
@@ -273,7 +264,6 @@ impl Pipeline {
Ok(PipelineExecOutput::AutoTransform(AutoTransformOutput {
table_suffix,
ts_unit_map,
pipeline_map: val,
}))
}
}
@@ -328,9 +318,9 @@ transform:
type: uint32
"#;
let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let payload = json_to_map(input_value).unwrap();
let mut payload = json_to_map(input_value).unwrap();
let result = pipeline
.exec_mut(payload)
.exec_mut(&mut payload)
.unwrap()
.into_transformed()
.unwrap();
@@ -381,7 +371,7 @@ transform:
let mut payload = PipelineMap::new();
payload.insert("message".to_string(), Value::String(message));
let result = pipeline
.exec_mut(payload)
.exec_mut(&mut payload)
.unwrap()
.into_transformed()
.unwrap();
@@ -456,9 +446,9 @@ transform:
"#;
let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let payload = json_to_map(input_value).unwrap();
let mut payload = json_to_map(input_value).unwrap();
let result = pipeline
.exec_mut(payload)
.exec_mut(&mut payload)
.unwrap()
.into_transformed()
.unwrap();
@@ -498,10 +488,10 @@ transform:
let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let schema = pipeline.schemas().unwrap().clone();
let result = json_to_map(input_value).unwrap();
let mut result = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(result)
.exec_mut(&mut result)
.unwrap()
.into_transformed()
.unwrap();
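Taken together, the reworked API is called roughly as in the fragment below (a sketch that assumes the same imports as the tests in this file; `pipeline_yaml` and `input_value` are placeholders). `exec_mut` now borrows the map mutably, so the caller keeps ownership of the mutated payload instead of receiving it back inside the output:
// Sketch mirroring the tests above; `pipeline_yaml` and `input_value` are placeholders.
let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap();
let mut payload = json_to_map(input_value).unwrap();
match pipeline.exec_mut(&mut payload).unwrap() {
    PipelineExecOutput::Transformed(out) => {
        // `out.opt` is now the flattened hint string, e.g. "skip_wal=true,ttl=1d".
        println!("opt = {:?}, table_suffix = {:?}", out.opt, out.table_suffix);
    }
    PipelineExecOutput::AutoTransform(out) => {
        println!("timestamp columns: {:?}", out.ts_unit_map);
    }
    PipelineExecOutput::DispatchedTo(rule) => {
        println!("dispatched, table suffix = {}", rule.table_suffix);
    }
}
// `payload` still belongs to the caller and carries the processors' in-place edits.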

View File

@@ -13,145 +13,69 @@
// limitations under the License.
use std::collections::hash_map::IntoIter;
use std::collections::BTreeMap;
use std::sync::Arc;
use ahash::{HashMap, HashMapExt};
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
use itertools::Itertools;
use session::context::{QueryContext, QueryContextRef};
use crate::tablesuffix::TableSuffixTemplate;
use crate::PipelineMap;
const GREPTIME_AUTO_CREATE_TABLE: &str = "greptime_auto_create_table";
const GREPTIME_TTL: &str = "greptime_ttl";
const GREPTIME_APPEND_MODE: &str = "greptime_append_mode";
const GREPTIME_MERGE_MODE: &str = "greptime_merge_mode";
const GREPTIME_PHYSICAL_TABLE: &str = "greptime_physical_table";
const GREPTIME_SKIP_WAL: &str = "greptime_skip_wal";
const GREPTIME_TABLE_SUFFIX: &str = "greptime_table_suffix";
const DEFAULT_OPT: &str = "";
pub(crate) const AUTO_CREATE_TABLE_KEY: &str = "auto_create_table";
pub(crate) const TTL_KEY: &str = "ttl";
pub(crate) const APPEND_MODE_KEY: &str = "append_mode";
pub(crate) const MERGE_MODE_KEY: &str = "merge_mode";
pub(crate) const PHYSICAL_TABLE_KEY: &str = "physical_table";
pub(crate) const SKIP_WAL_KEY: &str = "skip_wal";
pub(crate) const TABLE_SUFFIX_KEY: &str = "table_suffix";
pub const PIPELINE_HINT_KEYS: [&str; 7] = [
GREPTIME_AUTO_CREATE_TABLE,
GREPTIME_TTL,
GREPTIME_APPEND_MODE,
GREPTIME_MERGE_MODE,
GREPTIME_PHYSICAL_TABLE,
GREPTIME_SKIP_WAL,
GREPTIME_TABLE_SUFFIX,
pub const PIPELINE_HINT_KEYS: [&str; 6] = [
"greptime_auto_create_table",
"greptime_ttl",
"greptime_append_mode",
"greptime_merge_mode",
"greptime_physical_table",
"greptime_skip_wal",
];
const PIPELINE_HINT_PREFIX: &str = "greptime_";
/// ContextOpt is a collection of options(including table options and pipeline options)
/// that should be extracted during the pipeline execution.
///
/// The options are set in the format of hint keys. See [`PIPELINE_HINT_KEYS`].
/// It's is used as the key in [`ContextReq`] for grouping the row insert requests.
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ContextOpt {
// table options, that need to be set in the query context before making row insert requests
auto_create_table: Option<String>,
ttl: Option<String>,
append_mode: Option<String>,
merge_mode: Option<String>,
physical_table: Option<String>,
skip_wal: Option<String>,
// pipeline options, not set in query context
// can be removed before the end of the pipeline execution
table_suffix: Option<String>,
// Remove hints from the pipeline map and form an option string,
// e.g. skip_wal=true,ttl=1d
pub fn from_pipeline_map_to_opt(pipeline_map: &mut PipelineMap) -> String {
let mut btreemap = BTreeMap::new();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = pipeline_map.remove(k) {
btreemap.insert(k, v.to_str_value());
}
}
btreemap
.into_iter()
.map(|(k, v)| format!("{}={}", k.replace(PIPELINE_HINT_PREFIX, ""), v))
.join(",")
}
impl ContextOpt {
pub fn from_pipeline_map_to_opt(pipeline_map: &mut PipelineMap) -> Self {
let mut opt = Self::default();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = pipeline_map.remove(k) {
match k {
GREPTIME_AUTO_CREATE_TABLE => {
opt.auto_create_table = Some(v.to_str_value());
}
GREPTIME_TTL => {
opt.ttl = Some(v.to_str_value());
}
GREPTIME_APPEND_MODE => {
opt.append_mode = Some(v.to_str_value());
}
GREPTIME_MERGE_MODE => {
opt.merge_mode = Some(v.to_str_value());
}
GREPTIME_PHYSICAL_TABLE => {
opt.physical_table = Some(v.to_str_value());
}
GREPTIME_SKIP_WAL => {
opt.skip_wal = Some(v.to_str_value());
}
GREPTIME_TABLE_SUFFIX => {
opt.table_suffix = Some(v.to_str_value());
}
_ => {}
}
}
}
opt
}
pub(crate) fn resolve_table_suffix(
&mut self,
table_suffix: Option<&TableSuffixTemplate>,
pipeline_map: &PipelineMap,
) -> Option<String> {
self.table_suffix
.take()
.or_else(|| table_suffix.and_then(|s| s.apply(pipeline_map)))
}
pub fn set_query_context(self, ctx: &mut QueryContext) {
if let Some(auto_create_table) = &self.auto_create_table {
ctx.set_extension(AUTO_CREATE_TABLE_KEY, auto_create_table);
}
if let Some(ttl) = &self.ttl {
ctx.set_extension(TTL_KEY, ttl);
}
if let Some(append_mode) = &self.append_mode {
ctx.set_extension(APPEND_MODE_KEY, append_mode);
}
if let Some(merge_mode) = &self.merge_mode {
ctx.set_extension(MERGE_MODE_KEY, merge_mode);
}
if let Some(physical_table) = &self.physical_table {
ctx.set_extension(PHYSICAL_TABLE_KEY, physical_table);
}
if let Some(skip_wal) = &self.skip_wal {
ctx.set_extension(SKIP_WAL_KEY, skip_wal);
}
}
// split the option string back to a map
fn from_opt_to_map(opt: &str) -> HashMap<&str, &str> {
opt.split(',')
.filter_map(|s| {
s.split_once("=")
.filter(|(k, v)| !k.is_empty() && !v.is_empty())
})
.collect()
}
/// ContextReq is a collection of row insert requests with different options.
/// The default option is all empty.
/// Because options are set in query context, we have to split them into sequential calls
/// The key is a [`ContextOpt`] struct for strong type.
/// e.g:
/// {
/// "skip_wal=true,ttl=1d": [RowInsertRequest],
/// "ttl=1d": [RowInsertRequest],
/// }
// ContextReq is a collection of row insert requests with different options.
// The default option is an empty string.
// Because options are set in the query context, we have to split them into sequential calls,
// e.g.:
// {
// "skip_wal=true,ttl=1d": [RowInsertRequest],
// "ttl=1d": [RowInsertRequest],
// }
#[derive(Debug, Default)]
pub struct ContextReq {
req: HashMap<ContextOpt, Vec<RowInsertRequest>>,
req: HashMap<String, Vec<RowInsertRequest>>,
}
impl ContextReq {
pub fn from_opt_map(opt_map: HashMap<ContextOpt, Rows>, table_name: String) -> Self {
pub fn from_opt_map(opt_map: HashMap<String, Rows>, table_name: String) -> Self {
Self {
req: opt_map
.into_iter()
@@ -164,17 +88,17 @@ impl ContextReq {
}],
)
})
.collect::<HashMap<ContextOpt, Vec<RowInsertRequest>>>(),
.collect::<HashMap<String, Vec<RowInsertRequest>>>(),
}
}
pub fn default_opt_with_reqs(reqs: Vec<RowInsertRequest>) -> Self {
let mut req_map = HashMap::new();
req_map.insert(ContextOpt::default(), reqs);
req_map.insert(DEFAULT_OPT.to_string(), reqs);
Self { req: req_map }
}
pub fn add_rows(&mut self, opt: ContextOpt, req: RowInsertRequest) {
pub fn add_rows(&mut self, opt: String, req: RowInsertRequest) {
self.req.entry(opt).or_default().push(req);
}
@@ -207,7 +131,7 @@ impl ContextReq {
// It will clone the query context for each option and set the options on the context.
// Then it will return the context and the row insert requests for the actual insert.
pub struct ContextReqIter {
opt_req: IntoIter<ContextOpt, Vec<RowInsertRequest>>,
opt_req: IntoIter<String, Vec<RowInsertRequest>>,
ctx_template: QueryContext,
}
@@ -216,8 +140,13 @@ impl Iterator for ContextReqIter {
fn next(&mut self) -> Option<Self::Item> {
let (opt, req_vec) = self.opt_req.next()?;
let opt_map = from_opt_to_map(&opt);
let mut ctx = self.ctx_template.clone();
opt.set_query_context(&mut ctx);
for (k, v) in opt_map {
ctx.set_extension(k, v);
}
Some((Arc::new(ctx), RowInsertRequests { inserts: req_vec }))
}
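Hint handling is now a plain string round trip: hints are pulled out of the pipeline map into a sorted `k=v` list that doubles as the grouping key, then split back into query-context extensions when the requests are replayed. A self-contained sketch of that encoding; the `BTreeMap` keeps the key order deterministic so identical hint sets land in the same bucket:
use std::collections::{BTreeMap, HashMap};
const PIPELINE_HINT_PREFIX: &str = "greptime_";
// Mirrors the shape of `from_pipeline_map_to_opt`: strip the prefix, join sorted pairs.
fn to_opt_string(hints: BTreeMap<&str, &str>) -> String {
    hints
        .into_iter()
        .map(|(k, v)| format!("{}={}", k.trim_start_matches(PIPELINE_HINT_PREFIX), v))
        .collect::<Vec<_>>()
        .join(",")
}
// Mirrors `from_opt_to_map`: split the string back into key/value pairs.
fn to_opt_map(opt: &str) -> HashMap<&str, &str> {
    opt.split(',')
        .filter_map(|s| s.split_once('=').filter(|(k, v)| !k.is_empty() && !v.is_empty()))
        .collect()
}
fn main() {
    let mut hints = BTreeMap::new();
    hints.insert("greptime_ttl", "1d");
    hints.insert("greptime_skip_wal", "true");
    let opt = to_opt_string(hints);
    assert_eq!(opt, "skip_wal=true,ttl=1d"); // sorted by key, stable grouping key
    let map = to_opt_map(&opt);
    assert_eq!(map.get("ttl"), Some(&"1d"));
    assert_eq!(map.get("skip_wal"), Some(&"true"));
}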

View File

@@ -29,7 +29,6 @@ pub mod select;
pub mod simple_extract;
pub mod timestamp;
pub mod urlencoding;
pub mod vrl;
use std::str::FromStr;
@@ -59,7 +58,6 @@ use crate::etl::field::{Field, Fields};
use crate::etl::processor::json_parse::JsonParseProcessor;
use crate::etl::processor::select::SelectProcessor;
use crate::etl::processor::simple_extract::SimpleExtractProcessor;
use crate::etl::processor::vrl::VrlProcessor;
use crate::etl::PipelineMap;
const FIELD_NAME: &str = "field";
@@ -125,7 +123,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
fn ignore_missing(&self) -> bool;
/// Execute the processor on a value map that has been preprocessed by the pipeline
fn exec_mut(&self, val: PipelineMap) -> Result<PipelineMap>;
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()>;
}
#[derive(Debug)]
@@ -148,7 +146,6 @@ pub enum ProcessorKind {
Decolorize(DecolorizeProcessor),
Digest(DigestProcessor),
Select(SelectProcessor),
Vrl(VrlProcessor),
}
#[derive(Debug, Default)]
@@ -230,7 +227,6 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
json_parse::PROCESSOR_JSON_PARSE => {
ProcessorKind::JsonParse(JsonParseProcessor::try_from(value)?)
}
vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
};
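Under the reworked trait, a processor mutates the value map in place and only reports success or failure. A minimal sketch of that pattern, with simplified stand-ins for `PipelineMap`, `Value`, and `Result` rather than the crate's actual types:
use std::collections::BTreeMap;
// Simplified stand-ins for the crate's PipelineMap/Value/Result types.
type PipelineMap = BTreeMap<String, String>;
type Result<T> = std::result::Result<T, String>;
// Same shape as the updated Processor trait: the map is borrowed mutably and
// the processor reports success/failure instead of handing the map back.
trait Processor {
    fn exec_mut(&self, val: &mut PipelineMap) -> Result<()>;
}
struct UppercaseProcessor {
    field: String,
}
impl Processor for UppercaseProcessor {
    fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
        let Some(current) = val.get(&self.field) else {
            return Err(format!("field `{}` is missing", self.field));
        };
        let upper = current.to_uppercase();
        // Mutate the caller's map in place instead of returning a new one.
        val.insert(self.field.clone(), upper);
        Ok(())
    }
}
fn main() -> Result<()> {
    let mut map = PipelineMap::new();
    map.insert("level".to_string(), "warn".to_string());
    let p = UppercaseProcessor { field: "level".to_string() };
    p.exec_mut(&mut map)?; // the caller keeps ownership of `map`
    assert_eq!(map.get("level").map(String::as_str), Some("WARN"));
    Ok(())
}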

View File

@@ -249,7 +249,7 @@ impl Processor for CmcdProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let name = field.input_field();
@@ -277,7 +277,7 @@ impl Processor for CmcdProcessor {
}
}
Ok(val)
Ok(())
}
}

View File

@@ -189,7 +189,7 @@ impl Processor for CsvProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let name = field.input_field();
@@ -216,7 +216,7 @@ impl Processor for CsvProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -163,10 +163,6 @@ pub struct DateProcessor {
}
impl DateProcessor {
pub(crate) fn target_count(&self) -> usize {
self.fields.len()
}
fn parse(&self, val: &str) -> Result<Timestamp> {
let mut tz = Tz::UTC;
if let Some(timezone) = &self.timezone {
@@ -198,7 +194,7 @@ impl Processor for DateProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -225,7 +221,7 @@ impl Processor for DateProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -102,7 +102,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -122,7 +122,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -201,7 +201,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -221,7 +221,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -601,7 +601,7 @@ impl Processor for DissectProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -629,7 +629,7 @@ impl Processor for DissectProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -111,10 +111,6 @@ impl EpochProcessor {
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
}
}
pub(crate) fn target_count(&self) -> usize {
self.fields.len()
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessor {
@@ -167,7 +163,7 @@ impl Processor for EpochProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -187,7 +183,7 @@ impl Processor for EpochProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -118,7 +118,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -138,7 +138,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -95,7 +95,7 @@ impl Processor for JoinProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -123,7 +123,7 @@ impl Processor for JoinProcessor {
}
}
Ok(val)
Ok(())
}
}

View File

@@ -97,7 +97,7 @@ impl Processor for JsonParseProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -117,7 +117,7 @@ impl Processor for JsonParseProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -125,7 +125,7 @@ impl Processor for JsonPathProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -145,7 +145,7 @@ impl Processor for JsonPathProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -126,7 +126,7 @@ impl Processor for LetterProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -154,7 +154,7 @@ impl Processor for LetterProcessor {
}
}
Ok(val)
Ok(())
}
}

View File

@@ -192,7 +192,7 @@ impl Processor for RegexProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
let prefix = field.target_or_input_field();
@@ -220,7 +220,7 @@ impl Processor for RegexProcessor {
}
}
Ok(val)
Ok(())
}
}
#[cfg(test)]

View File

@@ -96,7 +96,7 @@ impl Processor for SelectProcessor {
true
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
match self.select_type {
SelectType::Include => {
let mut include_key_set = HashSet::with_capacity(val.len());
@@ -121,7 +121,7 @@ impl Processor for SelectProcessor {
}
}
Ok(val)
Ok(())
}
}
@@ -142,9 +142,8 @@ mod test {
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(p);
let result = processor.exec_mut(&mut p);
assert!(result.is_ok());
let p = result.unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello"), Some(&Value::String("world".to_string())));
}
@@ -160,9 +159,8 @@ mod test {
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(p);
let result = processor.exec_mut(&mut p);
assert!(result.is_ok());
let p = result.unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello3"), Some(&Value::String("world".to_string())));
}
@@ -178,9 +176,8 @@ mod test {
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(p);
let result = processor.exec_mut(&mut p);
assert!(result.is_ok());
let p = result.unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello"), None);
assert_eq!(p.get("hello2"), Some(&Value::String("world2".to_string())));

View File

@@ -98,7 +98,7 @@ impl Processor for SimpleExtractProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -118,7 +118,7 @@ impl Processor for SimpleExtractProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -205,10 +205,6 @@ impl TimestampProcessor {
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
}
}
pub(crate) fn target_count(&self) -> usize {
self.fields.len()
}
}
fn parse_formats(yaml: &yaml_rust::yaml::Yaml) -> Result<Vec<(Arc<String>, Tz)>> {
@@ -302,7 +298,7 @@ impl Processor for TimestampProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -322,7 +318,7 @@ impl Processor for TimestampProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -126,7 +126,7 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: PipelineMap) -> Result<PipelineMap> {
fn exec_mut(&self, val: &mut PipelineMap) -> Result<()> {
for field in self.fields.iter() {
let index = field.input_field();
match val.get(index) {
@@ -153,7 +153,7 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
}
}
}
Ok(val)
Ok(())
}
}

View File

@@ -1,319 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use chrono_tz::Tz;
use snafu::{OptionExt, ResultExt};
use vrl::compiler::runtime::Runtime;
use vrl::compiler::{compile, Program, TargetValue};
use vrl::diagnostic::Formatter;
use vrl::prelude::{Bytes, NotNan, TimeZone};
use vrl::value::{KeyString, Kind, Secrets, Value as VrlValue};
use crate::error::{
BytesToUtf8Snafu, CompileVrlSnafu, Error, ExecuteVrlSnafu, FloatNaNSnafu,
InvalidTimestampSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu, VrlReturnValueSnafu,
};
use crate::etl::processor::yaml_string;
use crate::{PipelineMap, Value as PipelineValue};
pub(crate) const PROCESSOR_VRL: &str = "vrl";
const SOURCE: &str = "source";
#[derive(Debug)]
pub struct VrlProcessor {
source: String,
program: Program,
}
impl VrlProcessor {
pub fn new(source: String) -> Result<Self> {
let fns = vrl::stdlib::all();
let compile_result = compile(&source, &fns).map_err(|e| {
CompileVrlSnafu {
msg: Formatter::new(&source, e).to_string(),
}
.build()
})?;
let program = compile_result.program;
// check if the return value is have regex
let result_def = program.final_type_info().result;
let kind = result_def.kind();
if !kind.is_object() {
return VrlReturnValueSnafu.fail();
}
check_regex_output(kind)?;
Ok(Self { source, program })
}
pub fn resolve(&self, m: PipelineMap) -> Result<PipelineValue> {
let pipeline_vrl = m
.into_iter()
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
.collect::<Result<BTreeMap<_, _>>>()?;
let mut target = TargetValue {
value: VrlValue::Object(pipeline_vrl),
metadata: VrlValue::Object(BTreeMap::new()),
secrets: Secrets::default(),
};
let timezone = TimeZone::Named(Tz::UTC);
let mut runtime = Runtime::default();
let re = runtime
.resolve(&mut target, &self.program, &timezone)
.map_err(|e| {
ExecuteVrlSnafu {
msg: e.get_expression_error().to_string(),
}
.build()
})?;
vrl_value_to_pipeline_value(re)
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for VrlProcessor {
type Error = Error;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self> {
let mut source = String::new();
for (k, v) in value.iter() {
let key = k
.as_str()
.with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
if key == SOURCE {
source = yaml_string(v, SOURCE)?;
}
}
let processor = VrlProcessor::new(source)?;
Ok(processor)
}
}
impl crate::etl::processor::Processor for VrlProcessor {
fn kind(&self) -> &str {
PROCESSOR_VRL
}
fn ignore_missing(&self) -> bool {
true
}
fn exec_mut(&self, val: PipelineMap) -> Result<PipelineMap> {
let val = self.resolve(val)?;
if let PipelineValue::Map(m) = val {
Ok(m.values)
} else {
VrlRegexValueSnafu.fail()
}
}
}
fn pipeline_value_to_vrl_value(v: PipelineValue) -> Result<VrlValue> {
match v {
PipelineValue::Null => Ok(VrlValue::Null),
PipelineValue::Int8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int64(x) => Ok(VrlValue::Integer(x)),
PipelineValue::Uint8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint64(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Float32(x) => NotNan::new(x as f64)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Float64(x) => NotNan::new(x)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Boolean(x) => Ok(VrlValue::Boolean(x)),
PipelineValue::String(x) => Ok(VrlValue::Bytes(Bytes::copy_from_slice(x.as_bytes()))),
PipelineValue::Timestamp(x) => x
.to_datetime()
.context(InvalidTimestampSnafu {
input: x.to_string(),
})
.map(VrlValue::Timestamp),
PipelineValue::Array(array) => Ok(VrlValue::Array(
array
.into_iter()
.map(pipeline_value_to_vrl_value)
.collect::<Result<Vec<_>>>()?,
)),
PipelineValue::Map(m) => {
let values = m
.values
.into_iter()
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
.collect::<Result<BTreeMap<_, _>>>()?;
Ok(VrlValue::Object(values))
}
}
}
fn vrl_value_to_pipeline_value(v: VrlValue) -> Result<PipelineValue> {
match v {
VrlValue::Bytes(bytes) => String::from_utf8(bytes.to_vec())
.context(BytesToUtf8Snafu)
.map(PipelineValue::String),
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
VrlValue::Integer(x) => Ok(PipelineValue::Int64(x)),
VrlValue::Float(not_nan) => Ok(PipelineValue::Float64(not_nan.into_inner())),
VrlValue::Boolean(b) => Ok(PipelineValue::Boolean(b)),
VrlValue::Timestamp(date_time) => crate::etl::value::Timestamp::from_datetime(date_time)
.context(InvalidTimestampSnafu {
input: date_time.to_string(),
})
.map(PipelineValue::Timestamp),
VrlValue::Object(bm) => {
let b = bm
.into_iter()
.map(|(k, v)| vrl_value_to_pipeline_value(v).map(|v| (k.to_string(), v)))
.collect::<Result<BTreeMap<String, PipelineValue>>>()?;
Ok(PipelineValue::Map(b.into()))
}
VrlValue::Array(values) => {
let a = values
.into_iter()
.map(vrl_value_to_pipeline_value)
.collect::<Result<Vec<_>>>()?;
Ok(PipelineValue::Array(a.into()))
}
VrlValue::Null => Ok(PipelineValue::Null),
}
}
fn check_regex_output(output_kind: &Kind) -> Result<()> {
if output_kind.is_regex() {
return VrlRegexValueSnafu.fail();
}
if let Some(arr) = output_kind.as_array() {
let k = arr.known();
for v in k.values() {
check_regex_output(v)?
}
}
if let Some(obj) = output_kind.as_object() {
let k = obj.known();
for v in k.values() {
check_regex_output(v)?
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::etl::value::Timestamp;
use crate::Map;
#[test]
fn test_vrl() {
let source = r#"
.name.a = .user_info.name
.name.b = .user_info.name
del(.user_info)
.timestamp = now()
.
"#;
let v = VrlProcessor::new(source.to_string());
assert!(v.is_ok());
let v = v.unwrap();
let mut n = PipelineMap::new();
n.insert(
"name".to_string(),
PipelineValue::String("certain_name".to_string()),
);
let mut m = PipelineMap::new();
m.insert(
"user_info".to_string(),
PipelineValue::Map(Map { values: n }),
);
let re = v.resolve(m);
assert!(re.is_ok());
let re = re.unwrap();
assert!(matches!(re, PipelineValue::Map(_)));
assert!(re.get("name").is_some());
let name = re.get("name").unwrap();
assert!(matches!(name.get("a").unwrap(), PipelineValue::String(x) if x == "certain_name"));
assert!(matches!(name.get("b").unwrap(), PipelineValue::String(x) if x == "certain_name"));
assert!(re.get("timestamp").is_some());
let timestamp = re.get("timestamp").unwrap();
assert!(matches!(
timestamp,
PipelineValue::Timestamp(Timestamp::Nanosecond(_))
));
}
#[test]
fn test_yaml_to_vrl() {
let yaml = r#"
processors:
- vrl:
source: |
.name.a = .user_info.name
.name.b = .user_info.name
del(.user_info)
.timestamp = now()
.
"#;
let y = yaml_rust::YamlLoader::load_from_str(yaml).unwrap();
let vrl_processor_yaml = y
.first()
.and_then(|x| x.as_hash())
.and_then(|x| x.get(&yaml_rust::Yaml::String("processors".to_string())))
.and_then(|x| x.as_vec())
.and_then(|x| x.first())
.and_then(|x| x.as_hash())
.and_then(|x| x.get(&yaml_rust::Yaml::String("vrl".to_string())))
.and_then(|x| x.as_hash())
.unwrap();
let vrl = VrlProcessor::try_from(vrl_processor_yaml);
assert!(vrl.is_ok());
let vrl = vrl.unwrap();
assert_eq!(vrl.source, ".name.a = .user_info.name\n.name.b = .user_info.name\ndel(.user_info)\n.timestamp = now()\n.\n");
}
#[test]
fn test_regex() {
let source = r#"
.re = r'(?i)^Hello, World!$'
del(.re)
.re = r'(?i)^Hello, World!$'
.
"#;
let v = VrlProcessor::new(source.to_string());
assert!(v.is_err());
}
}

View File

@@ -88,10 +88,9 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
type Error = Error;
fn try_from(docs: &Vec<yaml_rust::Yaml>) -> Result<Self> {
let mut transforms = Vec::with_capacity(32);
let mut all_output_keys: Vec<String> = Vec::with_capacity(32);
let mut all_required_keys = Vec::with_capacity(32);
let mut transforms = Vec::with_capacity(100);
let mut all_output_keys: Vec<String> = Vec::with_capacity(100);
let mut all_required_keys = Vec::with_capacity(100);
for doc in docs {
let transform_builder: Transform = doc
.as_hash()
@@ -124,10 +123,15 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
#[derive(Debug, Clone)]
pub struct Transform {
pub fields: Fields,
pub type_: Value,
pub default: Option<Value>,
pub index: Option<Index>,
pub tag: bool,
pub on_failure: Option<OnFailure>,
}

View File

@@ -35,13 +35,12 @@ use crate::error::{
TransformColumnNameMustBeUniqueSnafu, TransformMultipleTimestampIndexSnafu,
TransformTimestampIndexCountSnafu, UnsupportedNumberTypeSnafu,
};
use crate::etl::ctx_req::ContextOpt;
use crate::etl::field::{Field, Fields};
use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transforms};
use crate::etl::value::{Timestamp, Value};
use crate::etl::PipelineMap;
use crate::PipelineContext;
use crate::{from_pipeline_map_to_opt, PipelineContext};
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
@@ -186,8 +185,8 @@ impl GreptimeTransformer {
}
}
pub fn transform_mut(&self, pipeline_map: &mut PipelineMap) -> Result<(ContextOpt, Row)> {
let opt = ContextOpt::from_pipeline_map_to_opt(pipeline_map);
pub fn transform_mut(&self, pipeline_map: &mut PipelineMap) -> Result<(String, Row)> {
let opt = from_pipeline_map_to_opt(pipeline_map);
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
let mut output_index = 0;
@@ -520,7 +519,7 @@ fn resolve_value(
fn identity_pipeline_inner(
pipeline_maps: Vec<PipelineMap>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<(SchemaInfo, HashMap<ContextOpt, Vec<Row>>)> {
) -> Result<(SchemaInfo, HashMap<String, Vec<Row>>)> {
let mut schema_info = SchemaInfo::default();
let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();
@@ -545,7 +544,7 @@ fn identity_pipeline_inner(
let len = pipeline_maps.len();
for mut pipeline_map in pipeline_maps {
let opt = ContextOpt::from_pipeline_map_to_opt(&mut pipeline_map);
let opt = from_pipeline_map_to_opt(&mut pipeline_map);
let row = values_to_row(&mut schema_info, pipeline_map, pipeline_ctx)?;
opt_map
@@ -579,7 +578,7 @@ pub fn identity_pipeline(
array: Vec<PipelineMap>,
table: Option<Arc<table::Table>>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<HashMap<ContextOpt, Rows>> {
) -> Result<HashMap<String, Rows>> {
let input = if pipeline_ctx.pipeline_param.flatten_json_object() {
array
.into_iter()
@@ -610,7 +609,7 @@ pub fn identity_pipeline(
},
)
})
.collect::<HashMap<ContextOpt, Rows>>()
.collect::<HashMap<String, Rows>>()
})
}
@@ -762,7 +761,7 @@ mod tests {
assert!(rows.is_ok());
let mut rows = rows.unwrap();
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
let rows = rows.remove("").unwrap();
assert_eq!(rows.schema.len(), 8);
assert_eq!(rows.rows.len(), 2);
assert_eq!(8, rows.rows[0].values.len());
@@ -800,7 +799,7 @@ mod tests {
}
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
let rows = rows.remove("").unwrap();
Rows {
schema: schema.schema,

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
#[derive(Debug, Clone, PartialEq)]
@@ -105,19 +104,6 @@ impl Timestamp {
Timestamp::Second(_) => TimeUnit::Second,
}
}
pub fn to_datetime(&self) -> Option<DateTime<Utc>> {
match self {
Timestamp::Nanosecond(v) => Some(DateTime::from_timestamp_nanos(*v)),
Timestamp::Microsecond(v) => DateTime::from_timestamp_micros(*v),
Timestamp::Millisecond(v) => DateTime::from_timestamp_millis(*v),
Timestamp::Second(v) => DateTime::from_timestamp(*v, 0),
}
}
pub fn from_datetime(dt: DateTime<Utc>) -> Option<Self> {
dt.timestamp_nanos_opt().map(Timestamp::Nanosecond)
}
}
impl Default for Timestamp {

View File

@@ -19,7 +19,7 @@ mod manager;
mod metrics;
mod tablesuffix;
pub use etl::ctx_req::{ContextOpt, ContextReq};
pub use etl::ctx_req::{from_pipeline_map_to_opt, ContextReq};
pub use etl::processor::Processor;
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
pub use etl::transform::transformer::identity_pipeline;

View File

@@ -29,9 +29,9 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
match input_value {
serde_json::Value::Array(array) => {
for value in array {
let intermediate_status = json_to_map(value).unwrap();
let mut intermediate_status = json_to_map(value).unwrap();
let row = pipeline
.exec_mut(intermediate_status)
.exec_mut(&mut intermediate_status)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");
@@ -39,9 +39,9 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
}
}
serde_json::Value::Object(_) => {
let intermediate_status = json_to_map(input_value).unwrap();
let mut intermediate_status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(intermediate_status)
.exec_mut(&mut intermediate_status)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");

View File

@@ -274,9 +274,9 @@ transform:
let yaml_content = pipeline::Content::Yaml(pipeline_yaml);
let pipeline: pipeline::Pipeline =
pipeline::parse(&yaml_content).expect("failed to parse pipeline");
let result = json_to_map(input_value).unwrap();
let mut result = json_to_map(input_value).unwrap();
let row = pipeline.exec_mut(result);
let row = pipeline.exec_mut(&mut result);
assert!(row.is_err());
assert_eq!(row.err().unwrap().to_string(), "No matching pattern found");

View File

@@ -419,10 +419,10 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline");
let stats = json_to_map(input_value).unwrap();
let mut stats = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(stats)
.exec_mut(&mut stats)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");
@@ -488,9 +488,9 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -597,9 +597,9 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -663,9 +663,9 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -703,9 +703,10 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -762,9 +763,9 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -803,9 +804,9 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let mut status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -865,18 +866,18 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value1).unwrap();
let mut status = json_to_map(input_value1).unwrap();
let dispatched_to = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_dispatched()
.expect("expect dispatched result ");
assert_eq!(dispatched_to.table_suffix, "http");
assert_eq!(dispatched_to.pipeline.unwrap(), "access_log_pipeline");
let status = json_to_map(input_value2).unwrap();
let mut status = json_to_map(input_value2).unwrap();
let row = pipeline
.exec_mut(status)
.exec_mut(&mut status)
.unwrap()
.into_transformed()
.expect("expect transformed result ");
@@ -929,8 +930,8 @@ table_suffix: _${logger}
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).unwrap();
let status = json_to_map(input_value).unwrap();
let exec_re = pipeline.exec_mut(status).unwrap();
let mut status = json_to_map(input_value).unwrap();
let exec_re = pipeline.exec_mut(&mut status).unwrap();
let (row, table_name) = exec_re.into_transformed().unwrap();
let values = row.values;

View File

@@ -120,9 +120,9 @@ async fn run_custom_pipeline(
let mut auto_map = HashMap::new();
let mut auto_map_ts_keys = HashMap::new();
for pipeline_map in pipeline_maps {
for mut pipeline_map in pipeline_maps {
let r = pipeline
.exec_mut(pipeline_map)
.exec_mut(&mut pipeline_map)
.inspect_err(|_| {
METRIC_HTTP_LOGS_TRANSFORM_ELAPSED
.with_label_values(&[db.as_str(), METRIC_FAILURE_VALUE])
@@ -135,7 +135,6 @@ async fn run_custom_pipeline(
opt,
row,
table_suffix,
pipeline_map: _val,
}) => {
let act_table_name = table_suffix_to_table_name(&table_name, table_suffix);
push_to_map!(transformed_map, (opt, act_table_name), row, arr_len);
@@ -143,7 +142,6 @@ async fn run_custom_pipeline(
PipelineExecOutput::AutoTransform(AutoTransformOutput {
table_suffix,
ts_unit_map,
pipeline_map,
}) => {
let act_table_name = table_suffix_to_table_name(&table_name, table_suffix);
push_to_map!(auto_map, act_table_name.clone(), pipeline_map, arr_len);
@@ -152,8 +150,8 @@ async fn run_custom_pipeline(
.or_insert_with(HashMap::new)
.extend(ts_unit_map);
}
PipelineExecOutput::DispatchedTo(dispatched_to, val) => {
push_to_map!(dispatched, dispatched_to, val, arr_len);
PipelineExecOutput::DispatchedTo(dispatched_to) => {
push_to_map!(dispatched, dispatched_to, pipeline_map, arr_len);
}
}
}
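
The handler above classifies each execution result and appends it to a keyed bucket. A minimal sketch of that dispatch shape with a hypothetical `Output` enum (the real `PipelineExecOutput` variants carry more fields):

```rust
use std::collections::HashMap;

// Hypothetical, simplified classification result; not the real PipelineExecOutput.
enum Output {
    Transformed { table: String, row: u32 },
    Dispatched { pipeline: String, raw: u32 },
}

type Buckets = HashMap<String, Vec<u32>>;

fn bucket(outputs: Vec<Output>) -> (Buckets, Buckets) {
    let mut transformed: Buckets = HashMap::new();
    let mut dispatched: Buckets = HashMap::new();
    for out in outputs {
        match out {
            // Transformed rows are grouped by their destination table name.
            Output::Transformed { table, row } => {
                transformed.entry(table).or_default().push(row)
            }
            // Dispatched records are grouped by the pipeline they are routed to.
            Output::Dispatched { pipeline, raw } => {
                dispatched.entry(pipeline).or_default().push(raw)
            }
        }
    }
    (transformed, dispatched)
}

fn main() {
    let (t, d) = bucket(vec![
        Output::Transformed { table: "d_table_http".into(), row: 1 },
        Output::Dispatched { pipeline: "access_log_pipeline".into(), raw: 2 },
    ]);
    assert_eq!(t["d_table_http"], vec![1]);
    assert_eq!(d["access_log_pipeline"], vec![2]);
}
```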

View File

@@ -19,7 +19,6 @@ use std::fmt;
use std::str::FromStr;
use common_base::readable_size::ReadableSize;
use common_datasource::object_store::oss::is_supported_in_oss;
use common_datasource::object_store::s3::is_supported_in_s3;
use common_query::AddColumnLocation;
use common_time::range::TimestampRange;
@@ -71,10 +70,6 @@ pub fn validate_table_option(key: &str) -> bool {
return true;
}
if is_supported_in_oss(key) {
return true;
}
if is_mito_engine_option_key(key) {
return true;
}
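
`validate_table_option` is a chain of per-backend allowlist checks; the same idea as a standalone predicate. The key sets below are invented for the sketch and are not the actual GreptimeDB option lists:

```rust
/// Illustrative allowlist checks in the style of `validate_table_option`;
/// the accepted keys are made up for this sketch.
fn is_supported_in_s3(key: &str) -> bool {
    matches!(key, "s3.bucket" | "s3.region" | "s3.endpoint")
}

fn is_mito_engine_option_key(key: &str) -> bool {
    key.starts_with("compaction.") || matches!(key, "ttl" | "append_mode" | "skip_wal")
}

fn validate_table_option(key: &str) -> bool {
    if is_supported_in_s3(key) {
        return true;
    }
    if is_mito_engine_option_key(key) {
        return true;
    }
    false
}

fn main() {
    assert!(validate_table_option("compaction.twcs.time_window"));
    assert!(validate_table_option("ttl"));
    assert!(!validate_table_option("no_such_option"));
}
```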

View File

@@ -857,7 +857,7 @@ CREATE TABLE {table_name} (
.expect("physical table route"),
)
.iter()
.map(|(k, v)| (v.leader_regions[0], *k))
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
assert!(region_to_dn_map.len() <= instance.datanodes().len());
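
`region_to_dn_map` inverts a `datanode -> regions` distribution into `region number -> datanode`. A small sketch of that inversion, using a plain `BTreeMap<u64, Vec<u32>>` as a stand-in for the real distribution type:

```rust
use std::collections::{BTreeMap, HashMap};

/// Invert `datanode id -> region numbers` into `region number -> datanode id`.
fn region_to_dn_map(distribution: &BTreeMap<u64, Vec<u32>>) -> HashMap<u32, u64> {
    distribution
        .iter()
        .flat_map(|(dn, regions)| regions.iter().map(move |r| (*r, *dn)))
        .collect()
}

fn main() {
    let distribution = BTreeMap::from([(1_u64, vec![0_u32, 2]), (2_u64, vec![1_u32])]);
    let map = region_to_dn_map(&distribution);
    assert_eq!(map[&0], 1);
    assert_eq!(map[&1], 2);
    assert_eq!(map[&2], 1);
}
```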

View File

@@ -224,7 +224,7 @@ mod tests {
.expect("region routes should be physical"),
)
.iter()
.map(|(k, v)| (v.leader_regions[0], *k))
.map(|(k, v)| (v[0], *k))
.collect::<HashMap<u32, u64>>();
assert!(region_to_dn_map.len() <= instance.datanodes().len());

View File

@@ -1458,12 +1458,9 @@ async fn test_insert_with_default_value_for_type(instance: Arc<Instance>, type_n
.data;
assert!(matches!(output, OutputData::AffectedRows(1)));
let output = execute_sql(
&instance,
&format!("select host, cpu from {table_name} order by host"),
)
.await
.data;
let output = execute_sql(&instance, &format!("select host, cpu from {table_name}"))
.await
.data;
let expected = "\
+-------+-----+
| host | cpu |
@@ -1760,12 +1757,7 @@ async fn test_execute_copy_from_orc_with_cast(instance: Arc<dyn MockInstance>) {
assert!(matches!(output, OutputData::AffectedRows(5)));
let output = execute_sql(
&instance,
"select * from demo order by timestamp_simple asc;",
)
.await
.data;
let output = execute_sql(&instance, "select * from demo;").await.data;
let expected = r#"+-------------------------------+----------------------------+-------------------------+----------------------------+
| bigint_direct | bigint_neg_direct | bigint_other | timestamp_simple |
+-------------------------------+----------------------------+-------------------------+----------------------------+

View File

@@ -520,7 +520,7 @@ async fn insert_and_assert(db: &Database) {
// select
let output = db
.sql("SELECT host, cpu, memory, ts FROM demo order by host")
.sql("SELECT host, cpu, memory, ts FROM demo")
.await
.unwrap();

View File

@@ -105,8 +105,6 @@ macro_rules! http_tests {
test_pipeline_dispatcher,
test_pipeline_suffix_template,
test_pipeline_context,
test_pipeline_with_vrl,
test_pipeline_with_hint_vrl,
test_otlp_metrics,
test_otlp_traces_v0,
@@ -2068,8 +2066,7 @@ table_suffix: _${type}
"type": "http",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n",
"greptime_ttl": "1d",
"greptime_skip_wal": "true"
"greptime_ttl": "1d"
},
{
"id1": "2436",
@@ -2119,13 +2116,12 @@ table_suffix: _${type}
// CREATE TABLE IF NOT EXISTS "d_table_http" (
// ... ignore
// )
// ENGINE=mito
// ENGINE=mito
// WITH(
// append_mode = 'true',
// skip_wal = 'true',
// ttl = '1day'
// )
let expected = "[[\"d_table_http\",\"CREATE TABLE IF NOT EXISTS \\\"d_table_http\\\" (\\n \\\"id1_root\\\" INT NULL,\\n \\\"id2_root\\\" INT NULL,\\n \\\"type\\\" STRING NULL,\\n \\\"log\\\" STRING NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true',\\n skip_wal = 'true',\\n ttl = '1day'\\n)\"]]";
let expected = "[[\"d_table_http\",\"CREATE TABLE IF NOT EXISTS \\\"d_table_http\\\" (\\n \\\"id1_root\\\" INT NULL,\\n \\\"id2_root\\\" INT NULL,\\n \\\"type\\\" STRING NULL,\\n \\\"log\\\" STRING NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true',\\n ttl = '1day'\\n)\"]]";
validate_data(
"test_pipeline_context_http",
&client,
@@ -2137,141 +2133,6 @@ table_suffix: _${type}
guard.remove_all().await;
}
pub async fn test_pipeline_with_vrl(storage_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_http_app_with_frontend(storage_type, "test_pipeline_with_vrl").await;
// handshake
let client = TestClient::new(app).await;
let pipeline = r#"
processors:
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
ignore_missing: true
- vrl:
source: |
.log_id = .id
del(.id)
.
transform:
- fields:
- log_id
type: int32
- field: time
type: time
index: timestamp
"#;
// 1. create pipeline
let res = client
.post("/v1/events/pipelines/root")
.header("Content-Type", "application/x-yaml")
.body(pipeline)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
// 2. write data
let data_body = r#"
[
{
"id": "2436",
"time": "2024-05-25 20:16:37.217"
}
]
"#;
let res = client
.post("/v1/events/logs?db=public&table=d_table&pipeline_name=root")
.header("Content-Type", "application/json")
.body(data_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
validate_data(
"test_pipeline_with_vrl",
&client,
"select * from d_table",
"[[2436,1716668197217000000]]",
)
.await;
guard.remove_all().await;
}
pub async fn test_pipeline_with_hint_vrl(storage_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_http_app_with_frontend(storage_type, "test_pipeline_with_hint_vrl").await;
// handshake
let client = TestClient::new(app).await;
let pipeline = r#"
processors:
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
ignore_missing: true
- vrl:
source: |
.greptime_table_suffix, err = "_" + .id
.
transform:
- fields:
- id
type: int32
- field: time
type: time
index: timestamp
"#;
// 1. create pipeline
let res = client
.post("/v1/events/pipelines/root")
.header("Content-Type", "application/x-yaml")
.body(pipeline)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
// 2. write data
let data_body = r#"
[
{
"id": "2436",
"time": "2024-05-25 20:16:37.217"
}
]
"#;
let res = client
.post("/v1/events/logs?db=public&table=d_table&pipeline_name=root")
.header("Content-Type", "application/json")
.body(data_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
validate_data(
"test_pipeline_with_hint_vrl",
&client,
"show tables",
"[[\"d_table_2436\"],[\"demo\"],[\"numbers\"]]",
)
.await;
guard.remove_all().await;
}
pub async fn test_identity_pipeline_with_flatten(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =

View File

@@ -16,7 +16,7 @@ use std::sync::Arc;
use std::time::Duration;
use client::{OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_meta::key::{RegionDistribution, RegionRoleSet, TableMetadataManagerRef};
use common_meta::key::{RegionDistribution, TableMetadataManagerRef};
use common_meta::peer::Peer;
use common_query::Output;
use common_recordbatch::RecordBatches;
@@ -166,7 +166,7 @@ pub async fn test_region_migration(store_type: StorageType, endpoints: Vec<Strin
to_regions
);
let region_id = RegionId::new(table_id, from_regions.leader_regions[0]);
let region_id = RegionId::new(table_id, from_regions[0]);
// Trigger region migration.
let procedure = region_migration_manager
.submit_procedure(RegionMigrationProcedureTask::new(
@@ -180,12 +180,7 @@ pub async fn test_region_migration(store_type: StorageType, endpoints: Vec<Strin
info!("Started region procedure: {}!", procedure.unwrap());
// Prepares expected region distribution.
to_regions
.leader_regions
.extend(from_regions.leader_regions);
to_regions
.follower_regions
.extend(from_regions.follower_regions);
to_regions.extend(from_regions);
// Keeps asc order.
to_regions.sort();
distribution.insert(to_peer_id, to_regions);
@@ -305,10 +300,10 @@ pub async fn test_metric_table_region_migration_by_sql(
let (from_peer_id, from_regions) = distribution.pop_first().unwrap();
info!(
"Selecting from peer: {from_peer_id}, and regions: {:?}",
from_regions.leader_regions[0]
from_regions[0]
);
let to_peer_id = (from_peer_id + 1) % 3;
let region_id = RegionId::new(table_id, from_regions.leader_regions[0]);
let region_id = RegionId::new(table_id, from_regions[0]);
// Trigger region migration.
let procedure_id =
trigger_migration_by_sql(&cluster, region_id.as_u64(), from_peer_id, to_peer_id).await;
@@ -441,7 +436,7 @@ pub async fn test_region_migration_by_sql(store_type: StorageType, endpoints: Ve
to_regions
);
let region_id = RegionId::new(table_id, from_regions.leader_regions[0]);
let region_id = RegionId::new(table_id, from_regions[0]);
// Trigger region migration.
let procedure_id =
trigger_migration_by_sql(&cluster, region_id.as_u64(), from_peer_id, to_peer_id).await;
@@ -563,12 +558,12 @@ pub async fn test_region_migration_multiple_regions(
let (peer_2, peer_2_regions) = distribution.pop_first().unwrap();
// Picks the peer only contains as from peer.
let ((from_peer_id, from_regions), (to_peer_id, mut to_regions)) =
if peer_1_regions.leader_regions.len() == 1 {
((peer_1, peer_1_regions), (peer_2, peer_2_regions))
} else {
((peer_2, peer_2_regions), (peer_1, peer_1_regions))
};
let ((from_peer_id, from_regions), (to_peer_id, mut to_regions)) = if peer_1_regions.len() == 1
{
((peer_1, peer_1_regions), (peer_2, peer_2_regions))
} else {
((peer_2, peer_2_regions), (peer_1, peer_1_regions))
};
info!(
"Selecting from peer: {from_peer_id}, and regions: {:?}",
@@ -579,7 +574,7 @@ pub async fn test_region_migration_multiple_regions(
to_regions
);
let region_id = RegionId::new(table_id, from_regions.leader_regions[0]);
let region_id = RegionId::new(table_id, from_regions[0]);
// Trigger region migration.
let procedure = region_migration_manager
.submit_procedure(RegionMigrationProcedureTask::new(
@@ -593,12 +588,7 @@ pub async fn test_region_migration_multiple_regions(
info!("Started region procedure: {}!", procedure.unwrap());
// Prepares expected region distribution.
to_regions
.leader_regions
.extend(from_regions.leader_regions);
to_regions
.follower_regions
.extend(from_regions.follower_regions);
to_regions.extend(from_regions);
// Keeps asc order.
to_regions.sort();
distribution.insert(to_peer_id, to_regions);
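
The expected-distribution bookkeeping above (move the migrated regions onto the target peer, keep each list in ascending order) is a plain vector merge. A sketch under the same `BTreeMap<u64, Vec<u32>>` stand-in, with hypothetical peer ids:

```rust
use std::collections::BTreeMap;

/// Move all regions from `from_peer` to `to_peer` in an expected-distribution
/// map, keeping the target list sorted (as the test does with `sort`).
fn migrate_all(distribution: &mut BTreeMap<u64, Vec<u32>>, from_peer: u64, to_peer: u64) {
    let from_regions = distribution.remove(&from_peer).unwrap_or_default();
    let to_regions = distribution.entry(to_peer).or_default();
    to_regions.extend(from_regions);
    to_regions.sort();
}

fn main() {
    let mut dist = BTreeMap::from([(1_u64, vec![2_u32, 0]), (2_u64, vec![1_u32])]);
    migrate_all(&mut dist, 1, 2);
    assert_eq!(dist.get(&1), None);
    assert_eq!(dist[&2], vec![0, 1, 2]);
}
```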
@@ -709,7 +699,7 @@ pub async fn test_region_migration_all_regions(store_type: StorageType, endpoint
let region_migration_manager = cluster.metasrv.region_migration_manager();
let (from_peer_id, mut from_regions) = distribution.pop_first().unwrap();
let to_peer_id = 1;
let mut to_regions = RegionRoleSet::default();
let mut to_regions = Vec::new();
info!(
"Selecting from peer: {from_peer_id}, and regions: {:?}",
from_regions
@@ -719,7 +709,7 @@ pub async fn test_region_migration_all_regions(store_type: StorageType, endpoint
to_regions
);
let region_id = RegionId::new(table_id, from_regions.leader_regions[0]);
let region_id = RegionId::new(table_id, from_regions[0]);
// Trigger region migration.
let procedure = region_migration_manager
.submit_procedure(RegionMigrationProcedureTask::new(
@@ -733,9 +723,7 @@ pub async fn test_region_migration_all_regions(store_type: StorageType, endpoint
info!("Started region procedure: {}!", procedure.unwrap());
// Prepares expected region distribution.
to_regions
.leader_regions
.push(from_regions.leader_regions.remove(0));
to_regions.push(from_regions.remove(0));
// Keeps asc order.
to_regions.sort();
distribution.insert(to_peer_id, to_regions);
@@ -1132,7 +1120,7 @@ async fn find_region_distribution_by_sql(
distribution
.entry(datanode_id)
.or_default()
.add_leader_region(region_id.region_number());
.push(region_id.region_number());
}
}

View File

@@ -145,7 +145,7 @@ INSERT INTO t1 (ts, val, host) VALUES
Affected Rows: 6
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
+-------------+---------------------+------+
| host | ts | val |
@@ -159,7 +159,7 @@ ALTER TABLE t1 ADD COLUMN k STRING PRIMARY KEY;
Affected Rows: 0
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
+-------------+---+---------------------+------+
| host | k | ts | val |

View File

@@ -52,11 +52,11 @@ INSERT INTO t1 (ts, val, host) VALUES
('2022-01-02 00:00:00', 4.56, 'example.com'),
('2022-01-03 00:00:00', 7.89, 'example.com');
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
ALTER TABLE t1 ADD COLUMN k STRING PRIMARY KEY;
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
DROP TABLE t1;

View File

@@ -76,7 +76,7 @@ insert into foo (host) values ('host3');
Affected Rows: 1
select * from foo order by ts;
select * from foo;
+-------+---------------------+-----+
| host | ts | cpu |
@@ -141,7 +141,7 @@ SELECT * FROM system_metrics;
| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 |
+-------+-------+----------+-------------+-----------+-------------------------+
select * from foo order by host asc;
select * from foo;
+-------+---------------------+-----+
| host | ts | cpu |
@@ -151,12 +151,12 @@ select * from foo order by host asc;
| host3 | 2023-04-29T00:00:00 | 0.0 |
+-------+---------------------+-----+
SELECT * from t1 order by ts desc;
SELECT * from t1;
++
++
SELECT * from t2 order by ts desc;
SELECT * from t2;
+------+-------------------------+-----+
| job | ts | val |

View File

@@ -37,7 +37,7 @@ insert into foo (host, cpu) values ('host2', 2.2);
insert into foo (host) values ('host3');
select * from foo order by ts;
select * from foo;
CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "");
@@ -63,11 +63,11 @@ INSERT INTO t2 VALUES ('job1', 0, 0), ('job2', 1, 1);
SELECT * FROM system_metrics;
select * from foo order by host asc;
select * from foo;
SELECT * from t1 order by ts desc;
SELECT * from t1;
SELECT * from t2 order by ts desc;
SELECT * from t2;
DROP TABLE t1;

View File

@@ -219,7 +219,7 @@ INSERT INTO t1 (ts, val, host) VALUES
Affected Rows: 6
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
+-------------+---------------------+------+
| host | ts | val |

View File

@@ -84,7 +84,7 @@ INSERT INTO t1 (ts, val, host) VALUES
('2022-01-02 00:00:00', 4.56, 'example.com'),
('2022-01-03 00:00:00', 7.89, 'example.com');
SELECT * FROM t1 ORDER BY ts ASC;
SELECT * FROM t1;
DROP TABLE t1;

View File

@@ -500,8 +500,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+------+---------------------+
| rate | time_window |
@@ -530,8 +529,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+------+---------------------+
| rate | time_window |
@@ -561,8 +559,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+-------------------+---------------------+
| rate | time_window |
@@ -593,8 +590,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+--------------------+---------------------+
| rate | time_window |
@@ -1286,18 +1282,17 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;
requests_without_ip;
+--------------+-----+---------------------+
| service_name | val | ts |
+--------------+-----+---------------------+
| | 100 | 2024-10-18T19:00:00 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| | 200 | 2024-10-18T19:00:30 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| | 300 | 2024-10-18T19:01:00 |
| | 100 | 2024-10-18T19:01:01 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| svc1 | 400 | 2024-10-18T19:01:30 |
| svc1 | 200 | 2024-10-18T19:01:31 |
+--------------+-----+---------------------+
@@ -1340,26 +1335,25 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;
requests_without_ip;
+--------------+-----+---------------------+
| service_name | val | ts |
+--------------+-----+---------------------+
| | 100 | 2024-10-18T19:00:00 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| | 200 | 2024-10-18T19:00:30 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| | 300 | 2024-10-18T19:01:00 |
| | 100 | 2024-10-18T19:01:01 |
| svc1 | 400 | 2024-10-18T19:01:30 |
| svc1 | 200 | 2024-10-18T19:01:31 |
| | 100 | 2024-10-19T19:00:00 |
| | 200 | 2024-10-19T19:00:30 |
| | 300 | 2024-10-19T19:01:00 |
| | 100 | 2024-10-19T19:01:01 |
| | 400 | 2024-10-19T19:01:30 |
| | 200 | 2024-10-19T19:01:31 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| svc1 | 400 | 2024-10-18T19:01:30 |
| svc1 | 200 | 2024-10-18T19:01:31 |
+--------------+-----+---------------------+
INSERT INTO
@@ -1388,32 +1382,31 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;;
requests_without_ip;
+--------------+-----+---------------------+
| service_name | val | ts |
+--------------+-----+---------------------+
| | 100 | 2024-10-18T19:00:00 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| svc2 | 100 | 2024-10-18T19:00:00 |
| | 200 | 2024-10-18T19:00:30 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| svc2 | 200 | 2024-10-18T19:00:30 |
| | 300 | 2024-10-18T19:01:00 |
| svc2 | 300 | 2024-10-18T19:01:00 |
| | 100 | 2024-10-18T19:01:01 |
| svc2 | 100 | 2024-10-18T19:01:01 |
| svc1 | 400 | 2024-10-18T19:01:30 |
| svc2 | 400 | 2024-10-18T19:01:30 |
| svc1 | 200 | 2024-10-18T19:01:31 |
| svc2 | 200 | 2024-10-18T19:01:31 |
| | 100 | 2024-10-19T19:00:00 |
| | 200 | 2024-10-19T19:00:30 |
| | 300 | 2024-10-19T19:01:00 |
| | 100 | 2024-10-19T19:01:01 |
| | 400 | 2024-10-19T19:01:30 |
| | 200 | 2024-10-19T19:01:31 |
| svc1 | 100 | 2024-10-18T19:00:00 |
| svc1 | 200 | 2024-10-18T19:00:30 |
| svc1 | 400 | 2024-10-18T19:01:30 |
| svc1 | 200 | 2024-10-18T19:01:31 |
| svc2 | 100 | 2024-10-18T19:00:00 |
| svc2 | 200 | 2024-10-18T19:00:30 |
| svc2 | 300 | 2024-10-18T19:01:00 |
| svc2 | 100 | 2024-10-18T19:01:01 |
| svc2 | 400 | 2024-10-18T19:01:30 |
| svc2 | 200 | 2024-10-18T19:01:31 |
+--------------+-----+---------------------+
DROP FLOW requests_long_term;

View File

@@ -219,8 +219,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
INSERT INTO
bytes_log
@@ -235,8 +234,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
INSERT INTO
bytes_log
@@ -251,8 +249,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
INSERT INTO
bytes_log
@@ -267,8 +264,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
DROP TABLE bytes_log;
@@ -614,8 +610,7 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;
requests_without_ip;
-- Test if the FLOWS table works, but don't care about the result since it varies from run to run
SELECT
@@ -641,8 +636,7 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;
requests_without_ip;
INSERT INTO
requests
@@ -662,8 +656,7 @@ ADMIN FLUSH_FLOW('requests_long_term');
SELECT
*
FROM
requests_without_ip
ORDER BY ts ASC;;
requests_without_ip;
DROP FLOW requests_long_term;

View File

@@ -67,8 +67,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+------+---------------------+
| rate | time_window |
@@ -98,8 +97,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+------+---------------------+
| rate | time_window |
@@ -130,8 +128,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+-------------------+---------------------+
| rate | time_window |
@@ -163,8 +160,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
+--------------------+---------------------+
| rate | time_window |

View File

@@ -39,8 +39,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
-- reordered insert, also test if null is handled correctly
INSERT INTO
@@ -56,8 +55,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
-- reordered insert
INSERT INTO
@@ -73,8 +71,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
-- reordered insert
INSERT INTO
@@ -90,8 +87,7 @@ SELECT
rate,
time_window
FROM
approx_rate
ORDER BY time_window ASC;
approx_rate;
DROP TABLE bytes_log;

View File

@@ -7,7 +7,7 @@ INSERT INTO test1 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'),
Affected Rows: 4
SELECT * FROM test1 ORDER BY j;
SELECT * FROM test1;
+---+---------------------+
| i | j |
@@ -30,7 +30,7 @@ INSERT INTO test2 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'),
Affected Rows: 4
SELECT * FROM test2 ORDER BY j;
SELECT * FROM test2;
+---+---------------------+
| i | j |
@@ -41,7 +41,7 @@ SELECT * FROM test2 ORDER BY j;
| 4 | 2025-01-31T16:01:01 |
+---+---------------------+
SELECT * FROM test1 ORDER BY j;
SELECT * FROM test1;
+---+---------------------+
| i | j |

View File

@@ -4,7 +4,7 @@ CREATE TABLE test1 (i INTEGER, j TIMESTAMP default '2024-01-30 00:01:01' TIME IN
INSERT INTO test1 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'), (4, '2025-02-01 00:01:01');
SELECT * FROM test1 ORDER BY j;
SELECT * FROM test1;
SET time_zone = 'Asia/Shanghai';
@@ -12,9 +12,9 @@ CREATE TABLE test2 (i INTEGER, j TIMESTAMP default '2024-01-30 00:01:01' TIME IN
INSERT INTO test2 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'), (4, '2025-02-01 00:01:01');
SELECT * FROM test2 ORDER BY j;
SELECT * FROM test2;
SELECT * FROM test1 ORDER BY j;
SELECT * FROM test1;
SET time_zone = 'UTC';

View File

@@ -36,7 +36,7 @@ INSERT INTO test values
Affected Rows: 5
SELECT * from test ORDER BY ts ASC;
SELECT * from test;
+-----+---------------------+
| d | ts |
@@ -48,7 +48,7 @@ SELECT * from test ORDER BY ts ASC;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
+-----+---------------------+
| d | ts |
@@ -59,7 +59,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
+-----+---------------------+
| d | ts |
@@ -69,7 +69,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
| 3.0 | 2024-01-03T16:00:00 |
+-----+---------------------+
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
+----------------------------------------------------+
| date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) |
@@ -126,7 +126,7 @@ select timezone();
| +08:00 |
+------------+
SELECT * from test ORDER BY ts;
SELECT * from test;
+-----+---------------------+
| d | ts |
@@ -138,7 +138,7 @@ SELECT * from test ORDER BY ts;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
+-----+---------------------+
| d | ts |
@@ -149,7 +149,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
+-----+---------------------+
| d | ts |
@@ -158,7 +158,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
| 2.0 | 2024-01-02T08:00:00 |
+-----+---------------------+
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
+----------------------------------------------------+
| date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) |
@@ -215,7 +215,7 @@ select timezone();
| -08:00 |
+------------+
SELECT * from test ORDER BY ts;
SELECT * from test;
+-----+---------------------+
| d | ts |
@@ -227,7 +227,7 @@ SELECT * from test ORDER BY ts;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
+-----+---------------------+
| d | ts |
@@ -237,7 +237,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
| 5.0 | 2024-01-04T16:00:00 |
+-----+---------------------+
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
+-----+---------------------+
| d | ts |
@@ -248,7 +248,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
| 4.0 | 2024-01-04T00:00:00 |
+-----+---------------------+
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
+----------------------------------------------------+
| date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) |

View File

@@ -14,13 +14,13 @@ INSERT INTO test values
(4, '2024-01-04 00:00:00'),
(5, '2024-01-05 00:00:00+08:00');
SELECT * from test ORDER BY ts ASC;
SELECT * from test;
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
select to_unixtime('2024-01-02 00:00:00');
@@ -35,13 +35,13 @@ SHOW VARIABLES system_time_zone;
select timezone();
SELECT * from test ORDER BY ts;
SELECT * from test;
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
select to_unixtime('2024-01-02 00:00:00');
@@ -56,13 +56,13 @@ SHOW VARIABLES system_time_zone;
select timezone();
SELECT * from test ORDER BY ts;
SELECT * from test;
SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts;
SELECT * from test where ts >= '2024-01-02 08:00:00';
SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts;
SELECT * from test where ts <= '2024-01-03 16:00:00';
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts;
select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test;
select to_unixtime('2024-01-02 00:00:00');

View File

@@ -22,7 +22,7 @@ INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 7 FROM t
Affected Rows: 1
-- now create a second table; we only insert the big varchar string in there
CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index) WITH ('compaction.type' = 'twcs', 'compaction.twcs.time_window'='1000000y');
CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index);
Affected Rows: 0
@@ -174,15 +174,6 @@ SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
| 2048 | 2048 | 10000 | 20480000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
-- SQLNESS ARG restart=true
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 2048 | 2048 | 10000 | 20480000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
Affected Rows: 2048
@@ -207,6 +198,39 @@ SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
| 8192 | 8192 | 10000 | 81920000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
-- SQLNESS ARG restart=true
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 8192 | 8192 | 10000 | 81920000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
Affected Rows: 8192
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 16384 | 16384 | 10000 | 163840000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
Affected Rows: 16384
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
+----------+-------------------+-----------------------------------+-----------------------------------+
| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) |
+----------+-------------------+-----------------------------------+-----------------------------------+
| 32768 | 32768 | 10000 | 327680000 |
+----------+-------------------+-----------------------------------+-----------------------------------+
DROP TABLE test;
Affected Rows: 0

View File

@@ -13,7 +13,7 @@ INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 5 FROM t
INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 7 FROM test WHERE LENGTH(a)=(SELECT MAX(LENGTH(a)) FROM test);
-- now create a second table; we only insert the big varchar string in there
CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index) WITH ('compaction.type' = 'twcs', 'compaction.twcs.time_window'='1000000y');
CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index);
INSERT INTO bigtable SELECT a, ts FROM test WHERE LENGTH(a)=(SELECT MAX(LENGTH(a)) FROM test);
@@ -67,8 +67,6 @@ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SQLNESS ARG restart=true
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable;
@@ -78,6 +76,17 @@ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
-- SQLNESS ARG restart=true
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable;
SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable;
DROP TABLE test;