feat: CREATE OR REPLACE FLOW (#5001)

* feat: Replace flow

* refactor: better show create flow&tests: better check

* tests: sqlness result update

* tests: unit test for update

* refactor: cmp with raw bytes

* refactor: rename

* refactor: per review
This commit is contained in:
discord9
2024-11-19 16:44:57 +08:00
committed by GitHub
parent dbb3f2d98d
commit 4d8fe29ea8
12 changed files with 932 additions and 78 deletions

View File

@@ -50,7 +50,10 @@ use crate::adapter::util::column_schemas_to_proto;
use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
use crate::error::{
EvalSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu, TableNotFoundSnafu,
UnexpectedSnafu,
};
use crate::expr::{Batch, GlobalId};
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::repr::{self, DiffRow, Row, BATCH_SIZE};
@@ -673,6 +676,21 @@ impl FlowWorkerManager {
}
}
/// The arguments to create a flow in [`FlowWorkerManager`].
#[derive(Debug, Clone)]
pub struct CreateFlowArgs {
pub flow_id: FlowId,
pub sink_table_name: TableName,
pub source_table_ids: Vec<TableId>,
pub create_if_not_exists: bool,
pub or_replace: bool,
pub expire_after: Option<i64>,
pub comment: Option<String>,
pub sql: String,
pub flow_options: HashMap<String, String>,
pub query_ctx: Option<QueryContext>,
}
/// Create&Remove flow
impl FlowWorkerManager {
/// remove a flow by it's id
@@ -694,18 +712,48 @@ impl FlowWorkerManager {
/// 1. parse query into typed plan(and optional parse expire_after expr)
/// 2. render source/sink with output table id and used input table id
#[allow(clippy::too_many_arguments)]
pub async fn create_flow(
&self,
flow_id: FlowId,
sink_table_name: TableName,
source_table_ids: &[TableId],
create_if_not_exists: bool,
expire_after: Option<i64>,
comment: Option<String>,
sql: String,
flow_options: HashMap<String, String>,
query_ctx: Option<QueryContext>,
) -> Result<Option<FlowId>, Error> {
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
let CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment,
sql,
flow_options,
query_ctx,
} = args;
let already_exist = {
let mut flag = false;
// check if the task already exists
for handle in self.worker_handles.iter() {
if handle.lock().await.contains_flow(flow_id).await? {
flag = true;
break;
}
}
flag
};
match (create_if_not_exists, or_replace, already_exist) {
// do replace
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
self.remove_flow(flow_id).await?;
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// do nothing if exists
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// create if not exists
(_, _, false) => (),
}
if create_if_not_exists {
// check if the task already exists
for handle in self.worker_handles.iter() {
@@ -717,7 +765,7 @@ impl FlowWorkerManager {
let mut node_ctx = self.node_context.write().await;
// assign global id to source and sink table
for source in source_table_ids {
for source in &source_table_ids {
node_ctx
.assign_global_id_to_table(&self.table_info_source, None, Some(*source))
.await?;
@@ -726,7 +774,7 @@ impl FlowWorkerManager {
.assign_global_id_to_table(&self.table_info_source, Some(sink_table_name.clone()), None)
.await?;
node_ctx.register_task_src_sink(flow_id, source_table_ids, sink_table_name.clone());
node_ctx.register_task_src_sink(flow_id, &source_table_ids, sink_table_name.clone());
node_ctx.query_context = query_ctx.map(Arc::new);
// construct a active dataflow state with it

View File

@@ -28,7 +28,7 @@ use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use crate::adapter::FlowWorkerManager;
use crate::adapter::{CreateFlowArgs, FlowWorkerManager};
use crate::error::InternalSnafu;
use crate::metrics::METRIC_FLOW_TASK_COUNT;
use crate::repr::{self, DiffRow};
@@ -57,7 +57,7 @@ impl Flownode for FlowWorkerManager {
comment,
sql,
flow_options,
or_replace: _,
or_replace,
})) => {
let source_table_ids = source_table_ids.into_iter().map(|id| id.id).collect_vec();
let sink_table_name = [
@@ -66,20 +66,19 @@ impl Flownode for FlowWorkerManager {
sink_table_name.table_name,
];
let expire_after = expire_after.map(|e| e.value);
let ret = self
.create_flow(
task_id.id as u64,
sink_table_name,
&source_table_ids,
create_if_not_exists,
expire_after,
Some(comment),
sql,
flow_options,
query_ctx,
)
.await
.map_err(to_meta_err)?;
let args = CreateFlowArgs {
flow_id: task_id.id as u64,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment: Some(comment),
sql,
flow_options,
query_ctx,
};
let ret = self.create_flow(args).await.map_err(to_meta_err)?;
METRIC_FLOW_TASK_COUNT.inc();
Ok(FlowResponse {
affected_flows: ret

View File

@@ -48,7 +48,7 @@ use tonic::codec::CompressionEncoding;
use tonic::transport::server::TcpIncoming;
use tonic::{Request, Response, Status};
use crate::adapter::FlowWorkerManagerRef;
use crate::adapter::{CreateFlowArgs, FlowWorkerManagerRef};
use crate::error::{
CacheRequiredSnafu, ExternalSnafu, FlowNotFoundSnafu, ListFlowsSnafu, ParseAddrSnafu,
ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu,
@@ -355,23 +355,26 @@ impl FlownodeBuilder {
info.sink_table_name().schema_name.clone(),
info.sink_table_name().table_name.clone(),
];
manager
.create_flow(
flow_id as _,
sink_table_name,
info.source_table_ids(),
true,
info.expire_after(),
Some(info.comment().clone()),
info.raw_sql().clone(),
info.options().clone(),
Some(
QueryContextBuilder::default()
.current_catalog(info.catalog_name().clone())
.build(),
),
)
.await?;
let args = CreateFlowArgs {
flow_id: flow_id as _,
sink_table_name,
source_table_ids: info.source_table_ids().to_vec(),
// because recover should only happen on restart the `create_if_not_exists` and `or_replace` can be arbitrary value(since flow doesn't exist)
// but for the sake of consistency and to make sure recover of flow actually happen, we set both to true
// (which is also fine since checks for not allow both to be true is on metasrv and we already pass that)
create_if_not_exists: true,
or_replace: true,
expire_after: info.expire_after(),
comment: Some(info.comment().clone()),
sql: info.raw_sql().clone(),
flow_options: info.options().clone(),
query_ctx: Some(
QueryContextBuilder::default()
.current_catalog(info.catalog_name().clone())
.build(),
),
};
manager.create_flow(args).await?;
}
Ok(cnt)