flow/batching_mode/task/
inc.rs1use std::sync::Arc;
16
17use common_error::ext::BoxedError;
18use common_telemetry::debug;
19use common_telemetry::tracing::warn;
20use datafusion_expr::{DmlStatement, LogicalPlan};
21use query::options::{
22 FLOW_INCREMENTAL_AFTER_SEQS, FLOW_INCREMENTAL_MODE, FLOW_INCREMENTAL_MODE_MEMTABLE_ONLY,
23 FLOW_SINK_TABLE_ID,
24};
25use snafu::ResultExt;
26use table::metadata::TableId;
27
28use crate::Error;
29use crate::batching_mode::state::CheckpointMode;
30use crate::batching_mode::table_creator::QueryType;
31use crate::batching_mode::task::BatchingTask;
32use crate::batching_mode::utils::{
33 analyze_incremental_aggregate_plan, get_table_info_df_schema,
34 rewrite_incremental_aggregate_with_sink_merge,
35};
36use crate::error::{ExternalSnafu, UnexpectedSnafu};
37
38impl BatchingTask {
39 async fn sink_table_id(&self) -> Result<TableId, Error> {
40 let table = self
41 .config
42 .catalog_manager
43 .table(
44 &self.config.sink_table_name[0],
45 &self.config.sink_table_name[1],
46 &self.config.sink_table_name[2],
47 None,
48 )
49 .await
50 .map_err(BoxedError::new)
51 .context(ExternalSnafu)?
52 .ok_or_else(|| {
53 UnexpectedSnafu {
54 reason: format!(
55 "Flow {} cannot build incremental extensions because sink table {:?} was not found",
56 self.config.flow_id, self.config.sink_table_name
57 ),
58 }
59 .build()
60 })?;
61 Ok(table.table_info().table_id())
62 }
63
64 pub(super) async fn prepare_plan_for_incremental(
74 &self,
75 plan: &LogicalPlan,
76 ) -> Result<Option<LogicalPlan>, Error> {
77 let is_incremental_sql = {
78 let state = self.state.read().unwrap();
79 if state.is_incremental_disabled() {
80 return Ok(None);
81 }
82 state.checkpoint_mode() == CheckpointMode::Incremental
83 && matches!(self.config.query_type, QueryType::Sql)
84 };
85
86 if !is_incremental_sql {
87 return Ok(None);
88 }
89
90 let inner_plan = match plan {
94 LogicalPlan::Dml(dml) => dml.input.as_ref().clone(),
95 _ => return Ok(None),
96 };
97
98 let Some(analysis) = analyze_incremental_aggregate_plan(&inner_plan)? else {
105 warn!(
106 "Flow {} incremental mode but plan is not an aggregate query; \
107 permanently disabling incremental for this flow",
108 self.config.flow_id
109 );
110 self.state.write().unwrap().disable_incremental();
111 return Ok(None);
112 };
113
114 if !analysis.unsupported_exprs.is_empty() {
115 warn!(
116 "Flow {} incremental aggregate contains unsupported expressions {:?}; \
117 permanently disabling incremental for this flow",
118 self.config.flow_id, analysis.unsupported_exprs
119 );
120 self.state.write().unwrap().disable_incremental();
121 return Ok(None);
122 }
123
124 if analysis.merge_columns.is_empty() {
129 return Ok(Some(plan.clone()));
130 }
131
132 let sink_table = match get_table_info_df_schema(
137 self.config.catalog_manager.clone(),
138 self.config.sink_table_name.clone(),
139 )
140 .await
141 {
142 Ok((table, _)) => table,
143 Err(err) => {
144 warn!(
145 "Flow {} failed to fetch sink table for incremental rewrite; \
146 falling back to full snapshot for this round: {:?}",
147 self.config.flow_id, err
148 );
149 self.state.write().unwrap().mark_full_snapshot();
150 return Ok(None);
151 }
152 };
153 let rewritten_inner = match rewrite_incremental_aggregate_with_sink_merge(
154 &inner_plan,
155 &analysis,
156 sink_table,
157 &self.config.sink_table_name,
158 None,
159 )
160 .await
161 {
162 Ok(plan) => plan,
163 Err(err) => {
164 warn!(
165 "Flow {} failed to rewrite incremental aggregate with sink merge; \
166 falling back to full snapshot for this round: {:?}",
167 self.config.flow_id, err
168 );
169 self.state.write().unwrap().mark_full_snapshot();
170 return Ok(None);
171 }
172 };
173
174 let rewritten = match plan {
176 LogicalPlan::Dml(dml) => LogicalPlan::Dml(DmlStatement::new(
177 dml.table_name.clone(),
178 dml.target.clone(),
179 dml.op.clone(),
180 Arc::new(rewritten_inner),
181 )),
182 _ => unreachable!("already matched Dml above"),
183 };
184
185 debug!(
186 "Flow {} rewrote incremental SQL aggregate query with sink merge",
187 self.config.flow_id
188 );
189
190 Ok(Some(rewritten))
191 }
192
193 pub(super) async fn build_flow_query_extensions(
194 &self,
195 incremental_safe: bool,
196 can_advance_checkpoints: bool,
197 ) -> Result<Vec<(&'static str, String)>, Error> {
198 let mut extensions = vec![("flow.return_region_seq", "true".to_string())];
199
200 let incremental_checkpoints_json = {
201 let state = self.state.read().unwrap();
202 if incremental_safe
203 && can_advance_checkpoints
204 && !state.is_incremental_disabled()
205 && state.checkpoint_mode() == CheckpointMode::Incremental
206 && !state.checkpoints().is_empty()
207 {
208 Some(serde_json::to_string(state.checkpoints()).map_err(|err| {
209 UnexpectedSnafu {
210 reason: format!("Failed to serialize checkpoint map: {err}"),
211 }
212 .build()
213 })?)
214 } else {
215 None
216 }
217 };
218
219 if let Some(checkpoints_json) = incremental_checkpoints_json {
220 let sink_table_id = self.sink_table_id().await?;
221 extensions.push((FLOW_SINK_TABLE_ID, sink_table_id.to_string()));
222 extensions.push((
223 FLOW_INCREMENTAL_MODE,
224 FLOW_INCREMENTAL_MODE_MEMTABLE_ONLY.to_string(),
225 ));
226 extensions.push((FLOW_INCREMENTAL_AFTER_SEQS, checkpoints_json));
227 }
228
229 Ok(extensions)
230 }
231}