flow/batching_mode/task/
inc.rs1use std::sync::Arc;
16
17use common_error::ext::BoxedError;
18use common_telemetry::debug;
19use common_telemetry::tracing::warn;
20use datafusion_expr::{DmlStatement, LogicalPlan};
21use query::options::{
22 FLOW_INCREMENTAL_AFTER_SEQS, FLOW_INCREMENTAL_MODE, FLOW_INCREMENTAL_MODE_MEMTABLE_ONLY,
23 FLOW_SINK_TABLE_ID,
24};
25use snafu::ResultExt;
26use table::metadata::TableId;
27
28use crate::Error;
29use crate::batching_mode::incremental_filter::build_sink_dirty_time_window_filter_expr;
30use crate::batching_mode::state::{CheckpointMode, FilterExprInfo};
31use crate::batching_mode::table_creator::QueryType;
32use crate::batching_mode::task::BatchingTask;
33use crate::batching_mode::utils::{
34 analyze_incremental_aggregate_plan, get_table_info_df_schema,
35 rewrite_incremental_aggregate_with_sink_merge,
36};
37use crate::error::{ExternalSnafu, UnexpectedSnafu};
38
39impl BatchingTask {
40 async fn sink_table_id(&self) -> Result<TableId, Error> {
41 let table = self
42 .config
43 .catalog_manager
44 .table(
45 &self.config.sink_table_name[0],
46 &self.config.sink_table_name[1],
47 &self.config.sink_table_name[2],
48 None,
49 )
50 .await
51 .map_err(BoxedError::new)
52 .context(ExternalSnafu)?
53 .ok_or_else(|| {
54 UnexpectedSnafu {
55 reason: format!(
56 "Flow {} cannot build incremental extensions because sink table {:?} was not found",
57 self.config.flow_id, self.config.sink_table_name
58 ),
59 }
60 .build()
61 })?;
62 Ok(table.table_info().table_id())
63 }
64
65 pub(super) async fn prepare_plan_for_incremental(
75 &self,
76 plan: &LogicalPlan,
77 dirty_filter: Option<&FilterExprInfo>,
78 ) -> Result<Option<LogicalPlan>, Error> {
79 let is_incremental_sql = {
80 let state = self.state.read().unwrap();
81 if state.is_incremental_disabled() {
82 return Ok(None);
83 }
84 state.checkpoint_mode() == CheckpointMode::Incremental
85 && matches!(self.config.query_type, QueryType::Sql)
86 };
87
88 if !is_incremental_sql {
89 return Ok(None);
90 }
91
92 let inner_plan = match plan {
96 LogicalPlan::Dml(dml) => dml.input.as_ref().clone(),
97 _ => return Ok(None),
98 };
99
100 let Some(analysis) = analyze_incremental_aggregate_plan(&inner_plan)? else {
107 warn!(
108 "Flow {} incremental mode but plan is not an aggregate query; \
109 permanently disabling incremental for this flow",
110 self.config.flow_id
111 );
112 self.state.write().unwrap().disable_incremental();
113 return Ok(None);
114 };
115
116 if !analysis.unsupported_exprs.is_empty() {
117 warn!(
118 "Flow {} incremental aggregate contains unsupported expressions {:?}; \
119 permanently disabling incremental for this flow",
120 self.config.flow_id, analysis.unsupported_exprs
121 );
122 self.state.write().unwrap().disable_incremental();
123 return Ok(None);
124 }
125
126 if analysis.merge_columns.is_empty() {
131 return Ok(Some(plan.clone()));
132 }
133
134 let sink_table = match get_table_info_df_schema(
139 self.config.catalog_manager.clone(),
140 self.config.sink_table_name.clone(),
141 )
142 .await
143 {
144 Ok((table, _)) => table,
145 Err(err) => {
146 warn!(
147 "Flow {} failed to fetch sink table for incremental rewrite; \
148 falling back to full snapshot for this round: {:?}",
149 self.config.flow_id, err
150 );
151 self.state.write().unwrap().mark_full_snapshot();
152 return Ok(None);
153 }
154 };
155 let sink_schema = sink_table.table_info().meta.schema.clone();
156 let sink_dirty_filter = match build_sink_dirty_time_window_filter_expr(
157 self.config.flow_id,
158 &analysis,
159 &sink_schema,
160 dirty_filter,
161 ) {
162 Ok(filter) => filter,
163 Err(err) => {
164 warn!(
165 "Flow {} failed to build sink dirty time window filter; \
166 falling back to full snapshot for this round: {:?}",
167 self.config.flow_id, err
168 );
169 self.state.write().unwrap().mark_full_snapshot();
170 return Ok(None);
171 }
172 };
173
174 let rewritten_inner = match rewrite_incremental_aggregate_with_sink_merge(
175 &inner_plan,
176 &analysis,
177 sink_table,
178 &self.config.sink_table_name,
179 sink_dirty_filter,
180 )
181 .await
182 {
183 Ok(plan) => plan,
184 Err(err) => {
185 warn!(
186 "Flow {} failed to rewrite incremental aggregate with sink merge; \
187 falling back to full snapshot for this round: {:?}",
188 self.config.flow_id, err
189 );
190 self.state.write().unwrap().mark_full_snapshot();
191 return Ok(None);
192 }
193 };
194
195 let rewritten = match plan {
197 LogicalPlan::Dml(dml) => LogicalPlan::Dml(DmlStatement::new(
198 dml.table_name.clone(),
199 dml.target.clone(),
200 dml.op.clone(),
201 Arc::new(rewritten_inner),
202 )),
203 _ => unreachable!("already matched Dml above"),
204 };
205
206 debug!(
207 "Flow {} rewrote incremental SQL aggregate query with sink merge",
208 self.config.flow_id
209 );
210
211 Ok(Some(rewritten))
212 }
213
214 pub(super) async fn build_flow_query_extensions(
215 &self,
216 incremental_safe: bool,
217 can_advance_checkpoints: bool,
218 ) -> Result<Vec<(&'static str, String)>, Error> {
219 let mut extensions = vec![("flow.return_region_seq", "true".to_string())];
220
221 let incremental_checkpoints_json = {
222 let state = self.state.read().unwrap();
223 if incremental_safe
224 && can_advance_checkpoints
225 && !state.is_incremental_disabled()
226 && state.checkpoint_mode() == CheckpointMode::Incremental
227 && !state.checkpoints().is_empty()
228 {
229 Some(serde_json::to_string(state.checkpoints()).map_err(|err| {
230 UnexpectedSnafu {
231 reason: format!("Failed to serialize checkpoint map: {err}"),
232 }
233 .build()
234 })?)
235 } else {
236 None
237 }
238 };
239
240 if let Some(checkpoints_json) = incremental_checkpoints_json {
241 let sink_table_id = self.sink_table_id().await?;
242 extensions.push((FLOW_SINK_TABLE_ID, sink_table_id.to_string()));
243 extensions.push((
244 FLOW_INCREMENTAL_MODE,
245 FLOW_INCREMENTAL_MODE_MEMTABLE_ONLY.to_string(),
246 ));
247 extensions.push((FLOW_INCREMENTAL_AFTER_SEQS, checkpoints_json));
248 }
249
250 Ok(extensions)
251 }
252}