flow/batching_mode/task/
ckpt.rs1use std::time::Duration;
16
17use client::OutputWithMetrics;
18use common_error::ext::ErrorExt;
19use common_error::status_code::StatusCode;
20use common_telemetry::tracing::warn;
21use common_telemetry::{debug, info};
22
23use crate::batching_mode::checkpoint::{
24 FlowCheckpointDecision, FlowQueryFallbackReason, checkpoint_mode_label,
25};
26use crate::batching_mode::state::{CheckpointMode, TaskState};
27use crate::batching_mode::task::BatchingTask;
28use crate::metrics::{
29 METRIC_FLOW_BATCHING_ENGINE_CHECKPOINT_DECISION_CNT, METRIC_FLOW_BATCHING_ENGINE_QUERY_MODE_CNT,
30};
31use crate::{Error, FlowId};
32
33impl BatchingTask {
34 pub(super) fn query_failure_reason(err: &Error) -> FlowQueryFallbackReason {
35 if err.status_code() == StatusCode::RequestOutdated {
36 FlowQueryFallbackReason::StaleCursor
37 } else {
38 FlowQueryFallbackReason::IncrementalQueryFailure
39 }
40 }
41
42 pub(super) fn apply_query_failure_to_state(
43 state: &mut TaskState,
44 elapsed: Duration,
45 reason: FlowQueryFallbackReason,
46 ) -> Option<FlowCheckpointDecision> {
47 state.after_query_exec(elapsed, false);
48 let checkpoint_mode = state.checkpoint_mode();
49 if checkpoint_mode == CheckpointMode::Incremental {
50 state.mark_full_snapshot();
51 Some(FlowCheckpointDecision::FallbackToFullSnapshot {
52 previous_mode: checkpoint_mode,
53 reason,
54 })
55 } else {
56 None
57 }
58 }
59
60 pub(super) fn apply_query_result_to_state(
61 state: &mut TaskState,
62 res: &OutputWithMetrics,
63 elapsed: Duration,
64 can_advance_checkpoints: bool,
65 ) -> FlowCheckpointDecision {
66 state.after_query_exec(elapsed, true);
67 let checkpoint_mode = state.checkpoint_mode();
68 if !can_advance_checkpoints {
69 state.mark_full_snapshot();
70 return FlowCheckpointDecision::FallbackToFullSnapshot {
71 previous_mode: checkpoint_mode,
72 reason: FlowQueryFallbackReason::DirtyBacklogPending,
73 };
74 }
75
76 if let (Some(participating_regions), Some(watermark_map)) =
77 (res.participating_regions(), res.region_watermark_map())
78 {
79 let can_advance = match checkpoint_mode {
80 CheckpointMode::FullSnapshot => state
81 .can_advance_full_snapshot_checkpoints(&participating_regions, &watermark_map),
82 CheckpointMode::Incremental => state
83 .can_advance_incremental_checkpoints_with_participation(
84 &participating_regions,
85 &watermark_map,
86 ),
87 };
88
89 if can_advance {
90 let participating_region_count = participating_regions.len();
91 let watermark_count = watermark_map.len();
92 match checkpoint_mode {
93 CheckpointMode::FullSnapshot => {
94 state.advance_checkpoints(watermark_map);
95 if state.is_incremental_disabled() {
96 FlowCheckpointDecision::FallbackToFullSnapshot {
97 previous_mode: CheckpointMode::FullSnapshot,
98 reason: FlowQueryFallbackReason::IncrementalDisabled,
99 }
100 } else {
101 FlowCheckpointDecision::AdvancedFromFullSnapshot {
102 participating_regions: participating_region_count,
103 watermarks: watermark_count,
104 }
105 }
106 }
107 CheckpointMode::Incremental => {
108 state.advance_incremental_checkpoints_with_participation(
109 &participating_regions,
110 watermark_map,
111 );
112 FlowCheckpointDecision::AdvancedIncremental {
113 participating_regions: participating_region_count,
114 watermarks: watermark_count,
115 }
116 }
117 }
118 } else {
119 state.mark_full_snapshot();
120 FlowCheckpointDecision::FallbackToFullSnapshot {
121 previous_mode: checkpoint_mode,
122 reason: FlowQueryFallbackReason::IncompleteRegionWatermark,
123 }
124 }
125 } else {
126 state.mark_full_snapshot();
127 FlowCheckpointDecision::FallbackToFullSnapshot {
128 previous_mode: checkpoint_mode,
129 reason: FlowQueryFallbackReason::MissingRegionWatermark,
130 }
131 }
132 }
133
134 pub(super) fn record_checkpoint_decision(flow_id: FlowId, decision: FlowCheckpointDecision) {
135 let flow_id = flow_id.to_string();
136 METRIC_FLOW_BATCHING_ENGINE_CHECKPOINT_DECISION_CNT
137 .with_label_values(&[
138 flow_id.as_str(),
139 decision.mode_label(),
140 decision.decision_label(),
141 decision.reason_label(),
142 ])
143 .inc();
144
145 match decision {
146 FlowCheckpointDecision::AdvancedFromFullSnapshot {
147 participating_regions,
148 watermarks,
149 } => {
150 info!(
151 "Flow {flow_id} switched to incremental mode after full snapshot, participating_regions={participating_regions}, watermarks={watermarks}"
152 );
153 }
154 FlowCheckpointDecision::AdvancedIncremental {
155 participating_regions,
156 watermarks,
157 } => {
158 debug!(
159 "Flow {flow_id} advanced incremental checkpoints, participating_regions={participating_regions}, watermarks={watermarks}"
160 );
161 }
162 FlowCheckpointDecision::FallbackToFullSnapshot {
163 previous_mode,
164 reason,
165 } => {
166 warn!(
167 "Flow {flow_id} switched to full snapshot mode, previous_mode={}, reason={}",
168 checkpoint_mode_label(previous_mode),
169 reason.as_label()
170 );
171 }
172 }
173 }
174
175 pub(super) fn record_query_mode(flow_id: FlowId, mode: CheckpointMode) {
176 let flow_id = flow_id.to_string();
177 METRIC_FLOW_BATCHING_ENGINE_QUERY_MODE_CNT
178 .with_label_values(&[flow_id.as_str(), checkpoint_mode_label(mode)])
179 .inc();
180 }
181}