Skip to main content

flow/batching_mode/task/
ckpt.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::time::Duration;
16
17use client::OutputWithMetrics;
18use common_error::ext::ErrorExt;
19use common_error::status_code::StatusCode;
20use common_telemetry::tracing::warn;
21use common_telemetry::{debug, info};
22
23use crate::batching_mode::checkpoint::{
24    FlowCheckpointDecision, FlowQueryFallbackReason, checkpoint_mode_label,
25};
26use crate::batching_mode::state::{CheckpointMode, TaskState};
27use crate::batching_mode::task::BatchingTask;
28use crate::metrics::{
29    METRIC_FLOW_BATCHING_ENGINE_CHECKPOINT_DECISION_CNT, METRIC_FLOW_BATCHING_ENGINE_QUERY_MODE_CNT,
30};
31use crate::{Error, FlowId};
32
33impl BatchingTask {
34    pub(super) fn query_failure_reason(err: &Error) -> FlowQueryFallbackReason {
35        if err.status_code() == StatusCode::RequestOutdated {
36            FlowQueryFallbackReason::StaleCursor
37        } else {
38            FlowQueryFallbackReason::IncrementalQueryFailure
39        }
40    }
41
42    pub(super) fn apply_query_failure_to_state(
43        state: &mut TaskState,
44        elapsed: Duration,
45        reason: FlowQueryFallbackReason,
46    ) -> Option<FlowCheckpointDecision> {
47        state.after_query_exec(elapsed, false);
48        let checkpoint_mode = state.checkpoint_mode();
49        if checkpoint_mode == CheckpointMode::Incremental {
50            state.mark_full_snapshot();
51            Some(FlowCheckpointDecision::FallbackToFullSnapshot {
52                previous_mode: checkpoint_mode,
53                reason,
54            })
55        } else {
56            None
57        }
58    }
59
60    pub(super) fn apply_query_result_to_state(
61        state: &mut TaskState,
62        res: &OutputWithMetrics,
63        elapsed: Duration,
64        can_advance_checkpoints: bool,
65    ) -> FlowCheckpointDecision {
66        state.after_query_exec(elapsed, true);
67        let checkpoint_mode = state.checkpoint_mode();
68        if !can_advance_checkpoints {
69            state.mark_full_snapshot();
70            return FlowCheckpointDecision::FallbackToFullSnapshot {
71                previous_mode: checkpoint_mode,
72                reason: FlowQueryFallbackReason::DirtyBacklogPending,
73            };
74        }
75
76        if let (Some(participating_regions), Some(watermark_map)) =
77            (res.participating_regions(), res.region_watermark_map())
78        {
79            let can_advance = match checkpoint_mode {
80                CheckpointMode::FullSnapshot => state
81                    .can_advance_full_snapshot_checkpoints(&participating_regions, &watermark_map),
82                CheckpointMode::Incremental => state
83                    .can_advance_incremental_checkpoints_with_participation(
84                        &participating_regions,
85                        &watermark_map,
86                    ),
87            };
88
89            if can_advance {
90                let participating_region_count = participating_regions.len();
91                let watermark_count = watermark_map.len();
92                match checkpoint_mode {
93                    CheckpointMode::FullSnapshot => {
94                        state.advance_checkpoints(watermark_map);
95                        if state.is_incremental_disabled() {
96                            FlowCheckpointDecision::FallbackToFullSnapshot {
97                                previous_mode: CheckpointMode::FullSnapshot,
98                                reason: FlowQueryFallbackReason::IncrementalDisabled,
99                            }
100                        } else {
101                            FlowCheckpointDecision::AdvancedFromFullSnapshot {
102                                participating_regions: participating_region_count,
103                                watermarks: watermark_count,
104                            }
105                        }
106                    }
107                    CheckpointMode::Incremental => {
108                        state.advance_incremental_checkpoints_with_participation(
109                            &participating_regions,
110                            watermark_map,
111                        );
112                        FlowCheckpointDecision::AdvancedIncremental {
113                            participating_regions: participating_region_count,
114                            watermarks: watermark_count,
115                        }
116                    }
117                }
118            } else {
119                state.mark_full_snapshot();
120                FlowCheckpointDecision::FallbackToFullSnapshot {
121                    previous_mode: checkpoint_mode,
122                    reason: FlowQueryFallbackReason::IncompleteRegionWatermark,
123                }
124            }
125        } else {
126            state.mark_full_snapshot();
127            FlowCheckpointDecision::FallbackToFullSnapshot {
128                previous_mode: checkpoint_mode,
129                reason: FlowQueryFallbackReason::MissingRegionWatermark,
130            }
131        }
132    }
133
134    pub(super) fn record_checkpoint_decision(flow_id: FlowId, decision: FlowCheckpointDecision) {
135        let flow_id = flow_id.to_string();
136        METRIC_FLOW_BATCHING_ENGINE_CHECKPOINT_DECISION_CNT
137            .with_label_values(&[
138                flow_id.as_str(),
139                decision.mode_label(),
140                decision.decision_label(),
141                decision.reason_label(),
142            ])
143            .inc();
144
145        match decision {
146            FlowCheckpointDecision::AdvancedFromFullSnapshot {
147                participating_regions,
148                watermarks,
149            } => {
150                info!(
151                    "Flow {flow_id} switched to incremental mode after full snapshot, participating_regions={participating_regions}, watermarks={watermarks}"
152                );
153            }
154            FlowCheckpointDecision::AdvancedIncremental {
155                participating_regions,
156                watermarks,
157            } => {
158                debug!(
159                    "Flow {flow_id} advanced incremental checkpoints, participating_regions={participating_regions}, watermarks={watermarks}"
160                );
161            }
162            FlowCheckpointDecision::FallbackToFullSnapshot {
163                previous_mode,
164                reason,
165            } => {
166                warn!(
167                    "Flow {flow_id} switched to full snapshot mode, previous_mode={}, reason={}",
168                    checkpoint_mode_label(previous_mode),
169                    reason.as_label()
170                );
171            }
172        }
173    }
174
175    pub(super) fn record_query_mode(flow_id: FlowId, mode: CheckpointMode) {
176        let flow_id = flow_id.to_string();
177        METRIC_FLOW_BATCHING_ENGINE_QUERY_MODE_CNT
178            .with_label_values(&[flow_id.as_str(), checkpoint_mode_label(mode)])
179            .inc();
180    }
181}