Skip to main content

common_procedure/
error.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::any::Any;
16use std::sync::Arc;
17
18use common_error::ext::{BoxedError, ErrorExt, RetryHint};
19use common_error::status_code::StatusCode;
20use common_macro::stack_trace_debug;
21use object_store::error::retry_hint_from_opendal_error;
22use snafu::{Location, Snafu};
23
24use crate::PoisonKey;
25use crate::procedure::ProcedureId;
26
27/// Procedure error.
28#[derive(Snafu)]
29#[snafu(visibility(pub))]
30#[stack_trace_debug]
31pub enum Error {
32    #[snafu(display("Failed to check procedure manager status"))]
33    CheckStatus {
34        source: BoxedError,
35        #[snafu(implicit)]
36        location: Location,
37    },
38
39    #[snafu(display("Manager is pasued"))]
40    ManagerPasued {
41        #[snafu(implicit)]
42        location: Location,
43    },
44
45    #[snafu(display(
46        "Failed to execute procedure due to external error, clean poisons: {}",
47        clean_poisons
48    ))]
49    External {
50        source: BoxedError,
51        clean_poisons: bool,
52    },
53
54    #[snafu(display("Loader {} is already registered", name))]
55    LoaderConflict {
56        name: String,
57        #[snafu(implicit)]
58        location: Location,
59    },
60
61    #[snafu(display("Procedure Manager is stopped"))]
62    ManagerNotStart {
63        #[snafu(implicit)]
64        location: Location,
65    },
66
67    #[snafu(display("Failed to serialize to json"))]
68    ToJson {
69        #[snafu(source)]
70        error: serde_json::Error,
71        #[snafu(implicit)]
72        location: Location,
73    },
74
75    #[snafu(display("Procedure {} already exists", procedure_id))]
76    DuplicateProcedure {
77        procedure_id: ProcedureId,
78        #[snafu(implicit)]
79        location: Location,
80    },
81
82    #[snafu(display("Too many running procedures, max: {}", max_running_procedures))]
83    TooManyRunningProcedures {
84        max_running_procedures: usize,
85        #[snafu(implicit)]
86        location: Location,
87    },
88
89    #[snafu(display("Failed to put state, key: '{key}'"))]
90    PutState {
91        key: String,
92        #[snafu(implicit)]
93        location: Location,
94        source: BoxedError,
95    },
96
97    #[snafu(display("Failed to put poison, key: '{key}', token: '{token}'"))]
98    PutPoison {
99        key: String,
100        token: String,
101        #[snafu(implicit)]
102        location: Location,
103        source: BoxedError,
104    },
105
106    #[snafu(display("Failed to get poison, key: '{key}'"))]
107    GetPoison {
108        key: String,
109        #[snafu(implicit)]
110        location: Location,
111        source: BoxedError,
112    },
113
114    #[snafu(display("Failed to delete poison, key: '{key}', token: '{token}'"))]
115    DeletePoison {
116        key: String,
117        token: String,
118        #[snafu(implicit)]
119        location: Location,
120        source: BoxedError,
121    },
122
123    #[snafu(display("Failed to delete {}", key))]
124    DeleteState {
125        key: String,
126        #[snafu(source)]
127        error: object_store::Error,
128    },
129
130    #[snafu(display("Failed to delete keys: '{keys}'"))]
131    DeleteStates {
132        keys: String,
133        #[snafu(implicit)]
134        location: Location,
135        source: BoxedError,
136    },
137
138    #[snafu(display("Failed to list state, path: '{path}'"))]
139    ListState {
140        path: String,
141        #[snafu(implicit)]
142        location: Location,
143        source: BoxedError,
144    },
145
146    #[snafu(display("Failed to deserialize from json"))]
147    FromJson {
148        #[snafu(source)]
149        error: serde_json::Error,
150        #[snafu(implicit)]
151        location: Location,
152    },
153
154    #[snafu(display("Procedure exec failed"))]
155    RetryLater {
156        source: BoxedError,
157        clean_poisons: bool,
158    },
159
160    #[snafu(display("Procedure panics, procedure_id: {}", procedure_id))]
161    ProcedurePanic { procedure_id: ProcedureId },
162
163    #[snafu(display("Failed to wait watcher"))]
164    WaitWatcher {
165        #[snafu(source)]
166        error: tokio::sync::watch::error::RecvError,
167        #[snafu(implicit)]
168        location: Location,
169    },
170
171    #[snafu(display("Failed to execute procedure"))]
172    ProcedureExec {
173        source: Arc<Error>,
174        #[snafu(implicit)]
175        location: Location,
176    },
177
178    #[snafu(display("Rollback Procedure recovered: {error}"))]
179    RollbackProcedureRecovered {
180        error: String,
181        #[snafu(implicit)]
182        location: Location,
183    },
184
185    #[snafu(display("Procedure retry exceeded max times, procedure_id: {}", procedure_id))]
186    RetryTimesExceeded {
187        source: Arc<Error>,
188        procedure_id: ProcedureId,
189    },
190
191    #[snafu(display(
192        "Procedure rollback exceeded max times, procedure_id: {}",
193        procedure_id
194    ))]
195    RollbackTimesExceeded {
196        source: Arc<Error>,
197        procedure_id: ProcedureId,
198    },
199
200    #[snafu(display("Failed to start the remove_outdated_meta method, error"))]
201    StartRemoveOutdatedMetaTask {
202        source: common_runtime::error::Error,
203        #[snafu(implicit)]
204        location: Location,
205    },
206
207    #[snafu(display("Failed to stop the remove_outdated_meta method, error"))]
208    StopRemoveOutdatedMetaTask {
209        source: common_runtime::error::Error,
210        #[snafu(implicit)]
211        location: Location,
212    },
213
214    #[snafu(display("Failed to parse segment key: {key}"))]
215    ParseSegmentKey {
216        #[snafu(implicit)]
217        location: Location,
218        key: String,
219        #[snafu(source)]
220        error: std::num::ParseIntError,
221    },
222
223    #[snafu(display("Unexpected: {err_msg}"))]
224    Unexpected {
225        #[snafu(implicit)]
226        location: Location,
227        err_msg: String,
228    },
229
230    #[snafu(display("Not support to rollback the procedure"))]
231    RollbackNotSupported {
232        #[snafu(implicit)]
233        location: Location,
234    },
235
236    #[snafu(display("Procedure not found, procedure_id: {}", procedure_id))]
237    ProcedureNotFound {
238        procedure_id: ProcedureId,
239        #[snafu(implicit)]
240        location: Location,
241    },
242
243    #[snafu(display("Poison key not defined, key: '{key}', procedure_id: '{procedure_id}'"))]
244    PoisonKeyNotDefined {
245        key: PoisonKey,
246        procedure_id: ProcedureId,
247        #[snafu(implicit)]
248        location: Location,
249    },
250}
251
252pub type Result<T> = std::result::Result<T, Error>;
253
254impl ErrorExt for Error {
255    fn status_code(&self) -> StatusCode {
256        match self {
257            Error::External { source, .. }
258            | Error::PutState { source, .. }
259            | Error::DeleteStates { source, .. }
260            | Error::ListState { source, .. }
261            | Error::PutPoison { source, .. }
262            | Error::DeletePoison { source, .. }
263            | Error::GetPoison { source, .. }
264            | Error::CheckStatus { source, .. }
265            | Error::RetryLater { source, .. } => source.status_code(),
266
267            Error::ToJson { .. }
268            | Error::DeleteState { .. }
269            | Error::FromJson { .. }
270            | Error::WaitWatcher { .. } => StatusCode::Internal,
271
272            Error::RetryTimesExceeded { .. }
273            | Error::RollbackTimesExceeded { .. }
274            | Error::ManagerNotStart { .. }
275            | Error::ManagerPasued { .. }
276            | Error::TooManyRunningProcedures { .. }
277            | Error::RollbackProcedureRecovered { .. } => StatusCode::IllegalState,
278
279            Error::RollbackNotSupported { .. } => StatusCode::Unsupported,
280            Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
281                StatusCode::InvalidArguments
282            }
283            Error::ProcedurePanic { .. }
284            | Error::ParseSegmentKey { .. }
285            | Error::Unexpected { .. }
286            | &Error::ProcedureNotFound { .. }
287            | Error::PoisonKeyNotDefined { .. } => StatusCode::Unexpected,
288            Error::ProcedureExec { source, .. } => source.status_code(),
289            Error::StartRemoveOutdatedMetaTask { source, .. }
290            | Error::StopRemoveOutdatedMetaTask { source, .. } => source.status_code(),
291        }
292    }
293
294    fn as_any(&self) -> &dyn Any {
295        self
296    }
297
298    fn retry_hint(&self) -> RetryHint {
299        match self {
300            Error::RetryLater { .. } => RetryHint::Retryable,
301            Error::External { source, .. }
302            | Error::PutState { source, .. }
303            | Error::DeleteStates { source, .. }
304            | Error::ListState { source, .. }
305            | Error::PutPoison { source, .. }
306            | Error::DeletePoison { source, .. }
307            | Error::GetPoison { source, .. }
308            | Error::CheckStatus { source, .. } => source.retry_hint(),
309            Error::ProcedureExec { source, .. } => source.retry_hint(),
310            Error::StartRemoveOutdatedMetaTask { source, .. }
311            | Error::StopRemoveOutdatedMetaTask { source, .. } => source.retry_hint(),
312            Error::DeleteState { error, .. } => retry_hint_from_opendal_error(error),
313            Error::RetryTimesExceeded { .. } | Error::RollbackTimesExceeded { .. } => {
314                RetryHint::NonRetryable
315            }
316            _ => RetryHint::NonRetryable,
317        }
318    }
319}
320
321impl Error {
322    /// Creates a new [Error::External] error from source `err`.
323    pub fn external<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
324        Error::External {
325            source: BoxedError::new(err),
326            clean_poisons: false,
327        }
328    }
329
330    /// Creates a new [Error::External] error from source `err` and clean poisons.
331    pub fn external_and_clean_poisons<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
332        Error::External {
333            source: BoxedError::new(err),
334            clean_poisons: true,
335        }
336    }
337
338    /// Creates a new [Error::RetryLater] error from source `err`.
339    pub fn retry_later<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
340        Error::RetryLater {
341            source: BoxedError::new(err),
342            clean_poisons: false,
343        }
344    }
345
346    /// Creates a new [Error::RetryLater] error from source `err` and clean poisons.
347    pub fn retry_later_and_clean_poisons<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
348        Error::RetryLater {
349            source: BoxedError::new(err),
350            clean_poisons: true,
351        }
352    }
353
354    /// Determine whether it is a retry later type through [StatusCode]
355    pub fn is_retry_later(&self) -> bool {
356        matches!(self, Error::RetryLater { .. })
357    }
358
359    /// Determine whether it needs to clean poisons.
360    pub fn need_clean_poisons(&self) -> bool {
361        matches!(self, Error::External { clean_poisons, .. } if *clean_poisons)
362            || matches!(self, Error::RetryLater { clean_poisons, .. } if *clean_poisons)
363    }
364
365    #[cfg(test)]
366    /// Creates a new [Error::RetryLater] or [Error::External] error from source `err` according
367    /// to its [RetryHint].
368    pub fn from_error_ext<E: ErrorExt + Send + Sync + 'static>(err: E) -> Self {
369        if err.retry_hint().is_retryable() {
370            Error::retry_later(err)
371        } else {
372            Error::external(err)
373        }
374    }
375}
376
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_error::mock::MockError;

    use super::*;

    /// Builds an [Error::RetryLater] around a mock source carrying `code`.
    fn retry_later_with(code: StatusCode) -> Error {
        Error::retry_later(MockError::new(code))
    }

    #[test]
    fn test_retry_later_hint_is_retryable() {
        let hint = retry_later_with(StatusCode::Internal).retry_hint();

        assert_eq!(hint, RetryHint::Retryable);
    }

    #[test]
    fn test_external_forwards_retry_hint() {
        // `External` reports whatever its (retryable) source reports.
        let wrapped = Error::external(retry_later_with(StatusCode::Internal));

        assert_eq!(wrapped.retry_hint(), RetryHint::Retryable);
    }

    #[test]
    fn test_retry_exceeded_hint_is_non_retryable() {
        // The source is retryable, yet both "exceeded" variants must not be.
        let inner = Arc::new(retry_later_with(StatusCode::Internal));

        let retry_exceeded = Error::RetryTimesExceeded {
            source: Arc::clone(&inner),
            procedure_id: ProcedureId::random(),
        };
        assert_eq!(retry_exceeded.retry_hint(), RetryHint::NonRetryable);

        let rollback_exceeded = Error::RollbackTimesExceeded {
            source: inner,
            procedure_id: ProcedureId::random(),
        };
        assert_eq!(rollback_exceeded.retry_hint(), RetryHint::NonRetryable);
    }

    #[test]
    fn test_from_error_ext_uses_retry_hint() {
        let from_retryable =
            Error::from_error_ext(retry_later_with(StatusCode::InvalidArguments));
        assert!(from_retryable.is_retry_later());

        let from_invalid_args =
            Error::from_error_ext(MockError::new(StatusCode::InvalidArguments));
        assert!(!from_invalid_args.is_retry_later());

        let from_internal = Error::from_error_ext(MockError::new(StatusCode::Internal));
        assert!(!from_internal.is_retry_later());
    }
}
431}