1use std::any::Any;
16use std::sync::Arc;
17
18use common_error::ext::{BoxedError, ErrorExt, RetryHint};
19use common_error::status_code::StatusCode;
20use common_macro::stack_trace_debug;
21use object_store::error::retry_hint_from_opendal_error;
22use snafu::{Location, Snafu};
23
24use crate::PoisonKey;
25use crate::procedure::ProcedureId;
26
27#[derive(Snafu)]
29#[snafu(visibility(pub))]
30#[stack_trace_debug]
31pub enum Error {
32 #[snafu(display("Failed to check procedure manager status"))]
33 CheckStatus {
34 source: BoxedError,
35 #[snafu(implicit)]
36 location: Location,
37 },
38
39 #[snafu(display("Manager is pasued"))]
40 ManagerPasued {
41 #[snafu(implicit)]
42 location: Location,
43 },
44
45 #[snafu(display(
46 "Failed to execute procedure due to external error, clean poisons: {}",
47 clean_poisons
48 ))]
49 External {
50 source: BoxedError,
51 clean_poisons: bool,
52 },
53
54 #[snafu(display("Loader {} is already registered", name))]
55 LoaderConflict {
56 name: String,
57 #[snafu(implicit)]
58 location: Location,
59 },
60
61 #[snafu(display("Procedure Manager is stopped"))]
62 ManagerNotStart {
63 #[snafu(implicit)]
64 location: Location,
65 },
66
67 #[snafu(display("Failed to serialize to json"))]
68 ToJson {
69 #[snafu(source)]
70 error: serde_json::Error,
71 #[snafu(implicit)]
72 location: Location,
73 },
74
75 #[snafu(display("Procedure {} already exists", procedure_id))]
76 DuplicateProcedure {
77 procedure_id: ProcedureId,
78 #[snafu(implicit)]
79 location: Location,
80 },
81
82 #[snafu(display("Too many running procedures, max: {}", max_running_procedures))]
83 TooManyRunningProcedures {
84 max_running_procedures: usize,
85 #[snafu(implicit)]
86 location: Location,
87 },
88
89 #[snafu(display("Failed to put state, key: '{key}'"))]
90 PutState {
91 key: String,
92 #[snafu(implicit)]
93 location: Location,
94 source: BoxedError,
95 },
96
97 #[snafu(display("Failed to put poison, key: '{key}', token: '{token}'"))]
98 PutPoison {
99 key: String,
100 token: String,
101 #[snafu(implicit)]
102 location: Location,
103 source: BoxedError,
104 },
105
106 #[snafu(display("Failed to get poison, key: '{key}'"))]
107 GetPoison {
108 key: String,
109 #[snafu(implicit)]
110 location: Location,
111 source: BoxedError,
112 },
113
114 #[snafu(display("Failed to delete poison, key: '{key}', token: '{token}'"))]
115 DeletePoison {
116 key: String,
117 token: String,
118 #[snafu(implicit)]
119 location: Location,
120 source: BoxedError,
121 },
122
123 #[snafu(display("Failed to delete {}", key))]
124 DeleteState {
125 key: String,
126 #[snafu(source)]
127 error: object_store::Error,
128 },
129
130 #[snafu(display("Failed to delete keys: '{keys}'"))]
131 DeleteStates {
132 keys: String,
133 #[snafu(implicit)]
134 location: Location,
135 source: BoxedError,
136 },
137
138 #[snafu(display("Failed to list state, path: '{path}'"))]
139 ListState {
140 path: String,
141 #[snafu(implicit)]
142 location: Location,
143 source: BoxedError,
144 },
145
146 #[snafu(display("Failed to deserialize from json"))]
147 FromJson {
148 #[snafu(source)]
149 error: serde_json::Error,
150 #[snafu(implicit)]
151 location: Location,
152 },
153
154 #[snafu(display("Procedure exec failed"))]
155 RetryLater {
156 source: BoxedError,
157 clean_poisons: bool,
158 },
159
160 #[snafu(display("Procedure panics, procedure_id: {}", procedure_id))]
161 ProcedurePanic { procedure_id: ProcedureId },
162
163 #[snafu(display("Failed to wait watcher"))]
164 WaitWatcher {
165 #[snafu(source)]
166 error: tokio::sync::watch::error::RecvError,
167 #[snafu(implicit)]
168 location: Location,
169 },
170
171 #[snafu(display("Failed to execute procedure"))]
172 ProcedureExec {
173 source: Arc<Error>,
174 #[snafu(implicit)]
175 location: Location,
176 },
177
178 #[snafu(display("Rollback Procedure recovered: {error}"))]
179 RollbackProcedureRecovered {
180 error: String,
181 #[snafu(implicit)]
182 location: Location,
183 },
184
185 #[snafu(display("Procedure retry exceeded max times, procedure_id: {}", procedure_id))]
186 RetryTimesExceeded {
187 source: Arc<Error>,
188 procedure_id: ProcedureId,
189 },
190
191 #[snafu(display(
192 "Procedure rollback exceeded max times, procedure_id: {}",
193 procedure_id
194 ))]
195 RollbackTimesExceeded {
196 source: Arc<Error>,
197 procedure_id: ProcedureId,
198 },
199
200 #[snafu(display("Failed to start the remove_outdated_meta method, error"))]
201 StartRemoveOutdatedMetaTask {
202 source: common_runtime::error::Error,
203 #[snafu(implicit)]
204 location: Location,
205 },
206
207 #[snafu(display("Failed to stop the remove_outdated_meta method, error"))]
208 StopRemoveOutdatedMetaTask {
209 source: common_runtime::error::Error,
210 #[snafu(implicit)]
211 location: Location,
212 },
213
214 #[snafu(display("Failed to parse segment key: {key}"))]
215 ParseSegmentKey {
216 #[snafu(implicit)]
217 location: Location,
218 key: String,
219 #[snafu(source)]
220 error: std::num::ParseIntError,
221 },
222
223 #[snafu(display("Unexpected: {err_msg}"))]
224 Unexpected {
225 #[snafu(implicit)]
226 location: Location,
227 err_msg: String,
228 },
229
230 #[snafu(display("Not support to rollback the procedure"))]
231 RollbackNotSupported {
232 #[snafu(implicit)]
233 location: Location,
234 },
235
236 #[snafu(display("Procedure not found, procedure_id: {}", procedure_id))]
237 ProcedureNotFound {
238 procedure_id: ProcedureId,
239 #[snafu(implicit)]
240 location: Location,
241 },
242
243 #[snafu(display("Poison key not defined, key: '{key}', procedure_id: '{procedure_id}'"))]
244 PoisonKeyNotDefined {
245 key: PoisonKey,
246 procedure_id: ProcedureId,
247 #[snafu(implicit)]
248 location: Location,
249 },
250}
251
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
253
254impl ErrorExt for Error {
255 fn status_code(&self) -> StatusCode {
256 match self {
257 Error::External { source, .. }
258 | Error::PutState { source, .. }
259 | Error::DeleteStates { source, .. }
260 | Error::ListState { source, .. }
261 | Error::PutPoison { source, .. }
262 | Error::DeletePoison { source, .. }
263 | Error::GetPoison { source, .. }
264 | Error::CheckStatus { source, .. }
265 | Error::RetryLater { source, .. } => source.status_code(),
266
267 Error::ToJson { .. }
268 | Error::DeleteState { .. }
269 | Error::FromJson { .. }
270 | Error::WaitWatcher { .. } => StatusCode::Internal,
271
272 Error::RetryTimesExceeded { .. }
273 | Error::RollbackTimesExceeded { .. }
274 | Error::ManagerNotStart { .. }
275 | Error::ManagerPasued { .. }
276 | Error::TooManyRunningProcedures { .. }
277 | Error::RollbackProcedureRecovered { .. } => StatusCode::IllegalState,
278
279 Error::RollbackNotSupported { .. } => StatusCode::Unsupported,
280 Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
281 StatusCode::InvalidArguments
282 }
283 Error::ProcedurePanic { .. }
284 | Error::ParseSegmentKey { .. }
285 | Error::Unexpected { .. }
286 | &Error::ProcedureNotFound { .. }
287 | Error::PoisonKeyNotDefined { .. } => StatusCode::Unexpected,
288 Error::ProcedureExec { source, .. } => source.status_code(),
289 Error::StartRemoveOutdatedMetaTask { source, .. }
290 | Error::StopRemoveOutdatedMetaTask { source, .. } => source.status_code(),
291 }
292 }
293
294 fn as_any(&self) -> &dyn Any {
295 self
296 }
297
298 fn retry_hint(&self) -> RetryHint {
299 match self {
300 Error::RetryLater { .. } => RetryHint::Retryable,
301 Error::External { source, .. }
302 | Error::PutState { source, .. }
303 | Error::DeleteStates { source, .. }
304 | Error::ListState { source, .. }
305 | Error::PutPoison { source, .. }
306 | Error::DeletePoison { source, .. }
307 | Error::GetPoison { source, .. }
308 | Error::CheckStatus { source, .. } => source.retry_hint(),
309 Error::ProcedureExec { source, .. } => source.retry_hint(),
310 Error::StartRemoveOutdatedMetaTask { source, .. }
311 | Error::StopRemoveOutdatedMetaTask { source, .. } => source.retry_hint(),
312 Error::DeleteState { error, .. } => retry_hint_from_opendal_error(error),
313 Error::RetryTimesExceeded { .. } | Error::RollbackTimesExceeded { .. } => {
314 RetryHint::NonRetryable
315 }
316 _ => RetryHint::NonRetryable,
317 }
318 }
319}
320
321impl Error {
322 pub fn external<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
324 Error::External {
325 source: BoxedError::new(err),
326 clean_poisons: false,
327 }
328 }
329
330 pub fn external_and_clean_poisons<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
332 Error::External {
333 source: BoxedError::new(err),
334 clean_poisons: true,
335 }
336 }
337
338 pub fn retry_later<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
340 Error::RetryLater {
341 source: BoxedError::new(err),
342 clean_poisons: false,
343 }
344 }
345
346 pub fn retry_later_and_clean_poisons<E: ErrorExt + Send + Sync + 'static>(err: E) -> Error {
348 Error::RetryLater {
349 source: BoxedError::new(err),
350 clean_poisons: true,
351 }
352 }
353
354 pub fn is_retry_later(&self) -> bool {
356 matches!(self, Error::RetryLater { .. })
357 }
358
359 pub fn need_clean_poisons(&self) -> bool {
361 matches!(self, Error::External { clean_poisons, .. } if *clean_poisons)
362 || matches!(self, Error::RetryLater { clean_poisons, .. } if *clean_poisons)
363 }
364
365 #[cfg(test)]
366 pub fn from_error_ext<E: ErrorExt + Send + Sync + 'static>(err: E) -> Self {
369 if err.retry_hint().is_retryable() {
370 Error::retry_later(err)
371 } else {
372 Error::external(err)
373 }
374 }
375}
376
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_error::mock::MockError;

    use super::*;

    /// A `RetryLater` error reports a retryable hint.
    #[test]
    fn test_retry_later_hint_is_retryable() {
        let retry_later = Error::retry_later(MockError::new(StatusCode::Internal));
        let hint = retry_later.retry_hint();

        assert_eq!(hint, RetryHint::Retryable);
    }

    /// `External` forwards the retry hint of the error it wraps.
    #[test]
    fn test_external_forwards_retry_hint() {
        let inner = Error::retry_later(MockError::new(StatusCode::Internal));
        let wrapped = Error::external(inner);

        assert_eq!(wrapped.retry_hint(), RetryHint::Retryable);
    }

    /// Exceeded-budget errors are non-retryable even when their source is
    /// retryable.
    #[test]
    fn test_retry_exceeded_hint_is_non_retryable() {
        let inner = Arc::new(Error::retry_later(MockError::new(StatusCode::Internal)));

        let retry_exceeded = Error::RetryTimesExceeded {
            source: Arc::clone(&inner),
            procedure_id: ProcedureId::random(),
        };
        assert_eq!(retry_exceeded.retry_hint(), RetryHint::NonRetryable);

        let rollback_exceeded = Error::RollbackTimesExceeded {
            source: inner,
            procedure_id: ProcedureId::random(),
        };
        assert_eq!(rollback_exceeded.retry_hint(), RetryHint::NonRetryable);
    }

    /// `from_error_ext` picks the variant from the retry hint of its input.
    #[test]
    fn test_from_error_ext_uses_retry_hint() {
        let retryable_input = Error::retry_later(MockError::new(StatusCode::InvalidArguments));
        let converted = Error::from_error_ext(retryable_input);
        assert!(converted.is_retry_later());

        // Plain mock errors with these codes must not become `RetryLater`.
        for code in [StatusCode::InvalidArguments, StatusCode::Internal] {
            let converted = Error::from_error_ext(MockError::new(code));
            assert!(!converted.is_retry_later());
        }
    }
}