Skip to main content

datanode/
error.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16use std::sync::Arc;
17
18use common_error::define_into_tonic_status;
19use common_error::ext::{BoxedError, ErrorExt, RetryHint};
20use common_error::status_code::StatusCode;
21use common_macro::stack_trace_debug;
22use common_runtime::JoinError;
23use snafu::{Location, Snafu};
24use store_api::storage::RegionId;
25use table::error::Error as TableError;
26use tokio::time::error::Elapsed;
27
28/// Business error of datanode.
29#[derive(Snafu)]
30#[snafu(visibility(pub))]
31#[stack_trace_debug]
32pub enum Error {
33    #[snafu(display("Failed to execute async task"))]
34    AsyncTaskExecute {
35        #[snafu(implicit)]
36        location: Location,
37        source: Arc<Error>,
38    },
39
40    #[snafu(display("Failed to watch change"))]
41    WatchAsyncTaskChange {
42        #[snafu(implicit)]
43        location: Location,
44        #[snafu(source)]
45        error: tokio::sync::watch::error::RecvError,
46    },
47
48    #[snafu(display("Failed to handle heartbeat response"))]
49    HandleHeartbeatResponse {
50        #[snafu(implicit)]
51        location: Location,
52        source: common_meta::error::Error,
53    },
54
55    #[snafu(display("Failed to get info from meta server"))]
56    GetMetadata {
57        #[snafu(implicit)]
58        location: Location,
59        source: common_meta::error::Error,
60    },
61
62    #[snafu(display("Failed to execute logical plan"))]
63    ExecuteLogicalPlan {
64        #[snafu(implicit)]
65        location: Location,
66        source: query::error::Error,
67    },
68
69    #[snafu(display("Failed to join datanode runtime task, request_type: {}", request_type))]
70    RuntimeJoin {
71        request_type: &'static str,
72        #[snafu(source)]
73        error: JoinError,
74        #[snafu(implicit)]
75        location: Location,
76    },
77
78    #[snafu(display("Failed to create plan decoder"))]
79    NewPlanDecoder {
80        #[snafu(implicit)]
81        location: Location,
82        source: query::error::Error,
83    },
84
85    #[snafu(display("Failed to decode logical plan"))]
86    DecodeLogicalPlan {
87        #[snafu(implicit)]
88        location: Location,
89        source: common_query::error::Error,
90    },
91
92    #[snafu(display("Schema not found: {}", name))]
93    SchemaNotFound {
94        name: String,
95        #[snafu(implicit)]
96        location: Location,
97    },
98
99    #[snafu(display("Missing timestamp column in request"))]
100    MissingTimestampColumn {
101        #[snafu(implicit)]
102        location: Location,
103    },
104
105    #[snafu(display("Failed to delete value from table: {}", table_name))]
106    Delete {
107        table_name: String,
108        #[snafu(implicit)]
109        location: Location,
110        source: TableError,
111    },
112
113    #[snafu(display("Failed to start server"))]
114    StartServer {
115        #[snafu(implicit)]
116        location: Location,
117        source: servers::error::Error,
118    },
119
120    #[snafu(display("Failed to parse address {}", addr))]
121    ParseAddr {
122        addr: String,
123        #[snafu(source)]
124        error: std::net::AddrParseError,
125    },
126
127    #[snafu(display("Failed to create directory {}", dir))]
128    CreateDir {
129        dir: String,
130        #[snafu(source)]
131        error: std::io::Error,
132    },
133
134    #[snafu(display("Failed to remove directory {}", dir))]
135    RemoveDir {
136        dir: String,
137        #[snafu(source)]
138        error: std::io::Error,
139    },
140
141    #[snafu(display("Failed to open log store"))]
142    OpenLogStore {
143        #[snafu(implicit)]
144        location: Location,
145        source: Box<log_store::error::Error>,
146    },
147
148    #[snafu(display("Invalid SQL, error: {}", msg))]
149    InvalidSql { msg: String },
150
151    #[snafu(display("Illegal primary keys definition: {}", msg))]
152    IllegalPrimaryKeysDef {
153        msg: String,
154        #[snafu(implicit)]
155        location: Location,
156    },
157
158    #[snafu(display("Schema {} already exists", name))]
159    SchemaExists {
160        name: String,
161        #[snafu(implicit)]
162        location: Location,
163    },
164
165    #[snafu(display("Failed to initialize meta client"))]
166    MetaClientInit {
167        #[snafu(implicit)]
168        location: Location,
169        source: meta_client::error::Error,
170    },
171
172    #[snafu(display("Missing node id in Datanode config"))]
173    MissingNodeId {
174        #[snafu(implicit)]
175        location: Location,
176    },
177
178    #[snafu(display("Failed to build datanode"))]
179    BuildDatanode {
180        #[snafu(implicit)]
181        location: Location,
182        source: BoxedError,
183    },
184
185    #[snafu(display("Failed to build http client"))]
186    BuildHttpClient {
187        #[snafu(implicit)]
188        location: Location,
189        #[snafu(source)]
190        error: reqwest::Error,
191    },
192
193    #[snafu(display("Missing required field: {}", name))]
194    MissingRequiredField {
195        name: String,
196        #[snafu(implicit)]
197        location: Location,
198    },
199
200    #[snafu(display(
201        "No valid default value can be built automatically, column: {}",
202        column,
203    ))]
204    ColumnNoneDefaultValue {
205        column: String,
206        #[snafu(implicit)]
207        location: Location,
208    },
209
210    #[snafu(display("Failed to shutdown server"))]
211    ShutdownServer {
212        #[snafu(implicit)]
213        location: Location,
214        #[snafu(source)]
215        source: servers::error::Error,
216    },
217
218    #[snafu(display("Failed to shutdown instance"))]
219    ShutdownInstance {
220        #[snafu(implicit)]
221        location: Location,
222        #[snafu(source)]
223        source: BoxedError,
224    },
225
226    #[snafu(display("Payload not exist"))]
227    PayloadNotExist {
228        #[snafu(implicit)]
229        location: Location,
230    },
231
232    #[snafu(display("Unexpected, violated: {}", violated))]
233    Unexpected {
234        violated: String,
235        #[snafu(implicit)]
236        location: Location,
237    },
238
239    #[snafu(display("Failed to handle request for region {}", region_id))]
240    HandleRegionRequest {
241        region_id: RegionId,
242        #[snafu(implicit)]
243        location: Location,
244        source: BoxedError,
245    },
246
247    #[snafu(display("Failed to open batch regions"))]
248    HandleBatchOpenRequest {
249        #[snafu(implicit)]
250        location: Location,
251        source: BoxedError,
252    },
253
254    #[snafu(display("Failed to handle batch ddl request, ddl_type: {}", ddl_type))]
255    HandleBatchDdlRequest {
256        #[snafu(implicit)]
257        location: Location,
258        source: BoxedError,
259        ddl_type: String,
260    },
261
262    #[snafu(display("RegionId {} not found", region_id))]
263    RegionNotFound {
264        region_id: RegionId,
265        #[snafu(implicit)]
266        location: Location,
267    },
268
269    #[snafu(display("Region {} not ready", region_id))]
270    RegionNotReady {
271        region_id: RegionId,
272        #[snafu(implicit)]
273        location: Location,
274    },
275
276    #[snafu(display("Region {} is busy", region_id))]
277    RegionBusy {
278        region_id: RegionId,
279        #[snafu(implicit)]
280        location: Location,
281    },
282
283    #[snafu(display("Region engine {} is not registered", name))]
284    RegionEngineNotFound {
285        name: String,
286        #[snafu(implicit)]
287        location: Location,
288    },
289
290    #[snafu(display("Unsupported output type, expected: {}", expected))]
291    UnsupportedOutput {
292        expected: String,
293        #[snafu(implicit)]
294        location: Location,
295    },
296
297    #[snafu(display("Failed to build region requests"))]
298    BuildRegionRequests {
299        #[snafu(implicit)]
300        location: Location,
301        source: store_api::metadata::MetadataError,
302    },
303
304    #[snafu(display("Failed to serialize WAL options for region {}", region_id))]
305    SerializeWalOptions {
306        region_id: RegionId,
307        #[snafu(source)]
308        error: serde_json::Error,
309        #[snafu(implicit)]
310        location: Location,
311    },
312
313    #[snafu(display("Failed to stop region engine {}", name))]
314    StopRegionEngine {
315        name: String,
316        #[snafu(implicit)]
317        location: Location,
318        source: BoxedError,
319    },
320
321    #[snafu(display(
322        "Failed to find logical regions in physical region {}",
323        physical_region_id
324    ))]
325    FindLogicalRegions {
326        physical_region_id: RegionId,
327        source: metric_engine::error::Error,
328        #[snafu(implicit)]
329        location: Location,
330    },
331
332    #[snafu(display("Failed to build mito engine"))]
333    BuildMitoEngine {
334        source: mito2::error::Error,
335        #[snafu(implicit)]
336        location: Location,
337    },
338
339    #[snafu(display("Failed to build metric engine"))]
340    BuildMetricEngine {
341        source: metric_engine::error::Error,
342        #[snafu(implicit)]
343        location: Location,
344    },
345
346    #[snafu(display("Failed to run gc for region {}", region_id))]
347    GcMitoEngine {
348        region_id: RegionId,
349        source: mito2::error::Error,
350        #[snafu(implicit)]
351        location: Location,
352    },
353
354    #[snafu(display("Failed to list SST entries from storage"))]
355    ListStorageSsts {
356        #[snafu(implicit)]
357        location: Location,
358        source: mito2::error::Error,
359    },
360
361    #[snafu(display("Failed to serialize options to TOML"))]
362    TomlFormat {
363        #[snafu(implicit)]
364        location: Location,
365        #[snafu(source(from(common_config::error::Error, Box::new)))]
366        source: Box<common_config::error::Error>,
367    },
368
369    #[snafu(display(
370        "Failed to get region metadata from engine {} for region_id {}",
371        engine,
372        region_id,
373    ))]
374    GetRegionMetadata {
375        engine: String,
376        region_id: RegionId,
377        #[snafu(implicit)]
378        location: Location,
379        source: BoxedError,
380    },
381
382    #[snafu(display("DataFusion"))]
383    DataFusion {
384        #[snafu(source)]
385        error: datafusion::error::DataFusionError,
386        #[snafu(implicit)]
387        location: Location,
388    },
389
390    #[snafu(display("Failed to acquire permit, source closed"))]
391    ConcurrentQueryLimiterClosed {
392        #[snafu(source)]
393        error: tokio::sync::AcquireError,
394        #[snafu(implicit)]
395        location: Location,
396    },
397
398    #[snafu(display("Failed to acquire permit under timeouts"))]
399    ConcurrentQueryLimiterTimeout {
400        #[snafu(source)]
401        error: Elapsed,
402        #[snafu(implicit)]
403        location: Location,
404    },
405
406    #[snafu(display("Cache not found in registry"))]
407    MissingCache {
408        #[snafu(implicit)]
409        location: Location,
410    },
411
412    #[snafu(display("Failed to serialize json"))]
413    SerializeJson {
414        #[snafu(source)]
415        error: serde_json::Error,
416        #[snafu(implicit)]
417        location: Location,
418    },
419
420    #[snafu(display("Failed object store operation"))]
421    ObjectStore {
422        source: object_store::error::Error,
423        #[snafu(implicit)]
424        location: Location,
425    },
426
427    #[snafu(display("Not yet implemented: {what}"))]
428    NotYetImplemented { what: String },
429}
430
431pub type Result<T> = std::result::Result<T, Error>;
432
433impl ErrorExt for Error {
434    fn status_code(&self) -> StatusCode {
435        use Error::*;
436        match self {
437            NewPlanDecoder { source, .. } | ExecuteLogicalPlan { source, .. } => {
438                source.status_code()
439            }
440
441            BuildRegionRequests { source, .. } => source.status_code(),
442            HandleHeartbeatResponse { source, .. } | GetMetadata { source, .. } => {
443                source.status_code()
444            }
445
446            DecodeLogicalPlan { source, .. } => source.status_code(),
447
448            Delete { source, .. } => source.status_code(),
449
450            InvalidSql { .. }
451            | IllegalPrimaryKeysDef { .. }
452            | MissingTimestampColumn { .. }
453            | SchemaNotFound { .. }
454            | SchemaExists { .. }
455            | MissingNodeId { .. }
456            | ColumnNoneDefaultValue { .. }
457            | MissingRequiredField { .. }
458            | RegionEngineNotFound { .. }
459            | ParseAddr { .. }
460            | TomlFormat { .. }
461            | BuildDatanode { .. } => StatusCode::InvalidArguments,
462
463            PayloadNotExist { .. }
464            | Unexpected { .. }
465            | SerializeWalOptions { .. }
466            | WatchAsyncTaskChange { .. }
467            | BuildHttpClient { .. } => StatusCode::Unexpected,
468
469            AsyncTaskExecute { source, .. } => source.status_code(),
470
471            CreateDir { .. }
472            | RemoveDir { .. }
473            | ShutdownInstance { .. }
474            | DataFusion { .. }
475            | RuntimeJoin { .. } => StatusCode::Internal,
476
477            RegionNotFound { .. } => StatusCode::RegionNotFound,
478            RegionNotReady { .. } => StatusCode::RegionNotReady,
479            RegionBusy { .. } => StatusCode::RegionBusy,
480
481            StartServer { source, .. } | ShutdownServer { source, .. } => source.status_code(),
482
483            OpenLogStore { source, .. } => source.status_code(),
484            MetaClientInit { source, .. } => source.status_code(),
485            UnsupportedOutput { .. } | NotYetImplemented { .. } => StatusCode::Unsupported,
486            HandleRegionRequest { source, .. }
487            | GetRegionMetadata { source, .. }
488            | HandleBatchOpenRequest { source, .. }
489            | HandleBatchDdlRequest { source, .. } => source.status_code(),
490            StopRegionEngine { source, .. } => source.status_code(),
491
492            FindLogicalRegions { source, .. } => source.status_code(),
493            BuildMitoEngine { source, .. } | GcMitoEngine { source, .. } => source.status_code(),
494            BuildMetricEngine { source, .. } => source.status_code(),
495            ListStorageSsts { source, .. } => source.status_code(),
496            ConcurrentQueryLimiterClosed { .. } | ConcurrentQueryLimiterTimeout { .. } => {
497                StatusCode::RegionBusy
498            }
499            MissingCache { .. } => StatusCode::Internal,
500            SerializeJson { .. } => StatusCode::Internal,
501
502            ObjectStore { source, .. } => source.status_code(),
503        }
504    }
505
506    fn as_any(&self) -> &dyn Any {
507        self
508    }
509
510    fn retry_hint(&self) -> RetryHint {
511        use Error::*;
512
513        match self {
514            RegionBusy { .. }
515            | RegionNotReady { .. }
516            | ConcurrentQueryLimiterClosed { .. }
517            | ConcurrentQueryLimiterTimeout { .. } => RetryHint::Retryable,
518            NewPlanDecoder { source, .. } | ExecuteLogicalPlan { source, .. } => {
519                source.retry_hint()
520            }
521            HandleHeartbeatResponse { source, .. } | GetMetadata { source, .. } => {
522                source.retry_hint()
523            }
524            DecodeLogicalPlan { source, .. } => source.retry_hint(),
525            Delete { source, .. } => source.retry_hint(),
526            AsyncTaskExecute { source, .. } => source.retry_hint(),
527            StartServer { source, .. } | ShutdownServer { source, .. } => source.retry_hint(),
528            OpenLogStore { source, .. } => source.retry_hint(),
529            MetaClientInit { source, .. } => source.retry_hint(),
530            HandleRegionRequest { source, .. }
531            | GetRegionMetadata { source, .. }
532            | HandleBatchOpenRequest { source, .. }
533            | HandleBatchDdlRequest { source, .. }
534            | StopRegionEngine { source, .. } => source.retry_hint(),
535            FindLogicalRegions { source, .. } => source.retry_hint(),
536            BuildMitoEngine { source, .. } => source.retry_hint(),
537            GcMitoEngine { source, .. } => source.retry_hint(),
538            BuildMetricEngine { source, .. } => source.retry_hint(),
539            ListStorageSsts { source, .. } => source.retry_hint(),
540            ObjectStore { source, .. } => source.retry_hint(),
541            _ => RetryHint::NonRetryable,
542        }
543    }
544}
545
546define_into_tonic_status!(Error);
547
#[cfg(test)]
mod tests {
    use common_error::ext::RetryHint;

    use super::*;

    /// Busy and not-ready region errors must both be hinted as retryable.
    #[test]
    fn test_region_state_hints_are_retryable() {
        let region_id = RegionId::new(1024, 1);
        let region_state_errors = [
            RegionBusySnafu { region_id }.build(),
            RegionNotReadySnafu { region_id }.build(),
        ];

        for err in &region_state_errors {
            assert_eq!(err.retry_hint(), RetryHint::Retryable);
        }
    }

    /// A variant without a dedicated arm falls back to the non-retryable hint.
    #[test]
    fn test_default_hint_is_non_retryable() {
        let hint = UnexpectedSnafu {
            violated: "mock error",
        }
        .build()
        .retry_hint();

        assert_eq!(hint, RetryHint::NonRetryable);
    }
}