From 746b4e23697700ff3791c03a248312774ddeca7b Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Wed, 9 Apr 2025 15:58:23 +0800 Subject: [PATCH] refactor: improve error code handling in status code conversion (#5851) * refactor: improve error code handling in status code conversion * chore: apply suggestions from CR * fix: only hanlde client side thrown error * feat: introduce `DeadlineExceeded` * fix: exclude Code::Unknown from retry conditions --- src/client/src/error.rs | 23 +++++++++------------ src/client/src/region.rs | 3 +-- src/common/error/src/status_code.rs | 16 ++++++++++++++- src/meta-client/src/client/cluster.rs | 3 ++- src/meta-client/src/client/procedure.rs | 3 ++- src/meta-client/src/error.rs | 27 ++++++++++++++----------- src/servers/src/error.rs | 7 ++++++- src/servers/src/mysql/writer.rs | 2 +- src/servers/src/postgres/types/error.rs | 1 + 9 files changed, 53 insertions(+), 32 deletions(-) diff --git a/src/client/src/error.rs b/src/client/src/error.rs index 47d9990598..ed0c1b5ccb 100644 --- a/src/client/src/error.rs +++ b/src/client/src/error.rs @@ -15,7 +15,7 @@ use std::any::Any; use common_error::ext::{BoxedError, ErrorExt}; -use common_error::status_code::StatusCode; +use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode}; use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use common_macro::stack_trace_debug; use snafu::{location, Location, Snafu}; @@ -152,15 +152,15 @@ impl From for Error { .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok()) } - let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE) - .and_then(|s| { - if let Ok(code) = s.parse::() { - StatusCode::from_u32(code) - } else { - None - } - }) - .unwrap_or(StatusCode::Unknown); + let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| { + if let Ok(code) = s.parse::() { + StatusCode::from_u32(code) + } else { + None + } + }); + let tonic_code = e.code(); + let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code)); let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG) .unwrap_or_else(|| e.message().to_string()); @@ -187,9 +187,6 @@ impl Error { } | Self::RegionServer { code: Code::Unavailable, .. - } | Self::RegionServer { - code: Code::Unknown, - .. } ) } diff --git a/src/client/src/region.rs b/src/client/src/region.rs index 2afbb3dfb0..15d123936d 100644 --- a/src/client/src/region.rs +++ b/src/client/src/region.rs @@ -201,12 +201,11 @@ impl RegionRequester { .await .map_err(|e| { let code = e.code(); - let err: error::Error = e.into(); // Uses `Error::RegionServer` instead of `Error::Server` error::Error::RegionServer { addr, code, - source: BoxedError::new(err), + source: BoxedError::new(error::Error::from(e)), location: location!(), } })? diff --git a/src/common/error/src/status_code.rs b/src/common/error/src/status_code.rs index 239871e1d1..fbd8b52a95 100644 --- a/src/common/error/src/status_code.rs +++ b/src/common/error/src/status_code.rs @@ -34,12 +34,14 @@ pub enum StatusCode { Internal = 1003, /// Invalid arguments. InvalidArguments = 1004, - /// The task is cancelled. + /// The task is cancelled (typically caller-side). Cancelled = 1005, /// Illegal state, can be exposed to users. IllegalState = 1006, /// Caused by some error originated from external system. External = 1007, + /// The request is deadline exceeded (typically server-side). + DeadlineExceeded = 1008, // ====== End of common status code ================ // ====== Begin of SQL related status code ========= @@ -142,6 +144,7 @@ impl StatusCode { | StatusCode::Unexpected | StatusCode::InvalidArguments | StatusCode::Cancelled + | StatusCode::DeadlineExceeded | StatusCode::InvalidSyntax | StatusCode::DatabaseAlreadyExists | StatusCode::PlanQuery @@ -177,6 +180,7 @@ impl StatusCode { | StatusCode::Unexpected | StatusCode::Internal | StatusCode::Cancelled + | StatusCode::DeadlineExceeded | StatusCode::IllegalState | StatusCode::EngineExecuteQuery | StatusCode::StorageUnavailable @@ -272,6 +276,7 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code { Code::InvalidArgument } StatusCode::Cancelled => Code::Cancelled, + StatusCode::DeadlineExceeded => Code::DeadlineExceeded, StatusCode::TableAlreadyExists | StatusCode::TableColumnExists | StatusCode::RegionAlreadyExists @@ -299,6 +304,15 @@ pub fn status_to_tonic_code(status_code: StatusCode) -> Code { } } +/// Converts tonic [Code] to [StatusCode]. +pub fn convert_tonic_code_to_status_code(code: Code) -> StatusCode { + match code { + Code::Cancelled => StatusCode::Cancelled, + Code::DeadlineExceeded => StatusCode::DeadlineExceeded, + _ => StatusCode::Internal, + } +} + #[cfg(test)] mod tests { use strum::IntoEnumIterator; diff --git a/src/meta-client/src/client/cluster.rs b/src/meta-client/src/client/cluster.rs index d50ac9717c..e1f78d4d8a 100644 --- a/src/meta-client/src/client/cluster.rs +++ b/src/meta-client/src/client/cluster.rs @@ -30,7 +30,7 @@ use common_meta::rpc::store::{ BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse, }; -use common_telemetry::{info, warn}; +use common_telemetry::{error, info, warn}; use snafu::{ensure, ResultExt}; use tokio::sync::RwLock; use tonic::codec::CompressionEncoding; @@ -237,6 +237,7 @@ impl Inner { times += 1; continue; } else { + error!("An error occurred in gRPC, status: {status}"); return Err(Error::from(status)); } } diff --git a/src/meta-client/src/client/procedure.rs b/src/meta-client/src/client/procedure.rs index 2f310ab85d..1656bea2cd 100644 --- a/src/meta-client/src/client/procedure.rs +++ b/src/meta-client/src/client/procedure.rs @@ -24,7 +24,7 @@ use api::v1::meta::{ }; use common_grpc::channel_manager::ChannelManager; use common_telemetry::tracing_context::TracingContext; -use common_telemetry::{info, warn}; +use common_telemetry::{error, info, warn}; use snafu::{ensure, ResultExt}; use tokio::sync::RwLock; use tonic::codec::CompressionEncoding; @@ -199,6 +199,7 @@ impl Inner { times += 1; continue; } else { + error!("An error occurred in gRPC, status: {status}"); return Err(error::Error::from(status)); } } diff --git a/src/meta-client/src/error.rs b/src/meta-client/src/error.rs index be1cf150da..c561fdf168 100644 --- a/src/meta-client/src/error.rs +++ b/src/meta-client/src/error.rs @@ -13,10 +13,10 @@ // limitations under the License. use common_error::ext::ErrorExt; -use common_error::status_code::StatusCode; +use common_error::status_code::{convert_tonic_code_to_status_code, StatusCode}; use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use common_macro::stack_trace_debug; -use snafu::{Location, Snafu}; +use snafu::{location, Location, Snafu}; use tonic::Status; #[derive(Snafu)] @@ -35,6 +35,8 @@ pub enum Error { code: StatusCode, msg: String, tonic_code: tonic::Code, + #[snafu(implicit)] + location: Location, }, #[snafu(display("No leader, should ask leader first"))] @@ -168,15 +170,15 @@ impl From for Error { .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok()) } - let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE) - .and_then(|s| { - if let Ok(code) = s.parse::() { - StatusCode::from_u32(code) - } else { - None - } - }) - .unwrap_or(StatusCode::Internal); + let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE).and_then(|s| { + if let Ok(code) = s.parse::() { + StatusCode::from_u32(code) + } else { + None + } + }); + let tonic_code = e.code(); + let code = code.unwrap_or_else(|| convert_tonic_code_to_status_code(tonic_code)); let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG) .unwrap_or_else(|| e.message().to_string()); @@ -184,7 +186,8 @@ impl From for Error { Self::MetaServer { code, msg, - tonic_code: e.code(), + tonic_code, + location: location!(), } } } diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index b850a4d938..dd3516d890 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -772,9 +772,14 @@ impl IntoResponse for Error { } } +/// Converts [StatusCode] to [HttpStatusCode]. pub fn status_code_to_http_status(status_code: &StatusCode) -> HttpStatusCode { match status_code { - StatusCode::Success | StatusCode::Cancelled => HttpStatusCode::OK, + StatusCode::Success => HttpStatusCode::OK, + + // When a request is cancelled by the client (e.g., by a client side timeout), + // we should return a gateway timeout status code to the external client. + StatusCode::Cancelled | StatusCode::DeadlineExceeded => HttpStatusCode::GATEWAY_TIMEOUT, StatusCode::Unsupported | StatusCode::InvalidArguments diff --git a/src/servers/src/mysql/writer.rs b/src/servers/src/mysql/writer.rs index 39a424a53f..ef62ef70cf 100644 --- a/src/servers/src/mysql/writer.rs +++ b/src/servers/src/mysql/writer.rs @@ -334,7 +334,7 @@ fn mysql_error_kind(status_code: &StatusCode) -> ErrorKind { StatusCode::Success => ErrorKind::ER_YES, StatusCode::Unknown | StatusCode::External => ErrorKind::ER_UNKNOWN_ERROR, StatusCode::Unsupported => ErrorKind::ER_NOT_SUPPORTED_YET, - StatusCode::Cancelled => ErrorKind::ER_QUERY_INTERRUPTED, + StatusCode::Cancelled | StatusCode::DeadlineExceeded => ErrorKind::ER_QUERY_INTERRUPTED, StatusCode::RuntimeResourcesExhausted => ErrorKind::ER_OUT_OF_RESOURCES, StatusCode::InvalidSyntax => ErrorKind::ER_SYNTAX_ERROR, StatusCode::RegionAlreadyExists | StatusCode::TableAlreadyExists => { diff --git a/src/servers/src/postgres/types/error.rs b/src/servers/src/postgres/types/error.rs index 033f4f6b87..e555fe6e7c 100644 --- a/src/servers/src/postgres/types/error.rs +++ b/src/servers/src/postgres/types/error.rs @@ -374,6 +374,7 @@ impl From for PgErrorCode { StatusCode::Unsupported => PgErrorCode::Ec0A000, StatusCode::InvalidArguments => PgErrorCode::Ec22023, StatusCode::Cancelled => PgErrorCode::Ec57000, + StatusCode::DeadlineExceeded => PgErrorCode::Ec57000, StatusCode::External => PgErrorCode::Ec58000, StatusCode::Unknown