[PRE-MERGE] hlinnaka/walingest-simplify-vm-flag-clearing-2

Squashed commit of the following:

commit b1d701dc06
Author: Heikki Linnakangas <heikki@neon.tech>
Date:   Thu Jan 4 18:23:59 2024 +0200

    Refactor generation of ClearVisibilityMapFlags records.

    With fewer mutable variables, for the sake of clarity.

commit aa95a07d27
Author: Heikki Linnakangas <heikki@neon.tech>
Date:   Thu Jan 4 18:20:03 2024 +0200

    Refactor the code that applies ClearVisibilityMapFlags records a little.

    To reduce the repetition.

commit 18e9208158
Author: John Spray <john@neon.tech>
Date:   Thu Jan 4 10:40:03 2024 +0000

    pageserver: improved error handling for shard routing error, timeline not found (#6262)

    ## Problem

    - When a client requests a key that isn't found in any shard on the node
    (edge case that only happens if a compute's config is out of date), we
    should prompt them to reconnect (as this includes a backoff), since they
    will not be able to complete the request until they eventually get a
    correct pageserver connection string.
    - QueryError::Other is used excessively: this contains a type-ambiguous
    anyhow::Error and is logged very verbosely (including backtrace).

    ## Summary of changes

    - Introduce PageStreamError to replace use of anyhow::Error in request
    handlers for getpage, etc.
    - Introduce Reconnect and NotFound variants to QueryError
    - Map the "shard routing error" case to PageStreamError::Reconnect ->
    QueryError::Reconnect
    - Update type conversions for LSN timeouts and tenant/timeline not found
    errors to use PageStreamError::NotFound->QueryError::NotFound
This commit is contained in:
Christian Schwarz
2024-01-04 17:08:24 +00:00
parent c029203d47
commit 4a7ea3f533
10 changed files with 320 additions and 182 deletions

View File

@@ -35,6 +35,12 @@ pub enum QueryError {
/// We were instructed to shutdown while processing the query
#[error("Shutting down")]
Shutdown,
/// Query handler indicated that client should reconnect
#[error("Server requested reconnect")]
Reconnect,
/// Query named an entity that was not found
#[error("Not found: {0}")]
NotFound(std::borrow::Cow<'static, str>),
/// Authentication failure
#[error("Unauthorized: {0}")]
Unauthorized(std::borrow::Cow<'static, str>),
@@ -54,9 +60,9 @@ impl From<io::Error> for QueryError {
impl QueryError {
pub fn pg_error_code(&self) -> &'static [u8; 5] {
match self {
Self::Disconnected(_) | Self::SimulatedConnectionError => b"08006", // connection failure
Self::Disconnected(_) | Self::SimulatedConnectionError | Self::Reconnect => b"08006", // connection failure
Self::Shutdown => SQLSTATE_ADMIN_SHUTDOWN,
Self::Unauthorized(_) => SQLSTATE_INTERNAL_ERROR,
Self::Unauthorized(_) | Self::NotFound(_) => SQLSTATE_INTERNAL_ERROR,
Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error
}
}
@@ -425,6 +431,11 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
info!("Stopped due to shutdown");
Ok(())
}
Err(QueryError::Reconnect) => {
// Dropping out of this loop implicitly disconnects
info!("Stopped due to handler reconnect request");
Ok(())
}
Err(QueryError::Disconnected(e)) => {
info!("Disconnected ({e:#})");
// Disconnection is not an error: we just use it that way internally to drop
@@ -974,7 +985,9 @@ impl<'a, IO: AsyncRead + AsyncWrite + Unpin> AsyncWrite for CopyDataWriter<'a, I
pub fn short_error(e: &QueryError) -> String {
match e {
QueryError::Disconnected(connection_error) => connection_error.to_string(),
QueryError::Reconnect => "reconnect".to_string(),
QueryError::Shutdown => "shutdown".to_string(),
QueryError::NotFound(_) => "not found".to_string(),
QueryError::Unauthorized(_e) => "JWT authentication error".to_string(),
QueryError::SimulatedConnectionError => "simulated connection error".to_string(),
QueryError::Other(e) => format!("{e:#}"),
@@ -996,9 +1009,15 @@ fn log_query_error(query: &str, e: &QueryError) {
QueryError::SimulatedConnectionError => {
error!("query handler for query '{query}' failed due to a simulated connection error")
}
QueryError::Reconnect => {
info!("query handler for '{query}' requested client to reconnect")
}
QueryError::Shutdown => {
info!("query handler for '{query}' cancelled during tenant shutdown")
}
QueryError::NotFound(reason) => {
info!("query handler for '{query}' entity not found: {reason}")
}
QueryError::Unauthorized(e) => {
warn!("query handler for '{query}' failed with authentication error: {e}");
}