From ec6d3422a5a7b6f537b029d7c3e70b7a60f99e0c Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 18 Oct 2024 13:38:59 +0100 Subject: [PATCH] pageserver: disconnect when asking client to reconnect (#9390) ## Problem Consider the following sequence of events: 1. Shard location gets downgraded to secondary while there's a libpq connection in pagestream mode from the compute 2. There's no active tenant, so we return `QueryError::Reconnect` from `PageServerHandler::handle_get_page_at_lsn_request`. 3. Error bubbles up to `PostgresBackendIO::process_message`, bailing us out of pagestream mode. 4. We instruct the client to reconnnect, but continue serving the libpq connection. The client isn't yet aware of the request to reconnect and believes it is still in pagestream mode. Pageserver fails to deserialize get page requests wrapped in `CopyData` since it's not in pagestream mode. ## Summary of Changes When we wish to instruct the client to reconnect, also disconnect from the server side after flushing the error. Closes https://github.com/neondatabase/cloud/issues/17336 --- libs/postgres_backend/src/lib.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 9d274b25e6..7419798a60 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -738,6 +738,20 @@ impl PostgresBackend { QueryError::SimulatedConnectionError => { return Err(QueryError::SimulatedConnectionError) } + err @ QueryError::Reconnect => { + // Instruct the client to reconnect, stop processing messages + // from this libpq connection and, finally, disconnect from the + // server side (returning an Err achieves the later). + // + // Note the flushing is done by the caller. + let reconnect_error = short_error(&err); + self.write_message_noflush(&BeMessage::ErrorResponse( + &reconnect_error, + Some(err.pg_error_code()), + ))?; + + return Err(err); + } e => { log_query_error(query_string, &e); let short_error = short_error(&e);