mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-15 09:22:55 +00:00
WIP: fix: logical size limit is broken during PS restart
Fixes https://github.com/neondatabase/neon/issues/5963. Builds on top of https://github.com/neondatabase/neon/pull/6000. Will ship this in a release after #6000.
This commit is contained in:
@@ -1738,34 +1738,18 @@ impl Timeline {
|
||||
) -> logical_size::CurrentLogicalSize {
|
||||
let current_size = self.current_logical_size.current_size();
|
||||
debug!("Current size: {current_size:?}");
|
||||
|
||||
match (current_size.accuracy(), priority) {
|
||||
(logical_size::Accuracy::Exact, _) => (), // nothing to do
|
||||
(logical_size::Accuracy::Approximate, GetLogicalSizePriority::Background) => {
|
||||
// background task will eventually deliver an exact value, we're in no rush
|
||||
}
|
||||
(logical_size::Accuracy::Approximate, GetLogicalSizePriority::User) => {
|
||||
// background task is not ready, but user is asking for it now;
|
||||
// => make the background task skip the line
|
||||
// (The alternative would be to calculate the size here, but,
|
||||
// it can actually take a long time if the user has a lot of rels.
|
||||
// And we'll inevitably need it again; so, let the background task do the work.)
|
||||
match self
|
||||
.current_logical_size
|
||||
.cancel_wait_for_background_loop_concurrency_limit_semaphore
|
||||
.get()
|
||||
{
|
||||
Some(cancel) => cancel.cancel(),
|
||||
None => {
|
||||
warn!("unexpected: priority_tx not set, logical size calculation will not be prioritized");
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
current_size
|
||||
}
|
||||
|
||||
// If the initial logical size has not been computed yet, this computes it _now_ (the caller awaits the result).
|
||||
pub(crate) async fn get_current_logical_size_wait_exact(
|
||||
self: &Arc<Self>,
|
||||
) -> Result<logical_size::Exact, TimelineCancelled | CalculationError> {
|
||||
self.current_logical_size.initial_logical_size.get_or_try_init(async {
|
||||
// do calculation here
|
||||
})
|
||||
}
|
||||
|
||||
fn spawn_initial_logical_size_computation_task(self: &Arc<Self>, ctx: &RequestContext) {
|
||||
let Some(initial_part_end) = self.current_logical_size.initial_part_end else {
|
||||
// nothing to do for freshly created timelines;
|
||||
@@ -1832,31 +1816,9 @@ impl Timeline {
|
||||
&cancel,
|
||||
);
|
||||
|
||||
use crate::metrics::initial_logical_size::StartCircumstances;
|
||||
let (_maybe_permit, circumstances) = tokio::select! {
|
||||
res = wait_for_permit => {
|
||||
match res {
|
||||
Ok(permit) => (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit),
|
||||
Err(RateLimitError::Cancelled) => {
|
||||
return Err(BackgroundCalculationError::Cancelled);
|
||||
}
|
||||
}
|
||||
}
|
||||
() = skip_concurrency_limiter.cancelled() => {
|
||||
// Some action that is part of an end-user interaction requested the logical size
|
||||
// => break out of the rate limit
|
||||
// TODO: ideally we'd not run on BackgroundRuntime but the requester's runtime;
|
||||
// but then again what happens if they cancel; also, we should just be using
|
||||
// one runtime across the entire process, so, let's leave this for now.
|
||||
(None, StartCircumstances::SkippedConcurrencyLimiter)
|
||||
}
|
||||
};
|
||||
|
||||
let metrics_guard = if attempt == 1 {
|
||||
crate::metrics::initial_logical_size::START_CALCULATION.first(circumstances)
|
||||
} else {
|
||||
crate::metrics::initial_logical_size::START_CALCULATION.retry(circumstances)
|
||||
};
|
||||
self.current_logical_size.initial_logical_size.get_or_init(async {
|
||||
// do calculation here
|
||||
});
|
||||
|
||||
match self_ref
|
||||
.logical_size_calculation_task(
|
||||
|
||||
@@ -22,7 +22,7 @@ pub(super) struct LogicalSize {
|
||||
///
|
||||
/// NOTE: size at a given LSN is constant, but after a restart we will calculate
|
||||
/// the initial size at a different LSN.
|
||||
pub initial_logical_size: OnceCell<(
|
||||
pub initial_logical_size: tokio::sync::OnceCell<(
|
||||
u64,
|
||||
crate::metrics::initial_logical_size::FinishedCalculationGuard,
|
||||
)>,
|
||||
|
||||
@@ -206,6 +206,10 @@ pub(super) async fn connection_manager_loop_step(
|
||||
|
||||
if let Some(new_candidate) = connection_manager_state.next_connection_candidate() {
|
||||
info!("Switching to new connection candidate: {new_candidate:?}");
|
||||
tokio::select! {
|
||||
logical_size = connection_manager_state.timeline.get_current_logical_size_wait_exact().await,
|
||||
_ = connection_manager.should_shutdown(),
|
||||
}
|
||||
connection_manager_state
|
||||
.change_connection(new_candidate, ctx)
|
||||
.await
|
||||
|
||||
Reference in New Issue
Block a user