allow pushing fake SystemTimes for refresh_gc_info

this should theoretically make the situation reproducible with the following steps:

0. configure local pageserver to never run gc or compaction, allow
   access to tenant via local storage
1. `curl -X POST localhost:9898/v1/tenant/$tid/attach`
2. `curl -X PUT --data "{ \"tenant_id\": \"$tid\", \"gc_horizon\": 87772208 }" --header 'content-type: application/json' localhost:9898/v1/tenant/config`
3. `curl -X POST localhost:9898/add_forced_now?now=2023-01-24T04:58:17.319972Z`
4. `curl -X POST localhost:9898/add_forced_now?now=2023-01-24T04:59:32.436Z`
5. `curl -X PUT --data '{}' --header 'content-type: application/json' localhost:9898/v1/tenant/$tid/timeline/$ttid/do_gc`
    - this call now uses the forced `now` pushed in step 3
6. `curl localhost:9898/v1/tenant/$tid/size`
    - this call now uses the forced `now` pushed in step 4

this comes quite close, but doesn't end up downloading the same file.
This commit is contained in:
Joonas Koivunen
2023-01-25 18:46:44 +02:00
parent 8bd70a3d30
commit 91b6ac2043
3 changed files with 40 additions and 3 deletions

View File

@@ -916,6 +916,7 @@ pub fn make_router(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
timeline_download_remote_layers_handler_post,
)
.post("/add_forced_now", handle_add_forced_now)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
timeline_download_remote_layers_handler_get,
@@ -926,3 +927,14 @@ pub fn make_router(
)
.any(handler_404))
}
async fn handle_add_forced_now(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let now = get_query_param(&req, "now")?;
let now = chrono::DateTime::parse_from_rfc3339(&now).unwrap();
let now = now.with_timezone(&chrono::Utc);
crate::tenant::timeline::Timeline::force_next_now(now.into());
json_response(StatusCode::OK, ())
}

View File

@@ -90,7 +90,7 @@ pub mod mgr;
pub mod tasks;
pub mod upload_queue;
mod timeline;
pub mod timeline;
pub mod size;

View File

@@ -15,7 +15,7 @@ use tokio_util::sync::CancellationToken;
use tracing::*;
use std::cmp::{max, min, Ordering};
use std::collections::HashMap;
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::ops::{Deref, Range};
use std::path::{Path, PathBuf};
@@ -75,6 +75,9 @@ enum FlushLoopState {
Exited,
}
/// Process-wide FIFO queue of forced "now" values.
///
/// `Timeline::force_next_now` appends to the back; the PITR cutoff calculation pops
/// from the front and falls back to `SystemTime::now()` when the queue is empty.
/// Each queued value is therefore consumed by exactly one such calculation.
pub static PENDING_NOWS: once_cell::sync::Lazy<Mutex<VecDeque<SystemTime>>> =
    once_cell::sync::Lazy::new(Default::default);
pub struct Timeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -2627,6 +2630,10 @@ impl Timeline {
Ok(())
}
/// Appends `next` to the back of the global `PENDING_NOWS` queue.
///
/// # Panics
///
/// Panics if the `PENDING_NOWS` mutex is poisoned.
pub fn force_next_now(next: SystemTime) {
    let mut forced = PENDING_NOWS.lock().unwrap();
    forced.push_back(next);
}
/// Update information about which layer files need to be retained on
/// garbage collection. This is separate from actually performing the GC,
/// and is updated more frequently, so that compaction can remove obsolete
@@ -2674,10 +2681,28 @@ impl Timeline {
// work, so avoid calling it altogether if time-based retention is not
// configured. It would be pointless anyway.
let pitr_cutoff = if pitr != Duration::ZERO {
let now = SystemTime::now();
let now = PENDING_NOWS.lock().unwrap().pop_front();
let now = if let Some(now) = now {
let dt = chrono::DateTime::<chrono::Utc>::from(now);
let dt = dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
tracing::warn!(now = dt, "using forced now");
now
} else {
SystemTime::now()
};
if let Some(pitr_cutoff_timestamp) = now.checked_sub(pitr) {
let pitr_timestamp = to_pg_timestamp(pitr_cutoff_timestamp);
{
let dt = chrono::DateTime::<chrono::Utc>::from(now);
let dt = dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
info!(
?pitr,
pitr_cutoff_timestamp = dt,
"searching lsn for timestamp"
);
}
match self.find_lsn_for_timestamp(pitr_timestamp).await? {
LsnForTimestamp::Present(lsn) => lsn,
LsnForTimestamp::Future(lsn) => {