Compare commits

...

5 Commits

Author SHA1 Message Date
Joonas Koivunen
91b6ac2043 allow pushing fake SystemTimes for refresh_gc_info
this should theoretically make the situation reproducible by first:

0. configure local pageserver to never run gc or compaction, allow
   access to tenant via local storage
1. `curl -X POST localhost:9898/v1/tenant/$tid/attach`
2. `curl -X PUT --data "{ \"tenant_id\": \"$tid\", \"gc_horizon\": 87772208 }" --header 'content-type: application/json' localhost:9898/v1/tenant/config`
3. `curl -X POST localhost:9898/add_forced_now?now=2023-01-24T04:58:17.319972Z`
4. `curl -X POST localhost:9898/add_forced_now?now=2023-01-24T04:59:32.436Z`
5. `curl -X PUT --data '{}' --header 'content-type: application/json' localhost:9898/v1/tenant/$tid/timeline/$ttid/do_gc`
    - this now uses now from step 3
6. `curl localhost:9898/v1/tenant/$tid/size`
    - this now uses now from step 4

this comes quite close, but doesn't end up downloading the same file.
2023-01-25 18:52:42 +02:00
Joonas Koivunen
8bd70a3d30 log Arc comparisons (has not been useful) 2023-01-25 18:52:42 +02:00
Joonas Koivunen
eb2b8ab3b4 temp: propagate span
there's a separate PR for this.
2023-01-25 18:52:42 +02:00
Joonas Koivunen
5bdf6ef378 temp: silence nagging bg task log output 2023-01-25 18:52:42 +02:00
Joonas Koivunen
c8367b1ea5 temp: allow writing config for attached tenant 2023-01-25 18:44:52 +02:00
5 changed files with 69 additions and 7 deletions

View File

@@ -916,6 +916,7 @@ pub fn make_router(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
timeline_download_remote_layers_handler_post,
)
.post("/add_forced_now", handle_add_forced_now)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
timeline_download_remote_layers_handler_get,
@@ -926,3 +927,14 @@ pub fn make_router(
)
.any(handler_404))
}
async fn handle_add_forced_now(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let now = get_query_param(&req, "now")?;
let now = chrono::DateTime::parse_from_rfc3339(&now).unwrap();
let now = now.with_timezone(&chrono::Utc);
crate::tenant::timeline::Timeline::force_next_now(now.into());
json_response(StatusCode::OK, ())
}

View File

@@ -90,7 +90,7 @@ pub mod mgr;
pub mod tasks;
pub mod upload_queue;
mod timeline;
pub mod timeline;
pub mod size;
@@ -1797,9 +1797,12 @@ impl Tenant {
let mut target_config_file = VirtualFile::open_with_options(
target_config_path,
OpenOptions::new()
.truncate(true) // This needed for overwriting with small config files
// This is needed for overwriting with small config files
.truncate(true)
.write(true)
.create_new(first_save),
.create_new(first_save)
// this will be ignored if create_new(true)
.create(true),
)?;
target_config_file

View File

@@ -202,6 +202,13 @@ impl<T: ?Sized> PartialEq for LayerRTreeObject<T> {
// references. Clippy complains about this. In practice it
// seems to work, the assertion below would be triggered
// otherwise but this ought to be fixed.
{
let left = Arc::as_ptr(&self.layer);
let right = Arc::as_ptr(&other.layer);
tracing::info!(?left, ?right, "comparing ptr_eq");
}
#[allow(clippy::vtable_address_comparisons)]
Arc::ptr_eq(&self.layer, &other.layer)
}

View File

@@ -52,6 +52,8 @@ async fn compaction_loop(tenant_id: TenantId) {
info!("starting");
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
async {
let mut first = true;
loop {
trace!("waking up");
@@ -68,10 +70,14 @@ async fn compaction_loop(tenant_id: TenantId) {
let mut sleep_duration = tenant.get_compaction_period();
if sleep_duration == Duration::ZERO {
info!("automatic compaction is disabled");
if first {
info!("automatic compaction is disabled");
}
first = false;
// check again in 10 seconds, in case it's been enabled again.
sleep_duration = Duration::from_secs(10);
} else {
first = true;
// Run compaction
if let Err(e) = tenant.compaction_iteration().await {
sleep_duration = wait_duration;
@@ -103,6 +109,7 @@ async fn gc_loop(tenant_id: TenantId) {
info!("starting");
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
async {
let mut first = true;
loop {
trace!("waking up");
@@ -121,10 +128,14 @@ async fn gc_loop(tenant_id: TenantId) {
let gc_horizon = tenant.get_gc_horizon();
let mut sleep_duration = gc_period;
if sleep_duration == Duration::ZERO {
info!("automatic GC is disabled");
if first {
info!("automatic GC is disabled");
}
first = false;
// check again in 10 seconds, in case it's been enabled again.
sleep_duration = Duration::from_secs(10);
} else {
first = true;
// Run gc
if gc_horizon > 0 {
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval()).await

View File

@@ -15,7 +15,7 @@ use tokio_util::sync::CancellationToken;
use tracing::*;
use std::cmp::{max, min, Ordering};
use std::collections::HashMap;
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::ops::{Deref, Range};
use std::path::{Path, PathBuf};
@@ -75,6 +75,9 @@ enum FlushLoopState {
Exited,
}
/// FIFO of fake "now" values queued by [`Timeline::force_next_now`]; the pitr
/// cutoff computation pops one (if present) in place of `SystemTime::now()`.
/// Debug/test-only plumbing.
// Explicit construction instead of `|| Default::default()` (clippy
// `redundant_closure`); also makes the initial value obvious at a glance.
pub static PENDING_NOWS: once_cell::sync::Lazy<Mutex<VecDeque<SystemTime>>> =
    once_cell::sync::Lazy::new(|| Mutex::new(VecDeque::new()));
pub struct Timeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -1370,9 +1373,13 @@ impl Timeline {
let self_calculation = Arc::clone(self);
let cancel = CancellationToken::new();
let blocking_span = tracing::info_span!("blocking");
let calculation = async {
let cancel = cancel.child_token();
tokio::task::spawn_blocking(move || {
// spans cannot be automatically moved/hoisted to spawn_blocking, do that manually
let _entered = blocking_span.entered();
// Run in a separate thread since this can do a lot of
// synchronous file IO without .await in between
// if there are no RemoteLayers that would require downloading.
@@ -2623,6 +2630,10 @@ impl Timeline {
Ok(())
}
/// Debug/test hook: queue a fake `SystemTime` onto `PENDING_NOWS`; the next
/// pitr-cutoff computation consumes it instead of `SystemTime::now()`.
pub fn force_next_now(next: SystemTime) {
    let mut queued = PENDING_NOWS.lock().unwrap();
    queued.push_back(next);
}
/// Update information about which layer files need to be retained on
/// garbage collection. This is separate from actually performing the GC,
/// and is updated more frequently, so that compaction can remove obsolete
@@ -2670,10 +2681,28 @@ impl Timeline {
// work, so avoid calling it altogether if time-based retention is not
// configured. It would be pointless anyway.
let pitr_cutoff = if pitr != Duration::ZERO {
let now = SystemTime::now();
let now = PENDING_NOWS.lock().unwrap().pop_front();
let now = if let Some(now) = now {
let dt = chrono::DateTime::<chrono::Utc>::from(now);
let dt = dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
tracing::warn!(now = dt, "using forced now");
now
} else {
SystemTime::now()
};
if let Some(pitr_cutoff_timestamp) = now.checked_sub(pitr) {
let pitr_timestamp = to_pg_timestamp(pitr_cutoff_timestamp);
{
let dt = chrono::DateTime::<chrono::Utc>::from(now);
let dt = dt.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
info!(
?pitr,
pitr_cutoff_timestamp = dt,
"searching lsn for timestamp"
);
}
match self.find_lsn_for_timestamp(pitr_timestamp).await? {
LsnForTimestamp::Present(lsn) => lsn,
LsnForTimestamp::Future(lsn) => {