mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 10:00:38 +00:00
Merge branch 'main' into yuchen/virtual-file-config
This commit is contained in:
@@ -575,7 +575,7 @@ fn start_pageserver(
|
||||
.build()
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let service = utils::http::RouterService::new(router).unwrap();
|
||||
let server = hyper::Server::from_tcp(http_listener)?
|
||||
let server = hyper0::Server::from_tcp(http_listener)?
|
||||
.serve(service)
|
||||
.with_graceful_shutdown({
|
||||
let cancel = cancel.clone();
|
||||
|
||||
@@ -1743,6 +1743,10 @@ async fn timeline_compact_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
}
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
|
||||
flags |= CompactFlags::ForceRepartition;
|
||||
}
|
||||
@@ -1789,6 +1793,9 @@ async fn timeline_checkpoint_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
}
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
|
||||
flags |= CompactFlags::ForceRepartition;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@ pub mod http;
|
||||
pub mod import_datadir;
|
||||
pub mod l0_flush;
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
pub use pageserver_api::keyspace;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
@@ -97,6 +97,7 @@ use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
|
||||
use crate::tenant::remote_timeline_client::INITDB_PATH;
|
||||
use crate::tenant::storage_layer::DeltaLayer;
|
||||
use crate::tenant::storage_layer::ImageLayer;
|
||||
use crate::walingest::WalLagCooldown;
|
||||
use crate::walredo;
|
||||
use crate::InitializationOrder;
|
||||
use std::collections::hash_map::Entry;
|
||||
@@ -319,6 +320,9 @@ pub struct Tenant {
|
||||
/// background warmup.
|
||||
pub(crate) activate_now_sem: tokio::sync::Semaphore,
|
||||
|
||||
/// Time it took for the tenant to activate. Zero if not active yet.
|
||||
attach_wal_lag_cooldown: Arc<std::sync::OnceLock<WalLagCooldown>>,
|
||||
|
||||
// Cancellation token fires when we have entered shutdown(). This is a parent of
|
||||
// Timelines' cancellation token.
|
||||
pub(crate) cancel: CancellationToken,
|
||||
@@ -1000,11 +1004,15 @@ impl Tenant {
|
||||
// Remote preload is complete.
|
||||
drop(remote_load_completion);
|
||||
|
||||
|
||||
// We will time the duration of the attach phase unless this is a creation (attach will do no work)
|
||||
let attach_start = std::time::Instant::now();
|
||||
let attached = {
|
||||
let _attach_timer = Some(TENANT.attach.start_timer());
|
||||
tenant_clone.attach(preload, &ctx).await
|
||||
};
|
||||
let attach_duration = attach_start.elapsed();
|
||||
_ = tenant_clone.attach_wal_lag_cooldown.set(WalLagCooldown::new(attach_start, attach_duration));
|
||||
|
||||
match attached {
|
||||
Ok(()) => {
|
||||
@@ -2754,6 +2762,7 @@ impl Tenant {
|
||||
pg_version,
|
||||
state,
|
||||
last_aux_file_policy,
|
||||
self.attach_wal_lag_cooldown.clone(),
|
||||
self.cancel.child_token(),
|
||||
);
|
||||
|
||||
@@ -2860,6 +2869,7 @@ impl Tenant {
|
||||
Some(Duration::from_secs(3600 * 24)),
|
||||
)),
|
||||
activate_now_sem: tokio::sync::Semaphore::new(0),
|
||||
attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()),
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::default(),
|
||||
timeline_get_throttle: Arc::new(throttle::Throttle::new(
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
//! We cannot use global or default config instead, because wrong settings
|
||||
//! may lead to a data loss.
|
||||
//!
|
||||
use anyhow::bail;
|
||||
pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf;
|
||||
use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::models::CompactionAlgorithmSettings;
|
||||
@@ -441,29 +440,6 @@ impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<toml_edit::Item> for TenantConfOpt {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(item: toml_edit::Item) -> Result<Self, Self::Error> {
|
||||
match item {
|
||||
toml_edit::Item::Value(value) => {
|
||||
let d = value.into_deserializer();
|
||||
return serde_path_to_error::deserialize(d)
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
toml_edit::Item::Table(table) => {
|
||||
let deserializer =
|
||||
toml_edit::de::Deserializer::from(toml_edit::DocumentMut::from(table));
|
||||
return serde_path_to_error::deserialize(deserializer)
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
_ => {
|
||||
bail!("expected non-inline table but found {item}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is a conversion from our internal tenant config object to the one used
|
||||
/// in external APIs.
|
||||
impl From<TenantConfOpt> for models::TenantConfig {
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::tenant::Generation;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
|
||||
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::pausable_failpoint;
|
||||
@@ -153,7 +153,9 @@ async fn download_object<'a>(
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let download = storage.download(src_path, cancel).await?;
|
||||
let download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
@@ -204,7 +206,9 @@ async fn download_object<'a>(
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let mut download = storage.download(src_path, cancel).await?;
|
||||
let mut download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
@@ -344,7 +348,9 @@ async fn do_download_index_part(
|
||||
|
||||
let index_part_bytes = download_retry_forever(
|
||||
|| async {
|
||||
let download = storage.download(&remote_path, cancel).await?;
|
||||
let download = storage
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
@@ -526,10 +532,15 @@ pub(crate) async fn download_initdb_tar_zst(
|
||||
.with_context(|| format!("tempfile creation {temp_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let download = match storage.download(&remote_path, cancel).await {
|
||||
let download = match storage
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
.await
|
||||
{
|
||||
Ok(dl) => dl,
|
||||
Err(DownloadError::NotFound) => {
|
||||
storage.download(&remote_preserved_path, cancel).await?
|
||||
storage
|
||||
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
|
||||
.await?
|
||||
}
|
||||
Err(other) => Err(other)?,
|
||||
};
|
||||
|
||||
@@ -49,7 +49,7 @@ use futures::Future;
|
||||
use metrics::UIntGauge;
|
||||
use pageserver_api::models::SecondaryProgress;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
|
||||
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
|
||||
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, instrument, warn, Instrument};
|
||||
@@ -944,36 +944,34 @@ impl<'a> TenantDownloader<'a> {
|
||||
) -> Result<HeatMapDownload, UpdateError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
|
||||
// TODO: pull up etag check into the request, to do a conditional GET rather than
|
||||
// issuing a GET and then maybe ignoring the response body
|
||||
// (https://github.com/neondatabase/neon/issues/6199)
|
||||
tracing::debug!("Downloading heatmap for secondary tenant",);
|
||||
|
||||
let heatmap_path = remote_heatmap_path(tenant_shard_id);
|
||||
let cancel = &self.secondary_state.cancel;
|
||||
let opts = DownloadOpts {
|
||||
etag: prev_etag.cloned(),
|
||||
};
|
||||
|
||||
backoff::retry(
|
||||
|| async {
|
||||
let download = self
|
||||
let download = match self
|
||||
.remote_storage
|
||||
.download(&heatmap_path, cancel)
|
||||
.download(&heatmap_path, &opts, cancel)
|
||||
.await
|
||||
.map_err(UpdateError::from)?;
|
||||
{
|
||||
Ok(download) => download,
|
||||
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
SECONDARY_MODE.download_heatmap.inc();
|
||||
|
||||
if Some(&download.etag) == prev_etag {
|
||||
Ok(HeatMapDownload::Unmodified)
|
||||
} else {
|
||||
let mut heatmap_bytes = Vec::new();
|
||||
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
|
||||
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
|
||||
Ok(HeatMapDownload::Modified(HeatMapModified {
|
||||
etag: download.etag,
|
||||
last_modified: download.last_modified,
|
||||
bytes: heatmap_bytes,
|
||||
}))
|
||||
}
|
||||
let mut heatmap_bytes = Vec::new();
|
||||
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
|
||||
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
|
||||
Ok(HeatMapDownload::Modified(HeatMapModified {
|
||||
etag: download.etag,
|
||||
last_modified: download.last_modified,
|
||||
bytes: heatmap_bytes,
|
||||
}))
|
||||
},
|
||||
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
|
||||
FAILED_DOWNLOAD_WARN_THRESHOLD,
|
||||
@@ -984,6 +982,7 @@ impl<'a> TenantDownloader<'a> {
|
||||
.await
|
||||
.ok_or_else(|| UpdateError::Cancelled)
|
||||
.and_then(|x| x)
|
||||
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
|
||||
}
|
||||
|
||||
/// Download heatmap layers that are not present on local disk, or update their
|
||||
|
||||
@@ -53,6 +53,7 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
@@ -963,14 +964,25 @@ impl DeltaLayerInner {
|
||||
.blobs_at
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
}
|
||||
})
|
||||
.join(", ");
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
largest_read_size,
|
||||
read_size_soft_max,
|
||||
offenders
|
||||
);
|
||||
|
||||
if !offenders.is_empty() {
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
largest_read_size,
|
||||
read_size_soft_max,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
largest_read_size
|
||||
|
||||
@@ -49,6 +49,7 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
@@ -587,14 +588,25 @@ impl ImageLayerInner {
|
||||
.blobs_at
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
}
|
||||
})
|
||||
.join(", ");
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
buf_size,
|
||||
max_vectored_read_bytes,
|
||||
offenders
|
||||
);
|
||||
|
||||
if !offenders.is_empty() {
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
buf_size,
|
||||
max_vectored_read_bytes,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
|
||||
@@ -442,11 +442,13 @@ impl Layer {
|
||||
// Visibility was modified to Visible: maybe log about this
|
||||
match ctx.task_kind() {
|
||||
TaskKind::CalculateSyntheticSize
|
||||
| TaskKind::OndemandLogicalSizeCalculation
|
||||
| TaskKind::GarbageCollector
|
||||
| TaskKind::MgmtRequest => {
|
||||
// This situation is expected in code paths do binary searches of the LSN space to resolve
|
||||
// an LSN to a timestamp, which happens during GC, during GC cutoff calculations in synthetic size,
|
||||
// and on-demand for certain HTTP API requests.
|
||||
// and on-demand for certain HTTP API requests. On-demand logical size calculation is also included
|
||||
// because it is run as a sub-task of synthetic size.
|
||||
}
|
||||
_ => {
|
||||
// In all other contexts, it is unusual to do I/O involving layers which are not visible at
|
||||
@@ -456,8 +458,8 @@ impl Layer {
|
||||
// This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object
|
||||
// which was covered by a concurrent compaction.
|
||||
tracing::info!(
|
||||
"Layer {} became visible as a result of access",
|
||||
self.0.desc.key()
|
||||
layer=%self,
|
||||
"became visible as a result of access",
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -686,7 +688,9 @@ impl Drop for LayerInner {
|
||||
// and we could be delaying shutdown for nothing.
|
||||
}
|
||||
|
||||
if let Some(timeline) = self.timeline.upgrade() {
|
||||
let timeline = self.timeline.upgrade();
|
||||
|
||||
if let Some(timeline) = timeline.as_ref() {
|
||||
// Only need to decrement metrics if the timeline still exists: otherwise
|
||||
// it will have already de-registered these metrics via TimelineMetrics::shutdown
|
||||
if self.desc.is_delta() {
|
||||
@@ -717,7 +721,6 @@ impl Drop for LayerInner {
|
||||
let path = std::mem::take(&mut self.path);
|
||||
let file_name = self.layer_desc().layer_name();
|
||||
let file_size = self.layer_desc().file_size;
|
||||
let timeline = self.timeline.clone();
|
||||
let meta = self.metadata();
|
||||
let status = self.status.take();
|
||||
|
||||
@@ -727,7 +730,7 @@ impl Drop for LayerInner {
|
||||
// carry this until we are finished for [`Layer::wait_drop`] support
|
||||
let _status = status;
|
||||
|
||||
let Some(timeline) = timeline.upgrade() else {
|
||||
let Some(timeline) = timeline else {
|
||||
// no need to nag that timeline is gone: under normal situation on
|
||||
// task_mgr::remove_tenant_from_memory the timeline is gone before we get dropped.
|
||||
LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);
|
||||
|
||||
@@ -48,7 +48,6 @@ use utils::{
|
||||
sync::gate::{Gate, GateGuard},
|
||||
};
|
||||
|
||||
use std::pin::pin;
|
||||
use std::sync::atomic::Ordering as AtomicOrdering;
|
||||
use std::sync::{Arc, Mutex, RwLock, Weak};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
@@ -62,6 +61,7 @@ use std::{
|
||||
collections::btree_map::Entry,
|
||||
ops::{Deref, Range},
|
||||
};
|
||||
use std::{pin::pin, sync::OnceLock};
|
||||
|
||||
use crate::{
|
||||
aux_file::AuxFileSizeEstimator,
|
||||
@@ -71,6 +71,7 @@ use crate::{
|
||||
metadata::TimelineMetadata,
|
||||
storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
|
||||
},
|
||||
walingest::WalLagCooldown,
|
||||
walredo,
|
||||
};
|
||||
use crate::{
|
||||
@@ -429,6 +430,8 @@ pub struct Timeline {
|
||||
pub(crate) l0_flush_global_state: L0FlushGlobalState,
|
||||
|
||||
pub(crate) handles: handle::PerTimelineState<crate::page_service::TenantManagerTypes>,
|
||||
|
||||
pub(crate) attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
|
||||
}
|
||||
|
||||
pub struct WalReceiverInfo {
|
||||
@@ -737,6 +740,7 @@ pub enum GetLogicalSizePriority {
|
||||
pub(crate) enum CompactFlags {
|
||||
ForceRepartition,
|
||||
ForceImageLayerCreation,
|
||||
ForceL0Compaction,
|
||||
EnhancedGcBottomMostCompaction,
|
||||
DryRun,
|
||||
}
|
||||
@@ -2130,6 +2134,7 @@ impl Timeline {
|
||||
pg_version: u32,
|
||||
state: TimelineState,
|
||||
aux_file_policy: Option<AuxFilePolicy>,
|
||||
attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
|
||||
cancel: CancellationToken,
|
||||
) -> Arc<Self> {
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn();
|
||||
@@ -2271,6 +2276,8 @@ impl Timeline {
|
||||
l0_flush_global_state: resources.l0_flush_global_state,
|
||||
|
||||
handles: Default::default(),
|
||||
|
||||
attach_wal_lag_cooldown,
|
||||
};
|
||||
|
||||
if aux_file_policy == Some(AuxFilePolicy::V1) {
|
||||
|
||||
@@ -11,6 +11,7 @@ pub(crate) struct RangeAnalysis {
|
||||
has_image: bool,
|
||||
num_of_deltas_above_image: usize,
|
||||
total_num_of_deltas: usize,
|
||||
num_of_l0: usize,
|
||||
}
|
||||
|
||||
impl Timeline {
|
||||
@@ -20,8 +21,10 @@ impl Timeline {
|
||||
let mut delta_ranges = Vec::new();
|
||||
let mut image_ranges = Vec::new();
|
||||
|
||||
let num_of_l0;
|
||||
let all_layer_files = {
|
||||
let guard = self.layers.read().await;
|
||||
num_of_l0 = guard.layer_map().unwrap().level0_deltas().len();
|
||||
guard.all_persistent_layers()
|
||||
};
|
||||
let lsn = self.get_last_record_lsn();
|
||||
@@ -82,6 +85,7 @@ impl Timeline {
|
||||
has_image: image_layer.is_some(),
|
||||
num_of_deltas_above_image: maybe_delta_layers.len(),
|
||||
total_num_of_deltas: pitr_delta_layers.len(),
|
||||
num_of_l0,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -353,7 +353,13 @@ impl Timeline {
|
||||
|
||||
// 2. Compact
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
let fully_compacted = self.compact_level0(target_file_size, ctx).await?;
|
||||
let fully_compacted = self
|
||||
.compact_level0(
|
||||
target_file_size,
|
||||
flags.contains(CompactFlags::ForceL0Compaction),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
timer.stop_and_record();
|
||||
|
||||
let mut partitioning = dense_partitioning;
|
||||
@@ -658,6 +664,7 @@ impl Timeline {
|
||||
async fn compact_level0(
|
||||
self: &Arc<Self>,
|
||||
target_file_size: u64,
|
||||
force_compaction_ignore_threshold: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
let CompactLevel0Phase1Result {
|
||||
@@ -679,9 +686,15 @@ impl Timeline {
|
||||
let now = tokio::time::Instant::now();
|
||||
stats.read_lock_acquisition_micros =
|
||||
DurationRecorder::Recorded(RecordedDuration(now - begin), now);
|
||||
self.compact_level0_phase1(phase1_layers_locked, stats, target_file_size, &ctx)
|
||||
.instrument(phase1_span)
|
||||
.await?
|
||||
self.compact_level0_phase1(
|
||||
phase1_layers_locked,
|
||||
stats,
|
||||
target_file_size,
|
||||
force_compaction_ignore_threshold,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(phase1_span)
|
||||
.await?
|
||||
};
|
||||
|
||||
if new_layers.is_empty() && deltas_to_compact.is_empty() {
|
||||
@@ -700,6 +713,7 @@ impl Timeline {
|
||||
guard: tokio::sync::RwLockReadGuard<'a, LayerManager>,
|
||||
mut stats: CompactLevel0Phase1StatsBuilder,
|
||||
target_file_size: u64,
|
||||
force_compaction_ignore_threshold: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CompactLevel0Phase1Result, CompactionError> {
|
||||
stats.read_lock_held_spawn_blocking_startup_micros =
|
||||
@@ -711,11 +725,26 @@ impl Timeline {
|
||||
// Only compact if enough layers have accumulated.
|
||||
let threshold = self.get_compaction_threshold();
|
||||
if level0_deltas.is_empty() || level0_deltas.len() < threshold {
|
||||
debug!(
|
||||
level0_deltas = level0_deltas.len(),
|
||||
threshold, "too few deltas to compact"
|
||||
);
|
||||
return Ok(CompactLevel0Phase1Result::default());
|
||||
if force_compaction_ignore_threshold {
|
||||
if !level0_deltas.is_empty() {
|
||||
info!(
|
||||
level0_deltas = level0_deltas.len(),
|
||||
threshold, "too few deltas to compact, but forcing compaction"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
level0_deltas = level0_deltas.len(),
|
||||
threshold, "too few deltas to compact, cannot force compaction"
|
||||
);
|
||||
return Ok(CompactLevel0Phase1Result::default());
|
||||
}
|
||||
} else {
|
||||
debug!(
|
||||
level0_deltas = level0_deltas.len(),
|
||||
threshold, "too few deltas to compact"
|
||||
);
|
||||
return Ok(CompactLevel0Phase1Result::default());
|
||||
}
|
||||
}
|
||||
|
||||
let mut level0_deltas = level0_deltas
|
||||
|
||||
@@ -21,7 +21,10 @@
|
||||
//! redo Postgres process, but some records it can handle directly with
|
||||
//! bespoken Rust code.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
@@ -69,7 +72,29 @@ impl CheckPoint {
|
||||
}
|
||||
}
|
||||
|
||||
/// Temporary limitation of WAL lag warnings after attach
|
||||
///
|
||||
/// After tenant attach, we want to limit WAL lag warnings because
|
||||
/// we don't look at the WAL until the attach is complete, which
|
||||
/// might take a while.
|
||||
pub struct WalLagCooldown {
|
||||
/// Until when should this limitation apply at all
|
||||
active_until: std::time::Instant,
|
||||
/// The maximum lag to suppress. Lags above this limit get reported anyways.
|
||||
max_lag: Duration,
|
||||
}
|
||||
|
||||
impl WalLagCooldown {
|
||||
pub fn new(attach_start: Instant, attach_duration: Duration) -> Self {
|
||||
Self {
|
||||
active_until: attach_start + attach_duration * 3 + Duration::from_secs(120),
|
||||
max_lag: attach_duration * 2 + Duration::from_secs(60),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WalIngest {
|
||||
attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
|
||||
shard: ShardIdentity,
|
||||
checkpoint: CheckPoint,
|
||||
checkpoint_modified: bool,
|
||||
@@ -103,6 +128,7 @@ impl WalIngest {
|
||||
shard: *timeline.get_shard_identity(),
|
||||
checkpoint,
|
||||
checkpoint_modified: false,
|
||||
attach_wal_lag_cooldown: timeline.attach_wal_lag_cooldown.clone(),
|
||||
warn_ingest_lag: WarnIngestLag {
|
||||
lag_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),
|
||||
future_lsn_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),
|
||||
@@ -1429,6 +1455,13 @@ impl WalIngest {
|
||||
Ok(lag) => {
|
||||
if lag > conf.wait_lsn_timeout {
|
||||
rate_limits.lag_msg_ratelimit.call2(|rate_limit_stats| {
|
||||
if let Some(cooldown) = self.attach_wal_lag_cooldown.get() {
|
||||
if std::time::Instant::now() < cooldown.active_until && lag <= cooldown.max_lag {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// Still loading? We shouldn't be here
|
||||
}
|
||||
let lag = humantime::format_duration(lag);
|
||||
warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user