Compare commits

..

3 Commits

Author SHA1 Message Date
John Spray
422310c19b slightly more efficient 2025-02-17 22:43:36 +01:00
John Spray
77b1fd40b5 wip prototype 2025-02-17 22:37:46 +01:00
John Spray
a8f59f851d Revert "tests: broaden allow-list for #10720 workaround (#10807)"
This reverts commit ae463f366b.
2025-02-17 22:08:03 +01:00
41 changed files with 254 additions and 804 deletions

View File

@@ -1509,73 +1509,6 @@ WORKDIR /ext-src/pg_repack-src
RUN make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
#########################################################################################
#
# Layer "pgaudit"
# compile pgaudit extension
#
#########################################################################################
FROM build-deps AS pgaudit-src
ARG PG_VERSION
WORKDIR /ext-src
RUN case "${PG_VERSION}" in \
"v14") \
export PGAUDIT_VERSION=1.6.2 \
export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
;; \
"v15") \
export PGAUDIT_VERSION=1.7.0 \
export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
;; \
"v16") \
export PGAUDIT_VERSION=16.0 \
export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
;; \
"v17") \
export PGAUDIT_VERSION=17.0 \
export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
;; \
*) \
echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
esac && \
wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
FROM pg-build AS pgaudit-build
COPY --from=pgaudit-src /ext-src/ /ext-src/
WORKDIR /ext-src/pgaudit-src
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
#########################################################################################
#
# Layer "pgauditlogtofile"
# compile pgauditlogtofile extension
#
#########################################################################################
FROM build-deps AS pgauditlogtofile-src
ARG PG_VERSION
WORKDIR /ext-src
RUN case "${PG_VERSION}" in \
"v14" | "v15" | "v16" | "v17") \
export PGAUDITLOGTOFILE_VERSION=v1.6.4 \
export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \
;; \
*) \
echo "pgauditlogtofile is not supported on this PostgreSQL version" && exit 1;; \
esac && \
wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \
echo "${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz" | sha256sum --check && \
mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .
FROM pg-build AS pgauditlogtofile-build
COPY --from=pgauditlogtofile-src /ext-src/ /ext-src/
WORKDIR /ext-src/pgauditlogtofile-src
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
#########################################################################################
#
# Layer "neon-ext-build"
@@ -1671,8 +1604,6 @@ COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
#########################################################################################
#

View File

@@ -361,14 +361,6 @@ async fn run_dump_restore(
// how we run it
.env_clear()
.env("LD_LIBRARY_PATH", &pg_lib_dir)
.env(
"ASAN_OPTIONS",
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
)
.env(
"UBSAN_OPTIONS",
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
)
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
@@ -402,14 +394,6 @@ async fn run_dump_restore(
// how we run it
.env_clear()
.env("LD_LIBRARY_PATH", &pg_lib_dir)
.env(
"ASAN_OPTIONS",
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
)
.env(
"UBSAN_OPTIONS",
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
)
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())

View File

@@ -22,7 +22,7 @@ use pageserver_api::{
};
use pageserver_client::mgmt_api::{self};
use reqwest::{Method, StatusCode, Url};
use utils::id::{NodeId, TenantId, TimelineId};
use utils::id::{NodeId, TenantId};
use pageserver_api::controller_api::{
NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -239,19 +239,6 @@ enum Command {
#[arg(long)]
scheduling_policy: SkSchedulingPolicyArg,
},
/// Downloads any missing heatmap layers for all shard for a given timeline
DownloadHeatmapLayers {
/// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,
/// the operation is performed on all shards. When a sharded tenant ID is
/// specified, the operation is only performed on the specified shard.
#[arg(long)]
tenant_shard_id: TenantShardId,
#[arg(long)]
timeline_id: TimelineId,
/// Optional: Maximum download concurrency (default is 16)
#[arg(long)]
concurrency: Option<usize>,
},
}
#[derive(Parser)]
@@ -1260,24 +1247,6 @@ async fn main() -> anyhow::Result<()> {
String::from(scheduling_policy)
);
}
Command::DownloadHeatmapLayers {
tenant_shard_id,
timeline_id,
concurrency,
} => {
let mut path = format!(
"/v1/tenant/{}/timeline/{}/download_heatmap_layers",
tenant_shard_id, timeline_id,
);
if let Some(c) = concurrency {
path = format!("{path}?concurrency={c}");
}
storcon_client
.dispatch::<(), ()>(Method::POST, path, None)
.await?;
}
}
Ok(())

View File

@@ -1080,7 +1080,8 @@ pub struct TenantInfo {
/// Opaque explanation if gc is being blocked.
///
/// Only looked up for the individual tenant detail, not the listing.
/// Only looked up for the individual tenant detail, not the listing. This is purely for
/// debugging, not included in openapi.
#[serde(skip_serializing_if = "Option::is_none")]
pub gc_blocking: Option<String>,
}

View File

@@ -9,43 +9,13 @@ use anyhow::bail;
use serde::{Deserialize, Serialize};
use utils::id::NodeId;
/// 1 is the first valid generation, 0 is used as
/// a placeholder before we fully migrate to generations.
pub const INVALID_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(0);
pub const INITIAL_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(1);
/// Number uniquely identifying safekeeper configuration.
/// Note: it is a part of sk control file.
///
/// Like tenant generations, but for safekeepers.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct SafekeeperGeneration(u32);
impl SafekeeperGeneration {
pub const fn new(v: u32) -> Self {
Self(v)
}
#[track_caller]
pub fn previous(&self) -> Option<Self> {
Some(Self(self.0.checked_sub(1)?))
}
#[track_caller]
pub fn next(&self) -> Self {
Self(self.0 + 1)
}
pub fn into_inner(self) -> u32 {
self.0
}
}
impl Display for SafekeeperGeneration {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
pub type Generation = u32;
/// 1 is the first valid generation, 0 is used as
/// a placeholder before we fully migrate to generations.
pub const INVALID_GENERATION: Generation = 0;
pub const INITIAL_GENERATION: Generation = 1;
/// Membership is defined by ids so e.g. walproposer uses them to figure out
/// quorums, but we also carry host and port to give wp idea where to connect.
@@ -119,7 +89,7 @@ impl Display for MemberSet {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Configuration {
/// Unique id.
pub generation: SafekeeperGeneration,
pub generation: Generation,
/// Current members of the configuration.
pub members: MemberSet,
/// Some means it is a joint conf.

View File

@@ -282,18 +282,3 @@ pub struct TimelineTermBumpResponse {
pub struct SafekeeperUtilization {
pub timeline_count: u64,
}
/// pull_timeline request body.
#[derive(Debug, Deserialize, Serialize)]
pub struct PullTimelineRequest {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub http_hosts: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct PullTimelineResponse {
// Donor safekeeper host
pub safekeeper_host: String,
// TODO: add more fields?
}

View File

@@ -286,11 +286,6 @@ mod tests {
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
struct NewTypeStruct(u32);
const NT1: NewTypeStruct = NewTypeStruct(414243);
const NT1_INNER: u32 = 414243;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct LongMsg {
pub tag: u8,
@@ -413,42 +408,4 @@ mod tests {
let msg2 = LongMsg::des(&encoded).unwrap();
assert_eq!(msg, msg2);
}
#[test]
/// Ensure that newtype wrappers around u32 don't change the serialization format
fn be_nt() {
use super::BeSer;
assert_eq!(NT1.serialized_size().unwrap(), 4);
let msg = NT1;
let encoded = msg.ser().unwrap();
let expected = hex_literal::hex!("0006 5223");
assert_eq!(encoded, expected);
assert_eq!(encoded, NT1_INNER.ser().unwrap());
let msg2 = NewTypeStruct::des(&encoded).unwrap();
assert_eq!(msg, msg2);
}
#[test]
/// Ensure that newtype wrappers around u32 don't change the serialization format
fn le_nt() {
use super::LeSer;
assert_eq!(NT1.serialized_size().unwrap(), 4);
let msg = NT1;
let encoded = msg.ser().unwrap();
let expected = hex_literal::hex!("2352 0600");
assert_eq!(encoded, expected);
assert_eq!(encoded, NT1_INNER.ser().unwrap());
let msg2 = NewTypeStruct::des(&encoded).unwrap();
assert_eq!(msg, msg2);
}
}

View File

@@ -117,10 +117,6 @@ impl TenantShardId {
)
}
pub fn range(&self) -> RangeInclusive<Self> {
RangeInclusive::new(*self, *self)
}
pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
ShardSlug(self)
}

View File

@@ -477,26 +477,6 @@ impl Client {
self.request(Method::POST, &uri, ()).await.map(|_| ())
}
pub async fn timeline_download_heatmap_layers(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
concurrency: Option<usize>,
) -> Result<()> {
let mut path = reqwest::Url::parse(&format!(
"{}/v1/tenant/{}/timeline/{}/download_heatmap_layers",
self.mgmt_api_endpoint, tenant_shard_id, timeline_id
))
.expect("Cannot build URL");
if let Some(concurrency) = concurrency {
path.query_pairs_mut()
.append_pair("concurrency", &format!("{}", concurrency));
}
self.request(Method::POST, path, ()).await.map(|_| ())
}
pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
let uri = format!(
"{}/v1/tenant/{}/reset",

View File

@@ -824,38 +824,6 @@ paths:
schema:
$ref: "#/components/schemas/TenantConfigResponse"
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers:
parameters:
- name: tenant_shard_id
in: path
required: true
schema:
type: string
- name: timeline_id
in: path
required: true
schema:
type: string
- name: concurrency
description: Maximum number of concurrent downloads (capped at remote storage concurrency)
in: query
required: false
schema:
type: integer
post:
description: |
Download all layers in the specified timeline's heatmap. The `tenant_shard_id` parameter
may be used to target all shards of a tenant when the unsharded form is used, or a specific
tenant shard with the sharded form.
responses:
"200":
description: Success
delete:
description: Stop any on-going background downloads of heatmap layers for the specified timeline.
responses:
"200":
description: Success
/v1/utilization:
get:
description: |
@@ -914,8 +882,6 @@ components:
properties:
reason:
type: string
gc_blocking:
type: string
TenantCreateRequest:
allOf:
@@ -1117,9 +1083,6 @@ components:
min_readable_lsn:
type: string
format: hex
latest_gc_cutoff_lsn:
type: string
format: hex
applied_gc_cutoff_lsn:
type: string
format: hex

View File

@@ -1463,59 +1463,6 @@ async fn timeline_layer_scan_disposable_keys(
)
}
async fn timeline_download_heatmap_layers_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
// Only used in the case where remote storage is not configured.
const DEFAULT_MAX_CONCURRENCY: usize = 100;
// A conservative default.
const DEFAULT_CONCURRENCY: usize = 16;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let desired_concurrency =
parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY);
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
let timeline =
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
.await?;
let max_concurrency = get_config(&request)
.remote_storage_config
.as_ref()
.map(|c| c.concurrency_limit())
.unwrap_or(DEFAULT_MAX_CONCURRENCY);
let concurrency = std::cmp::min(max_concurrency, desired_concurrency);
timeline.start_heatmap_layers_download(concurrency).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn timeline_shutdown_download_heatmap_layers_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
let timeline =
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
.await?;
timeline.stop_and_drain_heatmap_layers_download().await;
json_response(StatusCode::OK, ())
}
async fn layer_download_handler(
request: Request<Body>,
_cancel: CancellationToken,
@@ -3679,14 +3626,6 @@ pub fn make_router(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
|r| api_handler(r, layer_map_info_handler),
)
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
|r| api_handler(r, timeline_download_heatmap_layers_handler),
)
.delete(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
|r| api_handler(r, timeline_shutdown_download_heatmap_layers_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, layer_download_handler),

View File

@@ -1799,13 +1799,6 @@ impl PageServerHandler {
.as_millis()
.to_string()
});
info!(
"acquired lease for {} until {}",
lsn,
valid_until_str.as_deref().unwrap_or("<unknown>")
);
let bytes = valid_until_str.as_ref().map(|x| x.as_bytes());
pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(

View File

@@ -3101,9 +3101,6 @@ impl Tenant {
if let Some(queue) = queue {
outcome = queue
.iteration(cancel, ctx, &self.gc_block, &timeline)
.instrument(
info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id),
)
.await?;
}
}
@@ -7849,6 +7846,18 @@ mod tests {
}
tline.freeze_and_flush().await?;
// Force layers to L1
tline
.compact(
&cancel,
{
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceL0Compaction);
flags
},
&ctx,
)
.await?;
if iter % 5 == 0 {
let (_, before_delta_file_accessed) =
@@ -7861,6 +7870,7 @@ mod tests {
let mut flags = EnumSet::new();
flags.insert(CompactFlags::ForceImageLayerCreation);
flags.insert(CompactFlags::ForceRepartition);
flags.insert(CompactFlags::ForceL0Compaction);
flags
},
&ctx,
@@ -8307,6 +8317,8 @@ mod tests {
let cancel = CancellationToken::new();
// Image layer creation happens on the disk_consistent_lsn so we need to force set it now.
tline.force_set_disk_consistent_lsn(Lsn(0x40));
tline
.compact(
&cancel,
@@ -8320,8 +8332,7 @@ mod tests {
)
.await
.unwrap();
// Image layers are created at last_record_lsn
// Image layers are created at repartition LSN
let images = tline
.inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())
.await

View File

@@ -570,8 +570,12 @@ impl LayerMap {
self.historic.iter()
}
pub fn riter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
self.historic.riter()
}
/// Get a ref counted pointer for the first in memory layer that matches the provided predicate.
pub fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
pub(crate) fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
where
Pred: FnMut(&Arc<InMemoryLayer>) -> bool,
{
@@ -900,6 +904,24 @@ impl LayerMap {
Ok(())
}
/// Efficiency: this is a single btreemap walk to the end of the map in the common case where
/// we are queried for image layers after the start of an ephemeral layer. In the general case
/// where we are called with some arbitrary LSN, this function is O(N) -- so don't use it like that.
pub(crate) fn get_newest_image_after(&self, lsn: Lsn) -> Option<Arc<PersistentLayerDesc>> {
// TODO: an efficient equivalent, this is a crude placeholder
for layer in self.riter_historic_layers() {
if !layer.is_delta() && layer.image_layer_lsn() >= lsn {
return Some(layer);
}
if layer.lsn_range.start < lsn {
// We are past the layers that could possibly intersect with the requested bound
break;
}
}
None
}
/// `read_points` represent the tip of a timeline and any branch points, i.e. the places
/// where we expect to serve reads.
///

View File

@@ -509,6 +509,18 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
self.layers.values().cloned()
}
/// Iterate all the layers in reverse order (newest LSNs first)
pub fn riter(&self) -> impl '_ + Iterator<Item = Value> {
// NOTE we can actually perform this without rebuilding,
// but it's not necessary for now.
if !self.buffer.is_empty() {
panic!("rebuild pls")
}
// TODO: is cloned() really needed?
self.layers.values().rev().cloned()
}
/// Return a reference to a queryable map, assuming all updates
/// have already been processed using self.rebuild()
pub fn get(&self) -> anyhow::Result<&HistoricLayerCoverage<Value>> {

View File

@@ -4,7 +4,6 @@ pub mod delete;
pub(crate) mod detach_ancestor;
mod eviction_task;
pub(crate) mod handle;
mod heatmap_layers_downloader;
pub(crate) mod import_pgdata;
mod init;
pub mod layer_manager;
@@ -468,10 +467,6 @@ pub struct Timeline {
pub(crate) page_trace: ArcSwapOption<Sender<PageTraceEvent>>,
previous_heatmap: ArcSwapOption<PreviousHeatmap>,
/// May host a background Tokio task which downloads all the layers from the current
/// heatmap on demand.
heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,
}
pub(crate) enum PreviousHeatmap {
@@ -2044,11 +2039,6 @@ impl Timeline {
tracing::debug!("Cancelling CancellationToken");
self.cancel.cancel();
// If we have a background task downloading heatmap layers stop it.
// The background downloads are sensitive to timeline cancellation (done above),
// so the drain will be immediate.
self.stop_and_drain_heatmap_layers_download().await;
// Ensure Prevent new page service requests from starting.
self.handles.shutdown();
@@ -2762,8 +2752,6 @@ impl Timeline {
page_trace: Default::default(),
previous_heatmap: ArcSwapOption::from_pointee(previous_heatmap),
heatmap_layers_downloader: Mutex::new(None),
};
result.repartition_threshold =
@@ -3787,6 +3775,8 @@ impl Timeline {
let mut completed_keyspace = KeySpace::default();
let mut image_covered_keyspace = KeySpaceRandomAccum::new();
let mut in_memory_layers_considered = Vec::new();
// Prevent GC from progressing while visiting the current timeline.
// If we are GC-ing because a new image layer was added while traversing
// the timeline, then it will remove layers that are required for fulfilling
@@ -3822,12 +3812,34 @@ impl Timeline {
let in_memory_layer = layers.find_in_memory_layer(|l| {
let start_lsn = l.get_lsn_range().start;
cont_lsn > start_lsn
!in_memory_layers_considered.contains(&start_lsn) && cont_lsn > start_lsn
});
match in_memory_layer {
Some(l) => {
let lsn_range = l.get_lsn_range().start..cont_lsn;
in_memory_layers_considered.push(l.get_lsn_range().start);
// Search for image layers that overlap with the in-memory layer: this is rare but permitted, and
// we must bound the `lsn_range` of this layer to avoid skipping past the image layer.
// TODO: a narrower search that only hits on image layers matching `unmapped_keyspace`
let lsn_range = if let Some(image) =
layers.get_newest_image_after(l.get_lsn_range().start)
{
// Note that this does not guarantee serving a read from an image layer, just that we will
// not skip considering thge image layer in our Fringe. We can still end up doing walredo work
// in spite of the presence of an image layer, if the inmemory layers we visit contain enough
// information to fully construct a page. For example:
// - ephemeral layer contains I1, D1, D2, <LSN X>
// - image layer at LSN X contains image equal to I2
// - we will end up doing a walredo of I1+D1+D2, rather than reading from the image layer
//
// This is not a problem for correctness, and is rare enough that the wasted time doing walredo
// doesn't matter.
image.get_lsn_range().start + 1..cont_lsn
} else {
l.get_lsn_range().start..cont_lsn
};
fringe.update(
ReadableLayer::InMemoryLayer(l),
unmapped_keyspace.clone(),

View File

@@ -301,12 +301,18 @@ impl GcCompactionQueue {
let mut guard = self.inner.lock().unwrap();
guard.gc_guards.insert(id, gc_guard);
}
let _ = timeline.compact_with_options(cancel, options, ctx).await?;
let _ = timeline
.compact_with_options(cancel, options, ctx)
.instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
.await?;
self.notify_and_unblock(id);
}
}
GcCompactionQueueItem::SubCompactionJob(options) => {
let _ = timeline.compact_with_options(cancel, options, ctx).await?;
let _ = timeline
.compact_with_options(cancel, options, ctx)
.instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
.await?;
}
GcCompactionQueueItem::Notify(id) => {
self.notify_and_unblock(id);
@@ -686,6 +692,21 @@ impl Timeline {
// Define partitioning schema if needed
let l0_l1_boundary_lsn = {
// We do the repartition on the L0-L1 boundary. All data below the boundary
// are compacted by L0 with low read amplification, thus making the `repartition`
// function run fast.
let guard = self.layers.read().await;
let l0_min_lsn = guard
.layer_map()?
.level0_deltas()
.iter()
.map(|l| l.get_lsn_range().start)
.min()
.unwrap_or(self.get_disk_consistent_lsn());
l0_min_lsn.max(self.get_ancestor_lsn())
};
// 1. L0 Compact
let l0_outcome = {
let timer = self.metrics.compact_time_histo.start_timer();
@@ -712,79 +733,86 @@ impl Timeline {
return Ok(CompactionOutcome::YieldForL0);
}
// 2. Repartition and create image layers if necessary
match self
.repartition(
self.get_last_record_lsn(),
self.get_compaction_target_size(),
options.flags,
ctx,
)
.await
{
Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
// Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
let image_ctx = RequestContextBuilder::extend(ctx)
.access_stats_behavior(AccessStatsBehavior::Skip)
.build();
if l0_l1_boundary_lsn < self.partitioning.read().1 {
// We never go backwards when repartition and create image layers.
info!("skipping image layer generation because repartition LSN is greater than L0-L1 boundary LSN.");
} else {
// 2. Repartition and create image layers if necessary
match self
.repartition(
l0_l1_boundary_lsn,
self.get_compaction_target_size(),
options.flags,
ctx,
)
.await
{
Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
// Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
let image_ctx = RequestContextBuilder::extend(ctx)
.access_stats_behavior(AccessStatsBehavior::Skip)
.build();
let mut partitioning = dense_partitioning;
partitioning
.parts
.extend(sparse_partitioning.into_dense().parts);
let mut partitioning = dense_partitioning;
partitioning
.parts
.extend(sparse_partitioning.into_dense().parts);
// 3. Create new image layers for partitions that have been modified "enough".
let (image_layers, outcome) = self
.create_image_layers(
&partitioning,
lsn,
if options
.flags
.contains(CompactFlags::ForceImageLayerCreation)
{
ImageLayerCreationMode::Force
} else {
ImageLayerCreationMode::Try
},
&image_ctx,
self.last_image_layer_creation_status
.load()
.as_ref()
.clone(),
!options.flags.contains(CompactFlags::NoYield),
)
.await
.inspect_err(|err| {
if let CreateImageLayersError::GetVectoredError(
GetVectoredError::MissingKey(_),
) = err
{
critical!("missing key during compaction: {err:?}");
}
})?;
// 3. Create new image layers for partitions that have been modified "enough".
let (image_layers, outcome) = self
.create_image_layers(
&partitioning,
lsn,
if options
.flags
.contains(CompactFlags::ForceImageLayerCreation)
{
ImageLayerCreationMode::Force
} else {
ImageLayerCreationMode::Try
},
&image_ctx,
self.last_image_layer_creation_status
.load()
.as_ref()
.clone(),
!options.flags.contains(CompactFlags::NoYield),
)
.await
.inspect_err(|err| {
if let CreateImageLayersError::GetVectoredError(
GetVectoredError::MissingKey(_),
) = err
{
critical!("missing key during compaction: {err:?}");
}
})?;
self.last_image_layer_creation_status
.store(Arc::new(outcome.clone()));
self.last_image_layer_creation_status
.store(Arc::new(outcome.clone()));
self.upload_new_image_layers(image_layers)?;
if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
// Yield and do not do any other kind of compaction.
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
return Ok(CompactionOutcome::YieldForL0);
self.upload_new_image_layers(image_layers)?;
if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
// Yield and do not do any other kind of compaction.
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
return Ok(CompactionOutcome::YieldForL0);
}
}
}
Err(err) => {
// no partitioning? This is normal, if the timeline was just created
// as an empty timeline. Also in unit tests, when we use the timeline
// as a simple key-value store, ignoring the datadir layout. Log the
// error but continue.
//
// Suppress error when it's due to cancellation
if !self.cancel.is_cancelled() && !err.is_cancelled() {
tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
Err(err) => {
// no partitioning? This is normal, if the timeline was just created
// as an empty timeline. Also in unit tests, when we use the timeline
// as a simple key-value store, ignoring the datadir layout. Log the
// error but continue.
//
// Suppress error when it's due to cancellation
if !self.cancel.is_cancelled() && !err.is_cancelled() {
tracing::error!(
"could not compact, repartitioning keyspace failed: {err:?}"
);
}
}
}
};
};
}
let partition_count = self.partitioning.read().0 .0.parts.len();

View File

@@ -1,162 +0,0 @@
//! Timeline utility module to hydrate everything from the current heatmap.
//!
//! Provides utilities to spawn and abort a background task where the downloads happen.
//! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers.
use futures::StreamExt;
use http_utils::error::ApiError;
use std::sync::{Arc, Mutex};
use tokio_util::sync::CancellationToken;
use utils::sync::gate::Gate;
use super::Timeline;
// This status is not strictly necessary now, but gives us a nice place
// to store progress information if we ever wish to expose it.
pub(super) enum HeatmapLayersDownloadStatus {
InProgress,
Complete,
}
pub(super) struct HeatmapLayersDownloader {
handle: tokio::task::JoinHandle<()>,
status: Arc<Mutex<HeatmapLayersDownloadStatus>>,
cancel: CancellationToken,
downloads_guard: Arc<Gate>,
}
impl HeatmapLayersDownloader {
fn new(
timeline: Arc<Timeline>,
concurrency: usize,
) -> Result<HeatmapLayersDownloader, ApiError> {
let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?;
let cancel = timeline.cancel.child_token();
let downloads_guard = Arc::new(Gate::default());
let status = Arc::new(Mutex::new(HeatmapLayersDownloadStatus::InProgress));
let handle = tokio::task::spawn({
let status = status.clone();
let downloads_guard = downloads_guard.clone();
let cancel = cancel.clone();
async move {
let _guard = tl_guard;
scopeguard::defer! {
*status.lock().unwrap() = HeatmapLayersDownloadStatus::Complete;
}
let Some(heatmap) = timeline.generate_heatmap().await else {
tracing::info!("Heatmap layers download failed to generate heatmap");
return;
};
tracing::info!(
resident_size=%timeline.resident_physical_size(),
heatmap_layers=%heatmap.layers.len(),
"Starting heatmap layers download"
);
let stream = futures::stream::iter(heatmap.layers.into_iter().filter_map(
|layer| {
let tl = timeline.clone();
let dl_guard = match downloads_guard.enter() {
Ok(g) => g,
Err(_) => {
// [`Self::shutdown`] was called. Don't spawn any more downloads.
return None;
}
};
Some(async move {
let _dl_guard = dl_guard;
let res = tl.download_layer(&layer.name).await;
if let Err(err) = res {
if !err.is_cancelled() {
tracing::warn!(layer=%layer.name,"Failed to download heatmap layer: {err}")
}
}
})
}
)).buffered(concurrency);
tokio::select! {
_ = stream.collect::<()>() => {
tracing::info!(
resident_size=%timeline.resident_physical_size(),
"Heatmap layers download completed"
);
},
_ = cancel.cancelled() => {
tracing::info!("Heatmap layers download cancelled");
}
}
}
});
Ok(Self {
status,
handle,
cancel,
downloads_guard,
})
}
fn is_complete(&self) -> bool {
matches!(
*self.status.lock().unwrap(),
HeatmapLayersDownloadStatus::Complete
)
}
/// Drive any in-progress downloads to completion and stop spawning any new ones.
///
/// This has two callers and they behave differently
/// 1. [`Timeline::shutdown`]: the drain will be immediate since downloads themselves
/// are sensitive to timeline cancellation.
///
/// 2. Endpoint handler in [`crate::http::routes`]: the drain will wait for any in-progress
/// downloads to complete.
async fn stop_and_drain(self) {
// Counterintuitive: close the guard before cancelling.
// Something needs to poll the already created download futures to completion.
// If we cancel first, then the underlying task exits and we lost
// the poller.
self.downloads_guard.close().await;
self.cancel.cancel();
if let Err(err) = self.handle.await {
tracing::warn!("Failed to join heatmap layer downloader task: {err}");
}
}
}
impl Timeline {
pub(crate) async fn start_heatmap_layers_download(
self: &Arc<Self>,
concurrency: usize,
) -> Result<(), ApiError> {
let mut locked = self.heatmap_layers_downloader.lock().unwrap();
if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) {
let dl = HeatmapLayersDownloader::new(self.clone(), concurrency)?;
*locked = Some(dl);
Ok(())
} else {
Err(ApiError::Conflict("Already running".to_string()))
}
}
pub(crate) async fn stop_and_drain_heatmap_layers_download(&self) {
// This can race with the start of a new downloader and lead to a situation
// where one donloader is shutting down and another one is in-flight.
// The only impact is that we'd end up using more remote storage semaphore
// units than expected.
let downloader = self.heatmap_layers_downloader.lock().unwrap().take();
if let Some(dl) = downloader {
dl.stop_and_drain().await;
}
}
}

View File

@@ -279,12 +279,9 @@ impl ClientInnerCommon<postgres_client::Client> {
local_data.jti += 1;
let token = resign_jwt(&local_data.key, payload, local_data.jti)?;
// discard all cannot run in a transaction. must be executed alone.
self.inner.batch_execute("discard all").await?;
// initiates the auth session
// this is safe from query injections as the jwt format free of any escape characters.
let query = format!("select auth.jwt_session_init('{token}')");
let query = format!("discard all; select auth.jwt_session_init('{token}')");
self.inner.batch_execute(&query).await?;
let pid = self.inner.get_process_id();

View File

@@ -5,10 +5,7 @@
use http_utils::error::HttpErrorBody;
use reqwest::{IntoUrl, Method, StatusCode};
use safekeeper_api::models::{
PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
TimelineStatus,
};
use safekeeper_api::models::{SafekeeperUtilization, TimelineCreateRequest, TimelineStatus};
use std::error::Error as _;
use utils::{
id::{NodeId, TenantId, TimelineId},
@@ -91,12 +88,6 @@ impl Client {
resp.json().await.map_err(Error::ReceiveBody)
}
pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
let uri = format!("{}/v1/pull_timeline", self.mgmt_api_endpoint);
let resp = self.post(&uri, req).await?;
resp.json().await.map_err(Error::ReceiveBody)
}
pub async fn delete_timeline(
&self,
tenant_id: TenantId,

View File

@@ -235,7 +235,7 @@ impl Storage for FileStorage {
#[cfg(test)]
mod test {
use super::*;
use safekeeper_api::membership::{Configuration, MemberSet, SafekeeperGeneration};
use safekeeper_api::membership::{Configuration, MemberSet};
use tokio::fs;
use utils::lsn::Lsn;
@@ -246,7 +246,7 @@ mod test {
let tempdir = camino_tempfile::tempdir()?;
let mut state = TimelinePersistentState::empty();
state.mconf = Configuration {
generation: SafekeeperGeneration::new(42),
generation: 42,
members: MemberSet::empty(),
new_members: None,
};

View File

@@ -2,7 +2,6 @@ use http_utils::failpoints::failpoints_handler;
use hyper::{Body, Request, Response, StatusCode};
use safekeeper_api::models;
use safekeeper_api::models::AcceptorStateStatus;
use safekeeper_api::models::PullTimelineRequest;
use safekeeper_api::models::SafekeeperStatus;
use safekeeper_api::models::TermSwitchApiEntry;
use safekeeper_api::models::TimelineStatus;
@@ -231,7 +230,7 @@ async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<
async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let data: PullTimelineRequest = json_request(&mut request).await?;
let data: pull_timeline::Request = json_request(&mut request).await?;
let conf = get_conf(&request);
let global_timelines = get_global_timelines(&request);

View File

@@ -4,13 +4,10 @@ use camino::Utf8PathBuf;
use chrono::{DateTime, Utc};
use futures::{SinkExt, StreamExt, TryStreamExt};
use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
use safekeeper_api::{
models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus},
Term,
};
use safekeeper_api::{models::TimelineStatus, Term};
use safekeeper_client::mgmt_api;
use safekeeper_client::mgmt_api::Client;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use std::{
cmp::min,
io::{self, ErrorKind},
@@ -36,7 +33,7 @@ use crate::{
};
use utils::{
crashsafe::fsync_async_opt,
id::{NodeId, TenantTimelineId},
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
logging::SecretString,
lsn::Lsn,
pausable_failpoint,
@@ -381,6 +378,21 @@ impl WalResidentTimeline {
}
}
/// pull_timeline request body.
#[derive(Debug, Deserialize)]
pub struct Request {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub http_hosts: Vec<String>,
}
#[derive(Debug, Serialize)]
pub struct Response {
// Donor safekeeper host
pub safekeeper_host: String,
// TODO: add more fields?
}
/// Response for debug dump request.
#[derive(Debug, Deserialize)]
pub struct DebugDumpResponse {
@@ -393,10 +405,10 @@ pub struct DebugDumpResponse {
/// Find the most advanced safekeeper and pull timeline from it.
pub async fn handle_request(
request: PullTimelineRequest,
request: Request,
sk_auth_token: Option<SecretString>,
global_timelines: Arc<GlobalTimelines>,
) -> Result<PullTimelineResponse> {
) -> Result<Response> {
let existing_tli = global_timelines.get(TenantTimelineId::new(
request.tenant_id,
request.timeline_id,
@@ -448,7 +460,7 @@ async fn pull_timeline(
host: String,
sk_auth_token: Option<SecretString>,
global_timelines: Arc<GlobalTimelines>,
) -> Result<PullTimelineResponse> {
) -> Result<Response> {
let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
info!(
"pulling timeline {} from safekeeper {}, commit_lsn={}, flush_lsn={}, term={}, epoch={}",
@@ -523,7 +535,7 @@ async fn pull_timeline(
.load_temp_timeline(ttid, &tli_dir_path, false)
.await?;
Ok(PullTimelineResponse {
Ok(Response {
safekeeper_host: host,
})
}

View File

@@ -1004,7 +1004,7 @@ mod tests {
use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE};
use safekeeper_api::{
membership::{Configuration, MemberSet, SafekeeperGeneration, SafekeeperId},
membership::{Configuration, MemberSet, SafekeeperId},
ServerInfo,
};
@@ -1303,7 +1303,7 @@ mod tests {
tenant_id,
timeline_id,
mconf: Configuration {
generation: SafekeeperGeneration::new(42),
generation: 42,
members: MemberSet::new(vec![SafekeeperId {
id: NodeId(1),
host: "hehe.org".to_owned(),

View File

@@ -516,24 +516,6 @@ async fn handle_tenant_timeline_block_unblock_gc(
json_response(StatusCode::OK, ())
}
async fn handle_tenant_timeline_download_heatmap_layers(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
let concurrency: Option<usize> = parse_query_param(&req, "concurrency")?;
service
.tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
.await?;
json_response(StatusCode::OK, ())
}
// For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
// and tenant/timeline IDs. Since we are proxying to arbitrary paths, we don't have routing templates to
// compare to, so we can just filter out our well known ID format with regexes.
@@ -2096,16 +2078,6 @@ pub fn make_router(
)
},
)
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
|r| {
tenant_service_handler(
r,
handle_tenant_timeline_download_heatmap_layers,
RequestName("v1_tenant_timeline_download_heatmap_layers"),
)
},
)
// Tenant detail GET passthrough to shard zero:
.get("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(

View File

@@ -280,22 +280,6 @@ impl PageserverClient {
)
}
pub(crate) async fn timeline_download_heatmap_layers(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
concurrency: Option<usize>,
) -> Result<()> {
measured_request!(
"download_heatmap_layers",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner
.timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
.await
)
}
pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
measured_request!(
"utilization",

View File

@@ -1,8 +1,5 @@
use crate::metrics::PageserverRequestLabelGroup;
use safekeeper_api::models::{
PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
TimelineStatus,
};
use safekeeper_api::models::{SafekeeperUtilization, TimelineCreateRequest, TimelineStatus};
use safekeeper_client::mgmt_api::{Client, Result};
use utils::{
id::{NodeId, TenantId, TimelineId},
@@ -97,19 +94,6 @@ impl SafekeeperClient {
)
}
#[allow(dead_code)]
pub(crate) async fn pull_timeline(
&self,
req: &PullTimelineRequest,
) -> Result<PullTimelineResponse> {
measured_request!(
"pull_timeline",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner.pull_timeline(req).await
)
}
pub(crate) async fn get_utilization(&self) -> Result<SafekeeperUtilization> {
measured_request!(
"utilization",

View File

@@ -162,7 +162,6 @@ enum TenantOperations {
TimelineDetachAncestor,
TimelineGcBlockUnblock,
DropDetached,
DownloadHeatmapLayers,
}
#[derive(Clone, strum_macros::Display)]
@@ -3758,61 +3757,6 @@ impl Service {
Ok(())
}
pub(crate) async fn tenant_timeline_download_heatmap_layers(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
concurrency: Option<usize>,
) -> Result<(), ApiError> {
let _tenant_lock = trace_shared_lock(
&self.tenant_op_locks,
tenant_shard_id.tenant_id,
TenantOperations::DownloadHeatmapLayers,
)
.await;
let targets = {
let locked = self.inner.read().unwrap();
let mut targets = Vec::new();
// If the request got an unsharded tenant id, then apply
// the operation to all shards. Otherwise, apply it to a specific shard.
let shards_range = if tenant_shard_id.is_unsharded() {
TenantShardId::tenant_range(tenant_shard_id.tenant_id)
} else {
tenant_shard_id.range()
};
for (tenant_shard_id, shard) in locked.tenants.range(shards_range) {
if let Some(node_id) = shard.intent.get_attached() {
let node = locked
.nodes
.get(node_id)
.expect("Pageservers may not be deleted while referenced");
targets.push((*tenant_shard_id, node.clone()));
}
}
targets
};
self.tenant_for_shards_api(
targets,
|tenant_shard_id, client| async move {
client
.timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
.await
},
1,
1,
SHORT_RECONCILE_TIMEOUT,
&self.cancel,
)
.await;
Ok(())
}
/// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
///
/// On success, the returned vector contains exactly the same number of elements as the input `locations`.

View File

@@ -705,7 +705,7 @@ class NeonEnvBuilder:
assert self.version_combination is not None, "version combination must be set"
# Always use a newer version of `neon_local`
(self.mixdir / "neon_local").hardlink_to(self.neon_binpath / "neon_local")
(self.mixdir / "neon_local").symlink_to(self.neon_binpath / "neon_local")
self.neon_local_binpath = self.mixdir
for component, paths in COMPONENT_BINARIES.items():
@@ -716,7 +716,7 @@ class NeonEnvBuilder:
)
for filename in paths:
destination = self.mixdir / filename
destination.hardlink_to(directory / filename)
destination.symlink_to(directory / filename)
self.neon_binpath = self.mixdir
if self.version_combination["compute"] == "old":
@@ -2467,14 +2467,6 @@ class NeonStorageController(MetricsGetter, LogUtils):
response.raise_for_status()
return [TenantShardId.parse(tid) for tid in response.json()["updated"]]
def download_heatmap_layers(self, tenant_shard_id: TenantShardId, timeline_id: TimelineId):
response = self.request(
"POST",
f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
def __enter__(self) -> Self:
return self

View File

@@ -64,8 +64,6 @@ VERSIONS_COMBINATIONS = (
# If it is not set or set to a value not equal to "false", LFC is enabled by default.
USE_LFC = os.environ.get("USE_LFC") != "false"
WITH_SANITIZERS = os.environ.get("SANITIZERS") == "enabled"
def subprocess_capture(
capture_dir: Path,

View File

@@ -236,7 +236,9 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
wait_until(compaction_finished, timeout=60)
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
env.pageserver.assert_log_contains(
"scheduled_compact_timeline.*picked .* layers for compaction"
)
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
@@ -298,8 +300,6 @@ def test_pageserver_gc_compaction_idempotent(
workload.churn_rows(row_count, env.pageserver.id)
env.create_branch("child_branch") # so that we have a retain_lsn
workload.churn_rows(row_count, env.pageserver.id)
env.create_branch("child_branch_2") # so that we have another retain_lsn
workload.churn_rows(row_count, env.pageserver.id)
# compact 3 times if mode is before_restart
n_compactions = 3 if compaction_mode == "before_restart" else 1
ps_http.timeline_compact(
@@ -315,6 +315,10 @@ def test_pageserver_gc_compaction_idempotent(
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
@@ -332,13 +336,19 @@ def test_pageserver_gc_compaction_idempotent(
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
wait_until(compaction_finished, timeout=60)
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
env.pageserver.assert_log_contains(
"scheduled_compact_timeline.*picked .* layers for compaction"
)
# ensure we hit the duplicated layer key warning at least once: we did two compactions consecutively,
# and the second one should have hit the duplicated layer key warning.
@@ -456,7 +466,9 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
wait_until(compaction_finished, timeout=60)
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
env.pageserver.assert_log_contains(
"scheduled_compact_timeline.*picked .* layers for compaction"
)
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)

View File

@@ -2,8 +2,6 @@ from __future__ import annotations
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.pg_version import PgVersion
from fixtures.utils import WITH_SANITIZERS, run_only_on_postgres
@pytest.mark.parametrize(
@@ -25,20 +23,3 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
endpoint.safe_psql(f"SELECT {sql_func}();")
@run_only_on_postgres([PgVersion.V17], "Currently, build vith sanitizers is possible with v17 only")
def test_sanitizers(neon_env_builder: NeonEnvBuilder):
"""
Test that undefined behavior leads to endpoint abort with sanitizers enabled
"""
env = neon_env_builder.init_start()
env.create_branch("test_ubsan")
endpoint = env.endpoints.create_start("test_ubsan")
# Test case based on https://www.postgresql.org/message-id/17167-028026e4ca333817@postgresql.org
if not WITH_SANITIZERS:
endpoint.safe_psql("SELECT 1::int4 << 128")
else:
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
endpoint.safe_psql("SELECT 1::int4 << 128")

View File

@@ -20,6 +20,9 @@ from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.utils import query_scalar, wait_until
@pytest.mark.skip(
reason="We won't create future layers any more after https://github.com/neondatabase/neon/pull/10548"
)
@pytest.mark.parametrize(
"attach_mode",
["default_generation", "same_generation"],

View File

@@ -974,22 +974,12 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
# The new layer map should contain all the layers in the pre-migration one
# and a new in memory layer
after_migration_heatmap_layers_count = len(heatmap_after_migration["timelines"][0]["layers"])
assert (
len(heatmap_before_migration["timelines"][0]["layers"]) + 1
== after_migration_heatmap_layers_count
assert len(heatmap_before_migration["timelines"][0]["layers"]) + 1 == len(
heatmap_after_migration["timelines"][0]["layers"]
)
log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}")
env.storage_controller.download_heatmap_layers(
TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id
log.info(
f'Heatmap size after cold migration is {len(heatmap_after_migration["timelines"][0]["layers"])}'
)
def all_layers_downloaded():
local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id))
log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}")
assert local_layers_count == after_migration_heatmap_layers_count
wait_until(all_layers_downloaded)
# TODO: Once we have an endpoint for rescuing the cold location, exercise it here.

View File

@@ -1821,7 +1821,7 @@ def test_sharding_gc(
# TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
ps.allowed_errors.extend(
[
".*could not find data for key.*",
".*could not find data for key 020000000000000000000000000000000000.*",
".*could not ingest record.*",
]
)

View File

@@ -318,7 +318,7 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
# TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
ps.allowed_errors.extend(
[
".*could not find data for key.*",
".*could not find data for key 020000000000000000000000000000000000.*",
".*could not ingest record.*",
]
)

16
vendor/revisions.json vendored
View File

@@ -1,18 +1,18 @@
{
"v17": [
"17.4",
"a8fea8b4be43039f0782347c88a9b9b25f50c9d8"
"17.3",
"4d3a722312b496ff7378156caa6d41c2e70c30e4"
],
"v16": [
"16.8",
"9422247c582e7c1a08a4855d04af0874f8df2f34"
"16.7",
"999cf81b101ead40e597d5cd729458d8200f4537"
],
"v15": [
"15.12",
"81e2eef0616c65c2233c75b06f25766ae4c080c4"
"15.11",
"80ed91ce255c765d25be0bb4a02c942fe6311fbf"
],
"v14": [
"14.17",
"6254ab9b4496c3e481bc037ae69d859bbc2bdd7d"
"14.16",
"62a86dfc91e0c35a72f2ea5e99e6969b830c0c26"
]
}