slightly more efficient

wip prototype
Revert "tests: broaden allow-list for #10720 workaround (#10807 )"
2026-07-04 04:30:38 +00:00 · 2025-02-17 22:43:36 +01:00 · 2025-02-17 22:37:46 +01:00 · 2025-02-17 22:08:03 +01:00
41 changed files with 254 additions and 804 deletions
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1509,73 +1509,6 @@ WORKDIR /ext-src/pg_repack-src
 RUN make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install

-
-#########################################################################################
-#
-# Layer "pgaudit"
-# compile pgaudit extension
-#
-#########################################################################################
-
-FROM build-deps AS pgaudit-src
-ARG PG_VERSION
-WORKDIR /ext-src
-RUN case "${PG_VERSION}" in \
-    "v14") \
-    export PGAUDIT_VERSION=1.6.2 \
-    export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
-    ;; \
-    "v15") \
-    export PGAUDIT_VERSION=1.7.0 \
-    export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
-    ;; \
-    "v16") \
-    export PGAUDIT_VERSION=16.0 \
-    export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
-    ;; \
-    "v17") \
-    export PGAUDIT_VERSION=17.0 \
-    export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
-    ;; \
-    *) \
-    echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
-    esac && \
-    wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
-    echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
-    mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
-
-FROM pg-build AS pgaudit-build
-COPY --from=pgaudit-src /ext-src/ /ext-src/
-WORKDIR /ext-src/pgaudit-src
-RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
-
-#########################################################################################
-#
-# Layer "pgauditlogtofile"
-# compile pgauditlogtofile extension
-#
-#########################################################################################
-
-FROM build-deps AS pgauditlogtofile-src
-ARG PG_VERSION
-WORKDIR /ext-src
-RUN case "${PG_VERSION}" in \
-    "v14" | "v15" | "v16" | "v17") \
-    export PGAUDITLOGTOFILE_VERSION=v1.6.4 \
-    export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \
-    ;; \
-    *) \
-    echo "pgauditlogtofile is not supported on this PostgreSQL version" && exit 1;; \
-    esac && \
-    wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \
-    echo "${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz" | sha256sum --check && \
-    mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .
-
-FROM pg-build AS pgauditlogtofile-build
-COPY --from=pgauditlogtofile-src /ext-src/ /ext-src/
-WORKDIR /ext-src/pgauditlogtofile-src
-RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
-
 #########################################################################################
 #
 # Layer "neon-ext-build"
@@ -1671,8 +1604,6 @@ COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/

 #########################################################################################
 #
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -361,14 +361,6 @@ async fn run_dump_restore(
            // how we run it
            .env_clear()
            .env("LD_LIBRARY_PATH", &pg_lib_dir)
-            .env(
-                "ASAN_OPTIONS",
-                std::env::var("ASAN_OPTIONS").unwrap_or_default(),
-            )
-            .env(
-                "UBSAN_OPTIONS",
-                std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
-            )
            .kill_on_drop(true)
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
@@ -402,14 +394,6 @@ async fn run_dump_restore(
            // how we run it
            .env_clear()
            .env("LD_LIBRARY_PATH", &pg_lib_dir)
-            .env(
-                "ASAN_OPTIONS",
-                std::env::var("ASAN_OPTIONS").unwrap_or_default(),
-            )
-            .env(
-                "UBSAN_OPTIONS",
-                std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
-            )
            .kill_on_drop(true)
            .stdout(std::process::Stdio::piped())
            .stderr(std::process::Stdio::piped())
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -22,7 +22,7 @@ use pageserver_api::{
 };
 use pageserver_client::mgmt_api::{self};
 use reqwest::{Method, StatusCode, Url};
-use utils::id::{NodeId, TenantId, TimelineId};
+use utils::id::{NodeId, TenantId};

 use pageserver_api::controller_api::{
    NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -239,19 +239,6 @@ enum Command {
        #[arg(long)]
        scheduling_policy: SkSchedulingPolicyArg,
    },
-    /// Downloads any missing heatmap layers for all shard for a given timeline
-    DownloadHeatmapLayers {
-        /// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,
-        /// the operation is performed on all shards. When a sharded tenant ID is
-        /// specified, the operation is only performed on the specified shard.
-        #[arg(long)]
-        tenant_shard_id: TenantShardId,
-        #[arg(long)]
-        timeline_id: TimelineId,
-        /// Optional: Maximum download concurrency (default is 16)
-        #[arg(long)]
-        concurrency: Option<usize>,
-    },
 }

 #[derive(Parser)]
@@ -1260,24 +1247,6 @@ async fn main() -> anyhow::Result<()> {
                String::from(scheduling_policy)
            );
        }
-        Command::DownloadHeatmapLayers {
-            tenant_shard_id,
-            timeline_id,
-            concurrency,
-        } => {
-            let mut path = format!(
-                "/v1/tenant/{}/timeline/{}/download_heatmap_layers",
-                tenant_shard_id, timeline_id,
-            );
-
-            if let Some(c) = concurrency {
-                path = format!("{path}?concurrency={c}");
-            }
-
-            storcon_client
-                .dispatch::<(), ()>(Method::POST, path, None)
-                .await?;
-        }
    }

    Ok(())
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1080,7 +1080,8 @@ pub struct TenantInfo {

    /// Opaque explanation if gc is being blocked.
    ///
-    /// Only looked up for the individual tenant detail, not the listing.
+    /// Only looked up for the individual tenant detail, not the listing. This is purely for
+    /// debugging, not included in openapi.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_blocking: Option<String>,
 }
--- a/libs/safekeeper_api/src/membership.rs
+++ b/libs/safekeeper_api/src/membership.rs
@@ -9,43 +9,13 @@ use anyhow::bail;
 use serde::{Deserialize, Serialize};
 use utils::id::NodeId;

-/// 1 is the first valid generation, 0 is used as
-/// a placeholder before we fully migrate to generations.
-pub const INVALID_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(0);
-pub const INITIAL_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(1);
-
 /// Number uniquely identifying safekeeper configuration.
 /// Note: it is a part of sk control file.
-///
-/// Like tenant generations, but for safekeepers.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub struct SafekeeperGeneration(u32);
-
-impl SafekeeperGeneration {
-    pub const fn new(v: u32) -> Self {
-        Self(v)
-    }
-
-    #[track_caller]
-    pub fn previous(&self) -> Option<Self> {
-        Some(Self(self.0.checked_sub(1)?))
-    }
-
-    #[track_caller]
-    pub fn next(&self) -> Self {
-        Self(self.0 + 1)
-    }
-
-    pub fn into_inner(self) -> u32 {
-        self.0
-    }
-}
-
-impl Display for SafekeeperGeneration {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
+pub type Generation = u32;
+/// 1 is the first valid generation, 0 is used as
+/// a placeholder before we fully migrate to generations.
+pub const INVALID_GENERATION: Generation = 0;
+pub const INITIAL_GENERATION: Generation = 1;

 /// Membership is defined by ids so e.g. walproposer uses them to figure out
 /// quorums, but we also carry host and port to give wp idea where to connect.
@@ -119,7 +89,7 @@ impl Display for MemberSet {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct Configuration {
    /// Unique id.
-    pub generation: SafekeeperGeneration,
+    pub generation: Generation,
    /// Current members of the configuration.
    pub members: MemberSet,
    /// Some means it is a joint conf.
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -282,18 +282,3 @@ pub struct TimelineTermBumpResponse {
 pub struct SafekeeperUtilization {
    pub timeline_count: u64,
 }
-
-/// pull_timeline request body.
-#[derive(Debug, Deserialize, Serialize)]
-pub struct PullTimelineRequest {
-    pub tenant_id: TenantId,
-    pub timeline_id: TimelineId,
-    pub http_hosts: Vec<String>,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct PullTimelineResponse {
-    // Donor safekeeper host
-    pub safekeeper_host: String,
-    // TODO: add more fields?
-}
--- a/libs/utils/src/bin_ser.rs
+++ b/libs/utils/src/bin_ser.rs
@@ -286,11 +286,6 @@ mod tests {
    const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
    const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];

-    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
-    struct NewTypeStruct(u32);
-    const NT1: NewTypeStruct = NewTypeStruct(414243);
-    const NT1_INNER: u32 = 414243;
-
    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
    pub struct LongMsg {
        pub tag: u8,
@@ -413,42 +408,4 @@ mod tests {
        let msg2 = LongMsg::des(&encoded).unwrap();
        assert_eq!(msg, msg2);
    }
-
-    #[test]
-    /// Ensure that newtype wrappers around u32 don't change the serialization format
-    fn be_nt() {
-        use super::BeSer;
-
-        assert_eq!(NT1.serialized_size().unwrap(), 4);
-
-        let msg = NT1;
-
-        let encoded = msg.ser().unwrap();
-        let expected = hex_literal::hex!("0006 5223");
-        assert_eq!(encoded, expected);
-
-        assert_eq!(encoded, NT1_INNER.ser().unwrap());
-
-        let msg2 = NewTypeStruct::des(&encoded).unwrap();
-        assert_eq!(msg, msg2);
-    }
-
-    #[test]
-    /// Ensure that newtype wrappers around u32 don't change the serialization format
-    fn le_nt() {
-        use super::LeSer;
-
-        assert_eq!(NT1.serialized_size().unwrap(), 4);
-
-        let msg = NT1;
-
-        let encoded = msg.ser().unwrap();
-        let expected = hex_literal::hex!("2352 0600");
-        assert_eq!(encoded, expected);
-
-        assert_eq!(encoded, NT1_INNER.ser().unwrap());
-
-        let msg2 = NewTypeStruct::des(&encoded).unwrap();
-        assert_eq!(msg, msg2);
-    }
 }
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -117,10 +117,6 @@ impl TenantShardId {
        )
    }

-    pub fn range(&self) -> RangeInclusive<Self> {
-        RangeInclusive::new(*self, *self)
-    }
-
    pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
        ShardSlug(self)
    }
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -477,26 +477,6 @@ impl Client {
        self.request(Method::POST, &uri, ()).await.map(|_| ())
    }

-    pub async fn timeline_download_heatmap_layers(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        concurrency: Option<usize>,
-    ) -> Result<()> {
-        let mut path = reqwest::Url::parse(&format!(
-            "{}/v1/tenant/{}/timeline/{}/download_heatmap_layers",
-            self.mgmt_api_endpoint, tenant_shard_id, timeline_id
-        ))
-        .expect("Cannot build URL");
-
-        if let Some(concurrency) = concurrency {
-            path.query_pairs_mut()
-                .append_pair("concurrency", &format!("{}", concurrency));
-        }
-
-        self.request(Method::POST, path, ()).await.map(|_| ())
-    }
-
    pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
        let uri = format!(
            "{}/v1/tenant/{}/reset",
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -824,38 +824,6 @@ paths:
              schema:
                $ref: "#/components/schemas/TenantConfigResponse"

-  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers:
-    parameters:
-      - name: tenant_shard_id
-        in: path
-        required: true
-        schema:
-          type: string
-      - name: timeline_id
-        in: path
-        required: true
-        schema:
-          type: string
-      - name: concurrency
-        description: Maximum number of concurrent downloads (capped at remote storage concurrency)
-        in: query
-        required: false
-        schema:
-          type: integer
-    post:
-      description: |
-        Download all layers in the specified timeline's heatmap. The `tenant_shard_id` parameter
-        may be used to target all shards of a tenant when the unsharded form is used, or a specific
-        tenant shard with the sharded form.
-      responses:
-        "200":
-          description: Success
-    delete:
-      description: Stop any on-going background downloads of heatmap layers for the specified timeline.
-      responses:
-        "200":
-          description: Success
-
  /v1/utilization:
    get:
      description: |
@@ -914,8 +882,6 @@ components:
              properties:
                reason:
                  type: string
-        gc_blocking:
-          type: string

    TenantCreateRequest:
      allOf:
@@ -1117,9 +1083,6 @@ components:
        min_readable_lsn:
          type: string
          format: hex
-        latest_gc_cutoff_lsn:
-          type: string
-          format: hex
        applied_gc_cutoff_lsn:
          type: string
          format: hex
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1463,59 +1463,6 @@ async fn timeline_layer_scan_disposable_keys(
    )
 }

-async fn timeline_download_heatmap_layers_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    // Only used in the case where remote storage is not configured.
-    const DEFAULT_MAX_CONCURRENCY: usize = 100;
-    // A conservative default.
-    const DEFAULT_CONCURRENCY: usize = 16;
-
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-
-    let desired_concurrency =
-        parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY);
-
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-
-    let state = get_state(&request);
-    let timeline =
-        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
-            .await?;
-
-    let max_concurrency = get_config(&request)
-        .remote_storage_config
-        .as_ref()
-        .map(|c| c.concurrency_limit())
-        .unwrap_or(DEFAULT_MAX_CONCURRENCY);
-    let concurrency = std::cmp::min(max_concurrency, desired_concurrency);
-
-    timeline.start_heatmap_layers_download(concurrency).await?;
-
-    json_response(StatusCode::ACCEPTED, ())
-}
-
-async fn timeline_shutdown_download_heatmap_layers_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-
-    let state = get_state(&request);
-    let timeline =
-        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
-            .await?;
-
-    timeline.stop_and_drain_heatmap_layers_download().await;
-
-    json_response(StatusCode::OK, ())
-}
-
 async fn layer_download_handler(
    request: Request<Body>,
    _cancel: CancellationToken,
@@ -3679,14 +3626,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
            |r| api_handler(r, layer_map_info_handler),
        )
-        .post(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
-            |r| api_handler(r, timeline_download_heatmap_layers_handler),
-        )
-        .delete(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
-            |r| api_handler(r, timeline_shutdown_download_heatmap_layers_handler),
-        )
        .get(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
            |r| api_handler(r, layer_download_handler),
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -1799,13 +1799,6 @@ impl PageServerHandler {
                .as_millis()
                .to_string()
        });
-
-        info!(
-            "acquired lease for {} until {}",
-            lsn,
-            valid_until_str.as_deref().unwrap_or("<unknown>")
-        );
-
        let bytes = valid_until_str.as_ref().map(|x| x.as_bytes());

        pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3101,9 +3101,6 @@ impl Tenant {
                if let Some(queue) = queue {
                    outcome = queue
                        .iteration(cancel, ctx, &self.gc_block, &timeline)
-                        .instrument(
-                            info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id),
-                        )
                        .await?;
                }
            }
@@ -7849,6 +7846,18 @@ mod tests {
            }

            tline.freeze_and_flush().await?;
+            // Force layers to L1
+            tline
+                .compact(
+                    &cancel,
+                    {
+                        let mut flags = EnumSet::new();
+                        flags.insert(CompactFlags::ForceL0Compaction);
+                        flags
+                    },
+                    &ctx,
+                )
+                .await?;

            if iter % 5 == 0 {
                let (_, before_delta_file_accessed) =
@@ -7861,6 +7870,7 @@ mod tests {
                            let mut flags = EnumSet::new();
                            flags.insert(CompactFlags::ForceImageLayerCreation);
                            flags.insert(CompactFlags::ForceRepartition);
+                            flags.insert(CompactFlags::ForceL0Compaction);
                            flags
                        },
                        &ctx,
@@ -8307,6 +8317,8 @@ mod tests {

        let cancel = CancellationToken::new();

+        // Image layer creation happens on the disk_consistent_lsn so we need to force set it now.
+        tline.force_set_disk_consistent_lsn(Lsn(0x40));
        tline
            .compact(
                &cancel,
@@ -8320,8 +8332,7 @@ mod tests {
            )
            .await
            .unwrap();
-
-        // Image layers are created at last_record_lsn
+        // Image layers are created at repartition LSN
        let images = tline
            .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())
            .await
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -570,8 +570,12 @@ impl LayerMap {
        self.historic.iter()
    }

+    pub fn riter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
+        self.historic.riter()
+    }
+
    /// Get a ref counted pointer for the first in memory layer that matches the provided predicate.
-    pub fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
+    pub(crate) fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
    where
        Pred: FnMut(&Arc<InMemoryLayer>) -> bool,
    {
@@ -900,6 +904,24 @@ impl LayerMap {
        Ok(())
    }

+    /// Efficiency: this is a single btreemap walk to the end of the map in the common case where
+    /// we are queried for image layers after the start of an ephemeral layer.  In the general case
+    /// where we are called with some arbitrary LSN, this function is O(N) -- so don't use it like that.
+    pub(crate) fn get_newest_image_after(&self, lsn: Lsn) -> Option<Arc<PersistentLayerDesc>> {
+        // TODO: an efficient equivalent, this is a crude placeholder
+        for layer in self.riter_historic_layers() {
+            if !layer.is_delta() && layer.image_layer_lsn() >= lsn {
+                return Some(layer);
+            }
+
+            if layer.lsn_range.start < lsn {
+                // We are past the layers that could possibly intersect with the requested bound
+                break;
+            }
+        }
+        None
+    }
+
    /// `read_points` represent the tip of a timeline and any branch points, i.e. the places
    /// where we expect to serve reads.
    ///
--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -509,6 +509,18 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
        self.layers.values().cloned()
    }

+    /// Iterate all the layers in reverse order (newest LSNs first)
+    pub fn riter(&self) -> impl '_ + Iterator<Item = Value> {
+        // NOTE we can actually perform this without rebuilding,
+        //      but it's not necessary for now.
+        if !self.buffer.is_empty() {
+            panic!("rebuild pls")
+        }
+
+        // TODO: is cloned() really needed?
+        self.layers.values().rev().cloned()
+    }
+
    /// Return a reference to a queryable map, assuming all updates
    /// have already been processed using self.rebuild()
    pub fn get(&self) -> anyhow::Result<&HistoricLayerCoverage<Value>> {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4,7 +4,6 @@ pub mod delete;
 pub(crate) mod detach_ancestor;
 mod eviction_task;
 pub(crate) mod handle;
-mod heatmap_layers_downloader;
 pub(crate) mod import_pgdata;
 mod init;
 pub mod layer_manager;
@@ -468,10 +467,6 @@ pub struct Timeline {
    pub(crate) page_trace: ArcSwapOption<Sender<PageTraceEvent>>,

    previous_heatmap: ArcSwapOption<PreviousHeatmap>,
-
-    /// May host a background Tokio task which downloads all the layers from the current
-    /// heatmap on demand.
-    heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,
 }

 pub(crate) enum PreviousHeatmap {
@@ -2044,11 +2039,6 @@ impl Timeline {
        tracing::debug!("Cancelling CancellationToken");
        self.cancel.cancel();

-        // If we have a background task downloading heatmap layers stop it.
-        // The background downloads are sensitive to timeline cancellation (done above),
-        // so the drain will be immediate.
-        self.stop_and_drain_heatmap_layers_download().await;
-
        // Ensure Prevent new page service requests from starting.
        self.handles.shutdown();

@@ -2762,8 +2752,6 @@ impl Timeline {
                page_trace: Default::default(),

                previous_heatmap: ArcSwapOption::from_pointee(previous_heatmap),
-
-                heatmap_layers_downloader: Mutex::new(None),
            };

            result.repartition_threshold =
@@ -3787,6 +3775,8 @@ impl Timeline {
        let mut completed_keyspace = KeySpace::default();
        let mut image_covered_keyspace = KeySpaceRandomAccum::new();

+        let mut in_memory_layers_considered = Vec::new();
+
        // Prevent GC from progressing while visiting the current timeline.
        // If we are GC-ing because a new image layer was added while traversing
        // the timeline, then it will remove layers that are required for fulfilling
@@ -3822,12 +3812,34 @@ impl Timeline {

                let in_memory_layer = layers.find_in_memory_layer(|l| {
                    let start_lsn = l.get_lsn_range().start;
-                    cont_lsn > start_lsn
+                    !in_memory_layers_considered.contains(&start_lsn) && cont_lsn > start_lsn
                });

                match in_memory_layer {
                    Some(l) => {
-                        let lsn_range = l.get_lsn_range().start..cont_lsn;
+                        in_memory_layers_considered.push(l.get_lsn_range().start);
+
+                        // Search for image layers that overlap with the in-memory layer: this is rare but permitted, and
+                        // we must bound the `lsn_range` of this layer to avoid skipping past the image layer.
+                        // TODO: a narrower search that only hits on image layers matching `unmapped_keyspace`
+                        let lsn_range = if let Some(image) =
+                            layers.get_newest_image_after(l.get_lsn_range().start)
+                        {
+                            // Note that this does not guarantee serving a read from an image layer, just that we will
+                            // not skip considering thge image layer in our Fringe.  We can still end up doing walredo work
+                            // in spite of the presence of an image layer, if the inmemory layers we visit contain enough
+                            // information to fully construct a page.  For example:
+                            //  - ephemeral layer contains I1, D1, D2, <LSN X>
+                            //  - image layer at LSN X contains image equal to I2
+                            //  - we will end up doing a walredo of I1+D1+D2, rather than reading from the image layer
+                            //
+                            //  This is not a problem for correctness, and is rare enough that the wasted time doing walredo
+                            //  doesn't matter.
+                            image.get_lsn_range().start + 1..cont_lsn
+                        } else {
+                            l.get_lsn_range().start..cont_lsn
+                        };
+
                        fringe.update(
                            ReadableLayer::InMemoryLayer(l),
                            unmapped_keyspace.clone(),
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -301,12 +301,18 @@ impl GcCompactionQueue {
                        let mut guard = self.inner.lock().unwrap();
                        guard.gc_guards.insert(id, gc_guard);
                    }
-                    let _ = timeline.compact_with_options(cancel, options, ctx).await?;
+                    let _ = timeline
+                        .compact_with_options(cancel, options, ctx)
+                        .instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
+                        .await?;
                    self.notify_and_unblock(id);
                }
            }
            GcCompactionQueueItem::SubCompactionJob(options) => {
-                let _ = timeline.compact_with_options(cancel, options, ctx).await?;
+                let _ = timeline
+                    .compact_with_options(cancel, options, ctx)
+                    .instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
+                    .await?;
            }
            GcCompactionQueueItem::Notify(id) => {
                self.notify_and_unblock(id);
@@ -686,6 +692,21 @@ impl Timeline {

        // Define partitioning schema if needed

+        let l0_l1_boundary_lsn = {
+            // We do the repartition on the L0-L1 boundary. All data below the boundary
+            // are compacted by L0 with low read amplification, thus making the `repartition`
+            // function run fast.
+            let guard = self.layers.read().await;
+            let l0_min_lsn = guard
+                .layer_map()?
+                .level0_deltas()
+                .iter()
+                .map(|l| l.get_lsn_range().start)
+                .min()
+                .unwrap_or(self.get_disk_consistent_lsn());
+            l0_min_lsn.max(self.get_ancestor_lsn())
+        };
+
        // 1. L0 Compact
        let l0_outcome = {
            let timer = self.metrics.compact_time_histo.start_timer();
@@ -712,79 +733,86 @@ impl Timeline {
            return Ok(CompactionOutcome::YieldForL0);
        }

-        // 2. Repartition and create image layers if necessary
-        match self
-            .repartition(
-                self.get_last_record_lsn(),
-                self.get_compaction_target_size(),
-                options.flags,
-                ctx,
-            )
-            .await
-        {
-            Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
-                // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
-                let image_ctx = RequestContextBuilder::extend(ctx)
-                    .access_stats_behavior(AccessStatsBehavior::Skip)
-                    .build();
+        if l0_l1_boundary_lsn < self.partitioning.read().1 {
+            // We never go backwards when repartition and create image layers.
+            info!("skipping image layer generation because repartition LSN is greater than L0-L1 boundary LSN.");
+        } else {
+            // 2. Repartition and create image layers if necessary
+            match self
+                .repartition(
+                    l0_l1_boundary_lsn,
+                    self.get_compaction_target_size(),
+                    options.flags,
+                    ctx,
+                )
+                .await
+            {
+                Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
+                    // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
+                    let image_ctx = RequestContextBuilder::extend(ctx)
+                        .access_stats_behavior(AccessStatsBehavior::Skip)
+                        .build();

-                let mut partitioning = dense_partitioning;
-                partitioning
-                    .parts
-                    .extend(sparse_partitioning.into_dense().parts);
+                    let mut partitioning = dense_partitioning;
+                    partitioning
+                        .parts
+                        .extend(sparse_partitioning.into_dense().parts);

-                // 3. Create new image layers for partitions that have been modified "enough".
-                let (image_layers, outcome) = self
-                    .create_image_layers(
-                        &partitioning,
-                        lsn,
-                        if options
-                            .flags
-                            .contains(CompactFlags::ForceImageLayerCreation)
-                        {
-                            ImageLayerCreationMode::Force
-                        } else {
-                            ImageLayerCreationMode::Try
-                        },
-                        &image_ctx,
-                        self.last_image_layer_creation_status
-                            .load()
-                            .as_ref()
-                            .clone(),
-                        !options.flags.contains(CompactFlags::NoYield),
-                    )
-                    .await
-                    .inspect_err(|err| {
-                        if let CreateImageLayersError::GetVectoredError(
-                            GetVectoredError::MissingKey(_),
-                        ) = err
-                        {
-                            critical!("missing key during compaction: {err:?}");
-                        }
-                    })?;
+                    // 3. Create new image layers for partitions that have been modified "enough".
+                    let (image_layers, outcome) = self
+                        .create_image_layers(
+                            &partitioning,
+                            lsn,
+                            if options
+                                .flags
+                                .contains(CompactFlags::ForceImageLayerCreation)
+                            {
+                                ImageLayerCreationMode::Force
+                            } else {
+                                ImageLayerCreationMode::Try
+                            },
+                            &image_ctx,
+                            self.last_image_layer_creation_status
+                                .load()
+                                .as_ref()
+                                .clone(),
+                            !options.flags.contains(CompactFlags::NoYield),
+                        )
+                        .await
+                        .inspect_err(|err| {
+                            if let CreateImageLayersError::GetVectoredError(
+                                GetVectoredError::MissingKey(_),
+                            ) = err
+                            {
+                                critical!("missing key during compaction: {err:?}");
+                            }
+                        })?;

-                self.last_image_layer_creation_status
-                    .store(Arc::new(outcome.clone()));
+                    self.last_image_layer_creation_status
+                        .store(Arc::new(outcome.clone()));

-                self.upload_new_image_layers(image_layers)?;
-                if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
-                    // Yield and do not do any other kind of compaction.
-                    info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
-                    return Ok(CompactionOutcome::YieldForL0);
+                    self.upload_new_image_layers(image_layers)?;
+                    if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
+                        // Yield and do not do any other kind of compaction.
+                        info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
+                        return Ok(CompactionOutcome::YieldForL0);
+                    }
                }
-            }
-            Err(err) => {
-                // no partitioning? This is normal, if the timeline was just created
-                // as an empty timeline. Also in unit tests, when we use the timeline
-                // as a simple key-value store, ignoring the datadir layout. Log the
-                // error but continue.
-                //
-                // Suppress error when it's due to cancellation
-                if !self.cancel.is_cancelled() && !err.is_cancelled() {
-                    tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
+                Err(err) => {
+                    // no partitioning? This is normal, if the timeline was just created
+                    // as an empty timeline. Also in unit tests, when we use the timeline
+                    // as a simple key-value store, ignoring the datadir layout. Log the
+                    // error but continue.
+                    //
+                    // Suppress error when it's due to cancellation
+                    if !self.cancel.is_cancelled() && !err.is_cancelled() {
+                        tracing::error!(
+                            "could not compact, repartitioning keyspace failed: {err:?}"
+                        );
+                    }
                }
-            }
-        };
+            };
+        }

        let partition_count = self.partitioning.read().0 .0.parts.len();

--- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
+++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
@@ -1,162 +0,0 @@
-//! Timeline utility module to hydrate everything from the current heatmap.
-//!
-//! Provides utilities to spawn and abort a background task where the downloads happen.
-//! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers.
-
-use futures::StreamExt;
-use http_utils::error::ApiError;
-use std::sync::{Arc, Mutex};
-use tokio_util::sync::CancellationToken;
-use utils::sync::gate::Gate;
-
-use super::Timeline;
-
-// This status is not strictly necessary now, but gives us a nice place
-// to store progress information if we ever wish to expose it.
-pub(super) enum HeatmapLayersDownloadStatus {
-    InProgress,
-    Complete,
-}
-
-pub(super) struct HeatmapLayersDownloader {
-    handle: tokio::task::JoinHandle<()>,
-    status: Arc<Mutex<HeatmapLayersDownloadStatus>>,
-    cancel: CancellationToken,
-    downloads_guard: Arc<Gate>,
-}
-
-impl HeatmapLayersDownloader {
-    fn new(
-        timeline: Arc<Timeline>,
-        concurrency: usize,
-    ) -> Result<HeatmapLayersDownloader, ApiError> {
-        let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?;
-
-        let cancel = timeline.cancel.child_token();
-        let downloads_guard = Arc::new(Gate::default());
-
-        let status = Arc::new(Mutex::new(HeatmapLayersDownloadStatus::InProgress));
-
-        let handle = tokio::task::spawn({
-            let status = status.clone();
-            let downloads_guard = downloads_guard.clone();
-            let cancel = cancel.clone();
-
-            async move {
-                let _guard = tl_guard;
-
-                scopeguard::defer! {
-                    *status.lock().unwrap() = HeatmapLayersDownloadStatus::Complete;
-                }
-
-                let Some(heatmap) = timeline.generate_heatmap().await else {
-                    tracing::info!("Heatmap layers download failed to generate heatmap");
-                    return;
-                };
-
-                tracing::info!(
-                    resident_size=%timeline.resident_physical_size(),
-                    heatmap_layers=%heatmap.layers.len(),
-                    "Starting heatmap layers download"
-                );
-
-                let stream = futures::stream::iter(heatmap.layers.into_iter().filter_map(
-                    |layer| {
-                        let tl = timeline.clone();
-                        let dl_guard = match downloads_guard.enter() {
-                            Ok(g) => g,
-                            Err(_) => {
-                                // [`Self::shutdown`] was called. Don't spawn any more downloads.
-                                return None;
-                            }
-                        };
-
-                        Some(async move {
-                            let _dl_guard = dl_guard;
-
-                            let res = tl.download_layer(&layer.name).await;
-                            if let Err(err) = res {
-                                if !err.is_cancelled() {
-                                    tracing::warn!(layer=%layer.name,"Failed to download heatmap layer: {err}")
-                                }
-                            }
-                        })
-                    }
-                )).buffered(concurrency);
-
-                tokio::select! {
-                    _ = stream.collect::<()>() => {
-                        tracing::info!(
-                            resident_size=%timeline.resident_physical_size(),
-                            "Heatmap layers download completed"
-                        );
-                    },
-                    _ = cancel.cancelled() => {
-                        tracing::info!("Heatmap layers download cancelled");
-                    }
-                }
-            }
-        });
-
-        Ok(Self {
-            status,
-            handle,
-            cancel,
-            downloads_guard,
-        })
-    }
-
-    fn is_complete(&self) -> bool {
-        matches!(
-            *self.status.lock().unwrap(),
-            HeatmapLayersDownloadStatus::Complete
-        )
-    }
-
-    /// Drive any in-progress downloads to completion and stop spawning any new ones.
-    ///
-    /// This has two callers and they behave differently
-    /// 1. [`Timeline::shutdown`]: the drain will be immediate since downloads themselves
-    ///    are sensitive to timeline cancellation.
-    ///
-    /// 2. Endpoint handler in [`crate::http::routes`]: the drain will wait for any in-progress
-    ///    downloads to complete.
-    async fn stop_and_drain(self) {
-        // Counterintuitive: close the guard before cancelling.
-        // Something needs to poll the already created download futures to completion.
-        // If we cancel first, then the underlying task exits and we lost
-        // the poller.
-        self.downloads_guard.close().await;
-        self.cancel.cancel();
-        if let Err(err) = self.handle.await {
-            tracing::warn!("Failed to join heatmap layer downloader task: {err}");
-        }
-    }
-}
-
-impl Timeline {
-    pub(crate) async fn start_heatmap_layers_download(
-        self: &Arc<Self>,
-        concurrency: usize,
-    ) -> Result<(), ApiError> {
-        let mut locked = self.heatmap_layers_downloader.lock().unwrap();
-        if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) {
-            let dl = HeatmapLayersDownloader::new(self.clone(), concurrency)?;
-            *locked = Some(dl);
-            Ok(())
-        } else {
-            Err(ApiError::Conflict("Already running".to_string()))
-        }
-    }
-
-    pub(crate) async fn stop_and_drain_heatmap_layers_download(&self) {
-        // This can race with the start of a new downloader and lead to a situation
-        // where one donloader is shutting down and another one is in-flight.
-        // The only impact is that we'd end up using more remote storage semaphore
-        // units than expected.
-        let downloader = self.heatmap_layers_downloader.lock().unwrap().take();
-        if let Some(dl) = downloader {
-            dl.stop_and_drain().await;
-        }
-    }
-}
--- a/proxy/src/serverless/local_conn_pool.rs
+++ b/proxy/src/serverless/local_conn_pool.rs
@@ -279,12 +279,9 @@ impl ClientInnerCommon<postgres_client::Client> {
            local_data.jti += 1;
            let token = resign_jwt(&local_data.key, payload, local_data.jti)?;

-            // discard all cannot run in a transaction. must be executed alone.
-            self.inner.batch_execute("discard all").await?;
-
            // initiates the auth session
            // this is safe from query injections as the jwt format free of any escape characters.
-            let query = format!("select auth.jwt_session_init('{token}')");
+            let query = format!("discard all; select auth.jwt_session_init('{token}')");
            self.inner.batch_execute(&query).await?;

            let pid = self.inner.get_process_id();
--- a/safekeeper/client/src/mgmt_api.rs
+++ b/safekeeper/client/src/mgmt_api.rs
@@ -5,10 +5,7 @@

 use http_utils::error::HttpErrorBody;
 use reqwest::{IntoUrl, Method, StatusCode};
-use safekeeper_api::models::{
-    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
-    TimelineStatus,
-};
+use safekeeper_api::models::{SafekeeperUtilization, TimelineCreateRequest, TimelineStatus};
 use std::error::Error as _;
 use utils::{
    id::{NodeId, TenantId, TimelineId},
@@ -91,12 +88,6 @@ impl Client {
        resp.json().await.map_err(Error::ReceiveBody)
    }

-    pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
-        let uri = format!("{}/v1/pull_timeline", self.mgmt_api_endpoint);
-        let resp = self.post(&uri, req).await?;
-        resp.json().await.map_err(Error::ReceiveBody)
-    }
-
    pub async fn delete_timeline(
        &self,
        tenant_id: TenantId,
--- a/safekeeper/src/control_file.rs
+++ b/safekeeper/src/control_file.rs
@@ -235,7 +235,7 @@ impl Storage for FileStorage {
 #[cfg(test)]
 mod test {
    use super::*;
-    use safekeeper_api::membership::{Configuration, MemberSet, SafekeeperGeneration};
+    use safekeeper_api::membership::{Configuration, MemberSet};
    use tokio::fs;
    use utils::lsn::Lsn;

@@ -246,7 +246,7 @@ mod test {
        let tempdir = camino_tempfile::tempdir()?;
        let mut state = TimelinePersistentState::empty();
        state.mconf = Configuration {
-            generation: SafekeeperGeneration::new(42),
+            generation: 42,
            members: MemberSet::empty(),
            new_members: None,
        };
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -2,7 +2,6 @@ use http_utils::failpoints::failpoints_handler;
 use hyper::{Body, Request, Response, StatusCode};
 use safekeeper_api::models;
 use safekeeper_api::models::AcceptorStateStatus;
-use safekeeper_api::models::PullTimelineRequest;
 use safekeeper_api::models::SafekeeperStatus;
 use safekeeper_api::models::TermSwitchApiEntry;
 use safekeeper_api::models::TimelineStatus;
@@ -231,7 +230,7 @@ async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<
 async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permission(&request, None)?;

-    let data: PullTimelineRequest = json_request(&mut request).await?;
+    let data: pull_timeline::Request = json_request(&mut request).await?;
    let conf = get_conf(&request);
    let global_timelines = get_global_timelines(&request);

--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -4,13 +4,10 @@ use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use futures::{SinkExt, StreamExt, TryStreamExt};
 use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
-use safekeeper_api::{
-    models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus},
-    Term,
-};
+use safekeeper_api::{models::TimelineStatus, Term};
 use safekeeper_client::mgmt_api;
 use safekeeper_client::mgmt_api::Client;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 use std::{
    cmp::min,
    io::{self, ErrorKind},
@@ -36,7 +33,7 @@ use crate::{
 };
 use utils::{
    crashsafe::fsync_async_opt,
-    id::{NodeId, TenantTimelineId},
+    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
    logging::SecretString,
    lsn::Lsn,
    pausable_failpoint,
@@ -381,6 +378,21 @@ impl WalResidentTimeline {
    }
 }

+/// pull_timeline request body.
+#[derive(Debug, Deserialize)]
+pub struct Request {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub http_hosts: Vec<String>,
+}
+
+#[derive(Debug, Serialize)]
+pub struct Response {
+    // Donor safekeeper host
+    pub safekeeper_host: String,
+    // TODO: add more fields?
+}
+
 /// Response for debug dump request.
 #[derive(Debug, Deserialize)]
 pub struct DebugDumpResponse {
@@ -393,10 +405,10 @@ pub struct DebugDumpResponse {

 /// Find the most advanced safekeeper and pull timeline from it.
 pub async fn handle_request(
-    request: PullTimelineRequest,
+    request: Request,
    sk_auth_token: Option<SecretString>,
    global_timelines: Arc<GlobalTimelines>,
-) -> Result<PullTimelineResponse> {
+) -> Result<Response> {
    let existing_tli = global_timelines.get(TenantTimelineId::new(
        request.tenant_id,
        request.timeline_id,
@@ -448,7 +460,7 @@ async fn pull_timeline(
    host: String,
    sk_auth_token: Option<SecretString>,
    global_timelines: Arc<GlobalTimelines>,
-) -> Result<PullTimelineResponse> {
+) -> Result<Response> {
    let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
    info!(
        "pulling timeline {} from safekeeper {}, commit_lsn={}, flush_lsn={}, term={}, epoch={}",
@@ -523,7 +535,7 @@ async fn pull_timeline(
        .load_temp_timeline(ttid, &tli_dir_path, false)
        .await?;

-    Ok(PullTimelineResponse {
+    Ok(Response {
        safekeeper_host: host,
    })
 }
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -1004,7 +1004,7 @@ mod tests {

    use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE};
    use safekeeper_api::{
-        membership::{Configuration, MemberSet, SafekeeperGeneration, SafekeeperId},
+        membership::{Configuration, MemberSet, SafekeeperId},
        ServerInfo,
    };

@@ -1303,7 +1303,7 @@ mod tests {
            tenant_id,
            timeline_id,
            mconf: Configuration {
-                generation: SafekeeperGeneration::new(42),
+                generation: 42,
                members: MemberSet::new(vec![SafekeeperId {
                    id: NodeId(1),
                    host: "hehe.org".to_owned(),
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -516,24 +516,6 @@ async fn handle_tenant_timeline_block_unblock_gc(
    json_response(StatusCode::OK, ())
 }

-async fn handle_tenant_timeline_download_heatmap_layers(
-    service: Arc<Service>,
-    req: Request<Body>,
-) -> Result<Response<Body>, ApiError> {
-    let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
-
-    check_permissions(&req, Scope::PageServerApi)?;
-
-    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
-    let concurrency: Option<usize> = parse_query_param(&req, "concurrency")?;
-
-    service
-        .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
-        .await?;
-
-    json_response(StatusCode::OK, ())
-}
-
 // For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
 // and tenant/timeline IDs.  Since we are proxying to arbitrary paths, we don't have routing templates to
 // compare to, so we can just filter out our well known ID format with regexes.
@@ -2096,16 +2078,6 @@ pub fn make_router(
                )
            },
        )
-        .post(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
-            |r| {
-                tenant_service_handler(
-                    r,
-                    handle_tenant_timeline_download_heatmap_layers,
-                    RequestName("v1_tenant_timeline_download_heatmap_layers"),
-                )
-            },
-        )
        // Tenant detail GET passthrough to shard zero:
        .get("/v1/tenant/:tenant_id", |r| {
            tenant_service_handler(
--- a/storage_controller/src/pageserver_client.rs
+++ b/storage_controller/src/pageserver_client.rs
@@ -280,22 +280,6 @@ impl PageserverClient {
        )
    }

-    pub(crate) async fn timeline_download_heatmap_layers(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        concurrency: Option<usize>,
-    ) -> Result<()> {
-        measured_request!(
-            "download_heatmap_layers",
-            crate::metrics::Method::Post,
-            &self.node_id_label,
-            self.inner
-                .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
-                .await
-        )
-    }
-
    pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
        measured_request!(
            "utilization",
--- a/storage_controller/src/safekeeper_client.rs
+++ b/storage_controller/src/safekeeper_client.rs
@@ -1,8 +1,5 @@
 use crate::metrics::PageserverRequestLabelGroup;
-use safekeeper_api::models::{
-    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
-    TimelineStatus,
-};
+use safekeeper_api::models::{SafekeeperUtilization, TimelineCreateRequest, TimelineStatus};
 use safekeeper_client::mgmt_api::{Client, Result};
 use utils::{
    id::{NodeId, TenantId, TimelineId},
@@ -97,19 +94,6 @@ impl SafekeeperClient {
        )
    }

-    #[allow(dead_code)]
-    pub(crate) async fn pull_timeline(
-        &self,
-        req: &PullTimelineRequest,
-    ) -> Result<PullTimelineResponse> {
-        measured_request!(
-            "pull_timeline",
-            crate::metrics::Method::Post,
-            &self.node_id_label,
-            self.inner.pull_timeline(req).await
-        )
-    }
-
    pub(crate) async fn get_utilization(&self) -> Result<SafekeeperUtilization> {
        measured_request!(
            "utilization",
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -162,7 +162,6 @@ enum TenantOperations {
    TimelineDetachAncestor,
    TimelineGcBlockUnblock,
    DropDetached,
-    DownloadHeatmapLayers,
 }

 #[derive(Clone, strum_macros::Display)]
@@ -3758,61 +3757,6 @@ impl Service {
        Ok(())
    }

-    pub(crate) async fn tenant_timeline_download_heatmap_layers(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        concurrency: Option<usize>,
-    ) -> Result<(), ApiError> {
-        let _tenant_lock = trace_shared_lock(
-            &self.tenant_op_locks,
-            tenant_shard_id.tenant_id,
-            TenantOperations::DownloadHeatmapLayers,
-        )
-        .await;
-
-        let targets = {
-            let locked = self.inner.read().unwrap();
-            let mut targets = Vec::new();
-
-            // If the request got an unsharded tenant id, then apply
-            // the operation to all shards. Otherwise, apply it to a specific shard.
-            let shards_range = if tenant_shard_id.is_unsharded() {
-                TenantShardId::tenant_range(tenant_shard_id.tenant_id)
-            } else {
-                tenant_shard_id.range()
-            };
-
-            for (tenant_shard_id, shard) in locked.tenants.range(shards_range) {
-                if let Some(node_id) = shard.intent.get_attached() {
-                    let node = locked
-                        .nodes
-                        .get(node_id)
-                        .expect("Pageservers may not be deleted while referenced");
-
-                    targets.push((*tenant_shard_id, node.clone()));
-                }
-            }
-            targets
-        };
-
-        self.tenant_for_shards_api(
-            targets,
-            |tenant_shard_id, client| async move {
-                client
-                    .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
-                    .await
-            },
-            1,
-            1,
-            SHORT_RECONCILE_TIMEOUT,
-            &self.cancel,
-        )
-        .await;
-
-        Ok(())
-    }
-
    /// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
    ///
    /// On success, the returned vector contains exactly the same number of elements as the input `locations`.
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -705,7 +705,7 @@ class NeonEnvBuilder:
        assert self.version_combination is not None, "version combination must be set"

        # Always use a newer version of `neon_local`
-        (self.mixdir / "neon_local").hardlink_to(self.neon_binpath / "neon_local")
+        (self.mixdir / "neon_local").symlink_to(self.neon_binpath / "neon_local")
        self.neon_local_binpath = self.mixdir

        for component, paths in COMPONENT_BINARIES.items():
@@ -716,7 +716,7 @@ class NeonEnvBuilder:
            )
            for filename in paths:
                destination = self.mixdir / filename
-                destination.hardlink_to(directory / filename)
+                destination.symlink_to(directory / filename)
        self.neon_binpath = self.mixdir

        if self.version_combination["compute"] == "old":
@@ -2467,14 +2467,6 @@ class NeonStorageController(MetricsGetter, LogUtils):
        response.raise_for_status()
        return [TenantShardId.parse(tid) for tid in response.json()["updated"]]

-    def download_heatmap_layers(self, tenant_shard_id: TenantShardId, timeline_id: TimelineId):
-        response = self.request(
-            "POST",
-            f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
-            headers=self.headers(TokenScope.ADMIN),
-        )
-        response.raise_for_status()
-
    def __enter__(self) -> Self:
        return self

--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -64,8 +64,6 @@ VERSIONS_COMBINATIONS = (
 # If it is not set or set to a value not equal to "false", LFC is enabled by default.
 USE_LFC = os.environ.get("USE_LFC") != "false"

-WITH_SANITIZERS = os.environ.get("SANITIZERS") == "enabled"
-

 def subprocess_capture(
    capture_dir: Path,
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -236,7 +236,9 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
    wait_until(compaction_finished, timeout=60)

    # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
+    env.pageserver.assert_log_contains(
+        "scheduled_compact_timeline.*picked .* layers for compaction"
+    )

    log.info("Validating at workload end ...")
    workload.validate(env.pageserver.id)
@@ -298,8 +300,6 @@ def test_pageserver_gc_compaction_idempotent(
    workload.churn_rows(row_count, env.pageserver.id)
    env.create_branch("child_branch")  # so that we have a retain_lsn
    workload.churn_rows(row_count, env.pageserver.id)
-    env.create_branch("child_branch_2")  # so that we have another retain_lsn
-    workload.churn_rows(row_count, env.pageserver.id)
    # compact 3 times if mode is before_restart
    n_compactions = 3 if compaction_mode == "before_restart" else 1
    ps_http.timeline_compact(
@@ -315,6 +315,10 @@ def test_pageserver_gc_compaction_idempotent(
            body={
                "scheduled": True,
                "sub_compaction": True,
+                "compact_key_range": {
+                    "start": "000000000000000000000000000000000000",
+                    "end": "030000000000000000000000000000000000",
+                },
                "sub_compaction_max_job_size_mb": 16,
            },
        )
@@ -332,13 +336,19 @@ def test_pageserver_gc_compaction_idempotent(
                body={
                    "scheduled": True,
                    "sub_compaction": True,
+                    "compact_key_range": {
+                        "start": "000000000000000000000000000000000000",
+                        "end": "030000000000000000000000000000000000",
+                    },
                    "sub_compaction_max_job_size_mb": 16,
                },
            )
            wait_until(compaction_finished, timeout=60)

    # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
+    env.pageserver.assert_log_contains(
+        "scheduled_compact_timeline.*picked .* layers for compaction"
+    )

    # ensure we hit the duplicated layer key warning at least once: we did two compactions consecutively,
    # and the second one should have hit the duplicated layer key warning.
@@ -456,7 +466,9 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
    wait_until(compaction_finished, timeout=60)

    # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
+    env.pageserver.assert_log_contains(
+        "scheduled_compact_timeline.*picked .* layers for compaction"
+    )

    log.info("Validating at workload end ...")
    workload.validate(env.pageserver.id)
--- a/test_runner/regress/test_endpoint_crash.py
+++ b/test_runner/regress/test_endpoint_crash.py
@@ -2,8 +2,6 @@ from __future__ import annotations

 import pytest
 from fixtures.neon_fixtures import NeonEnvBuilder
-from fixtures.pg_version import PgVersion
-from fixtures.utils import WITH_SANITIZERS, run_only_on_postgres


@pytest.mark.parametrize(
@@ -25,20 +23,3 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
    endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
    with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
        endpoint.safe_psql(f"SELECT {sql_func}();")
-
-
-@run_only_on_postgres([PgVersion.V17], "Currently, build vith sanitizers is possible with v17 only")
-def test_sanitizers(neon_env_builder: NeonEnvBuilder):
-    """
-    Test that undefined behavior leads to endpoint abort with sanitizers enabled
-    """
-    env = neon_env_builder.init_start()
-    env.create_branch("test_ubsan")
-    endpoint = env.endpoints.create_start("test_ubsan")
-
-    # Test case based on https://www.postgresql.org/message-id/17167-028026e4ca333817@postgresql.org
-    if not WITH_SANITIZERS:
-        endpoint.safe_psql("SELECT 1::int4 << 128")
-    else:
-        with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
-            endpoint.safe_psql("SELECT 1::int4 << 128")
--- a/test_runner/regress/test_layers_from_future.py
+++ b/test_runner/regress/test_layers_from_future.py
@@ -20,6 +20,9 @@ from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
 from fixtures.utils import query_scalar, wait_until


+@pytest.mark.skip(
+    reason="We won't create future layers any more after https://github.com/neondatabase/neon/pull/10548"
+)
@pytest.mark.parametrize(
    "attach_mode",
    ["default_generation", "same_generation"],
--- a/test_runner/regress/test_pageserver_secondary.py
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -974,22 +974,12 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):

    # The new layer map should contain all the layers in the pre-migration one
    # and a new in memory layer
-    after_migration_heatmap_layers_count = len(heatmap_after_migration["timelines"][0]["layers"])
-    assert (
-        len(heatmap_before_migration["timelines"][0]["layers"]) + 1
-        == after_migration_heatmap_layers_count
+    assert len(heatmap_before_migration["timelines"][0]["layers"]) + 1 == len(
+        heatmap_after_migration["timelines"][0]["layers"]
    )

-    log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}")
-
-    env.storage_controller.download_heatmap_layers(
-        TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id
+    log.info(
+        f'Heatmap size after cold migration is {len(heatmap_after_migration["timelines"][0]["layers"])}'
    )

-    def all_layers_downloaded():
-        local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id))
-
-        log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}")
-        assert local_layers_count == after_migration_heatmap_layers_count
-
-    wait_until(all_layers_downloaded)
+    # TODO: Once we have an endpoint for rescuing the cold location, exercise it here.
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -1821,7 +1821,7 @@ def test_sharding_gc(
        # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
        ps.allowed_errors.extend(
            [
-                ".*could not find data for key.*",
+                ".*could not find data for key 020000000000000000000000000000000000.*",
                ".*could not ingest record.*",
            ]
        )
--- a/test_runner/regress/test_storage_scrubber.py
+++ b/test_runner/regress/test_storage_scrubber.py
@@ -318,7 +318,7 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
        # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
        ps.allowed_errors.extend(
            [
-                ".*could not find data for key.*",
+                ".*could not find data for key 020000000000000000000000000000000000.*",
                ".*could not ingest record.*",
            ]
        )
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
-    "17.4",
-    "a8fea8b4be43039f0782347c88a9b9b25f50c9d8"
+    "17.3",
+    "4d3a722312b496ff7378156caa6d41c2e70c30e4"
  ],
  "v16": [
-    "16.8",
-    "9422247c582e7c1a08a4855d04af0874f8df2f34"
+    "16.7",
+    "999cf81b101ead40e597d5cd729458d8200f4537"
  ],
  "v15": [
-    "15.12",
-    "81e2eef0616c65c2233c75b06f25766ae4c080c4"
+    "15.11",
+    "80ed91ce255c765d25be0bb4a02c942fe6311fbf"
  ],
  "v14": [
-    "14.17",
-    "6254ab9b4496c3e481bc037ae69d859bbc2bdd7d"
+    "14.16",
+    "62a86dfc91e0c35a72f2ea5e99e6969b830c0c26"
  ]
 }
Author	SHA1	Message	Date
John Spray	422310c19b	slightly more efficient	2025-02-17 22:43:36 +01:00
John Spray	77b1fd40b5	wip prototype	2025-02-17 22:37:46 +01:00
John Spray	a8f59f851d	Revert "tests: broaden allow-list for #10720 workaround (#10807 )" This reverts commit `ae463f366b`.	2025-02-17 22:08:03 +01:00