Compare commits


3 Commits

Author SHA1 Message Date
Conrad Ludgate 9a0d3e84d5 apply envfilter only to export layer 2025-02-05 12:11:28 +00:00
Conrad Ludgate 57d51d581d fix env 2025-02-05 11:26:10 +00:00
Conrad Ludgate 47c15899c9 feat: experiment with tokio-console 2025-02-05 10:05:12 +00:00
39 changed files with 336 additions and 858 deletions

View File

@@ -3,15 +3,18 @@
# by the RUSTDOCFLAGS env var in CI.
rustdocflags = ["-Arustdoc::private_intra_doc_links"]
# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
# we've seen with libunwind-based profiling. See also:
#
# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
# * <https://github.com/rust-lang/rust/pull/122646>
#
# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.
rustflags = ["-Cforce-frame-pointers=yes"]
rustflags = [
# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
# we've seen with libunwind-based profiling. See also:
#
# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
# * <https://github.com/rust-lang/rust/pull/122646>
"-Cforce-frame-pointers=yes",
# Enable tokio_unstable to enable tokio-console support
"--cfg=tokio_unstable"
]
[alias]
build_testing = ["build", "--features", "testing"]

View File

@@ -24,4 +24,3 @@
!storage_controller/
!vendor/postgres-*/
!workspace_hack/
!build_tools/patches

Cargo.lock (generated, 153 changed lines)
View File

@@ -717,13 +717,40 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"itoa",
"matchit 0.7.3",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 1.0.1",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
dependencies = [
"axum-core",
"axum-core 0.5.0",
"base64 0.22.1",
"bytes",
"form_urlencoded",
@@ -734,7 +761,7 @@ dependencies = [
"hyper 1.4.1",
"hyper-util",
"itoa",
"matchit",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
@@ -754,6 +781,26 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.5.0"
@@ -1299,7 +1346,7 @@ dependencies = [
"aws-config",
"aws-sdk-kms",
"aws-sdk-s3",
"axum",
"axum 0.8.1",
"base64 0.13.1",
"bytes",
"camino",
@@ -1337,7 +1384,7 @@ dependencies = [
"tokio-stream",
"tokio-util",
"tower 0.5.2",
"tower-http",
"tower-http 0.6.2",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
@@ -1359,6 +1406,47 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "console-api"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8030735ecb0d128428b64cd379809817e620a40e5001c54465b99ec5feec2857"
dependencies = [
"futures-core",
"prost",
"prost-types",
"tonic",
"tracing-core",
]
[[package]]
name = "console-subscriber"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6539aa9c6a4cd31f4b1c040f860a1eac9aa80e7df6b05d506a6e7179936d6a01"
dependencies = [
"console-api",
"crossbeam-channel",
"crossbeam-utils",
"futures-task",
"hdrhistogram",
"humantime",
"hyper-util",
"parking_lot 0.12.1",
"prost",
"prost-types",
"serde",
"serde_json",
"thread_local",
"tokio",
"tokio-stream",
"tonic",
"tonic-web",
"tracing",
"tracing-core",
"tracing-subscriber",
]
[[package]]
name = "const-oid"
version = "0.9.6"
@@ -3435,6 +3523,12 @@ dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matchit"
version = "0.8.4"
@@ -4892,6 +4986,7 @@ dependencies = [
"clap",
"clashmap",
"compute_api",
"console-subscriber",
"consumption_metrics",
"ecdsa 0.16.9",
"ed25519-dalek",
@@ -5404,7 +5499,7 @@ dependencies = [
"async-trait",
"getrandom 0.2.11",
"http 1.1.0",
"matchit",
"matchit 0.8.4",
"opentelemetry",
"reqwest",
"reqwest-middleware",
@@ -6893,6 +6988,7 @@ dependencies = [
"signal-hook-registry",
"socket2",
"tokio-macros",
"tracing",
"windows-sys 0.52.0",
]
@@ -7132,9 +7228,12 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum 0.7.9",
"base64 0.22.1",
"bytes",
"h2 0.4.4",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
@@ -7146,6 +7245,7 @@ dependencies = [
"prost",
"rustls-native-certs 0.8.0",
"rustls-pemfile 2.1.1",
"socket2",
"tokio",
"tokio-rustls 0.26.0",
"tokio-stream",
@@ -7169,6 +7269,26 @@ dependencies = [
"syn 2.0.90",
]
[[package]]
name = "tonic-web"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5299dd20801ad736dccb4a5ea0da7376e59cd98f213bf1c3d478cf53f4834b58"
dependencies = [
"base64 0.22.1",
"bytes",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"pin-project",
"tokio-stream",
"tonic",
"tower-http 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower"
version = "0.4.13"
@@ -7205,6 +7325,22 @@ dependencies = [
"tracing",
]
[[package]]
name = "tower-http"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
dependencies = [
"bitflags 2.8.0",
"bytes",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"pin-project-lite",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-http"
version = "0.6.2"
@@ -7335,6 +7471,7 @@ checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
dependencies = [
"matchers",
"once_cell",
"parking_lot 0.12.1",
"regex",
"serde",
"serde_json",
@@ -7637,7 +7774,7 @@ name = "vm_monitor"
version = "0.1.0"
dependencies = [
"anyhow",
"axum",
"axum 0.8.1",
"cgroups-rs",
"clap",
"futures",
@@ -8126,6 +8263,7 @@ dependencies = [
"chrono",
"clap",
"clap_builder",
"crossbeam-utils",
"crypto-bigint 0.5.5",
"der 0.7.8",
"deranged",
@@ -8142,6 +8280,7 @@ dependencies = [
"getrandom 0.2.11",
"half",
"hashbrown 0.14.5",
"hdrhistogram",
"hex",
"hmac",
"hyper 0.14.30",
@@ -8197,9 +8336,11 @@ dependencies = [
"toml_edit",
"tonic",
"tower 0.4.13",
"tower 0.5.2",
"tracing",
"tracing-core",
"tracing-log",
"tracing-subscriber",
"url",
"zerocopy",
"zeroize",

View File

@@ -80,6 +80,7 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive", "env"] }
clashmap = { version = "1.0", features = ["raw-api"] }
comfy-table = "7.1"
console-subscriber = { version = "0.4.1", features = ["parking_lot", "grpc-web"] }
const_format = "0.2"
crc32c = "0.6"
diatomic-waker = { version = "0.2.3" }

View File

@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
ARG ADDITIONAL_RUSTFLAGS
RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes --cfg=tokio_unstable ${ADDITIONAL_RUSTFLAGS}" cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \

View File

@@ -12,8 +12,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
apt update && \
@@ -46,7 +44,6 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
mkdir /tmp/pgcopydb && \
tar -xzf /tmp/pgcopydb.tar.gz -C /tmp/pgcopydb --strip-components=1 && \
cd /tmp/pgcopydb && \
patch -p1 < /pgcopydbv017.patch && \
make -s clean && \
make -s -j12 install && \
libpq_path=$(find /lib /usr/lib -name "libpq.so.5" | head -n 1) && \

View File

@@ -1,57 +0,0 @@
diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c
index d730b03..69a9be9 100644
--- a/src/bin/pgcopydb/copydb.c
+++ b/src/bin/pgcopydb/copydb.c
@@ -44,6 +44,7 @@ GUC dstSettings[] = {
{ "synchronous_commit", "'off'" },
{ "statement_timeout", "0" },
{ "lock_timeout", "0" },
+ { "idle_in_transaction_session_timeout", "0" },
{ NULL, NULL },
};
diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c
index 94f2f46..e051ba8 100644
--- a/src/bin/pgcopydb/pgsql.c
+++ b/src/bin/pgcopydb/pgsql.c
@@ -2319,6 +2319,11 @@ pgsql_execute_log_error(PGSQL *pgsql,
LinesBuffer lbuf = { 0 };
+ if (message != NULL){
+ // make sure message is writable by splitLines
+ message = strdup(message);
+ }
+
if (!splitLines(&lbuf, message))
{
/* errors have already been logged */
@@ -2332,6 +2337,7 @@ pgsql_execute_log_error(PGSQL *pgsql,
PQbackendPID(pgsql->connection),
lbuf.lines[lineNumber]);
}
+ free(message); // free copy of message we created above
if (pgsql->logSQL)
{
@@ -3174,11 +3180,18 @@ pgcopy_log_error(PGSQL *pgsql, PGresult *res, const char *context)
/* errors have already been logged */
return;
}
-
if (res != NULL)
{
char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
- strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));
+ if (sqlstate == NULL)
+ {
+ // PQresultErrorField returned NULL!
+ pgsql->sqlstate[0] = '\0'; // Set to an empty string to avoid segfault
+ }
+ else
+ {
+ strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));
+ }
}
char *endpoint =

View File

@@ -85,10 +85,6 @@ ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
ARG ALPINE_CURL_VERSION=8.11.1
# By default, build all PostgreSQL extensions. For quick local testing when you don't
# care about the extensions, pass EXTENSIONS=none or EXTENSIONS=minimal
ARG EXTENSIONS=all
#########################################################################################
#
# Layer "build-deps"
@@ -1488,35 +1484,12 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
#########################################################################################
#
# Layer "extensions-none"
#
#########################################################################################
FROM build-deps AS extensions-none
RUN mkdir /usr/local/pgsql
#########################################################################################
#
# Layer "extensions-minimal"
#
# This subset of extensions includes the extensions that we have in
# shared_preload_libraries by default.
#
#########################################################################################
FROM build-deps AS extensions-minimal
COPY --from=pgrag-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_cron-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
#########################################################################################
#
# Layer "extensions-all"
# Layer "all-extensions"
# Bundle together all the extensions
#
#########################################################################################
FROM build-deps AS extensions-all
FROM build-deps AS all-extensions
ARG PG_VERSION
# Public extensions
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1558,13 +1531,7 @@ COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
#########################################################################################
#
# Layer "neon-pg-ext-build"
# Includes Postgres and all the extensions chosen by EXTENSIONS arg.
#
#########################################################################################
FROM extensions-${EXTENSIONS} AS neon-pg-ext-build
COPY --from=neon-ext-build /usr/local/pgsql/ /usr/local/pgsql/
#########################################################################################
#
@@ -1647,8 +1614,7 @@ RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 30
#
#########################################################################################
FROM neon-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
COPY --from=all-extensions /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp

View File

@@ -204,16 +204,14 @@ impl RemoteExtSpec {
// Check if extension is present in public or custom.
// If not, then it is not allowed to be used by this compute.
if !self
.public_extensions
.as_ref()
.is_some_and(|exts| exts.iter().any(|e| e == ext_name))
&& !self
.custom_extensions
.as_ref()
.is_some_and(|exts| exts.iter().any(|e| e == ext_name))
{
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
if let Some(public_extensions) = &self.public_extensions {
if !public_extensions.contains(&real_ext_name.to_string()) {
if let Some(custom_extensions) = &self.custom_extensions {
if !custom_extensions.contains(&real_ext_name.to_string()) {
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
}
}
}
}
match self.extension_data.get(real_ext_name) {
@@ -342,96 +340,6 @@ mod tests {
use super::*;
use std::fs::File;
#[test]
fn allow_installing_remote_extensions() {
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": null,
"custom_extensions": null,
"library_index": {},
"extension_data": {},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": null,
"library_index": {},
"extension_data": {},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": [],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": ["ext"],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect("Extension should be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": ["ext"],
"custom_extensions": [],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect("Extension should be found");
}
#[test]
fn parse_spec_file() {
let file = File::open("tests/cluster_spec.json").unwrap();

View File

@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
use crate::{
DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT,
DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
/// External backup storage configuration, enough for creating a client for that storage.
@@ -45,11 +45,11 @@ impl RemoteStorageKind {
impl RemoteStorageConfig {
/// Helper to fetch the configured concurrency limit.
pub fn concurrency_limit(&self) -> usize {
pub fn concurrency_limit(&self) -> Option<usize> {
match &self.storage {
RemoteStorageKind::LocalFs { .. } => DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT,
RemoteStorageKind::AwsS3(c) => c.concurrency_limit.into(),
RemoteStorageKind::AzureContainer(c) => c.concurrency_limit.into(),
RemoteStorageKind::LocalFs { .. } => None,
RemoteStorageKind::AwsS3(c) => Some(c.concurrency_limit.into()),
RemoteStorageKind::AzureContainer(c) => Some(c.concurrency_limit.into()),
}
}
}
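The signature change above (concurrency_limit now returns Option<usize>) drops the artificial LocalFs limit; only the S3 and Azure backends carry a configured value, and callers choose their own fallback. A small self-contained Rust sketch of the new shape, using a hypothetical Backend enum as a stand-in for RemoteStorageKind (not code from this diff):

enum Backend {
    LocalFs,
    AwsS3 { concurrency_limit: usize },
    AzureContainer { concurrency_limit: usize },
}

fn concurrency_limit(backend: &Backend) -> Option<usize> {
    match backend {
        // LocalFs no longer reports a client-side limit.
        Backend::LocalFs => None,
        Backend::AwsS3 { concurrency_limit } => Some(*concurrency_limit),
        Backend::AzureContainer { concurrency_limit } => Some(*concurrency_limit),
    }
}

fn main() {
    // Callers pick the fallback themselves; remote_timeline_client later in
    // this diff uses .and_then(...).unwrap_or(0).
    assert_eq!(concurrency_limit(&Backend::LocalFs).unwrap_or(0), 0);
    assert_eq!(
        concurrency_limit(&Backend::AwsS3 { concurrency_limit: 100 }).unwrap_or(0),
        100
    );
}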

View File

@@ -65,12 +65,6 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
/// Here, a limit of max 20k concurrent connections was noted.
/// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
/// Set this limit analogously to the S3 limit.
///
/// The local filesystem backend doesn't enforce a concurrency limit itself, but this also bounds
/// the upload queue concurrency. Some tests create thousands of uploads, which slows down the
/// quadratic scheduling of the upload queue, and there is no point spawning so many Tokio tasks.
pub const DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT: usize = 100;
/// No limits on the client side, which currently means 1000 for AWS S3.
/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;

View File

@@ -32,7 +32,6 @@ use utils::id::TimelineId;
use crate::config::PageServerConf;
use crate::context::{PageContentKind, RequestContext};
use crate::pgdatadir_mapping::DatadirModificationStats;
use crate::task_mgr::TaskKind;
use crate::tenant::layer_map::LayerMap;
use crate::tenant::mgr::TenantSlot;
@@ -2379,40 +2378,10 @@ pub(crate) struct WalIngestMetrics {
pub(crate) records_observed: IntCounter,
pub(crate) records_committed: IntCounter,
pub(crate) records_filtered: IntCounter,
pub(crate) values_committed_metadata_images: IntCounter,
pub(crate) values_committed_metadata_deltas: IntCounter,
pub(crate) values_committed_data_images: IntCounter,
pub(crate) values_committed_data_deltas: IntCounter,
pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
}
impl WalIngestMetrics {
pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {
if stats.metadata_images > 0 {
self.values_committed_metadata_images
.inc_by(stats.metadata_images);
}
if stats.metadata_deltas > 0 {
self.values_committed_metadata_deltas
.inc_by(stats.metadata_deltas);
}
if stats.data_images > 0 {
self.values_committed_data_images.inc_by(stats.data_images);
}
if stats.data_deltas > 0 {
self.values_committed_data_deltas.inc_by(stats.data_deltas);
}
}
}
pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
let values_committed = register_int_counter_vec!(
"pageserver_wal_ingest_values_committed",
"Number of values committed to pageserver storage from WAL records",
&["class", "kind"],
)
.expect("failed to define a metric");
WalIngestMetrics {
bytes_received: register_int_counter!(
"pageserver_wal_ingest_bytes_received",
@@ -2439,10 +2408,6 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
"Number of WAL records filtered out due to sharding"
)
.expect("failed to define a metric"),
values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]),
values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]),
values_committed_data_images: values_committed.with_label_values(&["data", "image"]),
values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]),
gap_blocks_zeroed_on_rel_extend: register_int_counter!(
"pageserver_gap_blocks_zeroed_on_rel_extend",
"Total number of zero gap blocks written on relation extends"

View File

@@ -1280,6 +1280,8 @@ impl PageServerHandler {
}
Ok(())
}
// and log the info! line inside the request span
.instrument(span.clone())
.await?;
}
Ok(())
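The added .instrument(span.clone()) above attaches the request span to the awaited block, so the info! line emitted inside it is recorded within that span. A generic standalone sketch of the same tracing pattern (not pageserver code):

use tracing::{info, info_span, Instrument};

async fn handle_request(id: u64) {
    async {
        // Because the block is instrumented, this event lands inside the
        // "request" span and inherits its `id` field.
        info!("request handled");
    }
    .instrument(info_span!("request", id))
    .await;
}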
@@ -2035,12 +2037,6 @@ impl PageServerHandler {
.get(tenant_id, timeline_id, ShardSelector::Zero)
.await?;
if timeline.is_archived() == Some(true) {
// TODO after a grace period, turn this log line into a hard error
tracing::warn!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it.");
//return Err(QueryError::NotFound("timeline is archived".into()))
}
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
if let Some(lsn) = lsn {
// Backup was requested at a particular LSN. Wait for it to arrive.

View File

@@ -48,7 +48,7 @@ use tracing::{debug, trace, warn};
use utils::bin_ser::DeserializeError;
use utils::pausable_failpoint;
use utils::{bin_ser::BeSer, lsn::Lsn};
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
use wal_decoder::serialized_batch::SerializedValueBatch;
/// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached.
pub const MAX_AUX_FILE_DELTAS: usize = 1024;
@@ -1297,26 +1297,6 @@ impl DatadirModification<'_> {
.is_some_and(|b| b.has_data())
}
/// Returns statistics about the currently pending modifications.
pub(crate) fn stats(&self) -> DatadirModificationStats {
let mut stats = DatadirModificationStats::default();
for (_, _, value) in self.pending_metadata_pages.values().flatten() {
match value {
Value::Image(_) => stats.metadata_images += 1,
Value::WalRecord(r) if r.will_init() => stats.metadata_images += 1,
Value::WalRecord(_) => stats.metadata_deltas += 1,
}
}
for valuemeta in self.pending_data_batch.iter().flat_map(|b| &b.metadata) {
match valuemeta {
ValueMeta::Serialized(s) if s.will_init => stats.data_images += 1,
ValueMeta::Serialized(_) => stats.data_deltas += 1,
ValueMeta::Observed(_) => {}
}
}
stats
}
/// Set the current lsn
pub(crate) fn set_lsn(&mut self, lsn: Lsn) -> anyhow::Result<()> {
ensure!(
@@ -2337,15 +2317,6 @@ impl DatadirModification<'_> {
}
}
/// Statistics for a DatadirModification.
#[derive(Default)]
pub struct DatadirModificationStats {
pub metadata_images: u64,
pub metadata_deltas: u64,
pub data_images: u64,
pub data_deltas: u64,
}
/// This struct facilitates accessing either a committed key from the timeline at a
/// specific LSN, or the latest uncommitted key from a pending modification.
///

View File

@@ -437,7 +437,8 @@ impl RemoteTimelineClient {
.conf
.remote_storage_config
.as_ref()
.map_or(0, |r| r.concurrency_limit());
.and_then(|r| r.concurrency_limit())
.unwrap_or(0);
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;
self.update_remote_physical_size_gauge(Some(index_part));
@@ -460,7 +461,8 @@ impl RemoteTimelineClient {
.conf
.remote_storage_config
.as_ref()
.map_or(0, |r| r.concurrency_limit());
.and_then(|r| r.concurrency_limit())
.unwrap_or(0);
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?;
self.update_remote_physical_size_gauge(None);
@@ -482,7 +484,8 @@ impl RemoteTimelineClient {
.conf
.remote_storage_config
.as_ref()
.map_or(0, |r| r.concurrency_limit());
.and_then(|r| r.concurrency_limit())
.unwrap_or(0);
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;

View File

@@ -211,7 +211,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
error_run_count = 0;
// schedule the next compaction immediately in case there is a pending compaction task
sleep_duration = if let CompactionOutcome::Pending = outcome {
Duration::from_secs(1)
Duration::ZERO
} else {
period
};

View File

@@ -192,12 +192,7 @@ pub enum ImageLayerCreationMode {
#[derive(Clone, Debug, Default)]
pub enum LastImageLayerCreationStatus {
Incomplete {
/// The last key of the partition (exclusive) that was processed in the last
/// image layer creation attempt. We will continue from this key in the next
/// attempt.
last_key: Key,
},
Incomplete, // TODO: record the last key being processed
Complete,
#[default]
Initial,
@@ -4351,7 +4346,7 @@ impl Timeline {
Ok(result)
}
// Is it time to create a new image layer for the given partition? True if we want to generate.
// Is it time to create a new image layer for the given partition?
async fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> bool {
let threshold = self.get_image_creation_threshold();
@@ -4663,11 +4658,6 @@ impl Timeline {
) -> Result<(Vec<ResidentLayer>, LastImageLayerCreationStatus), CreateImageLayersError> {
let timer = self.metrics.create_images_time_histo.start_timer();
if partitioning.parts.is_empty() {
warn!("no partitions to create image layers for");
return Ok((vec![], LastImageLayerCreationStatus::Complete));
}
// We need to avoid holes between generated image layers.
// Otherwise LayerMap::image_layer_exists will return false if key range of some layer is covered by more than one
// image layer with hole between them. In this case such layer can not be utilized by GC.
@@ -4679,65 +4669,28 @@ impl Timeline {
// image layers <100000000..100000099> and <200000000..200000199> are not completely covering it.
let mut start = Key::MIN;
let check_for_image_layers =
if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {
info!(
"resuming image layer creation: last_status=incomplete, continue from {}",
last_key
);
true
} else {
self.should_check_if_image_layers_required(lsn)
};
let check_for_image_layers = if let LastImageLayerCreationStatus::Incomplete = last_status {
info!(
"resuming image layer creation: last_status={:?}",
last_status
);
true
} else {
self.should_check_if_image_layers_required(lsn)
};
let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
let mut all_generated = true;
let mut partition_processed = 0;
let mut total_partitions = partitioning.parts.len();
let mut last_partition_processed = None;
let mut partition_parts = partitioning.parts.clone();
let total_partitions = partitioning.parts.len();
if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {
// We need to skip the partitions that have already been processed.
let mut found = false;
for (i, partition) in partition_parts.iter().enumerate() {
if last_key <= partition.end().unwrap() {
// ```plain
// |------|--------|----------|------|
// ^last_key
// ^start from this partition
// ```
// Why `i+1` instead of `i`?
// It is possible that the user did some writes after the previous image layer creation attempt so that
// a relation grows in size, and the last_key is now in the middle of the partition. In this case, we
// still want to skip this partition, so that we can make progress and avoid generating image layers over
// the same partition. Doing a mod to ensure we don't end up with an empty vec.
if i + 1 >= total_partitions {
// In general, this case should not happen -- if last_key is on the last partition, the previous
// iteration of image layer creation should return a complete status.
break; // with found=false
}
partition_parts = partition_parts.split_off(i + 1); // Remove the first i + 1 elements
total_partitions = partition_parts.len();
// Update the start key to the partition start.
start = partition_parts[0].start().unwrap();
found = true;
break;
}
}
if !found {
// Last key is within the last partition, or larger than all partitions.
return Ok((vec![], LastImageLayerCreationStatus::Complete));
}
}
for partition in partition_parts.iter() {
for partition in partitioning.parts.iter() {
if self.cancel.is_cancelled() {
return Err(CreateImageLayersError::Cancelled);
}
partition_processed += 1;
let img_range = start..partition.ranges.last().unwrap().end;
let compact_metadata = partition.overlaps(&Key::metadata_key_range());
if compact_metadata {
@@ -4772,8 +4725,6 @@ impl Timeline {
lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn),
is_delta: false,
}) {
// TODO: this can be processed with the BatchLayerWriter::finish_with_discard
// in the future.
tracing::info!(
"Skipping image layer at {lsn} {}..{}, already exists",
img_range.start,
@@ -4854,6 +4805,8 @@ impl Timeline {
}
}
partition_processed += 1;
if let ImageLayerCreationMode::Try = mode {
// We have at least made some progress
if batch_image_writer.pending_layer_num() >= 1 {
@@ -4869,10 +4822,8 @@ impl Timeline {
* self.get_compaction_threshold();
if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold {
tracing::info!(
"preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers {}",
partition.start().unwrap(), partition.end().unwrap(), num_of_l0_layers
"preempt image layer generation at {start} at {lsn}: too many L0 layers {num_of_l0_layers}",
);
last_partition_processed = Some(partition.clone());
all_generated = false;
break;
}
@@ -4917,14 +4868,7 @@ impl Timeline {
if all_generated {
LastImageLayerCreationStatus::Complete
} else {
LastImageLayerCreationStatus::Incomplete {
last_key: if let Some(last_partition_processed) = last_partition_processed {
last_partition_processed.end().unwrap_or(Key::MIN)
} else {
// This branch should be unreachable, but in case it happens, we can just return the start key.
Key::MIN
},
}
LastImageLayerCreationStatus::Incomplete
},
))
}

View File

@@ -33,7 +33,6 @@ use crate::page_cache;
use crate::statvfs::Statvfs;
use crate::tenant::checks::check_valid_layermap;
use crate::tenant::gc_block::GcBlock;
use crate::tenant::layer_map::LayerMap;
use crate::tenant::remote_timeline_client::WaitCompletionError;
use crate::tenant::storage_layer::batch_split_writer::{
BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter,
@@ -439,11 +438,6 @@ impl KeyHistoryRetention {
if dry_run {
return true;
}
if LayerMap::is_l0(&key.key_range, key.is_delta) {
// gc-compaction should not produce L0 deltas, otherwise it will break the layer order.
// We should ignore such layers.
return true;
}
let layer_generation;
{
let guard = tline.layers.read().await;
@@ -754,7 +748,7 @@ impl Timeline {
.store(Arc::new(outcome.clone()));
self.upload_new_image_layers(image_layers)?;
if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
if let LastImageLayerCreationStatus::Incomplete = outcome {
// Yield and do not do any other kind of compaction.
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
return Ok(CompactionOutcome::Pending);

View File

@@ -355,19 +355,6 @@ pub(super) async fn handle_walreceiver_connection(
// advances it to its end LSN. 0 is just an initialization placeholder.
let mut modification = timeline.begin_modification(Lsn(0));
async fn commit(
modification: &mut DatadirModification<'_>,
ctx: &RequestContext,
uncommitted: &mut u64,
) -> anyhow::Result<()> {
let stats = modification.stats();
modification.commit(ctx).await?;
WAL_INGEST.records_committed.inc_by(*uncommitted);
WAL_INGEST.inc_values_committed(&stats);
*uncommitted = 0;
Ok(())
}
if !records.is_empty() {
timeline
.metrics
@@ -379,7 +366,8 @@ pub(super) async fn handle_walreceiver_connection(
if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes)
&& uncommitted_records > 0
{
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
modification.commit(&ctx).await?;
uncommitted_records = 0;
}
let local_next_record_lsn = interpreted.next_record_lsn;
@@ -408,7 +396,8 @@ pub(super) async fn handle_walreceiver_connection(
|| modification.approx_pending_bytes()
> DatadirModification::MAX_PENDING_BYTES
{
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
modification.commit(&ctx).await?;
uncommitted_records = 0;
}
}
@@ -426,7 +415,7 @@ pub(super) async fn handle_walreceiver_connection(
if uncommitted_records > 0 || needs_last_record_lsn_advance {
// Commit any uncommitted records
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
modification.commit(&ctx).await?;
}
if !caught_up && streaming_lsn >= end_of_wal {
@@ -453,12 +442,10 @@ pub(super) async fn handle_walreceiver_connection(
filtered: &mut u64,
ctx: &RequestContext,
) -> anyhow::Result<()> {
let stats = modification.stats();
modification.commit(ctx).await?;
WAL_INGEST
.records_committed
.inc_by(*uncommitted - *filtered);
WAL_INGEST.inc_values_committed(&stats);
modification.commit(ctx).await?;
*uncommitted = 0;
*filtered = 0;
Ok(())

View File

@@ -509,44 +509,47 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
hash = get_hash_value(lfc_hash, &tag);
chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
LWLockAcquire(lfc_lock, LW_SHARED);
if (!LFC_ENABLED())
{
LWLockRelease(lfc_lock);
return 0;
}
while (true)
{
int this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs);
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
if (entry != NULL)
int this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
if (LFC_ENABLED())
{
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
if (entry != NULL)
{
if ((entry->bitmap[chunk_offs >> 5] &
((uint32)1 << (chunk_offs & 31))) != 0)
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
{
BITMAP_SET(bitmap, i);
found++;
if ((entry->bitmap[chunk_offs >> 5] &
((uint32)1 << (chunk_offs & 31))) != 0)
{
BITMAP_SET(bitmap, i);
found++;
}
}
}
else
{
i += this_chunk;
}
}
else
{
i += this_chunk;
LWLockRelease(lfc_lock);
return found;
}
/*
* Break out of the iteration before doing expensive stuff for
* a next iteration
*/
if (i >= nblocks)
if (i + 1 >= nblocks)
break;
/*
@@ -560,8 +563,8 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
LWLockRelease(lfc_lock);
#ifdef USE_ASSERT_CHECKING
{
#if USE_ASSERT_CHECKING
do {
int count = 0;
for (int j = 0; j < nblocks; j++)
@@ -571,7 +574,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
}
Assert(count == found);
}
} while (false);
#endif
return found;

View File

@@ -36,11 +36,6 @@
#include "pagestore_client.h"
#include "walproposer.h"
#ifdef __linux__
#include <sys/ioctl.h>
#include <linux/sockios.h>
#endif
#define PageStoreTrace DEBUG5
#define MIN_RECONNECT_INTERVAL_USEC 1000
@@ -733,36 +728,11 @@ retry:
INSTR_TIME_SUBTRACT(since_last_log, last_log_ts);
if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS)
{
int sndbuf = -1;
int recvbuf = -1;
#ifdef __linux__
int socketfd;
#endif
since_start = now;
INSTR_TIME_SUBTRACT(since_start, start_ts);
#ifdef __linux__
/*
* get kernel's send and recv queue size via ioctl
* https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27
*/
socketfd = PQsocket(pageserver_conn);
if (socketfd != -1) {
int ioctl_err;
ioctl_err = ioctl(socketfd, SIOCOUTQ, &sndbuf);
if (ioctl_err!= 0) {
sndbuf = -errno;
}
ioctl_err = ioctl(socketfd, FIONREAD, &recvbuf);
if (ioctl_err != 0) {
recvbuf = -errno;
}
}
#endif
neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)",
neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses)",
INSTR_TIME_GET_DOUBLE(since_start),
shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf);
shard->nrequests_sent, shard->nresponses_received);
last_log_ts = now;
logged = true;
}

View File

@@ -916,7 +916,7 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
{
uint64 min_ring_index;
PrefetchRequest hashkey;
#ifdef USE_ASSERT_CHECKING
#if USE_ASSERT_CHECKING
bool any_hits = false;
#endif
/* We will never read further ahead than our buffer can store. */
@@ -955,7 +955,7 @@ Retry:
else
lsns = NULL;
#ifdef USE_ASSERT_CHECKING
#if USE_ASSERT_CHECKING
any_hits = true;
#endif

View File

@@ -27,6 +27,7 @@ chrono.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
clashmap.workspace = true
compute_api.workspace = true
console-subscriber.workspace = true
consumption_metrics.workspace = true
env_logger.workspace = true
framed-websockets.workspace = true
@@ -130,3 +131,6 @@ rstest.workspace = true
walkdir.workspace = true
rand_distr = "0.4"
tokio-postgres.workspace = true
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
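The [lints.rust] entry above declares tokio_unstable as an expected cfg, which keeps rustc's unexpected_cfgs lint (checked via --check-cfg since Rust 1.80) quiet on the cfg-gated code later in this diff. A minimal illustration with a hypothetical helper (not code from this diff):

// Without check-cfg = ['cfg(tokio_unstable)'] in [lints.rust], rustc would
// warn "unexpected `cfg` condition name: `tokio_unstable`" on these items.
#[cfg(tokio_unstable)]
fn tokio_console_enabled() -> bool {
    true
}

#[cfg(not(tokio_unstable))]
fn tokio_console_enabled() -> bool {
    false
}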

View File

@@ -15,7 +15,7 @@ use tracing_subscriber::filter::{EnvFilter, LevelFilter};
use tracing_subscriber::fmt::format::{Format, Full};
use tracing_subscriber::fmt::time::SystemTime;
use tracing_subscriber::fmt::{FormatEvent, FormatFields};
use tracing_subscriber::layer::{Context, Layer};
use tracing_subscriber::layer::{Context, Identity, Layer};
use tracing_subscriber::prelude::*;
use tracing_subscriber::registry::{LookupSpan, SpanRef};
@@ -69,11 +69,23 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
None
};
let export_layer = Identity::new()
.and_then(otlp_layer)
.and_then(json_log_layer)
.and_then(text_log_layer)
.with_filter(env_filter);
#[cfg(not(tokio_unstable))]
let tokio_console_layer = Identity::new();
#[cfg(tokio_unstable)]
let tokio_console_layer = console_subscriber::ConsoleLayer::builder()
.with_default_env()
.enable_grpc_web(true)
.spawn();
tracing_subscriber::registry()
.with(env_filter)
.with(otlp_layer)
.with(json_log_layer)
.with(text_log_layer)
.with(export_layer)
.with(tokio_console_layer)
.try_init()?;
Ok(LoggingGuard)
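Condensing the hunk above: the EnvFilter now wraps only the export stack (otlp/json/text layers), while the console layer is registered unfiltered so tokio-console keeps receiving the task instrumentation it needs. A rough standalone sketch of the same layering, where fmt::layer() stands in for the proxy's export layers and anyhow is assumed as the error type:

use tracing_subscriber::{filter::EnvFilter, layer::Identity, prelude::*};

fn init_tracing() -> anyhow::Result<()> {
    let env_filter = EnvFilter::builder().from_env_lossy();

    // The filter applies to the export layers only.
    let export_layer = Identity::new()
        .and_then(tracing_subscriber::fmt::layer()) // stand-in for otlp/json/text layers
        .with_filter(env_filter);

    // The console layer is gated on --cfg=tokio_unstable and left unfiltered.
    #[cfg(not(tokio_unstable))]
    let tokio_console_layer = Identity::new();
    #[cfg(tokio_unstable)]
    let tokio_console_layer = console_subscriber::ConsoleLayer::builder()
        .with_default_env()
        .enable_grpc_web(true)
        .spawn();

    tracing_subscriber::registry()
        .with(export_layer)
        .with(tokio_console_layer)
        .try_init()?;
    Ok(())
}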

View File

@@ -120,20 +120,6 @@ pub enum InterpretedWalReaderError {
WalStreamClosed,
}
enum CurrentPositionUpdate {
Reset(Lsn),
NotReset(Lsn),
}
impl CurrentPositionUpdate {
fn current_position(&self) -> Lsn {
match self {
CurrentPositionUpdate::Reset(lsn) => *lsn,
CurrentPositionUpdate::NotReset(lsn) => *lsn,
}
}
}
impl InterpretedWalReaderState {
fn current_position(&self) -> Option<Lsn> {
match self {
@@ -143,26 +129,6 @@ impl InterpretedWalReaderState {
InterpretedWalReaderState::Done => None,
}
}
// Reset the current position of the WAL reader if the requested starting position
// of the new shard is smaller than the current value.
fn maybe_reset(&mut self, new_shard_start_pos: Lsn) -> CurrentPositionUpdate {
match self {
InterpretedWalReaderState::Running {
current_position, ..
} => {
if new_shard_start_pos < *current_position {
*current_position = new_shard_start_pos;
CurrentPositionUpdate::Reset(*current_position)
} else {
CurrentPositionUpdate::NotReset(*current_position)
}
}
InterpretedWalReaderState::Done => {
panic!("maybe_reset called on finished reader")
}
}
}
}
pub(crate) struct AttachShardNotification {
@@ -444,24 +410,15 @@ impl InterpretedWalReader {
};
senders.push(ShardSenderState { sender_id: new_sender_id, tx: sender, next_record_lsn: start_pos});
// If the shard is subscribing below the current position the we need
// to update the cursor that tracks where we are at in the WAL
// ([`Self::state`]) and reset the WAL stream itself
// (`[Self::wal_stream`]). This must be done atomically from the POV of
// anything outside the select statement.
let position_reset = self.state.write().unwrap().maybe_reset(start_pos);
match position_reset {
CurrentPositionUpdate::Reset(to) => {
self.wal_stream.reset(to).await;
wal_decoder = WalStreamDecoder::new(to, self.pg_version);
},
CurrentPositionUpdate::NotReset(_) => {}
};
let current_pos = self.state.read().unwrap().current_position().unwrap();
if start_pos < current_pos {
self.wal_stream.reset(start_pos).await;
wal_decoder = WalStreamDecoder::new(start_pos, self.pg_version);
}
tracing::info!(
"Added shard sender {} with start_pos={} current_pos={}",
ShardSenderId::new(shard_id, new_sender_id), start_pos, position_reset.current_position()
ShardSenderId::new(shard_id, new_sender_id), start_pos, current_pos
);
}
}
@@ -627,7 +584,7 @@ mod tests {
.unwrap();
let resident_tli = tli.wal_residence_guard().await.unwrap();
let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None)
let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT)
.await
.unwrap();
let end_pos = end_watch.get();
@@ -758,6 +715,7 @@ mod tests {
const MSG_COUNT: usize = 200;
const PG_VERSION: u32 = 17;
const SHARD_COUNT: u8 = 2;
const ATTACHED_SHARDS: u8 = 4;
let start_lsn = Lsn::from_str("0/149FD18").unwrap();
let env = Env::new(true).unwrap();
@@ -767,11 +725,9 @@ mod tests {
.unwrap();
let resident_tli = tli.wal_residence_guard().await.unwrap();
let mut next_record_lsns = Vec::default();
let end_watch =
Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut next_record_lsns))
.await
.unwrap();
let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT)
.await
.unwrap();
let end_pos = end_watch.get();
let streaming_wal_reader = StreamingWalReader::new(
@@ -790,71 +746,38 @@ mod tests {
)
.unwrap();
struct Sender {
tx: Option<tokio::sync::mpsc::Sender<Batch>>,
rx: tokio::sync::mpsc::Receiver<Batch>,
shard: ShardIdentity,
start_lsn: Lsn,
received_next_record_lsns: Vec<Lsn>,
}
let (tx, rx) = tokio::sync::mpsc::channel::<Batch>(MSG_COUNT * 2);
let mut batch_receivers = vec![rx];
impl Sender {
fn new(start_lsn: Lsn, shard: ShardIdentity) -> Self {
let (tx, rx) = tokio::sync::mpsc::channel::<Batch>(MSG_COUNT * 2);
Self {
tx: Some(tx),
rx,
shard,
start_lsn,
received_next_record_lsns: Vec::default(),
}
}
}
assert!(next_record_lsns.len() > 7);
let start_lsns = vec![
next_record_lsns[5],
next_record_lsns[1],
next_record_lsns[3],
];
let mut senders = start_lsns
.into_iter()
.map(|lsn| Sender::new(lsn, shard_0))
.collect::<Vec<_>>();
let first_sender = senders.first_mut().unwrap();
let handle = InterpretedWalReader::spawn(
streaming_wal_reader,
first_sender.start_lsn,
first_sender.tx.take().unwrap(),
first_sender.shard,
start_lsn,
tx,
shard_0,
PG_VERSION,
&Some("pageserver".to_string()),
);
for sender in senders.iter_mut().skip(1) {
handle
.fanout(sender.shard, sender.tx.take().unwrap(), sender.start_lsn)
.unwrap();
for _ in 0..(ATTACHED_SHARDS - 1) {
let (tx, rx) = tokio::sync::mpsc::channel::<Batch>(MSG_COUNT * 2);
handle.fanout(shard_0, tx, start_lsn).unwrap();
batch_receivers.push(rx);
}
for sender in senders.iter_mut() {
loop {
let batch = sender.rx.recv().await.unwrap();
tracing::info!(
"Sender with start_lsn={} received batch ending at {} with {} records",
sender.start_lsn,
batch.wal_end_lsn,
batch.records.records.len()
loop {
let batch = batch_receivers.first_mut().unwrap().recv().await.unwrap();
for rx in batch_receivers.iter_mut().skip(1) {
let other_batch = rx.recv().await.unwrap();
assert_eq!(batch.wal_end_lsn, other_batch.wal_end_lsn);
assert_eq!(
batch.available_wal_end_lsn,
other_batch.available_wal_end_lsn
);
}
for rec in batch.records.records {
sender.received_next_record_lsns.push(rec.next_record_lsn);
}
if batch.wal_end_lsn == batch.available_wal_end_lsn {
break;
}
if batch.wal_end_lsn == batch.available_wal_end_lsn {
break;
}
}
@@ -869,20 +792,5 @@ mod tests {
}
assert!(done);
for sender in senders {
tracing::info!(
"Validating records received by sender with start_lsn={}",
sender.start_lsn
);
assert!(sender.received_next_record_lsns.is_sorted());
let expected = next_record_lsns
.iter()
.filter(|lsn| **lsn > sender.start_lsn)
.copied()
.collect::<Vec<_>>();
assert_eq!(sender.received_next_record_lsns, expected);
}
}
}

View File

@@ -122,7 +122,6 @@ impl Env {
start_lsn: Lsn,
msg_size: usize,
msg_count: usize,
mut next_record_lsns: Option<&mut Vec<Lsn>>,
) -> anyhow::Result<EndWatch> {
let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(receive_wal::MSG_QUEUE_SIZE);
let (reply_tx, mut reply_rx) = tokio::sync::mpsc::channel(receive_wal::REPLY_QUEUE_SIZE);
@@ -131,7 +130,7 @@ impl Env {
WalAcceptor::spawn(tli.wal_residence_guard().await?, msg_rx, reply_tx, Some(0));
let prefix = c"neon-file:";
let prefix = c"p";
let prefixlen = prefix.to_bytes_with_nul().len();
assert!(msg_size >= prefixlen);
let message = vec![0; msg_size - prefixlen];
@@ -140,9 +139,6 @@ impl Env {
&mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), start_lsn);
for _ in 0..msg_count {
let (lsn, record) = walgen.next().unwrap();
if let Some(ref mut lsns) = next_record_lsns {
lsns.push(lsn);
}
let req = AppendRequest {
h: AppendRequestHeader {

View File

@@ -246,7 +246,7 @@ mod tests {
.unwrap();
let resident_tli = tli.wal_residence_guard().await.unwrap();
let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None)
let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT)
.await
.unwrap();
let end_pos = end_watch.get();

View File

@@ -225,7 +225,7 @@ pub(crate) enum NotifyError {
// We shutdown while sending
#[error("Shutting down")]
ShuttingDown,
// A response indicates we will never succeed, such as 400 or 403
// A response indicates we will never succeed, such as 400 or 404
#[error("Non-retryable error {0}")]
Fatal(StatusCode),

View File

@@ -115,15 +115,6 @@ impl ReconcilerConfigBuilder {
}
}
pub(crate) fn tenant_creation_hint(self, hint: bool) -> Self {
Self {
config: ReconcilerConfig {
tenant_creation_hint: hint,
..self.config
},
}
}
pub(crate) fn build(self) -> ReconcilerConfig {
self.config
}
@@ -138,10 +129,6 @@ pub(crate) struct ReconcilerConfig {
// During live migrations this is the amount of time that
// the pageserver will hold our poll.
secondary_download_request_timeout: Option<Duration>,
// A hint indicating whether this reconciliation is done on the
// creation of a new tenant. This only informs logging behaviour.
tenant_creation_hint: bool,
}
impl ReconcilerConfig {
@@ -156,10 +143,6 @@ impl ReconcilerConfig {
self.secondary_download_request_timeout
.unwrap_or(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT)
}
pub(crate) fn tenant_creation_hint(&self) -> bool {
self.tenant_creation_hint
}
}
/// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O
@@ -951,35 +934,16 @@ impl Reconciler {
)
.await;
if let Err(e) = &result {
// Set this flag so that in our ReconcileResult we will set the flag on the shard that it
// needs to retry at some point.
self.compute_notify_failure = true;
// It is up to the caller whether they want to drop out on this error, but they don't have to:
// in general we should avoid letting unavailability of the cloud control plane stop us from
// making progress.
match e {
// 404s from cplane during tenant creation are expected.
// Cplane only persists the shards to the database after
// creating the tenant and the timeline. If we notify before
// that, we'll get a 404.
//
// This is fine because tenant creations happen via /location_config
// and that returns the list of locations in the response. Hence, we
// silence the error and return Ok(()) here. Reconciliation will still
// be retried because we set [`Reconciler::compute_notify_failure`] above.
NotifyError::Unexpected(hyper::StatusCode::NOT_FOUND)
if self.reconciler_config.tenant_creation_hint() =>
{
return Ok(());
}
NotifyError::ShuttingDown => {}
_ => {
tracing::warn!(
"Failed to notify compute of attached pageserver {node}: {e}"
);
}
if !matches!(e, NotifyError::ShuttingDown) {
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
}
// Set this flag so that in our ReconcileResult we will set the flag on the shard that it
// needs to retry at some point.
self.compute_notify_failure = true;
}
result
} else {

View File

@@ -2238,14 +2238,9 @@ impl Service {
let waiters = {
let mut locked = self.inner.write().unwrap();
let (nodes, tenants, _scheduler) = locked.parts_mut();
let config = ReconcilerConfigBuilder::new()
.tenant_creation_hint(true)
.build();
tenants
.range_mut(TenantShardId::tenant_range(tenant_id))
.filter_map(|(_shard_id, shard)| {
self.maybe_configured_reconcile_shard(shard, nodes, config)
})
.filter_map(|(_shard_id, shard)| self.maybe_reconcile_shard(shard, nodes))
.collect::<Vec<_>>()
};

View File

@@ -707,7 +707,6 @@ impl TenantShard {
if let Some(node_id) = self.intent.get_attached() {
// Populate secondary by demoting the attached node
self.intent.demote_attached(scheduler, *node_id);
modified = true;
} else if self.intent.secondary.is_empty() {
// Populate secondary by scheduling a fresh node
@@ -980,51 +979,24 @@ impl TenantShard {
),
)
})
.collect::<HashMap<_, _>>();
.collect::<Vec<_>>();
if secondary_scores.iter().any(|score| score.1.is_none()) {
// Trivial case: if we only have one secondary, drop that one
if self.intent.get_secondary().len() == 1 {
return Some(ScheduleOptimization {
sequence: self.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(
*self.intent.get_secondary().first().unwrap(),
),
});
}
// Try to find a "good" secondary to keep, without relying on scores (one or more nodes is in a state
// where its score can't be calculated), and drop the others. This enables us to make progress in
// most cases, even if some nodes are offline or have scheduling=pause set.
debug_assert!(self.intent.attached.is_some()); // We should not make it here unless attached -- this
// logic presumes we are in a mode where we want secondaries to be in non-home AZ
if let Some(retain_secondary) = self.intent.get_secondary().iter().find(|n| {
let in_home_az = scheduler.get_node_az(n) == self.intent.preferred_az_id;
let is_available = secondary_scores
.get(n)
.expect("Built from same list of nodes")
.is_some();
is_available && !in_home_az
}) {
// Great, we found one to retain. Pick some other to drop.
if let Some(victim) = self
.intent
.get_secondary()
.iter()
.find(|n| n != &retain_secondary)
{
// Don't have full list of scores, so can't make a good decision about which to drop unless
// there is an obvious one in the wrong AZ
for secondary in self.intent.get_secondary() {
if scheduler.get_node_az(secondary) == self.intent.preferred_az_id {
return Some(ScheduleOptimization {
sequence: self.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(*victim),
action: ScheduleOptimizationAction::RemoveSecondary(*secondary),
});
}
}
// Fall through: we didn't identify one to remove. This ought to be rare.
tracing::warn!("Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)",
self.intent.get_secondary()
);
self.intent.get_secondary()
);
} else {
let victim = secondary_scores
.iter()
@@ -1033,7 +1005,7 @@ impl TenantShard {
.0;
return Some(ScheduleOptimization {
sequence: self.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(*victim),
action: ScheduleOptimizationAction::RemoveSecondary(victim),
});
}
}
@@ -2407,110 +2379,6 @@ pub(crate) mod tests {
Ok(())
}
/// Test how the optimisation code behaves with an extra secondary
#[test]
fn optimize_removes_secondary() -> anyhow::Result<()> {
let az_a_tag = AvailabilityZone("az-a".to_string());
let az_b_tag = AvailabilityZone("az-b".to_string());
let mut nodes = make_test_nodes(
4,
&[
az_a_tag.clone(),
az_b_tag.clone(),
az_a_tag.clone(),
az_b_tag.clone(),
],
);
let mut scheduler = Scheduler::new(nodes.values());
let mut schedule_context = ScheduleContext::default();
let mut shard_a = make_test_tenant_shard(PlacementPolicy::Attached(1));
shard_a.intent.preferred_az_id = Some(az_a_tag.clone());
shard_a
.schedule(&mut scheduler, &mut schedule_context)
.unwrap();
// Attached on node 1, secondary on node 2
assert_eq!(shard_a.intent.get_attached(), &Some(NodeId(1)));
assert_eq!(shard_a.intent.get_secondary(), &vec![NodeId(2)]);
// Initially optimiser is idle
assert_eq!(
shard_a.optimize_attachment(&mut scheduler, &schedule_context),
None
);
assert_eq!(
shard_a.optimize_secondary(&mut scheduler, &schedule_context),
None
);
// A spare secondary in the home AZ: it should be removed -- this is the situation when we're midway through a graceful migration, after cutting over
// to our new location
shard_a.intent.push_secondary(&mut scheduler, NodeId(3));
let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context);
assert_eq!(
optimization,
Some(ScheduleOptimization {
sequence: shard_a.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(NodeId(3))
})
);
shard_a.apply_optimization(&mut scheduler, optimization.unwrap());
// A spare secondary in the non-home AZ, and one of them is offline
shard_a.intent.push_secondary(&mut scheduler, NodeId(4));
nodes
.get_mut(&NodeId(4))
.unwrap()
.set_availability(NodeAvailability::Offline);
scheduler.node_upsert(nodes.get(&NodeId(4)).unwrap());
let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context);
assert_eq!(
optimization,
Some(ScheduleOptimization {
sequence: shard_a.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(NodeId(4))
})
);
shard_a.apply_optimization(&mut scheduler, optimization.unwrap());
// A spare secondary when should have none
shard_a.policy = PlacementPolicy::Attached(0);
let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context);
assert_eq!(
optimization,
Some(ScheduleOptimization {
sequence: shard_a.sequence,
action: ScheduleOptimizationAction::RemoveSecondary(NodeId(2))
})
);
shard_a.apply_optimization(&mut scheduler, optimization.unwrap());
assert_eq!(shard_a.intent.get_attached(), &Some(NodeId(1)));
assert_eq!(shard_a.intent.get_secondary(), &vec![]);
// Check that in secondary mode, we preserve the secondary in the preferred AZ
let mut schedule_context = ScheduleContext::default(); // Fresh context, we're about to call schedule()
shard_a.policy = PlacementPolicy::Secondary;
shard_a
.schedule(&mut scheduler, &mut schedule_context)
.unwrap();
assert_eq!(shard_a.intent.get_attached(), &None);
assert_eq!(shard_a.intent.get_secondary(), &vec![NodeId(1)]);
assert_eq!(
shard_a.optimize_attachment(&mut scheduler, &schedule_context),
None
);
assert_eq!(
shard_a.optimize_secondary(&mut scheduler, &schedule_context),
None
);
shard_a.intent.clear(&mut scheduler);
Ok(())
}
// Optimize til quiescent: this emulates what Service::optimize_all does, when
// called repeatedly in the background.
// Returns the applied optimizations

View File

@@ -34,20 +34,16 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
cur.execute("set log_statement = 'all'")
cur.execute("create table t(x integer)")
for _ in range(n_iters):
with zenbenchmark.record_duration(f"insert into t values (generate_series(1,{n_records}))"):
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
time.sleep(1)
with zenbenchmark.record_duration("vacuum t"):
cur.execute("vacuum t")
cur.execute("vacuum t")
with zenbenchmark.record_duration("SELECT count(*) from t"):
with zenbenchmark.record_duration("test_query"):
cur.execute("SELECT count(*) from t")
assert cur.fetchone() == (n_iters * n_records,)
with zenbenchmark.record_duration("flush_ep_to_pageserver"):
flush_ep_to_pageserver(env, endpoint, tenant, timeline)
with zenbenchmark.record_duration("timeline_checkpoint"):
env.pageserver.http_client().timeline_checkpoint(
tenant, timeline, compact=False, wait_until_uploaded=True
)
flush_ep_to_pageserver(env, endpoint, tenant, timeline)
env.pageserver.http_client().timeline_checkpoint(
tenant, timeline, compact=False, wait_until_uploaded=True
)

View File

@@ -136,7 +136,7 @@ def run_command_and_log_output(command, log_file_path: Path):
"LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}",
"PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")),
"PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")),
"PGOPTIONS": "-c idle_in_transaction_session_timeout=0 -c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7",
"PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7",
}
# Combine the current environment with custom variables
env = os.environ.copy()

View File

@@ -29,21 +29,6 @@ AGGRESSIVE_COMPACTION_TENANT_CONF = {
# "lsn_lease_length": "0s", -- TODO: would cause branch creation errors, should fix later
}
PREEMPT_COMPACTION_TENANT_CONF = {
"gc_period": "5s",
"compaction_period": "5s",
# Small checkpoint distance to create many layers
"checkpoint_distance": 1024**2,
# Compact small layers
"compaction_target_size": 1024**2,
"image_creation_threshold": 1,
"image_creation_preempt_threshold": 1,
# compact more frequently
"compaction_threshold": 3,
"compaction_upper_limit": 6,
"lsn_lease_length": "0s",
}
@skip_in_debug_build("only run with release build")
@pytest.mark.parametrize(
@@ -51,8 +36,7 @@ PREEMPT_COMPACTION_TENANT_CONF = {
[PageserverWalReceiverProtocol.VANILLA, PageserverWalReceiverProtocol.INTERPRETED],
)
def test_pageserver_compaction_smoke(
neon_env_builder: NeonEnvBuilder,
wal_receiver_protocol: PageserverWalReceiverProtocol,
neon_env_builder: NeonEnvBuilder, wal_receiver_protocol: PageserverWalReceiverProtocol
):
"""
This is a smoke test that compaction kicks in. The workload repeatedly churns
@@ -70,8 +54,7 @@ def test_pageserver_compaction_smoke(
page_cache_size=10
"""
conf = AGGRESSIVE_COMPACTION_TENANT_CONF.copy()
env = neon_env_builder.init_start(initial_tenant_conf=conf)
env = neon_env_builder.init_start(initial_tenant_conf=AGGRESSIVE_COMPACTION_TENANT_CONF)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
@@ -130,41 +113,6 @@ page_cache_size=10
assert vectored_average < 8
@skip_in_debug_build("only run with release build")
def test_pageserver_compaction_preempt(
neon_env_builder: NeonEnvBuilder,
):
# Ideally we should be able to do unit tests for this, but we need real Postgres
# WALs in order to do unit testing...
conf = PREEMPT_COMPACTION_TENANT_CONF.copy()
env = neon_env_builder.init_start(initial_tenant_conf=conf)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
row_count = 200000
churn_rounds = 10
ps_http = env.pageserver.http_client()
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageserver.id)
log.info("Writing initial data ...")
workload.write_rows(row_count, env.pageserver.id)
for i in range(1, churn_rounds + 1):
log.info(f"Running churn round {i}/{churn_rounds} ...")
workload.churn_rows(row_count, env.pageserver.id, upload=False)
workload.validate(env.pageserver.id)
ps_http.timeline_compact(tenant_id, timeline_id, wait_until_uploaded=True)
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
# ensure image layer creation gets preempted and then resumed
env.pageserver.assert_log_contains("resuming image layer creation")
@skip_in_debug_build("only run with release build")
@pytest.mark.parametrize(
"with_branches",

View File

@@ -95,8 +95,6 @@ def test_remote_extensions(
# mock remote_extensions spec
spec: dict[str, Any] = {
"public_extensions": ["anon"],
"custom_extensions": None,
"library_index": {
"anon": "anon",
},

View File

@@ -1,11 +1,11 @@
{
"v17": [
"17.2",
"8dfd5a7030d3e8a98b60265ebe045788892ac7f3"
"f0ffc8279dbcbbc439981a4fd001a9687e5d665d"
],
"v16": [
"16.6",
"86d9ea96ebb9088eac62f57f1f5ace68e70e0d1c"
"3cf7ce1afab75027716d14223f95ddb300754162"
],
"v15": [
"15.10",

View File

@@ -25,6 +25,7 @@ camino = { version = "1", default-features = false, features = ["serde1"] }
chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] }
clap = { version = "4", features = ["derive", "env", "string"] }
clap_builder = { version = "4", default-features = false, features = ["color", "env", "help", "std", "string", "suggestions", "usage"] }
crossbeam-utils = { version = "0.8" }
crypto-bigint = { version = "0.5", features = ["generic-array", "zeroize"] }
der = { version = "0.7", default-features = false, features = ["oid", "pem", "std"] }
deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] }
@@ -40,11 +41,12 @@ generic-array = { version = "0.14", default-features = false, features = ["more_
getrandom = { version = "0.2", default-features = false, features = ["std"] }
half = { version = "2", default-features = false, features = ["num-traits"] }
hashbrown = { version = "0.14", features = ["raw"] }
hdrhistogram = { version = "7" }
hex = { version = "0.4", features = ["serde"] }
hmac = { version = "0.12", default-features = false, features = ["reset"] }
hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14", features = ["full"] }
hyper-dff4ba8e3ae991db = { package = "hyper", version = "1", features = ["full"] }
hyper-util = { version = "0.1", features = ["client-legacy", "http1", "http2", "server", "service"] }
hyper-util = { version = "0.1", features = ["client-legacy", "server-auto", "service"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] }
indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] }
itertools = { version = "0.12" }
@@ -83,16 +85,18 @@ sync_wrapper = { version = "0.1", default-features = false, features = ["futures
tikv-jemalloc-ctl = { version = "0.6", features = ["stats", "use_std"] }
tikv-jemalloc-sys = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
time = { version = "0.3", features = ["macros", "serde-well-known"] }
tokio = { version = "1", features = ["full", "test-util"] }
tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-rustls = { version = "0.26", default-features = false, features = ["logging", "ring", "tls12"] }
tokio-stream = { version = "0.1" }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7", features = ["codec", "compat", "io", "rt"] }
toml_edit = { version = "0.22", features = ["serde"] }
tonic = { version = "0.12", default-features = false, features = ["codegen", "prost", "tls-roots"] }
tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "util"] }
tonic = { version = "0.12", features = ["tls-roots"] }
tower-9fbad63c4bcf4a8f = { package = "tower", version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "util"] }
tower-d8f496e17d97b5cb = { package = "tower", version = "0.5", default-features = false, features = ["log", "make", "util"] }
tracing = { version = "0.1", features = ["log"] }
tracing-core = { version = "0.1" }
tracing-log = { version = "0.2" }
tracing-subscriber = { version = "0.3", default-features = false, features = ["env-filter", "fmt", "json", "parking_lot", "smallvec", "tracing-log"] }
url = { version = "2", features = ["serde"] }
zerocopy = { version = "0.7", features = ["derive", "simd"] }
zeroize = { version = "1", features = ["derive", "serde"] }