Merge branch 'main' into arpad/endpoint_azure

2026-01-14 17:02:56 +00:00 · 2024-11-25 13:13:36 +01:00
parent bb839903f8 6f6749c4a9
commit 790ac8a552
161 changed files with 5233 additions and 1446 deletions
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -19,8 +19,8 @@ on:
        description: 'debug or release'
        required: true
        type: string
-      pg-versions:
-        description: 'a json array of postgres versions to run regression tests on'
+      test-cfg:
+        description: 'a json object of postgres versions and lfc states to run regression tests on'
        required: true
        type: string

@@ -276,14 +276,14 @@ jobs:
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    strategy:
      fail-fast: false
-      matrix:
-        pg_version: ${{ fromJson(inputs.pg-versions) }}
+      matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true

      - name: Pytest regression tests
+        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
        uses: ./.github/actions/run-python-test-set
        timeout-minutes: 60
        with:
@@ -300,6 +300,7 @@ jobs:
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
+          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

      # Temporary disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -558,12 +558,12 @@ jobs:
        arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')

        cd /home/nonroot
-        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.1-1.pgdg110+1_${arch}.deb"
-        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb"
-        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.5-1.pgdg110+1_${arch}.deb"
-        dpkg -x libpq5_17.1-1.pgdg110+1_${arch}.deb pg
-        dpkg -x postgresql-16_16.5-1.pgdg110+1_${arch}.deb pg
-        dpkg -x postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb pg
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg110+1_${arch}.deb"
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb"
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg110+1_${arch}.deb"
+        dpkg -x libpq5_17.2-1.pgdg110+1_${arch}.deb pg
+        dpkg -x postgresql-16_16.6-1.pgdg110+1_${arch}.deb pg
+        dpkg -x postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb pg

        mkdir -p /tmp/neon/pg_install/v16/bin
        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -253,7 +253,14 @@ jobs:
      build-tag: ${{ needs.tag.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
-      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
+      # run without LFC on v17 release only
+      test-cfg: |
+        ${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
+                                                {"pg_version":"v15", "lfc_state": "without-lfc"},
+                                                {"pg_version":"v16", "lfc_state": "without-lfc"},
+                                                {"pg_version":"v17", "lfc_state": "without-lfc"},
+                                                {"pg_version":"v17", "lfc_state": "with-lfc"}]'
+                                           || '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
    secrets: inherit

  # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
--- a/.github/workflows/report-workflow-stats-batch.yml
+++ b/.github/workflows/report-workflow-stats-batch.yml
@@ -4,10 +4,12 @@ on:
  schedule:
    - cron: '*/15 * * * *'
    - cron: '25 0 * * *'
+    - cron: '25 1 * * 6'

 jobs:
-  gh-workflow-stats-batch:
-    name: GitHub Workflow Stats Batch
+  gh-workflow-stats-batch-2h:
+    name: GitHub Workflow Stats Batch 2 hours
+    if: github.event.schedule == '*/15 * * * *'
    runs-on: ubuntu-22.04
    permissions:
      actions: read
@@ -16,14 +18,36 @@ jobs:
      uses: neondatabase/gh-workflow-stats-action@v0.2.1
      with:
        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
-        db_table: "gh_workflow_stats_batch_neon"
+        db_table: "gh_workflow_stats_neon"
        gh_token: ${{ secrets.GITHUB_TOKEN }}
        duration: '2h'
-    - name: Export Workflow Run for the past 24 hours
-      if: github.event.schedule == '25 0 * * *'
+
+  gh-workflow-stats-batch-48h:
+    name: GitHub Workflow Stats Batch 48 hours
+    if: github.event.schedule == '25 0 * * *'
+    runs-on: ubuntu-22.04
+    permissions:
+      actions: read
+    steps:
+    - name: Export Workflow Run for the past 48 hours
      uses: neondatabase/gh-workflow-stats-action@v0.2.1
      with:
        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
-        db_table: "gh_workflow_stats_batch_neon"
+        db_table: "gh_workflow_stats_neon"
        gh_token: ${{ secrets.GITHUB_TOKEN }}
-        duration: '24h'
+        duration: '48h'
+
+  gh-workflow-stats-batch-30d:
+    name: GitHub Workflow Stats Batch 30 days
+    if: github.event.schedule == '25 1 * * 6'
+    runs-on: ubuntu-22.04
+    permissions:
+      actions: read
+    steps:
+    - name: Export Workflow Run for the past 30 days
+      uses: neondatabase/gh-workflow-stats-action@v0.2.1
+      with:
+        db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
+        db_table: "gh_workflow_stats_neon"
+        gh_token: ${{ secrets.GITHUB_TOKEN }}
+        duration: '720h'
--- a/.github/workflows/report-workflow-stats.yml
+++ b/.github/workflows/report-workflow-stats.yml
@@ -1,41 +0,0 @@
-name: Report Workflow Stats
-
-on:
-  workflow_run:
-    workflows:
-    - Add `external` label to issues and PRs created by external users
-    - Benchmarking
-    - Build and Test
-    - Build and Test Locally
-    - Build build-tools image
-    - Check Permissions
-    - Check neon with extra platform builds
-    - Cloud Regression Test
-    - Create Release Branch
-    - Handle `approved-for-ci-run` label
-    - Lint GitHub Workflows
-    - Notify Slack channel about upcoming release
-    - Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
-    - Pin build-tools image
-    - Prepare benchmarking databases by restoring dumps
-    - Push images to ACR
-    - Test Postgres client libraries
-    - Trigger E2E Tests
-    - cleanup caches by a branch
-    - Pre-merge checks
-    types: [completed]
-
-jobs:
-  gh-workflow-stats:
-    name: Github Workflow Stats
-    runs-on: ubuntu-22.04
-    permissions:
-      actions: read
-    steps:
-    - name: Export GH Workflow Stats
-      uses: neondatabase/gh-workflow-stats-action@v0.1.4
-      with:
-        DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
-        DB_TABLE: "gh_workflow_stats_neon"
-        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        GH_RUN_ID: ${{ github.event.workflow_run.id }}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -46,6 +46,15 @@ dependencies = [
 "memchr",
 ]

+[[package]]
+name = "aligned-vec"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a"
+dependencies = [
+ "equator",
+]
+
 [[package]]
 name = "allocator-api2"
 version = "0.2.16"
@@ -146,6 +155,12 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
 [[package]]
 name = "asn1-rs"
 version = "0.6.2"
@@ -359,6 +374,28 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "aws-sdk-kms"
+version = "1.47.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "564a597a3c71a957d60a2e4c62c93d78ee5a0d636531e15b760acad983a5c18e"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "http 0.2.9",
+ "once_cell",
+ "regex-lite",
+ "tracing",
+]
+
 [[package]]
 name = "aws-sdk-s3"
 version = "1.52.0"
@@ -575,9 +612,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime"
-version = "1.7.1"
+version = "1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87"
+checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db"
 dependencies = [
 "aws-smithy-async",
 "aws-smithy-http",
@@ -742,7 +779,7 @@ dependencies = [
 "once_cell",
 "paste",
 "pin-project",
- "quick-xml",
+ "quick-xml 0.31.0",
 "rand 0.8.5",
 "reqwest 0.11.19",
 "rustc_version",
@@ -1220,6 +1257,10 @@ name = "compute_tools"
 version = "0.1.0"
 dependencies = [
 "anyhow",
+ "aws-config",
+ "aws-sdk-kms",
+ "aws-sdk-s3",
+ "base64 0.13.1",
 "bytes",
 "camino",
 "cfg-if",
@@ -1237,13 +1278,16 @@ dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
 "postgres",
+ "postgres_initdb",
 "prometheus",
 "regex",
 "remote_storage",
 "reqwest 0.12.4",
 "rlimit",
 "rust-ini",
+ "serde",
 "serde_json",
+ "serde_with",
 "signal-hook",
 "tar",
 "thiserror",
@@ -1381,6 +1425,15 @@ version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"

+[[package]]
+name = "cpp_demangle"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.9"
@@ -1904,6 +1957,26 @@ dependencies = [
 "termcolor",
 ]

+[[package]]
+name = "equator"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea"
+dependencies = [
+ "equator-macro",
+]
+
+[[package]]
+name = "equator-macro"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.52",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.1"
@@ -2011,6 +2084,18 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

+[[package]]
+name = "findshlibs"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
+dependencies = [
+ "cc",
+ "lazy_static",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "fixedbitset"
 version = "0.4.2"
@@ -2089,9 +2174,9 @@ dependencies = [

 [[package]]
 name = "futures-channel"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
+checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
 dependencies = [
 "futures-core",
 "futures-sink",
@@ -2099,9 +2184,9 @@ dependencies = [

 [[package]]
 name = "futures-core"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
+checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"

 [[package]]
 name = "futures-executor"
@@ -2116,9 +2201,9 @@ dependencies = [

 [[package]]
 name = "futures-io"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
+checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"

 [[package]]
 name = "futures-lite"
@@ -2137,9 +2222,9 @@ dependencies = [

 [[package]]
 name = "futures-macro"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2148,15 +2233,15 @@ dependencies = [

 [[package]]
 name = "futures-sink"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
+checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"

 [[package]]
 name = "futures-task"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
+checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"

 [[package]]
 name = "futures-timer"
@@ -2166,9 +2251,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"

 [[package]]
 name = "futures-util"
-version = "0.3.30"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
+checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
 dependencies = [
 "futures-channel",
 "futures-core",
@@ -2714,6 +2799,24 @@ version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac"

+[[package]]
+name = "inferno"
+version = "0.11.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
+dependencies = [
+ "ahash",
+ "indexmap 2.0.1",
+ "is-terminal",
+ "itoa",
+ "log",
+ "num-format",
+ "once_cell",
+ "quick-xml 0.26.0",
+ "rgb",
+ "str_stack",
+]
+
 [[package]]
 name = "inotify"
 version = "0.9.6"
@@ -2764,9 +2867,9 @@ dependencies = [

 [[package]]
 name = "ipnet"
-version = "2.9.0"
+version = "2.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
+checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"

 [[package]]
 name = "is-terminal"
@@ -3053,6 +3156,15 @@ version = "2.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"

+[[package]]
+name = "memmap2"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.7.1"
@@ -3278,6 +3390,16 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"

+[[package]]
+name = "num-format"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
+dependencies = [
+ "arrayvec",
+ "itoa",
+]
+
 [[package]]
 name = "num-integer"
 version = "0.1.45"
@@ -3619,6 +3741,7 @@ dependencies = [
 "num_cpus",
 "once_cell",
 "pageserver_api",
+ "pageserver_client",
 "pageserver_compaction",
 "pin-project-lite",
 "postgres",
@@ -3627,6 +3750,7 @@ dependencies = [
 "postgres_backend",
 "postgres_connection",
 "postgres_ffi",
+ "postgres_initdb",
 "pq_proto",
 "procfs",
 "rand 0.8.5",
@@ -4058,7 +4182,7 @@ dependencies = [
 "bytes",
 "once_cell",
 "pq_proto",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "rustls-pemfile 2.1.1",
 "serde",
 "thiserror",
@@ -4102,12 +4226,48 @@ dependencies = [
 "utils",
 ]

+[[package]]
+name = "postgres_initdb"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "camino",
+ "thiserror",
+ "tokio",
+ "workspace_hack",
+]
+
 [[package]]
 name = "powerfmt"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"

+[[package]]
+name = "pprof"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0"
+dependencies = [
+ "aligned-vec",
+ "backtrace",
+ "cfg-if",
+ "criterion",
+ "findshlibs",
+ "inferno",
+ "libc",
+ "log",
+ "nix 0.26.4",
+ "once_cell",
+ "parking_lot 0.12.1",
+ "protobuf",
+ "protobuf-codegen-pure",
+ "smallvec",
+ "symbolic-demangle",
+ "tempfile",
+ "thiserror",
+]
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"
@@ -4260,6 +4420,31 @@ dependencies = [
 "prost",
 ]

+[[package]]
+name = "protobuf"
+version = "2.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
+
+[[package]]
+name = "protobuf-codegen"
+version = "2.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
+dependencies = [
+ "protobuf",
+]
+
+[[package]]
+name = "protobuf-codegen-pure"
+version = "2.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
+dependencies = [
+ "protobuf",
+ "protobuf-codegen",
+]
+
 [[package]]
 name = "proxy"
 version = "0.1.0"
@@ -4333,7 +4518,7 @@ dependencies = [
 "rsa",
 "rstest",
 "rustc-hash",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
 "scopeguard",
@@ -4371,6 +4556,15 @@ dependencies = [
 "zerocopy",
 ]

+[[package]]
+name = "quick-xml"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "quick-xml"
 version = "0.31.0"
@@ -4853,6 +5047,15 @@ dependencies = [
 "subtle",
 ]

+[[package]]
+name = "rgb"
+version = "0.8.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "ring"
 version = "0.17.6"
@@ -5028,9 +5231,9 @@ dependencies = [

 [[package]]
 name = "rustls"
-version = "0.23.16"
+version = "0.23.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e"
+checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f"
 dependencies = [
 "log",
 "once_cell",
@@ -5166,6 +5369,7 @@ dependencies = [
 "postgres-protocol",
 "postgres_backend",
 "postgres_ffi",
+ "pprof",
 "pq_proto",
 "rand 0.8.5",
 "regex",
@@ -5712,6 +5916,12 @@ dependencies = [
 "der 0.7.8",
 ]

+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
 [[package]]
 name = "static_assertions"
 version = "1.1.0"
@@ -5738,7 +5948,7 @@ dependencies = [
 "once_cell",
 "parking_lot 0.12.1",
 "prost",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "tokio",
 "tonic",
 "tonic-build",
@@ -5821,7 +6031,7 @@ dependencies = [
 "postgres_ffi",
 "remote_storage",
 "reqwest 0.12.4",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "rustls-native-certs 0.8.0",
 "serde",
 "serde_json",
@@ -5858,6 +6068,12 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "str_stack"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
+
 [[package]]
 name = "stringprep"
 version = "0.1.2"
@@ -5905,6 +6121,29 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"

+[[package]]
+name = "symbolic-common"
+version = "12.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77"
+dependencies = [
+ "debugid",
+ "memmap2",
+ "stable_deref_trait",
+ "uuid",
+]
+
+[[package]]
+name = "symbolic-demangle"
+version = "12.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8"
+dependencies = [
+ "cpp_demangle",
+ "rustc-demangle",
+ "symbolic-common",
+]
+
 [[package]]
 name = "syn"
 version = "1.0.109"
@@ -6254,7 +6493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
 dependencies = [
 "ring",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "tokio",
 "tokio-postgres",
 "tokio-rustls 0.26.0",
@@ -6288,7 +6527,7 @@ version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
 dependencies = [
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "rustls-pki-types",
 "tokio",
 ]
@@ -6697,7 +6936,7 @@ dependencies = [
 "base64 0.22.1",
 "log",
 "once_cell",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "rustls-pki-types",
 "url",
 "webpki-roots 0.26.1",
@@ -6772,6 +7011,7 @@ dependencies = [
 "once_cell",
 "pin-project-lite",
 "postgres_connection",
+ "pprof",
 "pq_proto",
 "rand 0.8.5",
 "regex",
@@ -7306,6 +7546,7 @@ dependencies = [
 "anyhow",
 "axum",
 "axum-core",
+ "base64 0.13.1",
 "base64 0.21.1",
 "base64ct",
 "bytes",
@@ -7340,6 +7581,7 @@ dependencies = [
 "libc",
 "log",
 "memchr",
+ "nix 0.26.4",
 "nom",
 "num-bigint",
 "num-integer",
@@ -7356,7 +7598,7 @@ dependencies = [
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
 "reqwest 0.12.4",
- "rustls 0.23.16",
+ "rustls 0.23.18",
 "scopeguard",
 "serde",
 "serde_json",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,7 @@ members = [
    "libs/vm_monitor",
    "libs/walproposer",
    "libs/wal_decoder",
+    "libs/postgres_initdb",
 ]

 [workspace.package]
@@ -57,6 +58,7 @@ async-trait = "0.1"
 aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
 aws-sdk-s3 = "1.52"
 aws-sdk-iam = "1.46.0"
+aws-sdk-kms = "1.47.0"
 aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
 aws-smithy-types = "1.2"
 aws-credential-types = "1.2.0"
@@ -73,7 +75,7 @@ bytes = "1.0"
 camino = "1.1.6"
 cfg-if = "1.0.0"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
-clap = { version = "4.0", features = ["derive"] }
+clap = { version = "4.0", features = ["derive", "env"] }
 comfy-table = "7.1"
 const_format = "0.2"
 crc32c = "0.6"
@@ -106,7 +108,7 @@ hyper-util = "0.1"
 tokio-tungstenite = "0.21.0"
 indexmap = "2"
 indoc = "2"
-ipnet = "2.9.0"
+ipnet = "2.10.0"
 itertools = "0.10"
 itoa = "1.0.11"
 jsonwebtoken = "9"
@@ -130,6 +132,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
+pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13"
@@ -153,7 +156,7 @@ sentry = { version = "0.32", default-features = false, features = ["backtrace",
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_path_to_error = "0.1"
-serde_with = "2.0"
+serde_with = { version = "2.0", features = [ "base64" ] }
 serde_assert = "0.5.0"
 sha2 = "0.10.2"
 signal-hook = "0.3"
@@ -212,12 +215,14 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
+pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
+postgres_initdb = { path = "./libs/postgres_initdb" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
--- a/README.md
+++ b/README.md
@@ -132,7 +132,7 @@ make -j`sysctl -n hw.logicalcpu` -s
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
+Python (3.11 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.


 #### Running neon database
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -234,7 +234,7 @@ USER nonroot:nonroot
 WORKDIR /home/nonroot

 # Python
-ENV PYTHON_VERSION=3.9.19 \
+ENV PYTHON_VERSION=3.11.10 \
    PYENV_ROOT=/home/nonroot/.pyenv \
    PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
 RUN set -e \
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1243,7 +1243,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \

 #########################################################################################
 #
-# Compile and run the Neon-specific `compute_ctl` binary
+# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
 #
 #########################################################################################
 FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
@@ -1264,6 +1264,7 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
 FROM debian:$DEBIAN_FLAVOR AS compute-tools-image

 COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
+COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import

 #########################################################################################
 #
@@ -1458,6 +1459,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \

 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
+COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import

 # pgbouncer and its config
 COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
@@ -1533,6 +1535,25 @@ RUN apt update && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

+# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
+# used by fast_import
+ARG TARGETARCH
+ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
+RUN set -ex; \
+    \
+    # Determine the expected checksum based on TARGETARCH
+    if [ "${TARGETARCH}" = "amd64" ]; then \
+        CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
+    elif [ "${TARGETARCH}" = "arm64" ]; then \
+        CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
+    else \
+        echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
+    fi; \
+    \
+    # Compute and validate the checksum
+    echo "${CHECKSUM}  /tmp/s5cmd.deb" | sha256sum -c -
+RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
+
 ENV LANG=en_US.utf8
 USER postgres
 ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -10,6 +10,10 @@ default = []
 testing = []

 [dependencies]
+base64.workspace = true
+aws-config.workspace = true
+aws-sdk-s3.workspace = true
+aws-sdk-kms.workspace = true
 anyhow.workspace = true
 camino.workspace = true
 chrono.workspace = true
@@ -27,6 +31,8 @@ opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
 postgres.workspace = true
 regex.workspace = true
+serde.workspace = true
+serde_with.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
 tar.workspace = true
@@ -43,6 +49,7 @@ thiserror.workspace = true
 url.workspace = true
 prometheus.workspace = true

+postgres_initdb.workspace = true
 compute_api.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -105,6 +105,11 @@ fn main() -> Result<()> {
 fn init() -> Result<(String, clap::ArgMatches)> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

+    opentelemetry::global::set_error_handler(|err| {
+        tracing::info!("OpenTelemetry error: {err}");
+    })
+    .expect("global error handler lock poisoned");
+
    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
    thread::spawn(move || {
        for sig in signals.forever() {
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -0,0 +1,338 @@
+//! This program dumps a remote Postgres database into a local Postgres database
+//! and uploads the resulting PGDATA into object storage for import into a Timeline.
+//!
+//! # Context, Architecture, Design
+//!
+//! See cloud.git Fast Imports RFC (<https://github.com/neondatabase/cloud/pull/19799>)
+//! for the full picture.
+//! The RFC describing the storage pieces of importing the PGDATA dump into a Timeline
+//! is publicly accessible at <https://github.com/neondatabase/neon/pull/9538>.
+//!
+//! # This is a Prototype!
+//!
+//! This program is part of a prototype feature and not yet used in production.
+//!
+//! The cloud.git RFC contains lots of suggestions for improving e2e throughput
+//! of this step of the timeline import process.
+//!
+//! # Local Testing
+//!
+//! - Comment out most of the pgxns in The Dockerfile.compute-tools to speed up the build.
+//! - Build the image with the following command:
+//!
+//! ```bash
+//! docker buildx build --build-arg DEBIAN_FLAVOR=bullseye-slim --build-arg GIT_VERSION=local --build-arg PG_VERSION=v14 --build-arg BUILD_TAG="$(date --iso-8601=s -u)"  -t localhost:3030/localregistry/compute-node-v14:latest -f compute/Dockerfile.com
+//! docker push localhost:3030/localregistry/compute-node-v14:latest
+//! ```
+
+use anyhow::Context;
+use aws_config::BehaviorVersion;
+use camino::{Utf8Path, Utf8PathBuf};
+use clap::Parser;
+use nix::unistd::Pid;
+use tracing::{info, info_span, warn, Instrument};
+use utils::fs_ext::is_directory_empty;
+
+#[path = "fast_import/child_stdio_to_log.rs"]
+mod child_stdio_to_log;
+#[path = "fast_import/s3_uri.rs"]
+mod s3_uri;
+#[path = "fast_import/s5cmd.rs"]
+mod s5cmd;
+
+#[derive(clap::Parser)]
+struct Args {
+    #[clap(long)]
+    working_directory: Utf8PathBuf,
+    #[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
+    s3_prefix: s3_uri::S3Uri,
+    #[clap(long)]
+    pg_bin_dir: Utf8PathBuf,
+    #[clap(long)]
+    pg_lib_dir: Utf8PathBuf,
+}
+
+#[serde_with::serde_as]
+#[derive(serde::Deserialize)]
+struct Spec {
+    encryption_secret: EncryptionSecret,
+    #[serde_as(as = "serde_with::base64::Base64")]
+    source_connstring_ciphertext_base64: Vec<u8>,
+}
+
+#[derive(serde::Deserialize)]
+enum EncryptionSecret {
+    #[allow(clippy::upper_case_acronyms)]
+    KMS { key_id: String },
+}
+
+#[tokio::main]
+pub(crate) async fn main() -> anyhow::Result<()> {
+    utils::logging::init(
+        utils::logging::LogFormat::Plain,
+        utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
+        utils::logging::Output::Stdout,
+    )?;
+
+    info!("starting");
+
+    let Args {
+        working_directory,
+        s3_prefix,
+        pg_bin_dir,
+        pg_lib_dir,
+    } = Args::parse();
+
+    let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
+
+    let spec: Spec = {
+        let spec_key = s3_prefix.append("/spec.json");
+        let s3_client = aws_sdk_s3::Client::new(&aws_config);
+        let object = s3_client
+            .get_object()
+            .bucket(&spec_key.bucket)
+            .key(spec_key.key)
+            .send()
+            .await
+            .context("get spec from s3")?
+            .body
+            .collect()
+            .await
+            .context("download spec body")?;
+        serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
+    };
+
+    match tokio::fs::create_dir(&working_directory).await {
+        Ok(()) => {}
+        Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+            if !is_directory_empty(&working_directory)
+                .await
+                .context("check if working directory is empty")?
+            {
+                anyhow::bail!("working directory is not empty");
+            } else {
+                // ok
+            }
+        }
+        Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
+    }
+
+    let pgdata_dir = working_directory.join("pgdata");
+    tokio::fs::create_dir(&pgdata_dir)
+        .await
+        .context("create pgdata directory")?;
+
+    //
+    // Setup clients
+    //
+    let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
+    let kms_client = aws_sdk_kms::Client::new(&aws_config);
+
+    //
+    //  Initialize pgdata
+    //
+    let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
+    postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
+        superuser,
+        locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
+        pg_version: 140000, // XXX: this shouldn't be hard-coded but derived from which compute image we're running in
+        initdb_bin: pg_bin_dir.join("initdb").as_ref(),
+        library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
+        pgdata: &pgdata_dir,
+    })
+    .await
+    .context("initdb")?;
+
+    let nproc = num_cpus::get();
+
+    //
+    // Launch postgres process
+    //
+    let mut postgres_proc = tokio::process::Command::new(pg_bin_dir.join("postgres"))
+        .arg("-D")
+        .arg(&pgdata_dir)
+        .args(["-c", "wal_level=minimal"])
+        .args(["-c", "shared_buffers=10GB"])
+        .args(["-c", "max_wal_senders=0"])
+        .args(["-c", "fsync=off"])
+        .args(["-c", "full_page_writes=off"])
+        .args(["-c", "synchronous_commit=off"])
+        .args(["-c", "maintenance_work_mem=8388608"])
+        .args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
+        .args(["-c", &format!("max_parallel_workers={nproc}")])
+        .args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
+        .args(["-c", &format!("max_worker_processes={nproc}")])
+        .args(["-c", "effective_io_concurrency=100"])
+        .env_clear()
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        .spawn()
+        .context("spawn postgres")?;
+
+    info!("spawned postgres, waiting for it to become ready");
+    tokio::spawn(
+        child_stdio_to_log::relay_process_output(
+            postgres_proc.stdout.take(),
+            postgres_proc.stderr.take(),
+        )
+        .instrument(info_span!("postgres")),
+    );
+    let restore_pg_connstring =
+        format!("host=localhost port=5432 user={superuser} dbname=postgres");
+    loop {
+        let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
+        if res.is_ok() {
+            info!("postgres is ready, could connect to it");
+            break;
+        }
+    }
+
+    //
+    // Decrypt connection string
+    //
+    let source_connection_string = {
+        match spec.encryption_secret {
+            EncryptionSecret::KMS { key_id } => {
+                let mut output = kms_client
+                    .decrypt()
+                    .key_id(key_id)
+                    .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
+                        spec.source_connstring_ciphertext_base64,
+                    ))
+                    .send()
+                    .await
+                    .context("decrypt source connection string")?;
+                let plaintext = output
+                    .plaintext
+                    .take()
+                    .context("get plaintext source connection string")?;
+                String::from_utf8(plaintext.into_inner())
+                    .context("parse source connection string as utf8")?
+            }
+        }
+    };
+
+    //
+    // Start the work
+    //
+
+    let dumpdir = working_directory.join("dumpdir");
+
+    let common_args = [
+        // schema mapping (prob suffices to specify them on one side)
+        "--no-owner".to_string(),
+        "--no-privileges".to_string(),
+        "--no-publications".to_string(),
+        "--no-security-labels".to_string(),
+        "--no-subscriptions".to_string(),
+        "--no-tablespaces".to_string(),
+        // format
+        "--format".to_string(),
+        "directory".to_string(),
+        // concurrency
+        "--jobs".to_string(),
+        num_cpus::get().to_string(),
+        // progress updates
+        "--verbose".to_string(),
+    ];
+
+    info!("dump into the working directory");
+    {
+        let mut pg_dump = tokio::process::Command::new(pg_bin_dir.join("pg_dump"))
+            .args(&common_args)
+            .arg("-f")
+            .arg(&dumpdir)
+            .arg("--no-sync")
+            // POSITIONAL args
+            // source db (db name included in connection string)
+            .arg(&source_connection_string)
+            // how we run it
+            .env_clear()
+            .kill_on_drop(true)
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context("spawn pg_dump")?;
+
+        info!(pid=%pg_dump.id().unwrap(), "spawned pg_dump");
+
+        tokio::spawn(
+            child_stdio_to_log::relay_process_output(pg_dump.stdout.take(), pg_dump.stderr.take())
+                .instrument(info_span!("pg_dump")),
+        );
+
+        let st = pg_dump.wait().await.context("wait for pg_dump")?;
+        info!(status=?st, "pg_dump exited");
+        if !st.success() {
+            warn!(status=%st, "pg_dump failed, restore will likely fail as well");
+        }
+    }
+
+    // TODO: do it in a streaming way, plenty of internal research done on this already
+    // TODO: do the unlogged table trick
+
+    info!("restore from working directory into vanilla postgres");
+    {
+        let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
+            .args(&common_args)
+            .arg("-d")
+            .arg(&restore_pg_connstring)
+            // POSITIONAL args
+            .arg(&dumpdir)
+            // how we run it
+            .env_clear()
+            .kill_on_drop(true)
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context("spawn pg_restore")?;
+
+        info!(pid=%pg_restore.id().unwrap(), "spawned pg_restore");
+        tokio::spawn(
+            child_stdio_to_log::relay_process_output(
+                pg_restore.stdout.take(),
+                pg_restore.stderr.take(),
+            )
+            .instrument(info_span!("pg_restore")),
+        );
+        let st = pg_restore.wait().await.context("wait for pg_restore")?;
+        info!(status=?st, "pg_restore exited");
+        if !st.success() {
+            warn!(status=%st, "pg_restore failed, restore will likely fail as well");
+        }
+    }
+
+    info!("shutdown postgres");
+    {
+        nix::sys::signal::kill(
+            Pid::from_raw(
+                i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
+            ),
+            nix::sys::signal::SIGTERM,
+        )
+        .context("signal postgres to shut down")?;
+        postgres_proc
+            .wait()
+            .await
+            .context("wait for postgres to shut down")?;
+    }
+
+    info!("upload pgdata");
+    s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
+        .await
+        .context("sync dump directory to destination")?;
+
+    info!("write status");
+    {
+        let status_dir = working_directory.join("status");
+        std::fs::create_dir(&status_dir).context("create status directory")?;
+        let status_file = status_dir.join("status");
+        std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
+            .context("write status file")?;
+        s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
+            .await
+            .context("sync status directory to destination")?;
+    }
+
+    Ok(())
+}
--- a/compute_tools/src/bin/fast_import/child_stdio_to_log.rs
+++ b/compute_tools/src/bin/fast_import/child_stdio_to_log.rs
@@ -0,0 +1,35 @@
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::{ChildStderr, ChildStdout};
+use tracing::info;
+
+/// Asynchronously relays the output from a child process's `stdout` and `stderr` to the tracing log.
+/// Each line is read and logged individually, with lossy UTF-8 conversion.
+///
+/// # Arguments
+///
+/// * `stdout`: An `Option<ChildStdout>` from the child process.
+/// * `stderr`: An `Option<ChildStderr>` from the child process.
+///
+pub(crate) async fn relay_process_output(stdout: Option<ChildStdout>, stderr: Option<ChildStderr>) {
+    let stdout_fut = async {
+        if let Some(stdout) = stdout {
+            let reader = BufReader::new(stdout);
+            let mut lines = reader.lines();
+            while let Ok(Some(line)) = lines.next_line().await {
+                info!(fd = "stdout", "{}", line);
+            }
+        }
+    };
+
+    let stderr_fut = async {
+        if let Some(stderr) = stderr {
+            let reader = BufReader::new(stderr);
+            let mut lines = reader.lines();
+            while let Ok(Some(line)) = lines.next_line().await {
+                info!(fd = "stderr", "{}", line);
+            }
+        }
+    };
+
+    tokio::join!(stdout_fut, stderr_fut);
+}
--- a/compute_tools/src/bin/fast_import/s3_uri.rs
+++ b/compute_tools/src/bin/fast_import/s3_uri.rs
@@ -0,0 +1,75 @@
+use anyhow::Result;
+use std::str::FromStr;
+
+/// Struct to hold parsed S3 components
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct S3Uri {
+    pub bucket: String,
+    pub key: String,
+}
+
+impl FromStr for S3Uri {
+    type Err = anyhow::Error;
+
+    /// Parse an S3 URI into a bucket and key
+    fn from_str(uri: &str) -> Result<Self> {
+        // Ensure the URI starts with "s3://"
+        if !uri.starts_with("s3://") {
+            return Err(anyhow::anyhow!("Invalid S3 URI scheme"));
+        }
+
+        // Remove the "s3://" prefix
+        let stripped_uri = &uri[5..];
+
+        // Split the remaining string into bucket and key parts
+        if let Some((bucket, key)) = stripped_uri.split_once('/') {
+            Ok(S3Uri {
+                bucket: bucket.to_string(),
+                key: key.to_string(),
+            })
+        } else {
+            Err(anyhow::anyhow!(
+                "Invalid S3 URI format, missing bucket or key"
+            ))
+        }
+    }
+}
+
+impl S3Uri {
+    pub fn append(&self, suffix: &str) -> Self {
+        Self {
+            bucket: self.bucket.clone(),
+            key: format!("{}{}", self.key, suffix),
+        }
+    }
+}
+
+impl std::fmt::Display for S3Uri {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "s3://{}/{}", self.bucket, self.key)
+    }
+}
+
+impl clap::builder::TypedValueParser for S3Uri {
+    type Value = Self;
+
+    fn parse_ref(
+        &self,
+        _cmd: &clap::Command,
+        _arg: Option<&clap::Arg>,
+        value: &std::ffi::OsStr,
+    ) -> Result<Self::Value, clap::Error> {
+        let value_str = value.to_str().ok_or_else(|| {
+            clap::Error::raw(
+                clap::error::ErrorKind::InvalidUtf8,
+                "Invalid UTF-8 sequence",
+            )
+        })?;
+        S3Uri::from_str(value_str).map_err(|e| {
+            clap::Error::raw(
+                clap::error::ErrorKind::InvalidValue,
+                format!("Failed to parse S3 URI: {}", e),
+            )
+        })
+    }
+}
--- a/compute_tools/src/bin/fast_import/s5cmd.rs
+++ b/compute_tools/src/bin/fast_import/s5cmd.rs
@@ -0,0 +1,27 @@
+use anyhow::Context;
+use camino::Utf8Path;
+
+use super::s3_uri::S3Uri;
+
+pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
+    let mut builder = tokio::process::Command::new("s5cmd");
+    // s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
+    if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
+        builder.arg("--endpoint-url").arg(val);
+    }
+    builder
+        .arg("sync")
+        .arg(local.as_str())
+        .arg(remote.to_string());
+    let st = builder
+        .spawn()
+        .context("spawn s5cmd")?
+        .wait()
+        .await
+        .context("wait for s5cmd")?;
+    if st.success() {
+        Ok(())
+    } else {
+        Err(anyhow::anyhow!("s5cmd failed"))
+    }
+}
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -116,7 +116,7 @@ pub fn write_postgres_conf(
                vartype: "enum".to_owned(),
            };

-            write!(file, "{}", opt.to_pg_setting())?;
+            writeln!(file, "{}", opt.to_pg_setting())?;
        }
    }

--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -20,6 +20,7 @@ use anyhow::Result;
 use hyper::header::CONTENT_TYPE;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
+use metrics::proto::MetricFamily;
 use metrics::Encoder;
 use metrics::TextEncoder;
 use tokio::task;
@@ -72,10 +73,22 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
        (&Method::GET, "/metrics") => {
            debug!("serving /metrics GET request");

-            let mut buffer = vec![];
-            let metrics = installed_extensions::collect();
+            // When we call TextEncoder::encode() below, it will immediately
+            // return an error if a metric family has no metrics, so we need to
+            // preemptively filter out metric families with no metrics.
+            let metrics = installed_extensions::collect()
+                .into_iter()
+                .filter(|m| !m.get_metric().is_empty())
+                .collect::<Vec<MetricFamily>>();
+
            let encoder = TextEncoder::new();
-            encoder.encode(&metrics, &mut buffer).unwrap();
+            let mut buffer = vec![];
+
+            if let Err(err) = encoder.encode(&metrics, &mut buffer) {
+                let msg = format!("error handling /metrics request: {err}");
+                error!(msg);
+                return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
+            }

            match Response::builder()
                .status(StatusCode::OK)
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -115,7 +115,7 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {

 static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
-        "installed_extensions",
+        "compute_installed_extensions",
        "Number of databases where the version of extension is installed",
        &["extension_name", "version"]
    )
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -1153,6 +1153,7 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
                timeline_info.timeline_id
            );
        }
+        // TODO: rename to import-basebackup-plus-wal
        TimelineCmd::Import(args) => {
            let tenant_id = get_tenant_id(args.tenant_id, env)?;
            let timeline_id = args.timeline_id;
--- a/deny.toml
+++ b/deny.toml
@@ -33,7 +33,6 @@ reason = "the marvin attack only affects private key decryption, not public key
 [licenses]
 allow = [
    "Apache-2.0",
-    "Artistic-2.0",
    "BSD-2-Clause",
    "BSD-3-Clause",
    "CC0-1.0",
@@ -67,7 +66,7 @@ registries = []
 # More documentation about the 'bans' section can be found here:
 # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
 [bans]
-multiple-versions = "warn"
+multiple-versions = "allow"
 wildcards = "allow"
 highlight = "all"
 workspace-default-features = "allow"
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -113,21 +113,21 @@ so manual installation of dependencies is not recommended.
 A single virtual environment with all dependencies is described in the single `Pipfile`.

 ### Prerequisites
- Install Python 3.9 (the minimal supported version) or greater.
+- Install Python 3.11 (the minimal supported version) or greater.
    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
-    - If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
+    - If you have some trouble with other version you can resolve it by installing Python 3.11 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
      ```bash
      # In Ubuntu
      sudo add-apt-repository ppa:deadsnakes/ppa
      sudo apt update
-      sudo apt install python3.9
+      sudo apt install python3.11
      ```
 - Install `poetry`
    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
 - Install dependencies via `./scripts/pysync`.
    - Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
      so if you have different version some linting tools can yield different result locally vs in the CI.
-    - You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
+    - You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.11`.
      This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.

 Run `poetry shell` to activate the virtual environment.
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -33,6 +33,7 @@ remote_storage.workspace = true
 postgres_backend.workspace = true
 nix = {workspace = true, optional = true}
 reqwest.workspace = true
+rand.workspace = true

 [dev-dependencies]
 bincode.workspace = true
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -97,6 +97,15 @@ pub struct ConfigToml {
    pub control_plane_api: Option<reqwest::Url>,
    pub control_plane_api_token: Option<String>,
    pub control_plane_emergency_mode: bool,
+    /// Unstable feature: subject to change or removal without notice.
+    /// See <https://github.com/neondatabase/neon/pull/9218>.
+    pub import_pgdata_upcall_api: Option<reqwest::Url>,
+    /// Unstable feature: subject to change or removal without notice.
+    /// See <https://github.com/neondatabase/neon/pull/9218>.
+    pub import_pgdata_upcall_api_token: Option<String>,
+    /// Unstable feature: subject to change or removal without notice.
+    /// See <https://github.com/neondatabase/neon/pull/9218>.
+    pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
    pub heatmap_upload_concurrency: usize,
    pub secondary_download_concurrency: usize,
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
@@ -386,6 +395,10 @@ impl Default for ConfigToml {
            control_plane_api_token: (None),
            control_plane_emergency_mode: (false),

+            import_pgdata_upcall_api: (None),
+            import_pgdata_upcall_api_token: (None),
+            import_pgdata_aws_endpoint_url: (None),
+
            heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
            secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),

--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -48,7 +48,7 @@ pub struct ShardedRange<'a> {

 // Calculate the size of a range within the blocks of the same relation, or spanning only the
 // top page in the previous relation's space.
-fn contiguous_range_len(range: &Range<Key>) -> u32 {
+pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
    debug_assert!(is_contiguous_range(range));
    if range.start.field6 == 0xffffffff {
        range.end.field6 + 1
@@ -67,7 +67,7 @@ fn contiguous_range_len(range: &Range<Key>) -> u32 {
 /// This matters, because:
 /// - Within such ranges, keys are used contiguously.  Outside such ranges it is sparse.
 /// - Within such ranges, we may calculate distances using simple subtraction of field6.
-fn is_contiguous_range(range: &Range<Key>) -> bool {
+pub fn is_contiguous_range(range: &Range<Key>) -> bool {
    range.start.field1 == range.end.field1
        && range.start.field2 == range.end.field2
        && range.start.field3 == range.end.field3
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -2,6 +2,8 @@ pub mod detach_ancestor;
 pub mod partitioning;
 pub mod utilization;

+#[cfg(feature = "testing")]
+use camino::Utf8PathBuf;
 pub use utilization::PageserverUtilization;

 use std::{
@@ -227,6 +229,9 @@ pub enum TimelineCreateRequestMode {
        // we continue to accept it by having it here.
        pg_version: Option<u32>,
    },
+    ImportPgdata {
+        import_pgdata: TimelineCreateRequestModeImportPgdata,
+    },
    // NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
    // (serde picks the first matching enum variant, in declaration order).
    Bootstrap {
@@ -236,6 +241,42 @@ pub enum TimelineCreateRequestMode {
    },
 }

+#[derive(Serialize, Deserialize, Clone)]
+pub struct TimelineCreateRequestModeImportPgdata {
+    pub location: ImportPgdataLocation,
+    pub idempotency_key: ImportPgdataIdempotencyKey,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub enum ImportPgdataLocation {
+    #[cfg(feature = "testing")]
+    LocalFs { path: Utf8PathBuf },
+    AwsS3 {
+        region: String,
+        bucket: String,
+        /// A better name for this would be `prefix`; changing requires coordination with cplane.
+        /// See <https://github.com/neondatabase/cloud/issues/20646>.
+        key: String,
+    },
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+#[serde(transparent)]
+pub struct ImportPgdataIdempotencyKey(pub String);
+
+impl ImportPgdataIdempotencyKey {
+    pub fn random() -> Self {
+        use rand::{distributions::Alphanumeric, Rng};
+        Self(
+            rand::thread_rng()
+                .sample_iter(&Alphanumeric)
+                .take(20)
+                .map(char::from)
+                .collect(),
+        )
+    }
+}
+
 #[derive(Serialize, Deserialize, Clone)]
 pub struct LsnLeaseRequest {
    pub lsn: Lsn,
--- a/libs/postgres_initdb/Cargo.toml
+++ b/libs/postgres_initdb/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "postgres_initdb"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+tokio.workspace = true
+camino.workspace = true
+thiserror.workspace = true
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/postgres_initdb/src/lib.rs
+++ b/libs/postgres_initdb/src/lib.rs
@@ -0,0 +1,103 @@
+//! The canonical way we run `initdb` in Neon.
+//!
+//! initdb has implicit defaults that are dependent on the environment, e.g., locales & collations.
+//!
+//! This module's job is to eliminate the environment-dependence as much as possible.
+
+use std::fmt;
+
+use camino::Utf8Path;
+
+pub struct RunInitdbArgs<'a> {
+    pub superuser: &'a str,
+    pub locale: &'a str,
+    pub initdb_bin: &'a Utf8Path,
+    pub pg_version: u32,
+    pub library_search_path: &'a Utf8Path,
+    pub pgdata: &'a Utf8Path,
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    Spawn(std::io::Error),
+    Failed {
+        status: std::process::ExitStatus,
+        stderr: Vec<u8>,
+    },
+    WaitOutput(std::io::Error),
+    Other(anyhow::Error),
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Error::Spawn(e) => write!(f, "Error spawning command: {:?}", e),
+            Error::Failed { status, stderr } => write!(
+                f,
+                "Command failed with status {:?}: {}",
+                status,
+                String::from_utf8_lossy(stderr)
+            ),
+            Error::WaitOutput(e) => write!(f, "Error waiting for command output: {:?}", e),
+            Error::Other(e) => write!(f, "Error: {:?}", e),
+        }
+    }
+}
+
+pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {
+    let RunInitdbArgs {
+        superuser,
+        locale,
+        initdb_bin: initdb_bin_path,
+        pg_version,
+        library_search_path,
+        pgdata,
+    } = args;
+    let mut initdb_command = tokio::process::Command::new(initdb_bin_path);
+    initdb_command
+        .args(["--pgdata", pgdata.as_ref()])
+        .args(["--username", superuser])
+        .args(["--encoding", "utf8"])
+        .args(["--locale", locale])
+        .arg("--no-instructions")
+        .arg("--no-sync")
+        .env_clear()
+        .env("LD_LIBRARY_PATH", library_search_path)
+        .env("DYLD_LIBRARY_PATH", library_search_path)
+        .stdin(std::process::Stdio::null())
+        // stdout invocation produces the same output every time, we don't need it
+        .stdout(std::process::Stdio::null())
+        // we would be interested in the stderr output, if there was any
+        .stderr(std::process::Stdio::piped());
+
+    // Before version 14, only the libc provide was available.
+    if pg_version > 14 {
+        // Version 17 brought with it a builtin locale provider which only provides
+        // C and C.UTF-8. While being safer for collation purposes since it is
+        // guaranteed to be consistent throughout a major release, it is also more
+        // performant.
+        let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
+
+        initdb_command.args(["--locale-provider", locale_provider]);
+    }
+
+    let initdb_proc = initdb_command.spawn().map_err(Error::Spawn)?;
+
+    // Ideally we'd select here with the cancellation token, but the problem is that
+    // we can't safely terminate initdb: it launches processes of its own, and killing
+    // initdb doesn't kill them. After we return from this function, we want the target
+    // directory to be able to be cleaned up.
+    // See https://github.com/neondatabase/neon/issues/6385
+    let initdb_output = initdb_proc
+        .wait_with_output()
+        .await
+        .map_err(Error::WaitOutput)?;
+    if !initdb_output.status.success() {
+        return Err(Error::Failed {
+            status: initdb_output.status,
+            stderr: initdb_output.stderr,
+        });
+    }
+
+    Ok(())
+}
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -184,9 +184,8 @@ pub struct CancelKeyData {

 impl fmt::Display for CancelKeyData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // TODO: this is producing strange results, with 0xffffffff........ always in the logs.
        let hi = (self.backend_pid as u64) << 32;
-        let lo = self.cancel_key as u64;
+        let lo = (self.cancel_key as u64) & 0xffffffff;
        let id = hi | lo;

        // This format is more compact and might work better for logs.
@@ -1047,4 +1046,13 @@ mod tests {
        let data = [0, 0, 0, 7, 0, 0, 0, 0];
        FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();
    }
+
+    #[test]
+    fn cancel_key_data() {
+        let key = CancelKeyData {
+            backend_pid: -1817212860,
+            cancel_key: -1183897012,
+        };
+        assert_eq!(format!("{key}"), "CancelKeyData(93af8844b96f2a4c)");
+    }
 }
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -25,6 +25,7 @@ use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerCl
 use bytes::Bytes;
 use futures::future::Either;
 use futures::stream::Stream;
+use futures::FutureExt;
 use futures_util::StreamExt;
 use futures_util::TryStreamExt;
 use http_types::{StatusCode, Url};
@@ -32,6 +33,7 @@ use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
 use tracing::debug;
 use utils::backoff;
+use utils::backoff::exponential_backoff_duration_seconds;

 use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
 use crate::{
@@ -311,40 +313,59 @@ impl RemoteStorage for AzureBlobStorage {

            let mut next_marker = None;

+            let mut timeout_try_cnt = 1;
+
            'outer: loop {
                let mut builder = builder.clone();
                if let Some(marker) = next_marker.clone() {
                    builder = builder.marker(marker);
                }
-                let response = builder.into_stream();
-                let response = response.into_stream().map_err(to_download_error);
-                let response = tokio_stream::StreamExt::timeout(response, self.timeout);
-                let response = response.map(|res| match res {
-                    Ok(res) => res,
-                    Err(_elapsed) => Err(DownloadError::Timeout),
+                // Azure Blob Rust SDK does not expose the list blob API directly. Users have to use
+                // their pageable iterator wrapper that returns all keys as a stream. We want to have
+                // full control of paging, and therefore we only take the first item from the stream.
+                let mut response_stream = builder.into_stream();
+                let response = response_stream.next();
+                // Timeout mechanism: Azure client will sometimes stuck on a request, but retrying that request
+                // would immediately succeed. Therefore, we use exponential backoff timeout to retry the request.
+                // (Usually, exponential backoff is used to determine the sleep time between two retries.) We
+                // start with 10.0 second timeout, and double the timeout for each failure, up to 5 failures.
+                // timeout = min(5 * (1.0+1.0)^n, self.timeout).
+                let this_timeout = (5.0 * exponential_backoff_duration_seconds(timeout_try_cnt, 1.0, self.timeout.as_secs_f64())).min(self.timeout.as_secs_f64());
+                let response = tokio::time::timeout(Duration::from_secs_f64(this_timeout), response);
+                let response = response.map(|res| {
+                    match res {
+                        Ok(Some(Ok(res))) => Ok(Some(res)),
+                        Ok(Some(Err(e)))  => Err(to_download_error(e)),
+                        Ok(None) => Ok(None),
+                        Err(_elasped) => Err(DownloadError::Timeout),
+                    }
                });
-
-                let mut response = std::pin::pin!(response);
-
                let mut max_keys = max_keys.map(|mk| mk.get());
                let next_item = tokio::select! {
-                    op = response.next() => Ok(op),
+                    op = response => op,
                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),
-                }?;
+                };
+
+                if let Err(DownloadError::Timeout) = &next_item {
+                    timeout_try_cnt += 1;
+                    if timeout_try_cnt <= 5 {
+                        continue;
+                    }
+                }
+
+                let next_item = next_item?;
+
+                if timeout_try_cnt >= 2 {
+                    tracing::warn!("Azure Blob Storage list timed out and succeeded after {} tries", timeout_try_cnt);
+                }
+                timeout_try_cnt = 1;
+
                let Some(entry) = next_item else {
                    // The list is complete, so yield it.
                    break;
                };

                let mut res = Listing::default();
-                let entry = match entry {
-                    Ok(entry) => entry,
-                    Err(e) => {
-                        // The error is potentially retryable, so we must rewind the loop after yielding.
-                        yield Err(e);
-                        continue;
-                    }
-                };
                next_marker = entry.continuation();
                let prefix_iter = entry
                    .blobs
@@ -360,7 +381,7 @@ impl RemoteStorage for AzureBlobStorage {
                        last_modified: k.properties.last_modified.into(),
                        size: k.properties.content_length,
                    }
-                    );
+                );

                for key in blob_iter {
                    res.keys.push(key);
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -360,7 +360,12 @@ impl RemoteStorage for LocalFs {
            let mut objects = Vec::with_capacity(keys.len());
            for key in keys {
                let path = key.with_base(&self.storage_root);
-                let metadata = file_metadata(&path).await?;
+                let metadata = file_metadata(&path).await;
+                if let Err(DownloadError::NotFound) = metadata {
+                    // Race: if the file is deleted between listing and metadata check, ignore it.
+                    continue;
+                }
+                let metadata = metadata?;
                if metadata.is_dir() {
                    continue;
                }
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -29,6 +29,7 @@ jsonwebtoken.workspace = true
 nix.workspace = true
 once_cell.workspace = true
 pin-project-lite.workspace = true
+pprof.workspace = true
 regex.workspace = true
 routerify.workspace = true
 serde.workspace = true
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -1,7 +1,8 @@
 use crate::auth::{AuthError, Claims, SwappableJwtAuth};
 use crate::http::error::{api_error_handler, route_error_handler, ApiError};
-use anyhow::Context;
-use hyper::header::{HeaderName, AUTHORIZATION};
+use crate::http::request::{get_query_param, parse_query_param};
+use anyhow::{anyhow, Context};
+use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION};
 use hyper::http::HeaderValue;
 use hyper::Method;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response};
@@ -12,11 +13,13 @@ use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
 use tracing::{debug, info, info_span, warn, Instrument};

 use std::future::Future;
+use std::io::Write as _;
 use std::str::FromStr;
+use std::time::Duration;

 use bytes::{Bytes, BytesMut};
-use std::io::Write as _;
-use tokio::sync::mpsc;
+use pprof::protos::Message as _;
+use tokio::sync::{mpsc, Mutex};
 use tokio_stream::wrappers::ReceiverStream;

 static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
@@ -328,6 +331,82 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
    Ok(response)
 }

+/// Generates CPU profiles.
+pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    enum Format {
+        Pprof,
+        Svg,
+    }
+
+    // Parameters.
+    let format = match get_query_param(&req, "format")?.as_deref() {
+        None => Format::Pprof,
+        Some("pprof") => Format::Pprof,
+        Some("svg") => Format::Svg,
+        Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
+    };
+    let seconds = match parse_query_param(&req, "seconds")? {
+        None => 5,
+        Some(seconds @ 1..=30) => seconds,
+        Some(_) => return Err(ApiError::BadRequest(anyhow!("duration must be 1-30 secs"))),
+    };
+    let frequency_hz = match parse_query_param(&req, "frequency")? {
+        None => 99,
+        Some(1001..) => return Err(ApiError::BadRequest(anyhow!("frequency must be <=1000 Hz"))),
+        Some(frequency) => frequency,
+    };
+
+    // Only allow one profiler at a time.
+    static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
+    let _lock = PROFILE_LOCK
+        .try_lock()
+        .map_err(|_| ApiError::Conflict("profiler already running".into()))?;
+
+    // Take the profile.
+    let report = tokio::task::spawn_blocking(move || {
+        let guard = pprof::ProfilerGuardBuilder::default()
+            .frequency(frequency_hz)
+            .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+            .build()?;
+        std::thread::sleep(Duration::from_secs(seconds));
+        guard.report().build()
+    })
+    .await
+    .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
+    .map_err(|pprof_err| ApiError::InternalServerError(pprof_err.into()))?;
+
+    // Return the report in the requested format.
+    match format {
+        Format::Pprof => {
+            let mut body = Vec::new();
+            report
+                .pprof()
+                .map_err(|err| ApiError::InternalServerError(err.into()))?
+                .write_to_vec(&mut body)
+                .map_err(|err| ApiError::InternalServerError(err.into()))?;
+
+            Response::builder()
+                .status(200)
+                .header(CONTENT_TYPE, "application/octet-stream")
+                .header(CONTENT_DISPOSITION, "attachment; filename=\"profile.pb\"")
+                .body(Body::from(body))
+                .map_err(|err| ApiError::InternalServerError(err.into()))
+        }
+
+        Format::Svg => {
+            let mut body = Vec::new();
+            report
+                .flamegraph(&mut body)
+                .map_err(|err| ApiError::InternalServerError(err.into()))?;
+            Response::builder()
+                .status(200)
+                .header(CONTENT_TYPE, "image/svg+xml")
+                .body(Body::from(body))
+                .map_err(|err| ApiError::InternalServerError(err.into()))
+        }
+    }
+}
+
 pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
 ) -> Middleware<B, ApiError> {
    Middleware::pre(move |req| async move {
--- a/libs/utils/src/http/request.rs
+++ b/libs/utils/src/http/request.rs
@@ -30,7 +30,7 @@ pub fn parse_request_param<T: FromStr>(
    }
 }

-fn get_query_param<'a>(
+pub fn get_query_param<'a>(
    request: &'a Request<Body>,
    param_name: &str,
 ) -> Result<Option<Cow<'a, str>>, ApiError> {
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -83,7 +83,9 @@ where
            }
            wake_these.push(self.heap.pop().unwrap().wake_channel);
        }
-        self.update_status();
+        if !wake_these.is_empty() {
+            self.update_status();
+        }
        wake_these
    }

--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -43,6 +43,7 @@ postgres.workspace = true
 postgres_backend.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
+postgres_initdb.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
@@ -68,6 +69,7 @@ url.workspace = true
 walkdir.workspace = true
 metrics.workspace = true
 pageserver_api.workspace = true
+pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 postgres_connection.workspace = true
 postgres_ffi.workspace = true
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -144,6 +144,10 @@ pub struct PageServerConf {
    /// JWT token for use with the control plane API.
    pub control_plane_api_token: Option<SecretString>,

+    pub import_pgdata_upcall_api: Option<Url>,
+    pub import_pgdata_upcall_api_token: Option<SecretString>,
+    pub import_pgdata_aws_endpoint_url: Option<Url>,
+
    /// If true, pageserver will make best-effort to operate without a control plane: only
    /// for use in major incidents.
    pub control_plane_emergency_mode: bool,
@@ -328,6 +332,9 @@ impl PageServerConf {
            control_plane_api,
            control_plane_api_token,
            control_plane_emergency_mode,
+            import_pgdata_upcall_api,
+            import_pgdata_upcall_api_token,
+            import_pgdata_aws_endpoint_url,
            heatmap_upload_concurrency,
            secondary_download_concurrency,
            ingest_batch_size,
@@ -383,6 +390,9 @@ impl PageServerConf {
            timeline_offloading,
            ephemeral_bytes_per_memory_kb,
            server_side_batch_timeout,
+            import_pgdata_upcall_api,
+            import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),
+            import_pgdata_aws_endpoint_url,

            // ------------------------------------------------------------
            // fields that require additional validation or custom handling
--- a/pageserver/src/deletion_queue/deleter.rs
+++ b/pageserver/src/deletion_queue/deleter.rs
@@ -15,6 +15,7 @@ use tokio_util::sync::CancellationToken;
 use tracing::info;
 use tracing::warn;
 use utils::backoff;
+use utils::pausable_failpoint;

 use crate::metrics;

@@ -90,6 +91,7 @@ impl Deleter {
    /// Block until everything in accumulator has been executed
    async fn flush(&mut self) -> Result<(), DeletionQueueError> {
        while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {
+            pausable_failpoint!("deletion-queue-before-execute-pause");
            match self.remote_delete().await {
                Ok(()) => {
                    // Note: we assume that the remote storage layer returns Ok(()) if some
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -623,6 +623,8 @@ paths:
                existing_initdb_timeline_id:
                  type: string
                  format: hex
+                import_pgdata:
+                  $ref: "#/components/schemas/TimelineCreateRequestImportPgdata"
      responses:
        "201":
          description: Timeline was created, or already existed with matching parameters
@@ -979,6 +981,34 @@ components:
          $ref: "#/components/schemas/TenantConfig"
        effective_config:
          $ref: "#/components/schemas/TenantConfig"
+    TimelineCreateRequestImportPgdata:
+      type: object
+      required:
+        - location
+        - idempotency_key
+      properties:
+        idempotency_key:
+          type: string
+        location:
+          $ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocation"
+    TimelineCreateRequestImportPgdataLocation:
+      type: object
+      properties:
+        AwsS3:
+          $ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocationAwsS3"
+    TimelineCreateRequestImportPgdataLocationAwsS3:
+      type: object
+      properties:
+        region:
+          type: string
+        bucket:
+          type: string
+        key:
+          type: string
+      required:
+        - region
+        - bucket
+        - key
    TimelineInfo:
      type: object
      required:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -40,6 +40,7 @@ use pageserver_api::models::TenantSorting;
 use pageserver_api::models::TenantState;
 use pageserver_api::models::TimelineArchivalConfigRequest;
 use pageserver_api::models::TimelineCreateRequestMode;
+use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
 use pageserver_api::models::TimelinesInfoAndOffloaded;
 use pageserver_api::models::TopTenantShardItem;
 use pageserver_api::models::TopTenantShardsRequest;
@@ -55,6 +56,7 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::auth::JwtAuth;
 use utils::failpoint_support::failpoints_handler;
+use utils::http::endpoint::profile_cpu_handler;
 use utils::http::endpoint::prometheus_metrics_handler;
 use utils::http::endpoint::request_span;
 use utils::http::request::must_parse_query_param;
@@ -80,6 +82,7 @@ use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
 use crate::tenant::storage_layer::LayerName;
+use crate::tenant::timeline::import_pgdata;
 use crate::tenant::timeline::offload::offload_timeline;
 use crate::tenant::timeline::offload::OffloadError;
 use crate::tenant::timeline::CompactFlags;
@@ -125,7 +128,7 @@ pub struct State {
    conf: &'static PageServerConf,
    tenant_manager: Arc<TenantManager>,
    auth: Option<Arc<SwappableJwtAuth>>,
-    allowlist_routes: Vec<Uri>,
+    allowlist_routes: &'static [&'static str],
    remote_storage: GenericRemoteStorage,
    broker_client: storage_broker::BrokerClientChannel,
    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
@@ -146,10 +149,13 @@ impl State {
        deletion_queue_client: DeletionQueueClient,
        secondary_controller: SecondaryController,
    ) -> anyhow::Result<Self> {
-        let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml", "/metrics"]
-            .iter()
-            .map(|v| v.parse().unwrap())
-            .collect::<Vec<_>>();
+        let allowlist_routes = &[
+            "/v1/status",
+            "/v1/doc",
+            "/swagger.yml",
+            "/metrics",
+            "/profile/cpu",
+        ];
        Ok(Self {
            conf,
            tenant_manager,
@@ -576,6 +582,35 @@ async fn timeline_create_handler(
            ancestor_timeline_id,
            ancestor_start_lsn,
        }),
+        TimelineCreateRequestMode::ImportPgdata {
+            import_pgdata:
+                TimelineCreateRequestModeImportPgdata {
+                    location,
+                    idempotency_key,
+                },
+        } => tenant::CreateTimelineParams::ImportPgdata(tenant::CreateTimelineParamsImportPgdata {
+            idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(
+                idempotency_key.0,
+            ),
+            new_timeline_id,
+            location: {
+                use import_pgdata::index_part_format::Location;
+                use pageserver_api::models::ImportPgdataLocation;
+                match location {
+                    #[cfg(feature = "testing")]
+                    ImportPgdataLocation::LocalFs { path } => Location::LocalFs { path },
+                    ImportPgdataLocation::AwsS3 {
+                        region,
+                        bucket,
+                        key,
+                    } => Location::AwsS3 {
+                        region,
+                        bucket,
+                        key,
+                    },
+                }
+            },
+        }),
    };

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
@@ -3148,7 +3183,7 @@ pub fn make_router(
    if auth.is_some() {
        router = router.middleware(auth_middleware(|request| {
            let state = get_state(request);
-            if state.allowlist_routes.contains(request.uri()) {
+            if state.allowlist_routes.contains(&request.uri().path()) {
                None
            } else {
                state.auth.as_deref()
@@ -3167,6 +3202,7 @@ pub fn make_router(
    Ok(router
        .data(state)
        .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
+        .get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
        .get("/v1/status", |r| api_handler(r, status_handler))
        .put("/v1/failpoints", |r| {
            testing_api_handler("manage failpoints", r, failpoints_handler)
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -1068,21 +1068,26 @@ impl PageServerHandler {
            ));
        }

-        if request_lsn < **latest_gc_cutoff_lsn {
+        // Check explicitly for INVALID just to get a less scary error message if the request is obviously bogus
+        if request_lsn == Lsn::INVALID {
+            return Err(PageStreamError::BadRequest(
+                "invalid LSN(0) in request".into(),
+            ));
+        }
+
+        // Clients should only read from recent LSNs on their timeline, or from locations holding an LSN lease.
+        //
+        // We may have older data available, but we make a best effort to detect this case and return an error,
+        // to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).
+        if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {
            let gc_info = &timeline.gc_info.read().unwrap();
            if !gc_info.leases.contains_key(&request_lsn) {
-                // The requested LSN is below gc cutoff and is not guarded by a lease.
-
-                // Check explicitly for INVALID just to get a less scary error message if the
-                // request is obviously bogus
-                return Err(if request_lsn == Lsn::INVALID {
-                    PageStreamError::BadRequest("invalid LSN(0) in request".into())
-                } else {
+                return Err(
                    PageStreamError::BadRequest(format!(
                        "tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
                        request_lsn, **latest_gc_cutoff_lsn
                    ).into())
-                });
+                );
            }
        }

--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -2276,9 +2276,9 @@ impl<'a> Version<'a> {
 //--- Metadata structs stored in key-value pairs in the repository.

 #[derive(Debug, Serialize, Deserialize)]
-struct DbDirectory {
+pub(crate) struct DbDirectory {
    // (spcnode, dbnode) -> (do relmapper and PG_VERSION files exist)
-    dbdirs: HashMap<(Oid, Oid), bool>,
+    pub(crate) dbdirs: HashMap<(Oid, Oid), bool>,
 }

 // The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
@@ -2287,8 +2287,8 @@ struct DbDirectory {
 // "pg_twophsae/0000000A000002E4".

 #[derive(Debug, Serialize, Deserialize)]
-struct TwoPhaseDirectory {
-    xids: HashSet<TransactionId>,
+pub(crate) struct TwoPhaseDirectory {
+    pub(crate) xids: HashSet<TransactionId>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -2297,12 +2297,12 @@ struct TwoPhaseDirectoryV17 {
 }

 #[derive(Debug, Serialize, Deserialize, Default)]
-struct RelDirectory {
+pub(crate) struct RelDirectory {
    // Set of relations that exist. (relfilenode, forknum)
    //
    // TODO: Store it as a btree or radix tree or something else that spans multiple
    // key-value pairs, if you have a lot of relations
-    rels: HashSet<(Oid, u8)>,
+    pub(crate) rels: HashSet<(Oid, u8)>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -2311,9 +2311,9 @@ struct RelSizeEntry {
 }

 #[derive(Debug, Serialize, Deserialize, Default)]
-struct SlruSegmentDirectory {
+pub(crate) struct SlruSegmentDirectory {
    // Set of SLRU segments that exist.
-    segments: HashSet<u32>,
+    pub(crate) segments: HashSet<u32>,
 }

 #[derive(Copy, Clone, PartialEq, Eq, Debug, enum_map::Enum)]
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -381,6 +381,8 @@ pub enum TaskKind {
    UnitTest,

    DetachAncestor,
+
+    ImportPgdata,
 }

 #[derive(Default)]
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -43,7 +43,9 @@ use std::sync::atomic::AtomicBool;
 use std::sync::Weak;
 use std::time::SystemTime;
 use storage_broker::BrokerClientChannel;
+use timeline::import_pgdata;
 use timeline::offload::offload_timeline;
+use timeline::ShutdownMode;
 use tokio::io::BufReader;
 use tokio::sync::watch;
 use tokio::task::JoinSet;
@@ -373,7 +375,6 @@ pub struct Tenant {

    l0_flush_global_state: L0FlushGlobalState,
 }
-
 impl std::fmt::Debug for Tenant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
@@ -860,6 +861,7 @@ impl Debug for SetStoppingError {
 pub(crate) enum CreateTimelineParams {
    Bootstrap(CreateTimelineParamsBootstrap),
    Branch(CreateTimelineParamsBranch),
+    ImportPgdata(CreateTimelineParamsImportPgdata),
 }

 #[derive(Debug)]
@@ -877,7 +879,14 @@ pub(crate) struct CreateTimelineParamsBranch {
    pub(crate) ancestor_start_lsn: Option<Lsn>,
 }

-/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in  [`Tenant::start_creating_timeline`].
+#[derive(Debug)]
+pub(crate) struct CreateTimelineParamsImportPgdata {
+    pub(crate) new_timeline_id: TimelineId,
+    pub(crate) location: import_pgdata::index_part_format::Location,
+    pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
+}
+
+/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in  [`Tenant::start_creating_timeline`] in  [`Tenant::start_creating_timeline`].
 ///
 /// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
 ///
@@ -907,19 +916,50 @@ pub(crate) enum CreateTimelineIdempotency {
        ancestor_timeline_id: TimelineId,
        ancestor_start_lsn: Lsn,
    },
+    ImportPgdata(CreatingTimelineIdempotencyImportPgdata),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
+    idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
 }

 /// What is returned by [`Tenant::start_creating_timeline`].
 #[must_use]
-enum StartCreatingTimelineResult<'t> {
-    CreateGuard(TimelineCreateGuard<'t>),
+enum StartCreatingTimelineResult {
+    CreateGuard(TimelineCreateGuard),
    Idempotent(Arc<Timeline>),
 }

+enum TimelineInitAndSyncResult {
+    ReadyToActivate(Arc<Timeline>),
+    NeedsSpawnImportPgdata(TimelineInitAndSyncNeedsSpawnImportPgdata),
+}
+
+impl TimelineInitAndSyncResult {
+    fn ready_to_activate(self) -> Option<Arc<Timeline>> {
+        match self {
+            Self::ReadyToActivate(timeline) => Some(timeline),
+            _ => None,
+        }
+    }
+}
+
+#[must_use]
+struct TimelineInitAndSyncNeedsSpawnImportPgdata {
+    timeline: Arc<Timeline>,
+    import_pgdata: import_pgdata::index_part_format::Root,
+    guard: TimelineCreateGuard,
+}
+
 /// What is returned by [`Tenant::create_timeline`].
 enum CreateTimelineResult {
    Created(Arc<Timeline>),
    Idempotent(Arc<Timeline>),
+    /// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
+    /// we return this result, nor will this concrete object ever be added there.
+    /// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
+    ImportSpawned(Arc<Timeline>),
 }

 impl CreateTimelineResult {
@@ -927,18 +967,19 @@ impl CreateTimelineResult {
        match self {
            Self::Created(_) => "Created",
            Self::Idempotent(_) => "Idempotent",
+            Self::ImportSpawned(_) => "ImportSpawned",
        }
    }
    fn timeline(&self) -> &Arc<Timeline> {
        match self {
-            Self::Created(t) | Self::Idempotent(t) => t,
+            Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,
        }
    }
    /// Unit test timelines aren't activated, test has to do it if it needs to.
    #[cfg(test)]
    fn into_timeline_for_test(self) -> Arc<Timeline> {
        match self {
-            Self::Created(t) | Self::Idempotent(t) => t,
+            Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,
        }
    }
 }
@@ -962,33 +1003,13 @@ pub enum CreateTimelineError {
 }

 #[derive(thiserror::Error, Debug)]
-enum InitdbError {
-    Other(anyhow::Error),
+pub enum InitdbError {
+    #[error("Operation was cancelled")]
    Cancelled,
-    Spawn(std::io::Result<()>),
-    Failed(std::process::ExitStatus, Vec<u8>),
-}
-
-impl fmt::Display for InitdbError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            InitdbError::Cancelled => write!(f, "Operation was cancelled"),
-            InitdbError::Spawn(e) => write!(f, "Spawn error: {:?}", e),
-            InitdbError::Failed(status, stderr) => write!(
-                f,
-                "Command failed with status {:?}: {}",
-                status,
-                String::from_utf8_lossy(stderr)
-            ),
-            InitdbError::Other(e) => write!(f, "Error: {:?}", e),
-        }
-    }
-}
-
-impl From<std::io::Error> for InitdbError {
-    fn from(error: std::io::Error) -> Self {
-        InitdbError::Spawn(Err(error))
-    }
+    #[error(transparent)]
+    Other(anyhow::Error),
+    #[error(transparent)]
+    Inner(postgres_initdb::Error),
 }

 enum CreateTimelineCause {
@@ -996,6 +1017,15 @@ enum CreateTimelineCause {
    Delete,
 }

+enum LoadTimelineCause {
+    Attach,
+    Unoffload,
+    ImportPgdata {
+        create_guard: TimelineCreateGuard,
+        activate: ActivateTimelineArgs,
+    },
+}
+
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum GcError {
    // The tenant is shutting down
@@ -1072,24 +1102,35 @@ impl Tenant {
    /// it is marked as Active.
    #[allow(clippy::too_many_arguments)]
    async fn timeline_init_and_sync(
-        &self,
+        self: &Arc<Self>,
        timeline_id: TimelineId,
        resources: TimelineResources,
-        index_part: IndexPart,
+        mut index_part: IndexPart,
        metadata: TimelineMetadata,
        ancestor: Option<Arc<Timeline>>,
-        _ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+        cause: LoadTimelineCause,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<TimelineInitAndSyncResult> {
        let tenant_id = self.tenant_shard_id;

-        let idempotency = if metadata.ancestor_timeline().is_none() {
-            CreateTimelineIdempotency::Bootstrap {
-                pg_version: metadata.pg_version(),
+        let import_pgdata = index_part.import_pgdata.take();
+        let idempotency = match &import_pgdata {
+            Some(import_pgdata) => {
+                CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {
+                    idempotency_key: import_pgdata.idempotency_key().clone(),
+                })
            }
-        } else {
-            CreateTimelineIdempotency::Branch {
-                ancestor_timeline_id: metadata.ancestor_timeline().unwrap(),
-                ancestor_start_lsn: metadata.ancestor_lsn(),
+            None => {
+                if metadata.ancestor_timeline().is_none() {
+                    CreateTimelineIdempotency::Bootstrap {
+                        pg_version: metadata.pg_version(),
+                    }
+                } else {
+                    CreateTimelineIdempotency::Branch {
+                        ancestor_timeline_id: metadata.ancestor_timeline().unwrap(),
+                        ancestor_start_lsn: metadata.ancestor_lsn(),
+                    }
+                }
            }
        };

@@ -1121,39 +1162,91 @@ impl Tenant {
                format!("Failed to load layermap for timeline {tenant_id}/{timeline_id}")
            })?;

-        {
-            // avoiding holding it across awaits
-            let mut timelines_accessor = self.timelines.lock().unwrap();
-            match timelines_accessor.entry(timeline_id) {
-                // We should never try and load the same timeline twice during startup
-                Entry::Occupied(_) => {
-                    unreachable!(
-                        "Timeline {tenant_id}/{timeline_id} already exists in the tenant map"
-                    );
+        match import_pgdata {
+            Some(import_pgdata) if !import_pgdata.is_done() => {
+                match cause {
+                    LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
+                    LoadTimelineCause::ImportPgdata { .. } => {
+                        unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3")
+                    }
                }
-                Entry::Vacant(v) => {
-                    v.insert(Arc::clone(&timeline));
-                    timeline.maybe_spawn_flush_loop();
+                let mut guard = self.timelines_creating.lock().unwrap();
+                if !guard.insert(timeline_id) {
+                    // We should never try and load the same timeline twice during startup
+                    unreachable!("Timeline {tenant_id}/{timeline_id} is already being created")
                }
+                let timeline_create_guard = TimelineCreateGuard {
+                    _tenant_gate_guard: self.gate.enter()?,
+                    owning_tenant: self.clone(),
+                    timeline_id,
+                    idempotency,
+                    // The users of this specific return value don't need the timline_path in there.
+                    timeline_path: timeline
+                        .conf
+                        .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id),
+                };
+                Ok(TimelineInitAndSyncResult::NeedsSpawnImportPgdata(
+                    TimelineInitAndSyncNeedsSpawnImportPgdata {
+                        timeline,
+                        import_pgdata,
+                        guard: timeline_create_guard,
+                    },
+                ))
            }
-        };
+            Some(_) | None => {
+                {
+                    let mut timelines_accessor = self.timelines.lock().unwrap();
+                    match timelines_accessor.entry(timeline_id) {
+                        // We should never try and load the same timeline twice during startup
+                        Entry::Occupied(_) => {
+                            unreachable!(
+                            "Timeline {tenant_id}/{timeline_id} already exists in the tenant map"
+                        );
+                        }
+                        Entry::Vacant(v) => {
+                            v.insert(Arc::clone(&timeline));
+                            timeline.maybe_spawn_flush_loop();
+                        }
+                    }
+                }

-        // Sanity check: a timeline should have some content.
-        anyhow::ensure!(
-            ancestor.is_some()
-                || timeline
-                    .layers
-                    .read()
-                    .await
-                    .layer_map()
-                    .expect("currently loading, layer manager cannot be shutdown already")
-                    .iter_historic_layers()
-                    .next()
-                    .is_some(),
-            "Timeline has no ancestor and no layer files"
-        );
+                // Sanity check: a timeline should have some content.
+                anyhow::ensure!(
+                    ancestor.is_some()
+                        || timeline
+                            .layers
+                            .read()
+                            .await
+                            .layer_map()
+                            .expect("currently loading, layer manager cannot be shutdown already")
+                            .iter_historic_layers()
+                            .next()
+                            .is_some(),
+                    "Timeline has no ancestor and no layer files"
+                );

-        Ok(())
+                match cause {
+                    LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
+                    LoadTimelineCause::ImportPgdata {
+                        create_guard,
+                        activate,
+                    } => {
+                        // TODO: see the comment in the task code above how I'm not so certain
+                        // it is safe to activate here because of concurrent shutdowns.
+                        match activate {
+                            ActivateTimelineArgs::Yes { broker_client } => {
+                                info!("activating timeline after reload from pgdata import task");
+                                timeline.activate(self.clone(), broker_client, None, ctx);
+                            }
+                            ActivateTimelineArgs::No => (),
+                        }
+                        drop(create_guard);
+                    }
+                }
+
+                Ok(TimelineInitAndSyncResult::ReadyToActivate(timeline))
+            }
+        }
    }

    /// Attach a tenant that's available in cloud storage.
@@ -1578,24 +1671,46 @@ impl Tenant {
            }

            // TODO again handle early failure
-            self.load_remote_timeline(
-                timeline_id,
-                index_part,
-                remote_metadata,
-                TimelineResources {
-                    remote_client,
-                    timeline_get_throttle: self.timeline_get_throttle.clone(),
-                    l0_flush_global_state: self.l0_flush_global_state.clone(),
-                },
-                ctx,
-            )
-            .await
-            .with_context(|| {
-                format!(
-                    "failed to load remote timeline {} for tenant {}",
-                    timeline_id, self.tenant_shard_id
+            let effect = self
+                .load_remote_timeline(
+                    timeline_id,
+                    index_part,
+                    remote_metadata,
+                    TimelineResources {
+                        remote_client,
+                        timeline_get_throttle: self.timeline_get_throttle.clone(),
+                        l0_flush_global_state: self.l0_flush_global_state.clone(),
+                    },
+                    LoadTimelineCause::Attach,
+                    ctx,
                )
-            })?;
+                .await
+                .with_context(|| {
+                    format!(
+                        "failed to load remote timeline {} for tenant {}",
+                        timeline_id, self.tenant_shard_id
+                    )
+                })?;
+
+            match effect {
+                TimelineInitAndSyncResult::ReadyToActivate(_) => {
+                    // activation happens later, on Tenant::activate
+                }
+                TimelineInitAndSyncResult::NeedsSpawnImportPgdata(
+                    TimelineInitAndSyncNeedsSpawnImportPgdata {
+                        timeline,
+                        import_pgdata,
+                        guard,
+                    },
+                ) => {
+                    tokio::task::spawn(self.clone().create_timeline_import_pgdata_task(
+                        timeline,
+                        import_pgdata,
+                        ActivateTimelineArgs::No,
+                        guard,
+                    ));
+                }
+            }
        }

        // Walk through deleted timelines, resume deletion
@@ -1719,13 +1834,14 @@ impl Tenant {

    #[instrument(skip_all, fields(timeline_id=%timeline_id))]
    async fn load_remote_timeline(
-        &self,
+        self: &Arc<Self>,
        timeline_id: TimelineId,
        index_part: IndexPart,
        remote_metadata: TimelineMetadata,
        resources: TimelineResources,
+        cause: LoadTimelineCause,
        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<TimelineInitAndSyncResult> {
        span::debug_assert_current_span_has_tenant_id();

        info!("downloading index file for timeline {}", timeline_id);
@@ -1752,6 +1868,7 @@ impl Tenant {
            index_part,
            remote_metadata,
            ancestor,
+            cause,
            ctx,
        )
        .await
@@ -1938,6 +2055,7 @@ impl Tenant {
                    TimelineArchivalError::Other(anyhow::anyhow!("Timeline already exists"))
                }
                TimelineExclusionError::Other(e) => TimelineArchivalError::Other(e),
+                TimelineExclusionError::ShuttingDown => TimelineArchivalError::Cancelled,
            })?;

        let timeline_preload = self
@@ -1976,6 +2094,7 @@ impl Tenant {
            index_part,
            remote_metadata,
            timeline_resources,
+            LoadTimelineCause::Unoffload,
            &ctx,
        )
        .await
@@ -2213,7 +2332,7 @@ impl Tenant {
    ///
    /// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
    pub(crate) async fn create_empty_timeline(
-        &self,
+        self: &Arc<Self>,
        new_timeline_id: TimelineId,
        initdb_lsn: Lsn,
        pg_version: u32,
@@ -2263,7 +2382,7 @@ impl Tenant {
    // Our current tests don't need the background loops.
    #[cfg(test)]
    pub async fn create_test_timeline(
-        &self,
+        self: &Arc<Self>,
        new_timeline_id: TimelineId,
        initdb_lsn: Lsn,
        pg_version: u32,
@@ -2302,7 +2421,7 @@ impl Tenant {
    #[cfg(test)]
    #[allow(clippy::too_many_arguments)]
    pub async fn create_test_timeline_with_layers(
-        &self,
+        self: &Arc<Self>,
        new_timeline_id: TimelineId,
        initdb_lsn: Lsn,
        pg_version: u32,
@@ -2439,6 +2558,16 @@ impl Tenant {
                self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx)
                    .await?
            }
+            CreateTimelineParams::ImportPgdata(params) => {
+                self.create_timeline_import_pgdata(
+                    params,
+                    ActivateTimelineArgs::Yes {
+                        broker_client: broker_client.clone(),
+                    },
+                    ctx,
+                )
+                .await?
+            }
        };

        // At this point we have dropped our guard on [`Self::timelines_creating`], and
@@ -2481,11 +2610,202 @@ impl Tenant {
                );
                timeline
            }
+            CreateTimelineResult::ImportSpawned(timeline) => {
+                info!("import task spawned, timeline will become visible and activated once the import is done");
+                timeline
+            }
        };

        Ok(activated_timeline)
    }

+    /// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
+    /// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
+    /// [`Tenant::timelines`] map when the import completes.
+    /// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
+    /// for the response.
+    async fn create_timeline_import_pgdata(
+        self: &Arc<Tenant>,
+        params: CreateTimelineParamsImportPgdata,
+        activate: ActivateTimelineArgs,
+        ctx: &RequestContext,
+    ) -> Result<CreateTimelineResult, CreateTimelineError> {
+        let CreateTimelineParamsImportPgdata {
+            new_timeline_id,
+            location,
+            idempotency_key,
+        } = params;
+
+        let started_at = chrono::Utc::now().naive_utc();
+
+        //
+        // There's probably a simpler way to upload an index part, but, remote_timeline_client
+        // is the canonical way we do it.
+        // - create an empty timeline in-memory
+        // - use its remote_timeline_client to do the upload
+        // - dispose of the uninit timeline
+        // - keep the creation guard alive
+
+        let timeline_create_guard = match self
+            .start_creating_timeline(
+                new_timeline_id,
+                CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {
+                    idempotency_key: idempotency_key.clone(),
+                }),
+            )
+            .await?
+        {
+            StartCreatingTimelineResult::CreateGuard(guard) => guard,
+            StartCreatingTimelineResult::Idempotent(timeline) => {
+                return Ok(CreateTimelineResult::Idempotent(timeline))
+            }
+        };
+
+        let mut uninit_timeline = {
+            let this = &self;
+            let initdb_lsn = Lsn(0);
+            let _ctx = ctx;
+            async move {
+                let new_metadata = TimelineMetadata::new(
+                    // Initialize disk_consistent LSN to 0, The caller must import some data to
+                    // make it valid, before calling finish_creation()
+                    Lsn(0),
+                    None,
+                    None,
+                    Lsn(0),
+                    initdb_lsn,
+                    initdb_lsn,
+                    15,
+                );
+                this.prepare_new_timeline(
+                    new_timeline_id,
+                    &new_metadata,
+                    timeline_create_guard,
+                    initdb_lsn,
+                    None,
+                )
+                .await
+            }
+        }
+        .await?;
+
+        let in_progress = import_pgdata::index_part_format::InProgress {
+            idempotency_key,
+            location,
+            started_at,
+        };
+        let index_part = import_pgdata::index_part_format::Root::V1(
+            import_pgdata::index_part_format::V1::InProgress(in_progress),
+        );
+        uninit_timeline
+            .raw_timeline()
+            .unwrap()
+            .remote_client
+            .schedule_index_upload_for_import_pgdata_state_update(Some(index_part.clone()))?;
+
+        // wait_completion happens in caller
+
+        let (timeline, timeline_create_guard) = uninit_timeline.finish_creation_myself();
+
+        tokio::spawn(self.clone().create_timeline_import_pgdata_task(
+            timeline.clone(),
+            index_part,
+            activate,
+            timeline_create_guard,
+        ));
+
+        // NB: the timeline doesn't exist in self.timelines at this point
+        Ok(CreateTimelineResult::ImportSpawned(timeline))
+    }
+
+    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
+    async fn create_timeline_import_pgdata_task(
+        self: Arc<Tenant>,
+        timeline: Arc<Timeline>,
+        index_part: import_pgdata::index_part_format::Root,
+        activate: ActivateTimelineArgs,
+        timeline_create_guard: TimelineCreateGuard,
+    ) {
+        debug_assert_current_span_has_tenant_and_timeline_id();
+        info!("starting");
+        scopeguard::defer! {info!("exiting")};
+
+        let res = self
+            .create_timeline_import_pgdata_task_impl(
+                timeline,
+                index_part,
+                activate,
+                timeline_create_guard,
+            )
+            .await;
+        if let Err(err) = &res {
+            error!(?err, "task failed");
+            // TODO sleep & retry, sensitive to tenant shutdown
+            // TODO: allow timeline deletion requests => should cancel the task
+        }
+    }
+
+    async fn create_timeline_import_pgdata_task_impl(
+        self: Arc<Tenant>,
+        timeline: Arc<Timeline>,
+        index_part: import_pgdata::index_part_format::Root,
+        activate: ActivateTimelineArgs,
+        timeline_create_guard: TimelineCreateGuard,
+    ) -> Result<(), anyhow::Error> {
+        let ctx = RequestContext::new(TaskKind::ImportPgdata, DownloadBehavior::Warn);
+
+        info!("importing pgdata");
+        import_pgdata::doit(&timeline, index_part, &ctx, self.cancel.clone())
+            .await
+            .context("import")?;
+        info!("import done");
+
+        //
+        // Reload timeline from remote.
+        // This proves that the remote state is attachable, and it reuses the code.
+        //
+        // TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
+        // timeline_create_guard hols the tenant gate open, so, shutdown cannot _complete_ until we exit.
+        // But our activate() call might launch new background tasks after Tenant::shutdown
+        // already went past shutting down the Tenant::timelines, which this timeline here is no part of.
+        // I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
+        // down while bootstrapping/branching + activating), but, the race condition is much more likely
+        // to manifest because of the long runtime of this import task.
+
+        //        in theory this shouldn't even .await anything except for coop yield
+        info!("shutting down timeline");
+        timeline.shutdown(ShutdownMode::Hard).await;
+        info!("timeline shut down, reloading from remote");
+        // TODO: we can't do the following check because create_timeline_import_pgdata must return an Arc<Timeline>
+        // let Some(timeline) = Arc::into_inner(timeline) else {
+        //     anyhow::bail!("implementation error: timeline that we shut down was still referenced from somewhere");
+        // };
+        let timeline_id = timeline.timeline_id;
+
+        // load from object storage like Tenant::attach does
+        let resources = self.build_timeline_resources(timeline_id);
+        let index_part = resources
+            .remote_client
+            .download_index_file(&self.cancel)
+            .await?;
+        let index_part = match index_part {
+            MaybeDeletedIndexPart::Deleted(_) => {
+                // likely concurrent delete call, cplane should prevent this
+                anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but")
+            }
+            MaybeDeletedIndexPart::IndexPart(p) => p,
+        };
+        let metadata = index_part.metadata.clone();
+        self
+            .load_remote_timeline(timeline_id, index_part, metadata, resources, LoadTimelineCause::ImportPgdata{
+                create_guard: timeline_create_guard, activate, }, &ctx)
+            .await?
+            .ready_to_activate()
+            .context("implementation error: reloaded timeline still needs import after import reported success")?;
+
+        anyhow::Ok(())
+    }
+
    pub(crate) async fn delete_timeline(
        self: Arc<Self>,
        timeline_id: TimelineId,
@@ -3337,6 +3657,13 @@ where
    Ok(result)
 }

+enum ActivateTimelineArgs {
+    Yes {
+        broker_client: storage_broker::BrokerClientChannel,
+    },
+    No,
+}
+
 impl Tenant {
    pub fn tenant_specific_overrides(&self) -> TenantConfOpt {
        self.tenant_conf.load().tenant_conf.clone()
@@ -3520,6 +3847,7 @@ impl Tenant {
    /// `validate_ancestor == false` is used when a timeline is created for deletion
    /// and we might not have the ancestor present anymore which is fine for to be
    /// deleted timelines.
+    #[allow(clippy::too_many_arguments)]
    fn create_timeline_struct(
        &self,
        new_timeline_id: TimelineId,
@@ -4283,16 +4611,17 @@ impl Tenant {
    /// If the timeline was already created in the meantime, we check whether this
    /// request conflicts or is idempotent , based on `state`.
    async fn start_creating_timeline(
-        &self,
+        self: &Arc<Self>,
        new_timeline_id: TimelineId,
        idempotency: CreateTimelineIdempotency,
-    ) -> Result<StartCreatingTimelineResult<'_>, CreateTimelineError> {
+    ) -> Result<StartCreatingTimelineResult, CreateTimelineError> {
        let allow_offloaded = false;
        match self.create_timeline_create_guard(new_timeline_id, idempotency, allow_offloaded) {
            Ok(create_guard) => {
                pausable_failpoint!("timeline-creation-after-uninit");
                Ok(StartCreatingTimelineResult::CreateGuard(create_guard))
            }
+            Err(TimelineExclusionError::ShuttingDown) => Err(CreateTimelineError::ShuttingDown),
            Err(TimelineExclusionError::AlreadyCreating) => {
                // Creation is in progress, we cannot create it again, and we cannot
                // check if this request matches the existing one, so caller must try
@@ -4582,7 +4911,7 @@ impl Tenant {
        &'a self,
        new_timeline_id: TimelineId,
        new_metadata: &TimelineMetadata,
-        create_guard: TimelineCreateGuard<'a>,
+        create_guard: TimelineCreateGuard,
        start_lsn: Lsn,
        ancestor: Option<Arc<Timeline>>,
    ) -> anyhow::Result<UninitializedTimeline<'a>> {
@@ -4642,7 +4971,7 @@ impl Tenant {
    /// The `allow_offloaded` parameter controls whether to tolerate the existence of
    /// offloaded timelines or not.
    fn create_timeline_create_guard(
-        &self,
+        self: &Arc<Self>,
        timeline_id: TimelineId,
        idempotency: CreateTimelineIdempotency,
        allow_offloaded: bool,
@@ -4902,48 +5231,16 @@ async fn run_initdb(

    let _permit = INIT_DB_SEMAPHORE.acquire().await;

-    let mut initdb_command = tokio::process::Command::new(&initdb_bin_path);
-    initdb_command
-        .args(["--pgdata", initdb_target_dir.as_ref()])
-        .args(["--username", &conf.superuser])
-        .args(["--encoding", "utf8"])
-        .args(["--locale", &conf.locale])
-        .arg("--no-instructions")
-        .arg("--no-sync")
-        .env_clear()
-        .env("LD_LIBRARY_PATH", &initdb_lib_dir)
-        .env("DYLD_LIBRARY_PATH", &initdb_lib_dir)
-        .stdin(std::process::Stdio::null())
-        // stdout invocation produces the same output every time, we don't need it
-        .stdout(std::process::Stdio::null())
-        // we would be interested in the stderr output, if there was any
-        .stderr(std::process::Stdio::piped());
-
-    // Before version 14, only the libc provide was available.
-    if pg_version > 14 {
-        // Version 17 brought with it a builtin locale provider which only provides
-        // C and C.UTF-8. While being safer for collation purposes since it is
-        // guaranteed to be consistent throughout a major release, it is also more
-        // performant.
-        let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
-
-        initdb_command.args(["--locale-provider", locale_provider]);
-    }
-
-    let initdb_proc = initdb_command.spawn()?;
-
-    // Ideally we'd select here with the cancellation token, but the problem is that
-    // we can't safely terminate initdb: it launches processes of its own, and killing
-    // initdb doesn't kill them. After we return from this function, we want the target
-    // directory to be able to be cleaned up.
-    // See https://github.com/neondatabase/neon/issues/6385
-    let initdb_output = initdb_proc.wait_with_output().await?;
-    if !initdb_output.status.success() {
-        return Err(InitdbError::Failed(
-            initdb_output.status,
-            initdb_output.stderr,
-        ));
-    }
+    let res = postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
+        superuser: &conf.superuser,
+        locale: &conf.locale,
+        initdb_bin: &initdb_bin_path,
+        pg_version,
+        library_search_path: &initdb_lib_dir,
+        pgdata: initdb_target_dir,
+    })
+    .await
+    .map_err(InitdbError::Inner);

    // This isn't true cancellation support, see above. Still return an error to
    // excercise the cancellation code path.
@@ -4951,7 +5248,7 @@ async fn run_initdb(
        return Err(InitdbError::Cancelled);
    }

-    Ok(())
+    res
 }

 /// Dump contents of a layer file to stdout.
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -199,7 +199,7 @@ use utils::backoff::{
 use utils::pausable_failpoint;
 use utils::shard::ShardNumber;

-use std::collections::{HashMap, VecDeque};
+use std::collections::{HashMap, HashSet, VecDeque};
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Mutex, OnceLock};
 use std::time::Duration;
@@ -223,7 +223,7 @@ use crate::task_mgr::shutdown_token;
 use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::download::download_retry;
 use crate::tenant::storage_layer::AsLayerDesc;
-use crate::tenant::upload_queue::{Delete, UploadQueueStoppedDeletable};
+use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable};
 use crate::tenant::TIMELINES_SEGMENT_NAME;
 use crate::{
    config::PageServerConf,
@@ -244,6 +244,7 @@ use self::index::IndexPart;
 use super::config::AttachedLocationConfig;
 use super::metadata::MetadataUpdate;
 use super::storage_layer::{Layer, LayerName, ResidentLayer};
+use super::timeline::import_pgdata;
 use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
 use super::{DeleteTimelineError, Generation};

@@ -813,6 +814,18 @@ impl RemoteTimelineClient {
        Ok(need_wait)
    }

+    /// Launch an index-file upload operation in the background, setting `import_pgdata` field.
+    pub(crate) fn schedule_index_upload_for_import_pgdata_state_update(
+        self: &Arc<Self>,
+        state: Option<import_pgdata::index_part_format::Root>,
+    ) -> anyhow::Result<()> {
+        let mut guard = self.upload_queue.lock().unwrap();
+        let upload_queue = guard.initialized_mut()?;
+        upload_queue.dirty.import_pgdata = state;
+        self.schedule_index_upload(upload_queue)?;
+        Ok(())
+    }
+
    ///
    /// Launch an index-file upload operation in the background, if necessary.
    ///
@@ -1090,7 +1103,7 @@ impl RemoteTimelineClient {
            "scheduled layer file upload {layer}",
        );

-        let op = UploadOp::UploadLayer(layer, metadata);
+        let op = UploadOp::UploadLayer(layer, metadata, None);
        self.metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
    }
@@ -1805,7 +1818,7 @@ impl RemoteTimelineClient {
                    // have finished.
                    upload_queue.inprogress_tasks.is_empty()
                }
-                UploadOp::Delete(_) => {
+                UploadOp::Delete(..) => {
                    // Wait for preceding uploads to finish. Concurrent deletions are OK, though.
                    upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
                }
@@ -1833,19 +1846,32 @@ impl RemoteTimelineClient {
            }

            // We can launch this task. Remove it from the queue first.
-            let next_op = upload_queue.queued_operations.pop_front().unwrap();
+            let mut next_op = upload_queue.queued_operations.pop_front().unwrap();

            debug!("starting op: {}", next_op);

-            // Update the counters
-            match next_op {
-                UploadOp::UploadLayer(_, _) => {
+            // Update the counters and prepare
+            match &mut next_op {
+                UploadOp::UploadLayer(layer, meta, mode) => {
+                    if upload_queue
+                        .recently_deleted
+                        .remove(&(layer.layer_desc().layer_name().clone(), meta.generation))
+                    {
+                        *mode = Some(OpType::FlushDeletion);
+                    } else {
+                        *mode = Some(OpType::MayReorder)
+                    }
                    upload_queue.num_inprogress_layer_uploads += 1;
                }
                UploadOp::UploadMetadata { .. } => {
                    upload_queue.num_inprogress_metadata_uploads += 1;
                }
-                UploadOp::Delete(_) => {
+                UploadOp::Delete(Delete { layers }) => {
+                    for (name, meta) in layers {
+                        upload_queue
+                            .recently_deleted
+                            .insert((name.clone(), meta.generation));
+                    }
                    upload_queue.num_inprogress_deletions += 1;
                }
                UploadOp::Barrier(sender) => {
@@ -1921,7 +1947,66 @@ impl RemoteTimelineClient {
            }

            let upload_result: anyhow::Result<()> = match &task.op {
-                UploadOp::UploadLayer(ref layer, ref layer_metadata) => {
+                UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
+                    if let Some(OpType::FlushDeletion) = mode {
+                        if self.config.read().unwrap().block_deletions {
+                            // Of course, this is not efficient... but usually the queue should be empty.
+                            let mut queue_locked = self.upload_queue.lock().unwrap();
+                            let mut detected = false;
+                            if let Ok(queue) = queue_locked.initialized_mut() {
+                                for list in queue.blocked_deletions.iter_mut() {
+                                    list.layers.retain(|(name, meta)| {
+                                        if name == &layer.layer_desc().layer_name()
+                                            && meta.generation == layer_metadata.generation
+                                        {
+                                            detected = true;
+                                            // remove the layer from deletion queue
+                                            false
+                                        } else {
+                                            // keep the layer
+                                            true
+                                        }
+                                    });
+                                }
+                            }
+                            if detected {
+                                info!(
+                                    "cancelled blocked deletion of layer {} at gen {:?}",
+                                    layer.layer_desc().layer_name(),
+                                    layer_metadata.generation
+                                );
+                            }
+                        } else {
+                            // TODO: we did not guarantee that upload task starts after deletion task, so there could be possibly race conditions
+                            // that we still get the layer deleted. But this only happens if someone creates a layer immediately after it's deleted,
+                            // which is not possible in the current system.
+                            info!(
+                                "waiting for deletion queue flush to complete before uploading layer {} at gen {:?}",
+                                layer.layer_desc().layer_name(),
+                                layer_metadata.generation
+                            );
+                            {
+                                // We are going to flush, we can clean up the recently deleted list.
+                                let mut queue_locked = self.upload_queue.lock().unwrap();
+                                if let Ok(queue) = queue_locked.initialized_mut() {
+                                    queue.recently_deleted.clear();
+                                }
+                            }
+                            if let Err(e) = self.deletion_queue_client.flush_execute().await {
+                                warn!(
+                                    "failed to flush the deletion queue before uploading layer {} at gen {:?}, still proceeding to upload: {e:#} ",
+                                    layer.layer_desc().layer_name(),
+                                    layer_metadata.generation
+                                );
+                            } else {
+                                info!(
+                                    "done flushing deletion queue before uploading layer {} at gen {:?}",
+                                    layer.layer_desc().layer_name(),
+                                    layer_metadata.generation
+                                );
+                            }
+                        }
+                    }
                    let local_path = layer.local_path();

                    // We should only be uploading layers created by this `Tenant`'s lifetime, so
@@ -2085,7 +2170,7 @@ impl RemoteTimelineClient {
            upload_queue.inprogress_tasks.remove(&task.task_id);

            let lsn_update = match task.op {
-                UploadOp::UploadLayer(_, _) => {
+                UploadOp::UploadLayer(_, _, _) => {
                    upload_queue.num_inprogress_layer_uploads -= 1;
                    None
                }
@@ -2162,7 +2247,7 @@ impl RemoteTimelineClient {
    )> {
        use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;
        let res = match op {
-            UploadOp::UploadLayer(_, m) => (
+            UploadOp::UploadLayer(_, m, _) => (
                RemoteOpFileKind::Layer,
                RemoteOpKind::Upload,
                RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
@@ -2259,6 +2344,7 @@ impl RemoteTimelineClient {
                        blocked_deletions: Vec::new(),
                        shutting_down: false,
                        shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
+                        recently_deleted: HashSet::new(),
                    };

                    let upload_queue = std::mem::replace(
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -706,7 +706,7 @@ where
    .and_then(|x| x)
 }

-async fn download_retry_forever<T, O, F>(
+pub(crate) async fn download_retry_forever<T, O, F>(
    op: O,
    description: &str,
    cancel: &CancellationToken,
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -12,6 +12,7 @@ use utils::id::TimelineId;

 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerName;
+use crate::tenant::timeline::import_pgdata;
 use crate::tenant::Generation;
 use pageserver_api::shard::ShardIndex;

@@ -37,6 +38,13 @@ pub struct IndexPart {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub archived_at: Option<NaiveDateTime>,

+    /// This field supports import-from-pgdata ("fast imports" platform feature).
+    /// We don't currently use fast imports, so, this field is None for all production timelines.
+    /// See <https://github.com/neondatabase/neon/pull/9218> for more information.
+    #[serde(default)]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub import_pgdata: Option<import_pgdata::index_part_format::Root>,
+
    /// Per layer file name metadata, which can be present for a present or missing layer file.
    ///
    /// Older versions of `IndexPart` will not have this property or have only a part of metadata
@@ -90,10 +98,11 @@ impl IndexPart {
    /// - 7: metadata_bytes is no longer written, but still read
    /// - 8: added `archived_at`
    /// - 9: +gc_blocking
-    const LATEST_VERSION: usize = 9;
+    /// - 10: +import_pgdata
+    const LATEST_VERSION: usize = 10;

    // Versions we may see when reading from a bucket.
-    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9];
+    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

    pub const FILE_NAME: &'static str = "index_part.json";

@@ -108,6 +117,7 @@ impl IndexPart {
            lineage: Default::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        }
    }

@@ -381,6 +391,7 @@ mod tests {
            lineage: Lineage::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -425,6 +436,7 @@ mod tests {
            lineage: Lineage::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -470,6 +482,7 @@ mod tests {
            lineage: Lineage::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -518,6 +531,7 @@ mod tests {
            lineage: Lineage::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
@@ -561,6 +575,7 @@ mod tests {
            lineage: Lineage::default(),
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -607,6 +622,7 @@ mod tests {
            },
            gc_blocking: None,
            last_aux_file_policy: None,
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -658,6 +674,7 @@ mod tests {
            },
            gc_blocking: None,
            last_aux_file_policy: Some(AuxFilePolicy::V2),
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -714,6 +731,7 @@ mod tests {
            lineage: Default::default(),
            gc_blocking: None,
            last_aux_file_policy: Default::default(),
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -771,6 +789,7 @@ mod tests {
            lineage: Default::default(),
            gc_blocking: None,
            last_aux_file_policy: Default::default(),
+            import_pgdata: None,
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
@@ -833,6 +852,83 @@ mod tests {
            }),
            last_aux_file_policy: Default::default(),
            archived_at: None,
+            import_pgdata: None,
+        };
+
+        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
+        assert_eq!(part, expected);
+    }
+
+    #[test]
+    fn v10_importpgdata_is_parsed() {
+        let example = r#"{
+            "version": 10,
+            "layer_metadata":{
+                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
+                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
+            },
+            "disk_consistent_lsn":"0/16960E8",
+            "metadata": {
+                "disk_consistent_lsn": "0/16960E8",
+                "prev_record_lsn": "0/1696070",
+                "ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
+                "ancestor_lsn": "0/0",
+                "latest_gc_cutoff_lsn": "0/1696070",
+                "initdb_lsn": "0/1696070",
+                "pg_version": 14
+            },
+            "gc_blocking": {
+                "started_at": "2024-07-19T09:00:00.123",
+                "reasons": ["DetachAncestor"]
+            },
+            "import_pgdata": {
+                "V1": {
+                    "Done": {
+                        "idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
+                        "started_at": "2024-11-13T09:23:42.123",
+                        "finished_at": "2024-11-13T09:42:23.123"
+                    }
+                }
+            }
+        }"#;
+
+        let expected = IndexPart {
+            version: 10,
+            layer_metadata: HashMap::from([
+                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
+                    file_size: 25600000,
+                    generation: Generation::none(),
+                    shard: ShardIndex::unsharded()
+                }),
+                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
+                    file_size: 9007199254741001,
+                    generation: Generation::none(),
+                    shard: ShardIndex::unsharded()
+                })
+            ]),
+            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
+            metadata: TimelineMetadata::new(
+                Lsn::from_str("0/16960E8").unwrap(),
+                Some(Lsn::from_str("0/1696070").unwrap()),
+                Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
+                Lsn::INVALID,
+                Lsn::from_str("0/1696070").unwrap(),
+                Lsn::from_str("0/1696070").unwrap(),
+                14,
+            ).with_recalculated_checksum().unwrap(),
+            deleted_at: None,
+            lineage: Default::default(),
+            gc_blocking: Some(GcBlocking {
+                started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
+                reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
+            }),
+            last_aux_file_policy: Default::default(),
+            archived_at: None,
+            import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
+                started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
+                finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
+                idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
+            })))
        };

        let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -111,15 +111,6 @@ pub(crate) struct SecondaryTenant {
    pub(super) heatmap_total_size_metric: UIntGauge,
 }

-impl Drop for SecondaryTenant {
-    fn drop(&mut self) {
-        let tenant_id = self.tenant_shard_id.tenant_id.to_string();
-        let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
-        let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
-        let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
-    }
-}
-
 impl SecondaryTenant {
    pub(crate) fn new(
        tenant_shard_id: TenantShardId,
@@ -167,6 +158,13 @@ impl SecondaryTenant {

        // Wait for any secondary downloader work to complete
        self.gate.close().await;
+
+        self.validate_metrics();
+
+        let tenant_id = self.tenant_shard_id.tenant_id.to_string();
+        let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
+        let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
+        let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
    }

    pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {
@@ -254,6 +252,20 @@ impl SecondaryTenant {
        .await
        .expect("secondary eviction should not have panicked");
    }
+
+    /// Exhaustive check that incrementally updated metrics match the actual state.
+    #[cfg(feature = "testing")]
+    fn validate_metrics(&self) {
+        let detail = self.detail.lock().unwrap();
+        let resident_size = detail.total_resident_size();
+
+        assert_eq!(resident_size, self.resident_size_metric.get());
+    }
+
+    #[cfg(not(feature = "testing"))]
+    fn validate_metrics(&self) {
+        // No-op in non-testing builds
+    }
 }

 /// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -242,6 +242,19 @@ impl SecondaryDetail {
        }
    }

+    #[cfg(feature = "testing")]
+    pub(crate) fn total_resident_size(&self) -> u64 {
+        self.timelines
+            .values()
+            .map(|tl| {
+                tl.on_disk_layers
+                    .values()
+                    .map(|v| v.metadata.file_size)
+                    .sum::<u64>()
+            })
+            .sum::<u64>()
+    }
+
    pub(super) fn evict_layer(
        &mut self,
        name: LayerName,
@@ -763,24 +776,7 @@ impl<'a> TenantDownloader<'a> {
        }

        // Metrics consistency check in testing builds
-        if cfg!(feature = "testing") {
-            let detail = self.secondary_state.detail.lock().unwrap();
-            let resident_size = detail
-                .timelines
-                .values()
-                .map(|tl| {
-                    tl.on_disk_layers
-                        .values()
-                        .map(|v| v.metadata.file_size)
-                        .sum::<u64>()
-                })
-                .sum::<u64>();
-            assert_eq!(
-                resident_size,
-                self.secondary_state.resident_size_metric.get()
-            );
-        }
-
+        self.secondary_state.validate_metrics();
        // Only update last_etag after a full successful download: this way will not skip
        // the next download, even if the heatmap's actual etag is unchanged.
        self.secondary_state.detail.lock().unwrap().last_download = Some(DownloadSummary {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4,6 +4,7 @@ pub mod delete;
 pub(crate) mod detach_ancestor;
 mod eviction_task;
 pub(crate) mod handle;
+pub(crate) mod import_pgdata;
 mod init;
 pub mod layer_manager;
 pub(crate) mod logical_size;
@@ -2085,6 +2086,11 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length_for_ts)
    }

+    pub(crate) fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {
+        let tenant_conf = self.tenant_conf.load();
+        tenant_conf.is_gc_blocked_by_lsn_lease_deadline()
+    }
+
    pub(crate) fn get_lazy_slru_download(&self) -> bool {
        let tenant_conf = self.tenant_conf.load();
        tenant_conf
@@ -2647,6 +2653,7 @@ impl Timeline {
        //
        // NB: generation numbers naturally protect against this because they disambiguate
        //     (1) and (4)
+        // TODO: this is basically a no-op now, should we remove it?
        self.remote_client.schedule_barrier()?;
        // Tenant::create_timeline will wait for these uploads to happen before returning, or
        // on retry.
@@ -2702,20 +2709,23 @@ impl Timeline {
                {
                    Some(cancel) => cancel.cancel(),
                    None => {
-                        let state = self.current_state();
-                        if matches!(
-                            state,
-                            TimelineState::Broken { .. } | TimelineState::Stopping
-                        ) {
-
-                            // Can happen when timeline detail endpoint is used when deletion is ongoing (or its broken).
-                            // Don't make noise.
-                        } else {
-                            warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
-                            debug_assert!(false);
+                        match self.current_state() {
+                            TimelineState::Broken { .. } | TimelineState::Stopping => {
+                                // Can happen when timeline detail endpoint is used when deletion is ongoing (or its broken).
+                                // Don't make noise.
+                            }
+                            TimelineState::Loading => {
+                                // Import does not return an activated timeline.
+                                info!("discarding priority boost for logical size calculation because timeline is not yet active");
+                            }
+                            TimelineState::Active => {
+                                // activation should be setting the once cell
+                                warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
+                                debug_assert!(false);
+                            }
                        }
                    }
-                };
+                }
            }
        }

--- a/pageserver/src/tenant/timeline/import_pgdata.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata.rs
@@ -0,0 +1,218 @@
+use std::sync::Arc;
+
+use anyhow::{bail, Context};
+use remote_storage::RemotePath;
+use tokio_util::sync::CancellationToken;
+use tracing::{info, info_span, Instrument};
+use utils::lsn::Lsn;
+
+use crate::{context::RequestContext, tenant::metadata::TimelineMetadata};
+
+use super::Timeline;
+
+mod flow;
+mod importbucket_client;
+mod importbucket_format;
+pub(crate) mod index_part_format;
+pub(crate) mod upcall_api;
+
+pub async fn doit(
+    timeline: &Arc<Timeline>,
+    index_part: index_part_format::Root,
+    ctx: &RequestContext,
+    cancel: CancellationToken,
+) -> anyhow::Result<()> {
+    let index_part_format::Root::V1(v1) = index_part;
+    let index_part_format::InProgress {
+        location,
+        idempotency_key,
+        started_at,
+    } = match v1 {
+        index_part_format::V1::Done(_) => return Ok(()),
+        index_part_format::V1::InProgress(in_progress) => in_progress,
+    };
+
+    let storage = importbucket_client::new(timeline.conf, &location, cancel.clone()).await?;
+
+    info!("get spec early so we know we'll be able to upcall when done");
+    let Some(spec) = storage.get_spec().await? else {
+        bail!("spec not found")
+    };
+
+    let upcall_client =
+        upcall_api::Client::new(timeline.conf, cancel.clone()).context("create upcall client")?;
+
+    //
+    // send an early progress update to clean up k8s job early and generate potentially useful logs
+    //
+    info!("send early progress update");
+    upcall_client
+        .send_progress_until_success(&spec)
+        .instrument(info_span!("early_progress_update"))
+        .await?;
+
+    let status_prefix = RemotePath::from_string("status").unwrap();
+
+    //
+    // See if shard is done.
+    // TODO: incorporate generations into status key for split brain safety. Figure out together with checkpointing.
+    //
+    let shard_status_key =
+        status_prefix.join(format!("shard-{}", timeline.tenant_shard_id.shard_slug()));
+    let shard_status: Option<importbucket_format::ShardStatus> =
+        storage.get_json(&shard_status_key).await?;
+    info!(?shard_status, "peeking shard status");
+    if shard_status.map(|st| st.done).unwrap_or(false) {
+        info!("shard status indicates that the shard is done, skipping import");
+    } else {
+        // TODO: checkpoint the progress into the IndexPart instead of restarting
+        // from the beginning.
+
+        //
+        // Wipe the slate clean - the flow does not allow resuming.
+        // We can implement resuming in the future by checkpointing the progress into the IndexPart.
+        //
+        info!("wipe the slate clean");
+        {
+            // TODO: do we need to hold GC lock for this?
+            let mut guard = timeline.layers.write().await;
+            assert!(
+                guard.layer_map()?.open_layer.is_none(),
+                "while importing, there should be no in-memory layer" // this just seems like a good place to assert it
+            );
+            let all_layers_keys = guard.all_persistent_layers();
+            let all_layers: Vec<_> = all_layers_keys
+                .iter()
+                .map(|key| guard.get_from_key(key))
+                .collect();
+            let open = guard.open_mut().context("open_mut")?;
+
+            timeline.remote_client.schedule_gc_update(&all_layers)?;
+            open.finish_gc_timeline(&all_layers);
+        }
+
+        //
+        // Wait for pgdata to finish uploading
+        //
+        info!("wait for pgdata to reach status 'done'");
+        let pgdata_status_key = status_prefix.join("pgdata");
+        loop {
+            let res = async {
+                let pgdata_status: Option<importbucket_format::PgdataStatus> = storage
+                    .get_json(&pgdata_status_key)
+                    .await
+                    .context("get pgdata status")?;
+                info!(?pgdata_status, "peeking pgdata status");
+                if pgdata_status.map(|st| st.done).unwrap_or(false) {
+                    Ok(())
+                } else {
+                    Err(anyhow::anyhow!("pgdata not done yet"))
+                }
+            }
+            .await;
+            match res {
+                Ok(_) => break,
+                Err(err) => {
+                    info!(?err, "indefintely waiting for pgdata to finish");
+                    if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
+                        .await
+                        .is_ok()
+                    {
+                        bail!("cancelled while waiting for pgdata");
+                    }
+                }
+            }
+        }
+
+        //
+        // Do the import
+        //
+        info!("do the import");
+        let control_file = storage.get_control_file().await?;
+        let base_lsn = control_file.base_lsn();
+
+        info!("update TimelineMetadata based on LSNs from control file");
+        {
+            let pg_version = control_file.pg_version();
+            let _ctx: &RequestContext = ctx;
+            async move {
+                // FIXME: The 'disk_consistent_lsn' should be the LSN at the *end* of the
+                // checkpoint record, and prev_record_lsn should point to its beginning.
+                // We should read the real end of the record from the WAL, but here we
+                // just fake it.
+                let disk_consistent_lsn = Lsn(base_lsn.0 + 8);
+                let prev_record_lsn = base_lsn;
+                let metadata = TimelineMetadata::new(
+                    disk_consistent_lsn,
+                    Some(prev_record_lsn),
+                    None,     // no ancestor
+                    Lsn(0),   // no ancestor lsn
+                    base_lsn, // latest_gc_cutoff_lsn
+                    base_lsn, // initdb_lsn
+                    pg_version,
+                );
+
+                let _start_lsn = disk_consistent_lsn + 1;
+
+                timeline
+                    .remote_client
+                    .schedule_index_upload_for_full_metadata_update(&metadata)?;
+
+                timeline.remote_client.wait_completion().await?;
+
+                anyhow::Ok(())
+            }
+        }
+        .await?;
+
+        flow::run(
+            timeline.clone(),
+            base_lsn,
+            control_file,
+            storage.clone(),
+            ctx,
+        )
+        .await?;
+
+        //
+        // Communicate that shard is done.
+        //
+        storage
+            .put_json(
+                &shard_status_key,
+                &importbucket_format::ShardStatus { done: true },
+            )
+            .await
+            .context("put shard status")?;
+    }
+
+    //
+    // Ensure at-least-once deliver of the upcall to cplane
+    // before we mark the task as done and never come here again.
+    //
+    info!("send final progress update");
+    upcall_client
+        .send_progress_until_success(&spec)
+        .instrument(info_span!("final_progress_update"))
+        .await?;
+
+    //
+    // Mark as done in index_part.
+    // This makes subsequent timeline loads enter the normal load code path
+    // instead of spawning the import task and calling this here function.
+    //
+    info!("mark import as complete in index part");
+    timeline
+        .remote_client
+        .schedule_index_upload_for_import_pgdata_state_update(Some(index_part_format::Root::V1(
+            index_part_format::V1::Done(index_part_format::Done {
+                idempotency_key,
+                started_at,
+                finished_at: chrono::Utc::now().naive_utc(),
+            }),
+        )))?;
+
+    timeline.remote_client.wait_completion().await?;
+
+    Ok(())
+}
--- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs
@@ -0,0 +1,798 @@
+//! Import a PGDATA directory into an empty root timeline.
+//!
+//! This module is adapted hackathon code by Heikki and Stas.
+//! Other code in the parent module was written by Christian as part of a customer PoC.
+//!
+//! The hackathon code was producing image layer files as a free-standing program.
+//!
+//! It has been modified to
+//! - run inside a running Pageserver, within the proper lifecycles of Timeline -> Tenant(Shard)
+//! - => sharding-awareness: produce image layers with only the data relevant for this shard
+//! - => S3 as the source for the PGDATA instead of local filesystem
+//!
+//! TODOs before productionization:
+//! - ChunkProcessingJob size / ImportJob::total_size does not account for sharding.
+//!   => produced image layers likely too small.
+//! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.
+//! - asserts / unwraps need to be replaced with errors
+//! - don't trust remote objects will be small (=prevent OOMs in those cases)
+//!     - limit all in-memory buffers in size, or download to disk and read from there
+//! - limit task concurrency
+//! - generally play nice with other tenants in the system
+//!   - importbucket is different bucket than main pageserver storage, so, should be fine wrt S3 rate limits
+//!   - but concerns like network bandwidth, local disk write bandwidth, local disk capacity, etc
+//! - integrate with layer eviction system
+//! - audit for Tenant::cancel nor Timeline::cancel responsivity
+//! - audit for Tenant/Timeline gate holding (we spawn tokio tasks during this flow!)
+//!
+//! An incomplete set of TODOs from the Hackathon:
+//! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)
+
+use std::sync::Arc;
+
+use anyhow::{bail, ensure};
+use bytes::Bytes;
+
+use itertools::Itertools;
+use pageserver_api::{
+    key::{rel_block_to_key, rel_dir_to_key, rel_size_to_key, relmap_file_key, DBDIR_KEY},
+    reltag::RelTag,
+    shard::ShardIdentity,
+};
+use postgres_ffi::{pg_constants, relfile_utils::parse_relfilename, BLCKSZ};
+use tokio::task::JoinSet;
+use tracing::{debug, info_span, instrument, Instrument};
+
+use crate::{
+    assert_u64_eq_usize::UsizeIsU64,
+    pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory},
+};
+use crate::{
+    context::{DownloadBehavior, RequestContext},
+    pgdatadir_mapping::{DbDirectory, RelDirectory},
+    task_mgr::TaskKind,
+    tenant::storage_layer::{ImageLayerWriter, Layer},
+};
+
+use pageserver_api::key::Key;
+use pageserver_api::key::{
+    slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, CHECKPOINT_KEY, CONTROLFILE_KEY,
+    TWOPHASEDIR_KEY,
+};
+use pageserver_api::keyspace::singleton_range;
+use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range};
+use pageserver_api::reltag::SlruKind;
+use utils::bin_ser::BeSer;
+use utils::lsn::Lsn;
+
+use std::collections::HashSet;
+use std::ops::Range;
+
+use super::{
+    importbucket_client::{ControlFile, RemoteStorageWrapper},
+    Timeline,
+};
+
+use remote_storage::RemotePath;
+
+pub async fn run(
+    timeline: Arc<Timeline>,
+    pgdata_lsn: Lsn,
+    control_file: ControlFile,
+    storage: RemoteStorageWrapper,
+    ctx: &RequestContext,
+) -> anyhow::Result<()> {
+    Flow {
+        timeline,
+        pgdata_lsn,
+        control_file,
+        tasks: Vec::new(),
+        storage,
+    }
+    .run(ctx)
+    .await
+}
+
+struct Flow {
+    timeline: Arc<Timeline>,
+    pgdata_lsn: Lsn,
+    control_file: ControlFile,
+    tasks: Vec<AnyImportTask>,
+    storage: RemoteStorageWrapper,
+}
+
+impl Flow {
+    /// Perform the ingestion into [`Self::timeline`].
+    /// Assumes the timeline is empty (= no layers).
+    pub async fn run(mut self, ctx: &RequestContext) -> anyhow::Result<()> {
+        let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
+
+        self.pgdata_lsn = pgdata_lsn;
+
+        let datadir = PgDataDir::new(&self.storage).await?;
+
+        // Import dbdir (00:00:00 keyspace)
+        // This is just constructed here, but will be written to the image layer in the first call to import_db()
+        let dbdir_buf = Bytes::from(DbDirectory::ser(&DbDirectory {
+            dbdirs: datadir
+                .dbs
+                .iter()
+                .map(|db| ((db.spcnode, db.dboid), true))
+                .collect(),
+        })?);
+        self.tasks
+            .push(ImportSingleKeyTask::new(DBDIR_KEY, dbdir_buf).into());
+
+        // Import databases (00:spcnode:dbnode keyspace for each db)
+        for db in datadir.dbs {
+            self.import_db(&db).await?;
+        }
+
+        // Import SLRUs
+
+        // pg_xact (01:00 keyspace)
+        self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
+            .await?;
+        // pg_multixact/members (01:01 keyspace)
+        self.import_slru(
+            SlruKind::MultiXactMembers,
+            &self.storage.pgdata().join("pg_multixact/members"),
+        )
+        .await?;
+        // pg_multixact/offsets (01:02 keyspace)
+        self.import_slru(
+            SlruKind::MultiXactOffsets,
+            &self.storage.pgdata().join("pg_multixact/offsets"),
+        )
+        .await?;
+
+        // Import pg_twophase.
+        // TODO: as empty
+        let twophasedir_buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
+            xids: HashSet::new(),
+        })?;
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                TWOPHASEDIR_KEY,
+                Bytes::from(twophasedir_buf),
+            )));
+
+        // Controlfile, checkpoint
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                CONTROLFILE_KEY,
+                self.control_file.control_file_buf().clone(),
+            )));
+
+        let checkpoint_buf = self
+            .control_file
+            .control_file_data()
+            .checkPointCopy
+            .encode()?;
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                CHECKPOINT_KEY,
+                checkpoint_buf,
+            )));
+
+        // Assigns parts of key space to later parallel jobs
+        let mut last_end_key = Key::MIN;
+        let mut current_chunk = Vec::new();
+        let mut current_chunk_size: usize = 0;
+        let mut parallel_jobs = Vec::new();
+        for task in std::mem::take(&mut self.tasks).into_iter() {
+            if current_chunk_size + task.total_size() > 1024 * 1024 * 1024 {
+                let key_range = last_end_key..task.key_range().start;
+                parallel_jobs.push(ChunkProcessingJob::new(
+                    key_range.clone(),
+                    std::mem::take(&mut current_chunk),
+                    &self,
+                ));
+                last_end_key = key_range.end;
+                current_chunk_size = 0;
+            }
+            current_chunk_size += task.total_size();
+            current_chunk.push(task);
+        }
+        parallel_jobs.push(ChunkProcessingJob::new(
+            last_end_key..Key::MAX,
+            current_chunk,
+            &self,
+        ));
+
+        // Start all jobs simultaneosly
+        let mut work = JoinSet::new();
+        // TODO: semaphore?
+        for job in parallel_jobs {
+            let ctx: RequestContext =
+                ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);
+            work.spawn(async move { job.run(&ctx).await }.instrument(info_span!("parallel_job")));
+        }
+        let mut results = Vec::new();
+        while let Some(result) = work.join_next().await {
+            match result {
+                Ok(res) => {
+                    results.push(res);
+                }
+                Err(_joinset_err) => {
+                    results.push(Err(anyhow::anyhow!(
+                        "parallel job panicked or cancelled, check pageserver logs"
+                    )));
+                }
+            }
+        }
+
+        if results.iter().all(|r| r.is_ok()) {
+            Ok(())
+        } else {
+            let mut msg = String::new();
+            for result in results {
+                if let Err(err) = result {
+                    msg.push_str(&format!("{err:?}\n\n"));
+                }
+            }
+            bail!("Some parallel jobs failed:\n\n{msg}");
+        }
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(dboid=%db.dboid, tablespace=%db.spcnode, path=%db.path))]
+    async fn import_db(&mut self, db: &PgDataDirDb) -> anyhow::Result<()> {
+        debug!("start");
+        scopeguard::defer! {
+            debug!("return");
+        }
+
+        // Import relmap (00:spcnode:dbnode:00:*:00)
+        let relmap_key = relmap_file_key(db.spcnode, db.dboid);
+        debug!("Constructing relmap entry, key {relmap_key}");
+        let relmap_path = db.path.join("pg_filenode.map");
+        let relmap_buf = self.storage.get(&relmap_path).await?;
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                relmap_key, relmap_buf,
+            )));
+
+        // Import reldir (00:spcnode:dbnode:00:*:01)
+        let reldir_key = rel_dir_to_key(db.spcnode, db.dboid);
+        debug!("Constructing reldirs entry, key {reldir_key}");
+        let reldir_buf = RelDirectory::ser(&RelDirectory {
+            rels: db
+                .files
+                .iter()
+                .map(|f| (f.rel_tag.relnode, f.rel_tag.forknum))
+                .collect(),
+        })?;
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                reldir_key,
+                Bytes::from(reldir_buf),
+            )));
+
+        // Import data (00:spcnode:dbnode:reloid:fork:blk) and set sizes for each last
+        // segment in a given relation (00:spcnode:dbnode:reloid:fork:ff)
+        for file in &db.files {
+            debug!(%file.path, %file.filesize, "importing file");
+            let len = file.filesize;
+            ensure!(len % 8192 == 0);
+            let start_blk: u32 = file.segno * (1024 * 1024 * 1024 / 8192);
+            let start_key = rel_block_to_key(file.rel_tag, start_blk);
+            let end_key = rel_block_to_key(file.rel_tag, start_blk + (len / 8192) as u32);
+            self.tasks
+                .push(AnyImportTask::RelBlocks(ImportRelBlocksTask::new(
+                    *self.timeline.get_shard_identity(),
+                    start_key..end_key,
+                    &file.path,
+                    self.storage.clone(),
+                )));
+
+            // Set relsize for the last segment (00:spcnode:dbnode:reloid:fork:ff)
+            if let Some(nblocks) = file.nblocks {
+                let size_key = rel_size_to_key(file.rel_tag);
+                //debug!("Setting relation size (path={path}, rel_tag={rel_tag}, segno={segno}) to {nblocks}, key {size_key}");
+                let buf = nblocks.to_le_bytes();
+                self.tasks
+                    .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                        size_key,
+                        Bytes::from(buf.to_vec()),
+                    )));
+            }
+        }
+
+        Ok(())
+    }
+
+    async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {
+        let segments = self.storage.listfilesindir(path).await?;
+        let segments: Vec<(String, u32, usize)> = segments
+            .into_iter()
+            .filter_map(|(path, size)| {
+                let filename = path.object_name()?;
+                let segno = u32::from_str_radix(filename, 16).ok()?;
+                Some((filename.to_string(), segno, size))
+            })
+            .collect();
+
+        // Write SlruDir
+        let slrudir_key = slru_dir_to_key(kind);
+        let segnos: HashSet<u32> = segments
+            .iter()
+            .map(|(_path, segno, _size)| *segno)
+            .collect();
+        let slrudir = SlruSegmentDirectory { segments: segnos };
+        let slrudir_buf = SlruSegmentDirectory::ser(&slrudir)?;
+        self.tasks
+            .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                slrudir_key,
+                Bytes::from(slrudir_buf),
+            )));
+
+        for (segpath, segno, size) in segments {
+            // SlruSegBlocks for each segment
+            let p = path.join(&segpath);
+            let file_size = size;
+            ensure!(file_size % 8192 == 0);
+            let nblocks = u32::try_from(file_size / 8192)?;
+            let start_key = slru_block_to_key(kind, segno, 0);
+            let end_key = slru_block_to_key(kind, segno, nblocks);
+            debug!(%p, segno=%segno, %size, %start_key, %end_key, "scheduling SLRU segment");
+            self.tasks
+                .push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new(
+                    *self.timeline.get_shard_identity(),
+                    start_key..end_key,
+                    &p,
+                    self.storage.clone(),
+                )));
+
+            // Followed by SlruSegSize
+            let segsize_key = slru_segment_size_to_key(kind, segno);
+            let segsize_buf = nblocks.to_le_bytes();
+            self.tasks
+                .push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
+                    segsize_key,
+                    Bytes::copy_from_slice(&segsize_buf),
+                )));
+        }
+        Ok(())
+    }
+}
+
+//
+// dbdir iteration tools
+//
+
+struct PgDataDir {
+    pub dbs: Vec<PgDataDirDb>, // spcnode, dboid, path
+}
+
+struct PgDataDirDb {
+    pub spcnode: u32,
+    pub dboid: u32,
+    pub path: RemotePath,
+    pub files: Vec<PgDataDirDbFile>,
+}
+
+struct PgDataDirDbFile {
+    pub path: RemotePath,
+    pub rel_tag: RelTag,
+    pub segno: u32,
+    pub filesize: usize,
+    // Cummulative size of the given fork, set only for the last segment of that fork
+    pub nblocks: Option<usize>,
+}
+
+impl PgDataDir {
+    async fn new(storage: &RemoteStorageWrapper) -> anyhow::Result<Self> {
+        let datadir_path = storage.pgdata();
+        // Import ordinary databases, DEFAULTTABLESPACE_OID is smaller than GLOBALTABLESPACE_OID, so import them first
+        // Traverse database in increasing oid order
+
+        let basedir = &datadir_path.join("base");
+        let db_oids: Vec<_> = storage
+            .listdir(basedir)
+            .await?
+            .into_iter()
+            .filter_map(|path| path.object_name().and_then(|name| name.parse::<u32>().ok()))
+            .sorted()
+            .collect();
+        debug!(?db_oids, "found databases");
+        let mut databases = Vec::new();
+        for dboid in db_oids {
+            databases.push(
+                PgDataDirDb::new(
+                    storage,
+                    &basedir.join(dboid.to_string()),
+                    pg_constants::DEFAULTTABLESPACE_OID,
+                    dboid,
+                    &datadir_path,
+                )
+                .await?,
+            );
+        }
+
+        // special case for global catalogs
+        databases.push(
+            PgDataDirDb::new(
+                storage,
+                &datadir_path.join("global"),
+                postgres_ffi::pg_constants::GLOBALTABLESPACE_OID,
+                0,
+                &datadir_path,
+            )
+            .await?,
+        );
+
+        databases.sort_by_key(|db| (db.spcnode, db.dboid));
+
+        Ok(Self { dbs: databases })
+    }
+}
+
+impl PgDataDirDb {
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%dboid, %db_path))]
+    async fn new(
+        storage: &RemoteStorageWrapper,
+        db_path: &RemotePath,
+        spcnode: u32,
+        dboid: u32,
+        datadir_path: &RemotePath,
+    ) -> anyhow::Result<Self> {
+        let mut files: Vec<PgDataDirDbFile> = storage
+            .listfilesindir(db_path)
+            .await?
+            .into_iter()
+            .filter_map(|(path, size)| {
+                debug!(%path, %size, "found file in dbdir");
+                path.object_name().and_then(|name| {
+                    // returns (relnode, forknum, segno)
+                    parse_relfilename(name).ok().map(|x| (size, x))
+                })
+            })
+            .sorted_by_key(|(_, relfilename)| *relfilename)
+            .map(|(filesize, (relnode, forknum, segno))| {
+                let rel_tag = RelTag {
+                    spcnode,
+                    dbnode: dboid,
+                    relnode,
+                    forknum,
+                };
+
+                let path = datadir_path.join(rel_tag.to_segfile_name(segno));
+                assert!(filesize % BLCKSZ as usize == 0); // TODO: this should result in an error
+                let nblocks = filesize / BLCKSZ as usize;
+
+                PgDataDirDbFile {
+                    path,
+                    filesize,
+                    rel_tag,
+                    segno,
+                    nblocks: Some(nblocks), // first non-cummulative sizes
+                }
+            })
+            .collect();
+
+        // Set cummulative sizes. Do all of that math here, so that later we could easier
+        // parallelize over segments and know with which segments we need to write relsize
+        // entry.
+        let mut cumulative_nblocks: usize = 0;
+        let mut prev_rel_tag: Option<RelTag> = None;
+        for i in 0..files.len() {
+            if prev_rel_tag == Some(files[i].rel_tag) {
+                cumulative_nblocks += files[i].nblocks.unwrap();
+            } else {
+                cumulative_nblocks = files[i].nblocks.unwrap();
+            }
+
+            files[i].nblocks = if i == files.len() - 1 || files[i + 1].rel_tag != files[i].rel_tag {
+                Some(cumulative_nblocks)
+            } else {
+                None
+            };
+
+            prev_rel_tag = Some(files[i].rel_tag);
+        }
+
+        Ok(PgDataDirDb {
+            files,
+            path: db_path.clone(),
+            spcnode,
+            dboid,
+        })
+    }
+}
+
+trait ImportTask {
+    fn key_range(&self) -> Range<Key>;
+
+    fn total_size(&self) -> usize {
+        // TODO: revisit this
+        if is_contiguous_range(&self.key_range()) {
+            contiguous_range_len(&self.key_range()) as usize * 8192
+        } else {
+            u32::MAX as usize
+        }
+    }
+
+    async fn doit(
+        self,
+        layer_writer: &mut ImageLayerWriter,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<usize>;
+}
+
+struct ImportSingleKeyTask {
+    key: Key,
+    buf: Bytes,
+}
+
+impl ImportSingleKeyTask {
+    fn new(key: Key, buf: Bytes) -> Self {
+        ImportSingleKeyTask { key, buf }
+    }
+}
+
+impl ImportTask for ImportSingleKeyTask {
+    fn key_range(&self) -> Range<Key> {
+        singleton_range(self.key)
+    }
+
+    async fn doit(
+        self,
+        layer_writer: &mut ImageLayerWriter,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<usize> {
+        layer_writer.put_image(self.key, self.buf, ctx).await?;
+        Ok(1)
+    }
+}
+
+struct ImportRelBlocksTask {
+    shard_identity: ShardIdentity,
+    key_range: Range<Key>,
+    path: RemotePath,
+    storage: RemoteStorageWrapper,
+}
+
+impl ImportRelBlocksTask {
+    fn new(
+        shard_identity: ShardIdentity,
+        key_range: Range<Key>,
+        path: &RemotePath,
+        storage: RemoteStorageWrapper,
+    ) -> Self {
+        ImportRelBlocksTask {
+            shard_identity,
+            key_range,
+            path: path.clone(),
+            storage,
+        }
+    }
+}
+
+impl ImportTask for ImportRelBlocksTask {
+    fn key_range(&self) -> Range<Key> {
+        self.key_range.clone()
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%self.path))]
+    async fn doit(
+        self,
+        layer_writer: &mut ImageLayerWriter,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<usize> {
+        debug!("Importing relation file");
+
+        let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
+        let (rel_tag_end, end_blk) = self.key_range.end.to_rel_block()?;
+        assert_eq!(rel_tag, rel_tag_end);
+
+        let ranges = (start_blk..end_blk)
+            .enumerate()
+            .filter_map(|(i, blknum)| {
+                let key = rel_block_to_key(rel_tag, blknum);
+                if self.shard_identity.is_key_disposable(&key) {
+                    return None;
+                }
+                let file_offset = i.checked_mul(8192).unwrap();
+                Some((
+                    vec![key],
+                    file_offset,
+                    file_offset.checked_add(8192).unwrap(),
+                ))
+            })
+            .coalesce(|(mut acc, acc_start, acc_end), (mut key, start, end)| {
+                assert_eq!(key.len(), 1);
+                assert!(!acc.is_empty());
+                assert!(acc_end > acc_start);
+                if acc_end == start /* TODO additional max range check here, to limit memory consumption per task to X */ {
+                    acc.push(key.pop().unwrap());
+                    Ok((acc, acc_start, end))
+                } else {
+                    Err(((acc, acc_start, acc_end), (key, start, end)))
+                }
+            });
+
+        let mut nimages = 0;
+        for (keys, range_start, range_end) in ranges {
+            let range_buf = self
+                .storage
+                .get_range(&self.path, range_start.into_u64(), range_end.into_u64())
+                .await?;
+            let mut buf = Bytes::from(range_buf);
+            // TODO: batched writes
+            for key in keys {
+                let image = buf.split_to(8192);
+                layer_writer.put_image(key, image, ctx).await?;
+                nimages += 1;
+            }
+        }
+
+        Ok(nimages)
+    }
+}
+
+struct ImportSlruBlocksTask {
+    shard_identity: ShardIdentity,
+    key_range: Range<Key>,
+    path: RemotePath,
+    storage: RemoteStorageWrapper,
+}
+
+impl ImportSlruBlocksTask {
+    fn new(
+        shard_identity: ShardIdentity,
+        key_range: Range<Key>,
+        path: &RemotePath,
+        storage: RemoteStorageWrapper,
+    ) -> Self {
+        ImportSlruBlocksTask {
+            shard_identity,
+            key_range,
+            path: path.clone(),
+            storage,
+        }
+    }
+}
+
+impl ImportTask for ImportSlruBlocksTask {
+    fn key_range(&self) -> Range<Key> {
+        self.key_range.clone()
+    }
+
+    async fn doit(
+        self,
+        layer_writer: &mut ImageLayerWriter,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<usize> {
+        debug!("Importing SLRU segment file {}", self.path);
+        let buf = self.storage.get(&self.path).await?;
+
+        let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;
+        let (_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;
+        let mut blknum = start_blk;
+        let mut nimages = 0;
+        let mut file_offset = 0;
+        while blknum < end_blk {
+            let key = slru_block_to_key(kind, segno, blknum);
+            assert!(
+                !self.shard_identity.is_key_disposable(&key),
+                "SLRU keys need to go into every shard"
+            );
+            let buf = &buf[file_offset..(file_offset + 8192)];
+            file_offset += 8192;
+            layer_writer
+                .put_image(key, Bytes::copy_from_slice(buf), ctx)
+                .await?;
+            blknum += 1;
+            nimages += 1;
+        }
+        Ok(nimages)
+    }
+}
+
+enum AnyImportTask {
+    SingleKey(ImportSingleKeyTask),
+    RelBlocks(ImportRelBlocksTask),
+    SlruBlocks(ImportSlruBlocksTask),
+}
+
+impl ImportTask for AnyImportTask {
+    fn key_range(&self) -> Range<Key> {
+        match self {
+            Self::SingleKey(t) => t.key_range(),
+            Self::RelBlocks(t) => t.key_range(),
+            Self::SlruBlocks(t) => t.key_range(),
+        }
+    }
+    /// returns the number of images put into the `layer_writer`
+    async fn doit(
+        self,
+        layer_writer: &mut ImageLayerWriter,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<usize> {
+        match self {
+            Self::SingleKey(t) => t.doit(layer_writer, ctx).await,
+            Self::RelBlocks(t) => t.doit(layer_writer, ctx).await,
+            Self::SlruBlocks(t) => t.doit(layer_writer, ctx).await,
+        }
+    }
+}
+
+impl From<ImportSingleKeyTask> for AnyImportTask {
+    fn from(t: ImportSingleKeyTask) -> Self {
+        Self::SingleKey(t)
+    }
+}
+
+impl From<ImportRelBlocksTask> for AnyImportTask {
+    fn from(t: ImportRelBlocksTask) -> Self {
+        Self::RelBlocks(t)
+    }
+}
+
+impl From<ImportSlruBlocksTask> for AnyImportTask {
+    fn from(t: ImportSlruBlocksTask) -> Self {
+        Self::SlruBlocks(t)
+    }
+}
+
+struct ChunkProcessingJob {
+    timeline: Arc<Timeline>,
+    range: Range<Key>,
+    tasks: Vec<AnyImportTask>,
+
+    pgdata_lsn: Lsn,
+}
+
+impl ChunkProcessingJob {
+    fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, env: &Flow) -> Self {
+        assert!(env.pgdata_lsn.is_valid());
+        Self {
+            timeline: env.timeline.clone(),
+            range,
+            tasks,
+            pgdata_lsn: env.pgdata_lsn,
+        }
+    }
+
+    async fn run(self, ctx: &RequestContext) -> anyhow::Result<()> {
+        let mut writer = ImageLayerWriter::new(
+            self.timeline.conf,
+            self.timeline.timeline_id,
+            self.timeline.tenant_shard_id,
+            &self.range,
+            self.pgdata_lsn,
+            ctx,
+        )
+        .await?;
+
+        let mut nimages = 0;
+        for task in self.tasks {
+            nimages += task.doit(&mut writer, ctx).await?;
+        }
+
+        let resident_layer = if nimages > 0 {
+            let (desc, path) = writer.finish(ctx).await?;
+            Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?
+        } else {
+            // dropping the writer cleans up
+            return Ok(());
+        };
+
+        // this is sharing the same code as create_image_layers
+        let mut guard = self.timeline.layers.write().await;
+        guard
+            .open_mut()?
+            .track_new_image_layers(&[resident_layer.clone()], &self.timeline.metrics);
+        crate::tenant::timeline::drop_wlock(guard);
+
+        // Schedule the layer for upload but don't add barriers such as
+        // wait for completion or index upload, so we don't inhibit upload parallelism.
+        // TODO: limit upload parallelism somehow (e.g. by limiting concurrency of jobs?)
+        // TODO: or regulate parallelism by upload queue depth? Prob should happen at a higher level.
+        self.timeline
+            .remote_client
+            .schedule_layer_file_upload(resident_layer)?;
+
+        Ok(())
+    }
+}
--- a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs
@@ -0,0 +1,315 @@
+use std::{ops::Bound, sync::Arc};
+
+use anyhow::Context;
+use bytes::Bytes;
+use postgres_ffi::ControlFileData;
+use remote_storage::{
+    Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingObject, RemotePath,
+};
+use serde::de::DeserializeOwned;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, info, instrument};
+use utils::lsn::Lsn;
+
+use crate::{assert_u64_eq_usize::U64IsUsize, config::PageServerConf};
+
+use super::{importbucket_format, index_part_format};
+
+pub async fn new(
+    conf: &'static PageServerConf,
+    location: &index_part_format::Location,
+    cancel: CancellationToken,
+) -> Result<RemoteStorageWrapper, anyhow::Error> {
+    // FIXME: we probably want some timeout, and we might be able to assume the max file
+    // size on S3 is 1GiB (postgres segment size). But the problem is that the individual
+    // downloaders don't know enough about concurrent downloads to make a guess on the
+    // expected bandwidth and resulting best timeout.
+    let timeout = std::time::Duration::from_secs(24 * 60 * 60);
+    let location_storage = match location {
+        #[cfg(feature = "testing")]
+        index_part_format::Location::LocalFs { path } => {
+            GenericRemoteStorage::LocalFs(remote_storage::LocalFs::new(path.clone(), timeout)?)
+        }
+        index_part_format::Location::AwsS3 {
+            region,
+            bucket,
+            key,
+        } => {
+            // TODO: think about security implications of letting the client specify the bucket & prefix.
+            // It's the most flexible right now, but, possibly we want to move bucket name into PS conf
+            // and force the timeline_id into the prefix?
+            GenericRemoteStorage::AwsS3(Arc::new(
+                remote_storage::S3Bucket::new(
+                    &remote_storage::S3Config {
+                        bucket_name: bucket.clone(),
+                        prefix_in_bucket: Some(key.clone()),
+                        bucket_region: region.clone(),
+                        endpoint: conf
+                            .import_pgdata_aws_endpoint_url
+                            .clone()
+                            .map(|url| url.to_string()), //  by specifying None here, remote_storage/aws-sdk-rust will infer from env
+                        concurrency_limit: 100.try_into().unwrap(), // TODO: think about this
+                        max_keys_per_list_response: Some(1000),     // TODO: think about this
+                        upload_storage_class: None,                 // irrelevant
+                    },
+                    timeout,
+                )
+                .await
+                .context("setup s3 bucket")?,
+            ))
+        }
+    };
+    let storage_wrapper = RemoteStorageWrapper::new(location_storage, cancel);
+    Ok(storage_wrapper)
+}
+
+/// Wrap [`remote_storage`] APIs to make it look a bit more like a filesystem API
+/// such as [`tokio::fs`], which was used in the original implementation of the import code.
+#[derive(Clone)]
+pub struct RemoteStorageWrapper {
+    storage: GenericRemoteStorage,
+    cancel: CancellationToken,
+}
+
+impl RemoteStorageWrapper {
+    pub fn new(storage: GenericRemoteStorage, cancel: CancellationToken) -> Self {
+        Self { storage, cancel }
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn listfilesindir(
+        &self,
+        path: &RemotePath,
+    ) -> Result<Vec<(RemotePath, usize)>, DownloadError> {
+        assert!(
+            path.object_name().is_some(),
+            "must specify dirname, without trailing slash"
+        );
+        let path = path.add_trailing_slash();
+
+        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
+            || async {
+                let Listing { keys, prefixes: _ } = self
+                    .storage
+                    .list(
+                        Some(&path),
+                        remote_storage::ListingMode::WithDelimiter,
+                        None,
+                        &self.cancel,
+                    )
+                    .await?;
+                let res = keys
+                    .into_iter()
+                    .map(|ListingObject { key, size, .. }| (key, size.into_usize()))
+                    .collect();
+                Ok(res)
+            },
+            &format!("listfilesindir {path:?}"),
+            &self.cancel,
+        )
+        .await;
+        debug!(?res, "returning");
+        res
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn listdir(&self, path: &RemotePath) -> Result<Vec<RemotePath>, DownloadError> {
+        assert!(
+            path.object_name().is_some(),
+            "must specify dirname, without trailing slash"
+        );
+        let path = path.add_trailing_slash();
+
+        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
+            || async {
+                let Listing { keys, prefixes } = self
+                    .storage
+                    .list(
+                        Some(&path),
+                        remote_storage::ListingMode::WithDelimiter,
+                        None,
+                        &self.cancel,
+                    )
+                    .await?;
+                let res = keys
+                    .into_iter()
+                    .map(|ListingObject { key, .. }| key)
+                    .chain(prefixes.into_iter())
+                    .collect();
+                Ok(res)
+            },
+            &format!("listdir {path:?}"),
+            &self.cancel,
+        )
+        .await;
+        debug!(?res, "returning");
+        res
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn get(&self, path: &RemotePath) -> Result<Bytes, DownloadError> {
+        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
+            || async {
+                let Download {
+                    download_stream, ..
+                } = self
+                    .storage
+                    .download(path, &DownloadOpts::default(), &self.cancel)
+                    .await?;
+                let mut reader = tokio_util::io::StreamReader::new(download_stream);
+
+                // XXX optimize this, can we get the capacity hint from somewhere?
+                let mut buf = Vec::new();
+                tokio::io::copy_buf(&mut reader, &mut buf).await?;
+                Ok(Bytes::from(buf))
+            },
+            &format!("download {path:?}"),
+            &self.cancel,
+        )
+        .await;
+        debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
+        res
+    }
+
+    pub async fn get_spec(&self) -> Result<Option<importbucket_format::Spec>, anyhow::Error> {
+        self.get_json(&RemotePath::from_string("spec.json").unwrap())
+            .await
+            .context("get spec")
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn get_json<T: DeserializeOwned>(
+        &self,
+        path: &RemotePath,
+    ) -> Result<Option<T>, DownloadError> {
+        let buf = match self.get(path).await {
+            Ok(buf) => buf,
+            Err(DownloadError::NotFound) => return Ok(None),
+            Err(err) => return Err(err),
+        };
+        let res = serde_json::from_slice(&buf)
+            .context("serialize")
+            // TODO: own error type
+            .map_err(DownloadError::Other)?;
+        Ok(Some(res))
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn put_json<T>(&self, path: &RemotePath, value: &T) -> anyhow::Result<()>
+    where
+        T: serde::Serialize,
+    {
+        let buf = serde_json::to_vec(value)?;
+        let bytes = Bytes::from(buf);
+        utils::backoff::retry(
+            || async {
+                let size = bytes.len();
+                let bytes = futures::stream::once(futures::future::ready(Ok(bytes.clone())));
+                self.storage
+                    .upload_storage_object(bytes, size, path, &self.cancel)
+                    .await
+            },
+            remote_storage::TimeoutOrCancel::caused_by_cancel,
+            1,
+            u32::MAX,
+            &format!("put json {path}"),
+            &self.cancel,
+        )
+        .await
+        .expect("practically infinite retries")
+    }
+
+    #[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
+    pub async fn get_range(
+        &self,
+        path: &RemotePath,
+        start_inclusive: u64,
+        end_exclusive: u64,
+    ) -> Result<Vec<u8>, DownloadError> {
+        let len = end_exclusive
+            .checked_sub(start_inclusive)
+            .unwrap()
+            .into_usize();
+        let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
+            || async {
+                let Download {
+                    download_stream, ..
+                } = self
+                    .storage
+                    .download(
+                        path,
+                        &DownloadOpts {
+                            etag: None,
+                            byte_start: Bound::Included(start_inclusive),
+                            byte_end: Bound::Excluded(end_exclusive)
+                        },
+                        &self.cancel)
+                    .await?;
+                let mut reader = tokio_util::io::StreamReader::new(download_stream);
+
+                let mut buf = Vec::with_capacity(len);
+                tokio::io::copy_buf(&mut reader, &mut buf).await?;
+                Ok(buf)
+            },
+            &format!("download range len=0x{len:x} [0x{start_inclusive:x},0x{end_exclusive:x}) from {path:?}"),
+            &self.cancel,
+        )
+        .await;
+        debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
+        res
+    }
+
+    pub fn pgdata(&self) -> RemotePath {
+        RemotePath::from_string("pgdata").unwrap()
+    }
+
+    pub async fn get_control_file(&self) -> Result<ControlFile, anyhow::Error> {
+        let control_file_path = self.pgdata().join("global/pg_control");
+        info!("get control file from {control_file_path}");
+        let control_file_buf = self.get(&control_file_path).await?;
+        ControlFile::new(control_file_buf)
+    }
+}
+
+pub struct ControlFile {
+    control_file_data: ControlFileData,
+    control_file_buf: Bytes,
+}
+
+impl ControlFile {
+    pub(crate) fn new(control_file_buf: Bytes) -> Result<Self, anyhow::Error> {
+        // XXX ControlFileData is version-specific, we're always using v14 here. v17 had changes.
+        let control_file_data = ControlFileData::decode(&control_file_buf)?;
+        let control_file = ControlFile {
+            control_file_data,
+            control_file_buf,
+        };
+        control_file.try_pg_version()?; // so that we can offer infallible pg_version()
+        Ok(control_file)
+    }
+    pub(crate) fn base_lsn(&self) -> Lsn {
+        Lsn(self.control_file_data.checkPoint).align()
+    }
+    pub(crate) fn pg_version(&self) -> u32 {
+        self.try_pg_version()
+            .expect("prepare() checks that try_pg_version doesn't error")
+    }
+    pub(crate) fn control_file_data(&self) -> &ControlFileData {
+        &self.control_file_data
+    }
+    pub(crate) fn control_file_buf(&self) -> &Bytes {
+        &self.control_file_buf
+    }
+    fn try_pg_version(&self) -> anyhow::Result<u32> {
+        Ok(match self.control_file_data.catalog_version_no {
+            // thesea are from catversion.h
+            202107181 => 14,
+            202209061 => 15,
+            202307071 => 16,
+            /* XXX pg17 */
+            catversion => {
+                anyhow::bail!("unrecognized catalog version {catversion}")
+            }
+        })
+    }
+}
--- a/pageserver/src/tenant/timeline/import_pgdata/importbucket_format.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/importbucket_format.rs
@@ -0,0 +1,20 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
+pub struct PgdataStatus {
+    pub done: bool,
+    // TODO: remaining fields
+}
+
+#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
+pub struct ShardStatus {
+    pub done: bool,
+    // TODO: remaining fields
+}
+
+// TODO: dedupe with fast_import code
+#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
+pub struct Spec {
+    pub project_id: String,
+    pub branch_id: String,
+}
--- a/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs
@@ -0,0 +1,68 @@
+use serde::{Deserialize, Serialize};
+
+#[cfg(feature = "testing")]
+use camino::Utf8PathBuf;
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub enum Root {
+    V1(V1),
+}
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub enum V1 {
+    InProgress(InProgress),
+    Done(Done),
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(transparent)]
+pub struct IdempotencyKey(String);
+
+impl IdempotencyKey {
+    pub fn new(s: String) -> Self {
+        Self(s)
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct InProgress {
+    pub idempotency_key: IdempotencyKey,
+    pub location: Location,
+    pub started_at: chrono::NaiveDateTime,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct Done {
+    pub idempotency_key: IdempotencyKey,
+    pub started_at: chrono::NaiveDateTime,
+    pub finished_at: chrono::NaiveDateTime,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub enum Location {
+    #[cfg(feature = "testing")]
+    LocalFs { path: Utf8PathBuf },
+    AwsS3 {
+        region: String,
+        bucket: String,
+        key: String,
+    },
+}
+
+impl Root {
+    pub fn is_done(&self) -> bool {
+        match self {
+            Root::V1(v1) => match v1 {
+                V1::Done(_) => true,
+                V1::InProgress(_) => false,
+            },
+        }
+    }
+    pub fn idempotency_key(&self) -> &IdempotencyKey {
+        match self {
+            Root::V1(v1) => match v1 {
+                V1::InProgress(in_progress) => &in_progress.idempotency_key,
+                V1::Done(done) => &done.idempotency_key,
+            },
+        }
+    }
+}
--- a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
@@ -0,0 +1,119 @@
+//! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate.
+use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt};
+use serde::{Deserialize, Serialize};
+use tokio_util::sync::CancellationToken;
+use tracing::error;
+
+use crate::config::PageServerConf;
+use reqwest::Method;
+
+use super::importbucket_format::Spec;
+
+pub struct Client {
+    base_url: String,
+    authorization_header: Option<String>,
+    client: reqwest::Client,
+    cancel: CancellationToken,
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Serialize, Deserialize, Debug)]
+struct ImportProgressRequest {
+    // no fields yet, not sure if there every will be any
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+struct ImportProgressResponse {
+    // we don't care
+}
+
+impl Client {
+    pub fn new(conf: &PageServerConf, cancel: CancellationToken) -> anyhow::Result<Self> {
+        let Some(ref base_url) = conf.import_pgdata_upcall_api else {
+            anyhow::bail!("import_pgdata_upcall_api is not configured")
+        };
+        Ok(Self {
+            base_url: base_url.to_string(),
+            client: reqwest::Client::new(),
+            cancel,
+            authorization_header: conf
+                .import_pgdata_upcall_api_token
+                .as_ref()
+                .map(|secret_string| secret_string.get_contents())
+                .map(|jwt| format!("Bearer {jwt}")),
+        })
+    }
+
+    fn start_request<U: reqwest::IntoUrl>(
+        &self,
+        method: Method,
+        uri: U,
+    ) -> reqwest::RequestBuilder {
+        let req = self.client.request(method, uri);
+        if let Some(value) = &self.authorization_header {
+            req.header(reqwest::header::AUTHORIZATION, value)
+        } else {
+            req
+        }
+    }
+
+    async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(
+        &self,
+        method: Method,
+        uri: U,
+        body: B,
+    ) -> Result<reqwest::Response> {
+        self.start_request(method, uri)
+            .json(&body)
+            .send()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
+        &self,
+        method: Method,
+        uri: U,
+        body: B,
+    ) -> Result<reqwest::Response> {
+        let res = self.request_noerror(method, uri, body).await?;
+        let response = res.error_from_body().await?;
+        Ok(response)
+    }
+
+    pub async fn send_progress_once(&self, spec: &Spec) -> Result<()> {
+        let url = format!(
+            "{}/projects/{}/branches/{}/import_progress",
+            self.base_url, spec.project_id, spec.branch_id
+        );
+        let ImportProgressResponse {} = self
+            .request(Method::POST, url, &ImportProgressRequest {})
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)?;
+        Ok(())
+    }
+
+    pub async fn send_progress_until_success(&self, spec: &Spec) -> anyhow::Result<()> {
+        loop {
+            match self.send_progress_once(spec).await {
+                Ok(()) => return Ok(()),
+                Err(Error::Cancelled) => return Err(anyhow::anyhow!("cancelled")),
+                Err(err) => {
+                    error!(?err, "error sending progress, retrying");
+                    if tokio::time::timeout(
+                        std::time::Duration::from_secs(10),
+                        self.cancel.cancelled(),
+                    )
+                    .await
+                    .is_ok()
+                    {
+                        anyhow::bail!("cancelled while sending early progress update");
+                    }
+                }
+            }
+        }
+    }
+}
--- a/pageserver/src/tenant/timeline/uninit.rs
+++ b/pageserver/src/tenant/timeline/uninit.rs
@@ -3,7 +3,7 @@ use std::{collections::hash_map::Entry, fs, sync::Arc};
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use tracing::{error, info, info_span};
-use utils::{fs_ext, id::TimelineId, lsn::Lsn};
+use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard};

 use crate::{
    context::RequestContext,
@@ -23,14 +23,14 @@ use super::Timeline;
 pub struct UninitializedTimeline<'t> {
    pub(crate) owning_tenant: &'t Tenant,
    timeline_id: TimelineId,
-    raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
+    raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
 }

 impl<'t> UninitializedTimeline<'t> {
    pub(crate) fn new(
        owning_tenant: &'t Tenant,
        timeline_id: TimelineId,
-        raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
+        raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
    ) -> Self {
        Self {
            owning_tenant,
@@ -87,6 +87,10 @@ impl<'t> UninitializedTimeline<'t> {
        }
    }

+    pub(crate) fn finish_creation_myself(&mut self) -> (Arc<Timeline>, TimelineCreateGuard) {
+        self.raw_timeline.take().expect("already checked")
+    }
+
    /// Prepares timeline data by loading it from the basebackup archive.
    pub(crate) async fn import_basebackup_from_tar(
        self,
@@ -167,9 +171,10 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
 /// A guard for timeline creations in process: as long as this object exists, the timeline ID
 /// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
 #[must_use]
-pub(crate) struct TimelineCreateGuard<'t> {
-    owning_tenant: &'t Tenant,
-    timeline_id: TimelineId,
+pub(crate) struct TimelineCreateGuard {
+    pub(crate) _tenant_gate_guard: GateGuard,
+    pub(crate) owning_tenant: Arc<Tenant>,
+    pub(crate) timeline_id: TimelineId,
    pub(crate) timeline_path: Utf8PathBuf,
    pub(crate) idempotency: CreateTimelineIdempotency,
 }
@@ -184,20 +189,27 @@ pub(crate) enum TimelineExclusionError {
    },
    #[error("Already creating")]
    AlreadyCreating,
+    #[error("Shutting down")]
+    ShuttingDown,

    // e.g. I/O errors, or some failure deep in postgres initdb
    #[error(transparent)]
    Other(#[from] anyhow::Error),
 }

-impl<'t> TimelineCreateGuard<'t> {
+impl TimelineCreateGuard {
    pub(crate) fn new(
-        owning_tenant: &'t Tenant,
+        owning_tenant: &Arc<Tenant>,
        timeline_id: TimelineId,
        timeline_path: Utf8PathBuf,
        idempotency: CreateTimelineIdempotency,
        allow_offloaded: bool,
    ) -> Result<Self, TimelineExclusionError> {
+        let _tenant_gate_guard = owning_tenant
+            .gate
+            .enter()
+            .map_err(|_| TimelineExclusionError::ShuttingDown)?;
+
        // Lock order: this is the only place we take both locks.  During drop() we only
        // lock creating_timelines
        let timelines = owning_tenant.timelines.lock().unwrap();
@@ -225,8 +237,12 @@ impl<'t> TimelineCreateGuard<'t> {
            return Err(TimelineExclusionError::AlreadyCreating);
        }
        creating_timelines.insert(timeline_id);
+        drop(creating_timelines);
+        drop(timelines_offloaded);
+        drop(timelines);
        Ok(Self {
-            owning_tenant,
+            _tenant_gate_guard,
+            owning_tenant: Arc::clone(owning_tenant),
            timeline_id,
            timeline_path,
            idempotency,
@@ -234,7 +250,7 @@ impl<'t> TimelineCreateGuard<'t> {
    }
 }

-impl Drop for TimelineCreateGuard<'_> {
+impl Drop for TimelineCreateGuard {
    fn drop(&mut self) {
        self.owning_tenant
            .timelines_creating
--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -3,6 +3,7 @@ use super::storage_layer::ResidentLayer;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::remote_timeline_client::index::IndexPart;
 use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
+use std::collections::HashSet;
 use std::collections::{HashMap, VecDeque};
 use std::fmt::Debug;

@@ -14,7 +15,6 @@ use utils::lsn::AtomicLsn;
 use std::sync::atomic::AtomicU32;
 use utils::lsn::Lsn;

-#[cfg(feature = "testing")]
 use utils::generation::Generation;

 // clippy warns that Uninitialized is much smaller than Initialized, which wastes
@@ -38,6 +38,12 @@ impl UploadQueue {
    }
 }

+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub(crate) enum OpType {
+    MayReorder,
+    FlushDeletion,
+}
+
 /// This keeps track of queued and in-progress tasks.
 pub(crate) struct UploadQueueInitialized {
    /// Counter to assign task IDs
@@ -88,6 +94,9 @@ pub(crate) struct UploadQueueInitialized {
    #[cfg(feature = "testing")]
    pub(crate) dangling_files: HashMap<LayerName, Generation>,

+    /// Ensure we order file operations correctly.
+    pub(crate) recently_deleted: HashSet<(LayerName, Generation)>,
+
    /// Deletions that are blocked by the tenant configuration
    pub(crate) blocked_deletions: Vec<Delete>,

@@ -183,6 +192,7 @@ impl UploadQueue {
            queued_operations: VecDeque::new(),
            #[cfg(feature = "testing")]
            dangling_files: HashMap::new(),
+            recently_deleted: HashSet::new(),
            blocked_deletions: Vec::new(),
            shutting_down: false,
            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
@@ -224,6 +234,7 @@ impl UploadQueue {
            queued_operations: VecDeque::new(),
            #[cfg(feature = "testing")]
            dangling_files: HashMap::new(),
+            recently_deleted: HashSet::new(),
            blocked_deletions: Vec::new(),
            shutting_down: false,
            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
@@ -282,8 +293,8 @@ pub(crate) struct Delete {

 #[derive(Debug)]
 pub(crate) enum UploadOp {
-    /// Upload a layer file
-    UploadLayer(ResidentLayer, LayerFileMetadata),
+    /// Upload a layer file. The last field indicates the last operation for thie file.
+    UploadLayer(ResidentLayer, LayerFileMetadata, Option<OpType>),

    /// Upload a index_part.json file
    UploadMetadata {
@@ -305,11 +316,11 @@ pub(crate) enum UploadOp {
 impl std::fmt::Display for UploadOp {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
-            UploadOp::UploadLayer(layer, metadata) => {
+            UploadOp::UploadLayer(layer, metadata, mode) => {
                write!(
                    f,
-                    "UploadLayer({}, size={:?}, gen={:?})",
-                    layer, metadata.file_size, metadata.generation
+                    "UploadLayer({}, size={:?}, gen={:?}, mode={:?})",
+                    layer, metadata.file_size, metadata.generation, mode
                )
            }
            UploadOp::UploadMetadata { uploaded, .. } => {
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@@ -114,7 +114,6 @@ files = [
 [package.dependencies]
 aiohappyeyeballs = ">=2.3.0"
 aiosignal = ">=1.1.2"
-async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""}
 attrs = ">=17.3.0"
 frozenlist = ">=1.1.1"
 multidict = ">=4.5,<7.0"
@@ -219,10 +218,8 @@ files = [
 ]

 [package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
 idna = ">=2.8"
 sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}

 [package.extras]
 doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
@@ -737,10 +734,7 @@ files = [
 [package.dependencies]
 jmespath = ">=0.7.1,<2.0.0"
 python-dateutil = ">=2.1,<3.0.0"
-urllib3 = [
-    {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""},
-    {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""},
-]
+urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}

 [package.extras]
 crt = ["awscrt (==0.19.19)"]
@@ -1069,20 +1063,6 @@ docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
 ssh = ["paramiko (>=2.4.3)"]
 websockets = ["websocket-client (>=1.3.0)"]

-[[package]]
-name = "exceptiongroup"
-version = "1.1.1"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
-    {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
 [[package]]
 name = "execnet"
 version = "1.9.0"
@@ -1110,7 +1090,6 @@ files = [

 [package.dependencies]
 click = ">=8.0"
-importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.10\""}
 itsdangerous = ">=2.0"
 Jinja2 = ">=3.0"
 Werkzeug = ">=2.2.2"
@@ -1319,25 +1298,6 @@ files = [
    {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
 ]

-[[package]]
-name = "importlib-metadata"
-version = "4.12.0"
-description = "Read metadata from Python packages"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
-    {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"},
-]
-
-[package.dependencies]
-zipp = ">=0.5"
-
-[package.extras]
-docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"]
-perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]
-
 [[package]]
 name = "iniconfig"
 version = "1.1.1"
@@ -1898,48 +1858,54 @@ files = [

 [[package]]
 name = "mypy"
-version = "1.3.0"
+version = "1.13.0"
 description = "Optional static typing for Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
-    {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
-    {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
-    {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
-    {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
-    {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
-    {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
-    {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
-    {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
-    {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
-    {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
-    {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
-    {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
-    {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
-    {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
-    {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
-    {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
-    {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
-    {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
-    {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
-    {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
-    {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
-    {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
-    {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
-    {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
-    {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
+    {file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"},
+    {file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"},
+    {file = "mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7"},
+    {file = "mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f"},
+    {file = "mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372"},
+    {file = "mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d"},
+    {file = "mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d"},
+    {file = "mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b"},
+    {file = "mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73"},
+    {file = "mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca"},
+    {file = "mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5"},
+    {file = "mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e"},
+    {file = "mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2"},
+    {file = "mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0"},
+    {file = "mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2"},
+    {file = "mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7"},
+    {file = "mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62"},
+    {file = "mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8"},
+    {file = "mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7"},
+    {file = "mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc"},
+    {file = "mypy-1.13.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a"},
+    {file = "mypy-1.13.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb"},
+    {file = "mypy-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b"},
+    {file = "mypy-1.13.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74"},
+    {file = "mypy-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6"},
+    {file = "mypy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc"},
+    {file = "mypy-1.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732"},
+    {file = "mypy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc"},
+    {file = "mypy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d"},
+    {file = "mypy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24"},
+    {file = "mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a"},
+    {file = "mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e"},
 ]

 [package.dependencies]
 mypy-extensions = ">=1.0.0"
-tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typing-extensions = ">=3.10"
+typing-extensions = ">=4.6.0"

 [package.extras]
 dmypy = ["psutil (>=4.0)"]
+faster-cache = ["orjson"]
 install-types = ["pip"]
-python2 = ["typed-ast (>=1.4.0,<2)"]
+mypyc = ["setuptools (>=50)"]
 reports = ["lxml"]

 [[package]]
@@ -2514,11 +2480,9 @@ files = [

 [package.dependencies]
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
-exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
 iniconfig = "*"
 packaging = "*"
 pluggy = ">=0.12,<2.0"
-tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}

 [package.extras]
 testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
@@ -2581,10 +2545,7 @@ files = [
 ]

 [package.dependencies]
-pytest = [
-    {version = ">=5.0", markers = "python_version < \"3.10\""},
-    {version = ">=6.2.4", markers = "python_version >= \"3.10\""},
-]
+pytest = {version = ">=6.2.4", markers = "python_version >= \"3.10\""}

 [[package]]
 name = "pytest-repeat"
@@ -3092,17 +3053,6 @@ files = [
    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
 ]

-[[package]]
-name = "tomli"
-version = "2.0.1"
-description = "A lil' TOML parser"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
-    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
-]
-
 [[package]]
 name = "types-jwcrypto"
 version = "1.5.0.20240925"
@@ -3359,16 +3309,6 @@ files = [
    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
    {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
    {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
-    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
-    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -3523,21 +3463,6 @@ idna = ">=2.0"
 multidict = ">=4.0"
 propcache = ">=0.2.0"

-[[package]]
-name = "zipp"
-version = "3.19.1"
-description = "Backport of pathlib-compatible object wrapper for zip files"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "zipp-3.19.1-py3-none-any.whl", hash = "sha256:2828e64edb5386ea6a52e7ba7cdb17bb30a73a858f5eb6eb93d8d36f5ea26091"},
-    {file = "zipp-3.19.1.tar.gz", hash = "sha256:35427f6d5594f4acf82d25541438348c26736fa9b3afa2754bcd63cdb99d8e8f"},
-]
-
-[package.extras]
-doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
-
 [[package]]
 name = "zstandard"
 version = "0.21.0"
@@ -3598,5 +3523,5 @@ cffi = ["cffi (>=1.11)"]

 [metadata]
 lock-version = "2.0"
-python-versions = "^3.9"
-content-hash = "8cb9c38d83eec441391c0528ac2fbefde18c734373b2399e07c69382044e8ced"
+python-versions = "^3.11"
+content-hash = "21debe1116843e5d14bdf37d6e265c68c63a98a64ba04ec8b8a02af2e8d9f486"
--- a/proxy/src/auth/backend/console_redirect.rs
+++ b/proxy/src/auth/backend/console_redirect.rs
@@ -6,6 +6,7 @@ use tokio_postgres::config::SslMode;
 use tracing::{info, info_span};

 use super::ComputeCredentialKeys;
+use crate::auth::IpPattern;
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
@@ -74,10 +75,10 @@ impl ConsoleRedirectBackend {
        ctx: &RequestContext,
        auth_config: &'static AuthenticationConfig,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-    ) -> auth::Result<ConsoleRedirectNodeInfo> {
+    ) -> auth::Result<(ConsoleRedirectNodeInfo, Option<Vec<IpPattern>>)> {
        authenticate(ctx, auth_config, &self.console_uri, client)
            .await
-            .map(ConsoleRedirectNodeInfo)
+            .map(|(node_info, ip_allowlist)| (ConsoleRedirectNodeInfo(node_info), ip_allowlist))
    }
 }

@@ -102,7 +103,7 @@ async fn authenticate(
    auth_config: &'static AuthenticationConfig,
    link_uri: &reqwest::Url,
    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-) -> auth::Result<NodeInfo> {
+) -> auth::Result<(NodeInfo, Option<Vec<IpPattern>>)> {
    ctx.set_auth_method(crate::context::AuthMethod::ConsoleRedirect);

    // registering waiter can fail if we get unlucky with rng.
@@ -176,9 +177,12 @@ async fn authenticate(
        config.password(password.as_ref());
    }

-    Ok(NodeInfo {
-        config,
-        aux: db_info.aux,
-        allow_self_signed_compute: false, // caller may override
-    })
+    Ok((
+        NodeInfo {
+            config,
+            aux: db_info.aux,
+            allow_self_signed_compute: false, // caller may override
+        },
+        db_info.allowed_ips,
+    ))
 }
--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -132,6 +132,93 @@ struct JwkSet<'a> {
    keys: Vec<&'a RawValue>,
 }

+/// Given a jwks_url, fetch the JWKS and parse out all the signing JWKs.
+/// Returns `None` and log a warning if there are any errors.
+async fn fetch_jwks(
+    client: &reqwest_middleware::ClientWithMiddleware,
+    jwks_url: url::Url,
+) -> Option<jose_jwk::JwkSet> {
+    let req = client.get(jwks_url.clone());
+    // TODO(conrad): We need to filter out URLs that point to local resources. Public internet only.
+    let resp = req.send().await.and_then(|r| {
+        r.error_for_status()
+            .map_err(reqwest_middleware::Error::Reqwest)
+    });
+
+    let resp = match resp {
+        Ok(r) => r,
+        // TODO: should we re-insert JWKs if we want to keep this JWKs URL?
+        // I expect these failures would be quite sparse.
+        Err(e) => {
+            tracing::warn!(url=?jwks_url, error=?e, "could not fetch JWKs");
+            return None;
+        }
+    };
+
+    let resp: http::Response<reqwest::Body> = resp.into();
+
+    let bytes = match read_body_with_limit(resp.into_body(), MAX_JWK_BODY_SIZE).await {
+        Ok(bytes) => bytes,
+        Err(e) => {
+            tracing::warn!(url=?jwks_url, error=?e, "could not decode JWKs");
+            return None;
+        }
+    };
+
+    let jwks = match serde_json::from_slice::<JwkSet>(&bytes) {
+        Ok(jwks) => jwks,
+        Err(e) => {
+            tracing::warn!(url=?jwks_url, error=?e, "could not decode JWKs");
+            return None;
+        }
+    };
+
+    // `jose_jwk::Jwk` is quite large (288 bytes). Let's not pre-allocate for what we don't need.
+    //
+    // Even though we limit our responses to 64KiB, we could still receive a payload like
+    // `{"keys":[` + repeat(`0`).take(30000).join(`,`) + `]}`. Parsing this as `RawValue` uses 468KiB.
+    // Pre-allocating the corresponding `Vec::<jose_jwk::Jwk>::with_capacity(30000)` uses 8.2MiB.
+    let mut keys = vec![];
+
+    let mut failed = 0;
+    for key in jwks.keys {
+        let key = match serde_json::from_str::<jose_jwk::Jwk>(key.get()) {
+            Ok(key) => key,
+            Err(e) => {
+                tracing::debug!(url=?jwks_url, failed=?e, "could not decode JWK");
+                failed += 1;
+                continue;
+            }
+        };
+
+        // if `use` (called `cls` in rust) is specified to be something other than signing,
+        // we can skip storing it.
+        if key
+            .prm
+            .cls
+            .as_ref()
+            .is_some_and(|c| *c != jose_jwk::Class::Signing)
+        {
+            continue;
+        }
+
+        keys.push(key);
+    }
+
+    keys.shrink_to_fit();
+
+    if failed > 0 {
+        tracing::warn!(url=?jwks_url, failed, "could not decode JWKs");
+    }
+
+    if keys.is_empty() {
+        tracing::warn!(url=?jwks_url, "no valid JWKs found inside the response body");
+        return None;
+    }
+
+    Some(jose_jwk::JwkSet { keys })
+}
+
 impl JwkCacheEntryLock {
    async fn acquire_permit<'a>(self: &'a Arc<Self>) -> JwkRenewalPermit<'a> {
        JwkRenewalPermit::acquire_permit(self).await
@@ -166,87 +253,15 @@ impl JwkCacheEntryLock {
        // TODO(conrad): run concurrently
        // TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
        for rule in rules {
-            let req = client.get(rule.jwks_url.clone());
-            // TODO(conrad): eventually switch to using reqwest_middleware/`new_client_with_timeout`.
-            // TODO(conrad): We need to filter out URLs that point to local resources. Public internet only.
-            match req.send().await.and_then(|r| {
-                r.error_for_status()
-                    .map_err(reqwest_middleware::Error::Reqwest)
-            }) {
-                // todo: should we re-insert JWKs if we want to keep this JWKs URL?
-                // I expect these failures would be quite sparse.
-                Err(e) => tracing::warn!(url=?rule.jwks_url, error=?e, "could not fetch JWKs"),
-                Ok(r) => {
-                    let resp: http::Response<reqwest::Body> = r.into();
-
-                    let bytes = match read_body_with_limit(resp.into_body(), MAX_JWK_BODY_SIZE)
-                        .await
-                    {
-                        Ok(bytes) => bytes,
-                        Err(e) => {
-                            tracing::warn!(url=?rule.jwks_url, error=?e, "could not decode JWKs");
-                            continue;
-                        }
-                    };
-
-                    match serde_json::from_slice::<JwkSet>(&bytes) {
-                        Err(e) => {
-                            tracing::warn!(url=?rule.jwks_url, error=?e, "could not decode JWKs");
-                        }
-                        Ok(jwks) => {
-                            // size_of::<&RawValue>() == 16
-                            // size_of::<jose_jwk::Jwk>() == 288
-                            // better to not pre-allocate this as it might be pretty large - especially if it has many
-                            // keys we don't want or need.
-                            // trivial 'attack': `{"keys":[` + repeat(`0`).take(30000).join(`,`) + `]}`
-                            // this would consume 8MiB just like that!
-                            let mut keys = vec![];
-                            let mut failed = 0;
-                            for key in jwks.keys {
-                                match serde_json::from_str::<jose_jwk::Jwk>(key.get()) {
-                                    Ok(key) => {
-                                        // if `use` (called `cls` in rust) is specified to be something other than signing,
-                                        // we can skip storing it.
-                                        if key
-                                            .prm
-                                            .cls
-                                            .as_ref()
-                                            .is_some_and(|c| *c != jose_jwk::Class::Signing)
-                                        {
-                                            continue;
-                                        }
-
-                                        keys.push(key);
-                                    }
-                                    Err(e) => {
-                                        tracing::debug!(url=?rule.jwks_url, failed=?e, "could not decode JWK");
-                                        failed += 1;
-                                    }
-                                }
-                            }
-                            keys.shrink_to_fit();
-
-                            if failed > 0 {
-                                tracing::warn!(url=?rule.jwks_url, failed, "could not decode JWKs");
-                            }
-
-                            if keys.is_empty() {
-                                tracing::warn!(url=?rule.jwks_url, "no valid JWKs found inside the response body");
-                                continue;
-                            }
-
-                            let jwks = jose_jwk::JwkSet { keys };
-                            key_sets.insert(
-                                rule.id,
-                                KeySet {
-                                    jwks,
-                                    audience: rule.audience,
-                                    role_names: rule.role_names,
-                                },
-                            );
-                        }
-                    };
-                }
+            if let Some(jwks) = fetch_jwks(client, rule.jwks_url).await {
+                key_sets.insert(
+                    rule.id,
+                    KeySet {
+                        jwks,
+                        audience: rule.audience,
+                        role_names: rule.role_names,
+                    },
+                );
            }
        }

--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -6,7 +6,6 @@ pub mod local;

 use std::net::IpAddr;
 use std::sync::Arc;
-use std::time::Duration;

 pub use console_redirect::ConsoleRedirectBackend;
 pub(crate) use console_redirect::ConsoleRedirectError;
@@ -30,7 +29,7 @@ use crate::intern::EndpointIdInt;
 use crate::metrics::Metrics;
 use crate::proxy::connect_compute::ComputeConnectBackend;
 use crate::proxy::NeonOptions;
-use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter, RateBucketInfo};
+use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
 use crate::stream::Stream;
 use crate::types::{EndpointCacheKey, EndpointId, RoleName};
 use crate::{scram, stream};
@@ -192,21 +191,6 @@ impl MaskedIp {
 // This can't be just per IP because that would limit some PaaS that share IP addresses
 pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, MaskedIp)>;

-impl RateBucketInfo {
-    /// All of these are per endpoint-maskedip pair.
-    /// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
-    ///
-    /// First bucket: 1000mcpus total per endpoint-ip pair
-    /// * 4096000 requests per second with 1 hash rounds.
-    /// * 1000 requests per second with 4096 hash rounds.
-    /// * 6.8 requests per second with 600000 hash rounds.
-    pub const DEFAULT_AUTH_SET: [Self; 3] = [
-        Self::new(1000 * 4096, Duration::from_secs(1)),
-        Self::new(600 * 4096, Duration::from_secs(60)),
-        Self::new(300 * 4096, Duration::from_secs(600)),
-    ];
-}
-
 impl AuthenticationConfig {
    pub(crate) fn check_rate_limit(
        &self,
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -428,8 +428,9 @@ async fn main() -> anyhow::Result<()> {
        )?))),
        None => None,
    };
+
    let cancellation_handler = Arc::new(CancellationHandler::<
-        Option<Arc<tokio::sync::Mutex<RedisPublisherClient>>>,
+        Option<Arc<Mutex<RedisPublisherClient>>>,
    >::new(
        cancel_map.clone(),
        redis_publisher,
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -10,16 +10,23 @@ use tokio_postgres::{CancelToken, NoTls};
 use tracing::{debug, info};
 use uuid::Uuid;

+use crate::auth::{check_peer_addr_is_in_list, IpPattern};
 use crate::error::ReportableError;
 use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
+use crate::rate_limiter::LeakyBucketRateLimiter;
 use crate::redis::cancellation_publisher::{
    CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
 };
+use std::net::IpAddr;
+
+use ipnet::{IpNet, Ipv4Net, Ipv6Net};

 pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
 pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
 pub(crate) type CancellationHandlerMainInternal = Option<Arc<Mutex<RedisPublisherClient>>>;

+type IpSubnetKey = IpNet;
+
 /// Enables serving `CancelRequest`s.
 ///
 /// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.
@@ -29,14 +36,23 @@ pub struct CancellationHandler<P> {
    /// This field used for the monitoring purposes.
    /// Represents the source of the cancellation request.
    from: CancellationSource,
+    // rate limiter of cancellation requests
+    limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
 }

 #[derive(Debug, Error)]
 pub(crate) enum CancelError {
    #[error("{0}")]
    IO(#[from] std::io::Error),
+
    #[error("{0}")]
    Postgres(#[from] tokio_postgres::Error),
+
+    #[error("rate limit exceeded")]
+    RateLimit,
+
+    #[error("IP is not allowed")]
+    IpNotAllowed,
 }

 impl ReportableError for CancelError {
@@ -47,6 +63,8 @@ impl ReportableError for CancelError {
                crate::error::ErrorKind::Postgres
            }
            CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
+            CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
+            CancelError::IpNotAllowed => crate::error::ErrorKind::User,
        }
    }
 }
@@ -79,13 +97,36 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
            cancellation_handler: self,
        }
    }
+
    /// Try to cancel a running query for the corresponding connection.
    /// If the cancellation key is not found, it will be published to Redis.
+    /// check_allowed - if true, check if the IP is allowed to cancel the query
    pub(crate) async fn cancel_session(
        &self,
        key: CancelKeyData,
        session_id: Uuid,
+        peer_addr: &IpAddr,
+        check_allowed: bool,
    ) -> Result<(), CancelError> {
+        // TODO: check for unspecified address is only for backward compatibility, should be removed
+        if !peer_addr.is_unspecified() {
+            let subnet_key = match *peer_addr {
+                IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
+                IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
+            };
+            if !self.limiter.lock().unwrap().check(subnet_key, 1) {
+                tracing::debug!("Rate limit exceeded. Skipping cancellation message");
+                Metrics::get()
+                    .proxy
+                    .cancellation_requests_total
+                    .inc(CancellationRequest {
+                        source: self.from,
+                        kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
+                    });
+                return Err(CancelError::RateLimit);
+            }
+        }
+
        // NB: we should immediately release the lock after cloning the token.
        let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
            tracing::warn!("query cancellation key not found: {key}");
@@ -96,7 +137,13 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
                    source: self.from,
                    kind: crate::metrics::CancellationOutcome::NotFound,
                });
-            match self.client.try_publish(key, session_id).await {
+
+            if session_id == Uuid::nil() {
+                // was already published, do not publish it again
+                return Ok(());
+            }
+
+            match self.client.try_publish(key, session_id, *peer_addr).await {
                Ok(()) => {} // do nothing
                Err(e) => {
                    return Err(CancelError::IO(std::io::Error::new(
@@ -107,6 +154,13 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
            }
            return Ok(());
        };
+
+        if check_allowed
+            && !check_peer_addr_is_in_list(peer_addr, cancel_closure.ip_allowlist.as_slice())
+        {
+            return Err(CancelError::IpNotAllowed);
+        }
+
        Metrics::get()
            .proxy
            .cancellation_requests_total
@@ -135,13 +189,29 @@ impl CancellationHandler<()> {
            map,
            client: (),
            from,
+            limiter: Arc::new(std::sync::Mutex::new(
+                LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
+                    LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
+                    64,
+                ),
+            )),
        }
    }
 }

 impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
    pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: CancellationSource) -> Self {
-        Self { map, client, from }
+        Self {
+            map,
+            client,
+            from,
+            limiter: Arc::new(std::sync::Mutex::new(
+                LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
+                    LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
+                    64,
+                ),
+            )),
+        }
    }
 }

@@ -152,13 +222,19 @@ impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
 pub struct CancelClosure {
    socket_addr: SocketAddr,
    cancel_token: CancelToken,
+    ip_allowlist: Vec<IpPattern>,
 }

 impl CancelClosure {
-    pub(crate) fn new(socket_addr: SocketAddr, cancel_token: CancelToken) -> Self {
+    pub(crate) fn new(
+        socket_addr: SocketAddr,
+        cancel_token: CancelToken,
+        ip_allowlist: Vec<IpPattern>,
+    ) -> Self {
        Self {
            socket_addr,
            cancel_token,
+            ip_allowlist,
        }
    }
    /// Cancels the query running on user's compute node.
@@ -168,6 +244,9 @@ impl CancelClosure {
        debug!("query was cancelled");
        Ok(())
    }
+    pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec<IpPattern>) {
+        self.ip_allowlist = ip_allowlist;
+    }
 }

 /// Helper for registering query cancellation tokens.
@@ -229,6 +308,8 @@ mod tests {
                    cancel_key: 0,
                },
                Uuid::new_v4(),
+                &("127.0.0.1".parse().unwrap()),
+                true,
            )
            .await
            .unwrap();
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -342,7 +342,7 @@ impl ConnCfg {

        // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw.
        // Yet another reason to rework the connection establishing code.
-        let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token());
+        let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token(), vec![]);

        let connection = PostgresConnection {
            stream,
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -156,16 +156,21 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let request_gauge = metrics.connection_requests.guard(proto);

    let tls = config.tls_config.as_ref();
-
    let record_handshake_error = !ctx.has_private_peer_addr();
    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
    let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
+
    let (mut stream, params) =
        match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
            HandshakeData::Startup(stream, params) => (stream, params),
            HandshakeData::Cancel(cancel_key_data) => {
                return Ok(cancellation_handler
-                    .cancel_session(cancel_key_data, ctx.session_id())
+                    .cancel_session(
+                        cancel_key_data,
+                        ctx.session_id(),
+                        &ctx.peer_addr(),
+                        config.authentication_config.ip_allowlist_check_enabled,
+                    )
                    .await
                    .map(|()| None)?)
            }
@@ -174,7 +179,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    ctx.set_db_options(params.clone());

-    let user_info = match backend
+    let (user_info, ip_allowlist) = match backend
        .authenticate(ctx, &config.authentication_config, &mut stream)
        .await
    {
@@ -198,6 +203,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    .or_else(|e| stream.throw_error(e))
    .await?;

+    node.cancel_closure
+        .set_ip_allowlist(ip_allowlist.unwrap_or_default());
    let session = cancellation_handler.get_session();
    prepare_client_connection(&node, &session, &mut stream).await?;

--- a/proxy/src/metrics.rs
+++ b/proxy/src/metrics.rs
@@ -351,6 +351,7 @@ pub enum CancellationSource {
 pub enum CancellationOutcome {
    NotFound,
    Found,
+    RateLimitExceeded,
 }

 #[derive(LabelGroup)]
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -268,12 +268,18 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let record_handshake_error = !ctx.has_private_peer_addr();
    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
    let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error);
+
    let (mut stream, params) =
        match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
            HandshakeData::Startup(stream, params) => (stream, params),
            HandshakeData::Cancel(cancel_key_data) => {
                return Ok(cancellation_handler
-                    .cancel_session(cancel_key_data, ctx.session_id())
+                    .cancel_session(
+                        cancel_key_data,
+                        ctx.session_id(),
+                        &ctx.peer_addr(),
+                        config.authentication_config.ip_allowlist_check_enabled,
+                    )
                    .await
                    .map(|()| None)?)
            }
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -14,13 +14,13 @@ use tracing::info;

 use crate::intern::EndpointIdInt;

-pub(crate) struct GlobalRateLimiter {
+pub struct GlobalRateLimiter {
    data: Vec<RateBucket>,
    info: Vec<RateBucketInfo>,
 }

 impl GlobalRateLimiter {
-    pub(crate) fn new(info: Vec<RateBucketInfo>) -> Self {
+    pub fn new(info: Vec<RateBucketInfo>) -> Self {
        Self {
            data: vec![
                RateBucket {
@@ -34,7 +34,7 @@ impl GlobalRateLimiter {
    }

    /// Check that number of connections is below `max_rps` rps.
-    pub(crate) fn check(&mut self) -> bool {
+    pub fn check(&mut self) -> bool {
        let now = Instant::now();

        let should_allow_request = self
@@ -137,6 +137,19 @@ impl RateBucketInfo {
        Self::new(200, Duration::from_secs(600)),
    ];

+    /// All of these are per endpoint-maskedip pair.
+    /// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
+    ///
+    /// First bucket: 1000mcpus total per endpoint-ip pair
+    /// * 4096000 requests per second with 1 hash rounds.
+    /// * 1000 requests per second with 4096 hash rounds.
+    /// * 6.8 requests per second with 600000 hash rounds.
+    pub const DEFAULT_AUTH_SET: [Self; 3] = [
+        Self::new(1000 * 4096, Duration::from_secs(1)),
+        Self::new(600 * 4096, Duration::from_secs(60)),
+        Self::new(300 * 4096, Duration::from_secs(600)),
+    ];
+
    pub fn rps(&self) -> f64 {
        (self.max_rpi as f64) / self.interval.as_secs_f64()
    }
--- a/proxy/src/rate_limiter/mod.rs
+++ b/proxy/src/rate_limiter/mod.rs
@@ -8,5 +8,4 @@ pub(crate) use limit_algorithm::aimd::Aimd;
 pub(crate) use limit_algorithm::{
    DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,
 };
-pub(crate) use limiter::GlobalRateLimiter;
-pub use limiter::{BucketRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
+pub use limiter::{BucketRateLimiter, GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
--- a/proxy/src/redis/cancellation_publisher.rs
+++ b/proxy/src/redis/cancellation_publisher.rs
@@ -1,5 +1,6 @@
 use std::sync::Arc;

+use core::net::IpAddr;
 use pq_proto::CancelKeyData;
 use redis::AsyncCommands;
 use tokio::sync::Mutex;
@@ -15,6 +16,7 @@ pub trait CancellationPublisherMut: Send + Sync + 'static {
        &mut self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()>;
 }

@@ -24,6 +26,7 @@ pub trait CancellationPublisher: Send + Sync + 'static {
        &self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()>;
 }

@@ -32,6 +35,7 @@ impl CancellationPublisher for () {
        &self,
        _cancel_key_data: CancelKeyData,
        _session_id: Uuid,
+        _peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        Ok(())
    }
@@ -42,8 +46,10 @@ impl<P: CancellationPublisher> CancellationPublisherMut for P {
        &mut self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
-        <P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id).await
+        <P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id, peer_addr)
+            .await
    }
 }

@@ -52,9 +58,10 @@ impl<P: CancellationPublisher> CancellationPublisher for Option<P> {
        &self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        if let Some(p) = self {
-            p.try_publish(cancel_key_data, session_id).await
+            p.try_publish(cancel_key_data, session_id, peer_addr).await
        } else {
            Ok(())
        }
@@ -66,10 +73,11 @@ impl<P: CancellationPublisherMut> CancellationPublisher for Arc<Mutex<P>> {
        &self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        self.lock()
            .await
-            .try_publish(cancel_key_data, session_id)
+            .try_publish(cancel_key_data, session_id, peer_addr)
            .await
    }
 }
@@ -97,11 +105,13 @@ impl RedisPublisherClient {
        &mut self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        let payload = serde_json::to_string(&Notification::Cancel(CancelSession {
            region_id: Some(self.region_id.clone()),
            cancel_key_data,
            session_id,
+            peer_addr: Some(peer_addr),
        }))?;
        let _: () = self.client.publish(PROXY_CHANNEL_NAME, payload).await?;
        Ok(())
@@ -120,13 +130,14 @@ impl RedisPublisherClient {
        &mut self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        // TODO: review redundant error duplication logs.
        if !self.limiter.check() {
            tracing::info!("Rate limit exceeded. Skipping cancellation message");
            return Err(anyhow::anyhow!("Rate limit exceeded"));
        }
-        match self.publish(cancel_key_data, session_id).await {
+        match self.publish(cancel_key_data, session_id, peer_addr).await {
            Ok(()) => return Ok(()),
            Err(e) => {
                tracing::error!("failed to publish a message: {e}");
@@ -134,7 +145,7 @@ impl RedisPublisherClient {
        }
        tracing::info!("Publisher is disconnected. Reconnectiong...");
        self.try_connect().await?;
-        self.publish(cancel_key_data, session_id).await
+        self.publish(cancel_key_data, session_id, peer_addr).await
    }
 }

@@ -143,9 +154,13 @@ impl CancellationPublisherMut for RedisPublisherClient {
        &mut self,
        cancel_key_data: CancelKeyData,
        session_id: Uuid,
+        peer_addr: IpAddr,
    ) -> anyhow::Result<()> {
        tracing::info!("publishing cancellation key to Redis");
-        match self.try_publish_internal(cancel_key_data, session_id).await {
+        match self
+            .try_publish_internal(cancel_key_data, session_id, peer_addr)
+            .await
+        {
            Ok(()) => {
                tracing::debug!("cancellation key successfuly published to Redis");
                Ok(())
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -60,6 +60,7 @@ pub(crate) struct CancelSession {
    pub(crate) region_id: Option<String>,
    pub(crate) cancel_key_data: CancelKeyData,
    pub(crate) session_id: Uuid,
+    pub(crate) peer_addr: Option<std::net::IpAddr>,
 }

 fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
@@ -137,10 +138,20 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                        return Ok(());
                    }
                }
+
+                // TODO: Remove unspecified peer_addr after the complete migration to the new format
+                let peer_addr = cancel_session
+                    .peer_addr
+                    .unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED));
                // This instance of cancellation_handler doesn't have a RedisPublisherClient so it can't publish the message.
                match self
                    .cancellation_handler
-                    .cancel_session(cancel_session.cancel_key_data, uuid::Uuid::nil())
+                    .cancel_session(
+                        cancel_session.cancel_key_data,
+                        uuid::Uuid::nil(),
+                        &peer_addr,
+                        cancel_session.peer_addr.is_some(),
+                    )
                    .await
                {
                    Ok(()) => {}
@@ -335,6 +346,7 @@ mod tests {
            cancel_key_data,
            region_id: None,
            session_id: uuid,
+            peer_addr: None,
        });
        let text = serde_json::to_string(&msg)?;
        let result: Notification = serde_json::from_str(&text)?;
@@ -344,6 +356,7 @@ mod tests {
            cancel_key_data,
            region_id: Some("region".to_string()),
            session_id: uuid,
+            peer_addr: None,
        });
        let text = serde_json::to_string(&msg)?;
        let result: Notification = serde_json::from_str(&text)?;
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -14,7 +14,7 @@ use hyper::{header, HeaderMap, Request, Response, StatusCode};
 use pq_proto::StartupMessageParamsBuilder;
 use serde::Serialize;
 use serde_json::Value;
-use tokio::time;
+use tokio::time::{self, Instant};
 use tokio_postgres::error::{DbError, ErrorPosition, SqlState};
 use tokio_postgres::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction};
 use tokio_util::sync::CancellationToken;
@@ -980,10 +980,11 @@ async fn query_to_json<T: GenericClient>(
    current_size: &mut usize,
    parsed_headers: HttpHeaders,
 ) -> Result<(ReadyForQueryStatus, impl Serialize), SqlOverHttpError> {
-    info!("executing query");
+    let query_start = Instant::now();
+
    let query_params = data.params;
    let mut row_stream = std::pin::pin!(client.query_raw_txt(&data.query, query_params).await?);
-    info!("finished executing query");
+    let query_acknowledged = Instant::now();

    // Manually drain the stream into a vector to leave row_stream hanging
    // around to get a command tag. Also check that the response is not too
@@ -1002,6 +1003,7 @@ async fn query_to_json<T: GenericClient>(
        }
    }

+    let query_resp_end = Instant::now();
    let ready = row_stream.ready_status();

    // grab the command tag and number of rows affected
@@ -1021,7 +1023,9 @@ async fn query_to_json<T: GenericClient>(
        rows = rows.len(),
        ?ready,
        command_tag,
-        "finished reading rows"
+        acknowledgement = ?(query_acknowledged - query_start),
+        response = ?(query_resp_end - query_start),
+        "finished executing query"
    );

    let columns_len = row_stream.columns().len();
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ authors = []
 package-mode = false

 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.11"
 pytest = "^7.4.4"
 psycopg2-binary = "^2.9.10"
 typing-extensions = "^4.6.1"
@@ -51,7 +51,7 @@ testcontainers = "^4.8.1"
 jsonnet = "^0.20.0"

 [tool.poetry.group.dev.dependencies]
-mypy = "==1.3.0"
+mypy = "==1.13.0"
 ruff = "^0.7.0"

 [build-system]
@@ -89,7 +89,7 @@ module = [
 ignore_missing_imports = true

 [tool.ruff]
-target-version = "py39"
+target-version = "py311"
 extend-exclude = [
    "vendor/",
    "target/",
@@ -108,6 +108,3 @@ select = [
    "B", # bugbear
    "UP", # pyupgrade
 ]
-
-[tool.ruff.lint.pyupgrade]
-keep-runtime-typing = true # Remove this stanza when we require Python 3.10
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -30,6 +30,7 @@ once_cell.workspace = true
 parking_lot.workspace = true
 postgres.workspace = true
 postgres-protocol.workspace = true
+pprof.workspace = true
 rand.workspace = true
 regex.workspace = true
 scopeguard.workspace = true
--- a/safekeeper/benches/README.md
+++ b/safekeeper/benches/README.md
@@ -14,6 +14,10 @@ cargo bench --package safekeeper --bench receive_wal process_msg/fsync=false

 # List available benchmarks.
 cargo bench --package safekeeper --benches -- --list
+
+# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
+# Output in target/criterion/*/profile/flamegraph.svg.
+cargo bench --package safekeeper --bench receive_wal process_msg/fsync=false --profile-time 10
 ```

 Additional charts and statistics are available in `target/criterion/report/index.html`.
--- a/safekeeper/benches/receive_wal.rs
+++ b/safekeeper/benches/receive_wal.rs
@@ -10,6 +10,7 @@ use camino_tempfile::tempfile;
 use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion};
 use itertools::Itertools as _;
 use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator};
+use pprof::criterion::{Output, PProfProfiler};
 use safekeeper::receive_wal::{self, WalAcceptor};
 use safekeeper::safekeeper::{
    AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage,
@@ -24,8 +25,9 @@ const GB: usize = 1024 * MB;

 // Register benchmarks with Criterion.
 criterion_group!(
-    benches,
-    bench_process_msg,
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench_process_msg,
    bench_wal_acceptor,
    bench_wal_acceptor_throughput,
    bench_file_write
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -1,7 +1,6 @@
-use hyper::{Body, Request, Response, StatusCode, Uri};
-use once_cell::sync::Lazy;
+use hyper::{Body, Request, Response, StatusCode};
 use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::fmt;
 use std::io::Write as _;
 use std::str::FromStr;
@@ -14,7 +13,9 @@ use tokio_stream::wrappers::ReceiverStream;
 use tokio_util::sync::CancellationToken;
 use tracing::{info_span, Instrument};
 use utils::failpoint_support::failpoints_handler;
-use utils::http::endpoint::{prometheus_metrics_handler, request_span, ChannelWriter};
+use utils::http::endpoint::{
+    profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter,
+};
 use utils::http::request::parse_query_param;

 use postgres_ffi::WAL_SEGMENT_SIZE;
@@ -572,14 +573,8 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
    let mut router = endpoint::make_router();
    if conf.http_auth.is_some() {
        router = router.middleware(auth_middleware(|request| {
-            #[allow(clippy::mutable_key_type)]
-            static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> = Lazy::new(|| {
-                ["/v1/status", "/metrics"]
-                    .iter()
-                    .map(|v| v.parse().unwrap())
-                    .collect()
-            });
-            if ALLOWLIST_ROUTES.contains(request.uri()) {
+            const ALLOWLIST_ROUTES: &[&str] = &["/v1/status", "/metrics", "/profile/cpu"];
+            if ALLOWLIST_ROUTES.contains(&request.uri().path()) {
                None
            } else {
                // Option<Arc<SwappableJwtAuth>> is always provided as data below, hence unwrap().
@@ -598,6 +593,7 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
        .data(Arc::new(conf))
        .data(auth)
        .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
+        .get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
        .get("/v1/status", |r| request_span(r, status_handler))
        .put("/v1/failpoints", |r| {
            request_span(r, move |r| async {
--- a/scripts/flaky_tests.py
+++ b/scripts/flaky_tests.py
@@ -14,7 +14,7 @@ import psycopg2.extras
 import toml

 if TYPE_CHECKING:
-    from typing import Any, Optional
+    from typing import Any

 FLAKY_TESTS_QUERY = """
    SELECT
@@ -65,7 +65,7 @@ def main(args: argparse.Namespace):
        pageserver_virtual_file_io_engine_parameter = ""

    # re-use existing records of flaky tests from before parametrization by compaction_algorithm
-    def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]:
+    def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
        """Duplicated from parametrize.py"""
        toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
        if toml_table is None:
--- a/scripts/force_layer_download.py
+++ b/scripts/force_layer_download.py
@@ -194,9 +194,11 @@ async def main_impl(args, report_out, client: Client):
            tenant_ids = await client.get_tenant_ids()
            get_timeline_id_coros = [client.get_timeline_ids(tenant_id) for tenant_id in tenant_ids]
            gathered = await asyncio.gather(*get_timeline_id_coros, return_exceptions=True)
-            assert len(tenant_ids) == len(gathered)
            tenant_and_timline_ids = []
-            for tid, tlids in zip(tenant_ids, gathered):
+            for tid, tlids in zip(tenant_ids, gathered, strict=True):
+                # TODO: add error handling if tlids isinstance(Exception)
+                assert isinstance(tlids, list)
+
                for tlid in tlids:
                    tenant_and_timline_ids.append((tid, tlid))
        elif len(comps) == 1:
--- a/scripts/ingest_regress_test_result-new-format.py
+++ b/scripts/ingest_regress_test_result-new-format.py
@@ -11,7 +11,7 @@ import re
 import sys
 from contextlib import contextmanager
 from dataclasses import dataclass
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path

 import backoff
@@ -31,6 +31,7 @@ CREATE TABLE IF NOT EXISTS results (
    duration     INT NOT NULL,
    flaky        BOOLEAN NOT NULL,
    arch         arch DEFAULT 'X64',
+    lfc          BOOLEAN DEFAULT false NOT NULL,
    build_type   TEXT NOT NULL,
    pg_version   INT NOT NULL,
    run_id       BIGINT NOT NULL,
@@ -54,6 +55,7 @@ class Row:
    duration: int
    flaky: bool
    arch: str
+    lfc: bool
    build_type: str
    pg_version: int
    run_id: int
@@ -132,6 +134,7 @@ def ingest_test_result(
            if p["name"].startswith("__")
        }
        arch = parameters.get("arch", "UNKNOWN").strip("'")
+        lfc = parameters.get("lfc", "False") == "True"

        build_type, pg_version, unparametrized_name = parse_test_name(test["name"])
        labels = {label["name"]: label["value"] for label in test["labels"]}
@@ -140,11 +143,12 @@ def ingest_test_result(
            suite=labels["suite"],
            name=unparametrized_name,
            status=test["status"],
-            started_at=datetime.fromtimestamp(test["time"]["start"] / 1000, tz=timezone.utc),
-            stopped_at=datetime.fromtimestamp(test["time"]["stop"] / 1000, tz=timezone.utc),
+            started_at=datetime.fromtimestamp(test["time"]["start"] / 1000, tz=UTC),
+            stopped_at=datetime.fromtimestamp(test["time"]["stop"] / 1000, tz=UTC),
            duration=test["time"]["duration"],
            flaky=test["flaky"] or test["retriesStatusChange"],
            arch=arch,
+            lfc=lfc,
            build_type=build_type,
            pg_version=pg_version,
            run_id=run_id,
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -113,7 +113,7 @@ The test suite has a Python enum with equal name but different meaning:

 ```python
@enum.unique
-class RemoteStorageKind(str, enum.Enum):
+class RemoteStorageKind(StrEnum):
    LOCAL_FS = "local_fs"
    MOCK_S3 = "mock_s3"
    REAL_S3 = "real_s3"
--- a/test_runner/fixtures/auth_tokens.py
+++ b/test_runner/fixtures/auth_tokens.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 from typing import Any

 import jwt
@@ -37,8 +37,7 @@ class AuthKeys:
        return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))


-# TODO: Replace with `StrEnum` when we upgrade to python 3.11
-class TokenScope(str, Enum):
+class TokenScope(StrEnum):
    ADMIN = "admin"
    PAGE_SERVER_API = "pageserverapi"
    GENERATIONS_API = "generations_api"
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -9,6 +9,7 @@ import re
 import timeit
 from contextlib import contextmanager
 from datetime import datetime
+from enum import StrEnum
 from pathlib import Path
 from typing import TYPE_CHECKING

@@ -24,8 +25,7 @@ from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonPageserver

 if TYPE_CHECKING:
-    from collections.abc import Iterator, Mapping
-    from typing import Callable, Optional
+    from collections.abc import Callable, Iterator, Mapping


 """
@@ -61,7 +61,7 @@ class PgBenchRunResult:
    number_of_threads: int
    number_of_transactions_actually_processed: int
    latency_average: float
-    latency_stddev: Optional[float]
+    latency_stddev: float | None
    tps: float
    run_duration: float
    run_start_timestamp: int
@@ -171,14 +171,14 @@ _PGBENCH_INIT_EXTRACTORS: Mapping[str, re.Pattern[str]] = {

@dataclasses.dataclass
 class PgBenchInitResult:
-    total: Optional[float]
-    drop_tables: Optional[float]
-    create_tables: Optional[float]
-    client_side_generate: Optional[float]
-    server_side_generate: Optional[float]
-    vacuum: Optional[float]
-    primary_keys: Optional[float]
-    foreign_keys: Optional[float]
+    total: float | None
+    drop_tables: float | None
+    create_tables: float | None
+    client_side_generate: float | None
+    server_side_generate: float | None
+    vacuum: float | None
+    primary_keys: float | None
+    foreign_keys: float | None
    duration: float
    start_timestamp: int
    end_timestamp: int
@@ -196,7 +196,7 @@ class PgBenchInitResult:

        last_line = stderr.splitlines()[-1]

-        timings: dict[str, Optional[float]] = {}
+        timings: dict[str, float | None] = {}
        last_line_items = re.split(r"\(|\)|,", last_line)
        for item in last_line_items:
            for key, regex in _PGBENCH_INIT_EXTRACTORS.items():
@@ -227,7 +227,7 @@ class PgBenchInitResult:


@enum.unique
-class MetricReport(str, enum.Enum):  # str is a hack to make it json serializable
+class MetricReport(StrEnum):  # str is a hack to make it json serializable
    # this means that this is a constant test parameter
    # like number of transactions, or number of clients
    TEST_PARAM = "test_param"
@@ -256,9 +256,8 @@ class NeonBenchmarker:
        metric_value: float,
        unit: str,
        report: MetricReport,
-        labels: Optional[
-            dict[str, str]
-        ] = None,  # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric
+        # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric
+        labels: dict[str, str] | None = None,
    ):
        """
        Record a benchmark result.
@@ -412,7 +411,7 @@ class NeonBenchmarker:
        self,
        pageserver: NeonPageserver,
        metric_name: str,
-        label_filters: Optional[dict[str, str]] = None,
+        label_filters: dict[str, str] | None = None,
    ) -> int:
        """Fetch the value of given int counter from pageserver metrics."""
        all_metrics = pageserver.http_client().get_metrics()
--- a/test_runner/fixtures/common_types.py
+++ b/test_runner/fixtures/common_types.py
@@ -2,14 +2,14 @@ from __future__ import annotations

 import random
 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 from functools import total_ordering
 from typing import TYPE_CHECKING, TypeVar

 from typing_extensions import override

 if TYPE_CHECKING:
-    from typing import Any, Union
+    from typing import Any

    T = TypeVar("T", bound="Id")

@@ -24,7 +24,7 @@ class Lsn:
    representation is like "1/0123abcd". See also pg_lsn datatype in Postgres
    """

-    def __init__(self, x: Union[int, str]):
+    def __init__(self, x: int | str):
        if isinstance(x, int):
            self.lsn_int = x
        else:
@@ -67,7 +67,7 @@ class Lsn:
            return NotImplemented
        return self.lsn_int - other.lsn_int

-    def __add__(self, other: Union[int, Lsn]) -> Lsn:
+    def __add__(self, other: int | Lsn) -> Lsn:
        if isinstance(other, int):
            return Lsn(self.lsn_int + other)
        elif isinstance(other, Lsn):
@@ -190,8 +190,23 @@ class TenantTimelineId:
        )


-# Workaround for compat with python 3.9, which does not have `typing.Self`
-TTenantShardId = TypeVar("TTenantShardId", bound="TenantShardId")
+@dataclass
+class ShardIndex:
+    shard_number: int
+    shard_count: int
+
+    # cf impl Display for ShardIndex
+    @override
+    def __str__(self) -> str:
+        return f"{self.shard_number:02x}{self.shard_count:02x}"
+
+    @classmethod
+    def parse(cls: type[ShardIndex], input: str) -> ShardIndex:
+        assert len(input) == 4
+        return cls(
+            shard_number=int(input[0:2], 16),
+            shard_count=int(input[2:4], 16),
+        )


 class TenantShardId:
@@ -202,7 +217,7 @@ class TenantShardId:
        assert self.shard_number < self.shard_count or self.shard_count == 0

    @classmethod
-    def parse(cls: type[TTenantShardId], input: str) -> TTenantShardId:
+    def parse(cls: type[TenantShardId], input: str) -> TenantShardId:
        if len(input) == 32:
            return cls(
                tenant_id=TenantId(input),
@@ -226,6 +241,10 @@ class TenantShardId:
            # Unsharded case: equivalent of Rust TenantShardId::unsharded(tenant_id)
            return str(self.tenant_id)

+    @property
+    def shard_index(self) -> ShardIndex:
+        return ShardIndex(self.shard_number, self.shard_count)
+
    @override
    def __repr__(self):
        return self.__str__()
@@ -249,7 +268,6 @@ class TenantShardId:
        return hash(self._tuple())


-# TODO: Replace with `StrEnum` when we upgrade to python 3.11
-class TimelineArchivalState(str, Enum):
+class TimelineArchivalState(StrEnum):
    ARCHIVED = "Archived"
    UNARCHIVED = "Unarchived"
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -99,7 +99,7 @@ class PgCompare(ABC):
                assert row is not None
                assert len(row) == len(pg_stat.columns)

-                for col, val in zip(pg_stat.columns, row):
+                for col, val in zip(pg_stat.columns, row, strict=False):
                    results[f"{pg_stat.table}.{col}"] = int(val)

        return results
--- a/test_runner/fixtures/compute_reconfigure.py
+++ b/test_runner/fixtures/compute_reconfigure.py
@@ -12,7 +12,8 @@ from fixtures.common_types import TenantId
 from fixtures.log_helper import log

 if TYPE_CHECKING:
-    from typing import Any, Callable, Optional
+    from collections.abc import Callable
+    from typing import Any


 class ComputeReconfigure:
@@ -20,12 +21,12 @@ class ComputeReconfigure:
        self.server = server
        self.control_plane_compute_hook_api = f"http://{server.host}:{server.port}/notify-attach"
        self.workloads: dict[TenantId, Any] = {}
-        self.on_notify: Optional[Callable[[Any], None]] = None
+        self.on_notify: Callable[[Any], None] | None = None

    def register_workload(self, workload: Any):
        self.workloads[workload.tenant_id] = workload

-    def register_on_notify(self, fn: Optional[Callable[[Any], None]]):
+    def register_on_notify(self, fn: Callable[[Any], None] | None):
        """
        Add some extra work during a notification, like sleeping to slow things down, or
        logging what was notified.
@@ -68,7 +69,7 @@ def compute_reconfigure_listener(make_httpserver: HTTPServer):
            # This causes the endpoint to query storage controller for its location, which
            # is redundant since we already have it here, but this avoids extending the
            # neon_local CLI to take full lists of locations
-            reconfigure_threads.submit(lambda workload=workload: workload.reconfigure())  # type: ignore[no-any-return]
+            reconfigure_threads.submit(lambda workload=workload: workload.reconfigure())  # type: ignore[misc]

        return Response(status=200)

--- a/test_runner/fixtures/h2server.py
+++ b/test_runner/fixtures/h2server.py
@@ -31,7 +31,7 @@ from h2.settings import SettingCodes
 from typing_extensions import override

 if TYPE_CHECKING:
-    from typing import Any, Optional
+    from typing import Any


 RequestData = collections.namedtuple("RequestData", ["headers", "data"])
@@ -49,7 +49,7 @@ class H2Protocol(asyncio.Protocol):
    def __init__(self):
        config = H2Configuration(client_side=False, header_encoding="utf-8")
        self.conn = H2Connection(config=config)
-        self.transport: Optional[asyncio.Transport] = None
+        self.transport: asyncio.Transport | None = None
        self.stream_data: dict[int, RequestData] = {}
        self.flow_control_futures: dict[int, asyncio.Future[Any]] = {}

@@ -61,7 +61,7 @@ class H2Protocol(asyncio.Protocol):
        self.transport.write(self.conn.data_to_send())

    @override
-    def connection_lost(self, exc: Optional[Exception]):
+    def connection_lost(self, exc: Exception | None):
        for future in self.flow_control_futures.values():
            future.cancel()
        self.flow_control_futures = {}
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -1,16 +1,12 @@
 from __future__ import annotations

 from collections import defaultdict
-from typing import TYPE_CHECKING

 from prometheus_client.parser import text_string_to_metric_families
 from prometheus_client.samples import Sample

 from fixtures.log_helper import log

-if TYPE_CHECKING:
-    from typing import Optional
-

 class Metrics:
    metrics: dict[str, list[Sample]]
@@ -20,7 +16,7 @@ class Metrics:
        self.metrics = defaultdict(list)
        self.name = name

-    def query_all(self, name: str, filter: Optional[dict[str, str]] = None) -> list[Sample]:
+    def query_all(self, name: str, filter: dict[str, str] | None = None) -> list[Sample]:
        filter = filter or {}
        res: list[Sample] = []

@@ -32,7 +28,7 @@ class Metrics:
                pass
        return res

-    def query_one(self, name: str, filter: Optional[dict[str, str]] = None) -> Sample:
+    def query_one(self, name: str, filter: dict[str, str] | None = None) -> Sample:
        res = self.query_all(name, filter or {})
        assert len(res) == 1, f"expected single sample for {name} {filter}, found {res}"
        return res[0]
@@ -47,9 +43,7 @@ class MetricsGetter:
    def get_metrics(self) -> Metrics:
        raise NotImplementedError()

-    def get_metric_value(
-        self, name: str, filter: Optional[dict[str, str]] = None
-    ) -> Optional[float]:
+    def get_metric_value(self, name: str, filter: dict[str, str] | None = None) -> float | None:
        metrics = self.get_metrics()
        results = metrics.query_all(name, filter=filter)
        if not results:
@@ -59,7 +53,7 @@ class MetricsGetter:
        return results[0].value

    def get_metrics_values(
-        self, names: list[str], filter: Optional[dict[str, str]] = None, absence_ok: bool = False
+        self, names: list[str], filter: dict[str, str] | None = None, absence_ok: bool = False
    ) -> dict[str, float]:
        """
        When fetching multiple named metrics, it is more efficient to use this
--- a/test_runner/fixtures/neon_api.py
+++ b/test_runner/fixtures/neon_api.py
@@ -8,7 +8,7 @@ import requests
 from fixtures.log_helper import log

 if TYPE_CHECKING:
-    from typing import Any, Literal, Optional
+    from typing import Any, Literal

    from fixtures.pg_version import PgVersion

@@ -40,11 +40,11 @@ class NeonAPI:

    def create_project(
        self,
-        pg_version: Optional[PgVersion] = None,
-        name: Optional[str] = None,
-        branch_name: Optional[str] = None,
-        branch_role_name: Optional[str] = None,
-        branch_database_name: Optional[str] = None,
+        pg_version: PgVersion | None = None,
+        name: str | None = None,
+        branch_name: str | None = None,
+        branch_role_name: str | None = None,
+        branch_database_name: str | None = None,
    ) -> dict[str, Any]:
        data: dict[str, Any] = {
            "project": {
@@ -179,8 +179,8 @@ class NeonAPI:
    def get_connection_uri(
        self,
        project_id: str,
-        branch_id: Optional[str] = None,
-        endpoint_id: Optional[str] = None,
+        branch_id: str | None = None,
+        endpoint_id: str | None = None,
        database_name: str = "neondb",
        role_name: str = "neondb_owner",
        pooled: bool = True,
@@ -249,7 +249,7 @@ class NeonAPI:

@final
 class NeonApiEndpoint:
-    def __init__(self, neon_api: NeonAPI, pg_version: PgVersion, project_id: Optional[str]):
+    def __init__(self, neon_api: NeonAPI, pg_version: PgVersion, project_id: str | None):
        self.neon_api = neon_api
        self.project_id: str
        self.endpoint_id: str
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -20,13 +20,9 @@ from fixtures.pg_version import PgVersion
 if TYPE_CHECKING:
    from typing import (
        Any,
-        Optional,
-        TypeVar,
        cast,
    )

-    T = TypeVar("T")
-

 # Used to be an ABC. abc.ABC removed due to linter without name change.
 class AbstractNeonCli:
@@ -36,7 +32,7 @@ class AbstractNeonCli:
    Do not use directly, use specific subclasses instead.
    """

-    def __init__(self, extra_env: Optional[dict[str, str]], binpath: Path):
+    def __init__(self, extra_env: dict[str, str] | None, binpath: Path):
        self.extra_env = extra_env
        self.binpath = binpath

@@ -45,7 +41,7 @@ class AbstractNeonCli:
    def raw_cli(
        self,
        arguments: list[str],
-        extra_env_vars: Optional[dict[str, str]] = None,
+        extra_env_vars: dict[str, str] | None = None,
        check_return_code=True,
        timeout=None,
    ) -> subprocess.CompletedProcess[str]:
@@ -173,7 +169,7 @@ class NeonLocalCli(AbstractNeonCli):

    def __init__(
        self,
-        extra_env: Optional[dict[str, str]],
+        extra_env: dict[str, str] | None,
        binpath: Path,
        repo_dir: Path,
        pg_distrib_dir: Path,
@@ -195,10 +191,10 @@ class NeonLocalCli(AbstractNeonCli):
        tenant_id: TenantId,
        timeline_id: TimelineId,
        pg_version: PgVersion,
-        conf: Optional[dict[str, Any]] = None,
-        shard_count: Optional[int] = None,
-        shard_stripe_size: Optional[int] = None,
-        placement_policy: Optional[str] = None,
+        conf: dict[str, Any] | None = None,
+        shard_count: int | None = None,
+        shard_stripe_size: int | None = None,
+        placement_policy: str | None = None,
        set_default: bool = False,
    ):
        """
@@ -302,8 +298,8 @@ class NeonLocalCli(AbstractNeonCli):
        tenant_id: TenantId,
        timeline_id: TimelineId,
        new_branch_name,
-        ancestor_branch_name: Optional[str] = None,
-        ancestor_start_lsn: Optional[Lsn] = None,
+        ancestor_branch_name: str | None = None,
+        ancestor_start_lsn: Lsn | None = None,
    ):
        cmd = [
            "timeline",
@@ -331,8 +327,8 @@ class NeonLocalCli(AbstractNeonCli):
        base_lsn: Lsn,
        base_tarfile: Path,
        pg_version: PgVersion,
-        end_lsn: Optional[Lsn] = None,
-        wal_tarfile: Optional[Path] = None,
+        end_lsn: Lsn | None = None,
+        wal_tarfile: Path | None = None,
    ):
        cmd = [
            "timeline",
@@ -380,7 +376,7 @@ class NeonLocalCli(AbstractNeonCli):
    def init(
        self,
        init_config: dict[str, Any],
-        force: Optional[str] = None,
+        force: str | None = None,
    ) -> subprocess.CompletedProcess[str]:
        with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
            init_config_tmpfile.write(toml.dumps(init_config))
@@ -400,9 +396,9 @@ class NeonLocalCli(AbstractNeonCli):

    def storage_controller_start(
        self,
-        timeout_in_seconds: Optional[int] = None,
-        instance_id: Optional[int] = None,
-        base_port: Optional[int] = None,
+        timeout_in_seconds: int | None = None,
+        instance_id: int | None = None,
+        base_port: int | None = None,
    ):
        cmd = ["storage_controller", "start"]
        if timeout_in_seconds is not None:
@@ -413,7 +409,7 @@ class NeonLocalCli(AbstractNeonCli):
            cmd.append(f"--base-port={base_port}")
        return self.raw_cli(cmd)

-    def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
+    def storage_controller_stop(self, immediate: bool, instance_id: int | None = None):
        cmd = ["storage_controller", "stop"]
        if immediate:
            cmd.extend(["-m", "immediate"])
@@ -424,8 +420,8 @@ class NeonLocalCli(AbstractNeonCli):
    def pageserver_start(
        self,
        id: int,
-        extra_env_vars: Optional[dict[str, str]] = None,
-        timeout_in_seconds: Optional[int] = None,
+        extra_env_vars: dict[str, str] | None = None,
+        timeout_in_seconds: int | None = None,
    ) -> subprocess.CompletedProcess[str]:
        start_args = ["pageserver", "start", f"--id={id}"]
        if timeout_in_seconds is not None:
@@ -442,9 +438,9 @@ class NeonLocalCli(AbstractNeonCli):
    def safekeeper_start(
        self,
        id: int,
-        extra_opts: Optional[list[str]] = None,
-        extra_env_vars: Optional[dict[str, str]] = None,
-        timeout_in_seconds: Optional[int] = None,
+        extra_opts: list[str] | None = None,
+        extra_env_vars: dict[str, str] | None = None,
+        timeout_in_seconds: int | None = None,
    ) -> subprocess.CompletedProcess[str]:
        if extra_opts is not None:
            extra_opts = [f"-e={opt}" for opt in extra_opts]
@@ -457,7 +453,7 @@ class NeonLocalCli(AbstractNeonCli):
        )

    def safekeeper_stop(
-        self, id: Optional[int] = None, immediate=False
+        self, id: int | None = None, immediate=False
    ) -> subprocess.CompletedProcess[str]:
        args = ["safekeeper", "stop"]
        if id is not None:
@@ -467,7 +463,7 @@ class NeonLocalCli(AbstractNeonCli):
        return self.raw_cli(args)

    def storage_broker_start(
-        self, timeout_in_seconds: Optional[int] = None
+        self, timeout_in_seconds: int | None = None
    ) -> subprocess.CompletedProcess[str]:
        cmd = ["storage_broker", "start"]
        if timeout_in_seconds is not None:
@@ -485,10 +481,10 @@ class NeonLocalCli(AbstractNeonCli):
        http_port: int,
        tenant_id: TenantId,
        pg_version: PgVersion,
-        endpoint_id: Optional[str] = None,
+        endpoint_id: str | None = None,
        hot_standby: bool = False,
-        lsn: Optional[Lsn] = None,
-        pageserver_id: Optional[int] = None,
+        lsn: Lsn | None = None,
+        pageserver_id: int | None = None,
        allow_multiple=False,
    ) -> subprocess.CompletedProcess[str]:
        args = [
@@ -523,11 +519,11 @@ class NeonLocalCli(AbstractNeonCli):
    def endpoint_start(
        self,
        endpoint_id: str,
-        safekeepers: Optional[list[int]] = None,
-        remote_ext_config: Optional[str] = None,
-        pageserver_id: Optional[int] = None,
+        safekeepers: list[int] | None = None,
+        remote_ext_config: str | None = None,
+        pageserver_id: int | None = None,
        allow_multiple=False,
-        basebackup_request_tries: Optional[int] = None,
+        basebackup_request_tries: int | None = None,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
@@ -555,9 +551,9 @@ class NeonLocalCli(AbstractNeonCli):
    def endpoint_reconfigure(
        self,
        endpoint_id: str,
-        tenant_id: Optional[TenantId] = None,
-        pageserver_id: Optional[int] = None,
-        safekeepers: Optional[list[int]] = None,
+        tenant_id: TenantId | None = None,
+        pageserver_id: int | None = None,
+        safekeepers: list[int] | None = None,
        check_return_code=True,
    ) -> subprocess.CompletedProcess[str]:
        args = ["endpoint", "reconfigure", endpoint_id]
@@ -574,7 +570,7 @@ class NeonLocalCli(AbstractNeonCli):
        endpoint_id: str,
        destroy=False,
        check_return_code=True,
-        mode: Optional[str] = None,
+        mode: str | None = None,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
--- a/test_runner/fixtures/pageserver/common_types.py
+++ b/test_runner/fixtures/pageserver/common_types.py
@@ -2,7 +2,7 @@ from __future__ import annotations

 import re
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING

 from fixtures.common_types import KEY_MAX, KEY_MIN, Key, Lsn

@@ -46,7 +46,7 @@ class DeltaLayerName:
        return ret


-LayerName = Union[ImageLayerName, DeltaLayerName]
+LayerName = ImageLayerName | DeltaLayerName


 class InvalidFileName(Exception):
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -1,24 +1,32 @@
 from __future__ import annotations

+import dataclasses
+import json
+import random
+import string
 import time
 from collections import defaultdict
 from dataclasses import dataclass
 from datetime import datetime
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry

-from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineArchivalState, TimelineId
+from fixtures.common_types import (
+    Id,
+    Lsn,
+    TenantId,
+    TenantShardId,
+    TimelineArchivalState,
+    TimelineId,
+)
 from fixtures.log_helper import log
 from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
 from fixtures.pg_version import PgVersion
 from fixtures.utils import Fn

-if TYPE_CHECKING:
-    from typing import Optional, Union
-

 class PageserverApiException(Exception):
    def __init__(self, message, status_code: int):
@@ -27,6 +35,69 @@ class PageserverApiException(Exception):
        self.status_code = status_code


+@dataclass
+class ImportPgdataIdemptencyKey:
+    key: str
+
+    @staticmethod
+    def random() -> ImportPgdataIdemptencyKey:
+        return ImportPgdataIdemptencyKey(
+            "".join(random.choices(string.ascii_letters + string.digits, k=20))
+        )
+
+
+@dataclass
+class LocalFs:
+    path: str
+
+
+@dataclass
+class AwsS3:
+    region: str
+    bucket: str
+    key: str
+
+
+@dataclass
+class ImportPgdataLocation:
+    LocalFs: None | LocalFs = None
+    AwsS3: None | AwsS3 = None
+
+
+@dataclass
+class TimelineCreateRequestModeImportPgdata:
+    location: ImportPgdataLocation
+    idempotency_key: ImportPgdataIdemptencyKey
+
+
+@dataclass
+class TimelineCreateRequestMode:
+    Branch: None | dict[str, Any] = None
+    Bootstrap: None | dict[str, Any] = None
+    ImportPgdata: None | TimelineCreateRequestModeImportPgdata = None
+
+
+@dataclass
+class TimelineCreateRequest:
+    new_timeline_id: TimelineId
+    mode: TimelineCreateRequestMode
+
+    def to_json(self) -> str:
+        class EnhancedJSONEncoder(json.JSONEncoder):
+            def default(self, o):
+                if dataclasses.is_dataclass(o) and not isinstance(o, type):
+                    return dataclasses.asdict(o)
+                elif isinstance(o, Id):
+                    return o.id.hex()
+                return super().default(o)
+
+        # mode is flattened
+        this = dataclasses.asdict(self)
+        mode = this.pop("mode")
+        this.update(mode)
+        return json.dumps(self, cls=EnhancedJSONEncoder)
+
+
 class TimelineCreate406(PageserverApiException):
    def __init__(self, res: requests.Response):
        assert res.status_code == 406
@@ -43,7 +114,7 @@ class TimelineCreate409(PageserverApiException):
 class InMemoryLayerInfo:
    kind: str
    lsn_start: str
-    lsn_end: Optional[str]
+    lsn_end: str | None

    @classmethod
    def from_json(cls, d: dict[str, Any]) -> InMemoryLayerInfo:
@@ -60,10 +131,10 @@ class HistoricLayerInfo:
    layer_file_name: str
    layer_file_size: int
    lsn_start: str
-    lsn_end: Optional[str]
+    lsn_end: str | None
    remote: bool
    # None for image layers, true if pageserver thinks this is an L0 delta layer
-    l0: Optional[bool]
+    l0: bool | None
    visible: bool

    @classmethod
@@ -180,8 +251,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        self,
        port: int,
        is_testing_enabled_or_skip: Fn,
-        auth_token: Optional[str] = None,
-        retries: Optional[Retry] = None,
+        auth_token: str | None = None,
+        retries: Retry | None = None,
    ):
        super().__init__()
        self.port = port
@@ -278,7 +349,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def tenant_attach(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        generation: int,
        config: None | dict[str, Any] = None,
    ):
@@ -305,7 +376,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
            },
        )

-    def tenant_reset(self, tenant_id: Union[TenantId, TenantShardId], drop_cache: bool):
+    def tenant_reset(self, tenant_id: TenantId | TenantShardId, drop_cache: bool):
        params = {}
        if drop_cache:
            params["drop_cache"] = "true"
@@ -315,10 +386,10 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def tenant_location_conf(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        location_conf: dict[str, Any],
        flush_ms=None,
-        lazy: Optional[bool] = None,
+        lazy: bool | None = None,
    ):
        body = location_conf.copy()

@@ -346,20 +417,20 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert isinstance(res_json["tenant_shards"], list)
        return res_json

-    def tenant_get_location(self, tenant_id: TenantShardId):
+    def tenant_get_location(self, tenant_id: TenantId | TenantShardId):
        res = self.get(
            f"http://localhost:{self.port}/v1/location_config/{tenant_id}",
        )
        self.verbose_error(res)
        return res.json()

-    def tenant_delete(self, tenant_id: Union[TenantId, TenantShardId]):
+    def tenant_delete(self, tenant_id: TenantId | TenantShardId):
        res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
        self.verbose_error(res)
        return res

    def tenant_status(
-        self, tenant_id: Union[TenantId, TenantShardId], activate: bool = False
+        self, tenant_id: TenantId | TenantShardId, activate: bool = False
    ) -> dict[Any, Any]:
        """
        :activate: hint the server not to accelerate activation of this tenant in response
@@ -378,17 +449,17 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert isinstance(res_json, dict)
        return res_json

-    def tenant_config(self, tenant_id: Union[TenantId, TenantShardId]) -> TenantConfig:
+    def tenant_config(self, tenant_id: TenantId | TenantShardId) -> TenantConfig:
        res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/config")
        self.verbose_error(res)
        return TenantConfig.from_json(res.json())

-    def tenant_heatmap_upload(self, tenant_id: Union[TenantId, TenantShardId]):
+    def tenant_heatmap_upload(self, tenant_id: TenantId | TenantShardId):
        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/heatmap_upload")
        self.verbose_error(res)

    def tenant_secondary_download(
-        self, tenant_id: Union[TenantId, TenantShardId], wait_ms: Optional[int] = None
+        self, tenant_id: TenantId | TenantShardId, wait_ms: int | None = None
    ) -> tuple[int, dict[Any, Any]]:
        url = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/secondary/download"
        if wait_ms is not None:
@@ -397,13 +468,13 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        self.verbose_error(res)
        return (res.status_code, res.json())

-    def tenant_secondary_status(self, tenant_id: Union[TenantId, TenantShardId]):
+    def tenant_secondary_status(self, tenant_id: TenantId | TenantShardId):
        url = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/secondary/status"
        res = self.get(url)
        self.verbose_error(res)
        return res.json()

-    def set_tenant_config(self, tenant_id: Union[TenantId, TenantShardId], config: dict[str, Any]):
+    def set_tenant_config(self, tenant_id: TenantId | TenantShardId, config: dict[str, Any]):
        """
        Only use this via storage_controller.pageserver_api().

@@ -420,8 +491,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
    def patch_tenant_config_client_side(
        self,
        tenant_id: TenantId,
-        inserts: Optional[dict[str, Any]] = None,
-        removes: Optional[list[str]] = None,
+        inserts: dict[str, Any] | None = None,
+        removes: list[str] | None = None,
    ):
        """
        Only use this via storage_controller.pageserver_api().
@@ -436,11 +507,11 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
                del current[key]
        self.set_tenant_config(tenant_id, current)

-    def tenant_size(self, tenant_id: Union[TenantId, TenantShardId]) -> int:
+    def tenant_size(self, tenant_id: TenantId | TenantShardId) -> int:
        return self.tenant_size_and_modelinputs(tenant_id)[0]

    def tenant_size_and_modelinputs(
-        self, tenant_id: Union[TenantId, TenantShardId]
+        self, tenant_id: TenantId | TenantShardId
    ) -> tuple[int, dict[str, Any]]:
        """
        Returns the tenant size, together with the model inputs as the second tuple item.
@@ -456,7 +527,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert isinstance(inputs, dict)
        return (size, inputs)

-    def tenant_size_debug(self, tenant_id: Union[TenantId, TenantShardId]) -> str:
+    def tenant_size_debug(self, tenant_id: TenantId | TenantShardId) -> str:
        """
        Returns the tenant size debug info, as an HTML string
        """
@@ -468,10 +539,10 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def tenant_time_travel_remote_storage(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timestamp: datetime,
        done_if_after: datetime,
-        shard_counts: Optional[list[int]] = None,
+        shard_counts: list[int] | None = None,
    ):
        """
        Issues a request to perform time travel operations on the remote storage
@@ -490,7 +561,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_list(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        include_non_incremental_logical_size: bool = False,
        include_timeline_dir_layer_file_size_sum: bool = False,
    ) -> list[dict[str, Any]]:
@@ -510,7 +581,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_and_offloaded_list(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
    ) -> TimelinesInfoAndOffloaded:
        res = self.get(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline_and_offloaded",
@@ -523,11 +594,11 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
    def timeline_create(
        self,
        pg_version: PgVersion,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        new_timeline_id: TimelineId,
-        ancestor_timeline_id: Optional[TimelineId] = None,
-        ancestor_start_lsn: Optional[Lsn] = None,
-        existing_initdb_timeline_id: Optional[TimelineId] = None,
+        ancestor_timeline_id: TimelineId | None = None,
+        ancestor_start_lsn: Lsn | None = None,
+        existing_initdb_timeline_id: TimelineId | None = None,
        **kwargs,
    ) -> dict[Any, Any]:
        body: dict[str, Any] = {
@@ -558,7 +629,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_detail(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        include_non_incremental_logical_size: bool = False,
        include_timeline_dir_layer_file_size_sum: bool = False,
@@ -584,7 +655,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return res_json

    def timeline_delete(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, **kwargs
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, **kwargs
    ):
        """
        Note that deletion is not instant, it is scheduled and performed mostly in the background.
@@ -600,9 +671,9 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_gc(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
-        gc_horizon: Optional[int],
+        gc_horizon: int | None,
    ) -> dict[str, Any]:
        """
        Unlike most handlers, this will wait for the layers to be actually
@@ -624,16 +695,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert isinstance(res_json, dict)
        return res_json

-    def timeline_block_gc(self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId):
+    def timeline_block_gc(self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId):
        res = self.post(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/block_gc",
        )
        log.info(f"Got GC request response code: {res.status_code}")
        self.verbose_error(res)

-    def timeline_unblock_gc(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
-    ):
+    def timeline_unblock_gc(self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId):
        res = self.post(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/unblock_gc",
        )
@@ -642,7 +711,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_offload(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
    ):
        self.is_testing_enabled_or_skip()
@@ -658,14 +727,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_compact(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        force_repartition=False,
        force_image_layer_creation=False,
        force_l0_compaction=False,
        wait_until_uploaded=False,
        enhanced_gc_bottom_most_compaction=False,
-        body: Optional[dict[str, Any]] = None,
+        body: dict[str, Any] | None = None,
    ):
        self.is_testing_enabled_or_skip()
        query = {}
@@ -692,7 +761,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        assert res_json is None

    def timeline_preserve_initdb_archive(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId
    ):
        log.info(
            f"Requesting initdb archive preservation for tenant {tenant_id} and timeline {timeline_id}"
@@ -704,7 +773,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_archival_config(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        state: TimelineArchivalState,
    ):
@@ -720,7 +789,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_get_lsn_by_timestamp(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        timestamp: datetime,
        with_lease: bool = False,
@@ -739,7 +808,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return res_json

    def timeline_lsn_lease(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, lsn: Lsn
    ):
        data = {
            "lsn": str(lsn),
@@ -755,7 +824,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return res_json

    def timeline_get_timestamp_of_lsn(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, lsn: Lsn
    ):
        log.info(f"Requesting time range of lsn {lsn}, tenant {tenant_id}, timeline {timeline_id}")
        res = self.get(
@@ -765,9 +834,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        res_json = res.json()
        return res_json

-    def timeline_layer_map_info(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
-    ):
+    def timeline_layer_map_info(self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId):
        log.info(f"Requesting layer map info of tenant {tenant_id}, timeline {timeline_id}")
        res = self.get(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer",
@@ -778,13 +845,13 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_checkpoint(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        force_repartition=False,
        force_image_layer_creation=False,
        force_l0_compaction=False,
        wait_until_uploaded=False,
-        compact: Optional[bool] = None,
+        compact: bool | None = None,
        **kwargs,
    ):
        self.is_testing_enabled_or_skip()
@@ -801,7 +868,9 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        if compact is not None:
            query["compact"] = "true" if compact else "false"

-        log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
+        log.info(
+            f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}, wait_until_uploaded={wait_until_uploaded}"
+        )
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint",
            params=query,
@@ -814,7 +883,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_spawn_download_remote_layers(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        max_concurrent_downloads: int,
    ) -> dict[str, Any]:
@@ -833,7 +902,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_poll_download_remote_layers_status(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        spawn_response: dict[str, Any],
        poll_state=None,
@@ -855,7 +924,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def timeline_download_remote_layers(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        max_concurrent_downloads: int,
        errors_ok=False,
@@ -905,7 +974,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        timeline_id: TimelineId,
        file_kind: str,
        op_kind: str,
-    ) -> Optional[int]:
+    ) -> int | None:
        metrics = [
            "pageserver_remote_timeline_client_calls_started_total",
            "pageserver_remote_timeline_client_calls_finished_total",
@@ -929,7 +998,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def layer_map_info(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
    ) -> LayerMapInfo:
        res = self.get(
@@ -939,7 +1008,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return LayerMapInfo.from_json(res.json())

    def timeline_layer_scan_disposable_keys(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, layer_name: str
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, layer_name: str
    ) -> ScanDisposableKeysResponse:
        res = self.post(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}/scan_disposable_keys",
@@ -949,7 +1018,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return ScanDisposableKeysResponse.from_json(res.json())

    def download_layer(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, layer_name: str
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, layer_name: str
    ):
        res = self.get(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}",
@@ -958,9 +1027,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

        assert res.status_code == 200

-    def download_all_layers(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
-    ):
+    def download_all_layers(self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId):
        info = self.layer_map_info(tenant_id, timeline_id)
        for layer in info.historic_layers:
            if not layer.remote:
@@ -969,9 +1036,9 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def detach_ancestor(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
-        batch_size: Optional[int] = None,
+        batch_size: int | None = None,
        **kwargs,
    ) -> set[TimelineId]:
        params = {}
@@ -987,7 +1054,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        return set(map(TimelineId, json["reparented_timelines"]))

    def evict_layer(
-        self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, layer_name: str
+        self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, layer_name: str
    ):
        res = self.delete(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}",
@@ -996,7 +1063,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

        assert res.status_code in (200, 304)

-    def evict_all_layers(self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId):
+    def evict_all_layers(self, tenant_id: TenantId | TenantShardId, timeline_id: TimelineId):
        info = self.layer_map_info(tenant_id, timeline_id)
        for layer in info.historic_layers:
            self.evict_layer(tenant_id, timeline_id, layer.layer_file_name)
@@ -1009,7 +1076,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        self.verbose_error(res)
        return res.json()

-    def tenant_break(self, tenant_id: Union[TenantId, TenantShardId]):
+    def tenant_break(self, tenant_id: TenantId | TenantShardId):
        res = self.put(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/break")
        self.verbose_error(res)

@@ -1058,7 +1125,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):

    def perf_info(
        self,
-        tenant_id: Union[TenantId, TenantShardId],
+        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
    ):
        self.is_testing_enabled_or_skip()
--- a/test_runner/fixtures/pageserver/many_tenants.py
+++ b/test_runner/fixtures/pageserver/many_tenants.py
@@ -13,7 +13,8 @@ from fixtures.neon_fixtures import (
 from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind

 if TYPE_CHECKING:
-    from typing import Any, Callable
+    from collections.abc import Callable
+    from typing import Any


 def single_timeline(
--- a/test_runner/fixtures/pageserver/utils.py
+++ b/test_runner/fixtures/pageserver/utils.py
@@ -17,14 +17,14 @@ from fixtures.remote_storage import RemoteStorage, RemoteStorageKind, S3Storage
 from fixtures.utils import wait_until

 if TYPE_CHECKING:
-    from typing import Any, Optional, Union
+    from typing import Any


 def assert_tenant_state(
    pageserver_http: PageserverHttpClient,
    tenant: TenantId,
    expected_state: str,
-    message: Optional[str] = None,
+    message: str | None = None,
 ) -> None:
    tenant_status = pageserver_http.tenant_status(tenant)
    log.info(f"tenant_status: {tenant_status}")
@@ -33,7 +33,7 @@ def assert_tenant_state(

 def remote_consistent_lsn(
    pageserver_http: PageserverHttpClient,
-    tenant: Union[TenantId, TenantShardId],
+    tenant: TenantId | TenantShardId,
    timeline: TimelineId,
 ) -> Lsn:
    detail = pageserver_http.timeline_detail(tenant, timeline)
@@ -51,7 +51,7 @@ def remote_consistent_lsn(

 def wait_for_upload(
    pageserver_http: PageserverHttpClient,
-    tenant: Union[TenantId, TenantShardId],
+    tenant: TenantId | TenantShardId,
    timeline: TimelineId,
    lsn: Lsn,
 ):
@@ -138,7 +138,7 @@ def wait_until_all_tenants_state(

 def wait_until_timeline_state(
    pageserver_http: PageserverHttpClient,
-    tenant_id: Union[TenantId, TenantShardId],
+    tenant_id: TenantId | TenantShardId,
    timeline_id: TimelineId,
    expected_state: str,
    iterations: int,
@@ -188,7 +188,7 @@ def wait_until_tenant_active(

 def last_record_lsn(
    pageserver_http_client: PageserverHttpClient,
-    tenant: Union[TenantId, TenantShardId],
+    tenant: TenantId | TenantShardId,
    timeline: TimelineId,
 ) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -200,7 +200,7 @@ def last_record_lsn(

 def wait_for_last_record_lsn(
    pageserver_http: PageserverHttpClient,
-    tenant: Union[TenantId, TenantShardId],
+    tenant: TenantId | TenantShardId,
    timeline: TimelineId,
    lsn: Lsn,
 ) -> Lsn:
@@ -267,10 +267,10 @@ def wait_for_upload_queue_empty(

 def wait_timeline_detail_404(
    pageserver_http: PageserverHttpClient,
-    tenant_id: Union[TenantId, TenantShardId],
+    tenant_id: TenantId | TenantShardId,
    timeline_id: TimelineId,
    iterations: int,
-    interval: Optional[float] = None,
+    interval: float | None = None,
 ):
    if interval is None:
        interval = 0.25
@@ -292,10 +292,10 @@ def wait_timeline_detail_404(

 def timeline_delete_wait_completed(
    pageserver_http: PageserverHttpClient,
-    tenant_id: Union[TenantId, TenantShardId],
+    tenant_id: TenantId | TenantShardId,
    timeline_id: TimelineId,
    iterations: int = 20,
-    interval: Optional[float] = None,
+    interval: float | None = None,
    **delete_args,
 ) -> None:
    pageserver_http.timeline_delete(tenant_id=tenant_id, timeline_id=timeline_id, **delete_args)
@@ -304,9 +304,9 @@ def timeline_delete_wait_completed(

 # remote_storage must not be None, but that's easier for callers to make mypy happy
 def assert_prefix_empty(
-    remote_storage: Optional[RemoteStorage],
-    prefix: Optional[str] = None,
-    allowed_postfix: Optional[str] = None,
+    remote_storage: RemoteStorage | None,
+    prefix: str | None = None,
+    allowed_postfix: str | None = None,
    delimiter: str = "/",
 ) -> None:
    assert remote_storage is not None
@@ -348,8 +348,8 @@ def assert_prefix_empty(

 # remote_storage must not be None, but that's easier for callers to make mypy happy
 def assert_prefix_not_empty(
-    remote_storage: Optional[RemoteStorage],
-    prefix: Optional[str] = None,
+    remote_storage: RemoteStorage | None,
+    prefix: str | None = None,
    delimiter: str = "/",
 ):
    assert remote_storage is not None
@@ -358,7 +358,7 @@ def assert_prefix_not_empty(


 def list_prefix(
-    remote: RemoteStorage, prefix: Optional[str] = None, delimiter: str = "/"
+    remote: RemoteStorage, prefix: str | None = None, delimiter: str = "/"
 ) -> ListObjectsV2OutputTypeDef:
    """
    Note that this function takes into account prefix_in_bucket.
--- a/test_runner/fixtures/parametrize.py
+++ b/test_runner/fixtures/parametrize.py
@@ -11,7 +11,7 @@ from _pytest.python import Metafunc
 from fixtures.pg_version import PgVersion

 if TYPE_CHECKING:
-    from typing import Any, Optional
+    from typing import Any


 """
@@ -20,31 +20,31 @@ Dynamically parametrize tests by different parameters


@pytest.fixture(scope="function", autouse=True)
-def pg_version() -> Optional[PgVersion]:
+def pg_version() -> PgVersion | None:
    return None


@pytest.fixture(scope="function", autouse=True)
-def build_type() -> Optional[str]:
+def build_type() -> str | None:
    return None


@pytest.fixture(scope="session", autouse=True)
-def platform() -> Optional[str]:
+def platform() -> str | None:
    return None


@pytest.fixture(scope="function", autouse=True)
-def pageserver_virtual_file_io_engine() -> Optional[str]:
+def pageserver_virtual_file_io_engine() -> str | None:
    return os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE")


@pytest.fixture(scope="function", autouse=True)
-def pageserver_virtual_file_io_mode() -> Optional[str]:
+def pageserver_virtual_file_io_mode() -> str | None:
    return os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE")


-def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]:
+def get_pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
    toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM")
    if toml_table is None:
        return None
@@ -54,7 +54,7 @@ def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict


@pytest.fixture(scope="function", autouse=True)
-def pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]:
+def pageserver_default_tenant_config_compaction_algorithm() -> dict[str, Any] | None:
    return get_pageserver_default_tenant_config_compaction_algorithm()


@@ -66,6 +66,7 @@ def pytest_generate_tests(metafunc: Metafunc):

    metafunc.parametrize("build_type", build_types)

+    pg_versions: list[PgVersion]
    if (v := os.getenv("DEFAULT_PG_VERSION")) is None:
        pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET]
    else:
@@ -115,5 +116,6 @@ def pytest_runtest_makereport(*args, **kwargs):
    }.get(os.uname().machine, "UNKNOWN")
    arch = os.getenv("RUNNER_ARCH", uname_m)
    allure.dynamic.parameter("__arch", arch)
+    allure.dynamic.parameter("__lfc", os.getenv("USE_LFC") != "false")

    yield
--- a/test_runner/fixtures/paths.py
+++ b/test_runner/fixtures/paths.py
@@ -18,7 +18,6 @@ from fixtures.utils import allure_attach_from_dir

 if TYPE_CHECKING:
    from collections.abc import Iterator
-    from typing import Optional


 BASE_DIR = Path(__file__).parents[2]
@@ -26,9 +25,7 @@ COMPUTE_CONFIG_DIR = BASE_DIR / "compute" / "etc"
 DEFAULT_OUTPUT_DIR: str = "test_output"


-def get_test_dir(
-    request: FixtureRequest, top_output_dir: Path, prefix: Optional[str] = None
-) -> Path:
+def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | None = None) -> Path:
    """Compute the path to a working directory for an individual test."""
    test_name = request.node.name
    test_dir = top_output_dir / f"{prefix or ''}{test_name.replace('/', '-')}"
@@ -112,7 +109,7 @@ def compatibility_snapshot_dir() -> Iterator[Path]:


@pytest.fixture(scope="session")
-def compatibility_neon_binpath() -> Iterator[Optional[Path]]:
+def compatibility_neon_binpath() -> Iterator[Path | None]:
    if os.getenv("REMOTE_ENV"):
        return
    comp_binpath = None
@@ -133,7 +130,7 @@ def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:


@pytest.fixture(scope="session")
-def compatibility_pg_distrib_dir() -> Iterator[Optional[Path]]:
+def compatibility_pg_distrib_dir() -> Iterator[Path | None]:
    compat_distrib_dir = None
    if env_compat_postgres_bin := os.environ.get("COMPATIBILITY_POSTGRES_DISTRIB_DIR"):
        compat_distrib_dir = Path(env_compat_postgres_bin).resolve()
@@ -197,7 +194,7 @@ class FileAndThreadLock:
    def __init__(self, path: Path):
        self.path = path
        self.thread_lock = threading.Lock()
-        self.fd: Optional[int] = None
+        self.fd: int | None = None

    def __enter__(self):
        self.fd = os.open(self.path, os.O_CREAT | os.O_WRONLY)
@@ -208,9 +205,9 @@ class FileAndThreadLock:

    def __exit__(
        self,
-        exc_type: Optional[type[BaseException]],
-        exc_value: Optional[BaseException],
-        exc_traceback: Optional[TracebackType],
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
    ):
        assert self.fd is not None
        assert self.thread_lock.locked()  # ... by us
@@ -263,9 +260,9 @@ class SnapshotDir:

    def __exit__(
        self,
-        exc_type: Optional[type[BaseException]],
-        exc_value: Optional[BaseException],
-        exc_traceback: Optional[TracebackType],
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
    ):
        self._lock.__exit__(exc_type, exc_value, exc_traceback)

@@ -277,7 +274,7 @@ def shared_snapshot_dir(top_output_dir: Path, ident: str) -> SnapshotDir:


@pytest.fixture(scope="function")
-def test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Optional[Path]:
+def test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Path | None:
    """
    Idempotently create a test's overlayfs mount state directory.
    If the functionality isn't enabled via env var, returns None.
--- a/test_runner/fixtures/pg_version.py
+++ b/test_runner/fixtures/pg_version.py
@@ -1,22 +1,16 @@
 from __future__ import annotations

-import enum
-from typing import TYPE_CHECKING
+from enum import StrEnum

 from typing_extensions import override

-if TYPE_CHECKING:
-    from typing import Optional
-
-
 """
 This fixture is used to determine which version of Postgres to use for tests.
 """


 # Inherit PgVersion from str rather than int to make it easier to pass as a command-line argument
-# TODO: use enum.StrEnum for Python >= 3.11
-class PgVersion(str, enum.Enum):
+class PgVersion(StrEnum):
    V14 = "14"
    V15 = "15"
    V16 = "16"
@@ -34,7 +28,6 @@ class PgVersion(str, enum.Enum):
    def __repr__(self) -> str:
        return f"'{self.value}'"

-    # Make this explicit for Python 3.11 compatibility, which changes the behavior of enums
    @override
    def __str__(self) -> str:
        return self.value
@@ -47,16 +40,18 @@ class PgVersion(str, enum.Enum):

    @classmethod
    @override
-    def _missing_(cls, value: object) -> Optional[PgVersion]:
-        known_values = {v.value for _, v in cls.__members__.items()}
+    def _missing_(cls, value: object) -> PgVersion | None:
+        if not isinstance(value, str):
+            return None

-        # Allow passing version as a string with "v" prefix (e.g. "v14")
-        if isinstance(value, str) and value.lower().startswith("v") and value[1:] in known_values:
-            return cls(value[1:])
-        # Allow passing version as an int (e.g. 15 or 150002, both will be converted to PgVersion.V15)
-        elif isinstance(value, int) and str(value)[:2] in known_values:
-            return cls(str(value)[:2])
+        known_values = set(cls.__members__.values())
+
+        # Allow passing version as v-prefixed string (e.g. "v14")
+        if value.lower().startswith("v") and (v := value[1:]) in known_values:
+            return cls(v)
+
+        # Allow passing version as an int (i.e. both "15" and "150002" matches PgVersion.V15)
+        if value.isdigit() and (v := value[:2]) in known_values:
+            return cls(v)

-        # Make mypy happy
-        # See https://github.com/python/mypy/issues/3974
        return None
--- a/test_runner/fixtures/port_distributor.py
+++ b/test_runner/fixtures/port_distributor.py
@@ -3,13 +3,9 @@ from __future__ import annotations
 import re
 import socket
 from contextlib import closing
-from typing import TYPE_CHECKING

 from fixtures.log_helper import log

-if TYPE_CHECKING:
-    from typing import Union
-

 def can_bind(host: str, port: int) -> bool:
    """
@@ -49,17 +45,19 @@ class PortDistributor:
            "port range configured for test is exhausted, consider enlarging the range"
        )

-    def replace_with_new_port(self, value: Union[int, str]) -> Union[int, str]:
+    def replace_with_new_port(self, value: int | str) -> int | str:
        """
        Returns a new port for a port number in a string (like "localhost:1234") or int.
        Replacements are memorised, so a substitution for the same port is always the same.
        """

-        # TODO: replace with structural pattern matching for Python >= 3.10
-        if isinstance(value, int):
-            return self._replace_port_int(value)
-
-        return self._replace_port_str(value)
+        match value:
+            case int():
+                return self._replace_port_int(value)
+            case str():
+                return self._replace_port_str(value)
+            case _:
+                raise TypeError(f"Unsupported type {type(value)}, should be int | str")

    def _replace_port_int(self, value: int) -> int:
        known_port = self.port_map.get(value)
--- a/Show More
+++ b/Show More