Compare commits


41 Commits

Author SHA1 Message Date
Anastasia Lubennikova
751e7fbfd4 don't require shared_preload_libraries in spec in local tests -
we don't pass settings this way in local setup
2025-05-05 23:41:24 +01:00
Anastasia Lubennikova
807e00e9d2 Don't require pgaudit library in local tests 2025-05-05 15:10:29 +01:00
Anastasia Lubennikova
664a3e0953 impr(compute): always add pgaudit to shared_preload_libraries
This is necessary to handle an audit_log_level downgrade,
because once the audit extension has been enabled, it requires the library to always be present
2025-05-05 15:02:44 +01:00
devin-ai-integration[bot]
baf425a2cd [pageserver/virtual_file] impr: Improve OpenOptions API ergonomics (#11789)
# Improve OpenOptions API ergonomics

Closes #11787

This PR improves the OpenOptions API ergonomics by:

1. Making OpenOptions methods take and return owned Self instead of &mut
self
2. Changing VirtualFile::open_with_options_v2 to take an owned
OpenOptions
3. Removing unnecessary .clone() and .to_owned() calls

These changes make the API more idiomatic Rust by leveraging the builder
pattern with owned values, which is cleaner and more ergonomic than the
previous approach.
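
As a rough illustration of the owned-builder style (a minimal sketch with stand-in fields, not the actual pageserver `OpenOptions`):

```rust
// Minimal sketch of the owned-builder pattern described above.
// `OpenOptions` here is a stand-in type, not the real pageserver one.
#[derive(Clone, Default)]
struct OpenOptions {
    read: bool,
    write: bool,
    create: bool,
}

impl OpenOptions {
    // Taking and returning owned `Self` lets calls chain without
    // `.clone()`/`.to_owned()` at the end of the chain.
    fn read(mut self, yes: bool) -> Self {
        self.read = yes;
        self
    }
    fn write(mut self, yes: bool) -> Self {
        self.write = yes;
        self
    }
    fn create(mut self, yes: bool) -> Self {
        self.create = yes;
        self
    }
}

// The open function takes the options by value, so no clone is needed.
fn open_with_options(opts: OpenOptions) {
    let _ = (opts.read, opts.write, opts.create);
}

fn main() {
    let opts = OpenOptions::default().read(true).write(true).create(true);
    open_with_options(opts);
}
```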

Link to Devin run:
https://app.devin.ai/sessions/c2a4b24f7aca40a3b3777f4259bf8ee1
Requested by: christian@neon.tech

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: christian@neon.tech <christian@neon.tech>
2025-05-05 13:06:37 +00:00
Alex Chi Z.
0b243242df fix(test): allow flush error in gc-compaction tests (#11822)
## Problem

Part of https://github.com/neondatabase/neon/issues/11762

## Summary of changes

While #11762 needs some work to refactor the error propagating thing, we
can do a hacky fix for the gc-compaction tests to allow flush error
during shutdown. It does not affect correctness.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-05 12:15:22 +00:00
Conrad Ludgate
6131d86ec9 proxy: allow invalid SNI (#11792)
## Problem

Some PrivateLink customers are unable to use Private DNS, so they use
an invalid domain name to address Neon. We currently reject those
connections because we cannot resolve the correct certificate.

## Summary of changes

1. Ensure a certificate is always returned.
2. If there is an SNI field, use endpoint fallback if it doesn't match.

I suggest reviewing each commit separately.
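
A minimal sketch of the intended selection behavior, with hypothetical types (the real proxy resolves actual TLS certificates):

```rust
// Hypothetical sketch: always return *some* certificate, falling back
// when the SNI name is absent or doesn't match a known endpoint.
struct Cert; // stand-in for a resolved TLS certificate

fn resolve_cert(
    sni: Option<&str>,
    exact: impl Fn(&str) -> Option<Cert>,
    fallback: Cert,
) -> Cert {
    match sni {
        // SNI present: use the matching certificate if we have one,
        // otherwise fall back instead of rejecting the connection.
        Some(name) => exact(name).unwrap_or(fallback),
        // No SNI at all: also use the fallback certificate.
        None => fallback,
    }
}
```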
2025-05-05 11:18:55 +00:00
Konstantin Knizhnik
4b9087651c Checked that stored LwLSN >= FirstNormalUnloggedLSN (#11750)
## Problem

Undo unintended change 60b9fb1baf

## Summary of changes

Add assert that we are not storing fake LSN in LwLSN.

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-05-02 19:27:59 +00:00
Konstantin Knizhnik
79699aebc8 Reserve in file descriptor pool sockets used for connections to page servers (#11798)
## Problem

See https://github.com/neondatabase/neon/issues/11790

The neon extension opens connections to the pageservers, which consume
file descriptors. Postgres has a mechanism to count how many FDs are in
use, but it doesn't know about those FDs. We should call
ReserveExternalFD() or AcquireExternalFD() to account for them.

## Summary of changes

Call `ReserveExternalFD()` for each shard

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Mikhail Kot <mikhail@neon.tech>
2025-05-02 14:36:10 +00:00
Alexander Bayandin
22290eb7ba CI: notify relevant team about release deploy failures (#11797)
## Problem

We notify only the Storage team about failed deploys, but the Compute
and Proxy teams can also benefit from that

## Summary of changes
- Adjust `notify-storage-release-deploy-failure` to notify the relevant
team about failed deploys
2025-05-02 12:46:21 +00:00
Alex Chi Z.
bbc35e10b8 fix(test): increase timeouts for some tests (#11781)
## Problem

Those tests are timing out more frequently after
https://github.com/neondatabase/neon/pull/11585

## Summary of changes

Increase timeout for `test_pageserver_gc_compaction_smoke`

Increase rollback wait timeout for `test_tx_abort_with_many_relations`

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-01 18:36:26 +00:00
Alex Chi Z.
ae2c3ac12f test: revert relsizev2 config (#11759)
## Problem

part of https://github.com/neondatabase/neon/issues/9516

One thing I realized in the past few months is that "no-way-back" changes
like this are scary to roll out without fine-grained rollout infra.
The plan was to flip the flag in the repo and roll it out soon, but I
don't think the rollout will happen in the near future. So I'd rather
revert the flag to avoid creating a discrepancy between staging and the
regress tests.

## Summary of changes

Not using rel_size_v2 by default in unit tests; we still have a few
tests that explicitly exercise the new format, so we still get some test
coverage.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-01 17:51:10 +00:00
Vlad Lazar
16d594b7b3 pagectl: list layers for given key in decreasing LSN order (#11799)
Adds an extra key CLI arg to `pagectl layer list-layer`. When provided,
only layers with key ranges containing the key will be listed in
decreasing LSN order (indices are preserved for `dump-layer`).
2025-05-01 15:56:43 +00:00
Suhas Thalanki
f999632327 Adding anon v2 support to the dockerfile (#11313)
## Problem

Removed `anon` v1 support as described here:
https://github.com/neondatabase/cloud/issues/22663

Adding `anon` v2 support to re-introduce the `pg_anon` extension. 
Related Issues: https://github.com/neondatabase/cloud/issues/20456


## Summary of changes

Adding `anon` v2 support by building it in the dockerfile
2025-05-01 15:22:01 +00:00
Shockingly Good
5bd850d15a Fix the leaked tracing context for the "compute_monitor:run". (#11791)
Removes the leaked tracing context from the "compute_monitor:run" log,
which inherited the "start_compute" span and sometimes the HTTP request
context as well.

## Problem

The problem is that the context of the monitor's trace is unnecessarily
populated with span data inherited from earlier work on the same
thread.

## Summary of changes

The context is completely reset by moving the span from the thread
spawning the monitor into the thread where the monitor will actually
start working.
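
A minimal sketch of that shape, assuming `tracing` and a plain thread spawn (names and structure are illustrative, not the actual compute_ctl code):

```rust
use std::thread;

fn spawn_monitor() {
    // Create the span *inside* the new thread so it does not inherit
    // whatever span (start_compute, an HTTP request, ...) happens to be
    // current on the spawning thread.
    thread::spawn(|| {
        let span = tracing::info_span!("compute_monitor:run");
        let _entered = span.enter();
        tracing::info!("compute is not running, waiting before monitoring activity");
    });
}
```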

Addresses https://github.com/neondatabase/cloud/issues/28145

## Examples

### Before
```
2025-04-30T16:39:05.840298Z  INFO start_compute:compute_monitor:run: compute is not running, waiting before monitoring activity
```

### After

```
2025-04-30T16:39:05.840298Z  INFO compute_monitor:run: compute is not running, waiting before monitoring activity
```
2025-05-01 09:09:10 +00:00
Dmitrii Kovalkov
1b789e8d7c fix(pgxn/neon): Use proper member size in TermsCollectedMset and VotesCollectedMset (#11785)
## Problem
`TermsCollectedMset` and `VotesCollectedMset` accept a MemberSet
argument to find a quorum in. It may be either `wp->mconf.members` or
`wp->mconf.new_members`. But the loops inside always use
`wp->mconf.members.len`.

If the member sets differ in size, these functions may not scan all
the safekeepers in `mset`.

We are not planning to change the member set size dynamically now, but
it's worth fixing anyway.

- Part of https://github.com/neondatabase/neon/issues/11669

## Summary of changes
- Use proper size of member set in `TermsCollectedMset` and
`VotesCollectedMset`
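
Sketched in Rust for brevity (the actual code is C in `pgxn/neon`), the shape of the bug and fix looks roughly like this:

```rust
// Stand-ins for the C structures; the real code lives in pgxn/neon.
struct Member {
    has_voted: bool,
}
struct MemberSet {
    members: Vec<Member>,
}

// Buggy shape: the loop bound comes from a *different* member set
// (wp->mconf.members.len in the C code), so if `mset` is larger,
// its trailing members are never scanned.
fn votes_collected_buggy(mset: &MemberSet, wrong_len: usize) -> usize {
    (0..wrong_len).filter(|&i| mset.members[i].has_voted).count()
}

// Fixed shape: use the length of the set actually being scanned.
fn votes_collected_fixed(mset: &MemberSet) -> usize {
    mset.members.iter().filter(|m| m.has_voted).count()
}
```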
2025-04-30 16:50:21 +00:00
Arpad Müller
bec7427d9e pull_timeline and sk logging fixes (#11786)
This patch contains some fixes of issues I ran into for #11712:

* make `pull_timeline` return success for a timeline that already exists.
This follows the general API design of storage components: API endpoints
are retryable and converge to a status code, instead of starting to
error. We change `pull_timeline`'s return type a little, because we
might not actually have a source sk to pull from. Note that the fix is
not enough; there is still a race when two `pull_timeline` instances
happen in parallel: we might try to enter both pulled timelines at the
same time. That can be fixed later.
* make `pull_timeline` support one safekeeper being down. In general, if
one safekeeper is down, that's not a problem. The added comment explains
a potential situation (found in the `test_lagging_sk` test, for example).
* don't log very long errors when computes try to connect to safekeepers
that don't have the timeline yet, if `allow_timeline_creation` is false.
That is the case when a sk connection string with generation numbers
is passed to the compute, so we'll hit this code path more often. E.g.
when a safekeeper missed a timeline creation, but the compute connects
to it first before the `pull_timeline` gets requested by the storcon
reconciler: this is a perfectly normal situation. So don't log the whole
error backtrace, and don't log it at the error level, but only at
info.

part of #11670
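
A minimal sketch of the idempotency change in the first bullet, with illustrative types (not the actual safekeeper API):

```rust
// Hypothetical sketch: treat "timeline already exists" as success so
// that retries of pull_timeline converge instead of starting to error.
enum PullResult {
    Pulled { source_sk: String },
    AlreadyExists, // no source safekeeper involved
}

fn pull_timeline(exists: bool, source: Option<String>) -> anyhow::Result<PullResult> {
    if exists {
        // Idempotent: a repeated pull of an existing timeline succeeds.
        return Ok(PullResult::AlreadyExists);
    }
    let source_sk = source.ok_or_else(|| anyhow::anyhow!("no safekeeper to pull from"))?;
    // ... actually pull the timeline from source_sk ...
    Ok(PullResult::Pulled { source_sk })
}
```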
2025-04-30 16:24:01 +00:00
Alex Chi Z.
e2db76b9be feat(pageserver): ondemand download reason observability (#11780)
## Problem

Part of https://github.com/neondatabase/neon/issues/11615

## Summary of changes

We don't understand the root cause of the resident-size surges we see
every now and then. This patch adds observability for that, and in the
next week we might get a better understanding of what's going on.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-30 16:04:00 +00:00
Alex Chi Z.
6b4b8e0d8b fix(pageserver): do not increase basebackup err counter when shutdown (#11778)
## Problem

We occasionally see basebackup error alerts even though no errors were
logged. Looking at the code, the only code path that can cause this is
shutdown.

## Summary of changes

Do not increase any counter (ok/err) when a basebackup request gets
cancelled due to shutdown.
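
A minimal sketch of the counting rule, with stand-in types (the real code uses the pageserver's metrics and cancellation machinery):

```rust
// Sketch: when a basebackup request fails only because the pageserver
// is shutting down, record neither the ok nor the err counter.
enum BasebackupError {
    Shutdown,
    Other(String),
}

fn record_basebackup_metrics(
    res: &Result<(), BasebackupError>,
    ok_counter: &mut u64,
    err_counter: &mut u64,
) {
    match res {
        Ok(()) => *ok_counter += 1,
        Err(BasebackupError::Shutdown) => { /* cancelled by shutdown: count nothing */ }
        Err(BasebackupError::Other(_)) => *err_counter += 1,
    }
}
```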

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-30 15:50:12 +00:00
Konstantin Knizhnik
1d68577fbd Check target slot state in prefetch_wait_for (#11779)
## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1745599814030679

Assume the following scenario: `prefetch_wait_for` is doing
`CHECK_FOR_INTERRUPTS`, which tries to load prefetch responses.
In case of an error it calls `pageserver_disconnect`, which aborts all
in-flight requests. But such a failure is not detected by
`prefetch_wait_for`, which returns true. As a result,
`communicator_read_at_lsnv` assumes that the slot was received; but since
asserts are disabled in prod, this is not actually checked.
Then it tries to interpret the response and ... *SIGSEGV*

## Summary of changes

Check target slot state in `prefetch_wait_for`.

Resolves https://github.com/neondatabase/cloud/issues/28258

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-30 12:44:59 +00:00
Arseny Sher
60f63c076f Make safekeeper proto version 3 default (#11518)
## Problem

We have been running compute <-> sk protocol version 3 for a while on
staging with no issues observed, and want to fully migrate to it
eventually.

## Summary of changes

Let's make v3 the default.

ref https://github.com/neondatabase/neon/issues/10326

---------

Co-authored-by: Arpad Müller <arpad@neon.tech>
2025-04-30 12:23:20 +00:00
Mikhail Kot
8da4ec9740 Postgres metrics for stuck getpage requests (#11710)
https://github.com/neondatabase/neon/issues/10327
Resolves: #11720 

New metrics:
- `compute_getpage_stuck_requests_total`
- `compute_getpage_max_inflight_stuck_time_ms`
2025-04-30 12:01:41 +00:00
Em Sharnoff
b48404952d Bump vm-builder: v0.42.2 -> v0.46.0 (#11782)
Bumped to pick up the changes from neondatabase/autoscaling#1366 —
specifically including `uname` in the logs.

Other changes included:

* neondatabase/autoscaling#1301
* neondatabase/autoscaling#1296
2025-04-30 11:32:25 +00:00
devin-ai-integration[bot]
1d06172d59 pageserver: remove resident size from billing metrics (#11699)
This is a rebase of PR #10739 by @henryliu2014 on the current main
branch.

## Problem

pageserver: remove resident size from billing metrics

Fixes #10388

## Summary of changes

The following changes have been made to remove resident size from
billing metrics:

* removed the metric "resident_size" and related code in
consumption_metrics/metrics.rs
* removed the description of the "resident_size" metric from
consumption_metrics.md
* refactored the "resident_size"-related test case

Requested by: John Spray (john@neon.tech)

---------

Co-authored-by: liuheqing <hq.liu@qq.com>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: John Spray <john@neon.tech>
2025-04-29 18:34:56 +00:00
Elizabeth Murray
a08c1a23eb Upgrade the pgrag version in the compute Dockerfile. (#11687)
Update the compute Dockerfile to use a new version of pgrag. The new
version of pgrag uses the latest pgrx, and has a fix that terminates
background workers on postmaster exit.
2025-04-29 16:50:18 +00:00
John Spray
a2adc7dbd3 storcon: avoid multiple initdbs when shard 0 has stale locations (#11760)
## Problem

In #11727 I overlooked the case of multiple attached locations for shard
0.

I misread the code and thought `create_one` acts on one location, but it
actually acts on one _shard_, which is potentially multiple locations.

This was not a regression, but it meant that the fix was incomplete.

## Summary of changes

- In `create_one`, when updating shard zero, have any "other" locations
use the initdb from shard 0
2025-04-29 15:31:52 +00:00
Vlad Lazar
768a580373 pageserver: add not modified since lsn to get page span (#11774)
It's useful when debugging.
2025-04-29 14:07:23 +00:00
Folke Behrens
09247de8d5 proxy: Enable JSON logging by default (#11772)
This does not affect local_proxy.
2025-04-29 13:11:24 +00:00
Arpad Müller
0b35929211 Make SafekeeperReconciler parallel via semaphore (#11757)
Right now we only support running one reconciliation per safekeeper.
This is of course usually way below what a safekeeper can do.
Therefore, introduce a semaphore and spawn the tasks asynchronously as
they come in.

Part of #11670
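
A minimal tokio sketch of this pattern (the limit and names are illustrative):

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Minimal sketch: spawn reconciliations as they arrive, but let at most
// `limit` run concurrently for one safekeeper.
async fn run_reconciler(jobs: Vec<u64>, limit: usize) {
    let semaphore = Arc::new(Semaphore::new(limit));
    let mut handles = Vec::new();
    for job in jobs {
        // Acquire a permit before spawning; this backpressures intake.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        handles.push(tokio::spawn(async move {
            let _permit = permit; // released when the task finishes
            // ... perform one reconciliation for `job` ...
            let _ = job;
        }));
    }
    for h in handles {
        let _ = h.await;
    }
}
```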
2025-04-29 12:46:15 +00:00
Ivan Efremov
b3db7f66ac fix(compute): Change the local_proxy log level (#11770)
Related to INC-496
2025-04-29 11:49:16 +00:00
a-masterov
498d852bde Fix the empty region if run on schedule (#11764)
## Problem

When the workflow ran on a schedule, the `region_id` input was not set.
As a result, an empty region value was used, which caused errors during
execution.

## Summary of Changes

- Added fallback logic to set a default region (`aws-us-east-2`) when
`region_id` is not provided.
- Ensures the workflow works correctly both when triggered manually
(`workflow_dispatch`) and on schedule (`cron`).
2025-04-29 09:12:14 +00:00
Busra Kugler
7f8b1d79c0 Replace dorny/paths-filter with step-security maintained version (#11663)
## Problem
Our CI/CD security tool StepSecurity maintains safer forks of popular
GitHub Actions that have low security scores. We're replacing
dorny/paths-filter with the maintained step-security/paths-filter
version to reduce the risk of supply-chain breaches and potential CVEs.

## Summary of changes
replace ```uses: dorny/paths-filter@de90cc6fb3``` with ```uses: step-security/paths-filter@v3```

This PR will fix: neondatabase/cloud#26141
2025-04-29 09:02:01 +00:00
JC Grünhage
d15f2ff57a fix(lint-release-pr): adjust lint and action to match (#11766)
## Problem
The `lint-release-pr` workflow run for
https://github.com/neondatabase/neon/pull/11763 failed, because the new
action did not match the lint.

## Summary of changes
Include time in expected merge message regex.
2025-04-29 08:56:44 +00:00
Konstantin Knizhnik
3593356c10 Prewarm sql api (#11742)
## Problem

Continue work on prewarm, see 
https://github.com/neondatabase/neon/pull/11740
https://github.com/neondatabase/neon/pull/11741

## Summary of changes

Add SQL API to prewarm

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-29 06:44:28 +00:00
Tristan Partin
9e8ab2ab4f Skip remote extensions WITH_LIB test when sanitizers are enabled (#11758)
In order for the test to work when sanitizers are enabled, we would need
to compile the dummy Postgres extension with the same sanitizer flags
that we compile Postgres and the neon extension with. Doing this work
would be a little more than trivial, so skipping is the best option, at
least for now.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-04-28 19:13:35 +00:00
Alex Chi Z.
c1ff7db187 fix(pageserver): consider tombstones in replorigin (#11752)
## Problem

We didn't consider tombstones in the replorigin read path in the past.
This was fine because tombstones were stored as LSN::Invalid before we
universally defined what the tombstone is for sparse keyspaces.

Now we remove non-inherited keys during detach ancestor and write the
universal tombstone (an empty image), so we need to consider it across
all the read paths.

related: https://github.com/neondatabase/neon/pull/11299

## Summary of changes

Empty value gets ignored for replorigin scans.
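
A minimal sketch of the read-path rule, with simplified types (the real scan works on pageserver key/value pairs):

```rust
// Sketch with simplified types: the universal tombstone is an empty
// image, so the replorigin scan must skip empty values instead of
// trying to decode them.
fn visible_origins(entries: Vec<(u64, Vec<u8>)>) -> Vec<(u64, Vec<u8>)> {
    entries
        .into_iter()
        .filter(|(_key, value)| !value.is_empty()) // empty image = tombstone
        .collect()
}
```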

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-28 18:54:26 +00:00
Konstantin Knizhnik
6d6b83e737 Prewarm implementation (#11741)
## Problem

Continue work on prewarm started in PR
https://github.com/neondatabase/neon/pull/11740

## Summary of changes

Implement prewarm using prefetch

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-28 18:17:03 +00:00
John Spray
0482690534 pageserver: make control_plane_api & generations fully mandatory (#10715)
## Problem

We had retained the ability to run in a generation-less mode to support
test_generations_upgrade, which was replaced with a cleaner backward
compat test in https://github.com/neondatabase/neon/pull/10701

## Summary of changes

- Remove all the special cases for "if no generation" or "if no control
plane api"
- Make control_plane_api config mandatory

---------

Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
2025-04-28 17:24:55 +00:00
Tristan Partin
a750026c2e Fix compiler warning in libpagestore.c when WITH_SANITIZERS=yes (#11755)
Postgres has a nice self-documenting macro called pg_unreachable() for
when you want to assert that a location in code won't be reached.

Warning in question:

```
/home/tristan957/Projects/work/neon//pgxn/neon/libpagestore.c: In function ‘pageserver_connect’:
/home/tristan957/Projects/work/neon//pgxn/neon/libpagestore.c:739:1: warning: control reaches end of non-void function [-Wreturn-type]
  739 | }
      | ^
```

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-04-28 17:09:48 +00:00
John Spray
998d2c2ce9 storcon: use shard 0's initdb for timeline creation (#11727)
## Problem

In principle, pageservers with different postgres binaries might generate
different initdbs, resulting in inconsistency between shards. To avoid
that, we should have shard 0 generate the initdb and other shards re-use
it.

Fixes: https://github.com/neondatabase/neon/issues/11340

## Summary of changes

- For shards with index greater than zero, set
`existing_initdb_timeline_id` in timeline creation to consume the
existing initdb rather than creating a new one
2025-04-28 16:43:35 +00:00
JC Grünhage
b1fa68f659 impr(ci): switch release PR creation over to use python based action (#11679)
## Problem
Our different repositories each had code to achieve very similar
results for release PR creation, but it was structured differently and
had different extensions. This was likely to cause maintainability
problems in the long run.

## Summary of changes
Switch to a Python-CLI-based composite action for creating the release
PRs; it will also be introduced in our other repos later.

## To Do
- [ ] Adjust our docs to reflect the changes from this.
2025-04-28 16:37:36 +00:00
devin-ai-integration[bot]
84bc3380cc Remove SAFEKEEPER_AUTH_TOKEN env var parsing from safekeeper (#11698)
# Remove SAFEKEEPER_AUTH_TOKEN env var parsing from safekeeper

This PR is a follow-up to #11443 that removes the parsing of the
`SAFEKEEPER_AUTH_TOKEN` environment variable from the safekeeper
codebase while keeping the `auth_token_path` CLI flag functionality.

## Changes:
- Removed code that checks for the `SAFEKEEPER_AUTH_TOKEN` environment
variable
- Updated comments to reflect that only the `auth_token_path` CLI flag
is now used

As mentioned in PR #11443, the environment variable approach was planned
to be deprecated and removed in favor of the file-based approach, which
is more secure since environment variables can be quite public in both
procfs and unit files.
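
A minimal sketch of the file-based approach (path handling is illustrative):

```rust
use std::fs;
use std::path::Path;

// Sketch: read the auth token from the path given by the
// --auth-token-path CLI flag instead of an env var, which can leak
// via procfs and unit files.
fn load_auth_token(path: &Path) -> std::io::Result<String> {
    Ok(fs::read_to_string(path)?.trim().to_string())
}
```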

Link to Devin run:
https://app.devin.ai/sessions/d6f56cf1b4164ea9880a9a06358a58ac

Requested by: arpad@neon.tech

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: arpad@neon.tech <arpad@neon.tech>
Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
2025-04-28 15:34:47 +00:00
128 changed files with 2018 additions and 9029 deletions

View File

@@ -33,9 +33,14 @@ config-variables:
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_CICD_CHANNEL_ID
- SLACK_COMPUTE_CHANNEL_ID
- SLACK_ON_CALL_DEVPROD_STREAM
- SLACK_ON_CALL_QA_STAGING_STREAM
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_ONCALL_COMPUTE_GROUP
- SLACK_ONCALL_PROXY_GROUP
- SLACK_ONCALL_STORAGE_GROUP
- SLACK_PROXY_CHANNEL_ID
- SLACK_RUST_CHANNEL_ID
- SLACK_STORAGE_CHANNEL_ID
- SLACK_UPCOMING_RELEASE_CHANNEL_ID

View File

@@ -41,7 +41,7 @@ echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}"
LAST_COMMIT=$(git rev-parse HEAD)
MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}")
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$"
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} UTC$"
if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then
report_error "Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD'

View File

@@ -1,103 +0,0 @@
name: Create Release PR
on:
workflow_call:
inputs:
component-name:
description: 'Component name'
required: true
type: string
source-branch:
description: 'Source branch'
required: true
type: string
secrets:
ci-access-token:
description: 'CI access token'
required: true
defaults:
run:
shell: bash -euo pipefail {0}
permissions:
contents: read
jobs:
create-release-branch:
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.source-branch }}
fetch-depth: 0
- name: Set variables
id: vars
env:
COMPONENT_NAME: ${{ inputs.component-name }}
RELEASE_BRANCH: >-
${{
false
|| inputs.component-name == 'Storage' && 'release'
|| inputs.component-name == 'Proxy' && 'release-proxy'
|| inputs.component-name == 'Compute' && 'release-compute'
}}
run: |
now_date=$(date -u +'%Y-%m-%d')
now_time=$(date -u +'%H-%M-%Z')
{
echo "title=${COMPONENT_NAME} release ${now_date}"
echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
echo "release-branch=${RELEASE_BRANCH}"
} | tee -a ${GITHUB_OUTPUT}
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Create RC branch
env:
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
git switch -c "${RC_BRANCH}"
# Manually create a merge commit on the current branch, keeping the
# tree and setting the parents to the current HEAD and the HEAD of the
# release branch. This commit is what we'll fast-forward the release
# branch to when merging the release branch.
# For details on why, look at
# https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs
current_tree=$(git rev-parse 'HEAD^{tree}')
release_head=$(git rev-parse "origin/${RELEASE_BRANCH}")
current_head=$(git rev-parse HEAD)
merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}")
# Fast-forward the current branch to the newly created merge_commit
git merge --ff-only ${merge_commit}
git push origin "${RC_BRANCH}"
- name: Create a PR into ${{ steps.vars.outputs.release-branch }}
env:
GH_TOKEN: ${{ secrets.ci-access-token }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
gh pr create --title "${TITLE}" \
--body "" \
--head "${RC_BRANCH}" \
--base "${RELEASE_BRANCH}"

View File

@@ -69,7 +69,7 @@ jobs:
submodules: true
- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
uses: step-security/paths-filter@v3
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -824,7 +824,7 @@ jobs:
- pg: v17
debian: bookworm
env:
VM_BUILDER_VERSION: v0.42.2
VM_BUILDER_VERSION: v0.46.0
steps:
- name: Harden the runner (Audit all outbound calls)
@@ -1434,10 +1434,10 @@ jobs:
;;
esac
notify-storage-release-deploy-failure:
needs: [ deploy ]
notify-release-deploy-failure:
needs: [ meta, deploy ]
# We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
if: contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind) && needs.deploy.result != 'success' && always()
runs-on: ubuntu-22.04
steps:
- name: Harden the runner (Audit all outbound calls)
@@ -1445,15 +1445,40 @@ jobs:
with:
egress-policy: audit
- name: Post release-deploy failure to team-storage slack channel
- name: Post release-deploy failure to team slack channel
uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0
env:
TEAM_ONCALL: >-
${{
fromJSON(format('{
"storage-release": "<!subteam^{0}|@oncall-storage>",
"compute-release": "<!subteam^{1}|@oncall-compute>",
"proxy-release": "<!subteam^{2}|@oncall-proxy>"
}',
vars.SLACK_ONCALL_STORAGE_GROUP,
vars.SLACK_ONCALL_COMPUTE_GROUP,
vars.SLACK_ONCALL_PROXY_GROUP
))[needs.meta.outputs.run-kind]
}}
CHANNEL: >-
${{
fromJSON(format('{
"storage-release": "{0}",
"compute-release": "{1}",
"proxy-release": "{2}"
}',
vars.SLACK_STORAGE_CHANNEL_ID,
vars.SLACK_COMPUTE_CHANNEL_ID,
vars.SLACK_PROXY_CHANNEL_ID
))[needs.meta.outputs.run-kind]
}}
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
channel: ${{ env.CHANNEL }}
text: |
🔴 <!subteam^S06CJ87UMNY|@oncall-storage>: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
🔴 ${{ env.TEAM_ONCALL }}: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
promote-compatibility-data:

View File

@@ -68,7 +68,7 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ inputs.region_id }}
region_id: ${{ inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ matrix.pg-version }}
project_settings: ${{ steps.project-settings.outputs.settings }}
# We need these settings to get the expected output results.

View File

@@ -53,7 +53,7 @@ jobs:
submodules: true
- name: Check for Postgres changes
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
uses: step-security/paths-filter@v3
id: files_changed
with:
token: ${{ github.token }}

.github/workflows/release-compute.yml (new file, 12 lines)
View File

@@ -0,0 +1,12 @@
name: Create compute release PR
on:
schedule:
- cron: '0 7 * * FRI'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: compute
secrets: inherit

.github/workflows/release-proxy.yml (new file, 12 lines)
View File

@@ -0,0 +1,12 @@
name: Create proxy release PR
on:
schedule:
- cron: '0 6 * * TUE'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: proxy
secrets: inherit

.github/workflows/release-storage.yml (new file, 12 lines)
View File

@@ -0,0 +1,12 @@
name: Create storage release PR
on:
schedule:
- cron: '0 6 * * FRI'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: storage
secrets: inherit

View File

@@ -1,25 +1,34 @@
name: Create Release Branch
name: Create release PR
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * TUE' # Proxy release
- cron: '0 6 * * FRI' # Storage release
- cron: '0 7 * * FRI' # Compute release
workflow_dispatch:
inputs:
create-storage-release-branch:
type: boolean
description: 'Create Storage release PR'
component:
description: "Component to release"
required: true
type: choice
options:
- compute
- proxy
- storage
cherry-pick:
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
required: false
create-proxy-release-branch:
type: boolean
description: 'Create Proxy release PR'
required: false
create-compute-release-branch:
type: boolean
description: 'Create Compute release PR'
type: string
default: ''
workflow_call:
inputs:
component:
description: "Component to release"
required: true
type: string
cherry-pick:
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
required: false
type: string
default: ''
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
@@ -29,41 +38,31 @@ defaults:
shell: bash -euo pipefail {0}
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }}
create-release-pr:
runs-on: ubuntu-22.04
permissions:
contents: write
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Storage'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * TUE' || inputs.create-proxy-release-branch }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
permissions:
contents: write
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Proxy'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
create-compute-release-branch:
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
permissions:
contents: write
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Compute'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Create release PR
uses: neondatabase/dev-actions/release-pr@290dec821d86fa8a93f019e8c69720f5865b5677
with:
component: ${{ inputs.component }}
cherry-pick: ${{ inputs.cherry-pick }}
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}

Cargo.lock (generated, 230 lines changed)
View File

@@ -253,17 +253,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"
[[package]]
name = "atomic_enum"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@@ -698,40 +687,13 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"itoa",
"matchit 0.7.3",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 1.0.1",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
dependencies = [
"axum-core 0.5.0",
"axum-core",
"base64 0.22.1",
"bytes",
"form_urlencoded",
@@ -742,7 +704,7 @@ dependencies = [
"hyper 1.4.1",
"hyper-util",
"itoa",
"matchit 0.8.4",
"matchit",
"memchr",
"mime",
"percent-encoding",
@@ -762,26 +724,6 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.5.0"
@@ -808,8 +750,8 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
dependencies = [
"axum 0.8.1",
"axum-core 0.5.0",
"axum",
"axum-core",
"bytes",
"futures-util",
"headers",
@@ -1144,25 +1086,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cbindgen"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff"
dependencies = [
"clap",
"heck 0.4.1",
"indexmap 2.9.0",
"log",
"proc-macro2",
"quote",
"serde",
"serde_json",
"syn 2.0.100",
"tempfile",
"toml",
]
[[package]]
name = "cc"
version = "1.2.16"
@@ -1283,7 +1206,7 @@ version = "4.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
dependencies = [
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
@@ -1341,40 +1264,13 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "communicator"
version = "0.1.0"
dependencies = [
"atomic_enum",
"bytes",
"cbindgen",
"http 1.1.0",
"libc",
"neonart",
"nix 0.27.1",
"pageserver_client_grpc",
"pageserver_data_api",
"prost 0.13.3",
"thiserror 1.0.69",
"tokio",
"tokio-epoll-uring",
"tokio-pipe",
"tonic",
"tracing",
"tracing-subscriber",
"uring-common",
"utils",
"zerocopy 0.8.24",
"zerocopy-derive 0.8.24",
]
[[package]]
name = "compute_api"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"indexmap 2.9.0",
"indexmap 2.0.1",
"jsonwebtoken",
"regex",
"remote_storage",
@@ -1392,7 +1288,7 @@ dependencies = [
"aws-sdk-kms",
"aws-sdk-s3",
"aws-smithy-types",
"axum 0.8.1",
"axum",
"axum-extra",
"base64 0.13.1",
"bytes",
@@ -1405,7 +1301,7 @@ dependencies = [
"flate2",
"futures",
"http 1.1.0",
"indexmap 2.9.0",
"indexmap 2.0.1",
"jsonwebtoken",
"metrics",
"nix 0.27.1",
@@ -2031,7 +1927,7 @@ checksum = "0892a17df262a24294c382f0d5997571006e7a4348b4327557c4ff1cd4a8bccc"
dependencies = [
"darling",
"either",
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
@@ -2145,7 +2041,7 @@ name = "endpoint_storage"
version = "0.0.1"
dependencies = [
"anyhow",
"axum 0.8.1",
"axum",
"axum-extra",
"camino",
"camino-tempfile",
@@ -2692,7 +2588,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.9",
"indexmap 2.9.0",
"indexmap 2.0.1",
"slab",
"tokio",
"tokio-util",
@@ -2711,7 +2607,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 1.1.0",
"indexmap 2.9.0",
"indexmap 2.0.1",
"slab",
"tokio",
"tokio-util",
@@ -2807,12 +2703,6 @@ dependencies = [
"http 1.1.0",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heck"
version = "0.5.0"
@@ -3301,12 +3191,12 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.9.0"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
"hashbrown 0.14.5",
"serde",
]
@@ -3329,7 +3219,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash",
"indexmap 2.9.0",
"indexmap 2.0.1",
"is-terminal",
"itoa",
"log",
@@ -3352,7 +3242,7 @@ dependencies = [
"crossbeam-utils",
"dashmap 6.1.0",
"env_logger",
"indexmap 2.9.0",
"indexmap 2.0.1",
"itoa",
"log",
"num-format",
@@ -3704,12 +3594,6 @@ dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matchit"
version = "0.8.4"
@@ -3755,7 +3639,7 @@ version = "0.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e6777fc80a575f9503d908c8b498782a6c3ee88a06cb416dc3941401e43b94"
dependencies = [
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"syn 2.0.100",
@@ -3901,15 +3785,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "neonart"
version = "0.1.0"
dependencies = [
"rand 0.8.5",
"tracing",
"zerocopy 0.8.24",
]
[[package]]
name = "never-say-never"
version = "6.6.666"
@@ -4333,8 +4208,6 @@ dependencies = [
"humantime-serde",
"pageserver_api",
"pageserver_client",
"pageserver_client_grpc",
"pageserver_data_api",
"rand 0.8.5",
"reqwest",
"serde",
@@ -4411,8 +4284,6 @@ dependencies = [
"pageserver_api",
"pageserver_client",
"pageserver_compaction",
"pageserver_data_api",
"peekable",
"pem",
"pin-project-lite",
"postgres-protocol",
@@ -4424,7 +4295,6 @@ dependencies = [
"pprof",
"pq_proto",
"procfs",
"prost 0.13.3",
"rand 0.8.5",
"range-set-blaze",
"regex",
@@ -4456,7 +4326,6 @@ dependencies = [
"tokio-tar",
"tokio-util",
"toml_edit",
"tonic",
"tracing",
"tracing-utils",
"url",
@@ -4521,18 +4390,6 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "pageserver_client_grpc"
version = "0.1.0"
dependencies = [
"bytes",
"http 1.1.0",
"pageserver_data_api",
"thiserror 1.0.69",
"tonic",
"tracing",
]
[[package]]
name = "pageserver_compaction"
version = "0.1.0"
@@ -4556,17 +4413,6 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "pageserver_data_api"
version = "0.1.0"
dependencies = [
"prost 0.13.3",
"thiserror 1.0.69",
"tonic",
"tonic-build",
"utils",
]
[[package]]
name = "papaya"
version = "0.2.1"
@@ -4693,15 +4539,6 @@ dependencies = [
"sha2",
]
[[package]]
name = "peekable"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
dependencies = [
"smallvec",
]
[[package]]
name = "pem"
version = "3.0.3"
@@ -5173,7 +5010,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
dependencies = [
"bytes",
"heck 0.5.0",
"heck",
"itertools 0.12.1",
"log",
"multimap",
@@ -5194,7 +5031,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [
"bytes",
"heck 0.5.0",
"heck",
"itertools 0.12.1",
"log",
"multimap",
@@ -5297,7 +5134,7 @@ dependencies = [
"hyper 0.14.30",
"hyper 1.4.1",
"hyper-util",
"indexmap 2.9.0",
"indexmap 2.0.1",
"ipnet",
"itertools 0.10.5",
"itoa",
@@ -5808,7 +5645,7 @@ dependencies = [
"async-trait",
"getrandom 0.2.11",
"http 1.1.0",
"matchit 0.8.4",
"matchit",
"opentelemetry",
"reqwest",
"reqwest-middleware",
@@ -6969,7 +6806,7 @@ version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"rustversion",
@@ -7394,16 +7231,6 @@ dependencies = [
"syn 2.0.100",
]
[[package]]
name = "tokio-pipe"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
dependencies = [
"libc",
"tokio",
]
[[package]]
name = "tokio-postgres"
version = "0.7.10"
@@ -7586,7 +7413,7 @@ version = "0.22.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38"
dependencies = [
"indexmap 2.9.0",
"indexmap 2.0.1",
"serde",
"serde_spanned",
"toml_datetime",
@@ -7599,13 +7426,9 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum 0.7.9",
"base64 0.22.1",
"bytes",
"flate2",
"h2 0.4.4",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
@@ -7617,7 +7440,6 @@ dependencies = [
"prost 0.13.3",
"rustls-native-certs 0.8.0",
"rustls-pemfile 2.1.1",
"socket2",
"tokio",
"tokio-rustls 0.26.0",
"tokio-stream",
@@ -8117,7 +7939,7 @@ name = "vm_monitor"
version = "0.1.0"
dependencies = [
"anyhow",
"axum 0.8.1",
"axum",
"cgroups-rs",
"clap",
"futures",
@@ -8627,7 +8449,7 @@ dependencies = [
"hyper 1.4.1",
"hyper-util",
"indexmap 1.9.3",
"indexmap 2.9.0",
"indexmap 2.0.1",
"itertools 0.12.1",
"lazy_static",
"libc",

View File

@@ -8,7 +8,6 @@ members = [
"pageserver/compaction",
"pageserver/ctl",
"pageserver/client",
"pageserver/client_grpc",
"pageserver/pagebench",
"proxy",
"safekeeper",
@@ -30,7 +29,6 @@ members = [
"libs/pq_proto",
"libs/tenant_size_model",
"libs/metrics",
"libs/neonart",
"libs/postgres_connection",
"libs/remote_storage",
"libs/tracing-utils",
@@ -43,7 +41,6 @@ members = [
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
"endpoint_storage",
"pgxn/neon/communicator",
]
[workspace.package]
@@ -145,7 +142,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pem = "3.0.3"
peekable = "0.3.0"
pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
procfs = "0.16"
@@ -191,6 +187,7 @@ thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.43.1", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -199,7 +196,7 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = {version = "0.12.3", default-features = false, features = ["channel", "server", "tls", "tls-roots", "gzip"]}
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -231,9 +228,6 @@ x509-cert = { version = "0.2.5" }
env_logger = "0.11"
log = "0.4"
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
@@ -251,12 +245,9 @@ compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
http-utils = { version = "0.1", path = "./libs/http-utils/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
neonart = { version = "0.1", path = "./libs/neonart/" }
pageserver = { path = "./pageserver" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
pageserver_client = { path = "./pageserver/client" }
pageserver_client_grpc = { path = "./pageserver/client_grpc" }
pageserver_data_api = { path = "./pageserver/data_api" }
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
@@ -280,7 +271,6 @@ wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
## Build dependencies
cbindgen = "0.28.0"
criterion = "0.5.1"
rcgen = "0.13"
rstest = "0.18"

View File

@@ -18,12 +18,10 @@ ifeq ($(BUILD_TYPE),release)
PG_LDFLAGS = $(LDFLAGS)
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CFLAGS += -O0 -g3 $(CFLAGS)
PG_LDFLAGS = $(LDFLAGS)
NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
@@ -182,16 +180,11 @@ postgres-check-%: postgres-%
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
+@echo "Compiling communicator $*"
$(CARGO_CMD_PREFIX) cargo build -p communicator $(CARGO_BUILD_FLAGS)
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
LIBCOMMUNICATOR_PATH=$(NEON_CARGO_ARTIFACT_TARGET_DIR) \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \

View File

@@ -1083,6 +1083,34 @@ ARG PG_VERSION
RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layer "rust extensions pgrx14"
#
# pgrx 0.14 is now required by a few extensions.
# This layer should be used as a base for new pgrx extensions,
# and eventually get merged with `rust-extensions-build`
#
#########################################################################################
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
ARG PG_VERSION
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
@@ -1100,11 +1128,11 @@ RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
echo "#nothing to test here" > neon-test.sh
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.1.tar.gz -O pgrag.tar.gz && \
echo "087b2ecd11ba307dc968042ef2e9e43dc04d9ba60e8306e882c407bbe1350a50 pgrag.tar.gz" | sha256sum --check && \
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .
FROM rust-extensions-build-pgrx12 AS pgrag-build
FROM rust-extensions-build-pgrx14 AS pgrag-build
COPY --from=pgrag-src /ext-src/ /ext-src/
# Install build-time dependencies
@@ -1124,19 +1152,19 @@ RUN . venv/bin/activate && \
WORKDIR /ext-src/pgrag-src
RUN cd exts/rag && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control
RUN cd exts/rag_bge_small_en_v15 && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
cargo pgrx install --release --features remote_onnx && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control
RUN cd exts/rag_jina_reranker_v1_tiny_en && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
cargo pgrx install --release --features remote_onnx && \
@@ -1319,6 +1347,39 @@ COPY --from=pg_session_jwt-src /ext-src/ /ext-src/
WORKDIR /ext-src/pg_session_jwt-src
RUN cargo pgrx install --release
#########################################################################################
#
# Layer "pg-anon-pg-build"
# compile anon extension
#
#########################################################################################
FROM pg-build AS pg_anon-src
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
WORKDIR /ext-src
COPY compute/patches/anon_v2.patch .
# This is an experimental extension, never got to real production.
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/latest/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "=0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
patch -p1 < /ext-src/anon_v2.patch
FROM rust-extensions-build-pgrx14 AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg_anon-src /ext-src/ /ext-src/
WORKDIR /ext-src
RUN cd pg_anon-src && \
make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
chmod -R a+r ../pg_anon-src && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control;
########################################################################################
#########################################################################################
#
# Layer "wal2json-build"
@@ -1615,6 +1676,7 @@ COPY --from=pg_uuidv7-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/

View File

@@ -23,6 +23,8 @@
import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
import 'sql_exporter/getpage_sync_requests_total.libsonnet',
import 'sql_exporter/compute_getpage_stuck_requests_total.libsonnet',
import 'sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet',
import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',

View File

@@ -0,0 +1,9 @@
{
metric_name: 'compute_getpage_max_inflight_stuck_time_ms',
type: 'gauge',
help: 'Max wait time for stuck requests among all backends. Includes only active stuck requests, terminated or disconnected ones are not accounted for',
values: [
'compute_getpage_max_inflight_stuck_time_ms',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -0,0 +1,9 @@
{
metric_name: 'compute_getpage_stuck_requests_total',
type: 'counter',
help: 'Total number of Getpage requests left without an answer for more than pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout',
values: [
'compute_getpage_stuck_requests_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -9,6 +9,8 @@ SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
getpage_wait_seconds_sum numeric,
getpage_prefetch_requests_total numeric,
getpage_sync_requests_total numeric,
compute_getpage_stuck_requests_total numeric,
compute_getpage_max_inflight_stuck_time_ms numeric,
getpage_prefetch_misses_total numeric,
getpage_prefetch_discards_total numeric,
getpage_prefetches_buffered numeric,

View File

@@ -0,0 +1,129 @@
diff --git a/sql/anon.sql b/sql/anon.sql
index 0cdc769..f6cc950 100644
--- a/sql/anon.sql
+++ b/sql/anon.sql
@@ -1141,3 +1141,8 @@ $$
-- TODO : https://en.wikipedia.org/wiki/L-diversity
-- TODO : https://en.wikipedia.org/wiki/T-closeness
+
+-- NEON Patches
+
+GRANT ALL ON SCHEMA anon to neon_superuser;
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
diff --git a/sql/init.sql b/sql/init.sql
index 7da6553..9b6164b 100644
--- a/sql/init.sql
+++ b/sql/init.sql
@@ -74,50 +74,49 @@ $$
SECURITY LABEL FOR anon ON FUNCTION anon.load_csv IS 'UNTRUSTED';
--- load fake data from a given path
-CREATE OR REPLACE FUNCTION anon.init(
- datapath TEXT
-)
+CREATE OR REPLACE FUNCTION anon.load_fake_data()
RETURNS BOOLEAN
AS $$
DECLARE
- datapath_check TEXT;
success BOOLEAN;
+ sharedir TEXT;
+ datapath TEXT;
BEGIN
- IF anon.is_initialized() THEN
- RAISE NOTICE 'The anon extension is already initialized.';
- RETURN TRUE;
- END IF;
+ datapath := '/extension/anon/';
+ -- find the local extension directory
+ SELECT setting INTO sharedir
+ FROM pg_catalog.pg_config
+ WHERE name = 'SHAREDIR';
SELECT bool_or(results) INTO success
FROM unnest(array[
- anon.load_csv('anon.identifiers_category',datapath||'/identifiers_category.csv'),
- anon.load_csv('anon.identifier',datapath ||'/identifier.csv'),
- anon.load_csv('anon.address',datapath ||'/address.csv'),
- anon.load_csv('anon.city',datapath ||'/city.csv'),
- anon.load_csv('anon.company',datapath ||'/company.csv'),
- anon.load_csv('anon.country',datapath ||'/country.csv'),
- anon.load_csv('anon.email', datapath ||'/email.csv'),
- anon.load_csv('anon.first_name',datapath ||'/first_name.csv'),
- anon.load_csv('anon.iban',datapath ||'/iban.csv'),
- anon.load_csv('anon.last_name',datapath ||'/last_name.csv'),
- anon.load_csv('anon.postcode',datapath ||'/postcode.csv'),
- anon.load_csv('anon.siret',datapath ||'/siret.csv'),
- anon.load_csv('anon.lorem_ipsum',datapath ||'/lorem_ipsum.csv')
+ anon.load_csv('anon.identifiers_category',sharedir || datapath || '/identifiers_category.csv'),
+ anon.load_csv('anon.identifier',sharedir || datapath || '/identifier.csv'),
+ anon.load_csv('anon.address',sharedir || datapath || '/address.csv'),
+ anon.load_csv('anon.city',sharedir || datapath || '/city.csv'),
+ anon.load_csv('anon.company',sharedir || datapath || '/company.csv'),
+ anon.load_csv('anon.country',sharedir || datapath || '/country.csv'),
+ anon.load_csv('anon.email', sharedir || datapath || '/email.csv'),
+ anon.load_csv('anon.first_name',sharedir || datapath || '/first_name.csv'),
+ anon.load_csv('anon.iban',sharedir || datapath || '/iban.csv'),
+ anon.load_csv('anon.last_name',sharedir || datapath || '/last_name.csv'),
+ anon.load_csv('anon.postcode',sharedir || datapath || '/postcode.csv'),
+ anon.load_csv('anon.siret',sharedir || datapath || '/siret.csv'),
+ anon.load_csv('anon.lorem_ipsum',sharedir || datapath || '/lorem_ipsum.csv')
]) results;
RETURN success;
-
END;
$$
- LANGUAGE PLPGSQL
+ LANGUAGE plpgsql
VOLATILE
RETURNS NULL ON NULL INPUT
- PARALLEL UNSAFE -- because load_csv is unsafe
- SECURITY INVOKER
+ PARALLEL UNSAFE -- because of the EXCEPTION
+ SECURITY DEFINER
SET search_path=''
;
-SECURITY LABEL FOR anon ON FUNCTION anon.init(TEXT) IS 'UNTRUSTED';
+
+SECURITY LABEL FOR anon ON FUNCTION anon.load_fake_data IS 'UNTRUSTED';
-- People tend to forget the anon.init() step
-- This is a friendly notice for them
@@ -144,7 +143,7 @@ SECURITY LABEL FOR anon ON FUNCTION anon.notice_if_not_init IS 'UNTRUSTED';
CREATE OR REPLACE FUNCTION anon.load(TEXT)
RETURNS BOOLEAN AS
$$
- SELECT anon.init($1);
+ SELECT anon.init();
$$
LANGUAGE SQL
VOLATILE
@@ -159,16 +158,16 @@ SECURITY LABEL FOR anon ON FUNCTION anon.load(TEXT) IS 'UNTRUSTED';
CREATE OR REPLACE FUNCTION anon.init()
RETURNS BOOLEAN
AS $$
- WITH conf AS (
- -- find the local extension directory
- SELECT setting AS sharedir
- FROM pg_catalog.pg_config
- WHERE name = 'SHAREDIR'
- )
- SELECT anon.init(conf.sharedir || '/extension/anon/')
- FROM conf;
+BEGIN
+ IF anon.is_initialized() THEN
+ RAISE NOTICE 'The anon extension is already initialized.';
+ RETURN TRUE;
+ END IF;
+
+ RETURN anon.load_fake_data();
+END;
$$
- LANGUAGE SQL
+ LANGUAGE plpgsql
VOLATILE
PARALLEL UNSAFE -- because init is unsafe
SECURITY INVOKER

View File

@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -168,6 +168,35 @@ pub fn write_postgres_conf(
writeln!(file, "# Managed by compute_ctl: end")?;
}
// Always add pgaudit to shared_preload_libraries.
//
// This is needed to handle the downgrade scenario: the pgaudit extension
// creates event triggers that require its library to be loaded, so once the
// extension has been installed, the library must always be present in
// shared_preload_libraries.
let mut extra_shared_preload_libraries = String::new();
let libs = {
// We don't distribute pgaudit in the testing image,
// and don't pass shared_preload_libraries via spec,
// so disable this logic there.
#[cfg(feature = "testing")]
{
String::new()
}
#[cfg(not(feature = "testing"))]
{
spec.cluster
.settings
.find("shared_preload_libraries")
.expect("shared_preload_libraries setting is missing in the spec")
}
};
#[cfg(not(feature = "testing"))]
if !libs.contains("pgaudit") {
extra_shared_preload_libraries.push_str(",pgaudit");
};
// If base audit logging is enabled, configure it.
// In this setup, the audit log will be written to the standard postgresql log.
//
@@ -177,29 +206,22 @@ pub fn write_postgres_conf(
// This way we always override the settings from the spec
// and don't allow the user or the control plane admin to change them.
match spec.audit_log_level {
ComputeAudit::Disabled => {}
ComputeAudit::Disabled => {
// this is the default, but let's be explicit
writeln!(file, "pgaudit.log='none'")?;
}
ComputeAudit::Log | ComputeAudit::Base => {
writeln!(file, "# Managed by compute_ctl base audit settings: start")?;
writeln!(file, "pgaudit.log='ddl,role'")?;
// Disable logging of catalog queries to reduce the noise
writeln!(file, "pgaudit.log_catalog=off")?;
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
let mut extra_shared_preload_libraries = String::new();
if !libs.contains("pgaudit") {
extra_shared_preload_libraries.push_str(",pgaudit");
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
} else {
// Typically, this should be unreachable,
// because we always set at least some shared_preload_libraries in the spec,
// but let's handle it explicitly anyway.
writeln!(file, "shared_preload_libraries='neon,pgaudit'")?;
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
writeln!(file, "# Managed by compute_ctl base audit settings: end")?;
}
ComputeAudit::Hipaa | ComputeAudit::Extended | ComputeAudit::Full => {
@@ -228,28 +250,15 @@ pub fn write_postgres_conf(
// The caller who sets the flag is responsible for ensuring that the necessary
// shared_preload_libraries are present in the compute image,
// otherwise the compute start will fail.
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
let mut extra_shared_preload_libraries = String::new();
if !libs.contains("pgaudit") {
extra_shared_preload_libraries.push_str(",pgaudit");
}
if !libs.contains("pgauditlogtofile") {
extra_shared_preload_libraries.push_str(",pgauditlogtofile");
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
} else {
// Typically, this should be unreachable,
// because we always set at least some shared_preload_libraries in the spec,
// but let's handle it explicitly anyway.
writeln!(
file,
"shared_preload_libraries='neon,pgaudit,pgauditlogtofile'"
)?;
if !libs.contains("pgauditlogtofile") {
extra_shared_preload_libraries.push_str(",pgauditlogtofile");
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
writeln!(
file,
"# Managed by compute_ctl compliance audit settings: end"

View File

@@ -424,10 +424,10 @@ pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
experimental,
};
let span = span!(Level::INFO, "compute_monitor");
thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || {
let span = span!(Level::INFO, "compute_monitor");
let _enter = span.enter();
monitor.run();
})
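The hunk above moves span creation into the spawned closure: the guard returned by span.enter() is tied to the thread that entered it, so the span should be created and entered on the monitor thread itself. A minimal standalone sketch of the pattern (subscriber setup assumed; not the actual compute_ctl code):

```rust
use std::thread;
use tracing::{Level, info, span};

fn main() {
    tracing_subscriber::fmt::init();
    thread::Builder::new()
        .name("compute-monitor".into())
        .spawn(move || {
            // Create and enter the span on the worker thread, so the
            // entered guard never crosses a thread boundary.
            let span = span!(Level::INFO, "compute_monitor");
            let _enter = span.enter();
            info!("monitor loop would run here");
        })
        .expect("failed to spawn monitor thread")
        .join()
        .unwrap();
}
```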

View File

@@ -112,7 +112,7 @@ impl SafekeeperNode {
}
/// Initializes a safekeeper node by creating all necessary files,
/// e.g. SSL certificates.
/// e.g. SSL certificates and the JWT token file.
pub fn initialize(&self) -> anyhow::Result<()> {
if self.env.generate_local_ssl_certs {
self.env.generate_ssl_cert(
@@ -120,6 +120,17 @@ impl SafekeeperNode {
&self.datadir_path().join("server.key"),
)?;
}
// Generate a token file for authentication with other safekeepers
if self.conf.auth_enabled {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
let token_path = self.datadir_path().join("peer_jwt_token");
std::fs::write(token_path, token)?;
}
Ok(())
}
@@ -218,14 +229,26 @@ impl SafekeeperNode {
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
}
if self.conf.auth_enabled {
let token_path = self.datadir_path().join("peer_jwt_token");
let token_path_str = token_path
.to_str()
.with_context(|| {
format!("Token path {token_path:?} cannot be represented as a unicode string")
})?
.to_owned();
args.extend(["--auth-token-path".to_owned(), token_path_str]);
}
args.extend_from_slice(extra_opts);
let env_variables = Vec::new();
background_process::start_process(
&format!("safekeeper-{id}"),
&datadir,
&self.env.safekeeper_bin(),
&args,
self.safekeeper_env_variables()?,
env_variables,
background_process::InitialPidFile::Expect(self.pid_file()),
retry_timeout,
|| async {
@@ -239,18 +262,6 @@ impl SafekeeperNode {
.await
}
fn safekeeper_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
// Generate a token to connect from safekeeper to peers
if self.conf.auth_enabled {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
Ok(vec![("SAFEKEEPER_AUTH_TOKEN".to_owned(), token)])
} else {
Ok(Vec::new())
}
}
///
/// Stop the server.
///
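The hunks above replace the SAFEKEEPER_AUTH_TOKEN environment variable with a token file written during initialize() and an --auth-token-path argument at start time. A minimal sketch of the same pattern (binary name and helper are hypothetical):

```rust
use std::fs;
use std::path::Path;
use std::process::{Child, Command};

/// Hypothetical helper: persist the JWT once, then hand the child process
/// the *path* to the token rather than the secret itself.
fn spawn_with_token_file(datadir: &Path, token: &str) -> std::io::Result<Child> {
    let token_path = datadir.join("peer_jwt_token");
    fs::write(&token_path, token)?;
    Command::new("safekeeper")
        .arg("--auth-token-path")
        .arg(&token_path)
        .spawn()
}
```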

View File

@@ -3,3 +3,5 @@ pg_distrib_dir='/usr/local/'
listen_pg_addr='0.0.0.0:6400'
listen_http_addr='0.0.0.0:9898'
remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
control_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address
control_plane_emergency_mode=true

View File

@@ -38,11 +38,6 @@ Currently, the following metrics are collected:
Amount of WAL produced by a timeline, i.e. last_record_lsn
This is an absolute, per-timeline metric.
- `resident_size`
Size of all the layer files in the tenant's directory on disk on the pageserver.
This is an absolute, per-tenant metric.
- `remote_storage_size`
Size of the remote storage (S3) directory.

View File

@@ -1,11 +0,0 @@
[package]
name = "neonart"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
tracing.workspace = true
rand.workspace = true # for tests
zerocopy = "0.8"

View File

@@ -1,377 +0,0 @@
mod lock_and_version;
mod node_ptr;
mod node_ref;
use std::vec::Vec;
use crate::algorithm::lock_and_version::ResultOrRestart;
use crate::algorithm::node_ptr::{MAX_PREFIX_LEN, NodePtr};
use crate::algorithm::node_ref::ChildOrValue;
use crate::algorithm::node_ref::{NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
use crate::epoch::EpochPin;
use crate::{Allocator, Key, Value};
pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
pub fn new_root<V: Value>(allocator: &Allocator) -> RootPtr<V> {
node_ptr::new_root(allocator)
}
pub(crate) fn search<'e, K: Key, V: Value>(
key: &K,
root: RootPtr<V>,
epoch_pin: &'e EpochPin,
) -> Option<V> {
loop {
let root_ref = NodeRef::from_root_ptr(root);
if let Ok(result) = lookup_recurse(key.as_bytes(), root_ref, None, epoch_pin) {
break result;
}
// retry
}
}
pub(crate) fn update_fn<'e, K: Key, V: Value, F>(
key: &K,
value_fn: F,
root: RootPtr<V>,
allocator: &Allocator,
epoch_pin: &'e EpochPin,
) where
F: FnOnce(Option<&V>) -> Option<V>,
{
let value_fn_cell = std::cell::Cell::new(Some(value_fn));
loop {
let root_ref = NodeRef::from_root_ptr(root);
let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
let key_bytes = key.as_bytes();
if let Ok(()) = update_recurse(
key_bytes,
this_value_fn,
root_ref,
None,
allocator,
epoch_pin,
0,
key_bytes,
) {
break;
}
// retry
}
}
pub(crate) fn dump_tree<'e, V: Value + std::fmt::Debug>(root: RootPtr<V>, epoch_pin: &'e EpochPin) {
let root_ref = NodeRef::from_root_ptr(root);
let _ = dump_recurse(&[], root_ref, epoch_pin, 0);
}
// Error means you must retry.
//
// This corresponds to the 'lookupOpt' function in the paper
fn lookup_recurse<'e, V: Value>(
key: &[u8],
node: NodeRef<'e, V>,
parent: Option<ReadLockedNodeRef<V>>,
epoch_pin: &'e EpochPin,
) -> ResultOrRestart<Option<V>> {
let rnode = node.read_lock_or_restart()?;
if let Some(parent) = parent {
parent.read_unlock_or_restart()?;
}
// check if prefix matches, may increment level
let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
prefix_len
} else {
rnode.read_unlock_or_restart()?;
return Ok(None);
};
let key = &key[prefix_len..];
// find child (or leaf value)
let next_node = rnode.find_child_or_value_or_restart(key[0])?;
match next_node {
None => Ok(None), // key not found
Some(ChildOrValue::Value(vptr)) => {
// safety: It's OK to follow the pointer because we checked the version.
let v = unsafe { (*vptr).clone() };
Ok(Some(v))
}
Some(ChildOrValue::Child(v)) => lookup_recurse(&key[1..], v, Some(rnode), epoch_pin),
}
}
// This corresponds to the 'insertOpt' function in the paper
pub(crate) fn update_recurse<'e, V: Value, F>(
key: &[u8],
value_fn: F,
node: NodeRef<'e, V>,
rparent: Option<(ReadLockedNodeRef<V>, u8)>,
allocator: &Allocator,
epoch_pin: &'e EpochPin,
level: usize,
orig_key: &[u8],
) -> ResultOrRestart<()>
where
F: FnOnce(Option<&V>) -> Option<V>,
{
let rnode = node.read_lock_or_restart()?;
let prefix_match_len = rnode.prefix_matches(key);
if prefix_match_len.is_none() {
let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
if let Some(new_value) = value_fn(None) {
insert_split_prefix(
key,
new_value,
&mut wnode,
&mut wparent,
parent_key,
allocator,
);
}
wnode.write_unlock();
wparent.write_unlock();
return Ok(());
}
let prefix_match_len = prefix_match_len.unwrap();
let key = &key[prefix_match_len as usize..];
let level = level + prefix_match_len as usize;
let next_node = rnode.find_child_or_value_or_restart(key[0])?;
if next_node.is_none() {
if rnode.is_full() {
let (rparent, parent_key) = rparent.expect("root node cannot become full");
let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
let wnode = rnode.upgrade_to_write_lock_or_restart()?;
if let Some(new_value) = value_fn(None) {
insert_and_grow(key, new_value, &wnode, &mut wparent, parent_key, allocator);
wnode.write_unlock_obsolete();
wparent.write_unlock();
} else {
wnode.write_unlock();
wparent.write_unlock();
}
} else {
let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
if let Some((rparent, _)) = rparent {
rparent.read_unlock_or_restart()?;
}
if let Some(new_value) = value_fn(None) {
insert_to_node(&mut wnode, key, new_value, allocator);
}
wnode.write_unlock();
}
return Ok(());
} else {
let next_node = next_node.unwrap(); // checked above it's not None
if let Some((rparent, _)) = rparent {
rparent.read_unlock_or_restart()?;
}
match next_node {
ChildOrValue::Value(existing_value_ptr) => {
assert!(key.len() == 1);
let wnode = rnode.upgrade_to_write_lock_or_restart()?;
// safety: Now that we have acquired the write lock, we have exclusive access to the
// value
let vmut = unsafe { existing_value_ptr.cast_mut().as_mut() }.unwrap();
if let Some(new_value) = value_fn(Some(vmut)) {
*vmut = new_value;
} else {
// TODO: Treat this as deletion?
}
wnode.write_unlock();
Ok(())
}
ChildOrValue::Child(next_child) => {
// recurse to next level
update_recurse(
&key[1..],
value_fn,
next_child,
Some((rnode, key[0])),
allocator,
epoch_pin,
level + 1,
orig_key,
)
}
}
}
}
#[derive(Clone)]
enum PathElement {
Prefix(Vec<u8>),
KeyByte(u8),
}
impl std::fmt::Debug for PathElement {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
match self {
PathElement::Prefix(prefix) => write!(fmt, "{:?}", prefix),
PathElement::KeyByte(key_byte) => write!(fmt, "{}", key_byte),
}
}
}
fn dump_recurse<'e, V: Value + std::fmt::Debug>(
path: &[PathElement],
node: NodeRef<'e, V>,
epoch_pin: &'e EpochPin,
level: usize,
) -> ResultOrRestart<()> {
let indent = str::repeat(" ", level);
let rnode = node.read_lock_or_restart()?;
let mut path = Vec::from(path);
let prefix = rnode.get_prefix();
if !prefix.is_empty() {
path.push(PathElement::Prefix(Vec::from(prefix)));
}
for key_byte in 0..=u8::MAX {
match rnode.find_child_or_value_or_restart(key_byte)? {
None => continue,
Some(ChildOrValue::Child(child_ref)) => {
let rchild = child_ref.read_lock_or_restart()?;
eprintln!(
"{} {:?}, {}: prefix {:?}",
indent,
&path,
key_byte,
rchild.get_prefix()
);
let mut child_path = path.clone();
child_path.push(PathElement::KeyByte(key_byte));
dump_recurse(&child_path, child_ref, epoch_pin, level + 1)?;
}
Some(ChildOrValue::Value(val)) => {
eprintln!("{} {:?}, {}: {:?}", indent, path, key_byte, unsafe {
val.as_ref().unwrap()
});
}
}
}
Ok(())
}
///```text
/// [fooba]r -> value
///
/// [foo]b -> [a]r -> value
/// e -> [ls]e -> value
///```
fn insert_split_prefix<'a, V: Value>(
key: &[u8],
value: V,
node: &mut WriteLockedNodeRef<V>,
parent: &mut WriteLockedNodeRef<V>,
parent_key: u8,
allocator: &Allocator,
) {
let old_node = node;
let old_prefix = old_node.get_prefix();
let common_prefix_len = common_prefix(key, old_prefix);
// Allocate a node for the new value.
let new_value_node = allocate_node_for_value(&key[common_prefix_len + 1..], value, allocator);
// Allocate a new internal node with the common prefix
let mut prefix_node = node_ref::new_internal(&key[..common_prefix_len], allocator);
// Add the old node and the new nodes to the new internal node
prefix_node.insert_child(old_prefix[common_prefix_len], old_node.as_ptr());
prefix_node.insert_child(key[common_prefix_len], new_value_node);
// Modify the prefix of the old child in place
old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
// replace the pointer in the parent
parent.replace_child(parent_key, prefix_node.into_ptr());
}
fn insert_to_node<V: Value>(
wnode: &mut WriteLockedNodeRef<V>,
key: &[u8],
value: V,
allocator: &Allocator,
) {
if wnode.is_leaf() {
wnode.insert_value(key[0], value);
} else {
let value_child = allocate_node_for_value(&key[1..], value, allocator);
wnode.insert_child(key[0], value_child);
}
}
// On entry: 'parent' and 'node' are locked
fn insert_and_grow<V: Value>(
key: &[u8],
value: V,
wnode: &WriteLockedNodeRef<V>,
parent: &mut WriteLockedNodeRef<V>,
parent_key_byte: u8,
allocator: &Allocator,
) {
let mut bigger_node = wnode.grow(allocator);
if wnode.is_leaf() {
bigger_node.insert_value(key[0], value);
} else {
let value_child = allocate_node_for_value(&key[1..], value, allocator);
bigger_node.insert_child(key[0], value_child);
}
// Replace the pointer in the parent
parent.replace_child(parent_key_byte, bigger_node.into_ptr());
}
// Allocate a new leaf node to hold 'value'. If key is long, we may need to allocate
// new internal nodes to hold it too
fn allocate_node_for_value<V: Value>(key: &[u8], value: V, allocator: &Allocator) -> NodePtr<V> {
let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN + 1);
let mut leaf_node = node_ref::new_leaf(&key[prefix_off..key.len() - 1], allocator);
leaf_node.insert_value(*key.last().unwrap(), value);
let mut node = leaf_node;
while prefix_off > 0 {
// Need another internal node
let remain_prefix = &key[0..prefix_off];
prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
let mut internal_node = node_ref::new_internal(
&remain_prefix[prefix_off..remain_prefix.len() - 1],
allocator,
);
internal_node.insert_child(*remain_prefix.last().unwrap(), node.into_ptr());
node = internal_node;
}
node.into_ptr()
}
fn common_prefix(a: &[u8], b: &[u8]) -> usize {
for i in 0..MAX_PREFIX_LEN {
if a[i] != b[i] {
return i;
}
}
panic!("prefixes are equal");
}

View File

@@ -1,85 +0,0 @@
use std::sync::atomic::{AtomicU64, Ordering};
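// Layout of the combined lock word, as implied by the operations below:
// bit 0 -- obsolete flag: the node has been replaced and readers must restart
// bit 1 -- write-lock bit: readers spin in await_node_unlocked() while it is set
// bits 2.. -- version counter: write_unlock() adds 2 to the locked word, which
// clears the lock bit and carries one into the version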
pub(crate) struct AtomicLockAndVersion {
inner: AtomicU64,
}
impl AtomicLockAndVersion {
pub(crate) fn new() -> AtomicLockAndVersion {
AtomicLockAndVersion {
inner: AtomicU64::new(0),
}
}
}
pub(crate) type ResultOrRestart<T> = Result<T, ()>;
const fn restart<T>() -> ResultOrRestart<T> {
Err(())
}
impl AtomicLockAndVersion {
pub(crate) fn read_lock_or_restart(&self) -> ResultOrRestart<u64> {
let version = self.await_node_unlocked();
if is_obsolete(version) {
return restart();
}
Ok(version)
}
pub(crate) fn check_or_restart(&self, version: u64) -> ResultOrRestart<()> {
self.read_unlock_or_restart(version)
}
pub(crate) fn read_unlock_or_restart(&self, version: u64) -> ResultOrRestart<()> {
if self.inner.load(Ordering::Acquire) != version {
return restart();
}
Ok(())
}
pub(crate) fn upgrade_to_write_lock_or_restart(&self, version: u64) -> ResultOrRestart<()> {
if self
.inner
.compare_exchange(
version,
set_locked_bit(version),
Ordering::Acquire,
Ordering::Relaxed,
)
.is_err()
{
return restart();
}
Ok(())
}
pub(crate) fn write_unlock(&self) {
// reset locked bit and overflow into version
self.inner.fetch_add(2, Ordering::Release);
}
pub(crate) fn write_unlock_obsolete(&self) {
// set obsolete, reset locked, overflow into version
self.inner.fetch_add(3, Ordering::Release);
}
// Helper functions
fn await_node_unlocked(&self) -> u64 {
let mut version = self.inner.load(Ordering::Acquire);
while (version & 2) == 2 {
// spinlock
std::thread::yield_now();
version = self.inner.load(Ordering::Acquire)
}
version
}
}
fn set_locked_bit(version: u64) -> u64 {
version + 2
}
fn is_obsolete(version: u64) -> bool {
(version & 1) == 1
}

View File

@@ -1,983 +0,0 @@
use std::marker::PhantomData;
use std::ptr::NonNull;
use super::lock_and_version::AtomicLockAndVersion;
use crate::Allocator;
use crate::Value;
pub(crate) const MAX_PREFIX_LEN: usize = 8;
enum NodeTag {
Internal4,
Internal16,
Internal48,
Internal256,
Leaf4,
Leaf16,
Leaf48,
Leaf256,
}
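// Every concrete node type below is #[repr(C)] and starts with the same
// fields as NodeBase (tag, then lock_and_version), so a *mut NodeBase can be
// dispatched on `tag` and cast to the concrete type; see variant()/variant_mut().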
#[repr(C)]
struct NodeBase {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
}
pub(crate) struct NodePtr<V> {
ptr: *mut NodeBase,
phantom_value: PhantomData<V>,
}
impl<V> std::fmt::Debug for NodePtr<V> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(fmt, "0x{}", self.ptr.addr())
}
}
impl<V> Copy for NodePtr<V> {}
impl<V> Clone for NodePtr<V> {
fn clone(&self) -> NodePtr<V> {
NodePtr {
ptr: self.ptr,
phantom_value: PhantomData,
}
}
}
enum NodeVariant<'a, V> {
Internal4(&'a NodeInternal4<V>),
Internal16(&'a NodeInternal16<V>),
Internal48(&'a NodeInternal48<V>),
Internal256(&'a NodeInternal256<V>),
Leaf4(&'a NodeLeaf4<V>),
Leaf16(&'a NodeLeaf16<V>),
Leaf48(&'a NodeLeaf48<V>),
Leaf256(&'a NodeLeaf256<V>),
}
enum NodeVariantMut<'a, V> {
Internal4(&'a mut NodeInternal4<V>),
Internal16(&'a mut NodeInternal16<V>),
Internal48(&'a mut NodeInternal48<V>),
Internal256(&'a mut NodeInternal256<V>),
Leaf4(&'a mut NodeLeaf4<V>),
Leaf16(&'a mut NodeLeaf16<V>),
Leaf48(&'a mut NodeLeaf48<V>),
Leaf256(&'a mut NodeLeaf256<V>),
}
pub(crate) enum ChildOrValuePtr<V> {
Child(NodePtr<V>),
Value(*const V),
}
#[repr(C)]
struct NodeInternal4<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_children: u8,
child_keys: [u8; 4],
child_ptrs: [NodePtr<V>; 4],
}
#[repr(C)]
struct NodeInternal16<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_children: u8,
child_keys: [u8; 16],
child_ptrs: [NodePtr<V>; 16],
}
const INVALID_CHILD_INDEX: u8 = u8::MAX;
#[repr(C)]
struct NodeInternal48<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_children: u8,
child_indexes: [u8; 256],
child_ptrs: [NodePtr<V>; 48],
}
#[repr(C)]
pub(crate) struct NodeInternal256<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_children: u16,
child_ptrs: [NodePtr<V>; 256],
}
#[repr(C)]
struct NodeLeaf4<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_values: u8,
child_keys: [u8; 4],
child_values: [Option<V>; 4],
}
#[repr(C)]
struct NodeLeaf16<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_values: u8,
child_keys: [u8; 16],
child_values: [Option<V>; 16],
}
#[repr(C)]
struct NodeLeaf48<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_values: u8,
child_indexes: [u8; 256],
child_values: [Option<V>; 48],
}
#[repr(C)]
struct NodeLeaf256<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
prefix: [u8; MAX_PREFIX_LEN],
prefix_len: u8,
num_values: u16,
child_values: [Option<V>; 256],
}
impl<V> NodePtr<V> {
pub(crate) fn is_leaf(&self) -> bool {
match self.variant() {
NodeVariant::Internal4(_) => false,
NodeVariant::Internal16(_) => false,
NodeVariant::Internal48(_) => false,
NodeVariant::Internal256(_) => false,
NodeVariant::Leaf4(_) => true,
NodeVariant::Leaf16(_) => true,
NodeVariant::Leaf48(_) => true,
NodeVariant::Leaf256(_) => true,
}
}
pub(crate) fn lockword(&self) -> &AtomicLockAndVersion {
match self.variant() {
NodeVariant::Internal4(n) => &n.lock_and_version,
NodeVariant::Internal16(n) => &n.lock_and_version,
NodeVariant::Internal48(n) => &n.lock_and_version,
NodeVariant::Internal256(n) => &n.lock_and_version,
NodeVariant::Leaf4(n) => &n.lock_and_version,
NodeVariant::Leaf16(n) => &n.lock_and_version,
NodeVariant::Leaf48(n) => &n.lock_and_version,
NodeVariant::Leaf256(n) => &n.lock_and_version,
}
}
pub(crate) fn is_null(&self) -> bool {
self.ptr.is_null()
}
pub(crate) const fn null() -> NodePtr<V> {
NodePtr {
ptr: std::ptr::null_mut(),
phantom_value: PhantomData,
}
}
fn variant(&self) -> NodeVariant<V> {
unsafe {
match (*self.ptr).tag {
NodeTag::Internal4 => NodeVariant::Internal4(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal4<V>>()).as_ref(),
),
NodeTag::Internal16 => NodeVariant::Internal16(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal16<V>>()).as_ref(),
),
NodeTag::Internal48 => NodeVariant::Internal48(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal48<V>>()).as_ref(),
),
NodeTag::Internal256 => NodeVariant::Internal256(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal256<V>>()).as_ref(),
),
NodeTag::Leaf4 => NodeVariant::Leaf4(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf4<V>>()).as_ref(),
),
NodeTag::Leaf16 => NodeVariant::Leaf16(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf16<V>>()).as_ref(),
),
NodeTag::Leaf48 => NodeVariant::Leaf48(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf48<V>>()).as_ref(),
),
NodeTag::Leaf256 => NodeVariant::Leaf256(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf256<V>>()).as_ref(),
),
}
}
}
fn variant_mut(&mut self) -> NodeVariantMut<V> {
unsafe {
match (*self.ptr).tag {
NodeTag::Internal4 => NodeVariantMut::Internal4(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal4<V>>()).as_mut(),
),
NodeTag::Internal16 => NodeVariantMut::Internal16(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal16<V>>()).as_mut(),
),
NodeTag::Internal48 => NodeVariantMut::Internal48(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal48<V>>()).as_mut(),
),
NodeTag::Internal256 => NodeVariantMut::Internal256(
NonNull::new_unchecked(self.ptr.cast::<NodeInternal256<V>>()).as_mut(),
),
NodeTag::Leaf4 => NodeVariantMut::Leaf4(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf4<V>>()).as_mut(),
),
NodeTag::Leaf16 => NodeVariantMut::Leaf16(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf16<V>>()).as_mut(),
),
NodeTag::Leaf48 => NodeVariantMut::Leaf48(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf48<V>>()).as_mut(),
),
NodeTag::Leaf256 => NodeVariantMut::Leaf256(
NonNull::new_unchecked(self.ptr.cast::<NodeLeaf256<V>>()).as_mut(),
),
}
}
}
}
impl<V: Value> NodePtr<V> {
pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
let node_prefix = self.get_prefix();
assert!(node_prefix.len() <= key.len()); // because we only use fixed-size keys
if &key[0..node_prefix.len()] != node_prefix {
None
} else {
Some(node_prefix.len())
}
}
pub(crate) fn get_prefix(&self) -> &[u8] {
match self.variant() {
NodeVariant::Internal4(n) => n.get_prefix(),
NodeVariant::Internal16(n) => n.get_prefix(),
NodeVariant::Internal48(n) => n.get_prefix(),
NodeVariant::Internal256(n) => n.get_prefix(),
NodeVariant::Leaf4(n) => n.get_prefix(),
NodeVariant::Leaf16(n) => n.get_prefix(),
NodeVariant::Leaf48(n) => n.get_prefix(),
NodeVariant::Leaf256(n) => n.get_prefix(),
}
}
pub(crate) fn is_full(&self) -> bool {
match self.variant() {
NodeVariant::Internal4(n) => n.is_full(),
NodeVariant::Internal16(n) => n.is_full(),
NodeVariant::Internal48(n) => n.is_full(),
NodeVariant::Internal256(n) => n.is_full(),
NodeVariant::Leaf4(n) => n.is_full(),
NodeVariant::Leaf16(n) => n.is_full(),
NodeVariant::Leaf48(n) => n.is_full(),
NodeVariant::Leaf256(n) => n.is_full(),
}
}
pub(crate) fn find_child_or_value(&self, key_byte: u8) -> Option<ChildOrValuePtr<V>> {
match self.variant() {
NodeVariant::Internal4(n) => n.find_child(key_byte).map(|c| ChildOrValuePtr::Child(c)),
NodeVariant::Internal16(n) => n.find_child(key_byte).map(|c| ChildOrValuePtr::Child(c)),
NodeVariant::Internal48(n) => n.find_child(key_byte).map(|c| ChildOrValuePtr::Child(c)),
NodeVariant::Internal256(n) => {
n.find_child(key_byte).map(|c| ChildOrValuePtr::Child(c))
}
NodeVariant::Leaf4(n) => n
.get_leaf_value(key_byte)
.map(|v| ChildOrValuePtr::Value(v)),
NodeVariant::Leaf16(n) => n
.get_leaf_value(key_byte)
.map(|v| ChildOrValuePtr::Value(v)),
NodeVariant::Leaf48(n) => n
.get_leaf_value(key_byte)
.map(|v| ChildOrValuePtr::Value(v)),
NodeVariant::Leaf256(n) => n
.get_leaf_value(key_byte)
.map(|v| ChildOrValuePtr::Value(v)),
}
}
pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
match self.variant_mut() {
NodeVariantMut::Internal4(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Internal16(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Internal48(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Internal256(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Leaf4(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Leaf16(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Leaf48(n) => n.truncate_prefix(new_prefix_len),
NodeVariantMut::Leaf256(n) => n.truncate_prefix(new_prefix_len),
}
}
pub(crate) fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
match self.variant() {
NodeVariant::Internal4(n) => n.grow(allocator),
NodeVariant::Internal16(n) => n.grow(allocator),
NodeVariant::Internal48(n) => n.grow(allocator),
NodeVariant::Internal256(_) => panic!("cannot grow Internal256 node"),
NodeVariant::Leaf4(n) => n.grow(allocator),
NodeVariant::Leaf16(n) => n.grow(allocator),
NodeVariant::Leaf48(n) => n.grow(allocator),
NodeVariant::Leaf256(_) => panic!("cannot grow Leaf256 node"),
}
}
pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
match self.variant_mut() {
NodeVariantMut::Internal4(n) => n.insert_child(key_byte, child),
NodeVariantMut::Internal16(n) => n.insert_child(key_byte, child),
NodeVariantMut::Internal48(n) => n.insert_child(key_byte, child),
NodeVariantMut::Internal256(n) => n.insert_child(key_byte, child),
NodeVariantMut::Leaf4(_)
| NodeVariantMut::Leaf16(_)
| NodeVariantMut::Leaf48(_)
| NodeVariantMut::Leaf256(_) => panic!("insert_child called on leaf node"),
}
}
pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
match self.variant_mut() {
NodeVariantMut::Internal4(n) => n.replace_child(key_byte, replacement),
NodeVariantMut::Internal16(n) => n.replace_child(key_byte, replacement),
NodeVariantMut::Internal48(n) => n.replace_child(key_byte, replacement),
NodeVariantMut::Internal256(n) => n.replace_child(key_byte, replacement),
NodeVariantMut::Leaf4(_)
| NodeVariantMut::Leaf16(_)
| NodeVariantMut::Leaf48(_)
| NodeVariantMut::Leaf256(_) => panic!("replace_child called on leaf node"),
}
}
pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
match self.variant_mut() {
NodeVariantMut::Internal4(_)
| NodeVariantMut::Internal16(_)
| NodeVariantMut::Internal48(_)
| NodeVariantMut::Internal256(_) => panic!("insert_value called on internal node"),
NodeVariantMut::Leaf4(n) => n.insert_value(key_byte, value),
NodeVariantMut::Leaf16(n) => n.insert_value(key_byte, value),
NodeVariantMut::Leaf48(n) => n.insert_value(key_byte, value),
NodeVariantMut::Leaf256(n) => n.insert_value(key_byte, value),
}
}
}
pub fn new_root<V: Value>(allocator: &Allocator) -> NodePtr<V> {
NodePtr {
ptr: allocator.alloc(NodeInternal256::<V>::new()).as_ptr().cast(),
phantom_value: PhantomData,
}
}
pub fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
let mut node = allocator.alloc(NodeInternal4 {
tag: NodeTag::Internal4,
lock_and_version: AtomicLockAndVersion::new(),
prefix: [0; MAX_PREFIX_LEN],
prefix_len: prefix.len() as u8,
num_children: 0,
child_keys: [0; 4],
child_ptrs: [const { NodePtr::null() }; 4],
});
node.prefix[0..prefix.len()].copy_from_slice(prefix);
node.as_ptr().into()
}
pub fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
let mut node = allocator.alloc(NodeLeaf4 {
tag: NodeTag::Leaf4,
lock_and_version: AtomicLockAndVersion::new(),
prefix: [0; MAX_PREFIX_LEN],
prefix_len: prefix.len() as u8,
num_values: 0,
child_keys: [0; 4],
child_values: [const { None }; 4],
});
node.prefix[0..prefix.len()].copy_from_slice(prefix);
node.as_ptr().into()
}
impl<V: Value> NodeInternal4<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn find_child(&self, key: u8) -> Option<NodePtr<V>> {
for i in 0..self.num_children as usize {
if self.child_keys[i] == key {
return Some(self.child_ptrs[i]);
}
}
None
}
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
for i in 0..self.num_children as usize {
if self.child_keys[i] == key_byte {
self.child_ptrs[i] = replacement;
return;
}
}
panic!("could not re-find parent with key {}", key_byte);
}
fn is_full(&self) -> bool {
self.num_children == 4
}
fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
assert!(self.num_children < 4);
let idx = self.num_children as usize;
self.child_keys[idx] = key_byte;
self.child_ptrs[idx] = child;
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node16 = allocator.alloc(NodeInternal16 {
tag: NodeTag::Internal16,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_children: self.num_children,
child_keys: [0; 16],
child_ptrs: [const { NodePtr::null() }; 16],
});
for i in 0..self.num_children as usize {
node16.child_keys[i] = self.child_keys[i];
node16.child_ptrs[i] = self.child_ptrs[i];
}
node16.as_ptr().into()
}
}
impl<V: Value> NodeInternal16<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
for i in 0..self.num_children as usize {
if self.child_keys[i] == key_byte {
return Some(self.child_ptrs[i]);
}
}
None
}
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
for i in 0..self.num_children as usize {
if self.child_keys[i] == key_byte {
self.child_ptrs[i] = replacement;
return;
}
}
panic!("could not re-find parent with key {}", key_byte);
}
fn is_full(&self) -> bool {
self.num_children == 16
}
fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
assert!(self.num_children < 16);
let idx = self.num_children as usize;
self.child_keys[idx] = key_byte;
self.child_ptrs[idx] = child;
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node48 = allocator.alloc(NodeInternal48 {
tag: NodeTag::Internal48,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_children: self.num_children,
child_indexes: [INVALID_CHILD_INDEX; 256],
child_ptrs: [const { NodePtr::null() }; 48],
});
for i in 0..self.num_children as usize {
let idx = self.child_keys[i] as usize;
node48.child_indexes[idx] = i as u8;
node48.child_ptrs[i] = self.child_ptrs[i];
}
node48.as_ptr().into()
}
}
impl<V: Value> NodeInternal48<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
let idx = self.child_indexes[key_byte as usize];
if idx != INVALID_CHILD_INDEX {
Some(self.child_ptrs[idx as usize])
} else {
None
}
}
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
let idx = self.child_indexes[key_byte as usize];
if idx != INVALID_CHILD_INDEX {
self.child_ptrs[idx as usize] = replacement
} else {
panic!("could not re-find parent with key {}", key_byte);
}
}
fn is_full(&self) -> bool {
self.num_children == 48
}
fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
assert!(self.num_children < 48);
assert!(self.child_indexes[key_byte as usize] == INVALID_CHILD_INDEX);
let idx = self.num_children;
self.child_indexes[key_byte as usize] = idx;
self.child_ptrs[idx as usize] = child;
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node256 = allocator.alloc(NodeInternal256 {
tag: NodeTag::Internal256,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_children: self.num_children as u16,
child_ptrs: [const { NodePtr::null() }; 256],
});
for i in 0..256 {
let idx = self.child_indexes[i];
if idx != INVALID_CHILD_INDEX {
node256.child_ptrs[i] = self.child_ptrs[idx as usize];
}
}
node256.as_ptr().into()
}
}
impl<V: Value> NodeInternal256<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
let idx = key_byte as usize;
if !self.child_ptrs[idx].is_null() {
Some(self.child_ptrs[idx])
} else {
None
}
}
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
let idx = key_byte as usize;
if !self.child_ptrs[idx].is_null() {
self.child_ptrs[idx] = replacement
} else {
panic!("could not re-find parent with key {}", key_byte);
}
}
fn is_full(&self) -> bool {
self.num_children == 256
}
fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
assert!(self.num_children < 256);
assert!(self.child_ptrs[key_byte as usize].is_null());
self.child_ptrs[key_byte as usize] = child;
self.num_children += 1;
}
}
impl<V: Value> NodeLeaf4<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn get_leaf_value<'a: 'b, 'b>(&'a self, key: u8) -> Option<&'b V> {
for i in 0..self.num_values {
if self.child_keys[i as usize] == key {
assert!(self.child_values[i as usize].is_some());
return self.child_values[i as usize].as_ref();
}
}
None
}
fn is_full(&self) -> bool {
self.num_values == 4
}
fn insert_value(&mut self, key_byte: u8, value: V) {
assert!(self.num_values < 4);
let idx = self.num_values as usize;
self.child_keys[idx] = key_byte;
self.child_values[idx] = Some(value);
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node16 = allocator.alloc(NodeLeaf16 {
tag: NodeTag::Leaf16,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_values: self.num_values,
child_keys: [0; 16],
child_values: [const { None }; 16],
});
for i in 0..self.num_values as usize {
node16.child_keys[i] = self.child_keys[i];
node16.child_values[i] = self.child_values[i].clone();
}
node16.as_ptr().into()
}
}
impl<V: Value> NodeLeaf16<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn get_leaf_value(&self, key: u8) -> Option<&V> {
for i in 0..self.num_values {
if self.child_keys[i as usize] == key {
assert!(self.child_values[i as usize].is_some());
return self.child_values[i as usize].as_ref();
}
}
None
}
fn is_full(&self) -> bool {
self.num_values == 16
}
fn insert_value(&mut self, key_byte: u8, value: V) {
assert!(self.num_values < 16);
let idx = self.num_values as usize;
self.child_keys[idx] = key_byte;
self.child_values[idx] = Some(value);
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node48 = allocator.alloc(NodeLeaf48 {
tag: NodeTag::Leaf48,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_values: self.num_values,
child_indexes: [INVALID_CHILD_INDEX; 256],
child_values: [const { None }; 48],
});
for i in 0..self.num_values {
let idx = self.child_keys[i as usize];
node48.child_indexes[idx as usize] = i;
node48.child_values[i as usize] = self.child_values[i as usize].clone();
}
node48.as_ptr().into()
}
}
impl<V: Value> NodeLeaf48<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn get_leaf_value(&self, key: u8) -> Option<&V> {
let idx = self.child_indexes[key as usize];
if idx != INVALID_CHILD_INDEX {
assert!(self.child_values[idx as usize].is_some());
self.child_values[idx as usize].as_ref()
} else {
None
}
}
fn is_full(&self) -> bool {
self.num_values == 48
}
fn insert_value(&mut self, key_byte: u8, value: V) {
assert!(self.num_values < 48);
assert!(self.child_indexes[key_byte as usize] == INVALID_CHILD_INDEX);
let idx = self.num_values;
self.child_indexes[key_byte as usize] = idx;
self.child_values[idx as usize] = Some(value);
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node256 = allocator.alloc(NodeLeaf256 {
tag: NodeTag::Leaf256,
lock_and_version: AtomicLockAndVersion::new(),
prefix: self.prefix.clone(),
prefix_len: self.prefix_len,
num_values: self.num_values as u16,
child_values: [const { None }; 256],
});
for i in 0..256 {
let idx = self.child_indexes[i];
if idx != INVALID_CHILD_INDEX {
node256.child_values[i] = self.child_values[idx as usize].clone();
}
}
node256.as_ptr().into()
}
}
impl<V: Value> NodeLeaf256<V> {
fn get_prefix(&self) -> &[u8] {
&self.prefix[0..self.prefix_len as usize]
}
fn truncate_prefix(&mut self, new_prefix_len: usize) {
assert!(new_prefix_len < self.prefix_len as usize);
let prefix = &mut self.prefix;
let offset = self.prefix_len as usize - new_prefix_len;
for i in 0..new_prefix_len {
prefix[i] = prefix[i + offset];
}
self.prefix_len = new_prefix_len as u8;
}
fn get_leaf_value(&self, key: u8) -> Option<&V> {
let idx = key as usize;
self.child_values[idx].as_ref()
}
fn is_full(&self) -> bool {
self.num_values == 256
}
fn insert_value(&mut self, key_byte: u8, value: V) {
assert!(self.num_values < 256);
assert!(self.child_values[key_byte as usize].is_none());
self.child_values[key_byte as usize] = Some(value);
self.num_values += 1;
}
}
impl<V: Value> NodeInternal256<V> {
pub(crate) fn new() -> NodeInternal256<V> {
NodeInternal256 {
tag: NodeTag::Internal256,
lock_and_version: AtomicLockAndVersion::new(),
prefix: [0; MAX_PREFIX_LEN],
prefix_len: 0,
num_children: 0,
child_ptrs: [const { NodePtr::null() }; 256],
}
}
}
impl<V: Value> From<*mut NodeInternal4<V>> for NodePtr<V> {
fn from(val: *mut NodeInternal4<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeInternal16<V>> for NodePtr<V> {
fn from(val: *mut NodeInternal16<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeInternal48<V>> for NodePtr<V> {
fn from(val: *mut NodeInternal48<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeInternal256<V>> for NodePtr<V> {
fn from(val: *mut NodeInternal256<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeLeaf4<V>> for NodePtr<V> {
fn from(val: *mut NodeLeaf4<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeLeaf16<V>> for NodePtr<V> {
fn from(val: *mut NodeLeaf16<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeLeaf48<V>> for NodePtr<V> {
fn from(val: *mut NodeLeaf48<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}
impl<V: Value> From<*mut NodeLeaf256<V>> for NodePtr<V> {
fn from(val: *mut NodeLeaf256<V>) -> NodePtr<V> {
NodePtr {
ptr: val.cast(),
phantom_value: PhantomData,
}
}
}

View File

@@ -1,202 +0,0 @@
use std::fmt::Debug;
use std::marker::PhantomData;
use super::lock_and_version::ResultOrRestart;
use super::node_ptr;
use super::node_ptr::ChildOrValuePtr;
use super::node_ptr::NodePtr;
use crate::EpochPin;
use crate::algorithm::lock_and_version::AtomicLockAndVersion;
use crate::{Allocator, Value};
pub struct NodeRef<'e, V> {
ptr: NodePtr<V>,
phantom: PhantomData<&'e EpochPin>,
}
impl<'e, V> Debug for NodeRef<'e, V> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(fmt, "{:?}", self.ptr)
}
}
impl<'e, V: Value> NodeRef<'e, V> {
pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
NodeRef {
ptr: root_ptr,
phantom: PhantomData,
}
}
pub(crate) fn read_lock_or_restart(&self) -> ResultOrRestart<ReadLockedNodeRef<'e, V>> {
let version = self.lockword().read_lock_or_restart()?;
Ok(ReadLockedNodeRef {
ptr: self.ptr,
version,
phantom: self.phantom,
})
}
fn lockword(&self) -> &AtomicLockAndVersion {
self.ptr.lockword()
}
}
/// A reference to a node that has been optimistically read-locked. The functions re-check
/// the version after each read.
pub struct ReadLockedNodeRef<'e, V> {
ptr: NodePtr<V>,
version: u64,
phantom: PhantomData<&'e EpochPin>,
}
pub(crate) enum ChildOrValue<'e, V> {
Child(NodeRef<'e, V>),
Value(*const V),
}
impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
pub(crate) fn is_full(&self) -> bool {
self.ptr.is_full()
}
pub(crate) fn get_prefix(&self) -> &[u8] {
self.ptr.get_prefix()
}
/// Note: because we're only holding a read lock, the prefix can change concurrently.
/// You must be prepared to restart if read_unlock() returns an error later.
///
/// Returns the length of the prefix, or None if it's not a match
pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
self.ptr.prefix_matches(key)
}
pub(crate) fn find_child_or_value_or_restart(
&self,
key_byte: u8,
) -> ResultOrRestart<Option<ChildOrValue<'e, V>>> {
let child_or_value = self.ptr.find_child_or_value(key_byte);
self.ptr.lockword().check_or_restart(self.version)?;
match child_or_value {
None => Ok(None),
Some(ChildOrValuePtr::Value(vptr)) => Ok(Some(ChildOrValue::Value(vptr))),
Some(ChildOrValuePtr::Child(child_ptr)) => Ok(Some(ChildOrValue::Child(NodeRef {
ptr: child_ptr,
phantom: self.phantom,
}))),
}
}
pub(crate) fn upgrade_to_write_lock_or_restart(
self,
) -> ResultOrRestart<WriteLockedNodeRef<'e, V>> {
self.ptr
.lockword()
.upgrade_to_write_lock_or_restart(self.version)?;
Ok(WriteLockedNodeRef {
ptr: self.ptr,
phantom: self.phantom,
})
}
pub(crate) fn read_unlock_or_restart(self) -> ResultOrRestart<()> {
self.ptr.lockword().check_or_restart(self.version)?;
Ok(())
}
}
/// A reference to a node that has been write-locked. The holder has exclusive
/// access to the node until it unlocks it (or drops the reference).
pub struct WriteLockedNodeRef<'e, V> {
ptr: NodePtr<V>,
phantom: PhantomData<&'e EpochPin>,
}
impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
pub(crate) fn is_leaf(&self) -> bool {
self.ptr.is_leaf()
}
pub(crate) fn write_unlock(mut self) {
self.ptr.lockword().write_unlock();
self.ptr = NodePtr::null();
}
pub(crate) fn write_unlock_obsolete(mut self) {
self.ptr.lockword().write_unlock_obsolete();
self.ptr = NodePtr::null();
}
pub(crate) fn get_prefix(&self) -> &[u8] {
self.ptr.get_prefix()
}
pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
self.ptr.truncate_prefix(new_prefix_len)
}
pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
self.ptr.insert_child(key_byte, child)
}
pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
self.ptr.insert_value(key_byte, value)
}
pub(crate) fn grow(&self, allocator: &Allocator) -> NewNodeRef<V> {
let new_node = self.ptr.grow(allocator);
NewNodeRef { ptr: new_node }
}
pub(crate) fn as_ptr(&self) -> NodePtr<V> {
self.ptr
}
pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
self.ptr.replace_child(key_byte, replacement);
}
}
impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
fn drop(&mut self) {
if !self.ptr.is_null() {
self.ptr.lockword().write_unlock();
}
}
}
pub(crate) struct NewNodeRef<V> {
ptr: NodePtr<V>,
}
impl<V: Value> NewNodeRef<V> {
pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
self.ptr.insert_child(key_byte, child)
}
pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
self.ptr.insert_value(key_byte, value)
}
pub(crate) fn into_ptr(self) -> NodePtr<V> {
self.ptr
}
}
pub(crate) fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
NewNodeRef {
ptr: node_ptr::new_internal(prefix, allocator),
}
}
pub(crate) fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
NewNodeRef {
ptr: node_ptr::new_leaf(prefix, allocator),
}
}

View File

@@ -1,107 +0,0 @@
use std::marker::PhantomData;
use std::mem::MaybeUninit;
use std::ops::{Deref, DerefMut};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};
pub struct Allocator {
area: *mut MaybeUninit<u8>,
allocated: AtomicUsize,
size: usize,
}
// FIXME: I don't know if these are really safe...
unsafe impl Send for Allocator {}
unsafe impl Sync for Allocator {}
#[repr(transparent)]
pub struct AllocatedBox<'a, T> {
inner: NonNull<T>,
_phantom: PhantomData<&'a Allocator>,
}
// FIXME: I don't know if these are really safe...
unsafe impl<'a, T> Send for AllocatedBox<'a, T> {}
unsafe impl<'a, T> Sync for AllocatedBox<'a, T> {}
impl<T> Deref for AllocatedBox<'_, T> {
type Target = T;
fn deref(&self) -> &T {
unsafe { self.inner.as_ref() }
}
}
impl<T> DerefMut for AllocatedBox<'_, T> {
fn deref_mut(&mut self) -> &mut T {
unsafe { self.inner.as_mut() }
}
}
impl<T> AsMut<T> for AllocatedBox<'_, T> {
fn as_mut(&mut self) -> &mut T {
unsafe { self.inner.as_mut() }
}
}
impl<T> AllocatedBox<'_, T> {
pub fn as_ptr(&self) -> *mut T {
self.inner.as_ptr()
}
}
const MAXALIGN: usize = std::mem::align_of::<usize>();
impl Allocator {
pub fn new_uninit(area: &'static mut [MaybeUninit<u8>]) -> Allocator {
let ptr = area.as_mut_ptr();
let size = area.len();
Self::new_from_ptr(ptr, size)
}
pub fn new(area: &'static mut [u8]) -> Allocator {
let ptr: *mut MaybeUninit<u8> = area.as_mut_ptr().cast();
let size = area.len();
Self::new_from_ptr(ptr, size)
}
pub fn new_from_ptr(ptr: *mut MaybeUninit<u8>, size: usize) -> Allocator {
let padding = ptr.align_offset(MAXALIGN);
Allocator {
area: ptr,
allocated: AtomicUsize::new(padding),
size,
}
}
pub fn alloc<'a, T: Sized>(&'a self, value: T) -> AllocatedBox<'a, T> {
let sz = std::mem::size_of::<T>();
// pad all allocations to MAXALIGN boundaries
assert!(std::mem::align_of::<T>() <= MAXALIGN);
let sz = sz.next_multiple_of(MAXALIGN);
let offset = self.allocated.fetch_add(sz, Ordering::Relaxed);
if offset + sz > self.size {
panic!("out of memory");
}
let inner = unsafe {
let inner = self.area.offset(offset as isize).cast::<T>();
// ptr::write, because the memory is uninitialized: a plain assignment
// would drop the garbage "previous" value.
inner.write(value);
NonNull::new_unchecked(inner)
};
AllocatedBox {
inner,
_phantom: PhantomData,
}
}
pub fn _dealloc_node<T>(&self, _node: AllocatedBox<T>) {
// doesn't free it immediately.
}
}
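For illustration, a minimal standalone use of this bump allocator could look as follows (assuming Allocator is in scope; in the real system the backing area is a pre-allocated shared memory segment rather than a leaked heap buffer):

```rust
fn main() {
    let area: &'static mut [u8] = Box::leak(vec![0u8; 4096].into_boxed_slice());
    let allocator = Allocator::new(area);
    // AllocatedBox derefs to the allocated value.
    let mut value = allocator.alloc(41u64);
    *value += 1;
    assert_eq!(*value, 42);
}
```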

View File

@@ -1,23 +0,0 @@
//! This is similar to crossbeam_epoch crate, but works in shared memory
//!
//! FIXME: not implemented yet. (We haven't implemented removing any nodes from the ART
//! tree, which is why we get away without this now)
pub(crate) struct EpochPin {}
pub(crate) fn pin_epoch() -> EpochPin {
EpochPin {}
}
/*
struct CollectorGlobal {
epoch: AtomicU64,
participants: CachePadded<AtomicU64>, // make it an array
}
struct CollectorQueue {
}
*/

View File

@@ -1,301 +0,0 @@
//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
//!
//! The data structure is described in these two papers:
//!
//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
//! The adaptive radix tree: ARTful indexing for main-memory databases.
//! Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
//! https://db.in.tum.de/~leis/papers/ART.pdf
//!
//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
//! The ART of practical synchronization.
//! 1-8. 10.1145/2933349.2933352.
//! https://db.in.tum.de/~leis/papers/artsync.pdf
//!
//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
//! use.
//!
//! The papers mention a few different variants. We have made the following choices in this
//! implementation:
//!
//! - All keys have the same length
//!
//! - Multi-value leaves. The values are stored directly in one of the four different leaf node
//! types.
//!
//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
//! variable length "prefix", which stores the keys of all the one-way nodes which have been
//! removed. However, similar to the "hybrid" approach described in the paper, each node only has
//! space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
//! create create one-way nodes to store them. (There was no particular reason for this choice,
//! the "hybrid" approach described in the paper might be better.)
//!
//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
//! ROWEX, which generally performs better when there is contention, but that is not important
//! for us, and Optimistic Lock Coupling is simpler to implement.
//!
//! ## Requirements
//!
//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
//! requirements, which is why we had to write our own. Namely:
//!
//! - The data structure has to live in a fixed-size shared memory segment. That rules out any
//! built-in Rust collections and most crates. (Except possibly with the 'allocator_api' Rust
//! feature, which is still a nightly-only experimental feature as of this writing.)
//!
//! - The data structure is accessed from multiple processes. Only one process updates the data
//! structure, but other processes perform reads. That rules out using built-in Rust locking
//! primitives like Mutex and RwLock, and most crates too.
//!
//! - Within the one process with write-access, multiple threads can perform updates concurrently.
//! That rules out using PostgreSQL LWLocks for the locking.
//!
//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
//!
//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
//! locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
//! read / write the same page at the same time. (Prefetching can conflict with actual reads,
//! however.)
//!
//! - The keys in the integrated cache are 17 bytes long.
//!
//! ## Usage
//!
//! Because this is designed to be used as a Postgres shared memory data structure, initialization
//! happens in stages:
//!
//! 0. A fixed area of shared memory is allocated at postmaster startup.
//!
//! 1. TreeInitStruct::new() is called to initialize it, still in the postmaster process, before any
//! other process or thread is running. It returns a TreeInitStruct, which is inherited by all
//! the processes through fork().
//!
//! 2. One process may have write-access to the struct, by calling
//! [TreeInitStruct::attach_writer]. (That process is the communicator process.)
//!
//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
//!
//! "Write access" means that you can insert / update / delete values in the tree.
//!
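//! A minimal sketch of that lifecycle (`MyKey` and `key` are placeholders; the
//! test suite contains a runnable version of this using its `TestKey` type):
//!
//! ```ignore
//! // Stages 0 and 1: allocate the shared memory area and initialize the tree in it.
//! let area = Box::leak(Box::new_uninit_slice(10_000_000));
//! let allocator = Box::leak(Box::new(Allocator::new_uninit(area)));
//! let init_struct = TreeInitStruct::<MyKey, usize>::new(allocator);
//!
//! // Stage 2: the single writer process attaches for write access. (Reader
//! // processes would instead call attach_reader() on the TreeInitStruct they
//! // inherited through fork().)
//! let writer = init_struct.attach_writer();
//! let mut w = writer.start_write();
//! w.insert(&key, 42);
//! assert_eq!(writer.start_read().get(&key), Some(42));
//! ```
//!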
//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
//! problem, the version check could be passed up to the caller, so that the caller could detect the
//! lost updates and retry the operation.
//!
//! ## Implementation
//!
//! node_ptr.rs: Provides low-level implementations of the four different node types (eight actually,
//! since there is an Internal and Leaf variant of each)
//!
//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
//! node.
//!
//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
//! abstractions on top.
//!
//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
//!
//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
//! own abstraction for that because we need the data structure to live in a pre-allocated shared
//! memory segment).
//!
//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
//! immediately deallocated, but stays around for as long as concurrent readers might still have
//! pointers to it. This is enforced by an epoch system, similar to e.g. crossbeam_epoch. We
//! couldn't use that crate either, because ours has to work across processes communicating over
//! the shared memory segment.
//!
//! ## See also
//!
//! There are some existing Rust ART implementations out there, but none of them filled all
//! the requirements:
//!
//! - https://github.com/XiangpengHao/congee
//! - https://github.com/declanvk/blart
//!
//! ## TODO
//!
//! - Removing values has not been implemented
mod algorithm;
mod allocator;
mod epoch;
use algorithm::RootPtr;
use allocator::AllocatedBox;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicBool, Ordering};
use crate::epoch::EpochPin;
#[cfg(test)]
mod tests;
pub use allocator::Allocator;
/// Fixed-length key type.
///
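/// All keys stored in a given tree must be exactly `KEY_LEN` bytes long. A
/// minimal sketch of an implementation for 16-byte keys (`MyKey` is a
/// placeholder, mirroring `TestKey` in the test suite):
///
/// ```ignore
/// #[derive(Clone, Debug)]
/// struct MyKey([u8; 16]);
///
/// impl Key for MyKey {
///     const KEY_LEN: usize = 16;
///     fn as_bytes(&self) -> &[u8] {
///         &self.0
///     }
/// }
/// ```
///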
pub trait Key: Clone + Debug {
const KEY_LEN: usize;
fn as_bytes(&self) -> &[u8];
}
/// Values stored in the tree
///
/// Values need to be cloneable, because when a node "grows", the value is copied to a new node and
/// the old one sticks around until all readers that might still see the old value are gone.
pub trait Value: Clone {}
struct Tree<K: Key, V: Value> {
root: RootPtr<V>,
writer_attached: AtomicBool,
phantom_key: PhantomData<K>,
}
/// Struct created at postmaster startup
pub struct TreeInitStruct<'t, K: Key, V: Value> {
tree: AllocatedBox<'t, Tree<K, V>>,
allocator: &'t Allocator,
}
/// The worker process has a reference to this. Write operations are only safe
/// from the worker process.
pub struct TreeWriteAccess<'t, K: Key, V: Value>
where
K: Key,
V: Value,
{
tree: AllocatedBox<'t, Tree<K, V>>,
allocator: &'t Allocator,
}
/// The backends have a reference to this. It cannot be used to modify the tree
pub struct TreeReadAccess<'t, K: Key, V: Value>
where
K: Key,
V: Value,
{
tree: AllocatedBox<'t, Tree<K, V>>,
}
impl<'a, 't: 'a, K: Key, V: Value> TreeInitStruct<'t, K, V> {
pub fn new(allocator: &'t Allocator) -> TreeInitStruct<'t, K, V> {
let tree = allocator.alloc(Tree {
root: algorithm::new_root(allocator),
writer_attached: AtomicBool::new(false),
phantom_key: PhantomData,
});
TreeInitStruct { tree, allocator }
}
pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V> {
let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
if previously_attached {
panic!("writer already attached");
}
TreeWriteAccess {
tree: self.tree,
allocator: self.allocator,
}
}
pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
TreeReadAccess { tree: self.tree }
}
}
impl<'t, K: Key + Clone, V: Value> TreeWriteAccess<'t, K, V> {
pub fn start_write(&'t self) -> TreeWriteGuard<'t, K, V> {
// TODO: grab epoch guard
TreeWriteGuard {
allocator: self.allocator,
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
}
}
pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
TreeReadGuard {
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
}
}
}
impl<'t, K: Key + Clone, V: Value> TreeReadAccess<'t, K, V> {
pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
TreeReadGuard {
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
}
}
}
pub struct TreeReadGuard<'t, K, V>
where
K: Key,
V: Value,
{
tree: &'t AllocatedBox<'t, Tree<K, V>>,
epoch_pin: EpochPin,
}
impl<'t, K: Key, V: Value> TreeReadGuard<'t, K, V> {
pub fn get(&self, key: &K) -> Option<V> {
algorithm::search(key, self.tree.root, &self.epoch_pin)
}
}
pub struct TreeWriteGuard<'t, K, V>
where
K: Key,
V: Value,
{
tree: &'t AllocatedBox<'t, Tree<K, V>>,
allocator: &'t Allocator,
epoch_pin: EpochPin,
}
impl<'t, K: Key, V: Value> TreeWriteGuard<'t, K, V> {
pub fn insert(&mut self, key: &K, value: V) {
self.update_with_fn(key, |_| Some(value))
}
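/// Insert or update the value for `key` based on its current value (`None` if absent).
///
/// A sketch, assuming `V = usize` (as in the tests) and a `TreeWriteGuard` named
/// `w`: increment a counter, starting from 1 if the key is absent:
///
/// ```ignore
/// w.update_with_fn(&key, |old| Some(old.cloned().unwrap_or(0) + 1));
/// ```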
pub fn update_with_fn<F>(&mut self, key: &K, value_fn: F)
where
F: FnOnce(Option<&V>) -> Option<V>,
{
algorithm::update_fn(
key,
value_fn,
self.tree.root,
self.allocator,
&self.epoch_pin,
)
}
pub fn get(&mut self, key: &K) -> Option<V> {
algorithm::search(key, self.tree.root, &self.epoch_pin)
}
}
impl<'t, K: Key, V: Value + Debug> TreeWriteGuard<'t, K, V> {
pub fn dump(&mut self) {
algorithm::dump_tree(self.tree.root, &self.epoch_pin)
}
}


@@ -1,90 +0,0 @@
use std::collections::HashSet;
use crate::Allocator;
use crate::TreeInitStruct;
use crate::{Key, Value};
use rand::seq::SliceRandom;
use rand::thread_rng;
const TEST_KEY_LEN: usize = 16;
#[derive(Clone, Copy, Debug)]
struct TestKey([u8; TEST_KEY_LEN]);
impl Key for TestKey {
const KEY_LEN: usize = TEST_KEY_LEN;
fn as_bytes(&self) -> &[u8] {
&self.0
}
}
impl From<u128> for TestKey {
fn from(val: u128) -> TestKey {
TestKey(val.to_be_bytes())
}
}
impl Value for usize {}
fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
const MEM_SIZE: usize = 10000000;
let area = Box::leak(Box::new_uninit_slice(MEM_SIZE));
let allocator = Box::leak(Box::new(Allocator::new_uninit(area)));
let init_struct = TreeInitStruct::<TestKey, usize>::new(allocator);
let tree_writer = init_struct.attach_writer();
for (idx, k) in keys.iter().enumerate() {
let mut w = tree_writer.start_write();
w.insert(&(*k).into(), idx);
eprintln!("INSERTED {:?}", Into::<TestKey>::into(*k));
}
//tree_writer.start_read().dump();
for (idx, k) in keys.iter().enumerate() {
let r = tree_writer.start_read();
let value = r.get(&(*k).into());
assert_eq!(value, Some(idx));
}
}
#[test]
fn dense() {
// This exercises splitting a node with prefix
let keys: &[u128] = &[0, 1, 2, 3, 256];
test_inserts(keys);
// Dense keys
let mut keys: Vec<u128> = (0..10000).collect();
test_inserts(&keys);
// Do the same in random orders
for _ in 1..10 {
keys.shuffle(&mut thread_rng());
test_inserts(&keys);
}
}
#[test]
fn sparse() {
// sparse keys
let mut keys: Vec<TestKey> = Vec::new();
let mut used_keys = HashSet::new();
for _ in 0..10000 {
loop {
let key = rand::random::<u128>();
if used_keys.contains(&key) {
continue;
}
used_keys.insert(key);
keys.push(key.into());
break;
}
}
test_inserts(&keys);
}


@@ -841,6 +841,10 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
let expected_end = match &end {
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,
// The timeline doesn't exist and we have been requested to not auto-create it.
// Compute requests for timelines that haven't been created yet
// might reach us before the storcon request to create those timelines.
TimelineNoCreate => true,
CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
if is_expected_io_error(io_error) =>
{
@@ -1059,6 +1063,8 @@ pub enum CopyStreamHandlerEnd {
Terminate,
#[error("EOF on COPY stream")]
EOF,
#[error("timeline not found, and allow_timeline_creation is false")]
TimelineNoCreate,
/// The connection was lost
#[error("connection error: {0}")]
Disconnected(#[from] ConnectionError),


@@ -303,7 +303,8 @@ pub struct PullTimelineRequest {
#[derive(Debug, Serialize, Deserialize)]
pub struct PullTimelineResponse {
// Donor safekeeper host
pub safekeeper_host: String,
/// Donor safekeeper host.
/// None if no pull happened because the timeline already exists.
pub safekeeper_host: Option<String>,
// TODO: add more fields?
}
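// A sketch of caller-side handling of the now-optional field (`resp` here is a
// hypothetical PullTimelineResponse value):
//
//     match resp.safekeeper_host {
//         Some(host) => println!("pulled timeline from donor {host}"),
//         None => println!("timeline already exists, nothing was pulled"),
//     }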


@@ -42,14 +42,12 @@ nix.workspace = true
num_cpus.workspace = true
num-traits.workspace = true
once_cell.workspace = true
peekable.workspace = true
pin-project-lite.workspace = true
postgres_backend.workspace = true
postgres-protocol.workspace = true
postgres-types.workspace = true
postgres_initdb.workspace = true
pprof.workspace = true
prost.workspace = true
rand.workspace = true
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
regex.workspace = true
@@ -62,7 +60,6 @@ serde_path_to_error.workspace = true
serde_with.workspace = true
sysinfo.workspace = true
tokio-tar.workspace = true
tonic.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
@@ -79,7 +76,6 @@ url.workspace = true
walkdir.workspace = true
metrics.workspace = true
pageserver_api.workspace = true
pageserver_data_api.workspace = true
pageserver_client.workspace = true # for ResponseErrorMessageExt, TODO: refactor that
pageserver_compaction.workspace = true
pem.workspace = true


@@ -1,13 +0,0 @@
[package]
name = "pageserver_client_grpc"
version = "0.1.0"
edition = "2024"
[dependencies]
bytes.workspace = true
http.workspace = true
thiserror.workspace = true
tonic.workspace = true
tracing.workspace = true
pageserver_data_api.workspace = true


@@ -1,221 +0,0 @@
//! Pageserver Data API client
//!
//! - Manage connections to pageserver
//! - Send requests to correct shards
//!
use std::collections::HashMap;
use std::sync::RwLock;
use bytes::Bytes;
use http;
use thiserror::Error;
use tonic;
use tonic::metadata::AsciiMetadataValue;
use tonic::transport::Channel;
use pageserver_data_api::model::*;
use pageserver_data_api::proto;
type Shardno = u16;
use pageserver_data_api::client::PageServiceClient;
type MyPageServiceClient = pageserver_data_api::client::PageServiceClient<
tonic::service::interceptor::InterceptedService<tonic::transport::Channel, AuthInterceptor>,
>;
#[derive(Error, Debug)]
pub enum PageserverClientError {
#[error("could not connect to service: {0}")]
ConnectError(#[from] tonic::transport::Error),
#[error("could not perform request: {0}`")]
RequestError(#[from] tonic::Status),
#[error("could not perform request: {0}`")]
InvalidUri(#[from] http::uri::InvalidUri),
}
pub struct PageserverClient {
_tenant_id: String,
_timeline_id: String,
_auth_token: Option<String>,
shard_map: HashMap<Shardno, String>,
channels: RwLock<HashMap<Shardno, Channel>>,
auth_interceptor: AuthInterceptor,
}
impl PageserverClient {
/// TODO: this doesn't currently react to changes in the shard map.
pub fn new(
tenant_id: &str,
timeline_id: &str,
auth_token: &Option<String>,
shard_map: HashMap<Shardno, String>,
) -> Self {
Self {
_tenant_id: tenant_id.to_string(),
_timeline_id: timeline_id.to_string(),
_auth_token: auth_token.clone(),
shard_map,
channels: RwLock::new(HashMap::new()),
auth_interceptor: AuthInterceptor::new(tenant_id, timeline_id, auth_token.as_ref()),
}
}
pub async fn process_rel_exists_request(
&self,
request: &RelExistsRequest,
) -> Result<bool, PageserverClientError> {
// Current sharding model assumes that all metadata is present only at shard 0.
let shard_no = 0;
let mut client = self.get_client(shard_no).await?;
let request = proto::RelExistsRequest::from(request);
let response = client.rel_exists(tonic::Request::new(request)).await?;
Ok(response.get_ref().exists)
}
pub async fn process_rel_size_request(
&self,
request: &RelSizeRequest,
) -> Result<u32, PageserverClientError> {
// Current sharding model assumes that all metadata is present only at shard 0.
let shard_no = 0;
let mut client = self.get_client(shard_no).await?;
let request = proto::RelSizeRequest::from(request);
let response = client.rel_size(tonic::Request::new(request)).await?;
Ok(response.get_ref().num_blocks)
}
pub async fn get_page(&self, request: &GetPageRequest) -> Result<Bytes, PageserverClientError> {
// FIXME: calculate the shard number correctly
let shard_no = 0;
let mut client = self.get_client(shard_no).await?;
let request = proto::GetPageRequest::from(request);
let response = client.get_page(tonic::Request::new(request)).await?;
Ok(response.into_inner().page_image)
}
/// Process a request to get the size of a database.
pub async fn process_dbsize_request(
&self,
request: &DbSizeRequest,
) -> Result<u64, PageserverClientError> {
// Current sharding model assumes that all metadata is present only at shard 0.
let shard_no = 0;
let mut client = self.get_client(shard_no).await?;
let request = proto::DbSizeRequest::from(request);
let response = client.db_size(tonic::Request::new(request)).await?;
Ok(response.get_ref().num_bytes)
}
/// Process a request to get the size of a database.
pub async fn get_base_backup(
&self,
request: &GetBaseBackupRequest,
gzip: bool,
) -> std::result::Result<
tonic::Response<tonic::codec::Streaming<proto::GetBaseBackupResponseChunk>>,
PageserverClientError,
> {
// Current sharding model assumes that all metadata is present only at shard 0.
let shard_no = 0;
let mut client = self.get_client(shard_no).await?;
if gzip {
client = client.accept_compressed(tonic::codec::CompressionEncoding::Gzip);
}
let request = proto::GetBaseBackupRequest::from(request);
let response = client.get_base_backup(tonic::Request::new(request)).await?;
Ok(response)
}
/// Get a client for given shard
///
/// This implements very basic caching. If we already have a client for the given shard,
/// reuse it. If not, create a new client and put it in the cache.
async fn get_client(
&self,
shard_no: u16,
) -> Result<MyPageServiceClient, PageserverClientError> {
let reused_channel: Option<Channel> = {
let channels = self.channels.read().unwrap();
channels.get(&shard_no).cloned()
};
let channel = if let Some(reused_channel) = reused_channel {
reused_channel
} else {
let endpoint: tonic::transport::Endpoint = self
.shard_map
.get(&shard_no)
.expect("no url for shard {shard_no}")
.parse()?;
let channel = endpoint.connect().await?;
// Insert it into the cache so that it can be reused on subsequent calls. It's possible
// that another thread did the same concurrently, in which case we will overwrite the
// client in the cache.
{
let mut channels = self.channels.write().unwrap();
channels.insert(shard_no, channel.clone());
}
channel
};
let client = PageServiceClient::with_interceptor(channel, self.auth_interceptor.clone());
Ok(client)
}
}
/// Inject tenant_id, timeline_id and authentication token to all pageserver requests.
#[derive(Clone)]
struct AuthInterceptor {
tenant_id: AsciiMetadataValue,
timeline_id: AsciiMetadataValue,
auth_token: Option<AsciiMetadataValue>,
}
impl AuthInterceptor {
fn new(tenant_id: &str, timeline_id: &str, auth_token: Option<&String>) -> Self {
Self {
tenant_id: tenant_id.parse().expect("could not parse tenant id"),
timeline_id: timeline_id.parse().expect("could not parse timeline id"),
auth_token: auth_token.map(|x| x.parse().expect("could not parse auth token")),
}
}
}
impl tonic::service::Interceptor for AuthInterceptor {
fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
req.metadata_mut()
.insert("neon-tenant-id", self.tenant_id.clone());
req.metadata_mut()
.insert("neon-timeline-id", self.timeline_id.clone());
if let Some(auth_token) = &self.auth_token {
req.metadata_mut()
.insert("neon-auth-token", auth_token.clone());
}
Ok(req)
}
}
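// A usage sketch: a single-shard client issuing a get_page request. The
// `connstring`, `tenant_id`, `timeline_id` and `req` values are assumed to be
// in scope; see the pagebench callers for a concrete setup:
//
//     let shard_map = HashMap::from([(0, connstring)]);
//     let client = PageserverClient::new(&tenant_id, &timeline_id, &None, shard_map);
//     let page: Bytes = client.get_page(&req).await?;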


@@ -10,6 +10,7 @@ use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer, delta_layer, ima
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::{page_cache, virtual_file};
use pageserver_api::key::Key;
use utils::id::{TenantId, TimelineId};
use crate::layer_map_analyzer::parse_filename;
@@ -27,6 +28,7 @@ pub(crate) enum LayerCmd {
path: PathBuf,
tenant: String,
timeline: String,
key: Option<Key>,
},
/// Dump all information of a layer file
DumpLayer {
@@ -100,6 +102,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
path,
tenant,
timeline,
key,
} => {
let timeline_path = path
.join(TENANTS_SEGMENT_NAME)
@@ -107,21 +110,37 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
.join(TIMELINES_SEGMENT_NAME)
.join(timeline);
let mut idx = 0;
let mut to_print = Vec::default();
for layer in fs::read_dir(timeline_path)? {
let layer = layer?;
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
println!(
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
idx,
layer_file.key_range.start,
layer_file.key_range.end,
layer_file.lsn_range.start,
layer_file.lsn_range.end,
layer_file.is_delta,
);
if let Some(key) = key {
if layer_file.key_range.start <= *key && *key < layer_file.key_range.end {
to_print.push((idx, layer_file));
}
} else {
to_print.push((idx, layer_file));
}
idx += 1;
}
}
if key.is_some() {
to_print
.sort_by_key(|(_idx, layer_file)| std::cmp::Reverse(layer_file.lsn_range.end));
}
for (idx, layer_file) in to_print {
println!(
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
idx,
layer_file.key_range.start,
layer_file.key_range.end,
layer_file.lsn_range.start,
layer_file.lsn_range.end,
layer_file.is_delta,
);
}
Ok(())
}
LayerCmd::DumpLayer {


@@ -1,18 +0,0 @@
[package]
name = "pageserver_data_api"
version = "0.1.0"
edition = "2024"
[dependencies]
# For Lsn.
#
# TODO: move Lsn to a separate crate? This draws in a lot more dependencies
utils.workspace = true
prost.workspace = true
thiserror.workspace = true
tonic.workspace = true
[build-dependencies]
tonic-build.workspace = true


@@ -1,8 +0,0 @@
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Generate rust code from .proto protobuf.
tonic_build::configure()
.bytes(&["."])
.compile_protos(&["proto/page_service.proto"], &["proto"])
.unwrap_or_else(|e| panic!("failed to compile protos {:?}", e));
Ok(())
}


@@ -1,84 +0,0 @@
// Page service presented by pageservers, for computes
//
// Each request must come with the following metadata:
// - neon-tenant-id
// - neon-timeline-id
// - neon-auth-token (if auth is enabled)
//
// TODO: what else? Priority? OpenTelemetry tracing?
//
syntax = "proto3";
package page_service;
service PageService {
rpc RelExists(RelExistsRequest) returns (RelExistsResponse);
// Returns size of a relation, as # of blocks
rpc RelSize (RelSizeRequest) returns (RelSizeResponse);
rpc GetPage (GetPageRequest) returns (GetPageResponse);
// Returns total size of a database, as # of bytes
rpc DbSize (DbSizeRequest) returns (DbSizeResponse);
rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);
}
message RequestCommon {
uint64 request_lsn = 1;
uint64 not_modified_since_lsn = 2;
}
message RelTag {
uint32 spc_oid = 1;
uint32 db_oid = 2;
uint32 rel_number = 3;
uint32 fork_number = 4;
}
message RelExistsRequest {
RequestCommon common = 1;
RelTag rel = 2;
}
message RelExistsResponse {
bool exists = 1;
}
message RelSizeRequest {
RequestCommon common = 1;
RelTag rel = 2;
}
message RelSizeResponse {
uint32 num_blocks = 1;
}
message GetPageRequest {
RequestCommon common = 1;
RelTag rel = 2;
uint32 block_number = 3;
}
message GetPageResponse {
bytes page_image = 1;
}
message DbSizeRequest {
RequestCommon common = 1;
uint32 db_oid = 2;
}
message DbSizeResponse {
uint64 num_bytes = 1;
}
message GetBaseBackupRequest {
RequestCommon common = 1;
bool replica = 2;
}
message GetBaseBackupResponseChunk {
bytes chunk = 1;
}


@@ -1,17 +0,0 @@
//! This crate has two modules related to the Pageserver Data API:
//!
//! proto: code auto-generated from the protobuf definition
//! model: slightly more ergonomic structs representing the same API
//!
//! See the protobuf spec under the proto/ subdirectory.
//!
//! This crate is used by both the client and the server. Try to keep it slim.
//!
pub mod model;
// Code generated by protobuf.
pub mod proto {
tonic::include_proto!("page_service");
}
pub use proto::page_service_client as client;


@@ -1,239 +0,0 @@
//! Structs representing the API
//!
//! These mirror the pageserver APIs and the structs automatically generated
//! from the protobuf specification. The differences are:
//!
//! - Types that are in fact required by the API are not Options. The protobuf "required"
//! attribute is deprecated and 'prost' marks a lot of members as optional because of that.
//! (See https://github.com/tokio-rs/prost/issues/800 for a gripe on this)
//!
//! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.
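//!
//! A round-trip sketch, assuming `Lsn` is a tuple struct wrapping a `u64` (as
//! in the `utils` crate) and using illustrative OID values:
//!
//! ```ignore
//! let req = RelSizeRequest {
//!     common: RequestCommon {
//!         request_lsn: Lsn(0x10),
//!         not_modified_since_lsn: Lsn(0x10),
//!     },
//!     rel: RelTag { spc_oid: 1663, db_oid: 5, rel_number: 1259, fork_number: 0 },
//! };
//! let wire = proto::RelSizeRequest::from(&req); // model -> protobuf
//! let back = RelSizeRequest::try_from(&wire)?;  // protobuf -> model, with validation
//! ```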
use utils::lsn::Lsn;
use crate::proto;
#[derive(Clone, Debug)]
pub struct RequestCommon {
pub request_lsn: Lsn,
pub not_modified_since_lsn: Lsn,
}
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct RelTag {
pub spc_oid: u32,
pub db_oid: u32,
pub rel_number: u32,
pub fork_number: u8,
}
#[derive(Clone, Debug)]
pub struct RelExistsRequest {
pub common: RequestCommon,
pub rel: RelTag,
}
#[derive(Clone, Debug)]
pub struct RelSizeRequest {
pub common: RequestCommon,
pub rel: RelTag,
}
#[derive(Clone, Debug)]
pub struct RelSizeResponse {
pub num_blocks: u32,
}
#[derive(Clone, Debug)]
pub struct GetPageRequest {
pub common: RequestCommon,
pub rel: RelTag,
pub block_number: u32,
}
#[derive(Clone, Debug)]
pub struct GetPageResponse {
pub page_image: std::vec::Vec<u8>,
}
#[derive(Clone, Debug)]
pub struct DbSizeRequest {
pub common: RequestCommon,
pub db_oid: u32,
}
#[derive(Clone, Debug)]
pub struct DbSizeResponse {
pub num_bytes: u64,
}
#[derive(Clone, Debug)]
pub struct GetBaseBackupRequest {
pub common: RequestCommon,
pub replica: bool,
}
//--- Conversions to/from the generated proto types
use thiserror::Error;
#[derive(Error, Debug)]
pub enum ProtocolError {
#[error("the value for field `{0}` is invalid")]
InvalidValue(&'static str),
#[error("the required field `{0}` is missing ")]
Missing(&'static str),
}
impl From<ProtocolError> for tonic::Status {
fn from(e: ProtocolError) -> Self {
match e {
ProtocolError::InvalidValue(_field) => tonic::Status::invalid_argument(e.to_string()),
ProtocolError::Missing(_field) => tonic::Status::invalid_argument(e.to_string()),
}
}
}
impl From<&RelTag> for proto::RelTag {
fn from(value: &RelTag) -> proto::RelTag {
proto::RelTag {
spc_oid: value.spc_oid,
db_oid: value.db_oid,
rel_number: value.rel_number,
fork_number: value.fork_number as u32,
}
}
}
impl TryFrom<&proto::RelTag> for RelTag {
type Error = ProtocolError;
fn try_from(value: &proto::RelTag) -> Result<RelTag, ProtocolError> {
Ok(RelTag {
spc_oid: value.spc_oid,
db_oid: value.db_oid,
rel_number: value.rel_number,
fork_number: value
.fork_number
.try_into()
.or(Err(ProtocolError::InvalidValue("fork_number")))?,
})
}
}
impl From<&RequestCommon> for proto::RequestCommon {
fn from(value: &RequestCommon) -> proto::RequestCommon {
proto::RequestCommon {
request_lsn: value.request_lsn.into(),
not_modified_since_lsn: value.not_modified_since_lsn.into(),
}
}
}
impl From<&proto::RequestCommon> for RequestCommon {
fn from(value: &proto::RequestCommon) -> RequestCommon {
RequestCommon {
request_lsn: value.request_lsn.into(),
not_modified_since_lsn: value.not_modified_since_lsn.into(),
}
}
}
impl From<&RelExistsRequest> for proto::RelExistsRequest {
fn from(value: &RelExistsRequest) -> proto::RelExistsRequest {
proto::RelExistsRequest {
common: Some((&value.common).into()),
rel: Some((&value.rel).into()),
}
}
}
impl TryFrom<&proto::RelExistsRequest> for RelExistsRequest {
type Error = ProtocolError;
fn try_from(value: &proto::RelExistsRequest) -> Result<RelExistsRequest, ProtocolError> {
Ok(RelExistsRequest {
common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
})
}
}
impl From<&RelSizeRequest> for proto::RelSizeRequest {
fn from(value: &RelSizeRequest) -> proto::RelSizeRequest {
proto::RelSizeRequest {
common: Some((&value.common).into()),
rel: Some((&value.rel).into()),
}
}
}
impl TryFrom<&proto::RelSizeRequest> for RelSizeRequest {
type Error = ProtocolError;
fn try_from(value: &proto::RelSizeRequest) -> Result<RelSizeRequest, ProtocolError> {
Ok(RelSizeRequest {
common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
})
}
}
impl From<&GetPageRequest> for proto::GetPageRequest {
fn from(value: &GetPageRequest) -> proto::GetPageRequest {
proto::GetPageRequest {
common: Some((&value.common).into()),
rel: Some((&value.rel).into()),
block_number: value.block_number,
}
}
}
impl TryFrom<&proto::GetPageRequest> for GetPageRequest {
type Error = ProtocolError;
fn try_from(value: &proto::GetPageRequest) -> Result<GetPageRequest, ProtocolError> {
Ok(GetPageRequest {
common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
block_number: value.block_number,
})
}
}
impl From<&DbSizeRequest> for proto::DbSizeRequest {
fn from(value: &DbSizeRequest) -> proto::DbSizeRequest {
proto::DbSizeRequest {
common: Some((&value.common).into()),
db_oid: value.db_oid,
}
}
}
impl TryFrom<&proto::DbSizeRequest> for DbSizeRequest {
type Error = ProtocolError;
fn try_from(value: &proto::DbSizeRequest) -> Result<DbSizeRequest, ProtocolError> {
Ok(DbSizeRequest {
common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
db_oid: value.db_oid,
})
}
}
impl From<&GetBaseBackupRequest> for proto::GetBaseBackupRequest {
fn from(value: &GetBaseBackupRequest) -> proto::GetBaseBackupRequest {
proto::GetBaseBackupRequest {
common: Some((&value.common).into()),
replica: value.replica,
}
}
}
impl TryFrom<&proto::GetBaseBackupRequest> for GetBaseBackupRequest {
type Error = ProtocolError;
fn try_from(
value: &proto::GetBaseBackupRequest,
) -> Result<GetBaseBackupRequest, ProtocolError> {
Ok(GetBaseBackupRequest {
common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
replica: value.replica,
})
}
}


@@ -23,8 +23,6 @@ tokio.workspace = true
tokio-util.workspace = true
pageserver_client.workspace = true
pageserver_client_grpc.workspace = true
pageserver_data_api.workspace = true
pageserver_api.workspace = true
utils = { path = "../../libs/utils/" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -9,9 +9,6 @@ use anyhow::Context;
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
use pageserver_client::page_service::BasebackupRequest;
use pageserver_client_grpc;
use pageserver_data_api::model::{GetBaseBackupRequest, RequestCommon};
use rand::prelude::*;
use tokio::sync::Barrier;
use tokio::task::JoinSet;
@@ -25,8 +22,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
/// basebackup@LatestLSN
#[derive(clap::Parser)]
pub(crate) struct Args {
#[clap(long, default_value = "false")]
grpc: bool,
#[clap(long, default_value = "http://localhost:9898")]
mgmt_api_endpoint: String,
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -57,7 +52,7 @@ impl LiveStats {
struct Target {
timeline: TenantTimelineId,
lsn_range: Range<Lsn>,
lsn_range: Option<Range<Lsn>>,
}
#[derive(serde::Serialize)]
@@ -110,7 +105,7 @@ async fn main_impl(
anyhow::Ok(Target {
timeline,
// TODO: support lsn_range != latest LSN
lsn_range: info.last_record_lsn..(info.last_record_lsn + 1),
lsn_range: Some(info.last_record_lsn..(info.last_record_lsn + 1)),
})
}
});
@@ -154,27 +149,14 @@ async fn main_impl(
for tl in &timelines {
let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
work_senders.insert(tl, sender);
let client_task = if args.grpc {
tokio::spawn(client_grpc(
args,
*tl,
Arc::clone(&start_work_barrier),
receiver,
Arc::clone(&all_work_done_barrier),
Arc::clone(&live_stats),
))
} else {
tokio::spawn(client(
args,
*tl,
Arc::clone(&start_work_barrier),
receiver,
Arc::clone(&all_work_done_barrier),
Arc::clone(&live_stats),
))
};
tasks.push(client_task);
tasks.push(tokio::spawn(client(
args,
*tl,
Arc::clone(&start_work_barrier),
receiver,
Arc::clone(&all_work_done_barrier),
Arc::clone(&live_stats),
)));
}
let work_sender = async move {
@@ -183,7 +165,7 @@ async fn main_impl(
let (timeline, work) = {
let mut rng = rand::thread_rng();
let target = all_targets.choose(&mut rng).unwrap();
let lsn = rng.gen_range(target.lsn_range.clone());
let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
(
target.timeline,
Work {
@@ -233,7 +215,7 @@ async fn main_impl(
#[derive(Copy, Clone)]
struct Work {
lsn: Lsn,
lsn: Option<Lsn>,
gzip: bool,
}
@@ -258,7 +240,7 @@ async fn client(
.basebackup(&BasebackupRequest {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
lsn: Some(lsn),
lsn,
gzip,
})
.await
@@ -288,71 +270,3 @@ async fn client(
all_work_done_barrier.wait().await;
}
#[instrument(skip_all)]
async fn client_grpc(
args: &'static Args,
timeline: TenantTimelineId,
start_work_barrier: Arc<Barrier>,
mut work: tokio::sync::mpsc::Receiver<Work>,
all_work_done_barrier: Arc<Barrier>,
live_stats: Arc<LiveStats>,
) {
let shard_map = HashMap::from([(0, args.page_service_connstring.clone())]);
let client = pageserver_client_grpc::PageserverClient::new(
&timeline.tenant_id.to_string(),
&timeline.timeline_id.to_string(),
&None,
shard_map,
);
start_work_barrier.wait().await;
while let Some(Work { lsn, gzip }) = work.recv().await {
let start = Instant::now();
//tokio::time::sleep(std::time::Duration::from_secs(1)).await;
info!("starting get_base_backup");
let mut basebackup_stream = client
.get_base_backup(
&GetBaseBackupRequest {
common: RequestCommon {
request_lsn: lsn,
not_modified_since_lsn: lsn,
},
replica: false,
},
gzip,
)
.await
.with_context(|| format!("start basebackup for {timeline}"))
.unwrap()
.into_inner();
info!("starting receive");
use futures::StreamExt;
let mut size = 0;
let mut nchunks = 0;
while let Some(chunk) = basebackup_stream.next().await {
let chunk = chunk
.context("error during basebackup")
.unwrap();
size += chunk.chunk.len();
nchunks += 1;
}
info!(
"basebackup size is {} bytes, avg chunk size {} bytes",
size,
size as f32 / nchunks as f32
);
let elapsed = start.elapsed();
live_stats.inc();
STATS.with(|stats| {
stats.borrow().lock().unwrap().observe(elapsed).unwrap();
});
}
all_work_done_barrier.wait().await;
}

View File

@@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet, VecDeque};
use std::collections::{HashSet, VecDeque};
use std::future::Future;
use std::num::NonZeroUsize;
use std::pin::Pin;
@@ -8,8 +8,6 @@ use std::time::{Duration, Instant};
use anyhow::Context;
use camino::Utf8PathBuf;
use futures::StreamExt;
use futures::stream::FuturesOrdered;
use pageserver_api::key::Key;
use pageserver_api::keyspace::KeySpaceAccum;
use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
@@ -27,8 +25,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
#[derive(clap::Parser)]
pub(crate) struct Args {
#[clap(long, default_value = "false")]
grpc: bool,
#[clap(long, default_value = "http://localhost:9898")]
mgmt_api_endpoint: String,
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -299,29 +295,7 @@ async fn main_impl(
.unwrap();
Box::pin(async move {
if args.grpc {
client_grpc(
args,
worker_id,
ss,
cancel,
rps_period,
ranges,
weights,
)
.await
} else {
client_libpq(
args,
worker_id,
ss,
cancel,
rps_period,
ranges,
weights,
)
.await
}
client_libpq(args, worker_id, ss, cancel, rps_period, ranges, weights).await
})
};
@@ -460,100 +434,3 @@ async fn client_libpq(
}
}
}
async fn client_grpc(
args: &Args,
worker_id: WorkerId,
shared_state: Arc<SharedState>,
cancel: CancellationToken,
rps_period: Option<Duration>,
ranges: Vec<KeyRange>,
weights: rand::distributions::weighted::WeightedIndex<i128>,
) {
let shard_map = HashMap::from([(0, args.page_service_connstring.clone())]);
let client = pageserver_client_grpc::PageserverClient::new(
&worker_id.timeline.tenant_id.to_string(),
&worker_id.timeline.timeline_id.to_string(),
&None,
shard_map,
);
let client = Arc::new(client);
shared_state.start_work_barrier.wait().await;
let client_start = Instant::now();
let mut ticks_processed = 0;
let mut inflight = FuturesOrdered::new();
while !cancel.is_cancelled() {
// Detect if a request took longer than the RPS rate
if let Some(period) = &rps_period {
let periods_passed_until_now =
usize::try_from(client_start.elapsed().as_micros() / period.as_micros()).unwrap();
if periods_passed_until_now > ticks_processed {
shared_state
.live_stats
.missed((periods_passed_until_now - ticks_processed) as u64);
}
ticks_processed = periods_passed_until_now;
}
while inflight.len() < args.queue_depth.get() {
let start = Instant::now();
let req = {
let mut rng = rand::thread_rng();
let r = &ranges[weights.sample(&mut rng)];
let key: i128 = rng.gen_range(r.start..r.end);
let key = Key::from_i128(key);
assert!(key.is_rel_block_key());
let (rel_tag, block_no) = key
.to_rel_block()
.expect("we filter non-rel-block keys out above");
pageserver_data_api::model::GetPageRequest {
common: pageserver_data_api::model::RequestCommon {
request_lsn: if rng.gen_bool(args.req_latest_probability) {
Lsn::MAX
} else {
r.timeline_lsn
},
not_modified_since_lsn: r.timeline_lsn,
},
rel: pageserver_data_api::model::RelTag {
spc_oid: rel_tag.spcnode,
db_oid: rel_tag.dbnode,
rel_number: rel_tag.relnode,
fork_number: rel_tag.forknum,
},
block_number: block_no,
}
};
let client_clone = client.clone();
let getpage_fut = async move {
let result = client_clone.get_page(&req).await;
(start, result)
};
inflight.push_back(getpage_fut);
}
let (start, result) = inflight.next().await.unwrap();
result.expect("getpage request should succeed");
let end = Instant::now();
shared_state.live_stats.request_done();
ticks_processed += 1;
STATS.with(|stats| {
stats
.borrow()
.lock()
.unwrap()
.observe(end.duration_since(start))
.unwrap();
});
if let Some(period) = &rps_period {
let next_at = client_start
+ Duration::from_micros(
(ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
);
tokio::time::sleep_until(next_at.into()).await;
}
}
}


@@ -151,14 +151,10 @@ where
.map_err(|_| BasebackupError::Shutdown)?,
),
};
let res = basebackup
basebackup
.send_tarball()
.instrument(info_span!("send_tarball", backup_lsn=%backup_lsn))
.await;
info!("basebackup done!");
res
.await
}
/// This is a short-lived object that exists only for the duration of tarball creation,


@@ -16,7 +16,6 @@ use http_utils::tls_certs::ReloadingCertificateResolver;
use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric};
use metrics::set_build_info_metric;
use nix::sys::socket::{setsockopt, sockopt};
use pageserver::compute_service;
use pageserver::config::{PageServerConf, PageserverIdentity, ignored_fields};
use pageserver::controller_upcall_client::StorageControllerUpcallClient;
use pageserver::deletion_queue::DeletionQueue;
@@ -28,7 +27,7 @@ use pageserver::task_mgr::{
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
use pageserver::{
CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, HttpsEndpointListener, http,
page_cache, task_mgr, virtual_file,
page_cache, page_service, task_mgr, virtual_file,
};
use postgres_backend::AuthType;
use remote_storage::GenericRemoteStorage;
@@ -505,7 +504,7 @@ fn start_pageserver(
// Set up deletion queue
let (deletion_queue, deletion_workers) = DeletionQueue::new(
remote_storage.clone(),
StorageControllerUpcallClient::new(conf, &shutdown_pageserver)?,
StorageControllerUpcallClient::new(conf, &shutdown_pageserver),
conf,
);
deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());
@@ -746,7 +745,7 @@ fn start_pageserver(
// Spawn a task to listen for libpq connections. It will spawn further tasks
// for each connection. We created the listener earlier already.
let perf_trace_dispatch = otel_guard.as_ref().map(|g| g.dispatch.clone());
let compute_service = compute_service::spawn(
let page_service = page_service::spawn(
conf,
tenant_manager.clone(),
pg_auth,
@@ -783,7 +782,7 @@ fn start_pageserver(
pageserver::shutdown_pageserver(
http_endpoint_listener,
https_endpoint_listener,
compute_service,
page_service,
consumption_metrics_tasks,
disk_usage_eviction_task,
&tenant_manager,


@@ -1,286 +0,0 @@
//!
//! The Compute Service listens for compute connections and serves requests
//! such as GetPage@LSN.
//!
//! We support two protocols:
//!
//! 1. Legacy, connection-oriented libpq based protocol. That's
//! handled by the code in page_service.rs.
//!
//! 2. gRPC based protocol. See compute_service_grpc.rs.
//!
//! To make the transition smooth, without having to open up new firewall ports
//! etc, both protocols are served on the same port. When a new TCP connection
//! is accepted, we peek at the first few bytes incoming from the client to
//! determine which protocol it speaks.
//!
//! TODO: This gets easier once we drop the legacy protocol support. Or if we
//! open a separate port for them.
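//!
//! For illustration, a sketch of how the first peeked byte distinguishes the
//! protocols (the actual dispatch lives in page_service_conn_main below):
//!
//! ```ignore
//! fn looks_like_grpc(first_byte: u8) -> bool {
//!     match first_byte {
//!         // TLS handshake record: gRPC over TLS.
//!         0x16 => true,
//!         // "PRI * HTTP/2.0" preface (gRPC with prior knowledge), or a
//!         // plain HTTP "POST"/"GET".
//!         b'P' | b'G' => true,
//!         // A libpq startup packet begins with a big-endian length, so its
//!         // first byte is in practice a small value like 0x00.
//!         _ => false,
//!     }
//! }
//! ```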
use std::sync::Arc;
use anyhow::Context;
use futures::FutureExt;
use pageserver_api::config::PageServicePipeliningConfig;
use postgres_backend::AuthType;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::SwappableJwtAuth;
use utils::sync::gate::{Gate, GateGuard};
use crate::compute_service_grpc::launch_compute_service_grpc_server;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
use crate::page_service::libpq_page_service_conn_main;
use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind};
use crate::tenant::mgr::TenantManager;
///////////////////////////////////////////////////////////////////////////////
pub type ConnectionHandlerResult = anyhow::Result<()>;
pub struct Connections {
cancel: CancellationToken,
tasks: tokio::task::JoinSet<ConnectionHandlerResult>,
gate: Gate,
}
impl Connections {
pub(crate) async fn shutdown(self) {
let Self {
cancel,
mut tasks,
gate,
} = self;
cancel.cancel();
while let Some(res) = tasks.join_next().await {
Self::handle_connection_completion(res);
}
gate.close().await;
}
fn handle_connection_completion(res: Result<anyhow::Result<()>, tokio::task::JoinError>) {
match res {
Ok(Ok(())) => {}
Ok(Err(e)) => error!("error in page_service connection task: {:?}", e),
Err(e) => error!("page_service connection task panicked: {:?}", e),
}
}
}
pub struct Listener {
cancel: CancellationToken,
/// Cancel the listener task through `cancel` to shut down the listener
/// and get a handle on the existing connections.
task: JoinHandle<Connections>,
}
pub fn spawn(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
pg_auth: Option<Arc<SwappableJwtAuth>>,
perf_trace_dispatch: Option<Dispatch>,
tcp_listener: tokio::net::TcpListener,
tls_config: Option<Arc<rustls::ServerConfig>>,
) -> Listener {
let cancel = CancellationToken::new();
let libpq_ctx = RequestContext::todo_child(
TaskKind::LibpqEndpointListener,
// listener task shouldn't need to download anything. (We will
// create a separate sub-contexts for each connection, with their
// own download behavior. This context is used only to listen and
// accept connections.)
DownloadBehavior::Error,
);
let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
"compute connection listener",
compute_connection_listener_main(
conf,
tenant_manager,
pg_auth,
perf_trace_dispatch,
tcp_listener,
conf.pg_auth_type,
tls_config,
conf.page_service_pipelining.clone(),
libpq_ctx,
cancel.clone(),
)
.map(anyhow::Ok),
));
Listener { cancel, task }
}
impl Listener {
pub async fn stop_accepting(self) -> Connections {
self.cancel.cancel();
self.task
.await
.expect("unreachable: we wrap the listener task in task_mgr::exit_on_panic_or_error")
}
}
/// Listener loop. Listens for connections, and launches a new handler
/// task for each.
///
/// Returns upon cancellation via `listener_cancel`, yielding the set of
/// still-open connections.
///
#[allow(clippy::too_many_arguments)]
pub async fn compute_connection_listener_main(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
auth: Option<Arc<SwappableJwtAuth>>,
perf_trace_dispatch: Option<Dispatch>,
listener: tokio::net::TcpListener,
auth_type: AuthType,
tls_config: Option<Arc<rustls::ServerConfig>>,
pipelining_config: PageServicePipeliningConfig,
listener_ctx: RequestContext,
listener_cancel: CancellationToken,
) -> Connections {
let connections_cancel = CancellationToken::new();
let connections_gate = Gate::default();
let mut connection_handler_tasks = tokio::task::JoinSet::default();
// The connection handling task passes the gRPC protocol
// connections to this channel. The tonic gRPC server reads the
// channel and takes over the connections from there.
let (grpc_connections_tx, grpc_connections_rx) = tokio::sync::mpsc::channel(1000);
// Set up the gRPC service
launch_compute_service_grpc_server(
grpc_connections_rx,
conf,
tenant_manager.clone(),
auth.clone(),
auth_type,
connections_cancel.clone(),
&listener_ctx,
);
// Main listener loop
loop {
let gate_guard = match connections_gate.enter() {
Ok(guard) => guard,
Err(_) => break,
};
let accepted = tokio::select! {
biased;
_ = listener_cancel.cancelled() => break,
next = connection_handler_tasks.join_next(), if !connection_handler_tasks.is_empty() => {
let res = next.expect("we don't poll while empty");
Connections::handle_connection_completion(res);
continue;
}
accepted = listener.accept() => accepted,
};
match accepted {
Ok((socket, peer_addr)) => {
// Connection established. Spawn a new task to handle it.
debug!("accepted connection from {}", peer_addr);
let local_auth = auth.clone();
let connection_ctx = RequestContextBuilder::from(&listener_ctx)
.task_kind(TaskKind::PageRequestHandler)
.download_behavior(DownloadBehavior::Download)
.perf_span_dispatch(perf_trace_dispatch.clone())
.detached_child();
connection_handler_tasks.spawn(page_service_conn_main(
conf,
tenant_manager.clone(),
local_auth,
socket,
auth_type,
tls_config.clone(),
pipelining_config.clone(),
connection_ctx,
connections_cancel.child_token(),
gate_guard,
grpc_connections_tx.clone(),
));
}
Err(err) => {
// accept() failed. Log the error, and loop back to retry on next connection.
error!("accept() failed: {:?}", err);
}
}
}
debug!("page_service listener loop terminated");
Connections {
cancel: connections_cancel,
tasks: connection_handler_tasks,
gate: connections_gate,
}
}
/// Handle a new incoming connection.
///
/// This peeks at the first few incoming bytes and dispatches the connection
/// to the legacy libpq handler or the new gRPC handler accordingly.
#[instrument(skip_all, fields(peer_addr, application_name, compute_mode))]
#[allow(clippy::too_many_arguments)]
pub async fn page_service_conn_main(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
auth: Option<Arc<SwappableJwtAuth>>,
socket: tokio::net::TcpStream,
auth_type: AuthType,
tls_config: Option<Arc<rustls::ServerConfig>>,
pipelining_config: PageServicePipeliningConfig,
connection_ctx: RequestContext,
cancel: CancellationToken,
gate_guard: GateGuard,
grpc_connections_tx: tokio::sync::mpsc::Sender<tokio::io::Result<tokio::net::TcpStream>>,
) -> ConnectionHandlerResult {
let mut buf: [u8; 4] = [0; 4];
socket
.set_nodelay(true)
.context("could not set TCP_NODELAY")?;
// Peek
socket.peek(&mut buf).await?;
let mut grpc = false;
if buf[0] == 0x16 {
// looks like a TLS handshake. Assume gRPC.
// XXX: Starting with v17, PostgreSQL also supports "direct TLS mode". But
// the compute doesn't use it.
grpc = true;
}
if buf[0] == b'G' || buf[0] == b'P' {
// Looks like 'GET' or 'POST'
// or 'PRI', indicating gRPC over HTTP/2 with prior knowledge
grpc = true;
}
// Dispatch
if grpc {
grpc_connections_tx.send(Ok(socket)).await?;
info!("connection sent to channel");
Ok(())
} else {
libpq_page_service_conn_main(
conf,
tenant_manager,
auth,
socket,
auth_type,
tls_config,
pipelining_config,
connection_ctx,
cancel,
gate_guard,
)
.await
}
}


@@ -1,746 +0,0 @@
//!
//! Compute <-> Pageserver API handler. This is for the new gRPC-based protocol
//!
//! TODO:
//!
//! - Many of the API endpoints are still missing
//!
//! - This is very much not optimized.
//!
//! - Much of the code was copy-pasted from page_service.rs. Like the code to get the
//! Timeline object, and the JWT auth. Could refactor and share.
//!
//!
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::task::Poll;
use std::time::Duration;
use std::time::Instant;
use crate::TenantManager;
use crate::auth::check_permission;
use crate::basebackup;
use crate::basebackup::BasebackupError;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
use crate::tenant::mgr::ShardResolveResult;
use crate::tenant::mgr::ShardSelector;
use crate::tenant::storage_layer::IoConcurrency;
use crate::tenant::timeline::WaitLsnTimeout;
use tokio::io::{AsyncWriteExt, ReadHalf, SimplexStream};
use tokio::task::JoinHandle;
use tokio_util::codec::{Decoder, FramedRead};
use tokio_util::sync::CancellationToken;
use futures::stream::StreamExt;
use pageserver_data_api::model;
use pageserver_data_api::proto::page_service_server::PageService;
use pageserver_data_api::proto::page_service_server::PageServiceServer;
use anyhow::Context;
use bytes::BytesMut;
use jsonwebtoken::TokenData;
use tracing::Instrument;
use tracing::{debug, error};
use utils::auth::SwappableJwtAuth;
use utils::id::{TenantId, TenantTimelineId, TimelineId};
use utils::lsn::Lsn;
use utils::simple_rcu::RcuReadGuard;
use crate::tenant::PageReconstructError;
use postgres_ffi::BLCKSZ;
use tonic;
use tonic::codec::CompressionEncoding;
use tonic::service::interceptor::InterceptedService;
use pageserver_api::key::rel_block_to_key;
use crate::pgdatadir_mapping::Version;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_backend::AuthType;
pub use pageserver_data_api::proto;
pub(super) fn launch_compute_service_grpc_server(
tcp_connections_rx: tokio::sync::mpsc::Receiver<tokio::io::Result<tokio::net::TcpStream>>,
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
auth: Option<Arc<SwappableJwtAuth>>,
auth_type: AuthType,
connections_cancel: CancellationToken,
listener_ctx: &RequestContext,
) {
// Set up the gRPC service
let service_ctx = RequestContextBuilder::from(listener_ctx)
.task_kind(TaskKind::PageRequestHandler)
.download_behavior(DownloadBehavior::Download)
.attached_child();
let service = crate::compute_service_grpc::PageServiceService {
conf,
tenant_mgr: tenant_manager.clone(),
ctx: Arc::new(service_ctx),
};
let authenticator = PageServiceAuthenticator {
auth: auth.clone(),
auth_type,
};
let server = InterceptedService::new(
PageServiceServer::new(service).send_compressed(CompressionEncoding::Gzip),
authenticator,
);
let cc = connections_cancel.clone();
tokio::spawn(async move {
tonic::transport::Server::builder()
.add_service(server)
.serve_with_incoming_shutdown(
tokio_stream::wrappers::ReceiverStream::new(tcp_connections_rx),
cc.cancelled(),
)
.await
});
}
struct PageServiceService {
conf: &'static PageServerConf,
tenant_mgr: Arc<TenantManager>,
ctx: Arc<RequestContext>,
}
/// Map errors from a get() operation to gRPC statuses.
impl From<PageReconstructError> for tonic::Status {
fn from(e: PageReconstructError) -> Self {
match e {
PageReconstructError::Other(err) => tonic::Status::unknown(err.to_string()),
PageReconstructError::AncestorLsnTimeout(_) => {
tonic::Status::unavailable(e.to_string())
}
PageReconstructError::Cancelled => tonic::Status::aborted(e.to_string()),
PageReconstructError::WalRedo(_) => tonic::Status::internal(e.to_string()),
PageReconstructError::MissingKey(_) => tonic::Status::internal(e.to_string()),
}
}
}
fn convert_reltag(value: &model::RelTag) -> pageserver_api::reltag::RelTag {
pageserver_api::reltag::RelTag {
spcnode: value.spc_oid,
dbnode: value.db_oid,
relnode: value.rel_number,
forknum: value.fork_number,
}
}
#[tonic::async_trait]
impl PageService for PageServiceService {
type GetBaseBackupStream = GetBaseBackupStream;
async fn rel_exists(
&self,
request: tonic::Request<proto::RelExistsRequest>,
) -> std::result::Result<tonic::Response<proto::RelExistsResponse>, tonic::Status> {
let ttid = self.extract_ttid(request.metadata())?;
let req: model::RelExistsRequest = request.get_ref().try_into()?;
let rel = convert_reltag(&req.rel);
let span = tracing::info_span!("rel_exists", tenant_id = %ttid.tenant_id, timeline_id = %ttid.timeline_id, rel = %rel, req_lsn = %req.common.request_lsn);
async {
let timeline = self.get_timeline(ttid, ShardSelector::Zero).await?;
let ctx = self.ctx.with_scope_timeline(&timeline);
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(
&timeline,
req.common.request_lsn,
req.common.not_modified_since_lsn,
&latest_gc_cutoff_lsn,
&ctx,
)
.await?;
let exists = timeline
.get_rel_exists(rel, Version::Lsn(lsn), &ctx)
.await?;
Ok(tonic::Response::new(proto::RelExistsResponse { exists }))
}
.instrument(span)
.await
}
/// Returns size of a relation, as # of blocks
async fn rel_size(
&self,
request: tonic::Request<proto::RelSizeRequest>,
) -> std::result::Result<tonic::Response<proto::RelSizeResponse>, tonic::Status> {
let ttid = self.extract_ttid(request.metadata())?;
let req: model::RelSizeRequest = request.get_ref().try_into()?;
let rel = convert_reltag(&req.rel);
let span = tracing::info_span!("rel_size", tenant_id = %ttid.tenant_id, timeline_id = %ttid.timeline_id, rel = %rel, req_lsn = %req.common.request_lsn);
async {
let timeline = self.get_timeline(ttid, ShardSelector::Zero).await?;
let ctx = self.ctx.with_scope_timeline(&timeline);
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(
&timeline,
req.common.request_lsn,
req.common.not_modified_since_lsn,
&latest_gc_cutoff_lsn,
&ctx,
)
.await?;
let num_blocks = timeline.get_rel_size(rel, Version::Lsn(lsn), &ctx).await?;
Ok(tonic::Response::new(proto::RelSizeResponse { num_blocks }))
}
.instrument(span)
.await
}
async fn get_page(
&self,
request: tonic::Request<proto::GetPageRequest>,
) -> std::result::Result<tonic::Response<proto::GetPageResponse>, tonic::Status> {
let ttid = self.extract_ttid(request.metadata())?;
let req: model::GetPageRequest = request.get_ref().try_into()?;
// Calculate shard number.
//
// FIXME: this should probably be part of the data_api crate.
let rel = convert_reltag(&req.rel);
let key = rel_block_to_key(rel, req.block_number);
let timeline = self.get_timeline(ttid, ShardSelector::Page(key)).await?;
let ctx = self.ctx.with_scope_timeline(&timeline);
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(
&timeline,
req.common.request_lsn,
req.common.not_modified_since_lsn,
&latest_gc_cutoff_lsn,
&ctx,
)
.await?;
let shard_id = timeline.tenant_shard_id.shard_number;
let span = tracing::info_span!("get_page", tenant_id = %ttid.tenant_id, shard_id = %shard_id, timeline_id = %ttid.timeline_id, rel = %rel, block_number = %req.block_number, req_lsn = %req.common.request_lsn);
async {
let gate_guard = match timeline.gate.enter() {
Ok(guard) => guard,
Err(_) => {
return Err(tonic::Status::unavailable("timeline is shutting down"));
}
};
let io_concurrency = IoConcurrency::spawn_from_conf(self.conf, gate_guard);
let page_image = timeline
.get_rel_page_at_lsn(
rel,
req.block_number,
Version::Lsn(lsn),
&ctx,
io_concurrency,
)
.await?;
Ok(tonic::Response::new(proto::GetPageResponse {
page_image,
}))
}
.instrument(span)
.await
}
async fn db_size(
&self,
request: tonic::Request<proto::DbSizeRequest>,
) -> Result<tonic::Response<proto::DbSizeResponse>, tonic::Status> {
let ttid = self.extract_ttid(request.metadata())?;
let req: model::DbSizeRequest = request.get_ref().try_into()?;
let span = tracing::info_span!("get_page", tenant_id = %ttid.tenant_id, timeline_id = %ttid.timeline_id, db_oid = %req.db_oid, req_lsn = %req.common.request_lsn);
async {
let timeline = self.get_timeline(ttid, ShardSelector::Zero).await?;
let ctx = self.ctx.with_scope_timeline(&timeline);
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(
&timeline,
req.common.request_lsn,
req.common.not_modified_since_lsn,
&latest_gc_cutoff_lsn,
&ctx,
)
.await?;
let total_blocks = timeline
.get_db_size(DEFAULTTABLESPACE_OID, req.db_oid, Version::Lsn(lsn), &ctx)
.await?;
Ok(tonic::Response::new(proto::DbSizeResponse {
num_bytes: total_blocks as u64 * BLCKSZ as u64,
}))
}
.instrument(span)
.await
}
async fn get_base_backup(
&self,
request: tonic::Request<proto::GetBaseBackupRequest>,
) -> Result<tonic::Response<Self::GetBaseBackupStream>, tonic::Status> {
let ttid = self.extract_ttid(request.metadata())?;
let req: model::GetBaseBackupRequest = request.get_ref().try_into()?;
let timeline = self.get_timeline(ttid, ShardSelector::Zero).await?;
let ctx = self.ctx.with_scope_timeline(&timeline);
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(
&timeline,
req.common.request_lsn,
req.common.not_modified_since_lsn,
&latest_gc_cutoff_lsn,
&ctx,
)
.await?;
let span = tracing::info_span!("get_base_backup", tenant_id = %ttid.tenant_id, timeline_id = %ttid.timeline_id, req_lsn = %req.common.request_lsn);
tracing::info!("starting basebackup");
#[allow(dead_code)]
enum TestMode {
/// Create real basebackup, in streaming fashion
Streaming,
/// Create real basebackup, but fully materialize it in the 'simplex' pipe buffer first
Materialize,
/// Create a dummy all-zeros basebackup, in streaming fashion
DummyStreaming,
/// Create a dummy all-zeros basebackup, but fully materialize it first
DummyMaterialize,
}
let mode = TestMode::Streaming;
let buf_size = match mode {
TestMode::Streaming | TestMode::DummyStreaming => 64 * 1024,
TestMode::Materialize | TestMode::DummyMaterialize => 64 * 1024 * 1024,
};
let (simplex_read, mut simplex_write) = tokio::io::simplex(buf_size);
let basebackup_task = match mode {
TestMode::DummyStreaming => {
tokio::spawn(
async move {
// hold onto the guard for as long as the basebackup runs
let _latest_gc_cutoff_lsn = latest_gc_cutoff_lsn;
let zerosbuf: [u8; 1024] = [0; 1024];
let nbytes = 16900000;
let mut bytes_written = 0;
while bytes_written < nbytes {
let s = std::cmp::min(1024, nbytes - bytes_written);
let _ = simplex_write.write_all(&zerosbuf[0..s]).await;
bytes_written += s;
}
simplex_write
.shutdown()
.await
.context("shutdown of basebackup pipe")?;
Ok(())
}
.instrument(span),
)
}
TestMode::DummyMaterialize => {
let zerosbuf: [u8; 1024] = [0; 1024];
let nbytes = 16900000;
let mut bytes_written = 0;
while bytes_written < nbytes {
let s = std::cmp::min(1024, nbytes - bytes_written);
let _ = simplex_write.write_all(&zerosbuf[0..s]).await;
bytes_written += s;
}
simplex_write
.shutdown()
.await
.expect("shutdown of basebackup pipe");
tracing::info!("basebackup (dummy) materialized");
let result = Ok(());
tokio::spawn(std::future::ready(result))
}
TestMode::Materialize => {
let result = basebackup::send_basebackup_tarball(
&mut simplex_write,
&timeline,
Some(lsn),
None,
false,
req.replica,
&ctx,
)
.await;
simplex_write
.shutdown()
.await
.expect("shutdown of basebackup pipe");
tracing::info!("basebackup materialized");
// The tarball is already fully materialized; the spawned task just reports the result.
tokio::spawn(std::future::ready(result))
}
TestMode::Streaming => {
tokio::spawn(
async move {
// hold onto the guard for as long as the basebackup runs
let _latest_gc_cutoff_lsn = latest_gc_cutoff_lsn;
let result = basebackup::send_basebackup_tarball(
&mut simplex_write,
&timeline,
Some(lsn),
None,
false,
req.replica,
&ctx,
)
.await;
simplex_write
.shutdown()
.await
.context("shutdown of basebackup pipe")?;
result
}
.instrument(span),
)
}
};
let response = new_basebackup_response_stream(simplex_read, basebackup_task);
Ok(tonic::Response::new(response))
}
}
/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`],
/// and also different from `page_service::ACTIVE_TENANT_TIMEOUT`.
const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
impl PageServiceService {
async fn get_timeline(
&self,
ttid: TenantTimelineId,
shard_selector: ShardSelector,
) -> Result<Arc<Timeline>, tonic::Status> {
let timeout = ACTIVE_TENANT_TIMEOUT;
let wait_start = Instant::now();
let deadline = wait_start + timeout;
let tenant_shard = loop {
let resolved = self
.tenant_mgr
.resolve_attached_shard(&ttid.tenant_id, shard_selector);
match resolved {
ShardResolveResult::Found(tenant_shard) => break tenant_shard,
ShardResolveResult::NotFound => {
return Err(tonic::Status::not_found("tenant not found"));
}
ShardResolveResult::InProgress(barrier) => {
// We can't authoritatively answer right now: wait for InProgress state
// to end, then try again
tokio::select! {
_ = barrier.wait() => {
// The barrier completed: proceed around the loop to try looking up again
},
_ = tokio::time::sleep(deadline.duration_since(Instant::now())) => {
return Err(tonic::Status::unavailable("tenant is in InProgress state"));
}
}
}
}
};
tracing::debug!("Waiting for tenant to enter active state...");
tenant_shard
.wait_to_become_active(deadline.duration_since(Instant::now()))
.await
.map_err(|e| {
tonic::Status::unavailable(format!("tenant is not in active state: {e}"))
})?;
let timeline = tenant_shard
.get_timeline(ttid.timeline_id, true)
.map_err(|e| tonic::Status::unavailable(format!("could not get timeline: {e}")))?;
// FIXME: need to do something with the 'gate' here?
Ok(timeline)
}
/// Extract TenantTimelineId from the request metadata
///
/// Note: the interceptor has already authenticated the request
///
/// TODO: Could we use "binary" metadata for these, for efficiency? gRPC has such a concept
fn extract_ttid(
&self,
metadata: &tonic::metadata::MetadataMap,
) -> Result<TenantTimelineId, tonic::Status> {
let tenant_id = metadata
.get("neon-tenant-id")
.ok_or(tonic::Status::invalid_argument(
"neon-tenant-id metadata missing",
))?;
let tenant_id = tenant_id.to_str().map_err(|_| {
tonic::Status::invalid_argument("invalid UTF-8 characters in neon-tenant-id metadata")
})?;
let tenant_id = TenantId::from_str(tenant_id)
.map_err(|_| tonic::Status::invalid_argument("invalid neon-tenant-id metadata"))?;
let timeline_id =
metadata
.get("neon-timeline-id")
.ok_or(tonic::Status::invalid_argument(
"neon-timeline-id metadata missing",
))?;
let timeline_id = timeline_id.to_str().map_err(|_| {
tonic::Status::invalid_argument("invalid UTF-8 characters in neon-timeline-id metadata")
})?;
let timeline_id = TimelineId::from_str(timeline_id)
.map_err(|_| tonic::Status::invalid_argument("invalid neon-timelineid metadata"))?;
Ok(TenantTimelineId::new(tenant_id, timeline_id))
}
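// For reference, a gRPC client supplies these IDs as ASCII metadata on every
// request. A hedged sketch (the request message and ID variables are illustrative):
//
//     let mut req = tonic::Request::new(db_size_request);
//     let md = req.metadata_mut();
//     md.insert("neon-tenant-id", tenant_id.to_string().parse().unwrap());
//     md.insert("neon-timeline-id", timeline_id.to_string().parse().unwrap());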
// XXX: copied from PageServerHandler
async fn wait_or_get_last_lsn(
timeline: &Timeline,
request_lsn: Lsn,
not_modified_since: Lsn,
latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,
ctx: &RequestContext,
) -> Result<Lsn, tonic::Status> {
let last_record_lsn = timeline.get_last_record_lsn();
// Sanity check the request
if request_lsn < not_modified_since {
return Err(tonic::Status::invalid_argument(format!(
"invalid request with request LSN {} and not_modified_since {}",
request_lsn, not_modified_since,
)));
}
// Check explicitly for INVALID just to get a less scary error message if the request is obviously bogus
if request_lsn == Lsn::INVALID {
return Err(tonic::Status::invalid_argument("invalid LSN(0) in request"));
}
// Clients should only read from recent LSNs on their timeline, or from locations holding an LSN lease.
//
// We may have older data available, but we make a best effort to detect this case and return an error,
// to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).
if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {
let gc_info = &timeline.gc_info.read().unwrap();
if !gc_info.lsn_covered_by_lease(request_lsn) {
return Err(tonic::Status::not_found(format!(
"tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
request_lsn, **latest_gc_cutoff_lsn
)));
}
}
// Wait for WAL up to 'not_modified_since' to arrive, if necessary
if not_modified_since > last_record_lsn {
timeline
.wait_lsn(
not_modified_since,
crate::tenant::timeline::WaitLsnWaiter::PageService,
WaitLsnTimeout::Default,
ctx,
)
.await
.map_err(|_| {
tonic::Status::unavailable("not_modified_since LSN not arrived yet")
})?;
// Since we waited for 'not_modified_since' to arrive, that is now the last
// record LSN. (Or close enough for our purposes; the last-record LSN can
// advance immediately after we return anyway)
Ok(not_modified_since)
} else {
// It might be better to use max(not_modified_since, latest_gc_cutoff_lsn)
// here instead. That would give the same result, since we know that there
// haven't been any modifications since 'not_modified_since'. Using an older
// LSN might be faster, because that could allow skipping recent layers when
// finding the page. However, we have historically used 'last_record_lsn', so
// stick to that for now.
Ok(std::cmp::min(last_record_lsn, request_lsn))
}
}
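// Numeric intuition for the two branches above (LSN values are illustrative):
//   last_record_lsn = 0/5000
//   request A: request_lsn = 0/9000, not_modified_since = 0/7000
//     -> 0/7000 > 0/5000: wait for WAL up to 0/7000, then read at 0/7000
//   request B: request_lsn = 0/9000, not_modified_since = 0/3000
//     -> no wait; read at min(last_record_lsn, request_lsn) = 0/5000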
}
#[derive(Clone)]
pub struct PageServiceAuthenticator {
pub auth: Option<Arc<SwappableJwtAuth>>,
pub auth_type: AuthType,
}
impl tonic::service::Interceptor for PageServiceAuthenticator {
fn call(
&mut self,
req: tonic::Request<()>,
) -> std::result::Result<tonic::Request<()>, tonic::Status> {
// Check the tenant_id in any case
let tenant_id =
req.metadata()
.get("neon-tenant-id")
.ok_or(tonic::Status::invalid_argument(
"neon-tenant-id metadata missing",
))?;
let tenant_id = tenant_id.to_str().map_err(|_| {
tonic::Status::invalid_argument("invalid UTF-8 characters in neon-tenant-id metadata")
})?;
let tenant_id = TenantId::from_str(tenant_id)
.map_err(|_| tonic::Status::invalid_argument("invalid neon-tenant-id metadata"))?;
// `check_permission` below takes `Some(tenant_id)` to authorize access to a
// specific tenant; management APIs would pass `None` instead.
let auth = if let Some(auth) = &self.auth {
auth
} else {
// auth is set to Trust, nothing to check so just return ok
return Ok(req);
};
let jwt = req
.metadata()
.get("neon-auth-token")
.ok_or(tonic::Status::unauthenticated("no neon-auth-token"))?;
let jwt = jwt.to_str().map_err(|_| {
tonic::Status::invalid_argument("invalid UTF-8 characters in neon-auth-token metadata")
})?;
let jwtdata: TokenData<utils::auth::Claims> = auth
.decode(jwt)
.map_err(|err| tonic::Status::unauthenticated(format!("invalid JWT token: {}", err)))?;
let claims = jwtdata.claims;
if matches!(claims.scope, utils::auth::Scope::Tenant) && claims.tenant_id.is_none() {
return Err(tonic::Status::unauthenticated(
"jwt token scope is Tenant, but tenant id is missing",
));
}
debug!(
"jwt scope check succeeded for scope: {:#?} by tenant id: {:?}",
claims.scope, claims.tenant_id,
);
// The token is valid. Check if it's allowed to access the tenant ID
// given in the request.
check_permission(&claims, Some(tenant_id))
.map_err(|err| tonic::Status::permission_denied(err.to_string()))?;
// All checks out
Ok(req)
}
}
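// A hedged sketch of wiring the authenticator into a tonic server. The module
// path of the generated server (`proto::page_service_server::PageServiceServer`)
// is an assumption; everything else uses types defined or imported above.
#[allow(dead_code)]
async fn serve_page_service_grpc(
page_service: PageServiceService,
auth: Option<Arc<SwappableJwtAuth>>,
auth_type: AuthType,
addr: std::net::SocketAddr,
) -> anyhow::Result<()> {
// Every RPC passes through PageServiceAuthenticator::call() before any handler runs.
let svc = proto::page_service_server::PageServiceServer::with_interceptor(
page_service,
PageServiceAuthenticator { auth, auth_type },
);
tonic::transport::Server::builder()
.add_service(svc)
.serve(addr)
.await?;
Ok(())
}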
/// Stream of GetBaseBackupResponseChunk messages.
///
/// The first part of the Chain chunks the tarball. The second part checks the return value
/// of the send_basebackup_tarball Future that created the tarball.
type GetBaseBackupStream = futures::stream::Chain<BasebackupChunkedStream, CheckResultStream>;
fn new_basebackup_response_stream(
simplex_read: ReadHalf<SimplexStream>,
basebackup_task: JoinHandle<Result<(), BasebackupError>>,
) -> GetBaseBackupStream {
let framed = FramedRead::new(simplex_read, GetBaseBackupResponseDecoder {});
framed.chain(CheckResultStream { basebackup_task })
}
/// Stream that uses GetBaseBackupResponseDecoder
type BasebackupChunkedStream =
tokio_util::codec::FramedRead<ReadHalf<SimplexStream>, GetBaseBackupResponseDecoder>;
struct GetBaseBackupResponseDecoder;
impl Decoder for GetBaseBackupResponseDecoder {
type Item = proto::GetBaseBackupResponseChunk;
type Error = tonic::Status;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
if src.len() < 64 * 1024 {
return Ok(None);
}
let item = proto::GetBaseBackupResponseChunk {
chunk: bytes::Bytes::from(std::mem::take(src)),
};
Ok(Some(item))
}
fn decode_eof(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
if src.is_empty() {
return Ok(None);
}
let item = proto::GetBaseBackupResponseChunk {
chunk: bytes::Bytes::from(std::mem::take(src)),
};
Ok(Some(item))
}
}
struct CheckResultStream {
basebackup_task: tokio::task::JoinHandle<Result<(), BasebackupError>>,
}
impl futures::Stream for CheckResultStream {
type Item = Result<proto::GetBaseBackupResponseChunk, tonic::Status>;
fn poll_next(
mut self: Pin<&mut Self>,
ctx: &mut std::task::Context<'_>,
) -> Poll<Option<Self::Item>> {
let task = Pin::new(&mut self.basebackup_task);
match task.poll(ctx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Ok(Ok(()))) => Poll::Ready(None),
Poll::Ready(Ok(Err(basebackup_err))) => {
error!(error=%basebackup_err, "error getting basebackup");
Poll::Ready(Some(Err(tonic::Status::internal(
"could not get basebackup",
))))
}
Poll::Ready(Err(join_err)) => {
error!(error=%join_err, "JoinError getting basebackup");
Poll::Ready(Some(Err(tonic::Status::internal(
"could not get basebackup",
))))
}
}
}
}

View File

@@ -150,7 +150,7 @@ pub struct PageServerConf {
/// not terrible.
pub background_task_maximum_delay: Duration,
pub control_plane_api: Option<Url>,
pub control_plane_api: Url,
/// JWT token for use with the control plane API.
pub control_plane_api_token: Option<SecretString>,
@@ -438,7 +438,8 @@ impl PageServerConf {
test_remote_failures,
ondemand_download_behavior_treat_error_as_warn,
background_task_maximum_delay,
control_plane_api,
control_plane_api: control_plane_api
.ok_or_else(|| anyhow::anyhow!("`control_plane_api` must be set"))?,
control_plane_emergency_mode,
heatmap_upload_concurrency,
secondary_download_concurrency,
@@ -573,6 +574,7 @@ impl PageServerConf {
background_task_maximum_delay: Duration::ZERO,
load_previous_heatmap: Some(true),
generate_unarchival_heatmap: Some(true),
control_plane_api: Some(Url::parse("http://localhost:6666").unwrap()),
..Default::default()
};
PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
@@ -641,9 +643,12 @@ mod tests {
use super::PageServerConf;
#[test]
fn test_empty_config_toml_is_valid() {
// we use Default impl of everything in this situation
fn test_minimal_config_toml_is_valid() {
// The minimal valid config for running a pageserver:
// - control_plane_api is mandatory, as pageservers cannot run in isolation
// - we use Default impl of everything else in this situation
let input = r#"
control_plane_api = "http://localhost:6666"
"#;
let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
.expect("empty config is valid");

View File

@@ -30,9 +30,6 @@ pub(super) enum Name {
/// Tenant remote size
#[serde(rename = "remote_storage_size")]
RemoteSize,
/// Tenant resident size
#[serde(rename = "resident_size")]
ResidentSize,
/// Tenant synthetic size
#[serde(rename = "synthetic_storage_size")]
SyntheticSize,
@@ -187,18 +184,6 @@ impl MetricsKey {
.absolute_values()
}
/// Sum of [`Timeline::resident_physical_size`] for each `Tenant`.
///
/// [`Timeline::resident_physical_size`]: crate::tenant::Timeline::resident_physical_size
const fn resident_size(tenant_id: TenantId) -> AbsoluteValueFactory {
MetricsKey {
tenant_id,
timeline_id: None,
metric: Name::ResidentSize,
}
.absolute_values()
}
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
///
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
@@ -261,10 +246,7 @@ where
let mut tenants = std::pin::pin!(tenants);
while let Some((tenant_id, tenant)) = tenants.next().await {
let mut tenant_resident_size = 0;
let timelines = tenant.list_timelines();
let timelines_len = timelines.len();
for timeline in timelines {
let timeline_id = timeline.timeline_id;
@@ -287,16 +269,9 @@ where
continue;
}
}
tenant_resident_size += timeline.resident_physical_size();
}
if timelines_len == 0 {
// Force set it to 1 byte to avoid not being reported -- all timelines are offloaded.
tenant_resident_size = 1;
}
let snap = TenantSnapshot::collect(&tenant, tenant_resident_size);
let snap = TenantSnapshot::collect(&tenant);
snap.to_metrics(tenant_id, Utc::now(), cache, &mut current_metrics);
}
@@ -305,19 +280,14 @@ where
/// In-between abstraction to allow testing metrics without actual Tenants.
struct TenantSnapshot {
resident_size: u64,
remote_size: u64,
synthetic_size: u64,
}
impl TenantSnapshot {
/// Collect tenant status to have metrics created out of it.
///
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
/// cannot just list timelines here.
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
fn collect(t: &Arc<crate::tenant::TenantShard>) -> Self {
TenantSnapshot {
resident_size,
remote_size: t.remote_size(),
// Note that this metric is calculated in a separate bgworker
// Here we only use cached value, which may lag behind the real latest one
@@ -334,8 +304,6 @@ impl TenantSnapshot {
) {
let remote_size = MetricsKey::remote_storage_size(tenant_id).at(now, self.remote_size);
let resident_size = MetricsKey::resident_size(tenant_id).at(now, self.resident_size);
let synthetic_size = {
let factory = MetricsKey::synthetic_size(tenant_id);
let mut synthetic_size = self.synthetic_size;
@@ -355,11 +323,7 @@ impl TenantSnapshot {
}
};
metrics.extend(
[Some(remote_size), Some(resident_size), synthetic_size]
.into_iter()
.flatten(),
);
metrics.extend([Some(remote_size), synthetic_size].into_iter().flatten());
}
}

View File

@@ -224,7 +224,6 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
let tenant_id = TenantId::generate();
let ts = TenantSnapshot {
resident_size: 1000,
remote_size: 1000,
// not yet calculated
synthetic_size: 0,
@@ -245,7 +244,6 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
metrics,
&[
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
MetricsKey::resident_size(tenant_id).at(now, 1000),
MetricsKey::synthetic_size(tenant_id).at(now, 1000),
]
);
@@ -256,7 +254,6 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
let tenant_id = TenantId::generate();
let ts = TenantSnapshot {
resident_size: 1000,
remote_size: 1000,
// not yet calculated
synthetic_size: 0,
@@ -274,7 +271,6 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
metrics,
&[
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
MetricsKey::resident_size(tenant_id).at(now, 1000),
// no synthetic size here
]
);
@@ -295,14 +291,13 @@ pub(crate) const fn metric_examples_old(
timeline_id: TimelineId,
now: DateTime<Utc>,
before: DateTime<Utc>,
) -> [RawMetric; 6] {
) -> [RawMetric; 5] {
[
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
MetricsKey::written_size_delta(tenant_id, timeline_id)
.from_until_old_format(before, now, 0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
]
}
@@ -312,13 +307,12 @@ pub(crate) const fn metric_examples(
timeline_id: TimelineId,
now: DateTime<Utc>,
before: DateTime<Utc>,
) -> [NewRawMetric; 6] {
) -> [NewRawMetric; 5] {
[
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
MetricsKey::remote_storage_size(tenant_id).at(now, 0),
MetricsKey::resident_size(tenant_id).at(now, 0),
MetricsKey::synthetic_size(tenant_id).at(now, 1),
]
}

View File

@@ -521,10 +521,6 @@ mod tests {
line!(),
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"remote_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
),
(
line!(),
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"resident_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
),
(
line!(),
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"synthetic_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":1,"tenant_id":"00000000000000000000000000000000"}"#,
@@ -564,7 +560,7 @@ mod tests {
assert_eq!(upgraded_samples, new_samples);
}
fn metric_samples_old() -> [RawMetric; 6] {
fn metric_samples_old() -> [RawMetric; 5] {
let tenant_id = TenantId::from_array([0; 16]);
let timeline_id = TimelineId::from_array([0xff; 16]);
@@ -576,7 +572,7 @@ mod tests {
super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
}
fn metric_samples() -> [NewRawMetric; 6] {
fn metric_samples() -> [NewRawMetric; 5] {
let tenant_id = TenantId::from_array([0; 16]);
let timeline_id = TimelineId::from_array([0xff; 16]);

View File

@@ -58,14 +58,8 @@ pub trait StorageControllerUpcallApi {
impl StorageControllerUpcallClient {
/// A None return value indicates that the input `conf` object does not have control
/// plane API enabled.
pub fn new(
conf: &'static PageServerConf,
cancel: &CancellationToken,
) -> Result<Option<Self>, reqwest::Error> {
let mut url = match conf.control_plane_api.as_ref() {
Some(u) => u.clone(),
None => return Ok(None),
};
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Self {
let mut url = conf.control_plane_api.clone();
if let Ok(mut segs) = url.path_segments_mut() {
// This ensures that `url` ends with a slash if it doesn't already.
@@ -85,15 +79,17 @@ impl StorageControllerUpcallClient {
}
for cert in &conf.ssl_ca_certs {
client = client.add_root_certificate(Certificate::from_der(cert.contents())?);
client = client.add_root_certificate(
Certificate::from_der(cert.contents()).expect("Invalid certificate in config"),
);
}
Ok(Some(Self {
http_client: client.build()?,
Self {
http_client: client.build().expect("Failed to construct HTTP client"),
base_url: url,
node_id: conf.id,
cancel: cancel.clone(),
}))
}
}
#[tracing::instrument(skip_all)]

View File

@@ -585,7 +585,7 @@ impl DeletionQueue {
/// we don't spawn those inside new() so that the caller can use their runtime/spans of choice.
pub fn new<C>(
remote_storage: GenericRemoteStorage,
controller_upcall_client: Option<C>,
controller_upcall_client: C,
conf: &'static PageServerConf,
) -> (Self, DeletionQueueWorkers<C>)
where
@@ -701,7 +701,7 @@ mod test {
async fn restart(&mut self) {
let (deletion_queue, workers) = DeletionQueue::new(
self.storage.clone(),
Some(self.mock_control_plane.clone()),
self.mock_control_plane.clone(),
self.harness.conf,
);
@@ -821,11 +821,8 @@ mod test {
let mock_control_plane = MockStorageController::new();
let (deletion_queue, worker) = DeletionQueue::new(
storage.clone(),
Some(mock_control_plane.clone()),
harness.conf,
);
let (deletion_queue, worker) =
DeletionQueue::new(storage.clone(), mock_control_plane.clone(), harness.conf);
let worker_join = worker.spawn_with(&tokio::runtime::Handle::current());

View File

@@ -53,7 +53,7 @@ where
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
// Client for calling into control plane API for validation of deletes
controller_upcall_client: Option<C>,
controller_upcall_client: C,
// DeletionLists which are waiting generation validation. Not safe to
// execute until [`validate`] has processed them.
@@ -86,7 +86,7 @@ where
conf: &'static PageServerConf,
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
controller_upcall_client: Option<C>,
controller_upcall_client: C,
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
cancel: CancellationToken,
) -> Self {
@@ -137,20 +137,16 @@ where
return Ok(());
}
let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
match controller_upcall_client
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
.await
{
Ok(tenants) => tenants,
Err(RetryForeverError::ShuttingDown) => {
// The only way a validation call returns an error is when the cancellation token fires
return Err(DeletionQueueError::ShuttingDown);
}
let tenants_valid = match self
.controller_upcall_client
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
.await
{
Ok(tenants) => tenants,
Err(RetryForeverError::ShuttingDown) => {
// The only way a validation call returns an error is when the cancellation token fires
return Err(DeletionQueueError::ShuttingDown);
}
} else {
// Control plane API disabled. In legacy mode we consider everything valid.
tenant_generations.keys().map(|k| (*k, true)).collect()
};
let mut validated_sequence: Option<u64> = None;

View File

@@ -21,8 +21,6 @@ pub use pageserver_api::keyspace;
use tokio_util::sync::CancellationToken;
mod assert_u64_eq_usize;
pub mod aux_file;
pub mod compute_service;
pub mod compute_service_grpc;
pub mod metrics;
pub mod page_cache;
pub mod page_service;
@@ -84,7 +82,7 @@ impl CancellableTask {
pub async fn shutdown_pageserver(
http_listener: HttpEndpointListener,
https_listener: Option<HttpsEndpointListener>,
compute_service: compute_service::Listener,
page_service: page_service::Listener,
consumption_metrics_worker: ConsumptionMetricsTasks,
disk_usage_eviction_task: Option<DiskUsageEvictionTask>,
tenant_manager: &TenantManager,
@@ -169,11 +167,11 @@ pub async fn shutdown_pageserver(
}
});
// Shut down the compute service endpoint task. This prevents new connections from
// Shut down the libpq endpoint task. This prevents new connections from
// being accepted.
let remaining_connections = timed(
compute_service.stop_accepting(),
"shutdown compte service listener",
page_service.stop_accepting(),
"shutdown LibpqEndpointListener",
Duration::from_secs(1),
)
.await;

View File

@@ -497,6 +497,24 @@ pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::n
.expect("failed to define a metric")
});
pub(crate) static ONDEMAND_DOWNLOAD_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_ondemand_download_bytes_total",
"Total bytes of layers on-demand downloaded",
&["task_kind"]
)
.expect("failed to define a metric")
});
pub(crate) static ONDEMAND_DOWNLOAD_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_ondemand_download_count",
"Total count of layers on-demand downloaded",
&["task_kind"]
)
.expect("failed to define a metric")
});
pub(crate) mod wait_ondemand_download_time {
use super::*;
const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
@@ -2180,6 +2198,10 @@ impl BasebackupQueryTimeOngoingRecording<'_> {
// If you want to change categorize of a specific error, also change it in `log_query_error`.
let metric = match res {
Ok(_) => &self.parent.ok,
Err(QueryError::Shutdown) => {
// Do not observe ok/err for shutdown
return;
}
Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
if is_expected_io_error(io_error) =>
{

View File

@@ -13,6 +13,7 @@ use crate::PERF_TRACE_TARGET;
use anyhow::{Context, bail};
use async_compression::tokio::write::GzipEncoder;
use bytes::Buf;
use futures::FutureExt;
use itertools::Itertools;
use jsonwebtoken::TokenData;
use once_cell::sync::OnceCell;
@@ -39,6 +40,7 @@ use pq_proto::framed::ConnectionError;
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
use strum_macros::IntoStaticStr;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::{Claims, Scope, SwappableJwtAuth};
@@ -47,13 +49,15 @@ use utils::id::{TenantId, TimelineId};
use utils::logging::log_slow;
use utils::lsn::Lsn;
use utils::simple_rcu::RcuReadGuard;
use utils::sync::gate::GateGuard;
use utils::sync::gate::{Gate, GateGuard};
use utils::sync::spsc_fold;
use crate::auth::check_permission;
use crate::basebackup::BasebackupError;
use crate::config::PageServerConf;
use crate::context::{PerfInstrumentFutureExt, RequestContext, RequestContextBuilder};
use crate::context::{
DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
};
use crate::metrics::{
self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, GetPageBatchBreakReason, LIVE_CONNECTIONS,
SmgrOpTimer, TimelineMetrics,
@@ -63,6 +67,7 @@ use crate::span::{
debug_assert_current_span_has_tenant_and_timeline_id,
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
};
use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind};
use crate::tenant::mgr::{
GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager,
};
@@ -80,6 +85,171 @@ const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
/// Threshold at which to log slow GetPage requests.
const LOG_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30);
///////////////////////////////////////////////////////////////////////////////
pub struct Listener {
cancel: CancellationToken,
/// Cancel the listener task through `cancel` to shut down the listener
/// and get a handle on the existing connections.
task: JoinHandle<Connections>,
}
pub struct Connections {
cancel: CancellationToken,
tasks: tokio::task::JoinSet<ConnectionHandlerResult>,
gate: Gate,
}
pub fn spawn(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
pg_auth: Option<Arc<SwappableJwtAuth>>,
perf_trace_dispatch: Option<Dispatch>,
tcp_listener: tokio::net::TcpListener,
tls_config: Option<Arc<rustls::ServerConfig>>,
) -> Listener {
let cancel = CancellationToken::new();
let libpq_ctx = RequestContext::todo_child(
TaskKind::LibpqEndpointListener,
// listener task shouldn't need to download anything. (We will
// create a separate sub-contexts for each connection, with their
// own download behavior. This context is used only to listen and
// accept connections.)
DownloadBehavior::Error,
);
let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
"libpq listener",
libpq_listener_main(
conf,
tenant_manager,
pg_auth,
perf_trace_dispatch,
tcp_listener,
conf.pg_auth_type,
tls_config,
conf.page_service_pipelining.clone(),
libpq_ctx,
cancel.clone(),
)
.map(anyhow::Ok),
));
Listener { cancel, task }
}
impl Listener {
pub async fn stop_accepting(self) -> Connections {
self.cancel.cancel();
self.task
.await
.expect("unreachable: we wrap the listener task in task_mgr::exit_on_panic_or_error")
}
}
impl Connections {
pub(crate) async fn shutdown(self) {
let Self {
cancel,
mut tasks,
gate,
} = self;
cancel.cancel();
while let Some(res) = tasks.join_next().await {
Self::handle_connection_completion(res);
}
gate.close().await;
}
fn handle_connection_completion(res: Result<anyhow::Result<()>, tokio::task::JoinError>) {
match res {
Ok(Ok(())) => {}
Ok(Err(e)) => error!("error in page_service connection task: {:?}", e),
Err(e) => error!("page_service connection task panicked: {:?}", e),
}
}
}
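// Taken together, a typical lifecycle of these types (a sketch; arguments elided):
//
//     let listener = page_service::spawn(conf, tenant_manager, pg_auth, dispatch, tcp, tls);
//     // ... the pageserver runs ...
//     let connections = listener.stop_accepting().await; // stop accepting new connections
//     connections.shutdown().await;                      // cancel and drain existing ones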
///
/// Main loop of the page service.
///
/// Listens for connections, and launches a new handler task for each.
///
/// Returns the set of open connections upon cancellation via `listener_cancel`.
///
#[allow(clippy::too_many_arguments)]
pub async fn libpq_listener_main(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
auth: Option<Arc<SwappableJwtAuth>>,
perf_trace_dispatch: Option<Dispatch>,
listener: tokio::net::TcpListener,
auth_type: AuthType,
tls_config: Option<Arc<rustls::ServerConfig>>,
pipelining_config: PageServicePipeliningConfig,
listener_ctx: RequestContext,
listener_cancel: CancellationToken,
) -> Connections {
let connections_cancel = CancellationToken::new();
let connections_gate = Gate::default();
let mut connection_handler_tasks = tokio::task::JoinSet::default();
loop {
let gate_guard = match connections_gate.enter() {
Ok(guard) => guard,
Err(_) => break,
};
let accepted = tokio::select! {
biased;
_ = listener_cancel.cancelled() => break,
next = connection_handler_tasks.join_next(), if !connection_handler_tasks.is_empty() => {
let res = next.expect("we dont poll while empty");
Connections::handle_connection_completion(res);
continue;
}
accepted = listener.accept() => accepted,
};
match accepted {
Ok((socket, peer_addr)) => {
// Connection established. Spawn a new task to handle it.
debug!("accepted connection from {}", peer_addr);
let local_auth = auth.clone();
let connection_ctx = RequestContextBuilder::from(&listener_ctx)
.task_kind(TaskKind::PageRequestHandler)
.download_behavior(DownloadBehavior::Download)
.perf_span_dispatch(perf_trace_dispatch.clone())
.detached_child();
connection_handler_tasks.spawn(page_service_conn_main(
conf,
tenant_manager.clone(),
local_auth,
socket,
auth_type,
tls_config.clone(),
pipelining_config.clone(),
connection_ctx,
connections_cancel.child_token(),
gate_guard,
));
}
Err(err) => {
// accept() failed. Log the error, and loop back to retry on next connection.
error!("accept() failed: {:?}", err);
}
}
}
debug!("page_service listener loop terminated");
Connections {
cancel: connections_cancel,
tasks: connection_handler_tasks,
gate: connections_gate,
}
}
type ConnectionHandlerResult = anyhow::Result<()>;
/// Perf root spans start at the per-request level, after shard routing.
@@ -91,10 +261,9 @@ struct ConnectionPerfSpanFields {
compute_mode: Option<String>,
}
/// note: the caller has already set TCP_NODELAY on the socket
#[instrument(skip_all, fields(peer_addr, application_name, compute_mode))]
#[allow(clippy::too_many_arguments)]
pub async fn libpq_page_service_conn_main(
async fn page_service_conn_main(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
auth: Option<Arc<SwappableJwtAuth>>,
@@ -110,6 +279,10 @@ pub async fn libpq_page_service_conn_main(
.with_label_values(&["page_service"])
.guard();
socket
.set_nodelay(true)
.context("could not set TCP_NODELAY")?;
let socket_fd = socket.as_raw_fd();
let peer_addr = socket.peer_addr().context("get peer address")?;
@@ -220,7 +393,7 @@ struct PageServerHandler {
gate_guard: GateGuard,
}
pub struct TimelineHandles {
struct TimelineHandles {
wrapper: TenantManagerWrapper,
/// Note on size: the typical size of this map is 1. The largest size we expect
/// to see is the number of shards divided by the number of pageservers (typically < 2),
@@ -862,10 +1035,25 @@ impl PageServerHandler {
// avoid a somewhat costly Span::record() by constructing the entire span in one go.
macro_rules! mkspan {
(before shard routing) => {{
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
tracing::info_span!(
parent: &parent_span,
"handle_get_page_request",
rel = %req.rel,
blkno = %req.blkno,
req_lsn = %req.hdr.request_lsn,
not_modified_since_lsn = %req.hdr.not_modified_since
)
}};
($shard_id:expr) => {{
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
tracing::info_span!(
parent: &parent_span,
"handle_get_page_request",
rel = %req.rel,
blkno = %req.blkno,
req_lsn = %req.hdr.request_lsn,
not_modified_since_lsn = %req.hdr.not_modified_since,
shard_id = %$shard_id
)
}};
}
@@ -929,6 +1117,7 @@ impl PageServerHandler {
shard_id = %shard.get_shard_identity().shard_slug(),
timeline_id = %timeline_id,
lsn = %req.hdr.request_lsn,
not_modified_since_lsn = %req.hdr.not_modified_since,
request_id = %req.hdr.reqid,
key = %key,
)

View File

@@ -1084,8 +1084,17 @@ impl Timeline {
let mut result = HashMap::new();
for (k, v) in kv {
let v = v?;
if v.is_empty() {
// This is a tombstone -- we can skip it.
// Originally, the replorigin code uses `Lsn::INVALID` to represent a tombstone. However, since this key
// is part of the sparse keyspace, which uses an empty image to universally represent a tombstone,
// we also need to handle that case. Such tombstones might be written on the detach ancestor code path to
// avoid the value going into the child branch. (See [`crate::tenant::timeline::detach_ancestor::generate_tombstone_image_layer`] for more details.)
continue;
}
let origin_id = k.field6 as RepOriginId;
let origin_lsn = Lsn::des(&v).unwrap();
let origin_lsn = Lsn::des(&v)
.with_context(|| format!("decode replorigin value for {}: {v:?}", origin_id))?;
if origin_lsn != Lsn::INVALID {
result.insert(origin_id, origin_lsn);
}
@@ -2578,6 +2587,11 @@ impl DatadirModification<'_> {
}
}
#[cfg(test)]
pub fn put_for_unit_test(&mut self, key: Key, val: Value) {
self.put(key, val);
}
fn put(&mut self, key: Key, val: Value) {
if Self::is_data_key(&key) {
self.put_data(key.to_compact(), val)

View File

@@ -4254,9 +4254,7 @@ impl TenantShard {
deletion_queue_client: DeletionQueueClient,
l0_flush_global_state: L0FlushGlobalState,
) -> TenantShard {
debug_assert!(
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
);
assert!(!attached_conf.location.generation.is_none());
let (state, mut rx) = watch::channel(state);
@@ -5949,7 +5947,9 @@ mod tests {
use itertools::Itertools;
#[cfg(feature = "testing")]
use models::CompactLsnRange;
use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
use pageserver_api::key::{
AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX, repl_origin_key,
};
use pageserver_api::keyspace::KeySpace;
#[cfg(feature = "testing")]
use pageserver_api::keyspace::KeySpaceRandomAccum;
@@ -8185,6 +8185,54 @@ mod tests {
assert_eq!(files.get("pg_logical/mappings/test2"), None);
}
#[tokio::test]
async fn test_repl_origin_tombstones() {
let harness = TenantHarness::create("test_repl_origin_tombstones")
.await
.unwrap();
let (tenant, ctx) = harness.load().await;
let io_concurrency = IoConcurrency::spawn_for_test();
let mut lsn = Lsn(0x08);
let tline: Arc<Timeline> = tenant
.create_test_timeline(TIMELINE_ID, lsn, DEFAULT_PG_VERSION, &ctx)
.await
.unwrap();
let repl_lsn = Lsn(0x10);
{
lsn += 8;
let mut modification = tline.begin_modification(lsn);
modification.put_for_unit_test(repl_origin_key(2), Value::Image(Bytes::new()));
modification.set_replorigin(1, repl_lsn).await.unwrap();
modification.commit(&ctx).await.unwrap();
}
// we can read everything from the storage
let repl_origins = tline
.get_replorigins(lsn, &ctx, io_concurrency.clone())
.await
.unwrap();
assert_eq!(repl_origins.len(), 1);
assert_eq!(repl_origins[&1], lsn);
{
lsn += 8;
let mut modification = tline.begin_modification(lsn);
modification.put_for_unit_test(
repl_origin_key(3),
Value::Image(Bytes::copy_from_slice(b"cannot_decode_this")),
);
modification.commit(&ctx).await.unwrap();
}
let result = tline
.get_replorigins(lsn, &ctx, io_concurrency.clone())
.await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_metadata_image_creation() -> anyhow::Result<()> {
let harness = TenantHarness::create("test_metadata_image_creation").await?;

View File

@@ -346,7 +346,8 @@ async fn init_load_generations(
"Emergency mode! Tenants will be attached unsafely using their last known generation"
);
emergency_generations(tenant_confs)
} else if let Some(client) = StorageControllerUpcallClient::new(conf, cancel)? {
} else {
let client = StorageControllerUpcallClient::new(conf, cancel);
info!("Calling {} API to re-attach tenants", client.base_url());
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
match client.re_attach(conf).await {
@@ -360,9 +361,6 @@ async fn init_load_generations(
anyhow::bail!("Shut down while waiting for control plane re-attach response")
}
}
} else {
info!("Control plane API not configured, tenant generations are disabled");
return Ok(None);
};
// The deletion queue needs to know about the startup attachment state to decide which (if any) stored
@@ -1153,17 +1151,8 @@ impl TenantManager {
// Testing hack: if we are configured with no control plane, then drop the generation
// from upserts. This enables creating generation-less tenants even though neon_local
// always uses generations when calling the location conf API.
let attached_conf = if cfg!(feature = "testing") {
let mut conf = AttachedTenantConf::try_from(new_location_config)
.map_err(UpsertLocationError::BadRequest)?;
if self.conf.control_plane_api.is_none() {
conf.location.generation = Generation::none();
}
conf
} else {
AttachedTenantConf::try_from(new_location_config)
.map_err(UpsertLocationError::BadRequest)?
};
let attached_conf = AttachedTenantConf::try_from(new_location_config)
.map_err(UpsertLocationError::BadRequest)?;
let tenant = tenant_spawn(
self.conf,

View File

@@ -4,6 +4,7 @@ use std::sync::{Arc, Weak};
use std::time::{Duration, SystemTime};
use crate::PERF_TRACE_TARGET;
use crate::metrics::{ONDEMAND_DOWNLOAD_BYTES, ONDEMAND_DOWNLOAD_COUNT};
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::keyspace::KeySpace;
@@ -1255,6 +1256,14 @@ impl LayerInner {
self.access_stats.record_residence_event();
let task_kind: &'static str = ctx.task_kind().into();
ONDEMAND_DOWNLOAD_BYTES
.with_label_values(&[task_kind])
.inc_by(self.desc.file_size);
ONDEMAND_DOWNLOAD_COUNT
.with_label_values(&[task_kind])
.inc();
Ok(self.initialize_after_layer_is_on_disk(permit))
}
Err(e) => {

View File

@@ -178,7 +178,7 @@ impl Attempt {
}
}
async fn generate_tombstone_image_layer(
pub(crate) async fn generate_tombstone_image_layer(
detached: &Arc<Timeline>,
ancestor: &Arc<Timeline>,
ancestor_lsn: Lsn,

View File

@@ -163,8 +163,7 @@ pub async fn doit(
// Ensure at-least-once delivery of the upcall to storage controller
// before we mark the task as done and never come here again.
//
let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &cancel)?
.expect("storcon configured");
let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &cancel);
storcon_client
.put_timeline_import_status(
timeline.tenant_shard_id,

View File

@@ -14,8 +14,6 @@
use std::fs::File;
use std::io::{Error, ErrorKind};
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
#[cfg(target_os = "linux")]
use std::os::unix::fs::OpenOptionsExt;
use std::sync::LazyLock;
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
@@ -99,7 +97,7 @@ impl VirtualFile {
pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(
path: P,
open_options: &OpenOptions,
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut open_options: OpenOptions,
ctx: &RequestContext,
) -> Result<Self, std::io::Error> {
let mode = get_io_mode();
@@ -112,21 +110,16 @@ impl VirtualFile {
#[cfg(target_os = "linux")]
(IoMode::DirectRw, _) => true,
};
let open_options = open_options.clone();
let open_options = if set_o_direct {
if set_o_direct {
#[cfg(target_os = "linux")]
{
let mut open_options = open_options;
open_options.custom_flags(nix::libc::O_DIRECT);
open_options
open_options = open_options.custom_flags(nix::libc::O_DIRECT);
}
#[cfg(not(target_os = "linux"))]
unreachable!(
"O_DIRECT is not supported on this platform, IoMode's that result in set_o_direct=true shouldn't even be defined"
);
} else {
open_options
};
}
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
Ok(VirtualFile { inner, _mode: mode })
}
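// With the owned-builder OpenOptions (see the OpenOptions changes further below),
// a call site can now chain options without a trailing .clone()/.to_owned().
// A hedged sketch (the path and ctx are illustrative):
//
//     let opts = OpenOptions::new().read(true).write(true).create(true).truncate(true);
//     let file = VirtualFile::open_with_options_v2("a/layer/file", opts, &ctx).await?;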
@@ -530,7 +523,7 @@ impl VirtualFileInner {
path: P,
ctx: &RequestContext,
) -> Result<VirtualFileInner, std::io::Error> {
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true).clone(), ctx).await
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await
}
/// Open a file with given options.
@@ -558,10 +551,11 @@ impl VirtualFileInner {
// It would perhaps be nicer to check just for the read and write flags
// explicitly, but OpenOptions doesn't contain any functions to read flags,
// only to set them.
let mut reopen_options = open_options.clone();
reopen_options.create(false);
reopen_options.create_new(false);
reopen_options.truncate(false);
let reopen_options = open_options
.clone()
.create(false)
.create_new(false)
.truncate(false);
let vfile = VirtualFileInner {
handle: RwLock::new(handle),
@@ -1307,7 +1301,7 @@ mod tests {
opts: OpenOptions,
ctx: &RequestContext,
) -> Result<MaybeVirtualFile, anyhow::Error> {
let vf = VirtualFile::open_with_options_v2(&path, &opts, ctx).await?;
let vf = VirtualFile::open_with_options_v2(&path, opts, ctx).await?;
Ok(MaybeVirtualFile::VirtualFile(vf))
}
}
@@ -1374,7 +1368,7 @@ mod tests {
let _ = file_a.read_string_at(0, 1, &ctx).await.unwrap_err();
// Close the file and re-open for reading
let mut file_a = A::open(path_a, OpenOptions::new().read(true).to_owned(), &ctx).await?;
let mut file_a = A::open(path_a, OpenOptions::new().read(true), &ctx).await?;
// cannot write to a file opened in read-only mode
let _ = file_a
@@ -1393,8 +1387,7 @@ mod tests {
.read(true)
.write(true)
.create(true)
.truncate(true)
.to_owned(),
.truncate(true),
&ctx,
)
.await?;
@@ -1412,12 +1405,7 @@ mod tests {
let mut vfiles = Vec::new();
for _ in 0..100 {
let mut vfile = A::open(
path_b.clone(),
OpenOptions::new().read(true).to_owned(),
&ctx,
)
.await?;
let mut vfile = A::open(path_b.clone(), OpenOptions::new().read(true), &ctx).await?;
assert_eq!("FOOBAR", vfile.read_string_at(0, 6, &ctx).await?);
vfiles.push(vfile);
}
@@ -1466,7 +1454,7 @@ mod tests {
for _ in 0..VIRTUAL_FILES {
let f = VirtualFileInner::open_with_options(
&test_file_path,
OpenOptions::new().read(true).clone(),
OpenOptions::new().read(true),
&ctx,
)
.await?;

View File

@@ -1,6 +1,7 @@
//! Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`];
use std::os::fd::OwnedFd;
use std::os::unix::fs::OpenOptionsExt;
use std::path::Path;
use super::io_engine::IoEngine;
@@ -43,7 +44,7 @@ impl OpenOptions {
self.write
}
pub fn read(&mut self, read: bool) -> &mut OpenOptions {
pub fn read(mut self, read: bool) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.read(read);
@@ -56,7 +57,7 @@ impl OpenOptions {
self
}
pub fn write(&mut self, write: bool) -> &mut OpenOptions {
pub fn write(mut self, write: bool) -> Self {
self.write = write;
match &mut self.inner {
Inner::StdFs(x) => {
@@ -70,7 +71,7 @@ impl OpenOptions {
self
}
pub fn create(&mut self, create: bool) -> &mut OpenOptions {
pub fn create(mut self, create: bool) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.create(create);
@@ -83,7 +84,7 @@ impl OpenOptions {
self
}
pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions {
pub fn create_new(mut self, create_new: bool) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.create_new(create_new);
@@ -96,7 +97,7 @@ impl OpenOptions {
self
}
pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions {
pub fn truncate(mut self, truncate: bool) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.truncate(truncate);
@@ -124,10 +125,8 @@ impl OpenOptions {
}
}
}
}
impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
fn mode(&mut self, mode: u32) -> &mut OpenOptions {
pub fn mode(mut self, mode: u32) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.mode(mode);
@@ -140,7 +139,7 @@ impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
self
}
fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions {
pub fn custom_flags(mut self, flags: i32) -> Self {
match &mut self.inner {
Inner::StdFs(x) => {
let _ = x.custom_flags(flags);

View File

@@ -1,10 +1,10 @@
# pgxs/neon/Makefile
MODULE_big = neon
OBJS = \
$(WIN32RES) \
communicator.o \
communicator_new.o \
extension_server.o \
file_cache.o \
hll.o \
@@ -22,8 +22,7 @@ OBJS = \
walproposer.o \
walproposer_pg.o \
control_plane_connector.o \
walsender_hooks.o \
$(LIBCOMMUNICATOR_PATH)/libcommunicator.a
walsender_hooks.o
PG_CPPFLAGS = -I$(libpq_srcdir)
SHLIB_LINK_INTERNAL = $(libpq)
@@ -37,6 +36,8 @@ DATA = \
neon--1.2--1.3.sql \
neon--1.3--1.4.sql \
neon--1.4--1.5.sql \
neon--1.5--1.6.sql \
neon--1.6--1.5.sql \
neon--1.5--1.4.sql \
neon--1.4--1.3.sql \
neon--1.3--1.2.sql \

View File

@@ -687,8 +687,14 @@ prefetch_wait_for(uint64 ring_index)
END_PREFETCH_RECEIVE_WORK();
CHECK_FOR_INTERRUPTS();
}
return result;
if (result)
{
/* Check that the slot was actually received (the server can be disconnected in prefetch_pump_state() called from CHECK_FOR_INTERRUPTS) */
PrefetchRequest *slot = GetPrfSlot(ring_index);
return slot->status == PRFS_RECEIVED;
}
return false;
}
/*

View File

@@ -1,372 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bytes"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "communicator"
version = "0.1.0"
dependencies = [
"tonic",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "futures-core"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "http"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
dependencies = [
"bytes",
"fnv",
"itoa",
]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
]
[[package]]
name = "http-body-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
"futures-core",
"http",
"http-body",
"pin-project-lite",
]
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "libc"
version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff70ce3e48ae43fa075863cef62e8b43b71a4f2382229920e0df362592919430"
dependencies = [
"adler2",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "percent-encoding"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project"
version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "proc-macro2"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.44.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
dependencies = [
"backtrace",
"pin-project-lite",
]
[[package]]
name = "tokio-stream"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tonic"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85839f0b32fd242bb3209262371d07feda6d780d16ee9d2bc88581b89da1549b"
dependencies = [
"base64",
"bytes",
"http",
"http-body",
"http-body-util",
"percent-encoding",
"pin-project",
"tokio-stream",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]]
name = "tower-service"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]]
name = "tracing"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
dependencies = [
"once_cell",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

View File

@@ -1,35 +0,0 @@
[package]
name = "communicator"
version = "0.1.0"
edition = "2024"
[lib]
crate-type = ["staticlib"]
[dependencies]
bytes.workspace = true
http.workspace = true
libc.workspace = true
nix.workspace = true
atomic_enum = "0.3.0"
prost.workspace = true
tonic = { version = "0.12.0", default-features = false, features=["codegen", "prost", "transport"] }
tokio = { version = "1.43.1", features = ["macros", "net", "io-util", "rt", "rt-multi-thread"] }
tokio-pipe = { version = "0.2.12" }
thiserror.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
zerocopy = "0.8.0"
zerocopy-derive = "0.8.0"
tokio-epoll-uring.workspace = true
uring-common.workspace = true
pageserver_client_grpc.workspace = true
pageserver_data_api.workspace = true
neonart.workspace = true
utils.workspace = true
[build-dependencies]
cbindgen.workspace = true

View File

@@ -1,123 +0,0 @@
# Communicator
This package provides the so-called "compute-pageserver communicator",
or just "communicator" in short. It runs in a PostgreSQL server, as
part of the neon extension, and handles the communication with the
pageservers. On the PostgreSQL side, the glue code in pgxn/neon/ uses
the communicator to implement the PostgreSQL Storage Manager (SMGR)
interface.
## Design criteria
- Low latency
- Saturate a 10 Gbit / s network interface without becoming a bottleneck
## Source code view
- `pgxn/neon/communicator_new.c`: glue code that interacts with the
  PostgreSQL code and the Rust communicator code
- `pgxn/neon/communicator/src/backend_interface.rs`: the entry point for
  calls from each backend
- `pgxn/neon/communicator/src/init.rs`: initialization at server startup
- `pgxn/neon/communicator/src/worker_process/`: worker process main loop and
  glue code
At compilation time, pgxn/neon/communicator/ produces a static
library, libcommunicator.a. It is linked to the neon.so extension
library.
The real networking code, which is independent of PostgreSQL, is in
the pageserver/client_grpc crate.
## Process view
The communicator runs in a dedicated background worker process, the
"communicator process". The communicator uses a multi-threaded Tokio
runtime to execute the IO requests, so the communicator process has
multiple threads running. That is unusual for Postgres processes, and
care must be taken to make it work.
### Backend <-> worker communication
Each backend has a number of I/O request slots in shared memory. The
slots are statically allocated for each backend, and must not be
accessed by other backends. The worker process reads requests from the
shared memory slots, and writes responses back to the slots.
To submit an IO request, first pick one of your backend's free slots,
and write the details of the IO request in the slot. Then update
the 'state' field of the slot to Submitted. That informs the worker
process that it can start processing the request. Once the state has
been set to Submitted, the backend *must not* access the slot anymore,
until the worker process sets its state to 'Completed'. In other
words, each slot is owned by either the backend or the worker process
at all times, and the 'state' field indicates who has ownership at the
moment.
To inform the worker process that a request slot has a pending IO
request, there's a pipe shared by the worker process and all backend
processes. After you have changed the slot's state to Submitted, write
the index of the request slot to the pipe. This wakes up the worker
process.
(Note that the pipe is just used for wakeups, but the worker process
is free to pick up Submitted IO requests even without receiving the
wakeup. As of this writing, it doesn't do that, but it might be useful
in the future to reduce latency even further, for example.)
When the worker process has completed processing the request, it
writes the result back in the request slot. A GetPage request can also
contain a pointer to a buffer in the shared buffer cache. In that case,
the worker process writes the resulting page contents directly to the
buffer, and only a result code in the request slot. It then updates
the 'state' field to Completed, which passes ownership back to
the originating backend. Finally, it signals the process latch of the
originating backend, waking it up.
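To make the ownership protocol concrete, here is a minimal sketch of one
slot's state machine, using hypothetical names and only three states (the
real implementation, with the extra debugging states, lives in
pgxn/neon/communicator/src/backend_comms.rs):

```rust
use std::sync::atomic::{AtomicU8, Ordering};

const IDLE: u8 = 0;      // owned by the backend, free for reuse
const SUBMITTED: u8 = 1; // owned by the worker process
const COMPLETED: u8 = 2; // owned by the backend, result is valid

struct Slot {
    state: AtomicU8,
    // request and result payloads live here, in shared memory
}

impl Slot {
    /// Backend side: fill the slot, then hand ownership to the worker.
    fn submit(&self) {
        // ... write the request details while we still own the slot ...
        // Release publishes the request contents to the worker process.
        self.state.store(SUBMITTED, Ordering::Release);
        // ... then write the slot index to the submission pipe ...
    }

    /// Backend side: poll after the process latch has been set. Acquire
    /// pairs with the worker's Release store of COMPLETED, making the
    /// result written by the worker visible to this backend.
    fn try_get_result(&self) -> bool {
        if self.state.load(Ordering::Acquire) == COMPLETED {
            // ... read the result, then reclaim the slot ...
            self.state.store(IDLE, Ordering::Relaxed);
            true
        } else {
            false
        }
    }
}
```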
### Differences between PostgreSQL v16, v17 and v18
PostgreSQL v18 introduced the new AIO mechanism. The PostgreSQL AIO
mechanism uses a scheme very similar to the one described in the previous
section for the communication between AIO worker processes and
backends. With our communicator, the AIO worker processes are not
used, but we use the same PgAioHandle request slots as in upstream.
For Neon-specific IO requests like GetDbSize, a neon request slot is
used. But for the actual IO requests, the request slot merely contains
a pointer to the PgAioHandle slot. The worker process updates the
status of that slot and calls the IO callbacks upon completion, etc.,
just like the upstream AIO worker processes do.
## Sequence diagram
    PostgreSQL    neon extension    backend_interface.rs   worker_process.rs   processor   tonic
        |                .                   .                      .              .         .
        | smgr_read()    .                   .                      .              .         .
        +--------------> +                   .                      .              .         .
        .                | rcommunicator_    .                      .              .         .
        .                | get_page_at_lsn   .                      .              .         .
        .                +-----------------> +                      .              .         .
        .                .                   | write request        .              .         .
        .                .                   | to slot              .              .         .
        .                .                   | submit_request()     .              .         .
        .                .                   +--------------------> +              .         .
        .                .                   .                      | db_size_request        .
        .                .                   .                      +------------> +         .
        .                .                   .                      .              .  TODO
### Compute <-> pageserver protocol
The protocol between Compute and the pageserver is based on gRPC. See `protos/`.


@@ -1,24 +0,0 @@
use cbindgen;
use std::env;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
cbindgen::generate(crate_dir).map_or_else(
|error| match error {
cbindgen::Error::ParseSyntaxError { .. } => {
// This means there was a syntax error in the Rust sources. Don't panic, because
// we want the build to continue and the Rust compiler to hit the error. The
// Rust compiler produces a better error message than cbindgen.
eprintln!("Generating C bindings failed because of a Rust syntax error");
}
e => panic!("Unable to generate C bindings: {:?}", e),
},
|bindings| {
bindings.write_to_file("communicator_bindings.h");
},
);
Ok(())
}


@@ -1,4 +0,0 @@
language = "C"
[enum]
prefix_with_name = true


@@ -1,204 +0,0 @@
//! This module implements a request/response "slot" for submitting requests from backends
//! to the communicator process.
//!
//! NB: The "backend" side of this code runs in Postgres backend processes,
//! which means that it is not safe to use the 'tracing' crate for logging, nor
//! to launch threads or use tokio tasks.
use std::cell::UnsafeCell;
use std::sync::atomic::fence;
use std::sync::atomic::{AtomicI32, Ordering};
use crate::neon_request::{NeonIORequest, NeonIOResult};
use atomic_enum::atomic_enum;
/// One request/response slot. Each backend has its own set of slots that it uses.
///
/// This is the moral equivalent of PgAioHandle for Postgres AIO requests
/// Like PgAioHandle, try to keep this small.
///
/// There is an array of these in shared memory. Therefore, this must be Sized.
///
/// ## Lifecycle of a request
///
/// The slot is always owned by either the backend process or the communicator
/// process, depending on the 'state'. Only the owning process is allowed to
/// read or modify the slot, except for reading the 'state' itself to check who
/// owns it.
///
/// A slot begins in the Idle state, where it is owned by the backend process.
/// To submit a request, the backend process fills the slot with the request
/// data, and changes it to the Submitted state. After changing the state, the
/// slot is owned by the communicator process, and the backend is not allowed
/// to access it until the communicator process marks it as Completed.
///
/// When the communicator process sees that the slot is in Submitted state, it
/// starts to process the request. After processing the request, it stores the
/// result in the slot, and changes the state to Completed. It is now owned by
/// the backend process again, which may now read the result, and reuse the
/// slot for a new request.
///
/// For correctness of the above protocol, we really only need two states:
/// "owned by backend" and "owned by communicator process. But to help with
/// debugging, there are a few more states. When the backend starts to fill in
/// the request details in the slot, it first sets the state from Idle to
/// Filling, and when it's done with that, from Filling to Submitted. In the
/// Filling state, the slot is still owned by the backend. Similarly, when the
/// communicator process starts to process a request, it sets it to Processing
/// state first, but the slot is still owned by the communicator process.
///
/// This struct doesn't handle waking up the communicator process when a request
/// has been submitted or when a response is ready. We only store the 'owner_procno'
/// which can be used for waking up the backend on completion, but the wakeups are
/// performed elsewhere.
pub struct NeonIOHandle {
/// similar to PgAioHandleState
state: AtomicNeonIOHandleState,
/// The owning process's ProcNumber. The worker process uses this to set the process's
/// latch on completion.
///
/// (This could be calculated from num_neon_request_slots_per_backend and the index of
/// this slot in the overall 'neon_request_slots' array)
owner_procno: AtomicI32,
/// SAFETY: This is modified by fill_request(), after it has established ownership
/// of the slot by setting state from Idle to Filling
request: UnsafeCell<NeonIORequest>,
/// valid when state is Completed
///
/// SAFETY: This is modified by RequestProcessingGuard::complete(). There can be
/// only one RequestProcessingGuard outstanding for a slot at a time, because
/// it is returned by start_processing_request() which checks the state, so
/// RequestProcessingGuard has exclusive access to the slot.
result: UnsafeCell<NeonIOResult>,
}
// The protocol described in the "Lifecycle of a request" section above ensures
// safe access to the fields.
unsafe impl Send for NeonIOHandle {}
unsafe impl Sync for NeonIOHandle {}
impl Default for NeonIOHandle {
fn default() -> NeonIOHandle {
NeonIOHandle {
owner_procno: AtomicI32::new(-1),
request: UnsafeCell::new(NeonIORequest::Empty),
result: UnsafeCell::new(NeonIOResult::Empty),
state: AtomicNeonIOHandleState::new(NeonIOHandleState::Idle),
}
}
}
#[atomic_enum]
#[derive(Eq, PartialEq)]
pub enum NeonIOHandleState {
Idle,
/// backend is filling in the request
Filling,
/// Backend has submitted the request to the communicator, but the
/// communicator process has not yet started processing it.
Submitted,
/// Communicator is processing the request
Processing,
/// Communicator has completed the request, and the 'result' field is now
/// valid, but the backend has not read the result yet.
Completed,
}
pub struct RequestProcessingGuard<'a>(&'a NeonIOHandle);
unsafe impl<'a> Send for RequestProcessingGuard<'a> {}
unsafe impl<'a> Sync for RequestProcessingGuard<'a> {}
impl<'a> RequestProcessingGuard<'a> {
pub fn get_request(&self) -> &NeonIORequest {
unsafe { &*self.0.request.get() }
}
pub fn get_owner_procno(&self) -> i32 {
self.0.owner_procno.load(Ordering::Relaxed)
}
pub fn completed(self, result: NeonIOResult) {
unsafe {
*self.0.result.get() = result;
};
// Ok, we have completed the IO. Mark the request as completed. After that,
// we no longer have ownership of the slot, and must not modify it.
let old_state = self
.0
.state
.swap(NeonIOHandleState::Completed, Ordering::Release);
assert!(old_state == NeonIOHandleState::Processing);
}
}
impl NeonIOHandle {
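/// Fill the slot with a request and publish it to the communicator process
/// (Idle -> Filling -> Submitted).
///
/// Hypothetical usage from the backend side (the real call site is
/// start_neon_request() in backend_interface.rs):
/// ```ignore
/// slots[idx as usize].fill_request(&request, my_proc_number);
/// // then write `idx` to the submission pipe to wake up the worker
/// ```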
pub fn fill_request(&self, request: &NeonIORequest, proc_number: i32) {
// Verify that the slot was previously in the Idle state, and start filling it.
//
// XXX: This step isn't strictly necessary. Assuming the caller didn't screw up
// and try to use a slot that's already in use, we could fill the slot and
// switch it directly from Idle to Submitted state.
if let Err(s) = self.state.compare_exchange(
NeonIOHandleState::Idle,
NeonIOHandleState::Filling,
Ordering::Relaxed,
Ordering::Relaxed,
) {
panic!("unexpected state in request slot: {s:?}");
}
// This fence synchronizes-with store/swap in `communicator_process_main_loop`.
fence(Ordering::Acquire);
self.owner_procno.store(proc_number, Ordering::Relaxed);
unsafe { *self.request.get() = *request }
self.state
.store(NeonIOHandleState::Submitted, Ordering::Release);
}
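/// Poll for a completed result (Completed -> Idle). Returns None if the
/// request is still being processed; typically called after the process
/// latch has been set.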
pub fn try_get_result(&self) -> Option<NeonIOResult> {
// FIXME: ordering?
let state = self.state.load(Ordering::Relaxed);
if state == NeonIOHandleState::Completed {
// This fence synchronizes-with store/swap in `communicator_process_main_loop`.
fence(Ordering::Acquire);
let result = unsafe { *self.result.get() };
self.state.store(NeonIOHandleState::Idle, Ordering::Relaxed);
Some(result)
} else {
None
}
}
pub fn start_processing_request<'a>(&'a self) -> Option<RequestProcessingGuard<'a>> {
// Read the IO request from the slot indicated in the wakeup
//
// XXX: using compare_exchange for this is not strictly necessary, as long as
// the communicator process has _some_ means of tracking which requests it's
// already processing. That could be a flag somewhere in communicator's private
// memory, for example.
if let Err(s) = self.state.compare_exchange(
NeonIOHandleState::Submitted,
NeonIOHandleState::Processing,
Ordering::Relaxed,
Ordering::Relaxed,
) {
// FIXME surprising state. This is unexpected at the moment, but if we
// started to process requests more aggressively, without waiting for the
// read from the pipe, then this could happen
panic!("unexpected state in request slot: {s:?}");
}
fence(Ordering::Acquire);
Some(RequestProcessingGuard(self))
}
}


@@ -1,196 +0,0 @@
//! This code runs in each backend process. That means that launching Rust threads, panicking
//! etc. is forbidden!
use crate::backend_comms::NeonIOHandle;
use crate::init::CommunicatorInitStruct;
use crate::integrated_cache::{BackendCacheReadOp, IntegratedCacheReadAccess};
use crate::neon_request::CCachedGetPageVResult;
use crate::neon_request::{NeonIORequest, NeonIOResult};
pub struct CommunicatorBackendStruct<'t> {
my_proc_number: i32,
next_neon_request_idx: u32,
my_start_idx: u32, // First request slot that belongs to this backend
my_end_idx: u32, // end + 1 request slot that belongs to this backend
neon_request_slots: &'t [NeonIOHandle],
submission_pipe_write_fd: std::ffi::c_int,
pending_cache_read_op: Option<BackendCacheReadOp<'t>>,
integrated_cache: &'t IntegratedCacheReadAccess<'t>,
}
#[unsafe(no_mangle)]
pub extern "C" fn rcommunicator_backend_init(
cis: Box<CommunicatorInitStruct>,
my_proc_number: i32,
) -> &'static mut CommunicatorBackendStruct<'static> {
let start_idx = my_proc_number as u32 * cis.num_neon_request_slots_per_backend;
let end_idx = start_idx + cis.num_neon_request_slots_per_backend;
let integrated_cache = Box::leak(Box::new(cis.integrated_cache_init_struct.backend_init()));
let bs: &'static mut CommunicatorBackendStruct =
Box::leak(Box::new(CommunicatorBackendStruct {
my_proc_number,
next_neon_request_idx: start_idx,
my_start_idx: start_idx,
my_end_idx: end_idx,
neon_request_slots: cis.neon_request_slots,
submission_pipe_write_fd: cis.submission_pipe_write_fd,
pending_cache_read_op: None,
integrated_cache,
}));
bs
}
/// Start a request. You can poll for its completion and get the result by
/// calling bcomm_poll_request_completion(). The communicator will wake
/// us up by setting our process latch, so to wait for the completion, wait
/// on the latch and call bcomm_poll_request_completion() every time the
/// latch is set.
///
/// Safety: The C caller must ensure that the references are valid.
#[unsafe(no_mangle)]
pub extern "C" fn bcomm_start_io_request<'t>(
bs: &'t mut CommunicatorBackendStruct,
request: &NeonIORequest,
immediate_result_ptr: &mut NeonIOResult,
) -> i32 {
assert!(bs.pending_cache_read_op.is_none());
// Check if the request can be satisfied from the cache first
if let NeonIORequest::RelSize(req) = request {
if let Some(nblocks) = bs.integrated_cache.get_rel_size(&req.reltag()) {
*immediate_result_ptr = NeonIOResult::RelSize(nblocks);
return -1;
}
}
// Create neon request and submit it
let request_idx = bs.start_neon_request(request);
// Tell the communicator about it
bs.submit_request(request_idx);
return request_idx;
}
#[unsafe(no_mangle)]
pub extern "C" fn bcomm_start_get_page_v_request<'t>(
bs: &'t mut CommunicatorBackendStruct,
request: &NeonIORequest,
immediate_result_ptr: &mut CCachedGetPageVResult,
) -> i32 {
let NeonIORequest::GetPageV(get_pagev_request) = request else {
panic!("invalid request passed to bcomm_start_get_page_v_request()");
};
assert!(bs.pending_cache_read_op.is_none());
// Check if the request can be satisfied from the cache first
let mut all_cached = true;
let read_op = bs.integrated_cache.start_read_op();
for i in 0..get_pagev_request.nblocks {
if let Some(cache_block) = read_op.get_page(
&get_pagev_request.reltag(),
get_pagev_request.block_number + i as u32,
) {
(*immediate_result_ptr).cache_block_numbers[i as usize] = cache_block;
} else {
// not found in cache
all_cached = false;
break;
}
}
if all_cached {
bs.pending_cache_read_op = Some(read_op);
return -1;
}
// Create neon request and submit it
let request_idx = bs.start_neon_request(request);
// Tell the communicator about it
bs.submit_request(request_idx);
return request_idx;
}
/// Check if a request has completed. Returns:
///
/// -1 if the request is still being processed
/// 0 on success
#[unsafe(no_mangle)]
pub extern "C" fn bcomm_poll_request_completion(
bs: &mut CommunicatorBackendStruct,
request_idx: u32,
result_p: &mut NeonIOResult,
) -> i32 {
match bs.neon_request_slots[request_idx as usize].try_get_result() {
None => -1, // still processing
Some(result) => {
*result_p = result;
0
}
}
}
// LFC functions
/// Finish a local file cache read
#[unsafe(no_mangle)]
pub extern "C" fn bcomm_finish_cache_read(bs: &mut CommunicatorBackendStruct) -> bool {
if let Some(op) = bs.pending_cache_read_op.take() {
op.finish()
} else {
panic!("bcomm_finish_cache_read() called with no cached read pending");
}
}
impl<'t> CommunicatorBackendStruct<'t> {
/// Send a wakeup to the communicator process
fn submit_request(&self, request_idx: i32) {
// wake up communicator by writing the idx to the submission pipe
//
// This can block, if the pipe is full. That should be very rare,
// because the communicator tries hard to drain the pipe to prevent
// that. Also, there's a natural upper bound on how many wakeups can be
// queued up: there is only a limited number of request slots for each
// backend.
//
// If it does block very briefly, that's not too serious.
let idxbuf = request_idx.to_ne_bytes();
let _res = nix::unistd::write(self.submission_pipe_write_fd, &idxbuf);
// FIXME: check result, return any errors
}
/// Note: there's no guarantee on when the communicator picks the request up; you should
/// ring the doorbell by writing the slot index to the submission pipe. (It might also
/// pick it up immediately.)
pub(crate) fn start_neon_request(&mut self, request: &NeonIORequest) -> i32 {
let my_proc_number = self.my_proc_number;
// Grab next free slot
// FIXME: any guarantee that there will be any?
let idx = self.next_neon_request_idx;
let next_idx = idx + 1;
self.next_neon_request_idx = if next_idx == self.my_end_idx {
self.my_start_idx
} else {
next_idx
};
self.neon_request_slots[idx as usize].fill_request(request, my_proc_number);
return idx as i32;
}
}


@@ -1,109 +0,0 @@
//! Implement the "low-level" parts of the file cache.
//!
//! This module just deals with reading and writing the file, and keeping track
//! which blocks in the cache file are in use and which are free. The "high
//! level" parts of tracking which block in the cache file corresponds to which
//! relation block is handled in 'integrated_cache' instead.
//!
//! This module is only used to access the file from the communicator
//! process. The backend processes *also* read the file (and sometimes also
//! write it?), but the backends use direct C library calls for that.
use std::fs::File;
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use tokio_epoll_uring;
use crate::BLCKSZ;
pub type CacheBlock = u64;
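// A cache block's byte offset within the file is simply
// `cache_block * BLCKSZ`; see read_block() and write_block() below.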
pub struct FileCache {
uring_system: tokio_epoll_uring::SystemHandle,
file: Arc<File>,
// TODO: there's no reclamation mechanism, the cache grows
// indefinitely. This is the next free block, i.e. the current
// size of the file
next_free_block: AtomicU64,
}
impl FileCache {
pub fn new(
file_cache_path: &Path,
uring_system: tokio_epoll_uring::SystemHandle,
) -> Result<FileCache, std::io::Error> {
let file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.truncate(true)
.create(true)
.open(file_cache_path)?;
tracing::info!("Created cache file {file_cache_path:?}");
Ok(FileCache {
file: Arc::new(file),
uring_system,
next_free_block: AtomicU64::new(0),
})
}
// File cache management
pub async fn read_block(
&self,
cache_block: CacheBlock,
dst: impl uring_common::buf::IoBufMut + Send + Sync,
) -> Result<(), std::io::Error> {
assert!(dst.bytes_total() == BLCKSZ);
let file = self.file.clone();
let ((_file, _buf), res) = self
.uring_system
.read(file, cache_block as u64 * BLCKSZ as u64, dst)
.await;
let res = res.map_err(map_io_uring_error)?;
if res != BLCKSZ {
panic!("unexpected read result");
}
Ok(())
}
pub async fn write_block(
&self,
cache_block: CacheBlock,
src: impl uring_common::buf::IoBuf + Send + Sync,
) -> Result<(), std::io::Error> {
assert!(src.bytes_init() == BLCKSZ);
let file = self.file.clone();
let ((_file, _buf), res) = self
.uring_system
.write(file, cache_block as u64 * BLCKSZ as u64, src)
.await;
let res = res.map_err(map_io_uring_error)?;
if res != BLCKSZ {
panic!("unexpected read result");
}
Ok(())
}
pub fn alloc_block(&self) -> CacheBlock {
self.next_free_block.fetch_add(1, Ordering::Relaxed)
}
}
fn map_io_uring_error(err: tokio_epoll_uring::Error<std::io::Error>) -> std::io::Error {
match err {
tokio_epoll_uring::Error::Op(err) => err,
tokio_epoll_uring::Error::System(err) => {
std::io::Error::new(std::io::ErrorKind::Other, err)
}
}
}


@@ -1,130 +0,0 @@
//! Initialization functions. These are executed in the postmaster process,
//! at different stages of server startup.
//!
//!
//! Communicator initialization steps:
//!
//! 1. At postmaster startup, before shared memory is allocated,
//! rcommunicator_shmem_size() is called to get the amount of
//! shared memory that this module needs.
//!
//! 2. Later, after the shared memory has been allocated,
//! rcommunicator_shmem_init() is called to initialize the shmem
//! area.
//!
//! Per process initialization:
//!
//! When a backend process starts up, it calls rcommunicator_backend_init().
//! In the communicator worker process, other functions are called, see
//! `worker_process` module.
use std::ffi::c_int;
use std::mem;
use crate::backend_comms::NeonIOHandle;
use crate::integrated_cache::IntegratedCacheInitStruct;
const NUM_NEON_REQUEST_SLOTS_PER_BACKEND: u32 = 5;
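// For example, with a hypothetical max_procs = 100, rcommunicator_shmem_size()
// below sizes the area for 500 request slots, five per backend.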
/// This struct is created in the postmaster process, and inherited to
/// the communicator process and all backend processes through fork()
#[repr(C)]
pub struct CommunicatorInitStruct {
#[allow(dead_code)]
pub max_procs: u32,
pub submission_pipe_read_fd: std::ffi::c_int,
pub submission_pipe_write_fd: std::ffi::c_int,
// Shared memory data structures
pub num_neon_request_slots_per_backend: u32,
pub neon_request_slots: &'static [NeonIOHandle],
pub integrated_cache_init_struct: IntegratedCacheInitStruct<'static>,
}
impl std::fmt::Debug for CommunicatorInitStruct {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
fmt.debug_struct("CommunicatorInitStruct")
.field("max_procs", &self.max_procs)
.field("submission_pipe_read_fd", &self.submission_pipe_read_fd)
.field("submission_pipe_write_fd", &self.submission_pipe_write_fd)
.field(
"num_neon_request_slots_per_backend",
&self.num_neon_request_slots_per_backend,
)
.field("neon_request_slots length", &self.neon_request_slots.len())
.finish()
}
}
#[unsafe(no_mangle)]
pub extern "C" fn rcommunicator_shmem_size(max_procs: u32) -> u64 {
let mut size = 0;
let num_neon_request_slots = max_procs * NUM_NEON_REQUEST_SLOTS_PER_BACKEND;
size += mem::size_of::<NeonIOHandle>() * num_neon_request_slots as usize;
// For integrated_cache's Allocator. TODO: make this adjustable
size += IntegratedCacheInitStruct::shmem_size(max_procs);
size as u64
}
/// Initialize the shared memory segment. Returns a backend-private
/// struct, which will be inherited by backend processes through fork
#[unsafe(no_mangle)]
pub extern "C" fn rcommunicator_shmem_init(
submission_pipe_read_fd: c_int,
submission_pipe_write_fd: c_int,
max_procs: u32,
shmem_area_ptr: *mut u8,
shmem_area_len: u64,
) -> &'static mut CommunicatorInitStruct {
let mut ptr = shmem_area_ptr;
// Carve out the request slots from the shmem area and initialize them
let num_neon_request_slots_per_backend = NUM_NEON_REQUEST_SLOTS_PER_BACKEND;
let num_neon_request_slots = max_procs * num_neon_request_slots_per_backend;
let len_used;
let neon_request_slots: &mut [NeonIOHandle] = unsafe {
ptr = ptr.add(ptr.align_offset(std::mem::align_of::<NeonIOHandle>()));
let neon_request_slots_ptr: *mut NeonIOHandle = ptr.cast();
for _i in 0..num_neon_request_slots {
let slot: *mut NeonIOHandle = ptr.cast();
// The memory is uninitialized, so use ptr::write rather than plain
// assignment; assignment would drop the (garbage) old value.
std::ptr::write(slot, NeonIOHandle::default());
ptr = ptr.byte_add(mem::size_of::<NeonIOHandle>());
}
len_used = ptr.byte_offset_from(shmem_area_ptr) as usize;
assert!(len_used <= shmem_area_len as usize);
std::slice::from_raw_parts_mut(neon_request_slots_ptr, num_neon_request_slots as usize)
};
let remaining_area =
unsafe { std::slice::from_raw_parts_mut(ptr, shmem_area_len as usize - len_used) };
// Give the rest of the area to the integrated cache
let integrated_cache_init_struct =
IntegratedCacheInitStruct::shmem_init(max_procs, remaining_area);
eprintln!(
"PIPE READ {} WRITE {}",
submission_pipe_read_fd, submission_pipe_write_fd
);
let cis: &'static mut CommunicatorInitStruct = Box::leak(Box::new(CommunicatorInitStruct {
max_procs,
submission_pipe_read_fd,
submission_pipe_write_fd,
num_neon_request_slots_per_backend: NUM_NEON_REQUEST_SLOTS_PER_BACKEND,
neon_request_slots,
integrated_cache_init_struct,
}));
cis
}


@@ -1,423 +0,0 @@
//! Integrated communicator cache
//!
//! Tracks:
//! - Relation sizes and existence
//! - Last-written LSN
//! - TODO: Block cache (also known as LFC)
//!
//! TODO: limit the size
//! TODO: concurrency
//!
//! Note: This deals with "relations", which is really just one "relation fork" in Postgres
//! terms. RelFileLocator + ForkNumber is the key.
use utils::lsn::Lsn;
use crate::file_cache::{CacheBlock, FileCache};
use pageserver_data_api::model::RelTag;
use neonart;
use neonart::TreeInitStruct;
const CACHE_AREA_SIZE: usize = 10 * 1024 * 1024;
/// This struct is stored in the shared memory segment.
struct IntegratedCacheShmemData {
allocator: neonart::Allocator,
}
/// This struct is initialized at postmaster startup, and passed to all the processes via fork().
pub struct IntegratedCacheInitStruct<'t> {
shmem_data: &'t IntegratedCacheShmemData,
handle: TreeInitStruct<'t, TreeKey, TreeEntry>,
}
/// Represents write-access to the integrated cache. This is used by the communicator process.
pub struct IntegratedCacheWriteAccess<'t> {
cache_tree: neonart::TreeWriteAccess<'t, TreeKey, TreeEntry>,
global_lw_lsn: Lsn,
file_cache: Option<FileCache>,
}
/// Represents read-only access to the integrated cache. Backend processes have this.
pub struct IntegratedCacheReadAccess<'t> {
cache_tree: neonart::TreeReadAccess<'t, TreeKey, TreeEntry>,
}
impl<'t> IntegratedCacheInitStruct<'t> {
/// Return the desired size in bytes of the shared memory area to reserve for the integrated
/// cache.
pub fn shmem_size(_max_procs: u32) -> usize {
CACHE_AREA_SIZE
}
/// Initialize the shared memory segment. This runs once in postmaster. Returns a struct which
/// will be inherited by all processes through fork.
pub fn shmem_init(_max_procs: u32, shmem_area: &'t mut [u8]) -> IntegratedCacheInitStruct<'t> {
assert!(shmem_area.len() > std::mem::size_of::<IntegratedCacheShmemData>());
let mut ptr = shmem_area.as_mut_ptr();
let shmem_data_ptr;
let len_used;
unsafe {
ptr = ptr.byte_add(ptr.align_offset(align_of::<IntegratedCacheShmemData>()));
shmem_data_ptr = ptr.cast::<IntegratedCacheShmemData>();
ptr = ptr.byte_add(std::mem::size_of::<IntegratedCacheShmemData>());
len_used = ptr.byte_offset_from(shmem_area.as_mut_ptr()) as usize;
};
assert!(len_used < shmem_area.len());
let area_ptr = ptr;
let area_size = shmem_area.len() - len_used;
let cache_area: &mut [u8] = unsafe { std::slice::from_raw_parts_mut(area_ptr, area_size) };
let allocator = neonart::Allocator::new(cache_area);
// Initialize the shared memory area
let shmem_data = unsafe {
*shmem_data_ptr = IntegratedCacheShmemData { allocator };
&*shmem_data_ptr
};
let tree_handle = TreeInitStruct::new(&shmem_data.allocator);
IntegratedCacheInitStruct {
shmem_data,
handle: tree_handle,
}
}
pub fn worker_process_init(
self,
lsn: Lsn,
file_cache: Option<FileCache>,
) -> IntegratedCacheWriteAccess<'t> {
let IntegratedCacheInitStruct {
shmem_data: _shmem,
handle,
} = self;
let tree_writer = handle.attach_writer();
IntegratedCacheWriteAccess {
cache_tree: tree_writer,
global_lw_lsn: lsn,
file_cache,
}
}
pub fn backend_init(self) -> IntegratedCacheReadAccess<'t> {
let IntegratedCacheInitStruct {
shmem_data: _shmem,
handle,
} = self;
let tree_reader = handle.attach_reader();
IntegratedCacheReadAccess {
cache_tree: tree_reader,
}
}
}
#[derive(Clone)]
enum TreeEntry {
Rel(RelEntry),
Block(BlockEntry),
}
#[derive(Clone)]
struct BlockEntry {
lw_lsn: Lsn,
cache_block: Option<CacheBlock>,
}
#[derive(Clone, Default)]
struct RelEntry {
/// cached size of the relation
nblocks: Option<u32>,
}
#[derive(
Clone,
Debug,
PartialEq,
PartialOrd,
Eq,
Ord,
zerocopy_derive::IntoBytes,
zerocopy_derive::Immutable,
)]
#[repr(packed)]
struct TreeKey {
spc_oid: u32,
db_oid: u32,
rel_number: u32,
fork_number: u8,
block_number: u32,
}
impl From<&RelTag> for TreeKey {
fn from(val: &RelTag) -> TreeKey {
TreeKey {
spc_oid: val.spc_oid,
db_oid: val.db_oid,
rel_number: val.rel_number,
fork_number: val.fork_number,
block_number: u32::MAX,
}
}
}
impl From<(&RelTag, u32)> for TreeKey {
fn from(val: (&RelTag, u32)) -> TreeKey {
TreeKey {
spc_oid: val.0.spc_oid,
db_oid: val.0.db_oid,
rel_number: val.0.rel_number,
fork_number: val.0.fork_number,
block_number: val.1,
}
}
}
impl neonart::Key for TreeKey {
// spc_oid + db_oid + rel_number + fork_number (u8) + block_number
const KEY_LEN: usize = 4 + 4 + 4 + 1 + 4;
fn as_bytes(&self) -> &[u8] {
zerocopy::IntoBytes::as_bytes(self)
}
}
impl neonart::Value for TreeEntry {}
/// Return type used in the cache's get_*() functions. 'Found' means that the page, or other
/// information that was queried, exists in the cache.
pub enum CacheResult<V> {
/// The queried page or other information existed in the cache.
Found(V),
/// The cache doesn't contain the page (or other queried information, like relation size). The
/// Lsn is the 'not_modified_since' LSN that should be used in the request to the pageserver to
/// read the page.
NotFound(Lsn),
}
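// Typical caller pattern, as a hypothetical sketch (the real call sites are
// in worker_process.rs): a cache miss yields the LSN to use as
// 'not_modified_since' in the pageserver request.
//
//     match cache.get_rel_size(&rel) {
//         CacheResult::Found(nblocks) => nblocks,
//         CacheResult::NotFound(lsn) => fetch_from_pageserver(&rel, lsn).await?,
//     }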
impl<'t> IntegratedCacheWriteAccess<'t> {
pub fn get_rel_size(&'t self, rel: &RelTag) -> CacheResult<u32> {
let r = self.cache_tree.start_read();
if let Some(nblocks) = get_rel_size(&r, rel) {
CacheResult::Found(nblocks)
} else {
CacheResult::NotFound(self.global_lw_lsn)
}
}
pub async fn get_page(
&'t self,
rel: &RelTag,
block_number: u32,
dst: impl uring_common::buf::IoBufMut + Send + Sync,
) -> Result<CacheResult<()>, std::io::Error> {
let r = self.cache_tree.start_read();
if let Some(block_tree_entry) = r.get(&TreeKey::from((rel, block_number))) {
let block_entry = if let TreeEntry::Block(e) = block_tree_entry {
e
} else {
panic!("unexpected tree entry type for block key");
};
if let Some(cache_block) = block_entry.cache_block {
self.file_cache
.as_ref()
.unwrap()
.read_block(cache_block, dst)
.await?;
Ok(CacheResult::Found(()))
} else {
Ok(CacheResult::NotFound(block_entry.lw_lsn))
}
} else {
Ok(CacheResult::NotFound(self.global_lw_lsn))
}
}
pub async fn page_is_cached(
&'t self,
rel: &RelTag,
block_number: u32,
) -> Result<CacheResult<()>, std::io::Error> {
let r = self.cache_tree.start_read();
if let Some(block_tree_entry) = r.get(&TreeKey::from((rel, block_number))) {
let block_entry = if let TreeEntry::Block(e) = block_tree_entry {
e
} else {
panic!("unexpected tree entry type for block key");
};
if let Some(_cache_block) = block_entry.cache_block {
Ok(CacheResult::Found(()))
} else {
Ok(CacheResult::NotFound(block_entry.lw_lsn))
}
} else {
Ok(CacheResult::NotFound(self.global_lw_lsn))
}
}
/// Does the relation exist? CacheResult::NotFound means that the cache doesn't contain that
/// information, i.e. we don't know if the relation exists or not.
pub fn get_rel_exists(&'t self, rel: &RelTag) -> CacheResult<bool> {
// we don't currently cache negative entries, so if the relation is in the cache, it exists
let r = self.cache_tree.start_read();
if let Some(_rel_entry) = r.get(&TreeKey::from(rel)) {
CacheResult::Found(true)
} else {
CacheResult::NotFound(self.global_lw_lsn)
}
}
pub fn get_db_size(&'t self, _db_oid: u32) -> CacheResult<u64> {
// FIXME: is this the right LSN?
CacheResult::NotFound(self.global_lw_lsn)
}
pub fn remember_rel_size(&'t self, rel: &RelTag, nblocks: u32) {
let mut w = self.cache_tree.start_write();
w.insert(
&TreeKey::from(rel),
TreeEntry::Rel(RelEntry {
nblocks: Some(nblocks),
}),
);
}
/// Remember the given page contents in the cache.
pub async fn remember_page(
&'t self,
rel: &RelTag,
block_number: u32,
src: impl uring_common::buf::IoBuf + Send + Sync,
lw_lsn: Lsn,
) {
if let Some(file_cache) = self.file_cache.as_ref() {
let mut w = self.cache_tree.start_write();
let key = TreeKey::from((rel, block_number));
let mut cache_block = None;
w.update_with_fn(&key, |existing| {
if let Some(existing) = existing {
let mut block_entry = if let TreeEntry::Block(e) = existing.clone() {
e
} else {
panic!("unexpected tree entry type for block key");
};
block_entry.lw_lsn = lw_lsn;
if block_entry.cache_block.is_none() {
block_entry.cache_block = Some(file_cache.alloc_block());
}
cache_block = block_entry.cache_block;
Some(TreeEntry::Block(block_entry))
} else {
cache_block = Some(file_cache.alloc_block());
Some(TreeEntry::Block(BlockEntry {
lw_lsn,
cache_block,
}))
}
});
let cache_block = cache_block.unwrap();
file_cache
.write_block(cache_block, src)
.await
.expect("error writing to cache");
}
}
/// Forget information about given relation in the cache. (For DROP TABLE and such)
pub fn forget_rel(&'t self, rel: &RelTag) {
// FIXME: not implemented properly. smgrexists() would still return true for this
let mut w = self.cache_tree.start_write();
w.insert(
&TreeKey::from(rel),
TreeEntry::Rel(RelEntry { nblocks: None }),
);
}
}
/// Read relation size from the cache.
///
/// This is in a separate function so that it can be shared by
/// IntegratedCacheReadAccess::get_rel_size() and IntegratedCacheWriteAccess::get_rel_size()
fn get_rel_size<'t>(r: &neonart::TreeReadGuard<TreeKey, TreeEntry>, rel: &RelTag) -> Option<u32> {
if let Some(existing) = r.get(&TreeKey::from(rel)) {
let rel_entry = if let TreeEntry::Rel(e) = existing {
e
} else {
panic!("unexpected tree entry type for rel key");
};
rel_entry.nblocks
} else {
None
}
}
/// Accessor for other backends
///
/// This allows backends to read pages from the cache directly, on their own, without making a
/// request to the communicator process.
impl<'t> IntegratedCacheReadAccess<'t> {
pub fn get_rel_size(&'t self, rel: &RelTag) -> Option<u32> {
get_rel_size(&self.cache_tree.start_read(), rel)
}
pub fn start_read_op(&'t self) -> BackendCacheReadOp<'t> {
let r = self.cache_tree.start_read();
BackendCacheReadOp { read_guard: r }
}
}
pub struct BackendCacheReadOp<'t> {
read_guard: neonart::TreeReadGuard<'t, TreeKey, TreeEntry>,
}
impl<'e> BackendCacheReadOp<'e> {
/// Initiate a read of the page from the cache.
///
/// This returns the "cache block number", i.e. the block number within the cache file, where
/// the page's contents is stored. To get the page contents, the caller needs to read that block
/// from the cache file. The enclosing BackendCacheReadOp acts as a guard that you must hold
/// while you perform the read. It's possible that while you are performing the read, the
/// cache block is invalidated. After you have completed the read, call
/// BackendCacheReadOp::finish() to check whether the read was in fact valid. If it was
/// concurrently invalidated, you need to retry.
pub fn get_page(&self, rel: &RelTag, block_number: u32) -> Option<u64> {
if let Some(block_tree_entry) = self.read_guard.get(&TreeKey::from((rel, block_number))) {
let block_entry = if let TreeEntry::Block(e) = block_tree_entry {
e
} else {
panic!("unexpected tree entry type for block key");
};
block_entry.cache_block
} else {
None
}
}
pub fn finish(self) -> bool {
// TODO: currently, we use a spinlock to protect the in-memory tree, so concurrent
// invalidations are not possible. But the plan is to switch to optimistic locking,
// and once we do that, this would return 'false' if the optimistic locking failed and
// you need to retry.
true
}
}


@@ -1,25 +0,0 @@
//!
//! Three main parts:
//! - async tokio communicator core, which receives requests and processes them.
//! - Main loop and requests queues, which routes requests from backends to the core
//! - the per-backend glue code, which submits requests
//!
mod backend_comms;
// mark this 'pub', because these functions are called from C code. Otherwise, the compiler
// complains about a bunch of structs and enum variants being unused, because it thinks
// the functions that use them are never called. There are some C-callable functions in
// other modules too, but marking this as pub is currently enough to silence the warnings
//
// TODO: perhaps collect *all* the extern "C" functions to one module?
pub mod backend_interface;
mod file_cache;
mod init;
mod integrated_cache;
mod neon_request;
mod worker_process;
// FIXME get this from postgres headers somehow
pub const BLCKSZ: usize = 8192;


@@ -1,346 +0,0 @@
type CLsn = u64;
type COid = u32;
// This conveniently matches PG_IOV_MAX
pub const MAX_GETPAGEV_PAGES: usize = 32;
use pageserver_data_api::model;
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub enum NeonIORequest {
Empty,
// Read requests. These are C-friendly variants of the corresponding structs in
// pageserver_data_api::model.
RelExists(CRelExistsRequest),
RelSize(CRelSizeRequest),
GetPageV(CGetPageVRequest),
PrefetchV(CPrefetchVRequest),
DbSize(CDbSizeRequest),
// Write requests. These are needed to keep the relation size cache and LFC up-to-date.
// They are not sent to the pageserver.
WritePage(CWritePageRequest),
RelExtend(CRelExtendRequest),
RelZeroExtend(CRelZeroExtendRequest),
RelCreate(CRelCreateRequest),
RelTruncate(CRelTruncateRequest),
RelUnlink(CRelUnlinkRequest),
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub enum NeonIOResult {
Empty,
RelExists(bool),
RelSize(u32),
/// the result pages are written to the shared memory addresses given in the request
GetPageV,
/// A prefetch request returns as soon as the request has been received by the communicator.
/// It is processed in the background.
PrefetchVLaunched,
DbSize(u64),
// FIXME design compact error codes. Can't easily pass a string or other dynamic data.
// currently, this is 'errno'
Error(i32),
Aborted,
/// used for all write requests
WriteOK,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CCachedGetPageVResult {
pub cache_block_numbers: [u64; MAX_GETPAGEV_PAGES],
}
/// ShmemBuf represents a buffer in shared memory.
///
/// SAFETY: The pointer must point to an area in shared memory. The functions allow you to liberally
/// get a mutable pointer to the contents; it is the caller's responsibility to ensure that you
/// don't access a buffer that's you're not allowed to. Inappropriate access to the buffer doesn't
/// violate Rust's safety semantics, but it will mess up and crash Postgres.
///
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct ShmemBuf {
// Defines where the result is written. Must point into a buffer in shared memory!
pub ptr: *mut u8,
}
unsafe impl Send for ShmemBuf {}
unsafe impl Sync for ShmemBuf {}
unsafe impl uring_common::buf::IoBuf for ShmemBuf {
fn stable_ptr(&self) -> *const u8 {
self.ptr
}
fn bytes_init(&self) -> usize {
crate::BLCKSZ
}
fn bytes_total(&self) -> usize {
crate::BLCKSZ
}
}
unsafe impl uring_common::buf::IoBufMut for ShmemBuf {
fn stable_mut_ptr(&mut self) -> *mut u8 {
self.ptr
}
unsafe fn set_init(&mut self, pos: usize) {
if pos > crate::BLCKSZ {
panic!(
"set_init called past end of buffer, pos {}, buffer size {}",
pos,
crate::BLCKSZ
);
}
}
}
impl ShmemBuf {
pub fn as_mut_ptr(&self) -> *mut u8 {
self.ptr
}
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelExistsRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelSizeRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CGetPageVRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub nblocks: u8,
// These fields define where the result is written. Must point into a buffer in shared memory!
pub dest: [ShmemBuf; MAX_GETPAGEV_PAGES],
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CPrefetchVRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub nblocks: u8,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CDbSizeRequest {
pub db_oid: COid,
pub request_lsn: CLsn,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CWritePageRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub lsn: CLsn,
// These fields define where the result is written. Must point into a buffer in shared memory!
pub src: ShmemBuf,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelExtendRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub lsn: CLsn,
// These fields define page contents. Must point into a buffer in shared memory!
pub src_ptr: usize,
pub src_size: u32,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelZeroExtendRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub nblocks: u32,
pub lsn: CLsn,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelCreateRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelTruncateRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub nblocks: u32,
}
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CRelUnlinkRequest {
pub spc_oid: COid,
pub db_oid: COid,
pub rel_number: u32,
pub fork_number: u8,
pub block_number: u32,
pub nblocks: u32,
}
impl CRelExistsRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelSizeRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CGetPageVRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CPrefetchVRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CWritePageRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelExtendRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelZeroExtendRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelCreateRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelTruncateRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}
impl CRelUnlinkRequest {
pub fn reltag(&self) -> model::RelTag {
model::RelTag {
spc_oid: self.spc_oid,
db_oid: self.db_oid,
rel_number: self.rel_number,
fork_number: self.fork_number,
}
}
}


@@ -1,28 +0,0 @@
//! C callbacks to PostgreSQL facilities that the neon extension needs
//! to provide. These are implemented in `neon/pgxn/communicator_new.c`.
//! The function signatures must match!
//!
//! These are called from the communicator threads! Careful what you do, most
//! Postgres functions are not safe to call in that context.
use utils::lsn::Lsn;
unsafe extern "C" {
pub fn notify_proc_unsafe(procno: std::ffi::c_int);
pub fn callback_set_my_latch_unsafe();
pub fn callback_get_request_lsn_unsafe() -> u64;
}
// safe wrappers
pub(super) fn notify_proc(procno: std::ffi::c_int) {
unsafe { notify_proc_unsafe(procno) };
}
pub(super) fn callback_set_my_latch() {
unsafe { callback_set_my_latch_unsafe() };
}
pub(super) fn get_request_lsn() -> Lsn {
Lsn(unsafe { callback_get_request_lsn_unsafe() })
}


@@ -1,229 +0,0 @@
//! Glue code to hook up Rust logging, with the `tracing` crate, to the PostgreSQL log
//!
//! In the Rust threads, the log messages are written to a mpsc Channel, and the Postgres
//! process latch is raised. That wakes up the loop in the main thread. It reads the
//! message from the channel and ereport()s it. This ensures that only one thread, the main
//! thread, calls the PostgreSQL logging routines at any time.
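//!
//! The flow, roughly:
//!
//!   tracing event (any thread) --> sync_channel --> raise process latch
//!   main thread: pump_logging() --> ereport()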
use std::sync::mpsc::sync_channel;
use std::sync::mpsc::{Receiver, SyncSender};
use std::sync::mpsc::{TryRecvError, TrySendError};
use tracing::info;
use tracing::{Event, Level, Metadata, Subscriber};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::fmt::FmtContext;
use tracing_subscriber::fmt::FormatEvent;
use tracing_subscriber::fmt::FormatFields;
use tracing_subscriber::fmt::FormattedFields;
use tracing_subscriber::fmt::MakeWriter;
use tracing_subscriber::fmt::format::Writer;
use tracing_subscriber::registry::LookupSpan;
use crate::worker_process::callbacks::callback_set_my_latch;
pub struct LoggingState {
receiver: Receiver<FormattedEventWithMeta>,
}
/// Called once, at worker process startup. The returned LoggingState is passed back
/// in the subsequent calls to `pump_logging`. It is opaque to the C code.
#[unsafe(no_mangle)]
pub extern "C" fn configure_logging() -> Box<LoggingState> {
let (sender, receiver) = sync_channel(1000);
let maker = Maker { channel: sender };
use tracing_subscriber::prelude::*;
let r = tracing_subscriber::registry();
let r = r.with(
tracing_subscriber::fmt::layer()
.event_format(SimpleFormatter::new())
.with_writer(maker)
// TODO: derive this from log_min_messages?
.with_filter(LevelFilter::from_level(Level::INFO)),
);
r.init();
info!("communicator process logging started");
let state = LoggingState { receiver };
Box::new(state)
}
/// Read one message from the logging queue. This is essentially a wrapper to Receiver,
/// with a C-friendly signature.
///
/// The message is copied into *errbuf, which is a caller-supplied buffer of size `errbuf_len`.
/// If the message doesn't fit in the buffer, it is truncated. It is always NULL-terminated.
///
/// The error level is returned in *elevel_p. It's one of the PostgreSQL error levels, see elog.h.
#[unsafe(no_mangle)]
pub extern "C" fn pump_logging(
state: &mut LoggingState,
errbuf: *mut u8,
errbuf_len: u32,
elevel_p: &mut i32,
) -> i32 {
let msg = match state.receiver.try_recv() {
Err(TryRecvError::Empty) => return 0,
Err(TryRecvError::Disconnected) => return -1,
Ok(msg) => msg,
};
let src: &[u8] = &msg.message;
let dst = errbuf;
let len = std::cmp::min(src.len(), errbuf_len as usize - 1);
unsafe {
std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
*(errbuf.add(len)) = b'\0'; // NULL terminator
}
// XXX: these levels are copied from PostgreSQL's elog.h. Introduce another enum
// to hide these?
*elevel_p = match msg.level {
Level::TRACE => 10, // DEBUG5
Level::DEBUG => 14, // DEBUG1
Level::INFO => 17, // INFO
Level::WARN => 19, // WARNING
Level::ERROR => 21, // ERROR
};
1
}
//---- The following functions can be called from any thread ----
#[derive(Clone)]
struct FormattedEventWithMeta {
message: Vec<u8>,
level: tracing::Level,
}
impl Default for FormattedEventWithMeta {
fn default() -> Self {
FormattedEventWithMeta {
message: Vec::new(),
level: tracing::Level::DEBUG,
}
}
}
struct EventBuilder<'a> {
event: FormattedEventWithMeta,
maker: &'a Maker,
}
impl<'a> std::io::Write for EventBuilder<'a> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.event.message.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.maker.send_event(self.event.clone());
Ok(())
}
}
impl<'a> Drop for EventBuilder<'a> {
fn drop(&mut self) {
let maker = self.maker;
let event = std::mem::take(&mut self.event);
maker.send_event(event);
}
}
struct Maker {
channel: SyncSender<FormattedEventWithMeta>,
}
impl<'a> MakeWriter<'a> for Maker {
type Writer = EventBuilder<'a>;
fn make_writer(&'a self) -> Self::Writer {
panic!("not expected to be called when make_writer_for is implemented");
}
fn make_writer_for(&'a self, meta: &Metadata<'_>) -> Self::Writer {
EventBuilder {
event: FormattedEventWithMeta {
message: Vec::new(),
level: *meta.level(),
},
maker: self,
}
}
}
impl Maker {
fn send_event(&self, e: FormattedEventWithMeta) {
match self.channel.try_send(e) {
Ok(()) => {
// notify the main thread
callback_set_my_latch();
}
Err(TrySendError::Disconnected(_)) => {}
Err(TrySendError::Full(_)) => {
// TODO: record that some messages were lost
}
}
}
}
/// Simple formatter implementation for tracing_subscriber, which prints the log
/// spans and the message part like the default formatter, but without a timestamp or error
/// level. The error level is captured separately in `FormattedEventWithMeta`,
/// and when the error is printed by the main thread, with PostgreSQL ereport(),
/// it gets a timestamp at that point. (The timestamp printed will therefore lag
/// behind the timestamp on the event here, if the main thread doesn't process
/// the log message promptly)
struct SimpleFormatter;
impl<S, N> FormatEvent<S, N> for SimpleFormatter
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &FmtContext<'_, S, N>,
mut writer: Writer<'_>,
event: &Event<'_>,
) -> std::fmt::Result {
// Format all the spans in the event's span context.
if let Some(scope) = ctx.event_scope() {
for span in scope.from_root() {
write!(writer, "{}", span.name())?;
// `FormattedFields` is a formatted representation of the span's
// fields, which is stored in its extensions by the `fmt` layer's
// `new_span` method. The fields will have been formatted
// by the same field formatter that's provided to the event
// formatter in the `FmtContext`.
let ext = span.extensions();
let fields = &ext
.get::<FormattedFields<N>>()
.expect("will never be `None`");
// Skip formatting the fields if the span had no fields.
if !fields.is_empty() {
write!(writer, "{{{}}}", fields)?;
}
write!(writer, ": ")?;
}
}
// Write fields on the event
ctx.field_format().format_fields(writer.by_ref(), event)?;
writeln!(writer)
}
}
impl SimpleFormatter {
fn new() -> Self {
SimpleFormatter {}
}
}


@@ -1,384 +0,0 @@
use std::collections::HashMap;
use std::path::PathBuf;
use crate::backend_comms::NeonIOHandle;
use crate::file_cache::FileCache;
use crate::init::CommunicatorInitStruct;
use crate::integrated_cache::{CacheResult, IntegratedCacheWriteAccess};
use crate::neon_request::{CGetPageVRequest, CPrefetchVRequest};
use crate::neon_request::{NeonIORequest, NeonIOResult};
use pageserver_client_grpc::PageserverClient;
use pageserver_data_api::model;
use tokio::io::AsyncReadExt;
use tokio_epoll_uring::IoBuf;
use tokio_pipe::PipeRead;
use super::callbacks::{get_request_lsn, notify_proc};
use tracing::{error, info, trace};
use utils::lsn::Lsn;
pub struct CommunicatorWorkerProcessStruct<'a> {
neon_request_slots: &'a [NeonIOHandle],
pageserver_client: PageserverClient,
cache: IntegratedCacheWriteAccess<'a>,
submission_pipe_read_raw_fd: i32,
}
pub(super) async fn init(
cis: Box<CommunicatorInitStruct>,
tenant_id: String,
timeline_id: String,
auth_token: Option<String>,
shard_map: HashMap<u16, String>,
_file_cache_size: u64,
file_cache_path: Option<PathBuf>,
) -> CommunicatorWorkerProcessStruct<'static> {
let last_lsn = get_request_lsn();
let uring_system = tokio_epoll_uring::System::launch().await.unwrap();
let file_cache = if let Some(path) = file_cache_path {
Some(FileCache::new(&path, uring_system).expect("could not create cache file"))
} else {
// FIXME: temporarily for testing, use LFC even if disabled
Some(
FileCache::new(&PathBuf::from("new_filecache"), uring_system)
.expect("could not create cache file"),
)
};
// Initialize subsystems
let cache = cis
.integrated_cache_init_struct
.worker_process_init(last_lsn, file_cache);
let pageserver_client = PageserverClient::new(&tenant_id, &timeline_id, &auth_token, shard_map);
let this = CommunicatorWorkerProcessStruct {
neon_request_slots: cis.neon_request_slots,
pageserver_client,
cache,
submission_pipe_read_raw_fd: cis.submission_pipe_read_fd,
};
this
}
impl<'t> CommunicatorWorkerProcessStruct<'t> {
/// Main loop of the worker process. Receive requests from the backends and process them.
pub(super) async fn run(self: &'static Self) {
let mut idxbuf: [u8; 4] = [0; 4];
let mut submission_pipe_read =
PipeRead::from_raw_fd_checked(self.submission_pipe_read_raw_fd)
.expect("invalid pipe fd");
loop {
// Wait for a backend to ring the doorbell
match submission_pipe_read.read(&mut idxbuf).await {
Ok(4) => {}
Ok(nbytes) => panic!("short read ({nbytes} bytes) on communicator pipe"),
Err(e) => panic!("error reading from communicator pipe: {e}"),
}
let request_idx = u32::from_ne_bytes(idxbuf);
// Read the IO request from the slot indicated in the wakeup
let Some(slot) =
self.neon_request_slots[request_idx as usize].start_processing_request()
else {
// This currently should not happen. But if we have multiple threads picking up
// requests, and without waiting for the notifications, it could.
panic!("no request in slot");
};
// Ok, we have ownership of this request now. We must process
// it now, there's no going back.
//trace!("processing request {request_idx}: {request:?}");
// Spawn a separate task for every request. That's a little excessive for requests that
// can be quickly satisfied from the cache, but we expect that to be rare, because the
// requesting backend would have already checked the cache.
tokio::spawn(async move {
let result = self.handle_request(slot.get_request()).await;
let owner_procno = slot.get_owner_procno();
// Ok, we have completed the IO. Mark the request as completed. After that,
// we no longer have ownership of the slot, and must not modify it.
slot.completed(result);
// Notify the backend about the completion. (Note that the backend might see
// the completed status even before this; this is just a wakeup)
notify_proc(owner_procno);
});
}
}
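/// Build the common part of a pageserver request: 'request_lsn' is obtained
/// from Postgres via the get_request_lsn() callback, and
/// 'not_modified_since_lsn' comes from the cache lookup that missed.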
fn request_common(&self, not_modified_since_lsn: Lsn) -> model::RequestCommon {
model::RequestCommon {
request_lsn: get_request_lsn(),
not_modified_since_lsn,
}
}
async fn handle_request<'x>(self: &'static Self, req: &'x NeonIORequest) -> NeonIOResult {
match req {
NeonIORequest::Empty => {
error!("unexpected Empty IO request");
NeonIOResult::Error(-1)
}
NeonIORequest::RelExists(req) => {
let rel = req.reltag();
let not_modified_since = match self.cache.get_rel_exists(&rel) {
CacheResult::Found(exists) => return NeonIOResult::RelExists(exists),
CacheResult::NotFound(lsn) => lsn,
};
match self
.pageserver_client
.process_rel_exists_request(&model::RelExistsRequest {
common: self.request_common(not_modified_since),
rel,
})
.await
{
Ok(exists) => NeonIOResult::RelExists(exists),
Err(err) => {
info!("tonic error: {err:?}");
NeonIOResult::Error(-1)
}
}
}
NeonIORequest::RelSize(req) => {
let rel = req.reltag();
// Check the cache first
let not_modified_since = match self.cache.get_rel_size(&rel) {
CacheResult::Found(nblocks) => {
tracing::trace!("found relsize for {:?} in cache: {}", rel, nblocks);
return NeonIOResult::RelSize(nblocks);
}
CacheResult::NotFound(lsn) => lsn,
};
let common = self.request_common(not_modified_since);
match self
.pageserver_client
.process_rel_size_request(&model::RelSizeRequest {
common: common.clone(),
rel: rel.clone(),
})
.await
{
Ok(nblocks) => {
// update the cache
tracing::info!("updated relsize for {:?} in cache: {}", rel, nblocks);
self.cache.remember_rel_size(&rel, nblocks);
NeonIOResult::RelSize(nblocks)
}
Err(err) => {
info!("tonic error: {err:?}");
NeonIOResult::Error(-1)
}
}
}
NeonIORequest::GetPageV(req) => match self.handle_get_pagev_request(req).await {
Ok(()) => NeonIOResult::GetPageV,
Err(errno) => NeonIOResult::Error(errno),
},
NeonIORequest::PrefetchV(req) => {
let req = req.clone();
tokio::spawn(async move { self.handle_prefetchv_request(&req).await });
NeonIOResult::PrefetchVLaunched
}
NeonIORequest::DbSize(req) => {
// Check the cache first
let not_modified_since = match self.cache.get_db_size(req.db_oid) {
CacheResult::Found(db_size) => {
// found in the cache; return the cached value directly
return NeonIOResult::DbSize(db_size);
}
CacheResult::NotFound(lsn) => lsn,
};
match self
.pageserver_client
.process_dbsize_request(&model::DbSizeRequest {
common: self.request_common(not_modified_since),
db_oid: req.db_oid,
})
.await
{
Ok(db_size) => NeonIOResult::DbSize(db_size),
Err(err) => {
info!("tonic error: {err:?}");
NeonIOResult::Error(-1)
}
}
}
// Write requests
NeonIORequest::WritePage(req) => {
// Also store it in the LFC while we still have it
let rel = req.reltag();
self.cache
.remember_page(&rel, req.block_number, req.src, Lsn(req.lsn))
.await;
NeonIOResult::WriteOK
}
NeonIORequest::RelExtend(req) => {
self.cache
.remember_rel_size(&req.reltag(), req.block_number + 1);
NeonIOResult::WriteOK
}
NeonIORequest::RelZeroExtend(req) => {
self.cache
.remember_rel_size(&req.reltag(), req.block_number + req.nblocks);
NeonIOResult::WriteOK
}
NeonIORequest::RelCreate(req) => {
self.cache.remember_rel_size(&req.reltag(), 0);
NeonIOResult::WriteOK
}
NeonIORequest::RelTruncate(req) => {
self.cache.remember_rel_size(&req.reltag(), req.nblocks);
NeonIOResult::WriteOK
}
NeonIORequest::RelUnlink(req) => {
self.cache.forget_rel(&req.reltag());
NeonIOResult::WriteOK
}
}
}
async fn handle_get_pagev_request(&self, req: &CGetPageVRequest) -> Result<(), i32> {
let rel = req.reltag();
// Check the cache first
let mut cache_misses = Vec::new();
for i in 0..req.nblocks {
let blkno = req.block_number + i as u32;
let dest = req.dest[i as usize];
let not_modified_since = match self.cache.get_page(&rel, blkno, dest).await {
Ok(CacheResult::Found(_)) => {
// get_page already copied the block content to the destination
trace!("found blk {} in rel {:?} in LFC ", blkno, rel);
continue;
}
Ok(CacheResult::NotFound(lsn)) => lsn,
Err(_io_error) => return Err(-1), // FIXME errno?
};
cache_misses.push((blkno, not_modified_since, dest));
}
if cache_misses.is_empty() {
return Ok(());
}
let not_modified_since = cache_misses
.iter()
.map(|(_blkno, lsn, _dest)| *lsn)
.max()
.unwrap();
// TODO: Use batched protocol (see the hypothetical sketch after this function)
for (blkno, _lsn, dest) in cache_misses.iter() {
match self
.pageserver_client
.get_page(&model::GetPageRequest {
common: self.request_common(not_modified_since),
rel: rel.clone(),
block_number: *blkno,
})
.await
{
Ok(page_image) => {
// Write the received page image directly to the shared memory location
// that the backend requested.
let src: &[u8] = page_image.as_ref();
let len = std::cmp::min(src.len(), dest.bytes_total() as usize);
unsafe {
std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), len);
};
trace!("remembering blk {} in rel {:?} in LFC", blkno, rel);
// Also store it in the LFC while we have it
self.cache
.remember_page(&rel, *blkno, page_image, not_modified_since)
.await;
}
Err(err) => {
info!("tonic error: {err:?}");
return Err(-1);
}
}
}
Ok(())
}
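// A minimal sketch of the batched protocol mentioned in the TODO above.
// `GetPageBatchRequest` and `get_page_batch` are hypothetical names, not part
// of the current pageserver API; the point is that all cache misses would
// travel in a single round trip instead of one request per block:
//
//     let request = model::GetPageBatchRequest {
//         common: self.request_common(not_modified_since),
//         rel: rel.clone(),
//         block_numbers: cache_misses.iter().map(|(b, _, _)| *b).collect(),
//     };
//     for (page_image, (blkno, _, dest)) in self
//         .pageserver_client
//         .get_page_batch(&request)
//         .await?
//         .into_iter()
//         .zip(cache_misses.iter())
//     {
//         // copy to `dest` and remember in the LFC, as in the loop above
//     }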
async fn handle_prefetchv_request(
self: &'static Self,
req: &CPrefetchVRequest,
) -> Result<(), i32> {
let rel = req.reltag();
// Check the cache first
let mut cache_misses = Vec::new();
for i in 0..req.nblocks {
let blkno = req.block_number + i as u32;
let not_modified_since = match self.cache.page_is_cached(&rel, blkno).await {
Ok(CacheResult::Found(_)) => {
trace!("found blk {} in rel {:?} in LFC ", req.block_number, rel);
continue;
}
Ok(CacheResult::NotFound(lsn)) => lsn,
Err(_io_error) => return Err(-1), // FIXME errno?
};
cache_misses.push((blkno, not_modified_since));
}
if cache_misses.is_empty() {
return Ok(());
}
let not_modified_since = cache_misses.iter().map(|(_blkno, lsn)| *lsn).max().unwrap();
// TODO: spawn separate tasks for these, using the integrated cache to keep track
// of the in-flight requests (see the sketch after this function)
// TODO: Use batched protocol
for (blkno, _lsn) in cache_misses.iter() {
match self
.pageserver_client
.get_page(&model::GetPageRequest {
common: self.request_common(not_modified_since),
rel: rel.clone(),
block_number: *blkno,
})
.await
{
Ok(page_image) => {
trace!(
"prefetch completed, remembering blk {} in rel {:?} in LFC",
blkno, rel
);
self.cache
.remember_page(&rel, *blkno, page_image, not_modified_since)
.await;
}
Err(err) => {
info!("tonic error: {err:?}");
return Err(-1);
}
}
}
Ok(())
}
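// A sketch of the concurrent variant from the TODO above: issue each cache
// miss as its own tokio task so the fetches proceed in parallel. This relies
// on `self: &'static Self` (as in this function) so the spawned futures can
// borrow `self`; deduplication of in-flight requests through the integrated
// cache is left out here.
//
//     let mut tasks = tokio::task::JoinSet::new();
//     for (blkno, _lsn) in cache_misses {
//         let rel = rel.clone();
//         tasks.spawn(async move {
//             let req = model::GetPageRequest {
//                 common: self.request_common(not_modified_since),
//                 rel: rel.clone(),
//                 block_number: blkno,
//             };
//             if let Ok(img) = self.pageserver_client.get_page(&req).await {
//                 self.cache.remember_page(&rel, blkno, img, not_modified_since).await;
//             }
//         });
//     }
//     while tasks.join_next().await.is_some() {}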
}

View File

@@ -1,11 +0,0 @@
//! This code runs in the communicator worker process. It provides
//! the glue code to:
//!
//! - launch the 'processor',
//! - receive IO requests from backends and pass them to the processor,
//! - write results back to backends.
mod callbacks;
mod logging;
mod main_loop;
mod worker_interface;

View File

@@ -1,93 +0,0 @@
//! Functions called from the C code in the worker process
use std::collections::HashMap;
use std::ffi::{CStr, c_char};
use std::path::PathBuf;
use tracing::error;
use crate::init::CommunicatorInitStruct;
use crate::worker_process::main_loop;
/// Launch the communicator's tokio tasks, which do most of the work.
///
/// The caller has initialized the process as a regular PostgreSQL
/// background worker process. The shared memory segment used to
/// communicate with the backends has been allocated and initialized
/// earlier, at postmaster startup, in rcommunicator_shmem_init().
#[unsafe(no_mangle)]
pub extern "C" fn communicator_worker_process_launch(
cis: Box<CommunicatorInitStruct>,
tenant_id: *const c_char,
timeline_id: *const c_char,
auth_token: *const c_char,
shard_map: *mut *mut c_char,
nshards: u32,
file_cache_path: *const c_char,
file_cache_size: u64,
) {
// Convert the arguments into more convenient Rust types
let tenant_id = unsafe { CStr::from_ptr(tenant_id) }.to_str().unwrap();
let timeline_id = unsafe { CStr::from_ptr(timeline_id) }.to_str().unwrap();
let auth_token = {
if auth_token.is_null() {
None
} else {
let c_str = unsafe { CStr::from_ptr(auth_token) };
Some(c_str.to_str().unwrap().to_string())
}
};
let file_cache_path = {
if file_cache_path.is_null() {
None
} else {
let c_str = unsafe { CStr::from_ptr(file_cache_path) };
Some(PathBuf::from(c_str.to_str().unwrap()))
}
};
let shard_map = parse_shard_map(nshards, shard_map);
// start main loop
let runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.thread_name("communicator thread")
.build()
.unwrap();
let worker_struct = runtime.block_on(main_loop::init(
cis,
tenant_id.to_string(),
timeline_id.to_string(),
auth_token,
shard_map,
file_cache_size,
file_cache_path,
));
let worker_struct = Box::leak(Box::new(worker_struct));
let main_loop_handle = runtime.spawn(worker_struct.run());
runtime.spawn(async {
let err = main_loop_handle.await.unwrap_err();
error!("error: {err:?}");
});
// keep the runtime running after we exit this function
Box::leak(Box::new(runtime));
}
/// Convert the "shard map" from an array of C strings, indexed by shard number, to a Rust HashMap
fn parse_shard_map(nshards: u32, shard_map: *mut *mut c_char) -> HashMap<u16, String> {
let mut result: HashMap<u16, String> = HashMap::new();
let mut p = shard_map;
for i in 0..nshards {
let c_str = unsafe { CStr::from_ptr(*p) };
p = unsafe { p.add(1) };
let s = c_str.to_str().unwrap();
result.insert(i as u16, s.into());
}
result
}
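// The layout parse_shard_map assumes, shown as a test-style sketch (the
// connection strings are hypothetical): the C side passes an array of
// `nshards` NUL-terminated strings, one per shard, ordered by shard number.
//
//     let conn0 = std::ffi::CString::new("host=ps-0 port=6400").unwrap();
//     let conn1 = std::ffi::CString::new("host=ps-1 port=6400").unwrap();
//     let mut arr = [conn0.into_raw(), conn1.into_raw()];
//     let map = parse_shard_map(2, arr.as_mut_ptr());
//     assert_eq!(map[&0], "host=ps-0 port=6400");
//     assert_eq!(map[&1], "host=ps-1 port=6400");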

View File

@@ -1,953 +0,0 @@
/*-------------------------------------------------------------------------
*
* communicator_new.c
* Functions for communicating with remote pageservers.
*
* This is the "new" communicator. It consists of functions that
* are called from the smgr implementation, in pagestore_smgr.c.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/xlog.h"
#include "access/xlogdefs.h"
#if PG_VERSION_NUM >= 150000
#include "access/xlogrecovery.h"
#endif
#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "postmaster/bgworker.h"
#include "postmaster/interrupt.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/procarray.h"
#if PG_VERSION_NUM >= 170000
#include "storage/procnumber.h"
#endif
#include "storage/spin.h"
#include "tcop/tcopprot.h"
#include "communicator_new.h"
#include "neon.h"
#include "neon_perf_counters.h"
#include "pagestore_client.h"
/*
* FIXME: these are in file_cache.h, but I don't want to #include that
* here. This code shouldn't be using the C file cache for anything else than
* the GUCs.
*/
extern int lfc_size_limit;
extern char *lfc_path;
/* the rust bindings, generated by cbindgen */
#include "communicator/communicator_bindings.h"
#define MaxProcs (MaxBackends + NUM_AUXILIARY_PROCS)
static CommunicatorInitStruct *cis;
static CommunicatorBackendStruct *my_bs;
static File cache_file = 0;
typedef struct CommunicatorShmemPerBackendData
{
/*
* Latch used to notify backend of IO completion. We cannot use the
* standard process latch (MyProc->latch) because we cannot clear that
* latch as part of the IO handling, or we might cause the caller to miss
* some other events.
*/
Latch io_completion_latch;
/*
* Normally, when reading or writing pages from shared buffer cache, the
* worker process can operate directly on the shared buffer. But when
* working with a local buffer, we use this "bounce buffer" to pass the
* data to the worker process.
*
* TODO: That's slow, because it incurs an extra memory copy, and there's
* currently only one of these per backend, which means you can have only
* one such IO in progress at a time.
*/
PGIOAlignedBlock bounce_buffer;
} CommunicatorShmemPerBackendData;
typedef struct CommunicatorShmemData
{
int dummy;
CommunicatorShmemPerBackendData backends[]; /* MaxProcs */
/* rust-managed shmem area follows at next MAXALIGN boundary */
} CommunicatorShmemData;
static CommunicatorShmemData *communicator_shmem_ptr;
#define MyIOCompletionLatch (&communicator_shmem_ptr->backends[MyProcNumber].io_completion_latch)
static slock_t in_elog;
#define MAX_INFLIGHT_ASYNC_REQUESTS 5
/* request indexes of (prefetch) requests that have been started */
static int inflight_requests[MAX_INFLIGHT_ASYNC_REQUESTS];
static int num_inflight_requests = 0;
static int start_request(NeonIORequest *request, struct NeonIOResult *immediate_result_p);
static void wait_request_completion(int request_idx, struct NeonIOResult *result_p);
static void perform_request(NeonIORequest *request, struct NeonIOResult *result_p);
static void process_inflight_requests(void);
static bool bounce_needed(void *buffer);
static void *bounce_buf(void);
static void *bounce_write_if_needed(void *buffer);
PGDLLEXPORT void communicator_new_bgworker_main(Datum main_arg);
static void communicator_new_backend_exit(int code, Datum arg);
/**** Initialization functions. These run in postmaster ****/
void
pg_init_communicator_new(void)
{
BackgroundWorker bgw;
/* Initialize the background worker process */
memset(&bgw, 0, sizeof(bgw));
bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
bgw.bgw_start_time = BgWorkerStart_PostmasterStart;
snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
snprintf(bgw.bgw_function_name, BGW_MAXLEN, "communicator_new_bgworker_main");
snprintf(bgw.bgw_name, BGW_MAXLEN, "Storage communicator process");
snprintf(bgw.bgw_type, BGW_MAXLEN, "Storage communicator process");
bgw.bgw_restart_time = 5;
bgw.bgw_notify_pid = 0;
bgw.bgw_main_arg = (Datum) 0;
RegisterBackgroundWorker(&bgw);
SpinLockInit(&in_elog);
}
static size_t
communicator_new_shmem_size(void)
{
size_t size = 0;
size += MAXALIGN(
offsetof(CommunicatorShmemData, backends) +
MaxProcs * sizeof(CommunicatorShmemPerBackendData)
);
/* space needed by the rust code */
size += rcommunicator_shmem_size(MaxProcs);
return size;
}
void
communicator_new_shmem_request(void)
{
RequestAddinShmemSpace(communicator_new_shmem_size());
}
void
communicator_new_shmem_startup(void)
{
bool found;
int pipefd[2];
int rc;
size_t communicator_size;
size_t shmem_size;
void *shmem_ptr;
rc = pipe(pipefd);
if (rc != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg_internal("could not create pipe between neon communicator and backends : %m")));
if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
elog(FATAL, "fcntl(F_SETFL) failed on read-end of communicator pipe: %m");
if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
elog(FATAL, "fcntl(F_SETFL) failed on write-end of communicator pipe: %m");
shmem_size = communicator_new_shmem_size();
shmem_ptr = ShmemInitStruct("Communicator shmem state",
shmem_size,
&found);
Assert(!found);
/* Initialize the C-managed parts */
communicator_shmem_ptr = (CommunicatorShmemData *) shmem_ptr;
communicator_size = MAXALIGN(offsetof(CommunicatorShmemData, backends) + MaxProcs * sizeof(CommunicatorShmemPerBackendData));
shmem_ptr = (char *) shmem_ptr + communicator_size;
shmem_size -= communicator_size;
for (int i = 0; i < MaxProcs; i++)
InitSharedLatch(&communicator_shmem_ptr->backends[i].io_completion_latch);
/* Initialize the rust-managed parts */
cis = rcommunicator_shmem_init(pipefd[0], pipefd[1], MaxProcs, shmem_ptr, shmem_size);
}
/**** Worker process functions. These run in the communicator worker process ****/
/* Entry point for the communicator bgworker process */
void
communicator_new_bgworker_main(Datum main_arg)
{
char **connstrs;
shardno_t num_shards;
struct LoggingState *logging;
char errbuf[1000];
int elevel;
/* Establish signal handlers. */
pqsignal(SIGUSR1, procsignal_sigusr1_handler);
pqsignal(SIGHUP, SignalHandlerForConfigReload);
pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();
get_shard_map(&connstrs, &num_shards);
logging = configure_logging();
communicator_worker_process_launch(
cis,
neon_tenant,
neon_timeline,
neon_auth_token,
connstrs,
num_shards,
lfc_path,
lfc_size_limit);
cis = NULL;
elog(LOG, "communicator threads started");
for (;;)
{
int32 rc;
CHECK_FOR_INTERRUPTS();
for (;;)
{
rc = pump_logging(logging, (uint8 *) errbuf, sizeof(errbuf), &elevel);
if (rc == 0)
{
/* nothing to do */
break;
}
else if (rc == 1)
{
/* Demote ERROR and INFO to LOG: this process must not exit on error */
if (elevel == ERROR)
elevel = LOG;
if (elevel == INFO)
elevel = LOG;
elog(elevel, "[COMMUNICATOR] %s", errbuf);
}
else if (rc == -1)
{
elog(ERROR, "logging channel was closed unexpectedly");
}
}
(void) WaitLatch(MyLatch,
WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
0,
PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
}
}
/*
* Callbacks from the rust code, in the communicator process.
*
* NOTE: These must be thread-safe! Only a very limited set of PostgreSQL functions can be used here!
*
* NOTE: the signatures of these must match the Rust definitions!
*/
void
notify_proc_unsafe(int procno)
{
SetLatch(&communicator_shmem_ptr->backends[procno].io_completion_latch);
}
void
callback_set_my_latch_unsafe(void)
{
SetLatch(MyLatch);
}
/*
* FIXME: The logic from neon_get_request_lsns() needs to go here, except for
* the last-written LSN cache stuff, which is managed by the rust code now.
*/
uint64
callback_get_request_lsn_unsafe(void)
{
/*
* NB: be very careful with what you do here! This is called from tokio
* threads, so anything that tries to take LWLocks is unsafe, for example.
*
* RecoveryInProgress() is OK
*/
if (RecoveryInProgress())
{
XLogRecPtr replay_lsn = GetXLogReplayRecPtr(NULL);
return replay_lsn;
}
else
{
XLogRecPtr flushlsn;
#if PG_VERSION_NUM >= 150000
flushlsn = GetFlushRecPtr(NULL);
#else
flushlsn = GetFlushRecPtr();
#endif
return flushlsn;
}
}
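/*
 * For illustration, the kind of access that is safe from these callbacks is
 * lock-free reads of shared state, e.g. (a sketch, not code that exists here):
 *
 *     uint64 v = pg_atomic_read_u64(&some_shared_struct->counter);
 *
 * where `some_shared_struct` is a hypothetical pointer into shared memory.
 * Anything that may take an LWLock, allocate in a backend memory context, or
 * report errors with elog()/ereport() is presumably off-limits on these
 * tokio threads.
 */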
/**** Backend functions. These run in each backend ****/
/* Initialize per-backend private state */
void
communicator_new_init(void)
{
Assert(cis != NULL);
Assert(my_bs == NULL);
if (MyBgworkerEntry && strcmp(MyBgworkerEntry->bgw_function_name, "communicator_new_bgworker_main") == 0)
return;
OwnLatch(MyIOCompletionLatch);
my_bs = rcommunicator_backend_init(cis, MyProcNumber);
cis = NULL;
/*
* Arrange to clean up at backend exit.
*/
on_shmem_exit(communicator_new_backend_exit, 0);
}
static void
communicator_new_backend_exit(int code, Datum arg)
{
DisownLatch(MyIOCompletionLatch);
}
/*
* prefetch_register_bufferv() - register and prefetch buffers
*
* Register that we may want the contents of BufferTag in the near future.
* This is used when issuing a speculative prefetch request, but also when
* performing a synchronous request and need the buffer right now.
*
* When performing a prefetch rather than a synchronous request,
* is_prefetch==true. Currently, it only affects how the request is accounted
* in the perf counters.
*
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
void
communicator_new_prefetch_register_bufferv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blockno, BlockNumber nblocks)
{
int request_idx;
NeonIORequest request = {
.tag = NeonIORequest_PrefetchV,
.prefetch_v = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.block_number = blockno,
.nblocks = nblocks,
}
};
struct NeonIOResult result;
elog(LOG, "prefetch called for rel %u/%u/%u.%u block %u (%u blocks)",
RelFileInfoFmt(rinfo), forkNum, blockno, nblocks);
if (num_inflight_requests >= MAX_INFLIGHT_ASYNC_REQUESTS)
process_inflight_requests();
request_idx = bcomm_start_io_request(my_bs, &request, &result);
if (request_idx == -1)
{
/* -1 means the request was satisfied immediately. */
/* FIXME: check and log errors */
return;
}
inflight_requests[num_inflight_requests] = request_idx;
num_inflight_requests++;
elog(LOG, "sent prefetch request with idx %d", request_idx);
}
static void
process_inflight_requests(void)
{
struct NeonIOResult result;
/* FIXME: log errors */
for (int i = 0; i < num_inflight_requests; i++)
wait_request_completion(inflight_requests[i], &result);
num_inflight_requests = 0;
}
/*
* Perform an IO request in a synchronous fashion.
*
* The result is copied into *result_p before returning.
*/
static void
perform_request(NeonIORequest * request, struct NeonIOResult *result_p)
{
int request_idx;
process_inflight_requests();
request_idx = start_request(request, result_p);
if (request_idx == -1)
{
/* it was completed immediately */
return;
}
wait_request_completion(request_idx, result_p);
}
static int
start_request(NeonIORequest * request, struct NeonIOResult *immediate_result_p)
{
int request_idx;
request_idx = bcomm_start_io_request(my_bs, request, immediate_result_p);
if (request_idx == -1)
{
/* -1 means the request was satisfied immediately. */
return -1;
}
elog(DEBUG5, "sent request with idx %d: tag %d", request_idx, request->tag);
return request_idx;
}
static void
wait_request_completion(int request_idx, struct NeonIOResult *result_p)
{
int32_t poll_res;
/* fixme: check 'request_idx' ? */
for (;;)
{
ResetLatch(MyIOCompletionLatch);
poll_res = bcomm_poll_request_completion(my_bs, request_idx, result_p);
if (poll_res == -1)
{
CHECK_FOR_INTERRUPTS();
/*
* TODO: wake up periodically for CHECK_FOR_INTERRUPTS(). Because
* we wait on MyIOCompletionLatch rather than MyLatch, we won't be
* woken up for the standard interrupts. (See the sketch after this
* function for one way to do that.)
*/
(void) WaitLatch(MyIOCompletionLatch,
WL_EXIT_ON_PM_DEATH | WL_LATCH_SET,
0,
WAIT_EVENT_NEON_PS_STARTING);
continue; /* still busy */
}
else if (poll_res == 0)
{
return;
}
else
{
elog(ERROR, "unexpected return code from bcomm_poll_request_completion()");
}
}
}
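/*
 * A sketch of one way to implement the TODO above: pass WL_TIMEOUT so the
 * loop wakes up periodically (the 100 ms period here is an arbitrary choice)
 * and re-runs CHECK_FOR_INTERRUPTS() even when nothing sets the latch:
 *
 *     (void) WaitLatch(MyIOCompletionLatch,
 *                      WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | WL_TIMEOUT,
 *                      100,
 *                      WAIT_EVENT_NEON_PS_STARTING);
 *
 * WaitLatch() then returns after at most the timeout, and the retry loop
 * polls the request slot and checks for interrupts again.
 */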
/*
* Does the physical file exist?
*/
bool
communicator_new_rel_exists(NRelFileInfo rinfo, ForkNumber forkNum)
{
NeonIORequest request = {
.tag = NeonIORequest_RelExists,
.rel_exists = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_RelExists:
return result.rel_exists;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not check existence of rel %u/%u/%u.%u: %s",
RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for RelExists operation: %d", result.tag);
break;
}
}
/*
* Read N consecutive pages from a relation
*/
void
communicator_new_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
void **buffers, BlockNumber nblocks)
{
NeonIOResult result;
CCachedGetPageVResult cached_result;
void *bounce_buf_used = NULL;
int request_idx;
NeonIORequest request = {
.tag = NeonIORequest_GetPageV,
.get_page_v = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.block_number = blockno,
.nblocks = nblocks,
}
};
elog(LOG, "getpagev called for rel %u/%u/%u.%u block %u (%u blocks)",
RelFileInfoFmt(rinfo), forkNum, blockno, nblocks);
/* Fill in the destination buffers in the request */
if (nblocks == 1)
{
if (bounce_needed(buffers[0]))
{
bounce_buf_used = bounce_buf();
request.get_page_v.dest[0].ptr = bounce_buf_used;
}
else
request.get_page_v.dest[0].ptr = buffers[0];
}
else
{
for (int i = 0; i < nblocks; i++)
{
if (bounce_needed(buffers[i]))
{
/* Split the vector-request into single page requests */
for (int j = 0; j < nblocks; j++)
{
communicator_new_read_at_lsnv(rinfo, forkNum, blockno + j,
&buffers[j], 1);
}
return;
}
request.get_page_v.dest[i].ptr = buffers[i];
}
}
process_inflight_requests();
retry:
request_idx = bcomm_start_get_page_v_request(my_bs, &request, &cached_result);
if (request_idx == -1)
{
bool completed;
/*
* LFC hit, but we are responsible for completing the I/O on the local
* file
*/
if (cache_file <= 0)
cache_file = PathNameOpenFile(lfc_path, O_RDONLY | PG_BINARY);
if (cache_file < 0)
elog(ERROR, "could not open local cache file \"%s\": %m", lfc_path);
for (int i = 0; i < nblocks; i++)
{
uint64_t cached_block = cached_result.cache_block_numbers[i];
ssize_t bytes_total = 0;
while (bytes_total < BLCKSZ)
{
ssize_t nbytes;
nbytes = FileRead(cache_file, ((char *) buffers[i]) + bytes_total, BLCKSZ - bytes_total, cached_block * BLCKSZ + bytes_total, WAIT_EVENT_NEON_LFC_READ);
if (nbytes == -1)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read block %lu in local cache file: %m",
cached_block)));
bytes_total += nbytes;
}
}
completed = bcomm_finish_cache_read(my_bs);
if (!completed)
{
elog(DEBUG1, "read from local cache file was superseded by concurrent update");
goto retry;
}
return;
}
wait_request_completion(request_idx, &result);
switch (result.tag)
{
case NeonIOResult_GetPageV:
if (bounce_buf_used)
memcpy(buffers[0], bounce_buf_used, BLCKSZ);
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read block %u in rel %u/%u/%u.%u: %s",
blockno, RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for GetPage operation: %d", result.tag);
break;
}
}
/*
* neon_nblocks() -- Get the number of blocks stored in a relation.
*/
BlockNumber
communicator_new_rel_nblocks(NRelFileInfo rinfo, ForkNumber forkNum)
{
NeonIORequest request = {
.tag = NeonIORequest_RelSize,
.rel_size = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_RelSize:
return result.rel_size;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read size of rel %u/%u/%u.%u: %s",
RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for RelSize operation: %d", result.tag);
break;
}
}
/*
* neon_db_size() -- Get the size of the database in bytes.
*/
int64
communicator_new_dbsize(Oid dbNode)
{
NeonIORequest request = {
.tag = NeonIORequest_DbSize,
.db_size = {
.db_oid = dbNode,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_DbSize:
return (int64) result.db_size;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read database size of database %u: %s",
dbNode, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for DbSize operation: %d", result.tag);
break;
}
}
int
communicator_new_read_slru_segment(SlruKind kind, int64 segno, void *buffer)
{
/* TODO */
elog(ERROR, "not implemented");
}
/* Write requests */
void
communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
const void *buffer, XLogRecPtr lsn)
{
void *src = bounce_write_if_needed((void *) buffer);
NeonIORequest request = {
.tag = NeonIORequest_WritePage,
.write_page = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.block_number = blockno,
.lsn = lsn,
.src.ptr = src,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write block %u in rel %u/%u/%u.%u: %s",
blockno, RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for WritePage operation: %d", result.tag);
break;
}
}
void
communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
const void *buffer, XLogRecPtr lsn)
{
void *src = bounce_write_if_needed((void *) buffer);
NeonIORequest request = {
.tag = NeonIORequest_RelExtend,
.rel_extend = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.block_number = blockno,
.lsn = lsn,
.src_ptr = (uintptr_t) src,
.src_size = BLCKSZ,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not extend to block %u in rel %u/%u/%u.%u: %s",
blockno, RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for Extend operation: %d", result.tag);
break;
}
}
void
communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
BlockNumber nblocks, XLogRecPtr lsn)
{
NeonIORequest request = {
.tag = NeonIORequest_RelZeroExtend,
.rel_zero_extend = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.block_number = blockno,
.nblocks = nblocks,
.lsn = lsn,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not zeroextend to block %u in rel %u/%u/%u.%u: %s",
blockno, RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for ZeroExtend operation: %d", result.tag);
break;
}
}
void
communicator_new_rel_create(NRelFileInfo rinfo, ForkNumber forkNum)
{
NeonIORequest request = {
.tag = NeonIORequest_RelCreate,
.rel_create = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not create rel %u/%u/%u.%u: %s",
RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for Create operation: %d", result.tag);
break;
}
}
void
communicator_new_rel_truncate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
{
NeonIORequest request = {
.tag = NeonIORequest_RelTruncate,
.rel_truncate = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
.nblocks = nblocks,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate rel %u/%u/%u.%u to %u blocks: %s",
RelFileInfoFmt(rinfo), forkNum, nblocks, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for Truncate operation: %d", result.tag);
break;
}
}
void
communicator_new_rel_unlink(NRelFileInfo rinfo, ForkNumber forkNum)
{
NeonIORequest request = {
.tag = NeonIORequest_RelUnlink,
.rel_unlink = {
.spc_oid = NInfoGetSpcOid(rinfo),
.db_oid = NInfoGetDbOid(rinfo),
.rel_number = NInfoGetRelNumber(rinfo),
.fork_number = forkNum,
}
};
NeonIOResult result;
perform_request(&request, &result);
switch (result.tag)
{
case NeonIOResult_WriteOK:
return;
case NeonIOResult_Error:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not unlink rel %u/%u/%u.%u: %s",
RelFileInfoFmt(rinfo), forkNum, pg_strerror(result.error))));
break;
default:
elog(ERROR, "unexpected result for Unlink operation: %d", result.tag);
break;
}
}
/*
* The worker process can read/write shared buffers directly. But if smgrread() or
* smgrwrite() is called with a private temporary buffer, we need to copy it to the
* "bounce buffer" to make it available to the worker process.
*/
static bool
bounce_needed(void *buffer)
{
if ((uintptr_t) buffer >= (uintptr_t) BufferBlocks &&
(uintptr_t) buffer < (uintptr_t) BufferBlocks + NBuffers * BLCKSZ)
{
return false;
}
return true;
}
static void *
bounce_buf(void)
{
return &communicator_shmem_ptr->backends[MyProcNumber].bounce_buffer;
}
static void *
bounce_write_if_needed(void *buffer)
{
void *p;
if (!bounce_needed(buffer))
return buffer;
p = bounce_buf();
memcpy(p, buffer, BLCKSZ);
return p;
}

View File

@@ -1,54 +0,0 @@
/*-------------------------------------------------------------------------
*
* communicator_new.h
* new implementation
*
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*-------------------------------------------------------------------------
*/
#ifndef COMMUNICATOR_NEW_H
#define COMMUNICATOR_NEW_H
#include "neon_pgversioncompat.h"
#include "storage/buf_internals.h"
#include "pagestore_client.h"
/* initialization at postmaster startup */
extern void pg_init_communicator_new(void);
extern void communicator_new_shmem_request(void);
extern void communicator_new_shmem_startup(void);
/* initialization at backend startup */
extern void communicator_new_init(void);
/* Read requests */
extern bool communicator_new_rel_exists(NRelFileInfo rinfo, ForkNumber forkNum);
extern BlockNumber communicator_new_rel_nblocks(NRelFileInfo rinfo, ForkNumber forknum);
extern int64 communicator_new_dbsize(Oid dbNode);
extern void communicator_new_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber base_blockno,
void **buffers, BlockNumber nblocks);
extern void communicator_new_prefetch_register_bufferv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blockno,
BlockNumber nblocks);
extern int communicator_new_read_slru_segment(SlruKind kind, int64 segno,
void *buffer);
/* Write requests, to keep the caches up-to-date */
extern void communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
const void *buffer, XLogRecPtr lsn);
extern void communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
const void *buffer, XLogRecPtr lsn);
extern void communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blockno, BlockNumber nblocks,
XLogRecPtr lsn);
extern void communicator_new_rel_create(NRelFileInfo rinfo, ForkNumber forkNum);
extern void communicator_new_rel_truncate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
extern void communicator_new_rel_unlink(NRelFileInfo rinfo, ForkNumber forkNum);
#endif /* COMMUNICATOR_NEW_H */

View File

@@ -98,7 +98,6 @@
#define MB ((uint64)1024*1024)
#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ >> lfc_chunk_size_log))
#define BLOCK_TO_CHUNK_OFF(blkno) ((blkno) & (lfc_blocks_per_chunk-1))
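/*
 * Worked example of the chunk arithmetic above (a sketch assuming
 * BLCKSZ = 8192 and lfc_chunk_size_log = 7, i.e. 128 blocks per chunk):
 * SIZE_MB_TO_CHUNKS(1024) = 1024 * MB / 8192 >> 7 = 131072 >> 7 = 1024 chunks,
 * and BLOCK_TO_CHUNK_OFF(1000) = 1000 & 127 = 104, i.e. block 1000 is the
 * 105th block within its chunk.
 */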
/*
@@ -135,6 +134,15 @@ typedef struct FileCacheEntry
#define N_COND_VARS 64
#define CV_WAIT_TIMEOUT 10
#define MAX_PREWARM_WORKERS 8
typedef struct PrewarmWorkerState
{
uint32 prewarmed_pages;
uint32 skipped_pages;
TimestampTz completed;
} PrewarmWorkerState;
typedef struct FileCacheControl
{
uint64 generation; /* generation is needed to handle correct hash
@@ -156,25 +164,43 @@ typedef struct FileCacheControl
dlist_head holes; /* double linked list of punched holes */
HyperLogLogState wss_estimation; /* estimation of working set size */
ConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */
PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
size_t n_prewarm_workers;
size_t n_prewarm_entries;
size_t total_prewarm_pages;
size_t prewarm_batch;
bool prewarm_active;
bool prewarm_canceled;
dsm_handle prewarm_lfc_state_handle;
} FileCacheControl;
bool lfc_store_prefetch_result;
#define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc
#define FILE_CACHE_STATE_BITMAP(fcs) ((uint8*)&(fcs)->chunks[(fcs)->n_chunks])
#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks) (sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * lfc_blocks_per_chunk)+7)/8)
#define FILE_CACHE_STATE_SIZE(fcs) (sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)
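/*
 * Size sketch for the macros above (assuming 128 blocks per chunk, i.e.
 * chunk_size_log = 7): a state with 1000 chunks serializes to the fixed
 * FileCacheState header, 1000 BufferTags, and a ((1000 * 128) + 7) / 8 =
 * 16000-byte bitmap holding one bit per cached page.
 */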
static HTAB *lfc_hash;
static int lfc_desc = -1;
static LWLockId lfc_lock;
static int lfc_max_size;
int lfc_size_limit;
static int lfc_size_limit;
static int lfc_prewarm_limit;
static int lfc_prewarm_batch;
static int lfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;
static int lfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;
char *lfc_path;
static char *lfc_path;
static uint64 lfc_generation;
static FileCacheControl *lfc_ctl;
static bool lfc_do_prewarm;
static shmem_startup_hook_type prev_shmem_startup_hook;
#if PG_VERSION_NUM>=150000
static shmem_request_hook_type prev_shmem_request_hook;
#endif
bool lfc_store_prefetch_result;
bool lfc_prewarm_update_ws_estimation;
#define LFC_ENABLED() (lfc_ctl->limit != 0)
/*
@@ -500,6 +526,17 @@ lfc_init(void)
NULL,
NULL);
DefineCustomBoolVariable("neon.prewarm_update_ws_estimation",
"Consider prewarmed pages for working set estimation",
NULL,
&lfc_prewarm_update_ws_estimation,
true,
PGC_SUSET,
0,
NULL,
NULL,
NULL);
DefineCustomIntVariable("neon.max_file_cache_size",
"Maximal size of Neon local file cache",
NULL,
@@ -550,6 +587,32 @@ lfc_init(void)
lfc_change_chunk_size,
NULL);
DefineCustomIntVariable("neon.file_cache_prewarm_limit",
"Maximal number of prewarmed chunks",
NULL,
&lfc_prewarm_limit,
INT_MAX, /* no limit by default */
0,
INT_MAX,
PGC_SIGHUP,
0,
NULL,
NULL,
NULL);
DefineCustomIntVariable("neon.file_cache_prewarm_batch",
"Number of pages retrivied by prewarm from page server",
NULL,
&lfc_prewarm_batch,
64,
1,
INT_MAX,
PGC_SIGHUP,
0,
NULL,
NULL,
NULL);
if (lfc_max_size == 0)
return;
@@ -563,6 +626,317 @@ lfc_init(void)
#endif
}
FileCacheState*
lfc_get_state(size_t max_entries)
{
FileCacheState* fcs = NULL;
if (lfc_maybe_disabled() || max_entries == 0) /* fast exit if file cache is disabled */
return NULL;
LWLockAcquire(lfc_lock, LW_SHARED);
if (LFC_ENABLED())
{
dlist_iter iter;
size_t i = 0;
uint8* bitmap;
size_t n_pages = 0;
size_t n_entries = Min(max_entries, lfc_ctl->used - lfc_ctl->pinned);
size_t state_size = FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_entries);
fcs = (FileCacheState*)palloc0(state_size);
SET_VARSIZE(fcs, state_size);
fcs->magic = FILE_CACHE_STATE_MAGIC;
fcs->chunk_size_log = lfc_chunk_size_log;
fcs->n_chunks = n_entries;
bitmap = FILE_CACHE_STATE_BITMAP(fcs);
dlist_reverse_foreach(iter, &lfc_ctl->lru)
{
FileCacheEntry *entry = dlist_container(FileCacheEntry, list_node, iter.cur);
fcs->chunks[i] = entry->key;
for (int j = 0; j < lfc_blocks_per_chunk; j++)
{
if (GET_STATE(entry, j) != UNAVAILABLE)
{
BITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);
n_pages += 1;
}
}
if (++i == n_entries)
break;
}
Assert(i == n_entries);
fcs->n_pages = n_pages;
Assert(pg_popcount((char*)bitmap, ((n_entries << lfc_chunk_size_log) + 7)/8) == n_pages);
elog(LOG, "LFC: save state of %d chunks %d pages", (int)n_entries, (int)n_pages);
}
LWLockRelease(lfc_lock);
return fcs;
}
/*
* Prewarm the LFC to the specified state. It uses lfc_prefetch() to load prewarmed pages
* without holding a shared buffer lock, avoiding race conditions with other backends.
*/
void
lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
{
size_t fcs_chunk_size_log;
size_t n_entries;
size_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);
size_t fcs_size;
dsm_segment *seg;
BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];
if (!lfc_ensure_opened())
return;
if (prewarm_batch == 0 || lfc_prewarm_limit == 0 || n_workers == 0)
{
elog(LOG, "LFC: prewarm is disabled");
return;
}
if (n_workers > MAX_PREWARM_WORKERS)
{
elog(ERROR, "LFC: Too much prewarm workers, maximum is %d", MAX_PREWARM_WORKERS);
}
if (fcs == NULL || fcs->n_chunks == 0)
{
elog(LOG, "LFC: nothing to prewarm");
return;
}
if (fcs->magic != FILE_CACHE_STATE_MAGIC)
{
elog(ERROR, "LFC: Invalid file cache state magic: %X", fcs->magic);
}
fcs_size = VARSIZE(fcs);
if (FILE_CACHE_STATE_SIZE(fcs) != fcs_size)
{
elog(ERROR, "LFC: Invalid file cache state size: %u vs. %u", (unsigned)FILE_CACHE_STATE_SIZE(fcs), VARSIZE(fcs));
}
fcs_chunk_size_log = fcs->chunk_size_log;
if (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)
{
elog(ERROR, "LFC: Invalid chunk size log: %u", fcs->chunk_size_log);
}
n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
Assert(n_entries != 0);
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
/* Do not prewarm more entries than LFC limit */
if (lfc_ctl->limit <= lfc_ctl->size)
{
elog(LOG, "LFC: skip prewarm because LFC is already filled");
LWLockRelease(lfc_lock);
return;
}
if (lfc_ctl->prewarm_active)
{
LWLockRelease(lfc_lock);
elog(ERROR, "LFC: skip prewarm because another prewarm is still active");
}
lfc_ctl->n_prewarm_entries = n_entries;
lfc_ctl->n_prewarm_workers = n_workers;
lfc_ctl->prewarm_active = true;
lfc_ctl->prewarm_canceled = false;
lfc_ctl->prewarm_batch = prewarm_batch;
memset(lfc_ctl->prewarm_workers, 0, n_workers*sizeof(PrewarmWorkerState));
LWLockRelease(lfc_lock);
/* Calculate total number of pages to be prewarmed */
lfc_ctl->total_prewarm_pages = fcs->n_pages;
seg = dsm_create(fcs_size, 0);
memcpy(dsm_segment_address(seg), fcs, fcs_size);
lfc_ctl->prewarm_lfc_state_handle = dsm_segment_handle(seg);
/* Spawn background workers */
for (uint32 i = 0; i < n_workers; i++)
{
BackgroundWorker worker = {0};
worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
worker.bgw_start_time = BgWorkerStart_ConsistentState;
worker.bgw_restart_time = BGW_NEVER_RESTART;
strcpy(worker.bgw_library_name, "neon");
strcpy(worker.bgw_function_name, "lfc_prewarm_main");
snprintf(worker.bgw_name, BGW_MAXLEN, "LFC prewarm worker %d", i+1);
strcpy(worker.bgw_type, "LFC prewarm worker");
worker.bgw_main_arg = Int32GetDatum(i);
/* must set notify PID to wait for shutdown */
worker.bgw_notify_pid = MyProcPid;
if (!RegisterDynamicBackgroundWorker(&worker, &bgw_handle[i]))
{
ereport(LOG,
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
errmsg("LFC: registering dynamic bgworker prewarm failed"),
errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
n_workers = i;
lfc_ctl->prewarm_canceled = true;
break;
}
}
for (uint32 i = 0; i < n_workers; i++)
{
bool interrupted;
do
{
interrupted = false;
PG_TRY();
{
BgwHandleStatus status = WaitForBackgroundWorkerShutdown(bgw_handle[i]);
if (status != BGWH_STOPPED && status != BGWH_POSTMASTER_DIED)
{
elog(LOG, "LFC: Unexpected status of prewarm worker termination: %d", status);
}
}
PG_CATCH();
{
elog(LOG, "LFC: cancel prewarm");
lfc_ctl->prewarm_canceled = true;
interrupted = true;
}
PG_END_TRY();
} while (interrupted);
if (!lfc_ctl->prewarm_workers[i].completed)
{
/* Background worker doesn't set completion time: it means that it was abnormally terminated */
elog(LOG, "LFC: prewarm worker %d failed", i+1);
/* Set completion time to prevent get_prewarm_info from considering this worker as active */
lfc_ctl->prewarm_workers[i].completed = GetCurrentTimestamp();
}
}
dsm_detach(seg);
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
lfc_ctl->prewarm_active = false;
LWLockRelease(lfc_lock);
}
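/*
 * How the prewarm work is partitioned, illustrated: each worker below claims
 * the chunks where (page_idx >> fcs_chunk_size_log) % n_workers equals its
 * worker_id. E.g. with n_workers = 2 and fcs_chunk_size_log = 4 (16 pages per
 * chunk), worker 0 handles pages 0..15, 32..47, ... and worker 1 handles
 * pages 16..31, 48..63, ..., so every chunk is prefetched by exactly one
 * worker.
 */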
void
lfc_prewarm_main(Datum main_arg)
{
size_t snd_idx = 0, rcv_idx = 0;
size_t n_sent = 0, n_received = 0;
size_t fcs_chunk_size_log;
size_t max_prefetch_pages;
size_t prewarm_batch;
size_t n_workers;
dsm_segment *seg;
FileCacheState* fcs;
uint8* bitmap;
BufferTag tag;
PrewarmWorkerState* ws;
uint32 worker_id = DatumGetInt32(main_arg);
pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();
seg = dsm_attach(lfc_ctl->prewarm_lfc_state_handle);
if (seg == NULL)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("could not map dynamic shared memory segment")));
fcs = (FileCacheState*) dsm_segment_address(seg);
prewarm_batch = lfc_ctl->prewarm_batch;
fcs_chunk_size_log = fcs->chunk_size_log;
n_workers = lfc_ctl->n_prewarm_workers;
max_prefetch_pages = lfc_ctl->n_prewarm_entries << fcs_chunk_size_log;
ws = &lfc_ctl->prewarm_workers[worker_id];
bitmap = FILE_CACHE_STATE_BITMAP(fcs);
/* enable prefetch in LFC */
lfc_store_prefetch_result = true;
lfc_do_prewarm = true; /* tell lfc_prefetch not to replace existing entries when the LFC is full */
elog(LOG, "LFC: worker %d start prewarming", worker_id);
while (!lfc_ctl->prewarm_canceled)
{
if (snd_idx < max_prefetch_pages)
{
if ((snd_idx >> fcs_chunk_size_log) % n_workers != worker_id)
{
/* If there are multiple workers, split chunks between them */
snd_idx += 1 << fcs_chunk_size_log;
}
else
{
if (BITMAP_ISSET(bitmap, snd_idx))
{
tag = fcs->chunks[snd_idx >> fcs_chunk_size_log];
tag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);
if (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))
{
(void)communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
n_sent += 1;
}
else
{
ws->skipped_pages += 1;
BITMAP_CLR(bitmap, snd_idx);
}
}
snd_idx += 1;
}
}
if (n_sent >= n_received + prewarm_batch || snd_idx == max_prefetch_pages)
{
if (n_received == n_sent && snd_idx == max_prefetch_pages)
{
break;
}
if ((rcv_idx >> fcs_chunk_size_log) % n_workers != worker_id)
{
/* Skip chunks processed by other workers */
rcv_idx += 1 << fcs_chunk_size_log;
continue;
}
/* Locate next block to prefetch */
while (!BITMAP_ISSET(bitmap, rcv_idx))
{
rcv_idx += 1;
}
tag = fcs->chunks[rcv_idx >> fcs_chunk_size_log];
tag.blockNum += rcv_idx & ((1 << fcs_chunk_size_log) - 1);
if (communicator_prefetch_receive(tag))
{
ws->prewarmed_pages += 1;
}
else
{
ws->skipped_pages += 1;
}
rcv_idx += 1;
n_received += 1;
}
}
/* No need to perform prefetch cleanup here: the prewarm worker terminates and
* the connection to the pageserver is dropped right after this function returns.
*/
Assert(n_sent == n_received || lfc_ctl->prewarm_canceled);
elog(LOG, "LFC: worker %d complete prewarming: loaded %ld pages", worker_id, (long)n_received);
lfc_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();
}
/*
* Check if page is present in the cache.
* Returns true if page is found in local cache.
@@ -1001,8 +1375,11 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
* If we can't (e.g. because all other slots are being accessed)
* then we will remove this entry from the hash and continue
* on to the next chunk, as we may not exceed the limit.
*
* While prewarming the LFC we do not want to replace existing entries,
* so we simply stop prewarming if the LFC is full.
*/
else if (!dlist_is_empty(&lfc_ctl->lru))
else if (!dlist_is_empty(&lfc_ctl->lru) && !lfc_do_prewarm)
{
/* Cache overflow: evict least recently used chunk */
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node,
@@ -1026,6 +1403,7 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
/* Can't add this chunk - we don't have the space for it */
hash_search_with_hash_value(lfc_hash, &entry->key, hash,
HASH_REMOVE, NULL);
lfc_ctl->prewarm_canceled = true; /* cancel prewarm if LFC limit is reached */
return false;
}
@@ -1112,9 +1490,11 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
tag.blockNum = blkno;
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
if (lfc_prewarm_update_ws_estimation)
{
tag.blockNum = blkno;
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
}
if (found)
{
state = GET_STATE(entry, chunk_offs);
@@ -1748,3 +2128,82 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
}
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(get_local_cache_state);
Datum
get_local_cache_state(PG_FUNCTION_ARGS)
{
size_t max_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
FileCacheState* fcs = lfc_get_state(max_entries);
if (fcs != NULL)
PG_RETURN_BYTEA_P((bytea*)fcs);
else
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(prewarm_local_cache);
Datum
prewarm_local_cache(PG_FUNCTION_ARGS)
{
bytea* state = PG_GETARG_BYTEA_P(0); /* fully detoasted: VARSIZE() is used on it below */
uint32 n_workers = PG_GETARG_INT32(1);
FileCacheState* fcs = (FileCacheState*)state;
lfc_prewarm(fcs, n_workers);
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(get_prewarm_info);
Datum
get_prewarm_info(PG_FUNCTION_ARGS)
{
Datum values[4];
bool nulls[4];
TupleDesc tupdesc;
uint32 prewarmed_pages = 0;
uint32 skipped_pages = 0;
uint32 active_workers = 0;
uint32 total_pages;
size_t n_workers;
if (lfc_size_limit == 0)
PG_RETURN_NULL();
LWLockAcquire(lfc_lock, LW_SHARED);
if (!lfc_ctl || lfc_ctl->n_prewarm_workers == 0)
{
LWLockRelease(lfc_lock);
PG_RETURN_NULL();
}
n_workers = lfc_ctl->n_prewarm_workers;
total_pages = lfc_ctl->total_prewarm_pages;
for (size_t i = 0; i < n_workers; i++)
{
PrewarmWorkerState* ws = &lfc_ctl->prewarm_workers[i];
prewarmed_pages += ws->prewarmed_pages;
skipped_pages += ws->skipped_pages;
active_workers += ws->completed == 0; /* a worker counts as active until it sets its completion time */
}
LWLockRelease(lfc_lock);
tupdesc = CreateTemplateTupleDesc(4);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_pages", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prewarmed_pages", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "skipped_pages", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "active_workers", INT4OID, -1, 0);
tupdesc = BlessTupleDesc(tupdesc);
MemSet(nulls, 0, sizeof(nulls));
values[0] = Int32GetDatum(total_pages);
values[1] = Int32GetDatum(prewarmed_pages);
values[2] = Int32GetDatum(skipped_pages);
values[3] = Int32GetDatum(active_workers);
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}

View File

@@ -13,10 +13,19 @@
#include "neon_pgversioncompat.h"
typedef struct FileCacheState
{
int32 vl_len_; /* varlena header (do not touch directly!) */
uint32 magic;
uint32 n_chunks;
uint32 n_pages;
uint16 chunk_size_log;
BufferTag chunks[FLEXIBLE_ARRAY_MEMBER];
/* followed by bitmap */
} FileCacheState;
/* GUCs */
extern bool lfc_store_prefetch_result;
extern int lfc_size_limit;
extern char *lfc_path;
/* functions for local file cache */
extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
@@ -34,7 +43,10 @@ extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
extern void lfc_init(void);
extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
const void* buffer, XLogRecPtr lsn);
extern FileCacheState* lfc_get_state(size_t max_entries);
extern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers);
PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
static inline bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

View File

@@ -26,6 +26,7 @@
#include "portability/instr_time.h"
#include "postmaster/interrupt.h"
#include "storage/buf_internals.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
@@ -79,6 +80,7 @@ int neon_protocol_version = 3;
static int neon_compute_mode = 0;
static int max_reconnect_attempts = 60;
static int stripe_size;
static int max_sockets;
static int pageserver_response_log_timeout = 10000;
/* 2.5 minutes. A bit higher than highest default TCP retransmission timeout */
@@ -279,55 +281,6 @@ AssignPageserverConnstring(const char *newval, void *extra)
}
}
/* Return a copy of the whole shard map from shared memory */
void
get_shard_map(char ***connstrs_p, shardno_t *num_shards_p)
{
uint64 begin_update_counter;
uint64 end_update_counter;
ShardMap *shard_map = &pagestore_shared->shard_map;
shardno_t num_shards;
char *buf;
char **connstrs;
buf = palloc(MAX_SHARDS*MAX_PAGESERVER_CONNSTRING_SIZE);
connstrs = palloc(sizeof(char *) * MAX_SHARDS);
/*
* Postmaster can update the shared memory values concurrently, in which
* case we would copy a garbled mix of the old and new values. We will
* detect it because the counters won't match, and retry. But it's
* important that we don't do anything within the retry-loop that would
* depend on the string having valid contents.
*/
do
{
char *p;
begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
num_shards = shard_map->num_shards;
p = buf;
for (int i = 0; i < Min(num_shards, MAX_SHARDS); i++)
{
strlcpy(p, shard_map->connstring[i], MAX_PAGESERVER_CONNSTRING_SIZE);
connstrs[i] = p;
elog(LOG, "XX: connstrs[%d]: %p", i, p);
p += MAX_PAGESERVER_CONNSTRING_SIZE;
}
pg_memory_barrier();
}
while (begin_update_counter != end_update_counter
|| begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
|| end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
*connstrs_p = connstrs;
*num_shards_p = num_shards;
}
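/*
 * The retry loop above is the reader side of a seqlock-style protocol. The
 * postmaster-side writer is not shown in this diff, but presumably follows
 * this sketch: bump begin_update_counter, overwrite the map, then bump
 * end_update_counter, so readers either see matching counters or retry.
 *
 *     pg_atomic_fetch_add_u64(&pagestore_shared->begin_update_counter, 1);
 *     pg_memory_barrier();
 *     ... overwrite shard_map->connstring[] and shard_map->num_shards ...
 *     pg_memory_barrier();
 *     pg_atomic_fetch_add_u64(&pagestore_shared->end_update_counter, 1);
 */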
/*
* Get the current number of shards, and/or the connection string for a
* particular shard from the shard map in shared memory.
@@ -385,6 +338,13 @@ load_shard_map(shardno_t shard_no, char *connstr_p, shardno_t *num_shards_p)
pageserver_disconnect(i);
}
pagestore_local_counter = end_update_counter;
/* Reserve file descriptors for sockets */
while (max_sockets < num_shards)
{
max_sockets += 1;
ReserveExternalFD();
}
}
if (num_shards_p)
@@ -785,8 +745,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
default:
neon_shard_log(shard_no, ERROR, "libpagestore: invalid connection state %d", shard->state);
}
/* This shouldn't be hit */
Assert(false);
pg_unreachable();
}
static void
@@ -926,6 +886,7 @@ retry:
int port;
int sndbuf;
int recvbuf;
uint64* max_wait;
get_local_port(PQsocket(pageserver_conn), &port);
get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf);
@@ -936,7 +897,10 @@ retry:
shard->nrequests_sent, shard->nresponses_received, port, sndbuf, recvbuf,
pageserver_conn->inStart, pageserver_conn->inEnd);
shard->receive_last_log_time = now;
MyNeonCounters->compute_getpage_stuck_requests_total += !shard->receive_logged;
shard->receive_logged = true;
max_wait = &MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms;
*max_wait = Max(*max_wait, INSTR_TIME_GET_MILLISEC(since_start));
}
/*
@@ -959,6 +923,7 @@ retry:
get_local_port(PQsocket(pageserver_conn), &port);
neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting (socket port=%d)",
INSTR_TIME_GET_DOUBLE(since_start), port);
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
pageserver_disconnect(shard_no);
return -1;
}
@@ -982,6 +947,7 @@ retry:
INSTR_TIME_SET_ZERO(shard->receive_start_time);
INSTR_TIME_SET_ZERO(shard->receive_last_log_time);
shard->receive_logged = false;
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
return ret;
}

View File

@@ -0,0 +1,22 @@
\echo Use "ALTER EXTENSION neon UPDATE TO '1.6'" to load this file. \quit
CREATE FUNCTION get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer)
RETURNS record
AS 'MODULE_PATHNAME', 'get_prewarm_info'
LANGUAGE C STRICT
PARALLEL SAFE;
CREATE FUNCTION get_local_cache_state(max_chunks integer default null)
RETURNS bytea
AS 'MODULE_PATHNAME', 'get_local_cache_state'
LANGUAGE C
PARALLEL UNSAFE;
CREATE FUNCTION prewarm_local_cache(state bytea, n_workers integer default 1)
RETURNS void
AS 'MODULE_PATHNAME', 'prewarm_local_cache'
LANGUAGE C STRICT
PARALLEL UNSAFE;
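-- Usage sketch (not part of the upgrade script): capture the current LFC
-- state and prewarm from it with two workers; with a NULL argument,
-- get_local_cache_state() falls back to neon.file_cache_prewarm_limit.
--
--   SELECT prewarm_local_cache(get_local_cache_state(), 2);
--   SELECT * FROM get_prewarm_info();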

View File

@@ -0,0 +1,7 @@
DROP FUNCTION IF EXISTS get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer);
DROP FUNCTION IF EXISTS get_local_cache_state(max_chunks integer);
DROP FUNCTION IF EXISTS prewarm_local_cache(state bytea, n_workers integer);

View File

@@ -20,7 +20,6 @@
#include "replication/logicallauncher.h"
#include "replication/slot.h"
#include "replication/walsender.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "funcapi.h"
#include "access/htup_details.h"
@@ -30,7 +29,6 @@
#include "utils/guc_tables.h"
#include "communicator.h"
#include "communicator_new.h"
#include "extension_server.h"
#include "file_cache.h"
#include "neon.h"
@@ -47,17 +45,13 @@ PG_MODULE_MAGIC;
void _PG_init(void);
bool neon_enable_new_communicator;
static int running_xacts_overflow_policy;
#if PG_MAJORVERSION_NUM >= 16
static shmem_startup_hook_type prev_shmem_startup_hook;
#if PG_VERSION_NUM>=150000
static shmem_request_hook_type prev_shmem_request_hook;
#endif
static void neon_shmem_request(void);
static void neon_shmem_startup_hook(void);
#endif
#if PG_MAJORVERSION_NUM >= 17
uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE;
uint32 WAIT_EVENT_NEON_LFC_READ;
@@ -436,36 +430,17 @@ _PG_init(void)
*/
#if PG_VERSION_NUM >= 160000
load_file("$libdir/neon_rmgr", false);
#endif
prev_shmem_startup_hook = shmem_startup_hook;
shmem_startup_hook = neon_shmem_startup_hook;
#if PG_VERSION_NUM>=150000
prev_shmem_request_hook = shmem_request_hook;
shmem_request_hook = neon_shmem_request;
#else
neon_shmem_request();
#endif
DefineCustomBoolVariable(
"neon.enable_new_communicator",
"Enables new communicator implementation",
NULL,
&neon_enable_new_communicator,
true,
PGC_POSTMASTER,
0,
NULL, NULL, NULL);
pg_init_libpagestore();
lfc_init();
pg_init_walproposer();
init_lwlsncache();
pg_init_communicator();
if (neon_enable_new_communicator)
pg_init_communicator_new();
Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
InitUnstableExtensionsSupport();
@@ -584,17 +559,7 @@ backpressure_throttling_time(PG_FUNCTION_ARGS)
PG_RETURN_UINT64(BackpressureThrottlingTime());
}
static void
neon_shmem_request(void)
{
#if PG_VERSION_NUM>=150000
if (prev_shmem_request_hook)
prev_shmem_request_hook();
#endif
communicator_new_shmem_request();
}
#if PG_MAJORVERSION_NUM >= 16
static void
neon_shmem_startup_hook(void)
{
@@ -614,6 +579,5 @@ neon_shmem_startup_hook(void)
WAIT_EVENT_NEON_PS_READ = WaitEventExtensionNew("Neon/PS_ReadIO");
WAIT_EVENT_NEON_WAL_DL = WaitEventExtensionNew("Neon/WAL_Download");
#endif
communicator_new_shmem_startup();
}
#endif

View File

@@ -13,7 +13,6 @@
#include "utils/wait_event.h"
/* GUCs */
extern bool neon_enable_new_communicator;
extern char *neon_auth_token;
extern char *neon_timeline;
extern char *neon_tenant;

View File

@@ -4,6 +4,7 @@
#include "miscadmin.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "storage/ipc.h"
#include "storage/shmem.h"
#include "storage/buf_internals.h"
@@ -396,9 +397,10 @@ SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
XLogRecPtr
neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)
{
if (lsn < FirstNormalUnloggedLSN || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)
if (lsn == InvalidXLogRecPtr || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)
return lsn;
Assert(lsn >= WalSegMinSize);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, from, n_blocks);
LWLockRelease(LastWrittenLsnLock);
@@ -435,7 +437,6 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
NInfoGetRelNumber(relfilenode) == InvalidOid)
return InvalidXLogRecPtr;
BufTagInit(key, relNumber, forknum, blockno, spcOid, dbOid);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
@@ -444,6 +445,10 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
{
XLogRecPtr lsn = lsns[i];
if (lsn == InvalidXLogRecPtr)
continue;
Assert(lsn >= WalSegMinSize);
key.blockNum = blockno + i;
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
if (found)

Some files were not shown because too many files have changed in this diff.