follow up comments

2026-05-17 05:00:38 +00:00 · 2025-05-22 15:17:04 +02:00
272 changed files with 5212 additions and 22945 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -314,8 +314,7 @@ jobs:
          test_selection: performance
          run_in_parallel: false
          save_perf_report: ${{ github.ref_name == 'main' }}
-          # test_pageserver_max_throughput_getpage_at_latest_lsn is run in separate workflow periodic_pagebench.yml because it needs snapshots
-          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} --ignore=test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
+          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
          benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
          pg_version: v16
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -1,4 +1,4 @@
-name: Periodic pagebench performance test on unit-perf hetzner runner
+name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region

 on:
  schedule:
@@ -8,7 +8,7 @@ on:
    #        │   │ ┌───────────── day of the month (1 - 31)
    #        │   │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #        │   │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron: '0 */4 * * *' # Runs every 4 hours
+    - cron: '0 */3 * * *' # Runs every 3 hours
  workflow_dispatch: # Allows manual triggering of the workflow
    inputs:
      commit_hash:
@@ -16,11 +16,6 @@ on:
        description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
        required: false
        default: ''
-      recreate_snapshots:
-        type: boolean
-        description: 'Recreate snapshots - !!!WARNING!!! We should only recreate snapshots if the previous ones are no longer compatible. Otherwise benchmarking results are not comparable across runs.'
-        required: false
-        default: false

 defaults:
  run:
@@ -34,13 +29,13 @@ permissions:
  contents: read

 jobs:
-  run_periodic_pagebench_test:
+  trigger_bench_on_ec2_machine_in_eu_central_1:
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf ]
+    runs-on: [ self-hosted, small ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
@@ -49,13 +44,10 @@ jobs:
      options: --init
    timeout-minutes: 360  # Set the timeout to 6 hours
    env:
+      API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
      RUN_ID: ${{ github.run_id }}
-      DEFAULT_PG_VERSION: 16
-      BUILD_TYPE: release
-      RUST_BACKTRACE: 1
-      # NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS: 1 - doesn't work without root in container
-      S3_BUCKET: neon-github-public-dev
-      PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+      AWS_DEFAULT_REGION : "eu-central-1"
+      AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
    steps:
    # we don't need the neon source code because we run everything remotely
    # however we still need the local github actions to run the allure step below
@@ -64,194 +56,99 @@ jobs:
      with:
        egress-policy: audit

-    - name: Set up the environment which depends on $RUNNER_TEMP on nvme drive
-      id: set-env
-      shell: bash -euxo pipefail {0}
-      run: |
-        {
-          echo "NEON_DIR=${RUNNER_TEMP}/neon"
-          echo "NEON_BIN=${RUNNER_TEMP}/neon/bin"
-          echo "POSTGRES_DISTRIB_DIR=${RUNNER_TEMP}/neon/pg_install"
-          echo "LD_LIBRARY_PATH=${RUNNER_TEMP}/neon/pg_install/v${DEFAULT_PG_VERSION}/lib"
-          echo "BACKUP_DIR=${RUNNER_TEMP}/instance_store/saved_snapshots"
-          echo "TEST_OUTPUT=${RUNNER_TEMP}/neon/test_output"
-          echo "PERF_REPORT_DIR=${RUNNER_TEMP}/neon/test_output/perf-report-local"
-          echo "ALLURE_DIR=${RUNNER_TEMP}/neon/test_output/allure-results"
-          echo "ALLURE_RESULTS_DIR=${RUNNER_TEMP}/neon/test_output/allure-results/results"
-        } >> "$GITHUB_ENV"
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-        echo "allure_results_dir=${RUNNER_TEMP}/neon/test_output/allure-results/results" >> "$GITHUB_OUTPUT"
+    - name: Show my own (GitHub runner) external IP address - useful for IP allowlisting
+      run: curl https://ifconfig.me

-    - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+    - name: Assume AWS OIDC role that allows managing the EC2 machine (start/stop/describe...)
+      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-        role-duration-seconds: 18000 # max 5 hours (needed in case commit hash is still being built)
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+        role-duration-seconds: 3600
+
+    - name: Start EC2 instance and wait for the instance to boot up
+      run: |
+        aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
+        aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
+        sleep 60 # give cloud-init and our API server some time to start up
+
+    - name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
+      run: |
+        public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
+        echo "Public IP of the EC2 instance: $public_ip"
+        echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
+
    - name: Determine commit hash
-      id: commit_hash
-      shell: bash -euxo pipefail {0}
      env:
        INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
      run: |
-        if [[ -z "${INPUT_COMMIT_HASH}" ]]; then
-          COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')
-          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
-          echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
+        if [ -z "$INPUT_COMMIT_HASH" ]; then
+          echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
          echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV
        else
-          COMMIT_HASH="${INPUT_COMMIT_HASH}"
-          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
-          echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
+          echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
          echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV
        fi
-    - name: Checkout the neon repository at given commit hash
-      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      with:
-        ref: ${{ steps.commit_hash.outputs.commit_hash }}

-    # does not reuse ./.github/actions/download because we need to download the artifact for the given commit hash
-    # example artifact
-    # s3://neon-github-public-dev/artifacts/48b870bc078bd2c450eb7b468e743b9c118549bf/15036827400/1/neon-Linux-X64-release-artifact.tar.zst /instance_store/artifacts/neon-Linux-release-artifact.tar.zst
-    - name: Determine artifact S3_KEY for given commit hash and download and extract artifact
-      id: artifact_prefix
-      shell: bash -euxo pipefail {0}
-      env:
-        ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
-        COMMIT_HASH: ${{ env.COMMIT_HASH }}
-        COMMIT_HASH_TYPE: ${{ env.COMMIT_HASH_TYPE }}
+    - name: Start Bench with run_id
      run: |
-        attempt=0
-        max_attempts=24 # 5 minutes * 24 = 2 hours
+        curl -k -X 'POST' \
+        "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
+        -H 'accept: application/json' \
+        -H 'Content-Type: application/json' \
+        -H "Authorization: Bearer $API_KEY" \
+        -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\", \"neonRepoCommitHashType\": \"${COMMIT_HASH_TYPE}\"}"

-        while [[ $attempt -lt $max_attempts ]]; do
-          # the following command will fail until the artifacts are available ...
-          S3_KEY=$(aws s3api list-objects-v2 --bucket "$S3_BUCKET" --prefix "artifacts/$COMMIT_HASH/" \
-            | jq -r '.Contents[]?.Key' \
-            | grep "neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst" \
-            | sort --version-sort \
-            | tail -1) || true # ... thus ignore errors from the command
-          if [[ -n "${S3_KEY}" ]]; then
-            echo "Artifact found: $S3_KEY"
-            echo "S3_KEY=$S3_KEY" >> $GITHUB_ENV
+    - name: Poll Test Status
+      id: poll_step
+      run: |
+        status=""
+        while [[ "$status" != "failure" && "$status" != "success" ]]; do
+          response=$(curl -k -X 'GET' \
+          "${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
+          -H 'accept: application/json' \
+          -H "Authorization: Bearer $API_KEY")
+          echo "Response: $response"
+          set +x
+          status=$(echo $response | jq -r '.status')
+          echo "Test status: $status"
+          if [[ "$status" == "failure" ]]; then
+            echo "Test failed"
+            exit 1 # Fail the job step if status is failure
+          elif [[ "$status" == "success" || "$status" == "null" ]]; then
            break
+          elif [[ "$status" == "too_many_runs" ]]; then
+            echo "Too many runs already running"
+            echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
+            exit 1
          fi
-          
-          # Increment attempt counter and sleep for 5 minutes
-          attempt=$((attempt + 1))
-          echo "Attempt $attempt of $max_attempts to find artifacts in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH failed. Retrying in 5 minutes..."
-          sleep 300 # Sleep for 5 minutes
+
+          sleep 60 # Poll every 60 seconds
        done

-        if [[ -z "${S3_KEY}" ]]; then
-          echo "Error: artifact not found in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH" after 2 hours
-        else
-          mkdir -p $(dirname $ARCHIVE)
-          time aws s3 cp --only-show-errors s3://$S3_BUCKET/${S3_KEY} ${ARCHIVE}
-          mkdir -p ${NEON_DIR}
-          time tar -xf ${ARCHIVE} -C ${NEON_DIR}
-          rm -f ${ARCHIVE}
-        fi
-
-    - name: Download snapshots from S3
-      if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.recreate_snapshots == 'false' || github.event.inputs.recreate_snapshots == '' }}
-      id: download_snapshots
-      shell: bash -euxo pipefail {0}
+    - name: Retrieve Test Logs
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
-        # Download the snapshots from S3
-        mkdir -p ${TEST_OUTPUT}
-        mkdir -p $BACKUP_DIR
-        cd $BACKUP_DIR
-        mkdir parts
-        cd parts
-        PART=$(aws s3api list-objects-v2 --bucket $S3_BUCKET --prefix performance/pagebench/ \
-          | jq -r '.Contents[]?.Key' \
-          | grep -E 'shared-snapshots-[0-9]{4}-[0-9]{2}-[0-9]{2}' \
-          | sort \
-          | tail -1)
-        echo "Latest PART: $PART"
-        if [[ -z "$PART" ]]; then
-          echo "ERROR: No matching S3 key found" >&2
-          exit 1
-        fi
-        S3_KEY=$(dirname $PART)
-        time aws s3 cp --only-show-errors --recursive s3://${S3_BUCKET}/$S3_KEY/ .
-        cd $TEST_OUTPUT
-        time cat $BACKUP_DIR/parts/* | zstdcat | tar --extract --preserve-permissions
-        rm -rf ${BACKUP_DIR}
+        curl -k -X 'GET' \
+        "${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
+        -H 'accept: application/gzip' \
+        -H "Authorization: Bearer $API_KEY" \
+        --output "test_log_${GITHUB_RUN_ID}.gz"

-    - name: Cache poetry deps
-      uses: actions/cache@v4
-      with:
-        path: ~/.cache/pypoetry/virtualenvs
-        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
-
-    - name: Install Python deps
-      shell: bash -euxo pipefail {0}
-      run: ./scripts/pysync
-
-    # we need high number of open files for pagebench
-    - name: show ulimits
-      shell: bash -euxo pipefail {0}
+    - name: Unzip Test Log and Print it into this job's log
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
-        ulimit -a
-
-    - name: Run pagebench testcase
-      shell: bash -euxo pipefail {0}
-      env:
-        CI: false  # need to override this env variable set by github to enforce using snapshots
-      run: |
-        export PLATFORM=hetzner-unit-perf-${COMMIT_HASH_TYPE}
-        # report the commit hash of the neon repository in the revision of the test results
-        export GITHUB_SHA=${COMMIT_HASH}
-        rm -rf ${PERF_REPORT_DIR}
-        rm -rf ${ALLURE_RESULTS_DIR}
-        mkdir -p ${PERF_REPORT_DIR}
-        mkdir -p ${ALLURE_RESULTS_DIR}
-        PARAMS="--alluredir=${ALLURE_RESULTS_DIR} --tb=short --verbose -rA"
-        EXTRA_PARAMS="--out-dir ${PERF_REPORT_DIR} --durations-path $TEST_OUTPUT/benchmark_durations.json"
-        # run only two selected tests
-        # environment set by parent:
-        # RUST_BACKTRACE=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release
-        ./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_throughput_with_n_tenants ${EXTRA_PARAMS}
-        ./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant ${EXTRA_PARAMS}
-
-    - name: upload the performance metrics to the Neon performance database which is used by grafana dashboards to display the results
-      shell: bash -euxo pipefail {0}
-      run: |
-        export REPORT_FROM="$PERF_REPORT_DIR"
-        export GITHUB_SHA=${COMMIT_HASH}
-        time ./scripts/generate_and_push_perf_report.sh
-
-    - name: Upload test results
-      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report-store
-      with:
-        report-dir:  ${{ steps.set-env.outputs.allure_results_dir }}
-        unique-key: ${{ env.BUILD_TYPE }}-${{ env.DEFAULT_PG_VERSION }}-${{ runner.arch }}
-        aws-oidc-role-arn:  ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        gzip -d "test_log_${GITHUB_RUN_ID}.gz"
+        cat "test_log_${GITHUB_RUN_ID}"

    - name: Create Allure report
-      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

-    - name: Upload snapshots
-      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.recreate_snapshots != 'false' && github.event.inputs.recreate_snapshots != '' }}
-      id: upload_snapshots
-      shell: bash -euxo pipefail {0}
-      run: |
-        mkdir -p $BACKUP_DIR
-        cd $TEST_OUTPUT
-        tar --create --preserve-permissions --file - shared-snapshots | zstd -o $BACKUP_DIR/shared_snapshots.tar.zst
-        cd $BACKUP_DIR
-        mkdir parts
-        split -b 1G shared_snapshots.tar.zst ./parts/shared_snapshots.tar.zst.part.
-        SNAPSHOT_DATE=$(date +%F)  # YYYY-MM-DD
-        cd parts
-        time aws s3 cp --recursive . s3://${S3_BUCKET}/performance/pagebench/shared-snapshots-${SNAPSHOT_DATE}/
-
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
@@ -260,22 +157,26 @@ jobs:
        slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-        
+
    - name: Cleanup Test Resources
      if: always()
-      shell: bash -euxo pipefail {0}
-      env:
-        ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
      run: |
-        # Cleanup the test resources
-        if [[ -d "${BACKUP_DIR}" ]]; then
-          rm -rf ${BACKUP_DIR}
-        fi
-        if [[ -d "${TEST_OUTPUT}" ]]; then
-          rm -rf ${TEST_OUTPUT}
-        fi
-        if [[ -d "${NEON_DIR}" ]]; then
-          rm -rf ${NEON_DIR}
-        fi
-        rm -rf $(dirname $ARCHIVE)
+        curl -k -X 'POST' \
+        "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
+        -H 'accept: application/json' \
+        -H "Authorization: Bearer $API_KEY" \
+        -d ''

+    - name: Assume AWS OIDC role that allows managing the EC2 machine (start/stop/describe...)
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
+      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
+        role-duration-seconds: 3600
+
+    - name: Stop EC2 instance and wait for the instance to be stopped
+      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
+      run: |
+        aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
+        aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,6 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
-pgxn/neon/communicator/communicator_bindings.h

 # Coverage
 *.profraw
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,6 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
-    "pageserver/client_grpc",
    "pageserver/pagebench",
    "pageserver/page_api",
    "proxy",
@@ -33,7 +32,6 @@ members = [
    "libs/pq_proto",
    "libs/tenant_size_model",
    "libs/metrics",
-    "libs/neonart",
    "libs/postgres_connection",
    "libs/remote_storage",
    "libs/tracing-utils",
@@ -46,7 +44,6 @@ members = [
    "libs/proxy/postgres-types2",
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
-    "pgxn/neon/communicator",
 ]

 [workspace.package]
@@ -90,7 +87,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
 comfy-table = "7.1"
 const_format = "0.2"
-crossbeam-utils = "0.8.21"
 crc32c = "0.6"
 diatomic-waker = { version = "0.2.3" }
 either = "1.8"
@@ -149,12 +145,11 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
-peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
-prost = "0.13.5"
+prost = "0.13"
 rand = "0.8"
 redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
@@ -184,7 +179,6 @@ smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 spki = "0.7.3"
-spin = "0.9.8"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
@@ -196,16 +190,16 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
 tokio-stream = "0.1"
 tokio-tar = "0.3"
-tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
+tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
-tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
-tonic-reflection = { version = "0.13.1", features = ["server"] }
+tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
 tower = { version = "0.5.2", default-features = false }
 tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }

@@ -237,9 +231,6 @@ x509-cert = { version = "0.2.5" }
 env_logger = "0.11"
 log = "0.4"

-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
@@ -255,46 +246,41 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus
 ## Local libraries
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
-desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
-neonart = { version = "0.1", path = "./libs/neonart/" }
-neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
-pageserver_client_grpc = { path = "./pageserver/client_grpc" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
 postgres_initdb = { path = "./libs/postgres_initdb" }
-posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
 safekeeper_client = { path = "./safekeeper/client" }
+desim = { version = "0.1", path = "./libs/desim" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 storage_controller_client = { path = "./storage_controller/client" }
 tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
 tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
 utils = { version = "0.1", path = "./libs/utils/" }
 vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
-wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
 walproposer = { version = "0.1", path = "./libs/walproposer/" }
+wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }

 ## Common library dependency
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
-cbindgen = "0.28.0"
 criterion = "0.5.1"
 rcgen = "0.13"
 rstest = "0.18"
 camino-tempfile = "1.0.2"
-tonic-build = "0.13.1"
+tonic-build = "0.12"

 [patch.crates-io]

--- a/7
+++ b/7
@@ -18,12 +18,10 @@ ifeq ($(BUILD_TYPE),release)
 	PG_LDFLAGS = $(LDFLAGS)
 	# Unfortunately, `--profile=...` is a nightly feature
 	CARGO_BUILD_FLAGS += --release
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
 	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
@@ -182,16 +180,11 @@ postgres-check-%: postgres-%

 .PHONY: neon-pg-ext-%
 neon-pg-ext-%: postgres-%
-	+@echo "Compiling communicator $*"
-	$(CARGO_CMD_PREFIX) cargo build -p communicator $(CARGO_BUILD_FLAGS)
-
 	+@echo "Compiling neon $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-		LIBCOMMUNICATOR_PATH=$(NEON_CARGO_ARTIFACT_TARGET_DIR) \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
-
 	+@echo "Compiling neon_walredo $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -155,7 +155,7 @@ RUN set -e \

 # Keep the version the same as in compute/compute-node.Dockerfile and
 # test_runner/regress/test_compute_metrics.py.
-ENV SQL_EXPORTER_VERSION=0.17.3
+ENV SQL_EXPORTER_VERSION=0.17.0
 RUN curl -fsSL \
    "https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
    --output sql_exporter.tar.gz \
@@ -310,13 +310,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
-    cargo install rustfilt            --version ${RUSTFILT_VERSION} --locked && \
-    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} --locked && \
-    cargo install cargo-deny          --version ${CARGO_DENY_VERSION} --locked && \
-    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} --locked && \
-    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} --locked && \
-    cargo install cargo-chef          --version ${CARGO_CHEF_VERSION} --locked && \
-    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} --locked \
+    cargo install rustfilt            --version ${RUSTFILT_VERSION} && \
+    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} && \
+    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
+    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} && \
+    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} && \
+    cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
+    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} \
                                      --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -297,7 +297,6 @@ RUN ./autogen.sh && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    make staged-install && \
    cd extensions/postgis && \
    make clean && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -583,38 +582,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control

-#########################################################################################
-#
-# Layer "online_advisor-build"
-# compile online_advisor extension
-#
-#########################################################################################
-FROM build-deps AS online_advisor-src
-ARG PG_VERSION
-
-# online_advisor supports all Postgres version starting from PG14, but prior to PG17 has to be included in preload_shared_libraries
-# last release 1.0 - May 15, 2025
-WORKDIR /ext-src
-RUN case "${PG_VERSION:?}" in \
-    "v17") \
-        ;; \
-    *) \
-        echo "skipping the version of online_advistor for $PG_VERSION" && exit 0 \
-        ;; \
-    esac && \
-	wget https://github.com/knizhnik/online_advisor/archive/refs/tags/1.0.tar.gz -O online_advisor.tar.gz && \
-    echo "37dcadf8f7cc8d6cc1f8831276ee245b44f1b0274f09e511e47a67738ba9ed0f online_advisor.tar.gz" | sha256sum --check && \
-    mkdir online_advisor-src && cd online_advisor-src && tar xzf ../online_advisor.tar.gz --strip-components=1 -C .
-
-FROM pg-build AS online_advisor-build
-COPY --from=online_advisor-src /ext-src/ /ext-src/
-WORKDIR /ext-src/
-RUN if [ -d online_advisor-src ]; then \
-	    cd online_advisor-src && \
-        make -j install && \
-        echo 'trusted = true' >> /usr/local/pgsql/share/extension/online_advisor.control; \
-    fi
-
 #########################################################################################
 #
 # Layer "pg_hashids-build"
@@ -1181,14 +1148,14 @@ RUN cd exts/rag && \
 RUN cd exts/rag_bge_small_en_v15 && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
-        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/bge_small_en_v15.onnx \
+        REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
        cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control

 RUN cd exts/rag_jina_reranker_v1_tiny_en && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
-        REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/jina_reranker_v1_tiny_en.onnx \
+        REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
        cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control

@@ -1681,7 +1648,6 @@ COPY --from=pg_jsonschema-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_graphql-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_tiktoken-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=hypopg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=online_advisor-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_hashids-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=rum-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgtap-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1785,17 +1751,17 @@ ARG TARGETARCH
 RUN if [ "$TARGETARCH" = "amd64" ]; then\
        postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\
        pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
-        sql_exporter_sha256='9a41127a493e8bfebfe692bf78c7ed2872a58a3f961ee534d1b0da9ae584aaab';\
+        sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
    else\
        postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\
        pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
-        sql_exporter_sha256='530e6afc77c043497ed965532c4c9dfa873bc2a4f0b3047fad367715c0081d6a';\
+        sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
    fi\
    && curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\
     | tar xzf - --strip-components=1 -C.\
    && curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
     | tar xzf - --strip-components=1 -C.\
-    && curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.3/sql_exporter-0.17.3.linux-${TARGETARCH}.tar.gz\
+    && curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.0/sql_exporter-0.17.0.linux-${TARGETARCH}.tar.gz\
     | tar xzf - --strip-components=1 -C.\
    && echo "${postgres_exporter_sha256} postgres_exporter" | sha256sum -c -\
    && echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
@@ -1843,27 +1809,12 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute

 FROM pg-build AS extension-tests
 ARG PG_VERSION
-# This is required for the PostGIS test
-RUN apt-get update && case $DEBIAN_VERSION in \
-      bullseye) \
-        apt-get install -y libproj19 libgdal28 time; \
-      ;; \
-      bookworm) \
-        apt-get install -y libgdal32 libproj25 time; \
-      ;; \
-      *) \
-        echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
-      ;; \
-    esac
-
 COPY docker-compose/ext-src/ /ext-src/

 COPY --from=pg-build /postgres /postgres
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=postgis-build /ext-src/postgis-src /ext-src/postgis-src
-COPY --from=postgis-build /sfcgal/* /usr
+#COPY --from=postgis-src /ext-src/ /ext-src/
 COPY --from=plv8-src /ext-src/ /ext-src/
-COPY --from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src
+#COPY --from=h3-pg-src /ext-src/ /ext-src/
 COPY --from=postgresql-unit-src /ext-src/ /ext-src/
 COPY --from=pgvector-src /ext-src/ /ext-src/
 COPY --from=pgjwt-src /ext-src/ /ext-src/
@@ -1872,7 +1823,6 @@ COPY --from=pgjwt-src /ext-src/ /ext-src/
 COPY --from=pg_graphql-src /ext-src/ /ext-src/
 #COPY --from=pg_tiktoken-src /ext-src/ /ext-src/
 COPY --from=hypopg-src /ext-src/ /ext-src/
-COPY --from=online_advisor-src /ext-src/ /ext-src/
 COPY --from=pg_hashids-src /ext-src/ /ext-src/
 COPY --from=rum-src /ext-src/ /ext-src/
 COPY --from=pgtap-src /ext-src/ /ext-src/
@@ -1902,7 +1852,6 @@ COPY compute/patches/pg_repack.patch /ext-src
 RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch

 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
-RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
 RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
   && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
--- a/compute/manifest.yaml
+++ b/compute/manifest.yaml
@@ -1,121 +0,0 @@
-pg_settings:
-  # Common settings for primaries and replicas of all versions.
-  common:
-    # Check for client disconnection every 1 minute. By default, Postgres will detect the
-    # loss of the connection only at the next interaction with the socket, when it waits
-    # for, receives or sends data, so it will likely waste resources till the end of the
-    # query execution. There should be no drawbacks in setting this for everyone, so enable
-    # it by default. If anyone will complain, we can allow editing it.
-    # https://www.postgresql.org/docs/16/runtime-config-connection.html#GUC-CLIENT-CONNECTION-CHECK-INTERVAL
-    client_connection_check_interval: "60000" # 1 minute
-    # ---- IO ---- 
-    effective_io_concurrency: "20"
-    maintenance_io_concurrency: "100"
-    fsync: "off"
-    hot_standby: "off"
-    # We allow users to change this if needed, but by default we
-    # just don't want to see long-lasting idle transactions, as they
-    # prevent activity monitor from suspending projects.
-    idle_in_transaction_session_timeout: "300000" # 5 minutes
-    listen_addresses: "*"
-    # --- LOGGING ---- helps investigations
-    log_connections: "on"
-    log_disconnections: "on"
-    # 1GB, unit is KB
-    log_temp_files: "1048576"
-    # Disable dumping customer data to logs, both to increase data privacy
-    # and to reduce the amount the logs.
-    log_error_verbosity: "terse"
-    log_min_error_statement: "panic"
-    max_connections: "100"
-    # --- WAL ---
-    # - flush lag is the max amount of WAL that has been generated but not yet stored
-    # to disk in the page server. A smaller value means less delay after a pageserver
-    # restart, but if you set it too small you might again need to slow down writes if the
-    # pageserver cannot flush incoming WAL to disk fast enough. This must be larger
-    # than the pageserver's checkpoint interval, currently 1 GB! Otherwise you get a
-    # a deadlock where the compute node refuses to generate more WAL before the
-    # old WAL has been uploaded to S3, but the pageserver is waiting for more WAL
-    # to be generated before it is uploaded to S3.
-    max_replication_flush_lag: "10GB"
-    max_replication_slots: "10"
-    # Backpressure configuration:
-    # - write lag is the max amount of WAL that has been generated by Postgres but not yet
-    # processed by the page server. Making this smaller reduces the worst case latency
-    # of a GetPage request, if you request a page that was recently modified. On the other
-    # hand, if this is too small, the compute node might need to wait on a write if there is a
-    # hiccup in the network or page server so that the page server has temporarily fallen
-    # behind.
-    #
-    # Previously it was set to 500 MB, but it caused compute being unresponsive under load
-    # https://github.com/neondatabase/neon/issues/2028
-    max_replication_write_lag: "500MB"
-    max_wal_senders: "10"
-    # A Postgres checkpoint is cheap in storage, as doesn't involve any significant amount
-    # of real I/O. Only the SLRU buffers and some other small files are flushed to disk.
-    # However, as long as we have full_page_writes=on, page updates after a checkpoint
-    # include full-page images which bloats the WAL. So may want to bump max_wal_size to
-    # reduce the WAL bloating, but at the same it will increase pg_wal directory size on
-    # compute and can lead to out of disk error on k8s nodes.
-    max_wal_size: "1024"
-    wal_keep_size: "0"
-    wal_level: "replica"
-    # Reduce amount of WAL generated by default.
-    wal_log_hints: "off"
-    # - without wal_sender_timeout set we don't get feedback messages,
-    # required for backpressure.
-    wal_sender_timeout: "10000"
-    # We have some experimental extensions, which we don't want users to install unconsciously.
-    # To install them, users would need to set the `neon.allow_unstable_extensions` setting.
-    # There are two of them currently:
-    # - `pgrag` - https://github.com/neondatabase-labs/pgrag - extension is actually called just `rag`,
-    #                                                          and two dependencies:
-    #                                                          - `rag_bge_small_en_v15`
-    #                                                          - `rag_jina_reranker_v1_tiny_en`
-    # - `pg_mooncake` - https://github.com/Mooncake-Labs/pg_mooncake/  
-    neon.unstable_extensions: "rag,rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en,pg_mooncake,anon"
-    neon.protocol_version: "3"
-    password_encryption: "scram-sha-256"
-    # This is important to prevent Postgres from trying to perform
-    # a local WAL redo after backend crash. It should exit and let
-    # the systemd or k8s to do a fresh startup with compute_ctl.
-    restart_after_crash: "off"
-    # By default 3. We have the following persistent connections in the VM:
-    # * compute_activity_monitor (from compute_ctl)
-    # * postgres-exporter (metrics collector; it has 2 connections)
-    # * sql_exporter (metrics collector; we have 2 instances [1 for us & users; 1 for autoscaling])
-    # * vm-monitor (to query & change file cache size)
-    # i.e. total of 6. Let's reserve 7, so there's still at least one left over.
-    superuser_reserved_connections: "7"
-    synchronous_standby_names: "walproposer"
-
-  replica:
-    hot_standby: "on"
-
-  per_version:
-    17:
-      common:
-        # PostgreSQL 17 has a new IO system called "read stream", which can combine IOs up to some
-        # size. It still has some issues with readahead, though, so we default to disabled/
-        # "no combining of IOs" to make sure we get the maximum prefetch depth.
-        # See also: https://github.com/neondatabase/neon/pull/9860
-        io_combine_limit: "1"
-      replica:
-        # prefetching of blocks referenced in WAL doesn't make sense for us
-        # Neon hot standby ignores pages that are not in the shared_buffers
-        recovery_prefetch: "off"
-    16:
-      common:
-      replica:
-        # prefetching of blocks referenced in WAL doesn't make sense for us
-        # Neon hot standby ignores pages that are not in the shared_buffers
-        recovery_prefetch: "off"
-    15:
-      common:
-      replica:
-        # prefetching of blocks referenced in WAL doesn't make sense for us
-        # Neon hot standby ignores pages that are not in the shared_buffers
-        recovery_prefetch: "off"
-    14:
-      common:
-      replica:
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -38,7 +38,6 @@ once_cell.workspace = true
 opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
 p256 = { version = "0.13", features = ["pem"] }
-pageserver_page_api.workspace = true
 postgres.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["json"] }
@@ -54,7 +53,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
-tonic.workspace = true
 tower-otel.workspace = true
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -40,7 +40,7 @@ use std::sync::mpsc;
 use std::thread;
 use std::time::Duration;

-use anyhow::{Context, Result, bail};
+use anyhow::{Context, Result};
 use clap::Parser;
 use compute_api::responses::ComputeConfig;
 use compute_tools::compute::{
@@ -57,15 +57,31 @@ use tracing::{error, info};
 use url::Url;
 use utils::failpoint_support;

-#[derive(Debug, Parser)]
+// Compatibility hack: any value that is not an explicit http(s):// URL (e.g. one
+// passed through the legacy `remote-ext-config` alias) is replaced by the default
+// extension storage proxy gateway URL. Remove once the control plane passes the URL.
+fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
+    const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
+        "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
+
+    Ok(if arg.starts_with("http://") || arg.starts_with("https://") {
+        arg
+    } else {
+        FALLBACK_PG_EXT_GATEWAY_BASE_URL
+    }
+    .to_owned())
+}
+
+#[derive(Parser)]
 #[command(rename_all = "kebab-case")]
 struct Cli {
    #[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
    pub pgbin: String,

    /// The base URL for the remote extension storage proxy gateway.
-    #[arg(short = 'r', long, value_parser = Self::parse_remote_ext_base_url)]
-    pub remote_ext_base_url: Option<Url>,
+    /// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
+    #[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
+    pub remote_ext_base_url: Option<String>,

    /// The port to bind the external listening HTTP server to. Clients running
    /// outside the compute will talk to the compute through this port. Keep
@@ -120,29 +136,6 @@ struct Cli {
        requires = "compute-id"
    )]
    pub control_plane_uri: Option<String>,
-
-    /// Interval in seconds for collecting installed extensions statistics
-    #[arg(long, default_value = "3600")]
-    pub installed_extensions_collection_interval: u64,
-}
-
-impl Cli {
-    /// Parse a URL from an argument. By default, this isn't necessary, but we
-    /// want to do some sanity checking.
-    fn parse_remote_ext_base_url(value: &str) -> Result<Url> {
-        // Remove extra trailing slashes, and add one. We use Url::join() later
-        // when downloading remote extensions. If the base URL is something like
-        // http://example.com/pg-ext-s3-gateway, and join() is called with
-        // something like "xyz", the resulting URL is http://example.com/xyz.
-        let value = value.trim_end_matches('/').to_owned() + "/";
-        let url = Url::parse(&value)?;
-
-        if url.query_pairs().count() != 0 {
-            bail!("parameters detected in remote extensions base URL")
-        }
-
-        Ok(url)
-    }
 }

 fn main() -> Result<()> {
@@ -186,7 +179,6 @@ fn main() -> Result<()> {
            cgroup: cli.cgroup,
            #[cfg(target_os = "linux")]
            vm_monitor_addr: cli.vm_monitor_addr,
-            installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
        },
        config,
    )?;
@@ -271,8 +263,7 @@ fn handle_exit_signal(sig: i32) {

 #[cfg(test)]
 mod test {
-    use clap::{CommandFactory, Parser};
-    use url::Url;
+    use clap::CommandFactory;

    use super::Cli;

@@ -282,41 +273,16 @@ mod test {
    }

    #[test]
-    fn verify_remote_ext_base_url() {
-        let cli = Cli::parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--remote-ext-base-url",
-            "https://example.com/subpath",
-        ]);
-        assert_eq!(
-            cli.remote_ext_base_url.unwrap(),
-            Url::parse("https://example.com/subpath/").unwrap()
-        );
+    fn parse_pg_ext_gateway_base_url() {
+        let arg = "http://pg-ext-s3-gateway2";
+        let result = super::parse_remote_ext_base_url(arg).unwrap();
+        assert_eq!(result, arg);

-        let cli = Cli::parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--remote-ext-base-url",
-            "https://example.com//",
-        ]);
+        let arg = "pg-ext-s3-gateway";
+        let result = super::parse_remote_ext_base_url(arg).unwrap();
        assert_eq!(
-            cli.remote_ext_base_url.unwrap(),
-            Url::parse("https://example.com").unwrap()
+            result,
+            "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
        );
-
-        Cli::try_parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--remote-ext-base-url",
-            "https://example.com?hello=world",
-        ])
-        .expect_err("URL parameters are not allowed");
    }
 }
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -339,8 +339,6 @@ async fn run_dump_restore(
    destination_connstring: String,
 ) -> Result<(), anyhow::Error> {
    let dumpdir = workdir.join("dumpdir");
-    let num_jobs = num_cpus::get().to_string();
-    info!("using {num_jobs} jobs for dump/restore");

    let common_args = [
        // schema mapping (prob suffices to specify them on one side)
@@ -356,7 +354,7 @@ async fn run_dump_restore(
        "directory".to_string(),
        // concurrency
        "--jobs".to_string(),
-        num_jobs,
+        num_cpus::get().to_string(),
        // progress updates
        "--verbose".to_string(),
    ];
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,9 +1,9 @@
-use anyhow::{Context, Result, anyhow};
+use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{
    ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
-    LfcPrewarmState, TlsConfig,
+    LfcPrewarmState,
 };
 use compute_api::spec::{
    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
@@ -15,7 +15,6 @@ use itertools::Itertools;
 use nix::sys::signal::{Signal, kill};
 use nix::unistd::Pid;
 use once_cell::sync::Lazy;
-use pageserver_page_api as page_api;
 use postgres;
 use postgres::NoTls;
 use postgres::error::SqlState;
@@ -30,11 +29,8 @@ use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::io::AsyncReadExt;
 use tokio::spawn;
-use tokio_util::io::StreamReader;
 use tracing::{Instrument, debug, error, info, instrument, warn};
-use url::Url;
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 use utils::measured_stream::MeasuredReader;
@@ -100,10 +96,7 @@ pub struct ComputeNodeParams {
    pub internal_http_port: u16,

    /// the address of extension storage proxy gateway
-    pub remote_ext_base_url: Option<Url>,
-
-    /// Interval for installed extensions collection
-    pub installed_extensions_collection_interval: u64,
+    pub remote_ext_base_url: Option<String>,
 }

 /// Compute node info shared across several `compute_ctl` threads.
@@ -372,7 +365,7 @@ impl ComputeNode {

        let mut new_state = ComputeState::new();
        if let Some(spec) = config.spec {
-            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow!(msg))?;
+            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
            new_state.pspec = Some(pspec);
        }

@@ -399,7 +392,7 @@ impl ComputeNode {
        // because QEMU will already have its memory allocated from the host, and
        // the necessary binaries will already be cached.
        if cli_spec.is_none() {
-            this.prewarm_postgres_vm_memory()?;
+            this.prewarm_postgres()?;
        }

        // Set the up metric with Empty status before starting the HTTP server.
@@ -606,8 +599,6 @@ impl ComputeNode {
            });
        }

-        let tls_config = self.tls_config(&pspec.spec);
-
        // If there are any remote extensions in shared_preload_libraries, start downloading them
        if pspec.spec.remote_extensions.is_some() {
            let (this, spec) = (self.clone(), pspec.spec.clone());
@@ -664,7 +655,7 @@ impl ComputeNode {
            info!("tuning pgbouncer");

            let pgbouncer_settings = pgbouncer_settings.clone();
-            let tls_config = tls_config.clone();
+            let tls_config = self.compute_ctl_config.tls.clone();

            // Spawn a background task to do the tuning,
            // so that we don't block the main thread that starts Postgres.
@@ -683,10 +674,7 @@ impl ComputeNode {

            // Spawn a background task to do the configuration,
            // so that we don't block the main thread that starts Postgres.
-
-            let mut local_proxy = local_proxy.clone();
-            local_proxy.tls = tls_config.clone();
-
+            let local_proxy = local_proxy.clone();
            let _handle = tokio::spawn(async move {
                if let Err(err) = local_proxy::configure(&local_proxy) {
                    error!("error while configuring local_proxy: {err:?}");
@@ -707,18 +695,25 @@ impl ComputeNode {
                let log_directory_path = Path::new(&self.params.pgdata).join("log");
                let log_directory_path = log_directory_path.to_string_lossy().to_string();

-                // Add project_id,endpoint_id to identify the logs.
+                // Add project_id,endpoint_id tag to identify the logs.
                //
                // These ids are passed from cplane,
-                let endpoint_id = pspec.spec.endpoint_id.as_deref().unwrap_or("");
-                let project_id = pspec.spec.project_id.as_deref().unwrap_or("");
+                // for backwards compatibility (old computes that don't have them),
+                // a single missing id becomes "None" in the "<project_id>/<endpoint_id>" tag.
+                // TODO: Clean up this code when all computes have them.
+                let tag: Option<String> = match (
+                    pspec.spec.project_id.as_deref(),
+                    pspec.spec.endpoint_id.as_deref(),
+                ) {
+                    (Some(project_id), Some(endpoint_id)) => {
+                        Some(format!("{project_id}/{endpoint_id}"))
+                    }
+                    (Some(project_id), None) => Some(format!("{project_id}/None")),
+                    (None, Some(endpoint_id)) => Some(format!("None/{endpoint_id}")),
+                    (None, None) => None,
+                };

-                configure_audit_rsyslog(
-                    log_directory_path.clone(),
-                    endpoint_id,
-                    project_id,
-                    &remote_endpoint,
-                )?;
+                configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;

                // Launch a background task to clean up the audit logs
                launch_pgaudit_gc(log_directory_path);
@@ -754,7 +749,17 @@ impl ComputeNode {

            let conf = self.get_tokio_conn_conf(None);
            tokio::task::spawn(async {
-                let _ = installed_extensions(conf).await;
+                let res = get_installed_extensions(conf).await;
+                match res {
+                    Ok(extensions) => {
+                        info!(
+                            "[NEON_EXT_STAT] {}",
+                            serde_json::to_string(&extensions)
+                                .expect("failed to serialize extensions list")
+                        );
+                    }
+                    Err(err) => error!("could not get installed extensions: {err:?}"),
+                }
            });
        }

@@ -784,10 +789,7 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

-        // Spawn the extension stats background task
-        self.spawn_extension_stats_task();
-
-        if pspec.spec.autoprewarm {
+        if pspec.spec.prewarm_lfc_on_startup {
            self.prewarm_lfc();
        }
        Ok(())
@@ -944,74 +946,6 @@ impl ComputeNode {
    #[instrument(skip_all, fields(%lsn))]
    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
-        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
-
-        match Url::parse(shard0_connstr)?.scheme() {
-            "postgres" | "postgresql" => self.try_get_basebackup_libpq(spec, lsn),
-            "grpc" => self.try_get_basebackup_grpc(spec, lsn),
-            scheme => return Err(anyhow!("unknown URL scheme {scheme}")),
-        }
-    }
-
-    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<()> {
-        let start_time = Instant::now();
-
-        let shard0_connstr = spec
-            .pageserver_connstr
-            .split(',')
-            .next()
-            .unwrap()
-            .to_string();
-
-        let chunks = tokio::runtime::Handle::current().block_on(async move {
-            let mut client = page_api::proto::PageServiceClient::connect(shard0_connstr).await?;
-
-            let req = page_api::proto::GetBaseBackupRequest {
-                lsn: lsn.0,
-                replica: false, // TODO: handle replicas, with LSN 0
-            };
-            let mut req = tonic::Request::new(req);
-            let metadata = req.metadata_mut();
-            metadata.insert("neon-tenant-id", spec.tenant_id.to_string().parse()?);
-            metadata.insert("neon-timeline-id", spec.timeline_id.to_string().parse()?);
-            metadata.insert("neon-shard-id", "0000".to_string().parse()?); // TODO: shard count
-            if let Some(auth) = spec.storage_auth_token.as_ref() {
-                metadata.insert("authorization", format!("Bearer {auth}").parse()?);
-            }
-
-            let chunks = client.get_base_backup(req).await?.into_inner();
-            anyhow::Ok(chunks)
-        })?;
-        let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
-
-        // Convert the chunks stream into an AsyncRead
-        let stream_reader = StreamReader::new(
-            chunks.map(|chunk| chunk.map(|c| c.chunk).map_err(std::io::Error::other)),
-        );
-
-        // Wrap the AsyncRead into a blocking reader for compatibility with tar::Archive
-        let reader = tokio_util::io::SyncIoBridge::new(stream_reader);
-        let mut measured_reader = MeasuredReader::new(reader);
-        let mut bufreader = std::io::BufReader::new(&mut measured_reader);
-
-        // Read the archive directly from the `CopyOutReader`
-        //
-        // Set `ignore_zeros` so that unpack() reads all the Copy data and
-        // doesn't stop at the end-of-archive marker. Otherwise, if the server
-        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(&mut bufreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.params.pgdata)?;
-
-        // Report metrics
-        let mut state = self.state.lock().unwrap();
-        state.metrics.pageserver_connect_micros = pageserver_connect_micros;
-        state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
-        state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
-        Ok(())
-    }
-
-    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<()> {
        let start_time = Instant::now();

        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
@@ -1027,10 +961,12 @@ impl ComputeNode {
        }

        config.application_name("compute_ctl");
-        config.options(&format!(
-            "-c neon.compute_mode={}",
-            spec.spec.mode.to_type_str()
-        ));
+        if let Some(spec) = &compute_state.pspec {
+            config.options(&format!(
+                "-c neon.compute_mode={}",
+                spec.spec.mode.to_type_str()
+            ));
+        }

        // Connect to pageserver
        let mut client = config.connect(NoTls)?;
@@ -1104,7 +1040,10 @@ impl ComputeNode {
                    return result;
                }
                Err(ref e) if attempts < max_attempts => {
-                    warn!("Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})");
+                    warn!(
+                        "Failed to get basebackup: {} (attempt {}/{})",
+                        e, attempts, max_attempts
+                    );
                    std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
                    retry_period_ms *= 1.5;
                }
@@ -1276,15 +1215,13 @@ impl ComputeNode {
        let spec = &pspec.spec;
        let pgdata_path = Path::new(&self.params.pgdata);

-        let tls_config = self.tls_config(&pspec.spec);
-
        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
        config::write_postgres_conf(
            pgdata_path,
            &pspec.spec,
            self.params.internal_http_port,
-            tls_config,
+            &self.compute_ctl_config.tls,
        )?;

        // Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1380,8 +1317,8 @@ impl ComputeNode {
    }

    /// Start and stop a postgres process to warm up the VM for startup.
-    pub fn prewarm_postgres_vm_memory(&self) -> Result<()> {
-        info!("prewarming VM memory");
+    pub fn prewarm_postgres(&self) -> Result<()> {
+        info!("prewarming");

        // Create pgdata
        let pgdata = &format!("{}.warmup", self.params.pgdata);
@@ -1423,7 +1360,7 @@ impl ComputeNode {
        kill(pm_pid, Signal::SIGQUIT)?;
        info!("sent SIGQUIT signal");
        pg.wait()?;
-        info!("done prewarming vm memory");
+        info!("done prewarming");

        // clean up
        let _ok = fs::remove_dir_all(pgdata);
@@ -1609,22 +1546,14 @@ impl ComputeNode {
                .clone(),
        );

-        let mut tls_config = None::<TlsConfig>;
-        if spec.features.contains(&ComputeFeature::TlsExperimental) {
-            tls_config = self.compute_ctl_config.tls.clone();
-        }
-
        let max_concurrent_connections = self.max_service_connections(compute_state, &spec);

        // Merge-apply spec & changes to PostgreSQL state.
        self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?;

        if let Some(local_proxy) = &spec.clone().local_proxy_config {
-            let mut local_proxy = local_proxy.clone();
-            local_proxy.tls = tls_config.clone();
-
            info!("configuring local_proxy");
-            local_proxy::configure(&local_proxy).context("apply_config local_proxy")?;
+            local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
        }

        // Run migrations separately to not hold up cold starts
@@ -1676,13 +1605,11 @@ impl ComputeNode {
    pub fn reconfigure(&self) -> Result<()> {
        let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;

-        let tls_config = self.tls_config(&spec);
-
        if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
            info!("tuning pgbouncer");

            let pgbouncer_settings = pgbouncer_settings.clone();
-            let tls_config = tls_config.clone();
+            let tls_config = self.compute_ctl_config.tls.clone();

            // Spawn a background task to do the tuning,
            // so that we don't block the main thread that starts Postgres.
@@ -1700,7 +1627,7 @@ impl ComputeNode {
            // Spawn a background task to do the configuration,
            // so that we don't block the main thread that starts Postgres.
            let mut local_proxy = local_proxy.clone();
-            local_proxy.tls = tls_config.clone();
+            local_proxy.tls = self.compute_ctl_config.tls.clone();
            tokio::spawn(async move {
                if let Err(err) = local_proxy::configure(&local_proxy) {
                    error!("error while configuring local_proxy: {err:?}");
@@ -1718,7 +1645,7 @@ impl ComputeNode {
            pgdata_path,
            &spec,
            self.params.internal_http_port,
-            tls_config,
+            &self.compute_ctl_config.tls,
        )?;

        if !spec.skip_pg_catalog_updates {
@@ -1838,14 +1765,6 @@ impl ComputeNode {
        }
    }

-    pub fn tls_config(&self, spec: &ComputeSpec) -> &Option<TlsConfig> {
-        if spec.features.contains(&ComputeFeature::TlsExperimental) {
-            &self.compute_ctl_config.tls
-        } else {
-            &None::<TlsConfig>
-        }
-    }
-
    /// Update the `last_active` in the shared state, but ensure that it's a more recent one.
    pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
        let mut state = self.state.lock().unwrap();
@@ -1982,7 +1901,7 @@ LIMIT 100",
            self.params
                .remote_ext_base_url
                .as_ref()
-                .ok_or(DownloadError::BadInput(anyhow!(
+                .ok_or_else(|| DownloadError::BadInput(anyhow::anyhow!(
                    "Remote extensions storage is not configured",
                )))?;

@@ -2178,7 +2097,7 @@ LIMIT 100",
        let remote_extensions = spec
            .remote_extensions
            .as_ref()
-            .ok_or(anyhow!("Remote extensions are not configured"))?;
+            .ok_or_else(|| anyhow::anyhow!("Remote extensions are not configured"))?;

        info!("parse shared_preload_libraries from spec.cluster.settings");
        let mut libs_vec = Vec::new();
@@ -2280,41 +2199,6 @@ LIMIT 100",
            info!("Pageserver config changed");
        }
    }
-
-    pub fn spawn_extension_stats_task(&self) {
-        let conf = self.tokio_conn_conf.clone();
-        let installed_extensions_collection_interval =
-            self.params.installed_extensions_collection_interval;
-        tokio::spawn(async move {
-            // An initial sleep is added to ensure that two collections don't happen at the same time.
-            // The first collection happens during compute startup.
-            tokio::time::sleep(tokio::time::Duration::from_secs(
-                installed_extensions_collection_interval,
-            ))
-            .await;
-            let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
-                installed_extensions_collection_interval,
-            ));
-            loop {
-                interval.tick().await;
-                let _ = installed_extensions(conf.clone()).await;
-            }
-        });
-    }
-}
-
-pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
-    let res = get_installed_extensions(conf).await;
-    match res {
-        Ok(extensions) => {
-            info!(
-                "[NEON_EXT_STAT] {}",
-                serde_json::to_string(&extensions).expect("failed to serialize extensions list")
-            );
-        }
-        Err(err) => error!("could not get installed extensions: {err:?}"),
-    }
-    Ok(())
 }

 pub fn forward_termination_signal() {
--- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
+++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf
@@ -2,24 +2,10 @@
 module(load="imfile")

 # Input configuration for log files in the specified directory
-# The messages can be multiline. The start of the message is a timestamp
-# in "%Y-%m-%d %H:%M:%S.%3N GMT" (so timezone hardcoded).
-# Replace log_directory with the directory containing the log files
-input(type="imfile" File="{log_directory}/*.log"
-  Tag="pgaudit_log" Severity="info" Facility="local5"
-  startmsg.regex="^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,")
-
+# Replace {log_directory} with the directory containing the log files
+input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
 # the directory to store rsyslog state files
 global(workDirectory="/var/log/rsyslog")

-# Construct json, endpoint_id and project_id as additional metadata
-set $.json_log!endpoint_id = "{endpoint_id}";
-set $.json_log!project_id = "{project_id}";
-set $.json_log!msg = $msg;
-
-# Template suitable for rfc5424 syslog format
-template(name="PgAuditLog" type="string"
-    string="<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%")
-
-# Forward to remote syslog receiver (@@<hostname>:<port>;format
-local5.info @@{remote_endpoint};PgAuditLog
+# Forward logs to remote syslog server
+*.* @@{remote_endpoint}
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -83,7 +83,6 @@ use reqwest::StatusCode;
 use tar::Archive;
 use tracing::info;
 use tracing::log::warn;
-use url::Url;
 use zstd::stream::read::Decoder;

 use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
@@ -159,7 +158,7 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
 pub async fn download_extension(
    ext_name: &str,
    ext_path: &RemotePath,
-    remote_ext_base_url: &Url,
+    remote_ext_base_url: &str,
    pgbin: &str,
 ) -> Result<u64> {
    info!("Download extension {:?} from {:?}", ext_name, ext_path);
@@ -271,14 +270,10 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
 }

 // Do request to extension storage proxy, e.g.,
-// curl http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/latest/v15/extensions/anon.tar.zst
+// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
 // using HTTP GET and return the response body as bytes.
-async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Result<Bytes> {
-    let uri = remote_ext_base_url.join(ext_path).with_context(|| {
-        format!(
-            "failed to create the remote extension URI for {ext_path} using {remote_ext_base_url}"
-        )
-    })?;
+async fn download_extension_tar(remote_ext_base_url: &str, ext_path: &str) -> Result<Bytes> {
+    let uri = format!("{}/{}", remote_ext_base_url.trim_end_matches('/'), ext_path);
    let filename = Path::new(ext_path)
        .file_name()
        .unwrap_or_else(|| std::ffi::OsStr::new("unknown"))
@@ -288,7 +283,7 @@ async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Re

    info!("Downloading extension file '{}' from uri {}", filename, uri);

-    match do_extension_server_request(uri).await {
+    match do_extension_server_request(&uri).await {
        Ok(resp) => {
            info!("Successfully downloaded remote extension data {}", ext_path);
            REMOTE_EXT_REQUESTS_TOTAL
@@ -307,7 +302,7 @@ async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Re

 // Do a single remote extensions server request.
 // Return result or (error message + stringified status code) in case of any failures.
-async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)> {
+async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
    let resp = reqwest::get(uri).await.map_err(|e| {
        (
            format!(
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -48,9 +48,11 @@ impl JsonResponse {

    /// Create an error response related to the compute being in an invalid state
    pub(self) fn invalid_status(status: ComputeStatus) -> Response {
-        Self::error(
+        Self::create_response(
            StatusCode::PRECONDITION_FAILED,
-            format!("invalid compute status: {status}"),
+            &GenericAPIError {
+                error: format!("invalid compute status: {status}"),
+            },
        )
    }
 }
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -22,7 +22,7 @@ pub(in crate::http) async fn configure(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<ConfigurationRequest>,
 ) -> Response {
-    let pspec = match ParsedSpec::try_from(request.0.spec) {
+    let pspec = match ParsedSpec::try_from(request.spec.clone()) {
        Ok(p) => p,
        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
    };
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -13,12 +13,6 @@ use crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};

 const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);

-/// Struct to store runtime state of the compute monitor thread.
-/// In theory, this could be a part of `Compute`, but i)
-/// this state is expected to be accessed only by single thread,
-/// so we don't need to care about locking; ii) `Compute` is
-/// already quite big. Thus, it seems to be a good idea to keep
-/// all the activity/health monitoring parts here.
 struct ComputeMonitor {
    compute: Arc<ComputeNode>,

@@ -76,36 +70,12 @@ impl ComputeMonitor {
        )
    }

-    /// Check if compute is in some terminal or soon-to-be-terminal
-    /// state, then return `true`, signalling the caller that it
-    /// should exit gracefully. Otherwise, return `false`.
-    fn check_interrupts(&mut self) -> bool {
-        let compute_status = self.compute.get_status();
-        if matches!(
-            compute_status,
-            ComputeStatus::Terminated | ComputeStatus::TerminationPending | ComputeStatus::Failed
-        ) {
-            info!(
-                "compute is in {} status, stopping compute monitor",
-                compute_status
-            );
-            return true;
-        }
-
-        false
-    }
-
    /// Spin in a loop and figure out the last activity time in the Postgres.
-    /// Then update it in the shared state. This function currently never
-    /// errors out explicitly, but there is a graceful termination path.
-    /// Every time we receive an error trying to check Postgres, we use
-    /// [`ComputeMonitor::check_interrupts()`] because it could be that
-    /// compute is being terminated already, then we can exit gracefully
-    /// to not produce errors' noise in the log.
+    /// Then update it in the shared state. This function never errors out.
    /// NB: the only expected panic is at `Mutex` unwrap(), all other errors
    /// should be handled gracefully.
    #[instrument(skip_all)]
-    pub fn run(&mut self) -> anyhow::Result<()> {
+    pub fn run(&mut self) {
        // Suppose that `connstr` doesn't change
        let connstr = self.compute.params.connstr.clone();
        let conf = self
@@ -123,10 +93,6 @@ impl ComputeMonitor {
        info!("starting compute monitor for {}", connstr);

        loop {
-            if self.check_interrupts() {
-                break;
-            }
-
            match &mut client {
                Ok(cli) => {
                    if cli.is_closed() {
@@ -134,10 +100,6 @@ impl ComputeMonitor {
                            downtime_info = self.downtime_info(),
                            "connection to Postgres is closed, trying to reconnect"
                        );
-                        if self.check_interrupts() {
-                            break;
-                        }
-
                        self.report_down();

                        // Connection is closed, reconnect and try again.
@@ -149,19 +111,15 @@ impl ComputeMonitor {
                                self.compute.update_last_active(self.last_active);
                            }
                            Err(e) => {
-                                error!(
-                                    downtime_info = self.downtime_info(),
-                                    "could not check Postgres: {}", e
-                                );
-                                if self.check_interrupts() {
-                                    break;
-                                }
-
                                // Although we have many places where we can return errors in `check()`,
                                // normally it shouldn't happen. I.e., we will likely return error if
                                // connection got broken, query timed out, Postgres returned invalid data, etc.
                                // In all such cases it's suspicious, so let's report this as downtime.
                                self.report_down();
+                                error!(
+                                    downtime_info = self.downtime_info(),
+                                    "could not check Postgres: {}", e
+                                );

                                // Reconnect to Postgres just in case. During tests, I noticed
                                // that queries in `check()` can fail with `connection closed`,
@@ -178,10 +136,6 @@ impl ComputeMonitor {
                        downtime_info = self.downtime_info(),
                        "could not connect to Postgres: {}, retrying", e
                    );
-                    if self.check_interrupts() {
-                        break;
-                    }
-
                    self.report_down();

                    // Establish a new connection and try again.
@@ -193,9 +147,6 @@ impl ComputeMonitor {
            self.last_checked = Utc::now();
            thread::sleep(MONITOR_CHECK_INTERVAL);
        }
-
-        // Graceful termination path
-        Ok(())
    }

    #[instrument(skip_all)]
@@ -478,10 +429,7 @@ pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
        .spawn(move || {
            let span = span!(Level::INFO, "compute_monitor");
            let _enter = span.enter();
-            match monitor.run() {
-                Ok(_) => info!("compute monitor thread terminated gracefully"),
-                Err(err) => error!("compute monitor thread terminated abnormally {:?}", err),
-            }
+            monitor.run();
        })
        .expect("cannot launch compute monitor thread")
 }
--- a/compute_tools/src/rsyslog.rs
+++ b/compute_tools/src/rsyslog.rs
@@ -27,40 +27,6 @@ fn get_rsyslog_pid() -> Option<String> {
    }
 }

-fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
-    const MAX_WAIT: Duration = Duration::from_secs(5);
-    const INITIAL_SLEEP: Duration = Duration::from_millis(2);
-
-    let mut sleep_duration = INITIAL_SLEEP;
-    let start = std::time::Instant::now();
-    let mut attempts = 1;
-
-    for attempt in 1.. {
-        attempts = attempt;
-        match get_rsyslog_pid() {
-            Some(pid) => return Ok(pid),
-            None => {
-                if start.elapsed() >= MAX_WAIT {
-                    break;
-                }
-                info!(
-                    "rsyslogd is not running, attempt {}. Sleeping for {} ms",
-                    attempt,
-                    sleep_duration.as_millis()
-                );
-                std::thread::sleep(sleep_duration);
-                sleep_duration *= 2;
-            }
-        }
-    }
-
-    Err(anyhow::anyhow!(
-        "rsyslogd is not running after waiting for {} seconds and {} attempts",
-        attempts,
-        start.elapsed().as_secs()
-    ))
-}
-
 // Restart rsyslogd to apply the new configuration.
 // This is necessary, because there is no other way to reload the rsyslog configuration.
 //
@@ -70,29 +36,27 @@ fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
 // TODO: test it properly
 //
 fn restart_rsyslog() -> Result<()> {
+    let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?;
+    info!("rsyslogd is running with pid: {}, restart it", old_pid);
+
    // kill it to restart
    let _ = Command::new("pkill")
        .arg("rsyslogd")
        .output()
-        .context("Failed to restart rsyslogd")?;
-
-    // ensure rsyslogd is running
-    wait_for_rsyslog_pid()?;
+        .context("Failed to stop rsyslogd")?;

    Ok(())
 }

 pub fn configure_audit_rsyslog(
    log_directory: String,
-    endpoint_id: &str,
-    project_id: &str,
+    tag: Option<String>,
    remote_endpoint: &str,
 ) -> Result<()> {
    let config_content: String = format!(
        include_str!("config_template/compute_audit_rsyslog_template.conf"),
        log_directory = log_directory,
-        endpoint_id = endpoint_id,
-        project_id = project_id,
+        tag = tag.unwrap_or_default(),
        remote_endpoint = remote_endpoint
    );

@@ -167,11 +131,15 @@ pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result
        return Ok(());
    }

-    // Nothing to configure
+    // When new config is empty we can simply remove the configuration file.
    if new_config.is_empty() {
-        // When the configuration is removed, PostgreSQL will stop sending data
-        // to the files watched by rsyslog, so restarting rsyslog is more effort
-        // than just ignoring this change.
+        info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
+        match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
+            Ok(_) => {}
+            Err(err) if err.kind() == ErrorKind::NotFound => {}
+            Err(err) => return Err(err.into()),
+        }
+        restart_rsyslog()?;
        return Ok(());
    }

--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -30,7 +30,7 @@ mod pg_helpers_tests {
            r#"fsync = off
 wal_level = logical
 hot_standby = on
-autoprewarm = off
+prewarm_lfc_on_startup = off
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on
--- a/control_plane/safekeepers.conf
+++ b/control_plane/safekeepers.conf
@@ -2,10 +2,8 @@
 [pageserver]
 listen_pg_addr = '127.0.0.1:64000'
 listen_http_addr = '127.0.0.1:9898'
-listen_grpc_addr = '127.0.0.1:51051'
 pg_auth_type = 'Trust'
 http_auth_type = 'Trust'
-grpc_auth_type = 'Trust'

 [[safekeepers]]
 id = 1
--- a/control_plane/simple.conf
+++ b/control_plane/simple.conf
@@ -4,10 +4,8 @@
 id=1
 listen_pg_addr = '127.0.0.1:64000'
 listen_http_addr = '127.0.0.1:9898'
-listen_grpc_addr = '127.0.0.1:51051'
 pg_auth_type = 'Trust'
 http_auth_type = 'Trust'
-grpc_auth_type = 'Trust'

 [[safekeepers]]
 id = 1
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -18,7 +18,7 @@ use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
 use compute_api::spec::ComputeMode;
 use control_plane::broker::StorageBroker;
-use control_plane::endpoint::{ComputeControlPlane, PageserverProtocol};
+use control_plane::endpoint::ComputeControlPlane;
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
 use control_plane::local_env;
 use control_plane::local_env::{
@@ -32,7 +32,6 @@ use control_plane::storage_controller::{
 };
 use nix::fcntl::{Flock, FlockArg};
 use pageserver_api::config::{
-    DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT,
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
 };
@@ -664,10 +663,6 @@ struct EndpointStartCmdArgs {
    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
    #[arg(default_value = "90s")]
    start_timeout: Duration,
-
-    /// If enabled, use gRPC (and the communicator) to talk to Pageservers.
-    #[clap(long)]
-    grpc: bool,
 }

 #[derive(clap::Args)]
@@ -686,10 +681,6 @@ struct EndpointReconfigureCmdArgs {

    #[clap(long)]
    safekeepers: Option<String>,
-
-    /// If enabled, use gRPC (and communicator) to talk to Pageservers.
-    #[clap(long)]
-    grpc: bool,
 }

 #[derive(clap::Args)]
@@ -1016,16 +1007,13 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
                    let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
                    let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
                    let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
-                    let grpc_port = DEFAULT_PAGESERVER_GRPC_PORT + i;
                    NeonLocalInitPageserverConf {
                        id: pageserver_id,
                        listen_pg_addr: format!("127.0.0.1:{pg_port}"),
                        listen_http_addr: format!("127.0.0.1:{http_port}"),
                        listen_https_addr: None,
-                        listen_grpc_addr: Some(format!("127.0.0.1:{grpc_port}")),
                        pg_auth_type: AuthType::Trust,
                        http_auth_type: AuthType::Trust,
-                        grpc_auth_type: AuthType::Trust,
                        other: Default::default(),
                        // Typical developer machines use disks with slow fsync, and we don't care
                        // about data integrity: disable disk syncs.
@@ -1287,7 +1275,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
                mode: pageserver_api::models::TimelineCreateRequestMode::Branch {
                    ancestor_timeline_id,
                    ancestor_start_lsn: start_lsn,
-                    read_only: false,
                    pg_version: None,
                },
            };
@@ -1460,18 +1447,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res

            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
-                // Use gRPC if requested.
-                let (protocol, host, port) = if args.grpc {
-                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
-                    let (host, port) = parse_host_port(grpc_addr).expect("bad config");
-                    (PageserverProtocol::Grpc, host, port.unwrap_or(51051))
-                } else {
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr).expect("bad config");
-                    (PageserverProtocol::Libpq, host, port.unwrap_or(5432))
-                };
-                // If caller is telling us what pageserver to use, this is not a tenant which is
-                // fully managed by storage controller, therefore not sharded.
-                (vec![(protocol, host, port)], DEFAULT_STRIPE_SIZE)
+                let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
+                (
+                    vec![(parsed.0, parsed.1.unwrap_or(5432))],
+                    // If caller is telling us what pageserver to use, this is not a tenant which is
+                    // fully managed by storage controller, therefore not sharded.
+                    DEFAULT_STRIPE_SIZE,
+                )
            } else {
                // Look up the currently attached location of the tenant, and its striping metadata,
                // to pass these on to postgres.
@@ -1490,22 +1472,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                                .await?;
                        }

-                        let pageserver = if args.grpc {
-                            (
-                                PageserverProtocol::Grpc,
-                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC addr"))
-                                    .expect("bad hostname"),
-                                shard.listen_grpc_port.expect("no gRPC port"),
-                            )
-                        } else {
-                            (
-                                PageserverProtocol::Libpq,
-                                Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
-                                shard.listen_pg_port,
-                            )
-                        };
-
-                        anyhow::Ok(pageserver)
+                        anyhow::Ok((
+                            Host::parse(&shard.listen_pg_addr)
+                                .expect("Storage controller reported bad hostname"),
+                            shard.listen_pg_port,
+                        ))
                    }),
                )
                .await?;
@@ -1560,17 +1531,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                .get(endpoint_id.as_str())
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
            let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
-                let conf = env.get_pageserver_conf(ps_id)?;
-                // Use gRPC if requested.
-                let (protocol, host, port) = if args.grpc {
-                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
-                    let (host, port) = parse_host_port(grpc_addr).expect("bad config");
-                    (PageserverProtocol::Grpc, host, port.unwrap_or(51051))
-                } else {
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr).expect("bad config");
-                    (PageserverProtocol::Libpq, host, port.unwrap_or(5432))
-                };
-                vec![(protocol, host, port)]
+                let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
+                vec![(
+                    pageserver.pg_connection_config.host().clone(),
+                    pageserver.pg_connection_config.port(),
+                )]
            } else {
                let storage_controller = StorageController::from_env(env);
                storage_controller
@@ -1579,20 +1544,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                    .shards
                    .into_iter()
                    .map(|shard| {
-                        if args.grpc {
-                            (
-                                PageserverProtocol::Grpc,
-                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC addr"))
-                                    .expect("bad hostname"),
-                                shard.listen_grpc_port.expect("no gRPC port"),
-                            )
-                        } else {
-                            (
-                                PageserverProtocol::Libpq,
-                                Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
-                                shard.listen_pg_port,
-                            )
-                        }
+                        (
+                            Host::parse(&shard.listen_pg_addr)
+                                .expect("Storage controller reported malformed host"),
+                            shard.listen_pg_port,
+                        )
                    })
                    .collect::<Vec<_>>()
            };
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -37,7 +37,6 @@
 //! ```
 //!
 use std::collections::BTreeMap;
-use std::fmt::Display;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
 use std::process::Command;
@@ -75,6 +74,7 @@ use utils::id::{NodeId, TenantId, TimelineId};

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
+use crate::storage_controller::StorageController;

 // contents of a endpoint.json file
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -331,7 +331,7 @@ pub enum EndpointStatus {
    RunningNoPidfile,
 }

-impl Display for EndpointStatus {
+impl std::fmt::Display for EndpointStatus {
    fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
        let s = match self {
            Self::Running => "running",
@@ -343,28 +343,6 @@ impl Display for EndpointStatus {
    }
 }

-#[derive(Clone, Copy, Debug)]
-pub enum PageserverProtocol {
-    Libpq,
-    Grpc,
-}
-
-impl PageserverProtocol {
-    /// Returns the URL scheme for the protocol, used in connstrings.
-    pub fn scheme(&self) -> &'static str {
-        match self {
-            Self::Libpq => "postgresql",
-            Self::Grpc => "grpc",
-        }
-    }
-}
-
-impl Display for PageserverProtocol {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.scheme())
-    }
-}
-
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -628,10 +606,10 @@ impl Endpoint {
        }
    }

-    fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
+    fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
        pageservers
            .iter()
-            .map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
+            .map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
            .collect::<Vec<_>>()
            .join(",")
    }
@@ -676,7 +654,7 @@ impl Endpoint {
        endpoint_storage_addr: String,
        safekeepers_generation: Option<SafekeeperGeneration>,
        safekeepers: Vec<NodeId>,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        pageservers: Vec<(Host, u16)>,
        remote_ext_base_url: Option<&String>,
        shard_stripe_size: usize,
        create_test_user: bool,
@@ -769,7 +747,7 @@ impl Endpoint {
                logs_export_host: None::<String>,
                endpoint_storage_addr: Some(endpoint_storage_addr),
                endpoint_storage_token: Some(endpoint_storage_token),
-                autoprewarm: false,
+                prewarm_lfc_on_startup: false,
            };

            // this strange code is needed to support respec() in tests
@@ -961,12 +939,10 @@ impl Endpoint {

    pub async fn reconfigure(
        &self,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        mut pageservers: Vec<(Host, u16)>,
        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
    ) -> Result<()> {
-        anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
-
        let (mut spec, compute_ctl_config) = {
            let config_path = self.endpoint_path().join("config.json");
            let file = std::fs::File::open(config_path)?;
@@ -978,7 +954,25 @@ impl Endpoint {
        let postgresql_conf = self.read_postgresql_conf()?;
        spec.cluster.postgresql_conf = Some(postgresql_conf);

+        // If we weren't given explicit pageservers, query the storage controller
+        if pageservers.is_empty() {
+            let storage_controller = StorageController::from_env(&self.env);
+            let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
+            pageservers = locate_result
+                .shards
+                .into_iter()
+                .map(|shard| {
+                    (
+                        Host::parse(&shard.listen_pg_addr)
+                            .expect("Storage controller reported bad hostname"),
+                        shard.listen_pg_port,
+                    )
+                })
+                .collect::<Vec<_>>();
+        }
+
        let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
+        anyhow::ensure!(!pageserver_connstr.is_empty(), "no pageservers found");
        spec.pageserver_connstring = Some(pageserver_connstr);
        if stripe_size.is_some() {
            spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -278,10 +278,8 @@ pub struct PageServerConf {
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub listen_https_addr: Option<String>,
-    pub listen_grpc_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
-    pub grpc_auth_type: AuthType,
    pub no_sync: bool,
 }

@@ -292,10 +290,8 @@ impl Default for PageServerConf {
            listen_pg_addr: String::new(),
            listen_http_addr: String::new(),
            listen_https_addr: None,
-            listen_grpc_addr: None,
            pg_auth_type: AuthType::Trust,
            http_auth_type: AuthType::Trust,
-            grpc_auth_type: AuthType::Trust,
            no_sync: false,
        }
    }
@@ -310,10 +306,8 @@ pub struct NeonLocalInitPageserverConf {
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub listen_https_addr: Option<String>,
-    pub listen_grpc_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
-    pub grpc_auth_type: AuthType,
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub no_sync: bool,
    #[serde(flatten)]
@@ -327,10 +321,8 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
            listen_pg_addr,
            listen_http_addr,
            listen_https_addr,
-            listen_grpc_addr,
            pg_auth_type,
            http_auth_type,
-            grpc_auth_type,
            no_sync,
            other: _,
        } = conf;
@@ -339,9 +331,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
            listen_pg_addr: listen_pg_addr.clone(),
            listen_http_addr: listen_http_addr.clone(),
            listen_https_addr: listen_https_addr.clone(),
-            listen_grpc_addr: listen_grpc_addr.clone(),
            pg_auth_type: *pg_auth_type,
-            grpc_auth_type: *grpc_auth_type,
            http_auth_type: *http_auth_type,
            no_sync: *no_sync,
        }
@@ -717,10 +707,8 @@ impl LocalEnv {
                    listen_pg_addr: String,
                    listen_http_addr: String,
                    listen_https_addr: Option<String>,
-                    listen_grpc_addr: Option<String>,
                    pg_auth_type: AuthType,
                    http_auth_type: AuthType,
-                    grpc_auth_type: AuthType,
                    #[serde(default)]
                    no_sync: bool,
                }
@@ -744,10 +732,8 @@ impl LocalEnv {
                    listen_pg_addr,
                    listen_http_addr,
                    listen_https_addr,
-                    listen_grpc_addr,
                    pg_auth_type,
                    http_auth_type,
-                    grpc_auth_type,
                    no_sync,
                } = config_toml;
                let IdentityTomlSubset {
@@ -764,10 +750,8 @@ impl LocalEnv {
                    listen_pg_addr,
                    listen_http_addr,
                    listen_https_addr,
-                    listen_grpc_addr,
                    pg_auth_type,
                    http_auth_type,
-                    grpc_auth_type,
                    no_sync,
                };
                pageservers.push(conf);
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -129,9 +129,7 @@ impl PageServerNode {
            ));
        }

-        if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]
-            .contains(&AuthType::NeonJWT)
-        {
+        if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
            // Keys are generated in the toplevel repo dir, pageservers' workdirs
            // are one level below that, so refer to keys with ../
            overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
@@ -265,14 +263,6 @@ impl PageServerNode {
            None => None,
        };

-        let mut grpc_host = None;
-        let mut grpc_port = None;
-        if let Some(grpc_addr) = &self.conf.listen_grpc_addr {
-            let (_, port) = parse_host_port(grpc_addr).expect("Unable to parse listen_grpc_addr");
-            grpc_host = Some("localhost".to_string());
-            grpc_port = Some(port.unwrap_or(51051));
-        }
-
        // Intentionally hand-craft JSON: this acts as an implicit format compat test
        // in case the pageserver-side structure is edited, and reflects the real life
        // situation: the metadata is written by some other script.
@@ -281,8 +271,6 @@ impl PageServerNode {
            serde_json::to_vec(&pageserver_api::config::NodeMetadata {
                postgres_host: "localhost".to_string(),
                postgres_port: self.pg_connection_config.port(),
-                grpc_host,
-                grpc_port,
                http_host: "localhost".to_string(),
                http_port,
                https_port,
@@ -523,6 +511,11 @@ impl PageServerNode {
                .map(|x| x.parse::<bool>())
                .transpose()
                .context("Failed to parse 'timeline_offloading' as bool")?,
+            wal_receiver_protocol_override: settings
+                .remove("wal_receiver_protocol_override")
+                .map(serde_json::from_str)
+                .transpose()
+                .context("parse `wal_receiver_protocol_override` from json")?,
            rel_size_v2_enabled: settings
                .remove("rel_size_v2_enabled")
                .map(|x| x.parse::<bool>())
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -37,11 +37,6 @@ enum Command {
        #[arg(long)]
        listen_pg_port: u16,

-        #[arg(long)]
-        listen_grpc_addr: Option<String>,
-        #[arg(long)]
-        listen_grpc_port: Option<u16>,
-
        #[arg(long)]
        listen_http_addr: String,
        #[arg(long)]
@@ -415,8 +410,6 @@ async fn main() -> anyhow::Result<()> {
            node_id,
            listen_pg_addr,
            listen_pg_port,
-            listen_grpc_addr,
-            listen_grpc_port,
            listen_http_addr,
            listen_http_port,
            listen_https_port,
@@ -430,8 +423,6 @@ async fn main() -> anyhow::Result<()> {
                        node_id,
                        listen_pg_addr,
                        listen_pg_port,
-                        listen_grpc_addr,
-                        listen_grpc_port,
                        listen_http_addr,
                        listen_http_port,
                        listen_https_port,
--- a/docker-compose/compute_wrapper/Dockerfile
+++ b/docker-compose/compute_wrapper/Dockerfile
@@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
                       jq   \
                       netcat-openbsd
 #This is required for the pg_hintplan test
-RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src/ && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src
+RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw

 USER postgres
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -1,18 +1,18 @@
-#!/usr/bin/env bash
+#!/bin/bash
 set -eux

 # Generate a random tenant or timeline ID
 #
 # Takes a variable name as argument. The result is stored in that variable.
 generate_id() {
-    local -n resvar=${1}
-    printf -v resvar '%08x%08x%08x%08x' ${SRANDOM} ${SRANDOM} ${SRANDOM} ${SRANDOM}
+    local -n resvar=$1
+    printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
 }

 PG_VERSION=${PG_VERSION:-14}

-readonly CONFIG_FILE_ORG=/var/db/postgres/configs/config.json
-readonly CONFIG_FILE=/tmp/config.json
+CONFIG_FILE_ORG=/var/db/postgres/configs/config.json
+CONFIG_FILE=/tmp/config.json

 # Test that the first library path that the dynamic loader looks in is the path
 # that we use for custom compiled software
@@ -20,17 +20,17 @@ first_path="$(ldconfig --verbose 2>/dev/null \
    | grep --invert-match ^$'\t' \
    | cut --delimiter=: --fields=1 \
    | head --lines=1)"
-test "${first_path}" = '/usr/local/lib'
+test "$first_path" == '/usr/local/lib' || true # Remove the || true in a follow-up PR. Needed for backwards compat.

 echo "Waiting pageserver become ready."
 while ! nc -z pageserver 6400; do
-     sleep 1
+     sleep 1;
 done
 echo "Page server is ready."

-cp "${CONFIG_FILE_ORG}" "${CONFIG_FILE}"
+cp ${CONFIG_FILE_ORG} ${CONFIG_FILE}

- if [[ -n "${TENANT_ID:-}" && -n "${TIMELINE_ID:-}" ]]; then
+ if [ -n "${TENANT_ID:-}" ] && [ -n "${TIMELINE_ID:-}" ]; then
   tenant_id=${TENANT_ID}
   timeline_id=${TIMELINE_ID}
 else
@@ -41,7 +41,7 @@ else
       "http://pageserver:9898/v1/tenant"
  )
  tenant_id=$(curl "${PARAMS[@]}" | jq -r .[0].id)
-  if [[ -z "${tenant_id}" || "${tenant_id}" = null ]]; then
+  if [ -z "${tenant_id}" ] || [ "${tenant_id}" = null ]; then
    echo "Create a tenant"
    generate_id tenant_id
    PARAMS=(
@@ -51,7 +51,7 @@ else
        "http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
    )
    result=$(curl "${PARAMS[@]}")
-    printf '%s\n' "${result}" | jq .
+    echo $result | jq .
  fi

  echo "Check if a timeline present"
@@ -61,7 +61,7 @@ else
       "http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
  )
  timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
-  if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
+  if [ -z "${timeline_id}" ] || [ "${timeline_id}" = null ]; then
    generate_id timeline_id
    PARAMS=(
        -sbf
@@ -71,7 +71,7 @@ else
        "http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
    )
    result=$(curl "${PARAMS[@]}")
-    printf '%s\n' "${result}" | jq .
+    echo $result | jq .
  fi
 fi

@@ -82,10 +82,10 @@ else
 fi
 echo "Adding pgx_ulid"
 shared_libraries=$(jq -r '.spec.cluster.settings[] | select(.name=="shared_preload_libraries").value' ${CONFIG_FILE})
-sed -i "s|${shared_libraries}|${shared_libraries},${ulid_extension}|" ${CONFIG_FILE}
+sed -i "s/${shared_libraries}/${shared_libraries},${ulid_extension}/" ${CONFIG_FILE}
 echo "Overwrite tenant id and timeline id in spec file"
-sed -i "s|TENANT_ID|${tenant_id}|" ${CONFIG_FILE}
-sed -i "s|TIMELINE_ID|${timeline_id}|" ${CONFIG_FILE}
+sed -i "s/TENANT_ID/${tenant_id}/" ${CONFIG_FILE}
+sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}

 cat ${CONFIG_FILE}

@@ -93,5 +93,5 @@ echo "Start compute node"
 /usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
     -C "postgresql://cloud_admin@localhost:55433/postgres"  \
     -b /usr/local/bin/postgres                              \
-     --compute-id "compute-${RANDOM}"                          \
-     --config "${CONFIG_FILE}"
+     --compute-id "compute-$RANDOM"                          \
+     --config "$CONFIG_FILE"
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -186,14 +186,13 @@ services:

  neon-test-extensions:
    profiles: ["test-extensions"]
-    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-${PG_VERSION:-16}}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
+    image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
    environment:
-      - PGUSER=${PGUSER:-cloud_admin}
-      - PGPASSWORD=${PGPASSWORD:-cloud_admin}
+      - PGPASSWORD=cloud_admin
    entrypoint:
      - "/bin/bash"
      - "-c"
    command:
-      - sleep 3600
+      - sleep 1800
    depends_on:
      - compute
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -54,15 +54,6 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
        echo Adding dummy config
        docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
-        # Prepare for the PostGIS test
-        docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
-        TMPDIR=$(mktemp -d)
-        docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
-        docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
-        docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
-        docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
-        docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
-        rm -rf "${TMPDIR}"
        # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
        TMPDIR=$(mktemp -d)
        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
@@ -77,7 +68,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
        # We are running tests now
        rm -f testout.txt testout_contrib.txt
-        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
        neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
        docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
        neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
--- a/docker-compose/ext-src/h3-pg-src/neon-test.sh
+++ b/docker-compose/ext-src/h3-pg-src/neon-test.sh
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-set -ex
-cd "$(dirname "${0}")"
-PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
-dropdb --if-exists contrib_regression
-createdb contrib_regression
-cd h3_postgis/test
-psql -d contrib_regression -c "CREATE EXTENSION postgis" -c "CREATE EXTENSION postgis_raster" -c "CREATE EXTENSION h3" -c "CREATE EXTENSION h3_postgis"
-TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
-${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}
-cd ../../h3/test
-TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
-dropdb --if-exists contrib_regression
-createdb contrib_regression
-psql -d contrib_regression -c "CREATE EXTENSION h3"
-${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}
--- a/docker-compose/ext-src/h3-pg-src/test-upgrade.sh
+++ b/docker-compose/ext-src/h3-pg-src/test-upgrade.sh
@@ -1,7 +0,0 @@
-#!/bin/sh
-set -ex
-cd "$(dirname ${0})"
-PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
-cd h3/test
-TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
-${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'  --dbname=contrib_regression  ${TESTS}
--- a/docker-compose/ext-src/online_advisor-src/neon-test.sh
+++ b/docker-compose/ext-src/online_advisor-src/neon-test.sh
@@ -1,6 +0,0 @@
-#!/bin/sh
-set -ex
-cd "$(dirname "${0}")"
-if [ -f Makefile ]; then
-  make installcheck
-fi
--- a/docker-compose/ext-src/online_advisor-src/regular-test.sh
+++ b/docker-compose/ext-src/online_advisor-src/regular-test.sh
@@ -1,9 +0,0 @@
-#!/bin/sh
-set -ex
-cd "$(dirname ${0})"
-[ -f Makefile ] || exit 0
-dropdb --if-exist contrib_regression
-createdb contrib_regression
-PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
-TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
-${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}
--- a/docker-compose/ext-src/postgis-src/README-Neon.md
+++ b/docker-compose/ext-src/postgis-src/README-Neon.md
@@ -1,70 +0,0 @@
-# PostGIS Testing in Neon
-
-This directory contains configuration files and patches for running PostGIS tests in the Neon database environment.
-
-## Overview
-
-PostGIS is a spatial database extension for PostgreSQL that adds support for geographic objects. Testing PostGIS compatibility ensures that Neon's modifications to PostgreSQL don't break compatibility with this critical extension.
-
-## PostGIS Versions
-
- PostgreSQL v17: PostGIS 3.5.0
- PostgreSQL v14/v15/v16: PostGIS 3.3.3
-
-## Test Configuration
-
-The test setup includes:
-
- `postgis-no-upgrade-test.patch`: Disables upgrade tests by removing the upgrade test section from regress/runtest.mk
- `postgis-regular-v16.patch`: Version-specific patch for PostgreSQL v16
- `postgis-regular-v17.patch`: Version-specific patch for PostgreSQL v17
- `regular-test.sh`: Script to run PostGIS tests as a regular user
- `neon-test.sh`: Script to handle version-specific test configurations
- `raster_outdb_template.sql`: Template for raster tests with explicit file paths
-
-## Excluded Tests
-
-**Important Note:** The test exclusions listed below are specifically for regular-user tests against staging instances. These exclusions are necessary because staging instances run with limited privileges and cannot perform operations requiring superuser access. Docker-compose based tests are not affected by these exclusions.
-
-### Tests Requiring Superuser Permissions
-
-These tests cannot be run as a regular user:
- `estimatedextent`
- `regress/core/legacy`
- `regress/core/typmod`
- `regress/loader/TestSkipANALYZE`
- `regress/loader/TestANALYZE`
-
-### Tests Requiring Filesystem Access
-
-These tests need direct filesystem access that is only possible for superusers:
- `loader/load_outdb`
-
-### Tests with Flaky Results
-
-These tests have assumptions that don't always hold true:
- `regress/core/computed_columns` - Assumes computed columns always outperform alternatives, which is not consistently true
-
-### Tests Requiring Tunable Parameter Modifications
-
-These tests attempt to modify the `postgis.gdal_enabled_drivers` parameter, which is only accessible to superusers:
- `raster/test/regress/rt_wkb`
- `raster/test/regress/rt_addband`
- `raster/test/regress/rt_setbandpath`
- `raster/test/regress/rt_fromgdalraster`
- `raster/test/regress/rt_asgdalraster`
- `raster/test/regress/rt_astiff`
- `raster/test/regress/rt_asjpeg`
- `raster/test/regress/rt_aspng`
- `raster/test/regress/permitted_gdal_drivers`
- Loader tests: `BasicOutDB`, `Tiled10x10`, `Tiled10x10Copy`, `Tiled8x8`, `TiledAuto`, `TiledAutoSkipNoData`, `TiledAutoCopyn`
-
-### Topology Tests (v17 only)
- `populate_topology_layer`
- `renametopogeometrycolumn`
-
-## Other Modifications
-
- Binary.sql tests are modified to use explicit file paths
- Server-side SQL COPY commands (which require superuser privileges) are converted to client-side `\copy` commands
- Upgrade tests are disabled
--- a/docker-compose/ext-src/postgis-src/neon-test.sh
+++ b/docker-compose/ext-src/postgis-src/neon-test.sh
@@ -1,9 +0,0 @@
-#!/bin/bash
-set -ex
-cd "$(dirname "$0")"
-if [[ ${PG_VERSION} = v17 ]]; then
-  sed -i '/computed_columns/d' regress/core/tests.mk
-fi
-patch -p1 <postgis-no-upgrade-test.patch
-trap 'echo Cleaning up; patch -R -p1 <postgis-no-upgrade-test.patch' EXIT
-make installcheck-base
--- a/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
@@ -1,21 +0,0 @@
-diff --git a/regress/runtest.mk b/regress/runtest.mk
-index c051f03..010e493 100644
--- a/regress/runtest.mk
-+++ b/regress/runtest.mk
-@@ -24,16 +24,6 @@ check-regress:
- 
- 	POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
- 
-	@if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
-		echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
-		POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
-      --upgrade \
-      $(RUNTESTFLAGS) \
-      $(RUNTESTFLAGS_INTERNAL) \
-      $(TESTS); \
-	else \
-		echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
-	fi
- 
- check-long:
- 	$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
@@ -1,198 +0,0 @@
-diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
-index 00918e1..7e2b6cd 100644
--- a/raster/test/regress/tests.mk
-+++ b/raster/test/regress/tests.mk
-@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
-   $(RUNTESTFLAGS_INTERNAL) \
-   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
- 
-RASTER_TEST_FIRST = \
-	$(top_srcdir)/raster/test/regress/check_gdal \
-	$(top_srcdir)/raster/test/regress/loader/load_outdb
-+RASTER_TEST_FIRST =
- 
- RASTER_TEST_LAST = \
- 	$(top_srcdir)/raster/test/regress/clean
-@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
- 
- RASTER_TEST_BASIC_FUNC = \
- 	$(top_srcdir)/raster/test/regress/rt_bytea \
-	$(top_srcdir)/raster/test/regress/rt_wkb \
- 	$(top_srcdir)/raster/test/regress/box3d \
-	$(top_srcdir)/raster/test/regress/rt_addband \
- 	$(top_srcdir)/raster/test/regress/rt_band \
- 	$(top_srcdir)/raster/test/regress/rt_tile
- 
-@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
- 	$(top_srcdir)/raster/test/regress/rt_neighborhood \
- 	$(top_srcdir)/raster/test/regress/rt_nearestvalue \
- 	$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
-	$(top_srcdir)/raster/test/regress/rt_polygon \
-	$(top_srcdir)/raster/test/regress/rt_setbandpath
-+	$(top_srcdir)/raster/test/regress/rt_polygon
- 
- RASTER_TEST_UTILITY = \
- 	$(top_srcdir)/raster/test/regress/rt_utility \
-	$(top_srcdir)/raster/test/regress/rt_fromgdalraster \
-	$(top_srcdir)/raster/test/regress/rt_asgdalraster \
-	$(top_srcdir)/raster/test/regress/rt_astiff \
-	$(top_srcdir)/raster/test/regress/rt_asjpeg \
-	$(top_srcdir)/raster/test/regress/rt_aspng \
- 	$(top_srcdir)/raster/test/regress/rt_reclass \
- 	$(top_srcdir)/raster/test/regress/rt_gdalwarp \
- 	$(top_srcdir)/raster/test/regress/rt_gdalcontour \
-@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
- 
- RASTER_TEST_BUGS = \
- 	$(top_srcdir)/raster/test/regress/bug_test_car5 \
-	$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
- 	$(top_srcdir)/raster/test/regress/tickets
- 
- RASTER_TEST_LOADER = \
- 	$(top_srcdir)/raster/test/regress/loader/Basic \
- 	$(top_srcdir)/raster/test/regress/loader/Projected \
- 	$(top_srcdir)/raster/test/regress/loader/BasicCopy \
-	$(top_srcdir)/raster/test/regress/loader/BasicFilename \
-	$(top_srcdir)/raster/test/regress/loader/BasicOutDB \
-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
-	$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
-	$(top_srcdir)/raster/test/regress/loader/TiledAuto \
-	$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
-	$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
-+	$(top_srcdir)/raster/test/regress/loader/BasicFilename
- 
- RASTER_TESTS := $(RASTER_TEST_FIRST) \
- 	$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
-diff --git a/regress/core/binary.sql b/regress/core/binary.sql
-index 7a36b65..ad78fc7 100644
--- a/regress/core/binary.sql
-+++ b/regress/core/binary.sql
-@@ -1,4 +1,5 @@
- SET client_min_messages TO warning;
-+
- CREATE SCHEMA tm;
- 
- CREATE TABLE tm.geoms (id serial, g geometry);
-@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
- INSERT INTO tm.geoms(g)
- SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
- 
-COPY tm.geoms TO :tmpfile WITH BINARY;
-+-- define temp file path
-+\set tmpfile '/tmp/postgis_binary_test.dat'
-+
-+-- export
-+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+-- import
- CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g, o.g);
-+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
-+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
- 
- CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
- WHERE geometrytype(g) NOT LIKE '%CURVE%'
-   AND geometrytype(g) NOT LIKE '%CIRCULAR%'
-   AND geometrytype(g) NOT LIKE '%SURFACE%'
-   AND geometrytype(g) NOT LIKE 'TRIANGLE%'
-  AND geometrytype(g) NOT LIKE 'TIN%'
-;
-+  AND geometrytype(g) NOT LIKE 'TIN%';
- 
-COPY tm.geogs TO :tmpfile WITH BINARY;
-+-- export
-+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+-- import
- CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
-+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
-+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
- 
- DROP SCHEMA tm CASCADE;
-+
-diff --git a/regress/core/tests.mk b/regress/core/tests.mk
-index 3abd7bc..94903c3 100644
--- a/regress/core/tests.mk
-+++ b/regress/core/tests.mk
-@@ -23,7 +23,6 @@ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
- RUNTESTFLAGS_INTERNAL += \
-   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
-   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \
-   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
- 
- TESTS += \
-@@ -40,7 +39,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/dumppoints \
- 	$(top_srcdir)/regress/core/dumpsegments \
- 	$(top_srcdir)/regress/core/empty \
-	$(top_srcdir)/regress/core/estimatedextent \
- 	$(top_srcdir)/regress/core/forcecurve \
- 	$(top_srcdir)/regress/core/flatgeobuf \
- 	$(top_srcdir)/regress/core/geography \
-@@ -55,7 +53,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/out_marc21 \
- 	$(top_srcdir)/regress/core/in_encodedpolyline \
- 	$(top_srcdir)/regress/core/iscollection \
-	$(top_srcdir)/regress/core/legacy \
- 	$(top_srcdir)/regress/core/letters \
- 	$(top_srcdir)/regress/core/long_xact \
- 	$(top_srcdir)/regress/core/lwgeom_regress \
-@@ -112,7 +109,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/temporal_knn \
- 	$(top_srcdir)/regress/core/tickets \
- 	$(top_srcdir)/regress/core/twkb \
-	$(top_srcdir)/regress/core/typmod \
- 	$(top_srcdir)/regress/core/wkb \
- 	$(top_srcdir)/regress/core/wkt \
- 	$(top_srcdir)/regress/core/wmsservers \
-@@ -144,11 +140,6 @@ TESTS_SLOW = \
- 	$(top_srcdir)/regress/core/concave_hull_hard \
- 	$(top_srcdir)/regress/core/knn_recheck
- 
-ifeq ($(shell expr "$(POSTGIS_PGSQL_VERSION)" ">=" 120),1)
-	TESTS += \
-		$(top_srcdir)/regress/core/computed_columns
-endif
-
- ifeq ($(shell expr "$(POSTGIS_GEOS_VERSION)" ">=" 30700),1)
- 	# GEOS-3.7 adds:
- 	# ST_FrechetDistance
-diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
-index 1fc77ac..c3cb9de 100644
--- a/regress/loader/tests.mk
-+++ b/regress/loader/tests.mk
-@@ -38,7 +38,5 @@ TESTS += \
- 	$(top_srcdir)/regress/loader/Latin1 \
- 	$(top_srcdir)/regress/loader/Latin1-implicit \
- 	$(top_srcdir)/regress/loader/mfile \
-	$(top_srcdir)/regress/loader/TestSkipANALYZE \
-	$(top_srcdir)/regress/loader/TestANALYZE \
- 	$(top_srcdir)/regress/loader/CharNoWidth
- 
-diff --git a/regress/run_test.pl b/regress/run_test.pl
-index 0ec5b2d..1c331f4 100755
--- a/regress/run_test.pl
-+++ b/regress/run_test.pl
-@@ -147,7 +147,6 @@ $ENV{"LANG"} = "C";
- # Add locale info to the psql options
- # Add pg12 precision suppression
- my $PGOPTIONS = $ENV{"PGOPTIONS"};
-$PGOPTIONS .= " -c lc_messages=C";
- $PGOPTIONS .= " -c client_min_messages=NOTICE";
- $PGOPTIONS .= " -c extra_float_digits=0";
- $ENV{"PGOPTIONS"} = $PGOPTIONS;
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
@@ -1,218 +0,0 @@
-diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
-index 00918e1..7e2b6cd 100644
--- a/raster/test/regress/tests.mk
-+++ b/raster/test/regress/tests.mk
-@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
-   $(RUNTESTFLAGS_INTERNAL) \
-   --after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
- 
-RASTER_TEST_FIRST = \
-	$(top_srcdir)/raster/test/regress/check_gdal \
-	$(top_srcdir)/raster/test/regress/loader/load_outdb
-+RASTER_TEST_FIRST =
- 
- RASTER_TEST_LAST = \
- 	$(top_srcdir)/raster/test/regress/clean
-@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
- 
- RASTER_TEST_BASIC_FUNC = \
- 	$(top_srcdir)/raster/test/regress/rt_bytea \
-	$(top_srcdir)/raster/test/regress/rt_wkb \
- 	$(top_srcdir)/raster/test/regress/box3d \
-	$(top_srcdir)/raster/test/regress/rt_addband \
- 	$(top_srcdir)/raster/test/regress/rt_band \
- 	$(top_srcdir)/raster/test/regress/rt_tile
- 
-@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
- 	$(top_srcdir)/raster/test/regress/rt_neighborhood \
- 	$(top_srcdir)/raster/test/regress/rt_nearestvalue \
- 	$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
-	$(top_srcdir)/raster/test/regress/rt_polygon \
-	$(top_srcdir)/raster/test/regress/rt_setbandpath
-+	$(top_srcdir)/raster/test/regress/rt_polygon
- 
- RASTER_TEST_UTILITY = \
- 	$(top_srcdir)/raster/test/regress/rt_utility \
-	$(top_srcdir)/raster/test/regress/rt_fromgdalraster \
-	$(top_srcdir)/raster/test/regress/rt_asgdalraster \
-	$(top_srcdir)/raster/test/regress/rt_astiff \
-	$(top_srcdir)/raster/test/regress/rt_asjpeg \
-	$(top_srcdir)/raster/test/regress/rt_aspng \
- 	$(top_srcdir)/raster/test/regress/rt_reclass \
- 	$(top_srcdir)/raster/test/regress/rt_gdalwarp \
- 	$(top_srcdir)/raster/test/regress/rt_gdalcontour \
-@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
- 
- RASTER_TEST_BUGS = \
- 	$(top_srcdir)/raster/test/regress/bug_test_car5 \
-	$(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
- 	$(top_srcdir)/raster/test/regress/tickets
- 
- RASTER_TEST_LOADER = \
- 	$(top_srcdir)/raster/test/regress/loader/Basic \
- 	$(top_srcdir)/raster/test/regress/loader/Projected \
- 	$(top_srcdir)/raster/test/regress/loader/BasicCopy \
-	$(top_srcdir)/raster/test/regress/loader/BasicFilename \
-	$(top_srcdir)/raster/test/regress/loader/BasicOutDB \
-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
-	$(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
-	$(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
-	$(top_srcdir)/raster/test/regress/loader/TiledAuto \
-	$(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
-	$(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
-+	$(top_srcdir)/raster/test/regress/loader/BasicFilename
- 
- RASTER_TESTS := $(RASTER_TEST_FIRST) \
- 	$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
-diff --git a/regress/core/binary.sql b/regress/core/binary.sql
-index 7a36b65..ad78fc7 100644
--- a/regress/core/binary.sql
-+++ b/regress/core/binary.sql
-@@ -1,4 +1,5 @@
- SET client_min_messages TO warning;
-+
- CREATE SCHEMA tm;
- 
- CREATE TABLE tm.geoms (id serial, g geometry);
-@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
- INSERT INTO tm.geoms(g)
- SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
- 
-COPY tm.geoms TO :tmpfile WITH BINARY;
-+-- define temp file path
-+\set tmpfile '/tmp/postgis_binary_test.dat'
-+
-+-- export
-+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+-- import
- CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g, o.g);
-+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
-+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
- 
- CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
- WHERE geometrytype(g) NOT LIKE '%CURVE%'
-   AND geometrytype(g) NOT LIKE '%CIRCULAR%'
-   AND geometrytype(g) NOT LIKE '%SURFACE%'
-   AND geometrytype(g) NOT LIKE 'TRIANGLE%'
-  AND geometrytype(g) NOT LIKE 'TIN%'
-;
-+  AND geometrytype(g) NOT LIKE 'TIN%';
- 
-COPY tm.geogs TO :tmpfile WITH BINARY;
-+-- export
-+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+-- import
- CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
-+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
-+:command
-+
-+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
-+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
- 
- DROP SCHEMA tm CASCADE;
-+
-diff --git a/regress/core/tests.mk b/regress/core/tests.mk
-index 9e05244..a63a3e1 100644
--- a/regress/core/tests.mk
-+++ b/regress/core/tests.mk
-@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170
- POSTGIS_GEOS_VERSION=31101
- HAVE_JSON=yes
- HAVE_SPGIST=yes
-INTERRUPTTESTS=yes
-+INTERRUPTTESTS=no
- 
- current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
- 
- RUNTESTFLAGS_INTERNAL += \
-   --before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
-   --after-upgrade-script  $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
-  --after-create-script   $(top_srcdir)/regress/hooks/hook-after-create.sql \
-   --before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
- 
- TESTS += \
-@@ -40,7 +39,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/dumppoints \
- 	$(top_srcdir)/regress/core/dumpsegments \
- 	$(top_srcdir)/regress/core/empty \
-	$(top_srcdir)/regress/core/estimatedextent \
- 	$(top_srcdir)/regress/core/forcecurve \
- 	$(top_srcdir)/regress/core/flatgeobuf \
- 	$(top_srcdir)/regress/core/frechet \
-@@ -60,7 +58,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/out_marc21 \
- 	$(top_srcdir)/regress/core/in_encodedpolyline \
- 	$(top_srcdir)/regress/core/iscollection \
-	$(top_srcdir)/regress/core/legacy \
- 	$(top_srcdir)/regress/core/letters \
- 	$(top_srcdir)/regress/core/lwgeom_regress \
- 	$(top_srcdir)/regress/core/measures \
-@@ -119,7 +116,6 @@ TESTS += \
- 	$(top_srcdir)/regress/core/temporal_knn \
- 	$(top_srcdir)/regress/core/tickets \
- 	$(top_srcdir)/regress/core/twkb \
-	$(top_srcdir)/regress/core/typmod \
- 	$(top_srcdir)/regress/core/wkb \
- 	$(top_srcdir)/regress/core/wkt \
- 	$(top_srcdir)/regress/core/wmsservers \
-@@ -143,8 +139,7 @@ TESTS += \
- 	$(top_srcdir)/regress/core/oriented_envelope \
- 	$(top_srcdir)/regress/core/point_coordinates \
- 	$(top_srcdir)/regress/core/out_geojson \
-  $(top_srcdir)/regress/core/wrapx \
-	$(top_srcdir)/regress/core/computed_columns
-+  $(top_srcdir)/regress/core/wrapx 
- 
- # Slow slow tests
- TESTS_SLOW = \
-diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
-index ac4f8ad..4bad4fc 100644
--- a/regress/loader/tests.mk
-+++ b/regress/loader/tests.mk
-@@ -38,7 +38,5 @@ TESTS += \
- 	$(top_srcdir)/regress/loader/Latin1 \
- 	$(top_srcdir)/regress/loader/Latin1-implicit \
- 	$(top_srcdir)/regress/loader/mfile \
-	$(top_srcdir)/regress/loader/TestSkipANALYZE \
-	$(top_srcdir)/regress/loader/TestANALYZE \
- 	$(top_srcdir)/regress/loader/CharNoWidth \
- 
-diff --git a/regress/run_test.pl b/regress/run_test.pl
-index cac4b2e..4c7c82b 100755
--- a/regress/run_test.pl
-+++ b/regress/run_test.pl
-@@ -238,7 +238,6 @@ $ENV{"LANG"} = "C";
- # Add locale info to the psql options
- # Add pg12 precision suppression
- my $PGOPTIONS = $ENV{"PGOPTIONS"};
-$PGOPTIONS .= " -c lc_messages=C";
- $PGOPTIONS .= " -c client_min_messages=NOTICE";
- $PGOPTIONS .= " -c extra_float_digits=0";
- $ENV{"PGOPTIONS"} = $PGOPTIONS;
-diff --git a/topology/test/tests.mk b/topology/test/tests.mk
-index cbe2633..2c7c18f 100644
--- a/topology/test/tests.mk
-+++ b/topology/test/tests.mk
-@@ -46,9 +46,7 @@ TESTS += \
- 	$(top_srcdir)/topology/test/regress/legacy_query.sql \
- 	$(top_srcdir)/topology/test/regress/legacy_validate.sql \
- 	$(top_srcdir)/topology/test/regress/polygonize.sql \
-	$(top_srcdir)/topology/test/regress/populate_topology_layer.sql \
- 	$(top_srcdir)/topology/test/regress/removeunusedprimitives.sql \
-	$(top_srcdir)/topology/test/regress/renametopogeometrycolumn.sql \
- 	$(top_srcdir)/topology/test/regress/renametopology.sql \
- 	$(top_srcdir)/topology/test/regress/share_sequences.sql \
- 	$(top_srcdir)/topology/test/regress/sqlmm.sql \
--- a/docker-compose/ext-src/postgis-src/raster_outdb_template.sql
+++ b/docker-compose/ext-src/postgis-src/raster_outdb_template.sql
--- a/docker-compose/ext-src/postgis-src/regular-test.sh
+++ b/docker-compose/ext-src/postgis-src/regular-test.sh
@@ -1,17 +0,0 @@
-#!/bin/bash
-set -ex
-cd "$(dirname "${0}")"
-dropdb --if-exist contrib_regression
-createdb contrib_regression
-psql -d contrib_regression -c "ALTER DATABASE contrib_regression SET TimeZone='UTC'" \
-     -c "ALTER DATABASE contrib_regression SET DateStyle='ISO, MDY'" \
-     -c "CREATE EXTENSION postgis SCHEMA public" \
-     -c "CREATE EXTENSION postgis_topology" \
-     -c "CREATE EXTENSION postgis_tiger_geocoder CASCADE" \
-     -c "CREATE EXTENSION postgis_raster SCHEMA public" \
-     -c "CREATE EXTENSION postgis_sfcgal SCHEMA public"
-patch -p1 <postgis-no-upgrade-test.patch
-patch -p1 <"postgis-regular-${PG_VERSION}.patch"
-psql -d contrib_regression -f raster_outdb_template.sql
-trap 'patch -R -p1 <postgis-no-upgrade-test.patch && patch -R -p1 <"postgis-regular-${PG_VERSION}.patch"' EXIT
-POSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base
--- a/docker-compose/run-tests.sh
+++ b/docker-compose/run-tests.sh
@@ -63,9 +63,5 @@ done
 for d in ${FAILED}; do
  cat "$(find $d -name regression.diffs)"
 done
-for postgis_diff in /tmp/pgis_reg/*_diff; do
-  echo "${postgis_diff}:"
-  cat "${postgis_diff}"
-done
 echo "${FAILED}"
 exit 1
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -82,8 +82,7 @@ EXTENSIONS='[
 {"extname": "pg_ivm", "extdir": "pg_ivm-src"},
 {"extname": "pgjwt", "extdir": "pgjwt-src"},
 {"extname": "pgtap", "extdir": "pgtap-src"},
-{"extname": "pg_repack", "extdir": "pg_repack-src"},
-{"extname": "h3", "extdir": "h3-pg-src"}
+{"extname": "pg_repack", "extdir": "pg_repack-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
 COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -4,7 +4,6 @@
 //! provide it by calling the compute_ctl's `/compute_ctl` endpoint, or
 //! compute_ctl can fetch it by calling the control plane's API.
 use std::collections::HashMap;
-use std::fmt::Display;

 use indexmap::IndexMap;
 use regex::Regex;
@@ -179,9 +178,9 @@ pub struct ComputeSpec {
    /// JWT for authorizing requests to endpoint storage service
    pub endpoint_storage_token: Option<String>,

-    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
+    /// If true, download LFC state from endpoint_storage and pass it to Postgres on startup
    #[serde(default)]
-    pub autoprewarm: bool,
+    pub prewarm_lfc_on_startup: bool,
 }

 /// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -193,9 +192,6 @@ pub enum ComputeFeature {
    /// track short-lived connections as user activity.
    ActivityMonitorExperimental,

-    /// Enable TLS functionality.
-    TlsExperimental,
-
    /// This is a special feature flag that is used to represent unknown feature flags.
    /// Basically all unknown to enum flags are represented as this one. See unit test
    /// `parse_unknown_features()` for more details.
@@ -254,44 +250,34 @@ impl RemoteExtSpec {
        }

        match self.extension_data.get(real_ext_name) {
-            Some(_ext_data) => Ok((
-                real_ext_name.to_string(),
-                Self::build_remote_path(build_tag, pg_major_version, real_ext_name)?,
-            )),
+            Some(_ext_data) => {
+                // We have decided to use the Go naming convention due to Kubernetes.
+
+                let arch = match std::env::consts::ARCH {
+                    "x86_64" => "amd64",
+                    "aarch64" => "arm64",
+                    arch => arch,
+                };
+
+                // Construct the path to the extension archive
+                // BUILD_TAG/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst
+                //
+                // Keep it in sync with path generation in
+                // https://github.com/neondatabase/build-custom-extensions/tree/main
+                let archive_path_str = format!(
+                    "{build_tag}/{arch}/{pg_major_version}/extensions/{real_ext_name}.tar.zst"
+                );
+                Ok((
+                    real_ext_name.to_string(),
+                    RemotePath::from_string(&archive_path_str)?,
+                ))
+            }
            None => Err(anyhow::anyhow!(
                "real_ext_name {} is not found",
                real_ext_name
            )),
        }
    }
-
-    /// Get the architecture-specific portion of the remote extension path. We
-    /// use the Go naming convention due to Kubernetes.
-    fn get_arch() -> &'static str {
-        match std::env::consts::ARCH {
-            "x86_64" => "amd64",
-            "aarch64" => "arm64",
-            arch => arch,
-        }
-    }
-
-    /// Build a [`RemotePath`] for an extension.
-    fn build_remote_path(
-        build_tag: &str,
-        pg_major_version: &str,
-        ext_name: &str,
-    ) -> anyhow::Result<RemotePath> {
-        let arch = Self::get_arch();
-
-        // Construct the path to the extension archive
-        // BUILD_TAG/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst
-        //
-        // Keep it in sync with path generation in
-        // https://github.com/neondatabase/build-custom-extensions/tree/main
-        RemotePath::from_string(&format!(
-            "{build_tag}/{arch}/{pg_major_version}/extensions/{ext_name}.tar.zst"
-        ))
-    }
 }

 #[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
@@ -320,12 +306,6 @@ impl ComputeMode {
    }
 }

-impl Display for ComputeMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.to_type_str())
-    }
-}
-
 /// Log level for audit logging
 #[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
 pub enum ComputeAudit {
@@ -538,37 +518,6 @@ mod tests {
            .expect("Library should be found");
    }

-    #[test]
-    fn remote_extension_path() {
-        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
-            "public_extensions": ["ext"],
-            "custom_extensions": [],
-            "library_index": {
-                "extlib": "ext",
-            },
-            "extension_data": {
-                "ext": {
-                    "control_data": {
-                        "ext.control": ""
-                    },
-                    "archive_path": ""
-                }
-            },
-        }))
-        .unwrap();
-
-        let (_ext_name, ext_path) = rspec
-            .get_ext("ext", false, "latest", "v17")
-            .expect("Extension should be found");
-        // Starting with a forward slash would have consequences for the
-        // Url::join() that occurs when downloading a remote extension.
-        assert!(!ext_path.to_string().starts_with("/"));
-        assert_eq!(
-            ext_path,
-            RemoteExtSpec::build_remote_path("latest", "v17", "ext").unwrap()
-        );
-    }
-
    #[test]
    fn parse_spec_file() {
        let file = File::open("tests/cluster_spec.json").unwrap();
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -85,7 +85,7 @@
                "vartype": "bool"
            },
            {
-                "name": "autoprewarm",
+                "name": "prewarm_lfc_on_startup",
                "value": "off",
                "vartype": "bool"
            },
--- a/libs/metrics/src/hll.rs
+++ b/libs/metrics/src/hll.rs
@@ -107,7 +107,7 @@ impl<const N: usize> MetricType for HyperLogLogState<N> {
 }

 impl<const N: usize> HyperLogLogState<N> {
-    pub fn measure(&self, item: &(impl Hash + ?Sized)) {
+    pub fn measure(&self, item: &impl Hash) {
        // changing the hasher will break compatibility with previous measurements.
        self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));
    }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -27,7 +27,6 @@ pub use prometheus::{

 pub mod launch_timestamp;
 mod wrappers;
-pub use prometheus;
 pub use wrappers::{CountedReader, CountedWriter};
 mod hll;
 pub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -6,13 +6,8 @@ license.workspace = true

 [dependencies]
 thiserror.workspace = true
-nix.workspace = true
-spin.workspace = true
+nix.workspace=true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }

-[dev-dependencies]
-rand = "0.9.1"
-rand_distr = "0.5.1"
-
 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -1,366 +0,0 @@
-//! Hash table implementation on top of 'shmem'
-//!
-//! Features required in the long run by the communicator project:
-//!
-//! [X] Accessible from both Postgres processes and rust threads in the communicator process
-//! [X] Low latency
-//! [ ] Scalable to lots of concurrent accesses (currently uses a single spinlock)
-//! [ ] Resizable
-
-use std::fmt::Debug;
-use std::hash::Hash;
-use std::mem::MaybeUninit;
-use std::ops::Deref;
-
-use crate::shmem::ShmemHandle;
-
-use spin;
-
-mod core;
-
-#[cfg(test)]
-mod tests;
-
-use core::CoreHashMap;
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
-
-#[derive(Debug)]
-pub struct OutOfMemoryError();
-
-pub struct HashMapInit<'a, K, V> {
-    // Hash table can be allocated in a fixed memory area, or in a resizeable ShmemHandle.
-    shmem_handle: Option<ShmemHandle>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-}
-
-pub struct HashMapAccess<'a, K, V> {
-    shmem_handle: Option<ShmemHandle>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-}
-
-unsafe impl<'a, K: Sync, V: Sync> Sync for HashMapAccess<'a, K, V> {}
-unsafe impl<'a, K: Send, V: Send> Send for HashMapAccess<'a, K, V> {}
-
-impl<'a, K, V> HashMapInit<'a, K, V> {
-    pub fn attach_writer(self) -> HashMapAccess<'a, K, V> {
-        HashMapAccess {
-            shmem_handle: self.shmem_handle,
-            shared_ptr: self.shared_ptr,
-        }
-    }
-
-    pub fn attach_reader(self) -> HashMapAccess<'a, K, V> {
-        // no difference to attach_writer currently
-        self.attach_writer()
-    }
-}
-
-// This is stored in the shared memory area
-struct HashMapShared<'a, K, V> {
-    inner: spin::RwLock<CoreHashMap<'a, K, V>>,
-}
-
-impl<'a, K, V> HashMapInit<'a, K, V>
-where
-    K: Clone + Hash + Eq,
-{
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        // add some margin to cover alignment etc.
-        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
-    }
-
-    pub fn init_in_fixed_area(
-        num_buckets: u32,
-        area: &'a mut [MaybeUninit<u8>],
-    ) -> HashMapInit<'a, K, V> {
-        Self::init_common(num_buckets, None, area.as_mut_ptr().cast(), area.len())
-    }
-
-    /// Initialize a new hash map in the given shared memory area
-    pub fn init_in_shmem(num_buckets: u32, mut shmem: ShmemHandle) -> HashMapInit<'a, K, V> {
-        let size = Self::estimate_size(num_buckets);
-        shmem
-            .set_size(size)
-            .expect("could not resize shared memory area");
-
-        let ptr = unsafe { shmem.data_ptr.as_mut() };
-        Self::init_common(num_buckets, Some(shmem), ptr, size)
-    }
-
-    fn init_common(
-        num_buckets: u32,
-        shmem_handle: Option<ShmemHandle>,
-        area_ptr: *mut u8,
-        area_len: usize,
-    ) -> HashMapInit<'a, K, V> {
-        // carve out HashMapShared from the area. This does not include the hashmap's dictionary
-        // and buckets.
-        let mut ptr: *mut u8 = area_ptr;
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
-        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
-        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
-
-        // the rest of the space is given to the hash map's dictionary and buckets
-        let remaining_area = unsafe {
-            std::slice::from_raw_parts_mut(ptr, area_len - ptr.offset_from(area_ptr) as usize)
-        };
-
-        let hashmap = CoreHashMap::new(num_buckets, remaining_area);
-        unsafe {
-            std::ptr::write(
-                shared_ptr,
-                HashMapShared {
-                    inner: spin::RwLock::new(hashmap),
-                },
-            );
-        }
-
-        HashMapInit {
-            shmem_handle: shmem_handle,
-            shared_ptr,
-        }
-    }
-}
-
-impl<'a, K, V> HashMapAccess<'a, K, V>
-where
-    K: Clone + Hash + Eq,
-{
-    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, K, V>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();
-
-        match lock_guard.get(key) {
-            None => None,
-            Some(val_ref) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
-            }
-        }
-    }
-
-    /// Insert a value
-    pub fn insert(&self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if let Some(_) = existing {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(&self, key: &K) -> bool {
-        let mut result = false;
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn<F>(&self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get(key);
-        let action = value_fn(old_val);
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(new_val)) => {
-                let _ = lock_guard.insert(key, new_val);
-            }
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some(_), UpdateAction::Remove) => {
-                let _ = lock_guard.remove(key);
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn_at_bucket<F>(
-        &self,
-        pos: usize,
-        value_fn: F,
-    ) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get_bucket(pos);
-        let action = value_fn(old_val.map(|(_k, v)| v));
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(_new_val)) => panic!("cannot insert without key"),
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some((key, _value)), UpdateAction::Remove) => {
-                let key = key.clone();
-                let _ = lock_guard.remove(&key);
-            }
-        }
-
-        Ok(())
-    }
-
-    pub fn get_num_buckets(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().get_num_buckets()
-    }
-
-    /// Return the key and value stored in bucket with given index. This can be used to
-    /// iterate through the hash map. (An Iterator might be nicer. The communicator's
-    /// clock algorithm needs to _slowly_ iterate through all buckets with its clock hand,
-    /// without holding a lock. If we switch to an Iterator, it must not hold the lock.)
-    pub fn get_bucket<'e>(&'e self, pos: usize) -> Option<ValueReadGuard<'e, K, V>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();
-
-        match lock_guard.get_bucket(pos) {
-            None => None,
-            Some((_key, val_ref)) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
-            }
-        }
-    }
-
-    // for metrics
-    pub fn get_num_buckets_in_use(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().buckets_in_use as usize
-    }
-
-    /// Grow
-    ///
-    /// 1. grow the underlying shared memory area
-    /// 2. Initialize new buckets. This overwrites the current dictionary
-    /// 3. Recalculate the dictionary
-    pub fn grow(&self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-        let inner = &mut *lock_guard;
-        let old_num_buckets = inner.buckets.len() as u32;
-
-        if num_buckets < old_num_buckets {
-            panic!("grow called with a smaller number of buckets");
-        }
-        if num_buckets == old_num_buckets {
-            return Ok(());
-        }
-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("grow called on a fixed-size hash table");
-
-        let size_bytes = HashMapInit::<K, V>::estimate_size(num_buckets);
-        shmem_handle.set_size(size_bytes)?;
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-
-        // Initialize new buckets. The new buckets are linked to the free list. NB: This overwrites
-        // the dictionary!
-        let buckets_ptr = inner.buckets.as_mut_ptr();
-        unsafe {
-            for i in old_num_buckets..num_buckets {
-                let bucket_ptr = buckets_ptr.add(i as usize);
-                bucket_ptr.write(core::Bucket {
-                    hash: 0,
-                    next: if i < num_buckets {
-                        i as u32 + 1
-                    } else {
-                        inner.free_head
-                    },
-                    inner: None,
-                });
-            }
-        }
-
-        // Recalculate the dictionary
-        let buckets;
-        let dictionary;
-        unsafe {
-            let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
-            let dictionary_ptr: *mut u32 = buckets_end_ptr
-                .byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
-                .cast();
-            let dictionary_size: usize =
-                end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
-
-            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
-            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
-        }
-        for i in 0..dictionary.len() {
-            dictionary[i] = core::INVALID_POS;
-        }
-
-        for i in 0..old_num_buckets as usize {
-            if buckets[i].inner.is_none() {
-                continue;
-            }
-            let pos: usize = (buckets[i].hash % dictionary.len() as u64) as usize;
-            buckets[i].next = dictionary[pos];
-            dictionary[pos] = i as u32;
-        }
-
-        // Finally, update the CoreHashMap struct
-        inner.dictionary = dictionary;
-        inner.buckets = buckets;
-        inner.free_head = old_num_buckets;
-
-        Ok(())
-    }
-
-    // TODO: Shrinking is a multi-step process that requires co-operation from the caller
-    //
-    // 1. The caller must first call begin_shrink(). That forbids allocation of higher-numbered
-    // buckets.
-    //
-    // 2. Next, the caller must evict all entries in higher-numbered buckets.
-    //
-    // 3. Finally, call finish_shrink(). This recomputes the dictionary and shrinks the underlying
-    //    shmem area
-}
-
-pub struct ValueReadGuard<'a, K, V> {
-    _lock_guard: spin::RwLockReadGuard<'a, CoreHashMap<'a, K, V>>,
-    value: *const V,
-}
-
-impl<'a, K, V> Deref for ValueReadGuard<'a, K, V> {
-    type Target = V;
-
-    fn deref(&self) -> &Self::Target {
-        // SAFETY: The `lock_guard` ensures that the underlying map (and thus the value pointed to
-        // by `value`) remains valid for the lifetime `'a`. The `value` has been obtained from a
-        // valid reference within the map.
-        unsafe { &*self.value }
-    }
-}
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -1,233 +0,0 @@
-//! Simple hash table with chaining
-//!
-//! # Resizing
-//!
-
-use std::hash::{DefaultHasher, Hash, Hasher};
-use std::mem::MaybeUninit;
-
-pub(crate) const INVALID_POS: u32 = u32::MAX;
-
-// Bucket
-pub(crate) struct Bucket<K, V> {
-    pub(crate) hash: u64,
-    pub(crate) next: u32,
-    pub(crate) inner: Option<(K, V)>,
-}
-
-pub(crate) struct CoreHashMap<'a, K, V> {
-    pub(crate) dictionary: &'a mut [u32],
-    pub(crate) buckets: &'a mut [Bucket<K, V>],
-    pub(crate) free_head: u32,
-
-    // metrics
-    pub(crate) buckets_in_use: u32,
-}
-
-pub struct FullError();
-
-impl<'a, K, V> CoreHashMap<'a, K, V>
-where
-    K: Clone + Hash + Eq,
-{
-    const FILL_FACTOR: f32 = 0.60;
-
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        let mut size = 0;
-
-        // buckets
-        size += size_of::<Bucket<K, V>>() * num_buckets as usize;
-
-        // dictionary
-        size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
-            as usize;
-
-        size
-    }
-
-    pub fn new(num_buckets: u32, area: &'a mut [u8]) -> CoreHashMap<'a, K, V> {
-        let len = area.len();
-
-        let mut ptr: *mut u8 = area.as_mut_ptr();
-        let end_ptr: *mut u8 = unsafe { area.as_mut_ptr().add(len) };
-
-        // carve out the buckets
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<Bucket<K, V>>())) };
-        let buckets_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<Bucket<K, V>>() * num_buckets as usize) };
-
-        // use remaining space for the dictionary
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
-        let dictionary_ptr = ptr;
-
-        assert!(ptr.addr() < end_ptr.addr());
-        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
-        assert!(dictionary_size > 0);
-
-        // Initialize the buckets
-        let buckets = {
-            let buckets_ptr: *mut MaybeUninit<Bucket<K, V>> = buckets_ptr.cast();
-            let buckets =
-                unsafe { std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize) };
-            for i in 0..buckets.len() {
-                buckets[i].write(Bucket {
-                    hash: 0,
-                    next: if i < buckets.len() - 1 {
-                        i as u32 + 1
-                    } else {
-                        INVALID_POS
-                    },
-                    inner: None,
-                });
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) }
-        };
-
-        // Initialize the dictionary
-        let dictionary = {
-            let dictionary_ptr: *mut MaybeUninit<u32> = dictionary_ptr.cast();
-            let dictionary =
-                unsafe { std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size as usize) };
-
-            for i in 0..dictionary.len() {
-                dictionary[i].write(INVALID_POS);
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe {
-                std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
-            }
-        };
-
-        CoreHashMap {
-            dictionary,
-            buckets,
-            free_head: 0,
-            buckets_in_use: 0,
-        }
-    }
-
-    pub fn get(&self, key: &K) -> Option<&V> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        loop {
-            if next == INVALID_POS {
-                return None;
-            }
-
-            let bucket = &self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
-            if bucket_key == key {
-                return Some(&bucket_value);
-            }
-            next = bucket.next;
-        }
-    }
-
-    pub fn insert(&mut self, key: &K, value: V) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let first = self.dictionary[hash as usize % self.dictionary.len()];
-        if first == INVALID_POS {
-            // no existing entry
-            let pos = self.alloc_bucket(key.clone(), value, hash)?;
-            if pos == INVALID_POS {
-                return Err(FullError());
-            }
-            self.dictionary[hash as usize % self.dictionary.len()] = pos;
-            return Ok(());
-        }
-
-        let mut next = first;
-        loop {
-            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, update its value
-                *bucket_value = value;
-                return Ok(());
-            }
-
-            if bucket.next == INVALID_POS {
-                // No existing entry found. Append to the chain
-                let pos = self.alloc_bucket(key.clone(), value, hash)?;
-                if pos == INVALID_POS {
-                    return Err(FullError());
-                }
-                self.buckets[next as usize].next = pos;
-                return Ok(());
-            }
-            next = bucket.next;
-        }
-    }
-
-    pub fn remove(&mut self, key: &K) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        let mut prev_pos: u32 = INVALID_POS;
-        loop {
-            if next == INVALID_POS {
-                // no existing entry
-                return Ok(());
-            }
-            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, _) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, unlink it from the chain
-                if prev_pos == INVALID_POS {
-                    self.dictionary[hash as usize % self.dictionary.len()] = bucket.next;
-                } else {
-                    self.buckets[prev_pos as usize].next = bucket.next;
-                }
-
-                // and add it to the freelist
-                let bucket = &mut self.buckets[next as usize];
-                bucket.hash = 0;
-                bucket.inner = None;
-                bucket.next = self.free_head;
-                self.free_head = next;
-                self.buckets_in_use -= 1;
-                return Ok(());
-            }
-            prev_pos = next;
-            next = bucket.next;
-        }
-    }
-
-    pub fn get_num_buckets(&self) -> usize {
-        self.buckets.len()
-    }
-
-    pub fn get_bucket(&self, pos: usize) -> Option<&(K, V)> {
-        if pos >= self.buckets.len() {
-            return None;
-        }
-
-        self.buckets[pos].inner.as_ref()
-    }
-
-    fn alloc_bucket(&mut self, key: K, value: V, hash: u64) -> Result<u32, FullError> {
-        let pos = self.free_head;
-        if pos == INVALID_POS {
-            return Err(FullError());
-        }
-
-        let bucket = &mut self.buckets[pos as usize];
-        self.free_head = bucket.next;
-        self.buckets_in_use += 1;
-
-        bucket.hash = hash;
-        bucket.next = INVALID_POS;
-        bucket.inner = Some((key, value));
-
-        return Ok(pos);
-    }
-}
--- a/libs/neon-shmem/src/hash/tests.rs
+++ b/libs/neon-shmem/src/hash/tests.rs
@@ -1,220 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use crate::hash::HashMapAccess;
-use crate::hash::HashMapInit;
-use crate::hash::UpdateAction;
-use crate::shmem::ShmemHandle;
-
-use rand::seq::SliceRandom;
-use rand::{Rng, RngCore};
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MAX_MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(100000, shmem);
-    let w = init_struct.attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let x = w.get(&(*k).into());
-        let value = x.as_deref().copied();
-        assert_eq!(value, Some(idx));
-    }
-
-    //eprintln!("stats: {:?}", tree_writer.get_statistics());
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.get(&key).is_some() {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op(
-    op: &TestOp,
-    sut: &HashMapAccess<TestKey, TestValue>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    eprintln!("applying op: {op:?}");
-
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    // apply to Art tree
-    sut.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
-
-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
-}
-
-#[test]
-fn random_ops() {
-    const MAX_MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(100000, shmem);
-    let writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let key: TestKey = (rng.sample(distribution) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
-    }
-}
-
-#[test]
-fn test_grow() {
-    const MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_grow", 0, MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(1000, shmem);
-    let writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let mut rng = rand::rng();
-    for i in 0..10000 {
-        let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
-    }
-
-    writer.grow(1500).unwrap();
-
-    for i in 0..10000 {
-        let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
-    }
-}
--- a/libs/neon-shmem/src/lib.rs
+++ b/libs/neon-shmem/src/lib.rs
@@ -1,4 +1,418 @@
 //! Shared memory utilities for neon communicator

-pub mod hash;
-pub mod shmem;
+use std::num::NonZeroUsize;
+use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
+use std::ptr::NonNull;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use nix::errno::Errno;
+use nix::sys::mman::MapFlags;
+use nix::sys::mman::ProtFlags;
+use nix::sys::mman::mmap as nix_mmap;
+use nix::sys::mman::munmap as nix_munmap;
+use nix::unistd::ftruncate as nix_ftruncate;
+
+/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
+/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
+/// specified at creation.
+///
+/// The area is backed by an anonymous file created with memfd_create(). The full address space for
+/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
+/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
+/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
+/// future.
+pub struct ShmemHandle {
+    /// memfd file descriptor
+    fd: OwnedFd,
+
+    max_size: usize,
+
+    // Pointer to the beginning of the shared memory area. The header is stored there.
+    shared_ptr: NonNull<SharedStruct>,
+
+    // Pointer to the beginning of the user data
+    pub data_ptr: NonNull<u8>,
+}
+
+/// This is stored at the beginning in the shared memory area.
+struct SharedStruct {
+    max_size: usize,
+
+    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
+    current_size: AtomicUsize,
+}
+
+const RESIZE_IN_PROGRESS: usize = 1 << 63;
+
+const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
+
+/// Error type returned by the ShmemHandle functions.
+#[derive(thiserror::Error, Debug)]
+#[error("{msg}: {errno}")]
+pub struct Error {
+    pub msg: String,
+    pub errno: Errno,
+}
+
+impl Error {
+    fn new(msg: &str, errno: Errno) -> Error {
+        Error {
+            msg: msg.to_string(),
+            errno,
+        }
+    }
+}
+
+impl ShmemHandle {
+    /// Create a new shared memory area. To communicate between processes, the processes need to be
+    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
+    ///
+    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
+    /// processes can continue using it, however.
+    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
+        // create the backing anonymous file.
+        let fd = create_backing_file(name)?;
+
+        Self::new_with_fd(fd, initial_size, max_size)
+    }
+
+    /// Set up a new area on the given backing file: size it, map it and initialize the header.
+    fn new_with_fd(
+        fd: OwnedFd,
+        initial_size: usize,
+        max_size: usize,
+    ) -> Result<ShmemHandle, Error> {
+        // The high-order bit is reserved for the RESIZE_IN_PROGRESS flag, and the actual size
+        // is a little larger because of the SharedStruct header. Use a much lower limit:
+        // with anything close to that, you'll run out of memory anyway.
+        if max_size >= 1 << 48 {
+            panic!("max size {} too large", max_size);
+        }
+        if initial_size > max_size {
+            panic!("initial size {initial_size} larger than max size {max_size}");
+        }
+
+        // The actual initial / max size is the one given by the caller, plus the size of
+        // 'SharedStruct'.
+        let initial_size = HEADER_SIZE + initial_size;
+        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
+
+        // Size the file before mapping it: if this fails, no mapping exists yet to leak.
+        enlarge_file(fd.as_fd(), initial_size as u64)?;
+
+        // Reserve address space for the full 'max_size' with mmap
+        //
+        // TODO: Use MAP_HUGETLB if possible
+        let start_ptr = unsafe {
+            nix_mmap(
+                None,
+                max_size,
+                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
+                MapFlags::MAP_SHARED,
+                &fd,
+                0,
+            )
+        }
+        .map_err(|e| Error::new("mmap failed", e))?;
+
+        // Initialize the header
+        let shared: NonNull<SharedStruct> = start_ptr.cast();
+        unsafe {
+            shared.write(SharedStruct {
+                max_size: max_size.into(),
+                current_size: AtomicUsize::new(initial_size),
+            })
+        };
+
+        // The user data begins after the header
+        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
+
+        Ok(ShmemHandle {
+            fd,
+            max_size: max_size.into(),
+            shared_ptr: shared,
+            data_ptr,
+        })
+    }
+
+    // return reference to the header
+    fn shared(&self) -> &SharedStruct {
+        unsafe { self.shared_ptr.as_ref() }
+    }
+
+    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
+    /// when creating the area.
+    ///
+    /// Only one process/thread may resize at a time. If another resize is already in progress,
+    /// an Error is returned instead of waiting for it.
+    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
+        let new_size = new_size + HEADER_SIZE;
+        let shared = self.shared();
+
+        if new_size > self.max_size {
+            panic!(
+                "new size ({}) is greater than max size ({})",
+                new_size, self.max_size
+            );
+        }
+        assert_eq!(self.max_size, shared.max_size);
+
+        // Lock the area by setting the RESIZE_IN_PROGRESS bit in 'current_size'. The size bits
+        // keep the old value until the resize has finished.
+        //
+        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
+        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway.
+        let mut old_size = shared.current_size.load(Ordering::Acquire);
+        loop {
+            if (old_size & RESIZE_IN_PROGRESS) != 0 {
+                return Err(Error::new(
+                    "concurrent resize detected",
+                    Errno::UnknownErrno,
+                ));
+            }
+            match shared.current_size.compare_exchange(
+                old_size,
+                old_size | RESIZE_IN_PROGRESS,
+                Ordering::Acquire,
+                Ordering::Relaxed,
+            ) {
+                Ok(_) => break,
+                Err(x) => old_size = x,
+            }
+        }
+
+        // Ok, we got the lock. 'old_size' is the size before the resize, without the flag bit.
+        //
+        // NB: If anything goes wrong, we *must* clear the bit!
+        let result = {
+            use std::cmp::Ordering::{Equal, Greater, Less};
+            match new_size.cmp(&old_size) {
+                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
+                    Error::new("could not shrink shmem segment, ftruncate failed", e)
+                }),
+                Equal => Ok(()),
+                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
+            }
+        };
+
+        // Unlock: publish the new size on success, restore the old size on failure
+        shared.current_size.store(
+            if result.is_ok() { new_size } else { old_size },
+            Ordering::Release,
+        );
+
+        result
+    }
+
+    /// Returns the current user-visible size of the shared memory segment.
+    ///
+    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
+    /// responsibility not to access the area beyond the current size.
+    pub fn current_size(&self) -> usize {
+        let total_current_size =
+            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
+        total_current_size - HEADER_SIZE
+    }
+}
+
+impl Drop for ShmemHandle {
+    fn drop(&mut self) {
+        // SAFETY: The pointer was obtained from mmap() with the given size.
+        // We unmap the entire region.
+        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
+        // The fd is dropped automatically by OwnedFd.
+    }
+}
+
+/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
+/// anonymous in-memory file. On macos, fall back to a regular file. That's good enough for
+/// development and testing, but in production we want the file to stay in memory.
+///
+/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
+#[allow(unused_variables)]
+fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
+            .map_err(|e| Error::new("memfd_create failed", e))
+    }
+    #[cfg(target_os = "macos")]
+    {
+        let file = tempfile::tempfile().map_err(|e| {
+            Error::new(
+                "could not create temporary file to back shmem area",
+                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
+            )
+        })?;
+        Ok(OwnedFd::from(file))
+    }
+}
+
+fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
+    // Use posix_fallocate() to enlarge the file to 'size' bytes. It reserves the space correctly,
+    // so that we don't get a segfault later when trying to actually use it.
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
+            Error::new(
+                "could not grow shmem segment, posix_fallocate failed",
+                e,
+            )
+        })
+    }
+    // As a fallback on macos, which doesn't have posix_fallocate, use plain ftruncate
+    #[cfg(target_os = "macos")]
+    {
+        nix::unistd::ftruncate(fd, size as i64)
+            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use nix::unistd::ForkResult;
+    use std::ops::Range;
+
+    /// check that all bytes in given range have the expected value.
+    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
+        for i in range {
+            let b = unsafe { *(ptr.add(i)) };
+            assert_eq!(expected, b, "unexpected byte at offset {}", i);
+        }
+    }
+
+    /// Write 'b' to all bytes in the given range
+    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
+        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
+    }
+
+    // simple single-process test of growing and shrinking
+    #[test]
+    fn test_shmem_resize() -> Result<(), Error> {
+        let max_size = 1024 * 1024;
+        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
+
+        assert_eq!(init_struct.current_size(), 0);
+
+        // Initial grow
+        let size1 = 10000;
+        init_struct.set_size(size1).unwrap();
+        assert_eq!(init_struct.current_size(), size1);
+
+        // Write some data
+        let data_ptr = init_struct.data_ptr.as_ptr();
+        write_range(data_ptr, 0xAA, 0..size1);
+        assert_range(data_ptr, 0xAA, 0..size1);
+
+        // Shrink
+        let size2 = 5000;
+        init_struct.set_size(size2).unwrap();
+        assert_eq!(init_struct.current_size(), size2);
+
+        // Grow again
+        let size3 = 20000;
+        init_struct.set_size(size3).unwrap();
+        assert_eq!(init_struct.current_size(), size3);
+
+        // Try to read it. The area that was shrunk and grown again should read as all zeros now
+        assert_range(data_ptr, 0xAA, 0..5000);
+        assert_range(data_ptr, 0, 5000..size1);
+
+        // Try to grow beyond max_size
+        //let size4 = max_size + 1;
+        //assert!(init_struct.set_size(size4).is_err());
+
+        // Dropping init_struct should unmap the memory
+        drop(init_struct);
+
+        Ok(())
+    }
+
+    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
+    /// but is stored in the shared memory area and works across processes. It's implemented by
+    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
+    struct SimpleBarrier {
+        num_procs: usize,
+        count: AtomicUsize,
+    }
+
+    impl SimpleBarrier {
+        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
+            unsafe {
+                ptr.write(SimpleBarrier {
+                    num_procs,
+                    count: AtomicUsize::new(0),
+                })
+            }
+        }
+
+        pub fn wait(&self) {
+            // AcqRel: publish our earlier writes and observe those of earlier arrivals
+            let old = self.count.fetch_add(1, Ordering::AcqRel);
+            let generation = old / self.num_procs;
+            let mut current = old + 1;
+            while current < (generation + 1) * self.num_procs {
+                std::thread::sleep(std::time::Duration::from_millis(10));
+                // Acquire: pairs with the fetch_add of the participants we are waiting for
+                current = self.count.load(Ordering::Acquire);
+            }
+        }
+    }
+
+    #[test]
+    fn test_multi_process() {
+        // Initialize
+        let max_size = 1_000_000_000_000;
+        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
+        let ptr = init_struct.data_ptr.as_ptr();
+
+        // Store the SimpleBarrier in the first 1k of the area.
+        init_struct.set_size(10000).unwrap();
+        let barrier_ptr: *mut SimpleBarrier = unsafe {
+            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
+                .cast()
+        };
+        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
+        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
+
+        // Fork another test process. The code after this runs in both processes concurrently.
+        let fork_result = unsafe { nix::unistd::fork().unwrap() };
+
+        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, 1000..2000);
+        } else {
+            write_range(ptr, 0xBB, 2000..3000);
+        }
+        barrier.wait();
+        // Verify the contents. (in both processes)
+        assert_range(ptr, 0xAA, 1000..2000);
+        assert_range(ptr, 0xBB, 2000..3000);
+
+        // Grow, from the child this time
+        let size = 10_000_000;
+        if !fork_result.is_parent() {
+            init_struct.set_size(size).unwrap();
+        }
+        barrier.wait();
+
+        // make some writes at the end
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, (size - 10)..size);
+        } else {
+            write_range(ptr, 0xBB, (size - 20)..(size - 10));
+        }
+        barrier.wait();
+
+        // Verify the contents. (This runs in both processes)
+        assert_range(ptr, 0, (size - 1000)..(size - 20));
+        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
+        assert_range(ptr, 0xAA, (size - 10)..size);
+
+        if let ForkResult::Parent { child } = fork_result {
+            nix::sys::wait::waitpid(child, None).unwrap();
+        }
+    }
+}
--- a/libs/neon-shmem/src/shmem.rs
+++ b/libs/neon-shmem/src/shmem.rs
@@ -1,418 +0,0 @@
-//! Dynamically resizable contiguous chunk of shared memory
-
-use std::num::NonZeroUsize;
-use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use nix::errno::Errno;
-use nix::sys::mman::MapFlags;
-use nix::sys::mman::ProtFlags;
-use nix::sys::mman::mmap as nix_mmap;
-use nix::sys::mman::munmap as nix_munmap;
-use nix::unistd::ftruncate as nix_ftruncate;
-
-/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
-/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
-/// specified at creation.
-///
-/// The area is backed by an anonymous file created with memfd_create(). The full address space for
-/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
-/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
-/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
-/// future.
-pub struct ShmemHandle {
-    /// memfd file descriptor
-    fd: OwnedFd,
-
-    max_size: usize,
-
-    // Pointer to the beginning of the shared memory area. The header is stored there.
-    shared_ptr: NonNull<SharedStruct>,
-
-    // Pointer to the beginning of the user data
-    pub data_ptr: NonNull<u8>,
-}
-
-/// This is stored at the beginning in the shared memory area.
-struct SharedStruct {
-    max_size: usize,
-
-    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
-    current_size: AtomicUsize,
-}
-
-const RESIZE_IN_PROGRESS: usize = 1 << 63;
-
-const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
-
-/// Error type returned by the ShmemHandle functions.
-#[derive(thiserror::Error, Debug)]
-#[error("{msg}: {errno}")]
-pub struct Error {
-    pub msg: String,
-    pub errno: Errno,
-}
-
-impl Error {
-    fn new(msg: &str, errno: Errno) -> Error {
-        Error {
-            msg: msg.to_string(),
-            errno,
-        }
-    }
-}
-
-impl ShmemHandle {
-    /// Create a new shared memory area. To communicate between processes, the processes need to be
-    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
-    ///
-    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
-    /// processes can continue using it, however.
-    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
-        // create the backing anonymous file.
-        let fd = create_backing_file(name)?;
-
-        Self::new_with_fd(fd, initial_size, max_size)
-    }
-
-    fn new_with_fd(
-        fd: OwnedFd,
-        initial_size: usize,
-        max_size: usize,
-    ) -> Result<ShmemHandle, Error> {
-        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
-        // is a little larger than this because of the SharedStruct header. Make the upper limit
-        // somewhat smaller than that, because with anything close to that, you'll run out of
-        // memory anyway.
-        if max_size >= 1 << 48 {
-            panic!("max size {} too large", max_size);
-        }
-        if initial_size > max_size {
-            panic!("initial size {initial_size} larger than max size {max_size}");
-        }
-
-        // The actual initial / max size is the one given by the caller, plus the size of
-        // 'SharedStruct'.
-        let initial_size = HEADER_SIZE + initial_size;
-        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
-
-        // Reserve address space for it with mmap
-        //
-        // TODO: Use MAP_HUGETLB if possible
-        let start_ptr = unsafe {
-            nix_mmap(
-                None,
-                max_size,
-                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
-                MapFlags::MAP_SHARED,
-                &fd,
-                0,
-            )
-        }
-        .map_err(|e| Error::new("mmap failed: {e}", e))?;
-
-        // Reserve space for the initial size
-        enlarge_file(fd.as_fd(), initial_size as u64)?;
-
-        // Initialize the header
-        let shared: NonNull<SharedStruct> = start_ptr.cast();
-        unsafe {
-            shared.write(SharedStruct {
-                max_size: max_size.into(),
-                current_size: AtomicUsize::new(initial_size),
-            })
-        };
-
-        // The user data begins after the header
-        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
-
-        Ok(ShmemHandle {
-            fd,
-            max_size: max_size.into(),
-            shared_ptr: shared,
-            data_ptr,
-        })
-    }
-
-    // return reference to the header
-    fn shared(&self) -> &SharedStruct {
-        unsafe { self.shared_ptr.as_ref() }
-    }
-
-    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
-    /// when creating the area.
-    ///
-    /// This may only be called from one process/thread concurrently. We detect that case
-    /// and return an Error.
-    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
-        let new_size = new_size + HEADER_SIZE;
-        let shared = self.shared();
-
-        if new_size > self.max_size {
-            panic!(
-                "new size ({} is greater than max size ({})",
-                new_size, self.max_size
-            );
-        }
-        assert_eq!(self.max_size, shared.max_size);
-
-        // Lock the area by setting the bit in 'current_size'
-        //
-        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
-        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
-        // since this is not performance-critical, better safe than sorry .
-        let mut old_size = shared.current_size.load(Ordering::Acquire);
-        loop {
-            if (old_size & RESIZE_IN_PROGRESS) != 0 {
-                return Err(Error::new(
-                    "concurrent resize detected",
-                    Errno::UnknownErrno,
-                ));
-            }
-            match shared.current_size.compare_exchange(
-                old_size,
-                new_size,
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => break,
-                Err(x) => old_size = x,
-            }
-        }
-
-        // Ok, we got the lock.
-        //
-        // NB: If anything goes wrong, we *must* clear the bit!
-        let result = {
-            use std::cmp::Ordering::{Equal, Greater, Less};
-            match new_size.cmp(&old_size) {
-                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
-                    Error::new("could not shrink shmem segment, ftruncate failed: {e}", e)
-                }),
-                Equal => Ok(()),
-                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
-            }
-        };
-
-        // Unlock
-        shared.current_size.store(
-            if result.is_ok() { new_size } else { old_size },
-            Ordering::Release,
-        );
-
-        result
-    }
-
-    /// Returns the current user-visible size of the shared memory segment.
-    ///
-    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
-    /// responsibility not to access the area beyond the current size.
-    pub fn current_size(&self) -> usize {
-        let total_current_size =
-            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
-        total_current_size - HEADER_SIZE
-    }
-}
-
-impl Drop for ShmemHandle {
-    fn drop(&mut self) {
-        // SAFETY: The pointer was obtained from mmap() with the given size.
-        // We unmap the entire region.
-        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
-        // The fd is dropped automatically by OwnedFd.
-    }
-}
-
-/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
-/// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for
-/// development and testing, but in production we want the file to stay in memory.
-///
-/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
-#[allow(unused_variables)]
-fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
-            .map_err(|e| Error::new("memfd_create failed: {e}", e))
-    }
-    #[cfg(target_os = "macos")]
-    {
-        let file = tempfile::tempfile().map_err(|e| {
-            Error::new(
-                "could not create temporary file to back shmem area: {e}",
-                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
-            )
-        })?;
-        Ok(OwnedFd::from(file))
-    }
-}
-
-fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
-    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
-    // we don't get a segfault later when trying to actually use it.
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
-            Error::new(
-                "could not grow shmem segment, posix_fallocate failed: {e}",
-                e,
-            )
-        })
-    }
-    // As a fallback on macos, which doesn't have posix_fallocate, use plain 'fallocate'
-    #[cfg(target_os = "macos")]
-    {
-        nix::unistd::ftruncate(fd, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed: {e}", e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use nix::unistd::ForkResult;
-    use std::ops::Range;
-
-    /// check that all bytes in given range have the expected value.
-    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
-        for i in range {
-            let b = unsafe { *(ptr.add(i)) };
-            assert_eq!(expected, b, "unexpected byte at offset {}", i);
-        }
-    }
-
-    /// Write 'b' to all bytes in the given range
-    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
-        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
-    }
-
-    // simple single-process test of growing and shrinking
-    #[test]
-    fn test_shmem_resize() -> Result<(), Error> {
-        let max_size = 1024 * 1024;
-        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
-
-        assert_eq!(init_struct.current_size(), 0);
-
-        // Initial grow
-        let size1 = 10000;
-        init_struct.set_size(size1).unwrap();
-        assert_eq!(init_struct.current_size(), size1);
-
-        // Write some data
-        let data_ptr = init_struct.data_ptr.as_ptr();
-        write_range(data_ptr, 0xAA, 0..size1);
-        assert_range(data_ptr, 0xAA, 0..size1);
-
-        // Shrink
-        let size2 = 5000;
-        init_struct.set_size(size2).unwrap();
-        assert_eq!(init_struct.current_size(), size2);
-
-        // Grow again
-        let size3 = 20000;
-        init_struct.set_size(size3).unwrap();
-        assert_eq!(init_struct.current_size(), size3);
-
-        // Try to read it. The area that was shrunk and grown again should read as all zeros now
-        assert_range(data_ptr, 0xAA, 0..5000);
-        assert_range(data_ptr, 0, 5000..size1);
-
-        // Try to grow beyond max_size
-        //let size4 = max_size + 1;
-        //assert!(init_struct.set_size(size4).is_err());
-
-        // Dropping init_struct should unmap the memory
-        drop(init_struct);
-
-        Ok(())
-    }
-
-    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
-    /// but is stored in the shared memory area and works across processes. It's implemented by
-    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
-    struct SimpleBarrier {
-        num_procs: usize,
-        count: AtomicUsize,
-    }
-
-    impl SimpleBarrier {
-        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
-            unsafe {
-                *ptr = SimpleBarrier {
-                    num_procs,
-                    count: AtomicUsize::new(0),
-                }
-            }
-        }
-
-        pub fn wait(&self) {
-            let old = self.count.fetch_add(1, Ordering::Relaxed);
-
-            let generation = old / self.num_procs;
-
-            let mut current = old + 1;
-            while current < (generation + 1) * self.num_procs {
-                std::thread::sleep(std::time::Duration::from_millis(10));
-                current = self.count.load(Ordering::Relaxed);
-            }
-        }
-    }
-
-    #[test]
-    fn test_multi_process() {
-        // Initialize
-        let max_size = 1_000_000_000_000;
-        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
-        let ptr = init_struct.data_ptr.as_ptr();
-
-        // Store the SimpleBarrier in the first 1k of the area.
-        init_struct.set_size(10000).unwrap();
-        let barrier_ptr: *mut SimpleBarrier = unsafe {
-            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
-                .cast()
-        };
-        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
-        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
-
-        // Fork another test process. The code after this runs in both processes concurrently.
-        let fork_result = unsafe { nix::unistd::fork().unwrap() };
-
-        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, 1000..2000);
-        } else {
-            write_range(ptr, 0xBB, 2000..3000);
-        }
-        barrier.wait();
-        // Verify the contents. (in both processes)
-        assert_range(ptr, 0xAA, 1000..2000);
-        assert_range(ptr, 0xBB, 2000..3000);
-
-        // Grow, from the child this time
-        let size = 10_000_000;
-        if !fork_result.is_parent() {
-            init_struct.set_size(size).unwrap();
-        }
-        barrier.wait();
-
-        // make some writes at the end
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, (size - 10)..size);
-        } else {
-            write_range(ptr, 0xBB, (size - 20)..(size - 10));
-        }
-        barrier.wait();
-
-        // Verify the contents. (This runs in both processes)
-        assert_range(ptr, 0, (size - 1000)..(size - 20));
-        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
-        assert_range(ptr, 0xAA, (size - 10)..size);
-
-        if let ForkResult::Parent { child } = fork_result {
-            nix::sys::wait::waitpid(child, None).unwrap();
-        }
-    }
-}
--- a/libs/neonart/Cargo.toml
+++ b/libs/neonart/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "neonart"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-crossbeam-utils.workspace = true
-spin.workspace = true
-tracing.workspace = true
-
-[dev-dependencies]
-rand = "0.9.1"
-rand_distr = "0.5.1"
--- a/libs/neonart/src/algorithm.rs
+++ b/libs/neonart/src/algorithm.rs
@@ -1,594 +0,0 @@
-mod lock_and_version;
-pub(crate) mod node_ptr;
-mod node_ref;
-
-use std::vec::Vec;
-
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::algorithm::node_ptr::MAX_PREFIX_LEN;
-use crate::algorithm::node_ref::{NewNodeRef, NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
-use crate::allocator::OutOfMemoryError;
-
-use crate::TreeWriteGuard;
-use crate::UpdateAction;
-use crate::allocator::ArtAllocator;
-use crate::epoch::EpochPin;
-use crate::{Key, Value};
-
-pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
-
-#[derive(Debug)]
-pub enum ArtError {
-    ConcurrentUpdate, // need to retry
-    OutOfMemory,
-}
-
-impl From<ConcurrentUpdateError> for ArtError {
-    fn from(_: ConcurrentUpdateError) -> ArtError {
-        ArtError::ConcurrentUpdate
-    }
-}
-
-impl From<OutOfMemoryError> for ArtError {
-    fn from(_: OutOfMemoryError) -> ArtError {
-        ArtError::OutOfMemory
-    }
-}
-
-pub fn new_root<V: Value>(
-    allocator: &impl ArtAllocator<V>,
-) -> Result<RootPtr<V>, OutOfMemoryError> {
-    node_ptr::new_root(allocator)
-}
-
-pub(crate) fn search<'e, K: Key, V: Value>(
-    key: &K,
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<&'e V> {
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        if let Ok(result) = lookup_recurse(key.as_bytes(), root_ref, None, epoch_pin) {
-            break result;
-        }
-        // retry
-    }
-}
-
-pub(crate) fn iter_next<'e, V: Value>(
-    key: &[u8],
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<(Vec<u8>, &'e V)> {
-    loop {
-        let mut path = Vec::new();
-        let root_ref = NodeRef::from_root_ptr(root);
-
-        match next_recurse(key, &mut path, root_ref, epoch_pin) {
-            Ok(Some(v)) => {
-                assert_eq!(path.len(), key.len());
-                break Some((path, v));
-            }
-            Ok(None) => break None,
-            Err(ConcurrentUpdateError()) => {
-                // retry
-                continue;
-            }
-        }
-    }
-}
-
-pub(crate) fn update_fn<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &K,
-    value_fn: F,
-    root: RootPtr<V>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), OutOfMemoryError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let value_fn_cell = std::cell::Cell::new(Some(value_fn));
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
-        let key_bytes = key.as_bytes();
-
-        match update_recurse(
-            key_bytes,
-            this_value_fn,
-            root_ref,
-            None,
-            None,
-            guard,
-            0,
-            key_bytes,
-        ) {
-            Ok(()) => break Ok(()),
-            Err(ArtError::ConcurrentUpdate) => {
-                continue; // retry
-            }
-            Err(ArtError::OutOfMemory) => break Err(OutOfMemoryError()),
-        }
-    }
-}
-
-// Error means you must retry.
-//
-// This corresponds to the 'lookupOpt' function in the paper
-fn lookup_recurse<'e, V: Value>(
-    key: &[u8],
-    node: NodeRef<'e, V>,
-    parent: Option<ReadLockedNodeRef<V>>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    if let Some(parent) = parent {
-        parent.read_unlock_or_restart()?;
-    }
-
-    // check if the prefix matches, may increment level
-    let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
-        prefix_len
-    } else {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    };
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), prefix_len);
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let key = &key[prefix_len..];
-
-    // find child (or leaf value)
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    match next_node {
-        None => Ok(None), // key not found
-        Some(child) => lookup_recurse(&key[1..], child, Some(rnode), epoch_pin),
-    }
-}
-
-fn next_recurse<'e, V: Value>(
-    min_key: &[u8],
-    path: &mut Vec<u8>,
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    let prefix = rnode.get_prefix();
-    if prefix.len() != 0 {
-        path.extend_from_slice(prefix);
-    }
-
-    use std::cmp::Ordering;
-    let comparison = path.as_slice().cmp(&min_key[0..path.len()]);
-    if comparison == Ordering::Less {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    }
-
-    if rnode.is_leaf() {
-        assert_eq!(path.len(), min_key.len());
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let mut min_key_byte = match comparison {
-        Ordering::Less => unreachable!(), // checked this above already
-        Ordering::Equal => min_key[path.len()],
-        Ordering::Greater => 0,
-    };
-
-    loop {
-        match rnode.find_next_child_or_restart(min_key_byte)? {
-            None => {
-                return Ok(None);
-            }
-            Some((key_byte, child_ref)) => {
-                let path_len = path.len();
-                path.push(key_byte);
-                let result = next_recurse(min_key, path, child_ref, epoch_pin)?;
-                if result.is_some() {
-                    return Ok(result);
-                }
-                if key_byte == u8::MAX {
-                    return Ok(None);
-                }
-                path.truncate(path_len);
-                min_key_byte = key_byte + 1;
-            }
-        }
-    }
-}
-
-// This corresponds to the 'insertOpt' function in the paper
-pub(crate) fn update_recurse<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &[u8],
-    value_fn: F,
-    node: NodeRef<'e, V>,
-    rparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    rgrandparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-    level: usize,
-    orig_key: &[u8],
-) -> Result<(), ArtError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let rnode = node.read_lock_or_restart()?;
-
-    let prefix_match_len = rnode.prefix_matches(key);
-    if prefix_match_len.is_none() {
-        let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        match value_fn(None) {
-            UpdateAction::Nothing => {}
-            UpdateAction::Insert(new_value) => {
-                insert_split_prefix(key, new_value, &mut wnode, &mut wparent, parent_key, guard)?;
-            }
-            UpdateAction::Remove => {
-                panic!("unexpected Remove action on insertion");
-            }
-        }
-        wnode.write_unlock();
-        wparent.write_unlock();
-        return Ok(());
-    }
-    let prefix_match_len = prefix_match_len.unwrap();
-    let key = &key[prefix_match_len as usize..];
-    let level = level + prefix_match_len as usize;
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), 0);
-        let (rparent, parent_key) = rparent.expect("root cannot be leaf");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        // safety: Now that we have acquired the write lock, we have exclusive access to the
-        // value. XXX: There might be concurrent reads though?
-        let value_mut = wnode.get_leaf_value_mut();
-
-        match value_fn(Some(value_mut)) {
-            UpdateAction::Nothing => {
-                wparent.write_unlock();
-                wnode.write_unlock();
-            }
-            UpdateAction::Insert(_) => panic!("cannot insert over existing value"),
-            UpdateAction::Remove => {
-                guard.remember_obsolete_node(wnode.as_ptr());
-                wparent.delete_child(parent_key);
-                wnode.write_unlock_obsolete();
-
-                if let Some(rgrandparent) = rgrandparent {
-                    // FIXME: Ignore concurrency error. It doesn't lead to
-                    // corruption, but it means we might leak something. Until
-                    // another update cleans it up.
-                    let _ = cleanup_parent(wparent, rgrandparent, guard);
-                }
-            }
-        }
-
-        return Ok(());
-    }
-
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    if next_node.is_none() {
-        if rnode.is_full() {
-            let (rparent, parent_key) = rparent.expect("root node cannot become full");
-            let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-            let wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-            match value_fn(None) {
-                UpdateAction::Nothing => {
-                    wnode.write_unlock();
-                    wparent.write_unlock();
-                }
-                UpdateAction::Insert(new_value) => {
-                    insert_and_grow(key, new_value, wnode, &mut wparent, parent_key, guard)?;
-                    wparent.write_unlock();
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-        } else {
-            let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-            if let Some((rparent, _)) = rparent {
-                rparent.read_unlock_or_restart()?;
-            }
-            match value_fn(None) {
-                UpdateAction::Nothing => {}
-                UpdateAction::Insert(new_value) => {
-                    insert_to_node(&mut wnode, key, new_value, guard)?;
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-            wnode.write_unlock();
-        }
-        return Ok(());
-    } else {
-        let next_child = next_node.unwrap(); // checked above it's not None
-        if let Some((ref rparent, _)) = rparent {
-            rparent.check_or_restart()?;
-        }
-
-        // recurse to next level
-        update_recurse(
-            &key[1..],
-            value_fn,
-            next_child,
-            Some((rnode, key[0])),
-            rparent,
-            guard,
-            level + 1,
-            orig_key,
-        )
-    }
-}
-
-#[derive(Clone)]
-enum PathElement {
-    Prefix(Vec<u8>),
-    KeyByte(u8),
-}
-
-impl std::fmt::Debug for PathElement {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        match self {
-            PathElement::Prefix(prefix) => write!(fmt, "{:?}", prefix),
-            PathElement::KeyByte(key_byte) => write!(fmt, "{}", key_byte),
-        }
-    }
-}
-
-pub(crate) fn dump_tree<'e, V: Value + std::fmt::Debug>(
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-    dst: &mut dyn std::io::Write,
-) {
-    let root_ref = NodeRef::from_root_ptr(root);
-
-    let _ = dump_recurse(&[], root_ref, &epoch_pin, 0, dst);
-}
-
-// TODO: return an Err if writeln!() returns error, instead of unwrapping
-fn dump_recurse<'e, V: Value + std::fmt::Debug>(
-    path: &[PathElement],
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-    level: usize,
-    dst: &mut dyn std::io::Write,
-) -> Result<(), ConcurrentUpdateError> {
-    let indent = str::repeat(" ", level);
-
-    let rnode = node.read_lock_or_restart()?;
-    let mut path = Vec::from(path);
-    let prefix = rnode.get_prefix();
-    if prefix.len() != 0 {
-        path.push(PathElement::Prefix(Vec::from(prefix)));
-    }
-
-    if rnode.is_leaf() {
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let val = unsafe { vptr.as_ref().unwrap() };
-        writeln!(dst, "{} {:?}: {:?}", indent, path, val).unwrap();
-        return Ok(());
-    }
-
-    for key_byte in 0..=u8::MAX {
-        match rnode.find_child_or_restart(key_byte)? {
-            None => continue,
-            Some(child_ref) => {
-                let rchild = child_ref.read_lock_or_restart()?;
-                writeln!(
-                    dst,
-                    "{} {:?}, {}: prefix {:?}",
-                    indent,
-                    &path,
-                    key_byte,
-                    rchild.get_prefix()
-                )
-                .unwrap();
-
-                let mut child_path = path.clone();
-                child_path.push(PathElement::KeyByte(key_byte));
-
-                dump_recurse(&child_path, child_ref, epoch_pin, level + 1, dst)?;
-            }
-        }
-    }
-
-    Ok(())
-}
-
-///```text
-///        [fooba]r -> value
-///
-/// [foo]b -> [a]r  -> value
-///      e -> [ls]e -> value
-///```
-fn insert_split_prefix<'e, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    node: &mut WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key: u8,
-    guard: &'e TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let old_node = node;
-    let old_prefix = old_node.get_prefix();
-    let common_prefix_len = common_prefix(key, old_prefix);
-
-    // Allocate a node for the new value.
-    let new_value_node = allocate_node_for_value(
-        &key[common_prefix_len + 1..],
-        value,
-        guard.tree_writer.allocator,
-    )?;
-
-    // Allocate a new internal node with the common prefix
-    // FIXME: deallocate 'new_value_node' on OOM
-    let mut prefix_node =
-        node_ref::new_internal(&key[..common_prefix_len], guard.tree_writer.allocator)?;
-
-    // Add the old node and the new nodes to the new internal node
-    prefix_node.insert_old_child(old_prefix[common_prefix_len], old_node);
-    prefix_node.insert_new_child(key[common_prefix_len], new_value_node);
-
-    // Modify the prefix of the old child in place
-    old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
-
-    // replace the pointer in the parent
-    parent.replace_child(parent_key, prefix_node.into_ptr());
-
-    Ok(())
-}
-
-fn insert_to_node<'e, K: Key, V: Value, A: ArtAllocator<V>>(
-    wnode: &mut WriteLockedNodeRef<V>,
-    key: &[u8],
-    value: V,
-    guard: &'e TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    wnode.insert_child(key[0], value_child.into_ptr());
-    Ok(())
-}
-
-// On entry: 'parent' and 'node' are locked
-fn insert_and_grow<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    wnode: WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key_byte: u8,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let mut bigger_node = wnode.grow(guard.tree_writer.allocator)?;
-
-    // FIXME: deallocate 'bigger_node' on OOM
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    bigger_node.insert_new_child(key[0], value_child);
-
-    // Replace the pointer in the parent
-    parent.replace_child(parent_key_byte, bigger_node.into_ptr());
-
-    guard.remember_obsolete_node(wnode.as_ptr());
-    wnode.write_unlock_obsolete();
-
-    Ok(())
-}
-
-fn cleanup_parent<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    wparent: WriteLockedNodeRef<V>,
-    rgrandparent: (ReadLockedNodeRef<V>, u8),
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let (rgrandparent, grandparent_key_byte) = rgrandparent;
-
-    // If the parent becomes completely empty after the deletion, remove the parent from the
-    // grandparent. (This case is possible because we reserve only 8 bytes for the prefix.)
-    // TODO: not implemented.
-
-    // If the parent has only one child, replace the parent with the remaining child. (This is not
-    // possible if the child's prefix field cannot absorb the parent's)
-    if wparent.num_children() == 1 {
-        // Try to lock the remaining child. This can fail if the child is updated
-        // concurrently.
-        let (key_byte, remaining_child) = wparent.find_remaining_child();
-
-        let mut wremaining_child = remaining_child.write_lock_or_restart()?;
-
-        if 1 + wremaining_child.get_prefix().len() + wparent.get_prefix().len() <= MAX_PREFIX_LEN {
-            let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-
-            // Ok, we have locked the leaf, the parent, the grandparent, and the parent's only
-            // remaining leaf. Proceed with the updates.
-
-            // Update the prefix on the remaining leaf
-            wremaining_child.prepend_prefix(wparent.get_prefix(), key_byte);
-
-            // Replace the pointer in the grandparent to point directly to the remaining leaf
-            wgrandparent.replace_child(grandparent_key_byte, wremaining_child.as_ptr());
-
-            // Mark the parent as deleted.
-            guard.remember_obsolete_node(wparent.as_ptr());
-            wparent.write_unlock_obsolete();
-            return Ok(());
-        }
-    }
-
-    // If the parent's children would fit on a smaller node type after the deletion, replace it with
-    // a smaller node.
-    if wparent.can_shrink() {
-        let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-        let smaller_node = wparent.shrink(guard.tree_writer.allocator)?;
-
-        // Replace the pointer in the grandparent
-        wgrandparent.replace_child(grandparent_key_byte, smaller_node.into_ptr());
-
-        guard.remember_obsolete_node(wparent.as_ptr());
-        wparent.write_unlock_obsolete();
-        return Ok(());
-    }
-
-    // nothing to do
-    wparent.write_unlock();
-    Ok(())
-}
-
-// Allocate a new leaf node to hold 'value'. If the key is long, we
-// may need to allocate new internal nodes to hold it too
-fn allocate_node_for_value<'a, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError> {
-    let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN);
-
-    let leaf_node = node_ref::new_leaf(&key[prefix_off..key.len()], value, allocator)?;
-
-    let mut node = leaf_node;
-    while prefix_off > 0 {
-        // Need another internal node
-        let remain_prefix = &key[0..prefix_off];
-
-        prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
-        let mut internal_node = node_ref::new_internal(
-            &remain_prefix[prefix_off..remain_prefix.len() - 1],
-            allocator,
-        )?;
-        internal_node.insert_new_child(*remain_prefix.last().unwrap(), node);
-        node = internal_node;
-    }
-
-    Ok(node)
-}
-
-fn common_prefix(a: &[u8], b: &[u8]) -> usize {
-    for i in 0..MAX_PREFIX_LEN {
-        if a[i] != b[i] {
-            return i;
-        }
-    }
-    panic!("prefixes are equal");
-}
--- a/libs/neonart/src/algorithm/lock_and_version.rs
+++ b/libs/neonart/src/algorithm/lock_and_version.rs
@@ -1,117 +0,0 @@
-//! Each node in the tree has contains one atomic word that stores three things:
-//!
-//! Bit 0: set if the node is "obsolete". An obsolete node has been removed from the tree,
-//!        but might still be accessed by concurrent readers until the epoch expires.
-//! Bit 1: set if the node is currently write-locked. Used as a spinlock.
-//! Bits 2-63: Version number, incremented every time the node is modified.
-//!
-//! AtomicLockAndVersion represents that.
-
-use std::sync::atomic::{AtomicU64, Ordering};
-
-pub(crate) struct ConcurrentUpdateError();
-
-pub(crate) struct AtomicLockAndVersion {
-    inner: AtomicU64,
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn new() -> AtomicLockAndVersion {
-        AtomicLockAndVersion {
-            inner: AtomicU64::new(0),
-        }
-    }
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn read_lock_or_restart(&self) -> Result<u64, ConcurrentUpdateError> {
-        let version = self.await_node_unlocked();
-        if is_obsolete(version) {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(version)
-    }
-
-    pub(crate) fn check_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        self.read_unlock_or_restart(version)
-    }
-
-    pub(crate) fn read_unlock_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        if self.inner.load(Ordering::Acquire) != version {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        &self,
-        version: u64,
-    ) -> Result<(), ConcurrentUpdateError> {
-        if self
-            .inner
-            .compare_exchange(
-                version,
-                set_locked_bit(version),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_lock_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        let old = self.inner.load(Ordering::Relaxed);
-        if is_obsolete(old) || is_locked(old) {
-            return Err(ConcurrentUpdateError());
-        }
-        if self
-            .inner
-            .compare_exchange(
-                old,
-                set_locked_bit(old),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_unlock(&self) {
-        // reset locked bit and overflow into version
-        self.inner.fetch_add(2, Ordering::Release);
-    }
-
-    pub(crate) fn write_unlock_obsolete(&self) {
-        // set obsolete, reset locked, overflow into version
-        self.inner.fetch_add(3, Ordering::Release);
-    }
-
-    // Helper functions
-    fn await_node_unlocked(&self) -> u64 {
-        let mut version = self.inner.load(Ordering::Acquire);
-        while is_locked(version) {
-            // spinlock
-            std::thread::yield_now();
-            version = self.inner.load(Ordering::Acquire)
-        }
-        version
-    }
-}
-
-fn set_locked_bit(version: u64) -> u64 {
-    return version + 2;
-}
-
-fn is_obsolete(version: u64) -> bool {
-    return (version & 1) == 1;
-}
-
-fn is_locked(version: u64) -> bool {
-    return (version & 2) == 2;
-}
--- a/libs/neonart/src/algorithm/node_ptr.rs
+++ b/libs/neonart/src/algorithm/node_ptr.rs
--- a/libs/neonart/src/algorithm/node_ref.rs
+++ b/libs/neonart/src/algorithm/node_ref.rs
@@ -1,349 +0,0 @@
-use std::fmt::Debug;
-use std::marker::PhantomData;
-
-use super::node_ptr;
-use super::node_ptr::NodePtr;
-use crate::EpochPin;
-use crate::Value;
-use crate::algorithm::lock_and_version::AtomicLockAndVersion;
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::allocator::ArtAllocator;
-use crate::allocator::OutOfMemoryError;
-
-pub struct NodeRef<'e, V> {
-    ptr: NodePtr<V>,
-
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V> Debug for NodeRef<'e, V> {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.ptr)
-    }
-}
-
-impl<'e, V: Value> NodeRef<'e, V> {
-    pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
-        NodeRef {
-            ptr: root_ptr,
-            phantom: PhantomData,
-        }
-    }
-
-    pub(crate) fn read_lock_or_restart(
-        &self,
-    ) -> Result<ReadLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        let version = self.lockword().read_lock_or_restart()?;
-        Ok(ReadLockedNodeRef {
-            ptr: self.ptr,
-            version,
-            phantom: self.phantom,
-        })
-    }
-
-    pub(crate) fn write_lock_or_restart(
-        &self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.lockword().write_lock_or_restart()?;
-        Ok(WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        })
-    }
-
-    fn lockword(&self) -> &AtomicLockAndVersion {
-        self.ptr.lockword()
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions re-check
-/// the version after each read.
-pub struct ReadLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>,
-    version: u64,
-
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
-    pub(crate) fn is_leaf(&self) -> bool {
-        self.ptr.is_leaf()
-    }
-
-    pub(crate) fn is_full(&self) -> bool {
-        self.ptr.is_full()
-    }
-
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Note: because we're only holding a read lock, the prefix can change concurrently.
-    /// You must be prepared to restart, if read_unlock() returns error later.
-    ///
-    /// Returns the length of the prefix, or None if it's not a match
-    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
-        self.ptr.prefix_matches(key)
-    }
-
-    pub(crate) fn find_child_or_restart(
-        &self,
-        key_byte: u8,
-    ) -> Result<Option<NodeRef<'e, V>>, ConcurrentUpdateError> {
-        let child_or_value = self.ptr.find_child(key_byte);
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        match child_or_value {
-            None => Ok(None),
-            Some(child_ptr) => Ok(Some(NodeRef {
-                ptr: child_ptr,
-                phantom: self.phantom,
-            })),
-        }
-    }
-
-    pub(crate) fn find_next_child_or_restart(
-        &self,
-        min_key_byte: u8,
-    ) -> Result<Option<(u8, NodeRef<'e, V>)>, ConcurrentUpdateError> {
-        let child_or_value = self.ptr.find_next_child(min_key_byte);
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        match child_or_value {
-            None => Ok(None),
-            Some((k, child_ptr)) => Ok(Some((
-                k,
-                NodeRef {
-                    ptr: child_ptr,
-                    phantom: self.phantom,
-                },
-            ))),
-        }
-    }
-
-    pub(crate) fn get_leaf_value_ptr(&self) -> Result<*const V, ConcurrentUpdateError> {
-        let result = self.ptr.get_leaf_value();
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        // Extend the lifetime.
-        let result = std::ptr::from_ref(result);
-
-        Ok(result)
-    }
-
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.ptr
-            .lockword()
-            .upgrade_to_write_lock_or_restart(self.version)?;
-
-        Ok(WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        })
-    }
-
-    pub(crate) fn read_unlock_or_restart(self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)?;
-        Ok(())
-    }
-
-    pub(crate) fn check_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)?;
-        Ok(())
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions re-check
-/// the version after each read.
-pub struct WriteLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>,
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
-    pub(crate) fn can_shrink(&self) -> bool {
-        self.ptr.can_shrink()
-    }
-
-    pub(crate) fn num_children(&self) -> usize {
-        self.ptr.num_children()
-    }
-
-    pub(crate) fn write_unlock(mut self) {
-        self.ptr.lockword().write_unlock();
-        self.ptr = NodePtr::null();
-    }
-
-    pub(crate) fn write_unlock_obsolete(mut self) {
-        self.ptr.lockword().write_unlock_obsolete();
-        self.ptr = NodePtr::null();
-    }
-
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
-        self.ptr.truncate_prefix(new_prefix_len)
-    }
-
-    pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) {
-        self.ptr.prepend_prefix(prefix, prefix_byte)
-    }
-
-    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
-        self.ptr.insert_child(key_byte, child)
-    }
-
-    pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V {
-        self.ptr.get_leaf_value_mut()
-    }
-
-    pub(crate) fn grow<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let new_node = self.ptr.grow(allocator)?;
-        Ok(NewNodeRef {
-            ptr: new_node,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    pub(crate) fn shrink<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let new_node = self.ptr.shrink(allocator)?;
-        Ok(NewNodeRef {
-            ptr: new_node,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    pub(crate) fn as_ptr(&self) -> NodePtr<V> {
-        self.ptr
-    }
-
-    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
-        self.ptr.replace_child(key_byte, replacement);
-    }
-
-    pub(crate) fn delete_child(&mut self, key_byte: u8) {
-        self.ptr.delete_child(key_byte);
-    }
-
-    pub(crate) fn find_remaining_child(&self) -> (u8, NodeRef<'e, V>) {
-        assert_eq!(self.num_children(), 1);
-        let child_or_value = self.ptr.find_next_child(0);
-
-        match child_or_value {
-            None => panic!("could not find only child in node"),
-            Some((k, child_ptr)) => (
-                k,
-                NodeRef {
-                    ptr: child_ptr,
-                    phantom: self.phantom,
-                },
-            ),
-        }
-    }
-}
-
-impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
-    fn drop(&mut self) {
-        if !self.ptr.is_null() {
-            self.ptr.lockword().write_unlock();
-        }
-    }
-}
-
-pub(crate) struct NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    ptr: NodePtr<V>,
-    allocator: &'a A,
-
-    extra_nodes: Vec<NodePtr<V>>,
-}
-
-impl<'a, V, A> NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    pub(crate) fn insert_old_child(&mut self, key_byte: u8, child: &WriteLockedNodeRef<V>) {
-        self.ptr.insert_child(key_byte, child.as_ptr())
-    }
-
-    pub(crate) fn into_ptr(mut self) -> NodePtr<V> {
-        let ptr = self.ptr;
-        self.ptr = NodePtr::null();
-        ptr
-    }
-
-    pub(crate) fn insert_new_child(&mut self, key_byte: u8, child: NewNodeRef<'a, V, A>) {
-        let child_ptr = child.into_ptr();
-        self.ptr.insert_child(key_byte, child_ptr);
-        self.extra_nodes.push(child_ptr);
-    }
-}
-
-impl<'a, V, A> Drop for NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// This drop implementation deallocates the newly allocated node, if into_ptr() was not called.
-    fn drop(&mut self) {
-        if !self.ptr.is_null() {
-            self.ptr.deallocate(self.allocator);
-            for p in self.extra_nodes.iter() {
-                p.deallocate(self.allocator);
-            }
-        }
-    }
-}
-
-pub(crate) fn new_internal<'a, V, A>(
-    prefix: &[u8],
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    Ok(NewNodeRef {
-        ptr: node_ptr::new_internal(prefix, allocator)?,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
-
-pub(crate) fn new_leaf<'a, V, A>(
-    prefix: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    Ok(NewNodeRef {
-        ptr: node_ptr::new_leaf(prefix, value, allocator)?,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
--- a/libs/neonart/src/allocator.rs
+++ b/libs/neonart/src/allocator.rs
@@ -1,158 +0,0 @@
-pub mod block;
-mod multislab;
-mod slab;
-pub mod r#static;
-
-use std::alloc::Layout;
-use std::marker::PhantomData;
-use std::mem::MaybeUninit;
-use std::sync::atomic::Ordering;
-
-use crate::allocator::multislab::MultiSlabAllocator;
-use crate::allocator::r#static::alloc_from_slice;
-
-use spin;
-
-use crate::Tree;
-pub use crate::algorithm::node_ptr::{
-    NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf,
-};
-
-#[derive(Debug)]
-pub struct OutOfMemoryError();
-
-pub trait ArtAllocator<V: crate::Value> {
-    fn alloc_tree(&self) -> *mut Tree<V>;
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V>;
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V>;
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V>;
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V>;
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V>;
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>);
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>);
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>);
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>);
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>);
-}
-
-pub struct ArtMultiSlabAllocator<'t, V>
-where
-    V: crate::Value,
-{
-    tree_area: spin::Mutex<Option<&'t mut MaybeUninit<Tree<V>>>>,
-
-    pub(crate) inner: MultiSlabAllocator<'t, 5>,
-
-    phantom_val: PhantomData<V>,
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    const LAYOUTS: [Layout; 5] = [
-        Layout::new::<NodeInternal4<V>>(),
-        Layout::new::<NodeInternal16<V>>(),
-        Layout::new::<NodeInternal48<V>>(),
-        Layout::new::<NodeInternal256<V>>(),
-        Layout::new::<NodeLeaf<V>>(),
-    ];
-
-    pub fn new(area: &'t mut [MaybeUninit<u8>]) -> &'t mut ArtMultiSlabAllocator<'t, V> {
-        let (allocator_area, remain) = alloc_from_slice::<ArtMultiSlabAllocator<V>>(area);
-        let (tree_area, remain) = alloc_from_slice::<Tree<V>>(remain);
-
-        let allocator = allocator_area.write(ArtMultiSlabAllocator {
-            tree_area: spin::Mutex::new(Some(tree_area)),
-            inner: MultiSlabAllocator::new(remain, &Self::LAYOUTS),
-            phantom_val: PhantomData,
-        });
-
-        allocator
-    }
-}
-
-impl<'t, V: crate::Value> ArtAllocator<V> for ArtMultiSlabAllocator<'t, V> {
-    fn alloc_tree(&self) -> *mut Tree<V> {
-        let mut t = self.tree_area.lock();
-        if let Some(tree_area) = t.take() {
-            return tree_area.as_mut_ptr().cast();
-        }
-        panic!("cannot allocate more than one tree");
-    }
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V> {
-        self.inner.alloc_slab(0).cast()
-    }
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V> {
-        self.inner.alloc_slab(1).cast()
-    }
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V> {
-        self.inner.alloc_slab(2).cast()
-    }
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V> {
-        self.inner.alloc_slab(3).cast()
-    }
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V> {
-        self.inner.alloc_slab(4).cast()
-    }
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>) {
-        self.inner.dealloc_slab(0, ptr.cast())
-    }
-
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>) {
-        self.inner.dealloc_slab(1, ptr.cast())
-    }
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>) {
-        self.inner.dealloc_slab(2, ptr.cast())
-    }
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>) {
-        self.inner.dealloc_slab(3, ptr.cast())
-    }
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>) {
-        self.inner.dealloc_slab(4, ptr.cast())
-    }
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    pub(crate) fn get_statistics(&self) -> ArtMultiSlabStats {
-        ArtMultiSlabStats {
-            num_internal4: self.inner.slab_descs[0]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal16: self.inner.slab_descs[1]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal48: self.inner.slab_descs[2]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal256: self.inner.slab_descs[3]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_leaf: self.inner.slab_descs[4]
-                .num_allocated
-                .load(Ordering::Relaxed),
-
-            num_blocks_internal4: self.inner.slab_descs[0].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal16: self.inner.slab_descs[1].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal48: self.inner.slab_descs[2].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal256: self.inner.slab_descs[3].num_blocks.load(Ordering::Relaxed),
-            num_blocks_leaf: self.inner.slab_descs[4].num_blocks.load(Ordering::Relaxed),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtMultiSlabStats {
-    pub num_internal4: u64,
-    pub num_internal16: u64,
-    pub num_internal48: u64,
-    pub num_internal256: u64,
-    pub num_leaf: u64,
-
-    pub num_blocks_internal4: u64,
-    pub num_blocks_internal16: u64,
-    pub num_blocks_internal48: u64,
-    pub num_blocks_internal256: u64,
-    pub num_blocks_leaf: u64,
-}
--- a/libs/neonart/src/allocator/block.rs
+++ b/libs/neonart/src/allocator/block.rs
@@ -1,191 +0,0 @@
-//! Simple allocator of fixed-size blocks
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use spin;
-
-pub const BLOCK_SIZE: usize = 16 * 1024;
-
-const INVALID_BLOCK: u64 = u64::MAX;
-
-pub(crate) struct BlockAllocator<'t> {
-    blocks_ptr: &'t [MaybeUninit<u8>],
-    num_blocks: u64,
-    num_initialized: AtomicU64,
-
-    freelist_head: spin::Mutex<u64>,
-}
-
-struct FreeListBlock {
-    inner: spin::Mutex<FreeListBlockInner>,
-}
-
-struct FreeListBlockInner {
-    next: u64,
-
-    num_free_blocks: u64,
-    free_blocks: [u64; 100], // FIXME: fill the rest of the block
-}
-
-impl<'t> BlockAllocator<'t> {
-    pub(crate) fn new(area: &'t mut [MaybeUninit<u8>]) -> Self {
-        // Use all the space for the blocks
-        let padding = area.as_ptr().align_offset(BLOCK_SIZE);
-        let remain = &mut area[padding..];
-
-        let num_blocks = (remain.len() / BLOCK_SIZE) as u64;
-
-        BlockAllocator {
-            blocks_ptr: remain,
-            num_blocks,
-            num_initialized: AtomicU64::new(0),
-            freelist_head: spin::Mutex::new(INVALID_BLOCK),
-        }
-    }
-
-    /// safety: you must hold a lock on the pointer to this block, otherwise it might get
-    /// reused for another kind of block
-    fn read_freelist_block(&self, blkno: u64) -> &FreeListBlock {
-        let ptr: *const FreeListBlock = self.get_block_ptr(blkno).cast();
-        unsafe { ptr.as_ref().unwrap() }
-    }
-
-    fn get_block_ptr(&self, blkno: u64) -> *mut u8 {
-        assert!(blkno < self.num_blocks);
-        unsafe {
-            self.blocks_ptr
-                .as_ptr()
-                .byte_offset(blkno as isize * BLOCK_SIZE as isize)
-        }
-        .cast_mut()
-        .cast()
-    }
-
-    #[allow(clippy::mut_from_ref)]
-    pub(crate) fn alloc_block(&self) -> &mut [MaybeUninit<u8>] {
-        // FIXME: handle OOM
-        let blkno = self.alloc_block_internal();
-        if blkno == INVALID_BLOCK {
-            panic!("out of memory");
-        }
-
-        let ptr: *mut MaybeUninit<u8> = self.get_block_ptr(blkno).cast();
-        unsafe { std::slice::from_raw_parts_mut(ptr, BLOCK_SIZE) }
-    }
-
-    fn alloc_block_internal(&self) -> u64 {
-        //  check the free list.
-        {
-            let mut freelist_head = self.freelist_head.lock();
-            if *freelist_head != INVALID_BLOCK {
-                let freelist_block = self.read_freelist_block(*freelist_head);
-
-                // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-                let mut g = freelist_block.inner.lock();
-
-                if g.num_free_blocks > 0 {
-                    g.num_free_blocks -= 1;
-                    let result = g.free_blocks[g.num_free_blocks as usize];
-                    return result;
-                } else {
-                    // consume the freelist block itself
-                    let result = *freelist_head;
-                    *freelist_head = g.next;
-                    // This freelist block is now unlinked and can be repurposed
-                    drop(g);
-                    return result;
-                }
-            }
-        }
-
-        // If there are some blocks left that we've never used, pick next such block
-        let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed);
-        while next_uninitialized < self.num_blocks {
-            match self.num_initialized.compare_exchange(
-                next_uninitialized,
-                next_uninitialized + 1,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => {
-                    return next_uninitialized;
-                }
-                Err(old) => {
-                    next_uninitialized = old;
-                    continue;
-                }
-            }
-        }
-
-        // out of blocks
-        return INVALID_BLOCK;
-    }
-
-    // TODO: this is currently unused. The slab allocator never releases blocks
-    #[allow(dead_code)]
-    pub(crate) fn release_block(&self, block_ptr: *mut u8) {
-        let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize };
-        self.release_block_internal(blockno as u64);
-    }
-
-    fn release_block_internal(&self, blockno: u64) {
-        let mut freelist_head = self.freelist_head.lock();
-        if *freelist_head != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(*freelist_head);
-
-            // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-            let mut g = freelist_block.inner.lock();
-
-            let num_free_blocks = g.num_free_blocks;
-            if num_free_blocks < g.free_blocks.len() as u64 {
-                g.free_blocks[num_free_blocks as usize] = blockno;
-                g.num_free_blocks += 1;
-                return;
-            }
-        }
-
-        // Convert the block into a new freelist block
-        let block_ptr: *mut FreeListBlock = self.get_block_ptr(blockno).cast();
-        let init = FreeListBlock {
-            inner: spin::Mutex::new(FreeListBlockInner {
-                next: *freelist_head,
-                num_free_blocks: 0,
-                free_blocks: [INVALID_BLOCK; 100],
-            }),
-        };
-        unsafe { (*block_ptr) = init };
-        *freelist_head = blockno;
-    }
-
-    // for debugging
-    pub(crate) fn get_statistics(&self) -> BlockAllocatorStats {
-        let mut num_free_blocks = 0;
-
-        let mut _prev_lock = None;
-        let head_lock = self.freelist_head.lock();
-        let mut next_blk = *head_lock;
-        let mut _head_lock = Some(head_lock);
-        while next_blk != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(next_blk);
-            let lock = freelist_block.inner.lock();
-            num_free_blocks += lock.num_free_blocks;
-            next_blk = lock.next;
-            _prev_lock = Some(lock); // hold the lock until we've read the next block
-            _head_lock = None;
-        }
-
-        BlockAllocatorStats {
-            num_blocks: self.num_blocks,
-            num_initialized: self.num_initialized.load(Ordering::Relaxed),
-            num_free_blocks,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct BlockAllocatorStats {
-    pub num_blocks: u64,
-    pub num_initialized: u64,
-    pub num_free_blocks: u64,
-}
--- a/libs/neonart/src/allocator/multislab.rs
+++ b/libs/neonart/src/allocator/multislab.rs
@@ -1,33 +0,0 @@
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-
-use crate::allocator::block::BlockAllocator;
-use crate::allocator::slab::SlabDesc;
-
-pub struct MultiSlabAllocator<'t, const N: usize> {
-    pub(crate) block_allocator: BlockAllocator<'t>,
-
-    pub(crate) slab_descs: [SlabDesc; N],
-}
-
-impl<'t, const N: usize> MultiSlabAllocator<'t, N> {
-    pub(crate) fn new(
-        area: &'t mut [MaybeUninit<u8>],
-        layouts: &[Layout; N],
-    ) -> MultiSlabAllocator<'t, N> {
-        let block_allocator = BlockAllocator::new(area);
-        MultiSlabAllocator {
-            block_allocator,
-
-            slab_descs: std::array::from_fn(|i| SlabDesc::new(&layouts[i])),
-        }
-    }
-
-    pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 {
-        self.slab_descs[slab_idx].alloc_chunk(&self.block_allocator)
-    }
-
-    pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) {
-        self.slab_descs[slab_idx].dealloc_chunk(ptr, &self.block_allocator)
-    }
-}
--- a/libs/neonart/src/allocator/slab.rs
+++ b/libs/neonart/src/allocator/slab.rs
@@ -1,432 +0,0 @@
-//! A slab allocator that carves out fixed-size chunks from larger blocks.
-//!
-//!
-
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-use std::ops::Deref;
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-
-use spin;
-
-use super::alloc_from_slice;
-use super::block::BlockAllocator;
-
-use crate::allocator::block::BLOCK_SIZE;
-
-pub(crate) struct SlabDesc {
-    pub(crate) layout: Layout,
-
-    block_lists: spin::RwLock<BlockLists>,
-
-    pub(crate) num_blocks: AtomicU64,
-    pub(crate) num_allocated: AtomicU64,
-}
-
-// FIXME: Not sure if SlabDesc is really Sync or Send. It probably is when it's empty, but
-// 'block_lists' contains pointers when it's not empty. In the current use as part of the
-// the art tree, SlabDescs are only moved during initialization.
-unsafe impl Sync for SlabDesc {}
-unsafe impl Send for SlabDesc {}
-
-#[derive(Default, Debug)]
-struct BlockLists {
-    full_blocks: BlockList,
-    nonfull_blocks: BlockList,
-}
-
-impl BlockLists {
-    // Unlink a node. It must be in either one of the two lists.
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        let list = unsafe {
-            if (*elem).next.is_null() {
-                if self.full_blocks.tail == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else if (*elem).prev.is_null() {
-                if self.full_blocks.head == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else {
-                None
-            }
-        };
-        unsafe { unlink_slab_block(list, elem) };
-    }
-}
-
-unsafe fn unlink_slab_block(mut list: Option<&mut BlockList>, elem: *mut SlabBlockHeader) {
-    unsafe {
-        if (*elem).next.is_null() {
-            assert_eq!(list.as_ref().unwrap().tail, elem);
-            list.as_mut().unwrap().tail = (*elem).prev;
-        } else {
-            assert_eq!((*(*elem).next).prev, elem);
-            (*(*elem).next).prev = (*elem).prev;
-        }
-        if (*elem).prev.is_null() {
-            assert_eq!(list.as_ref().unwrap().head, elem);
-            list.as_mut().unwrap().head = (*elem).next;
-        } else {
-            assert_eq!((*(*elem).prev).next, elem);
-            (*(*elem).prev).next = (*elem).next;
-        }
-    }
-}
-
-#[derive(Debug)]
-struct BlockList {
-    head: *mut SlabBlockHeader,
-    tail: *mut SlabBlockHeader,
-}
-
-impl Default for BlockList {
-    fn default() -> Self {
-        BlockList {
-            head: std::ptr::null_mut(),
-            tail: std::ptr::null_mut(),
-        }
-    }
-}
-
-impl BlockList {
-    unsafe fn push_head(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe {
-            if self.is_empty() {
-                self.tail = elem;
-                (*elem).next = std::ptr::null_mut();
-            } else {
-                (*elem).next = self.head;
-                (*self.head).prev = elem;
-            }
-            (*elem).prev = std::ptr::null_mut();
-            self.head = elem;
-        }
-    }
-
-    fn is_empty(&self) -> bool {
-        self.head.is_null()
-    }
-
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe { unlink_slab_block(Some(self), elem) }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        let mut next = self.head;
-
-        while !next.is_null() {
-            let n = unsafe { next.as_ref() }.unwrap();
-            eprintln!(
-                "  blk {:?} (free {}/{})",
-                next,
-                n.num_free_chunks.load(Ordering::Relaxed),
-                n.num_chunks
-            );
-            next = n.next;
-        }
-    }
-}
-
-impl SlabDesc {
-    pub(crate) fn new(layout: &Layout) -> SlabDesc {
-        SlabDesc {
-            layout: *layout,
-            block_lists: spin::RwLock::new(BlockLists::default()),
-            num_allocated: AtomicU64::new(0),
-            num_blocks: AtomicU64::new(0),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct SlabBlockHeader {
-    free_chunks_head: spin::Mutex<*mut FreeChunk>,
-    num_free_chunks: AtomicU32,
-    num_chunks: u32, // this is really a constant for a given Layout
-
-    // these fields are protected by the lock on the BlockLists
-    prev: *mut SlabBlockHeader,
-    next: *mut SlabBlockHeader,
-}
-
-struct FreeChunk {
-    next: *mut FreeChunk,
-}
-
-enum ReadOrWriteGuard<'a, T> {
-    Read(spin::RwLockReadGuard<'a, T>),
-    Write(spin::RwLockWriteGuard<'a, T>),
-}
-
-impl<'a, T> Deref for ReadOrWriteGuard<'a, T> {
-    type Target = T;
-
-    fn deref(&self) -> &<Self as Deref>::Target {
-        match self {
-            ReadOrWriteGuard::Read(g) => g.deref(),
-            ReadOrWriteGuard::Write(g) => g.deref(),
-        }
-    }
-}
-
-impl SlabDesc {
-    pub fn alloc_chunk(&self, block_allocator: &BlockAllocator) -> *mut u8 {
-        // Are there any free chunks?
-        let mut acquire_write = false;
-        'outer: loop {
-            let mut block_lists_guard = if acquire_write {
-                ReadOrWriteGuard::Write(self.block_lists.write())
-            } else {
-                ReadOrWriteGuard::Read(self.block_lists.read())
-            };
-            'inner: loop {
-                let block_ptr = block_lists_guard.nonfull_blocks.head;
-                if block_ptr.is_null() {
-                    break 'outer;
-                }
-                unsafe {
-                    let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-                    if !(*free_chunks_head).is_null() {
-                        let result = *free_chunks_head;
-                        (*free_chunks_head) = (*result).next;
-                        let _old = (*block_ptr).num_free_chunks.fetch_sub(1, Ordering::Relaxed);
-
-                        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-                        return result.cast();
-                    }
-                }
-
-                // The block at the head of the list was full. Grab write lock and retry
-                match block_lists_guard {
-                    ReadOrWriteGuard::Read(_) => {
-                        acquire_write = true;
-                        continue 'outer;
-                    }
-                    ReadOrWriteGuard::Write(ref mut g) => {
-                        // move the node to the list of full blocks
-                        unsafe {
-                            g.nonfull_blocks.unlink(block_ptr);
-                            g.full_blocks.push_head(block_ptr);
-                        };
-                        continue 'inner;
-                    }
-                }
-            }
-        }
-
-        // no free chunks. Allocate a new block (and the chunk from that)
-        let (new_block, new_chunk) = self.alloc_block_and_chunk(block_allocator);
-        self.num_blocks.fetch_add(1, Ordering::Relaxed);
-
-        // Add the block to the list in the SlabDesc
-        unsafe {
-            let mut block_lists_guard = self.block_lists.write();
-            block_lists_guard.nonfull_blocks.push_head(new_block);
-        }
-        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-        new_chunk
-    }
-
-    pub fn dealloc_chunk(&self, chunk_ptr: *mut u8, _block_allocator: &BlockAllocator) {
-        // Find the block it belongs to. You can find the block from the address. (And knowing the
-        // layout, you could calculate the chunk number too.)
-        let block_ptr: *mut SlabBlockHeader = {
-            let block_addr = (chunk_ptr.addr() / BLOCK_SIZE) * BLOCK_SIZE;
-            chunk_ptr.with_addr(block_addr).cast()
-        };
-        let chunk_ptr: *mut FreeChunk = chunk_ptr.cast();
-
-        // Mark the chunk as free in 'freechunks' list
-        let num_chunks;
-        let num_free_chunks;
-        unsafe {
-            let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-            (*chunk_ptr).next = *free_chunks_head;
-            *free_chunks_head = chunk_ptr;
-
-            num_free_chunks = (*block_ptr).num_free_chunks.fetch_add(1, Ordering::Relaxed) + 1;
-            num_chunks = (*block_ptr).num_chunks;
-        }
-
-        if num_free_chunks == 1 {
-            // If the block was full previously, add it to the nonfull blocks list. Note that
-            // we're not holding the lock anymore, so it can immediately become full again.
-            // That's harmless, it will be moved back to the full list again when a call
-            // to alloc_chunk() sees it.
-            let mut block_lists = self.block_lists.write();
-            unsafe {
-                block_lists.unlink(block_ptr);
-                block_lists.nonfull_blocks.push_head(block_ptr);
-            };
-        } else if num_free_chunks == num_chunks {
-            // If the block became completely empty, move it to the free list
-            // TODO
-            // FIXME: we're still holding the spinlock. It's not exactly safe to return it to
-            // the free blocks list, is it? Defer it as garbage to wait out concurrent updates?
-            //block_allocator.release_block()
-        }
-
-        // update stats
-        self.num_allocated.fetch_sub(1, Ordering::Relaxed);
-    }
-
-    fn alloc_block_and_chunk(
-        &self,
-        block_allocator: &BlockAllocator,
-    ) -> (*mut SlabBlockHeader, *mut u8) {
-        // fixme: handle OOM
-        let block_slice: &mut [MaybeUninit<u8>] = block_allocator.alloc_block();
-        let (block_header, remain) = alloc_from_slice::<SlabBlockHeader>(block_slice);
-
-        let padding = remain.as_ptr().align_offset(self.layout.align());
-
-        let num_chunks = (remain.len() - padding) / self.layout.size();
-
-        let first_chunk_ptr: *mut FreeChunk = remain[padding..].as_mut_ptr().cast();
-
-        unsafe {
-            let mut chunk_ptr = first_chunk_ptr;
-            for _ in 0..num_chunks - 1 {
-                let next_chunk_ptr = chunk_ptr.byte_add(self.layout.size());
-                (*chunk_ptr).next = next_chunk_ptr;
-                chunk_ptr = next_chunk_ptr;
-            }
-            (*chunk_ptr).next = std::ptr::null_mut();
-
-            let result_chunk = first_chunk_ptr;
-
-            let block_header = block_header.write(SlabBlockHeader {
-                free_chunks_head: spin::Mutex::new((*first_chunk_ptr).next),
-                prev: std::ptr::null_mut(),
-                next: std::ptr::null_mut(),
-                num_chunks: num_chunks as u32,
-                num_free_chunks: AtomicU32::new(num_chunks as u32 - 1),
-            });
-
-            (block_header, result_chunk.cast())
-        }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        eprintln!(
-            "slab dump ({} blocks, {} allocated chunks)",
-            self.num_blocks.load(Ordering::Relaxed),
-            self.num_allocated.load(Ordering::Relaxed)
-        );
-        let lists = self.block_lists.read();
-
-        eprintln!("nonfull blocks:");
-        lists.nonfull_blocks.dump();
-        eprintln!("full blocks:");
-        lists.full_blocks.dump();
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::Rng;
-    use rand_distr::Zipf;
-
-    struct TestObject {
-        val: usize,
-        _dummy: [u8; BLOCK_SIZE / 4],
-    }
-
-    struct TestObjectSlab<'a>(SlabDesc, BlockAllocator<'a>);
-    impl<'a> TestObjectSlab<'a> {
-        fn new(block_allocator: BlockAllocator) -> TestObjectSlab {
-            TestObjectSlab(SlabDesc::new(&Layout::new::<TestObject>()), block_allocator)
-        }
-
-        fn alloc(&self, val: usize) -> *mut TestObject {
-            let obj: *mut TestObject = self.0.alloc_chunk(&self.1).cast();
-            unsafe { (*obj).val = val };
-            obj
-        }
-
-        fn dealloc(&self, obj: *mut TestObject) {
-            self.0.dealloc_chunk(obj.cast(), &self.1)
-        }
-    }
-
-    #[test]
-    fn test_slab_alloc() {
-        const MEM_SIZE: usize = 100000000;
-        let mut area = Box::new_uninit_slice(MEM_SIZE);
-        let block_allocator = BlockAllocator::new(&mut area);
-
-        let slab = TestObjectSlab::new(block_allocator);
-
-        let mut all: Vec<*mut TestObject> = Vec::new();
-        for i in 0..11 {
-            all.push(slab.alloc(i));
-        }
-        for i in 0..11 {
-            assert!(unsafe { (*all[i]).val == i });
-        }
-
-        let distribution = Zipf::new(10 as f64, 1.1).unwrap();
-        let mut rng = rand::rng();
-        for _ in 0..100000 {
-            slab.0.dump();
-            let idx = (rng.sample(distribution) as usize).into();
-            let ptr: *mut TestObject = all[idx];
-            if !ptr.is_null() {
-                assert_eq!(unsafe { (*ptr).val }, idx);
-                slab.dealloc(ptr);
-                all[idx] = std::ptr::null_mut();
-            } else {
-                all[idx] = slab.alloc(idx);
-            }
-        }
-    }
-
-    fn new_test_blk(i: u32) -> *mut SlabBlockHeader {
-        Box::into_raw(Box::new(SlabBlockHeader {
-            free_chunks_head: spin::Mutex::new(std::ptr::null_mut()),
-            num_free_chunks: AtomicU32::new(0),
-            num_chunks: i,
-            prev: std::ptr::null_mut(),
-            next: std::ptr::null_mut(),
-        }))
-    }
-
-    #[test]
-    fn test_block_linked_list() {
-        // note: these are leaked, but that's OK for tests
-        let a = new_test_blk(0);
-        let b = new_test_blk(1);
-
-        let mut list = BlockList::default();
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(a);
-            assert!(!list.is_empty());
-            list.unlink(a);
-        }
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(b);
-            list.push_head(a);
-            assert_eq!(list.head, a);
-            assert_eq!((*a).next, b);
-            assert_eq!((*b).prev, a);
-            assert_eq!(list.tail, b);
-
-            list.unlink(a);
-            list.unlink(b);
-            assert!(list.is_empty());
-        }
-    }
-}
--- a/libs/neonart/src/allocator/static.rs
+++ b/libs/neonart/src/allocator/static.rs
@@ -1,44 +0,0 @@
-use std::mem::MaybeUninit;
-
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() * len > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size() * len);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/libs/neonart/src/epoch.rs
+++ b/libs/neonart/src/epoch.rs
@@ -1,147 +0,0 @@
-//! This is similar to crossbeam_epoch crate, but works in shared memory
-
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use crossbeam_utils::CachePadded;
-use spin;
-
-const NUM_SLOTS: usize = 1000;
-
-/// This is the struct that is stored in shmem
-///
-/// bit 0: is it pinned or not?
-/// rest of the bits are the epoch counter.
-pub struct EpochShared {
-    global_epoch: AtomicU64,
-    participants: [CachePadded<AtomicU64>; NUM_SLOTS],
-
-    broadcast_lock: spin::Mutex<()>,
-}
-
-impl EpochShared {
-    pub fn new() -> EpochShared {
-        EpochShared {
-            global_epoch: AtomicU64::new(2),
-            participants: [const { CachePadded::new(AtomicU64::new(2)) }; NUM_SLOTS],
-            broadcast_lock: spin::Mutex::new(()),
-        }
-    }
-
-    pub fn register(&self) -> LocalHandle {
-        LocalHandle {
-            global: self,
-            last_slot: AtomicUsize::new(0), // todo: choose more intelligently
-        }
-    }
-
-    fn release_pin(&self, slot: usize, _epoch: u64) {
-        let global_epoch = self.global_epoch.load(Ordering::Relaxed);
-        self.participants[slot].store(global_epoch, Ordering::Relaxed);
-    }
-
-    fn pin_internal(&self, slot_hint: usize) -> (usize, u64) {
-        // pick a slot
-        let mut slot = slot_hint;
-        let epoch = loop {
-            let old = self.participants[slot].fetch_or(1, Ordering::Relaxed);
-            if old & 1 == 0 {
-                // Got this slot
-                break old;
-            }
-
-            // the slot was busy by another thread / process. try a different slot
-            slot += 1;
-            if slot == NUM_SLOTS {
-                slot = 0;
-            }
-            continue;
-        };
-        (slot, epoch)
-    }
-
-    pub(crate) fn advance(&self) -> u64 {
-        // Advance the global epoch
-        let old_epoch = self.global_epoch.fetch_add(2, Ordering::Relaxed);
-        let new_epoch = old_epoch + 2;
-
-        // Anyone that release their pin after this will update their slot.
-        new_epoch
-    }
-
-    pub(crate) fn broadcast(&self) {
-        let Some(_guard) = self.broadcast_lock.try_lock() else {
-            return;
-        };
-
-        let epoch = self.global_epoch.load(Ordering::Relaxed);
-        let old_epoch = epoch.wrapping_sub(2);
-
-        // Update all free slots.
-        for i in 0..NUM_SLOTS {
-            // TODO: check result, as a sanity check. It should either be the old epoch, or pinned
-            let _ = self.participants[i].compare_exchange(
-                old_epoch,
-                epoch,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            );
-        }
-
-        // FIXME: memory fence here, since we used Relaxed?
-    }
-
-    pub(crate) fn get_oldest(&self) -> u64 {
-        // Read all slots.
-        let now = self.global_epoch.load(Ordering::Relaxed);
-        let mut oldest = now;
-        for i in 0..NUM_SLOTS {
-            let this_epoch = self.participants[i].load(Ordering::Relaxed);
-            let delta = now.wrapping_sub(this_epoch);
-            if delta > u64::MAX / 2 {
-                // this is very recent
-            } else {
-                if delta > now.wrapping_sub(oldest) {
-                    oldest = this_epoch;
-                }
-            }
-        }
-        oldest
-    }
-
-    pub(crate) fn get_current(&self) -> u64 {
-        self.global_epoch.load(Ordering::Relaxed)
-    }
-}
-
-pub(crate) struct EpochPin<'e> {
-    slot: usize,
-    pub(crate) epoch: u64,
-
-    handle: &'e LocalHandle<'e>,
-}
-
-impl<'e> Drop for EpochPin<'e> {
-    fn drop(&mut self) {
-        self.handle.global.release_pin(self.slot, self.epoch);
-    }
-}
-
-pub struct LocalHandle<'g> {
-    global: &'g EpochShared,
-
-    last_slot: AtomicUsize,
-}
-
-impl<'g> LocalHandle<'g> {
-    pub fn pin(&self) -> EpochPin {
-        let (slot, epoch) = self
-            .global
-            .pin_internal(self.last_slot.load(Ordering::Relaxed));
-        self.last_slot.store(slot, Ordering::Relaxed);
-        EpochPin {
-            handle: self,
-            epoch,
-            slot,
-        }
-    }
-}
--- a/libs/neonart/src/lib.rs
+++ b/libs/neonart/src/lib.rs
@@ -1,587 +0,0 @@
-//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
-//!
-//! The data structure is described in these two papers:
-//!
-//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
-//!     The adaptive radix tree: ARTful indexing for main-memory databases.
-//!     Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
-//!     https://db.in.tum.de/~leis/papers/ART.pdf
-//!
-//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
-//!     The ART of practical synchronization.
-//!     1-8. 10.1145/2933349.2933352.
-//!     https://db.in.tum.de/~leis/papers/artsync.pdf
-//!
-//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
-//! use.
-//!
-//! The papers mention a few different variants. We have made the following choices in this
-//! implementation:
-//!
-//! - All keys have the same length
-//!
-//! - Single-value leaves.
-//!
-//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
-//!   variable length "prefix", which stores the keys of all the one-way nodes which have been
-//!   removed. However, similar to the "hybrid" approach described in the paper, each node only has
-//!   space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
-//!   create create one-way nodes to store them. (There was no particular reason for this choice,
-//!   the "hybrid" approach described in the paper might be better.)
-//!
-//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
-//!   ROWEX, which generally performs better when there is contention, but that is not important
-//!   for use and Optimisic Lock Coupling is simpler to implement.
-//!
-//! ## Requirements
-//!
-//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
-//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
-//! requirements, which is why we had to write our own. Namely:
-//!
-//! - The data structure has to live in fixed-sized shared memory segment. That rules out any
-//!   built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust
-//!   feature, which still nightly-only experimental as of this writing).
-//!
-//! - The data structure is accessed from multiple processes. Only one process updates the data
-//!   structure, but other processes perform reads. That rules out using built-in Rust locking
-//!   primitives like Mutex and RwLock, and most crates too.
-//!
-//! - Within the one process with write-access, multiple threads can perform updates concurrently.
-//!   That rules out using PostgreSQL LWLocks for the locking.
-//!
-//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
-//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
-//!
-//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
-//!   locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
-//!   read / write the same page at the same time. (Prefetching can conflict with actual reads,
-//!   however.)
-//!
-//!  - The keys in the integrated cache are 17 bytes long.
-//!
-//! ## Usage
-//!
-//! Because this is designed to be used as a Postgres shared memory data structure, initialization
-//! happens in three stages:
-//!
-//! 0. A fixed area of shared memory is allocated at postmaster startup.
-//!
-//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any
-//!    other process or thread is running. It returns a TreeInitStruct, which is inherited by all
-//!    the processes through fork().
-//!
-//! 2. One process may have write-access to the struct, by calling
-//!    [TreeInitStruct::attach_writer]. (That process is the communicator process.)
-//!
-//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
-//!
-//! "Write access" means that you can insert / update / delete values in the tree.
-//!
-//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
-//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
-//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
-//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
-//! problem, the version check could be passed up to the caller, so that the caller could detect the
-//! lost updates and retry the operation.
-//!
-//! ## Implementation
-//!
-//! node_ptr: Provides low-level implementations of the four different node types (eight actually,
-//! since there is an Internal and Leaf variant of each)
-//!
-//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
-//! node.
-//!
-//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
-//!   abstractions on top.
-//!
-//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
-//!
-//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
-//!   own abstraction for that because we need the data structure to live in a pre-allocated shared
-//!   memory segment).
-//!
-//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
-//!   immediately deallocated, but stays around for as long as concurrent readers might still have
-//!   pointers to them. This is enforced by an epoch system. This is similar to
-//!   e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes
-//!   communicating over the shared memory segment.
-//!
-//! ## See also
-//!
-//! There are some existing Rust ART implementations out there, but none of them filled all
-//! the requirements:
-//!
-//! - https://github.com/XiangpengHao/congee
-//! - https://github.com/declanvk/blart
-//!
-//! ## TODO
-//!
-//! - Removing values has not been implemented
-
-mod algorithm;
-pub mod allocator;
-mod epoch;
-
-use algorithm::RootPtr;
-use algorithm::node_ptr::NodePtr;
-
-use std::collections::VecDeque;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicBool, Ordering};
-
-use crate::epoch::EpochPin;
-
-#[cfg(test)]
-mod tests;
-
-use allocator::ArtAllocator;
-pub use allocator::ArtMultiSlabAllocator;
-pub use allocator::OutOfMemoryError;
-
-/// Fixed-length key type.
-///
-pub trait Key: Debug {
-    const KEY_LEN: usize;
-
-    fn as_bytes(&self) -> &[u8];
-}
-
-/// Values stored in the tree
-///
-/// Values need to be Cloneable, because when a node "grows", the value is copied to a new node and
-/// the old sticks around until all readers that might see the old value are gone.
-// fixme obsolete, no longer needs Clone
-pub trait Value {}
-
-const MAX_GARBAGE: usize = 1024;
-
-/// The root of the tree, plus other tree-wide data. This is stored in the shared memory.
-pub struct Tree<V: Value> {
-    /// For simplicity, so that we never need to grow or shrink the root, the root node is always an
-    /// Internal256 node. Also, it never has a prefix (that's actually a bit wasteful, incurring one
-    /// indirection to every lookup)
-    root: RootPtr<V>,
-
-    writer_attached: AtomicBool,
-
-    epoch: epoch::EpochShared,
-}
-
-unsafe impl<V: Value + Sync> Sync for Tree<V> {}
-unsafe impl<V: Value + Send> Send for Tree<V> {}
-
-struct GarbageQueue<V>(VecDeque<(NodePtr<V>, u64)>);
-
-unsafe impl<V: Value + Sync> Sync for GarbageQueue<V> {}
-unsafe impl<V: Value + Send> Send for GarbageQueue<V> {}
-
-impl<V> GarbageQueue<V> {
-    fn new() -> GarbageQueue<V> {
-        GarbageQueue(VecDeque::with_capacity(MAX_GARBAGE))
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>, epoch: u64) {
-        self.0.push_front((ptr, epoch));
-    }
-
-    fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option<NodePtr<V>> {
-        if let Some(back) = self.0.back() {
-            if back.1 < cutoff_epoch {
-                return Some(self.0.pop_back().unwrap().0);
-            }
-        }
-        None
-    }
-}
-
-/// Struct created at postmaster startup
-pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator<V>> {
-    tree: &'t Tree<V>,
-
-    allocator: &'t A,
-
-    phantom_key: PhantomData<K>,
-}
-
-/// The worker process has a reference to this. The write operations are only safe
-/// from the worker process
-pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator<V>>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    pub allocator: &'t A,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-
-    /// Obsolete nodes that cannot be recycled until their epoch expires.
-    garbage: spin::Mutex<GarbageQueue<V>>,
-}
-
-/// The backends have a reference to this. It cannot be used to modify the tree
-pub struct TreeReadAccess<'t, K: Key, V: Value>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<'a, 't: 'a, K: Key, V: Value, A: ArtAllocator<V>> TreeInitStruct<'t, K, V, A> {
-    pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> {
-        let tree_ptr = allocator.alloc_tree();
-        let tree_ptr = NonNull::new(tree_ptr).expect("out of memory");
-        let init = Tree {
-            root: algorithm::new_root(allocator).expect("out of memory"),
-            writer_attached: AtomicBool::new(false),
-            epoch: epoch::EpochShared::new(),
-        };
-        unsafe { tree_ptr.write(init) };
-
-        TreeInitStruct {
-            tree: unsafe { tree_ptr.as_ref() },
-            allocator,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> {
-        let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
-        if previously_attached {
-            panic!("writer already attached");
-        }
-        TreeWriteAccess {
-            tree: self.tree,
-            allocator: self.allocator,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-            garbage: spin::Mutex::new(GarbageQueue::new()),
-        }
-    }
-
-    pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
-        TreeReadAccess {
-            tree: self.tree,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteAccess<'t, K, V, A> {
-    pub fn start_write<'g>(&'t self) -> TreeWriteGuard<'g, K, V, A>
-    where
-        't: 'g,
-    {
-        TreeWriteGuard {
-            tree_writer: self,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-            created_garbage: false,
-        }
-    }
-
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: &self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value> TreeReadAccess<'t, K, V> {
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: &self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-pub struct TreeReadGuard<'e, K, V>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'e Tree<V>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-}
-
-impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> {
-    pub fn get(&'e self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree.root, &self.epoch_pin)
-    }
-}
-
-pub struct TreeWriteGuard<'e, K, V, A>
-where
-    K: Key,
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    tree_writer: &'e TreeWriteAccess<'e, K, V, A>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-
-    created_garbage: bool,
-}
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
-
-impl<'e, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    /// Get a value
-    pub fn get(&'e mut self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin)
-    }
-
-    /// Insert a value
-    pub fn insert(self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if let Some(_) = existing {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(self, key: &K) -> bool {
-        let mut result = false;
-        // FIXME: It's not clear if OOM is expected while removing. It seems
-        // not nice, but shrinking a node can OOM. Then again, we could opt
-        // to not shrink a node if we cannot allocate, to live a little longer.
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Try to remove value and return the old value.
-    pub fn remove_and_return(self, key: &K) -> Option<V>
-    where
-        V: Clone,
-    {
-        let mut old = None;
-        self.update_with_fn(key, |existing| {
-            old = existing.cloned();
-            UpdateAction::Remove
-        })
-        .expect("out of memory while removing");
-        old
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    ///
-    /// The function is passed a reference to the existing value, if any. If the function
-    /// returns None, the value is removed from the tree (or if there was no existing value,
-    /// does nothing). If the function returns Some, the existing value is replaced, of if there
-    /// was no existing value, it is inserted. FIXME: update comment
-    pub fn update_with_fn<F>(mut self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self)?;
-
-        if self.created_garbage {
-            let _ = self.collect_garbage();
-        }
-        Ok(())
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>) {
-        self.tree_writer
-            .garbage
-            .lock()
-            .remember_obsolete_node(ptr, self.epoch_pin.epoch);
-        self.created_garbage = true;
-    }
-
-    // returns number of nodes recycled
-    fn collect_garbage(&self) -> usize {
-        self.tree_writer.tree.epoch.advance();
-        self.tree_writer.tree.epoch.broadcast();
-
-        let cutoff_epoch = self.tree_writer.tree.epoch.get_oldest();
-
-        let mut result = 0;
-        let mut garbage_queue = self.tree_writer.garbage.lock();
-        while let Some(ptr) = garbage_queue.next_obsolete(cutoff_epoch) {
-            ptr.deallocate(self.tree_writer.allocator);
-            result += 1;
-        }
-        result
-    }
-}
-
-pub struct TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    done: bool,
-    pub next_key: Vec<u8>,
-    max_key: Option<Vec<u8>>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<K> TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    pub fn new_wrapping() -> TreeIterator<K> {
-        let mut next_key = Vec::new();
-        next_key.resize(K::KEY_LEN, 0);
-        TreeIterator {
-            done: false,
-            next_key,
-            max_key: None,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn new(range: &std::ops::Range<K>) -> TreeIterator<K> {
-        let result = TreeIterator {
-            done: false,
-            next_key: Vec::from(range.start.as_bytes()),
-            max_key: Some(Vec::from(range.end.as_bytes())),
-            phantom_key: PhantomData,
-        };
-        assert_eq!(result.next_key.len(), K::KEY_LEN);
-        assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN);
-
-        result
-    }
-
-    pub fn next<'g, V>(&mut self, read_guard: &'g TreeReadGuard<'g, K, V>) -> Option<(K, &'g V)>
-    where
-        V: Value,
-    {
-        if self.done {
-            return None;
-        }
-
-        let mut wrapped_around = false;
-        loop {
-            assert_eq!(self.next_key.len(), K::KEY_LEN);
-            if let Some((k, v)) = algorithm::iter_next(
-                &mut self.next_key,
-                read_guard.tree.root,
-                &read_guard.epoch_pin,
-            ) {
-                assert_eq!(k.len(), K::KEY_LEN);
-                assert_eq!(self.next_key.len(), K::KEY_LEN);
-
-                // Check if we reached the end of the range
-                if let Some(max_key) = &self.max_key {
-                    if k.as_slice() >= max_key.as_slice() {
-                        self.done = true;
-                        break None;
-                    }
-                }
-
-                // increment the key
-                self.next_key = k.clone();
-                increment_key(self.next_key.as_mut_slice());
-                let k = k.as_slice().into();
-
-                break Some((k, v));
-            } else {
-                if self.max_key.is_some() {
-                    self.done = true;
-                } else {
-                    // Start from beginning
-                    if !wrapped_around {
-                        for i in 0..K::KEY_LEN {
-                            self.next_key[i] = 0;
-                        }
-                        wrapped_around = true;
-                        continue;
-                    } else {
-                        // The tree is completely empty
-                        // FIXME: perhaps we should remember the starting point instead.
-                        // Currently this will scan some ranges twice.
-                        break None;
-                    }
-                }
-                break None;
-            }
-        }
-    }
-}
-
-fn increment_key(key: &mut [u8]) -> bool {
-    for i in (0..key.len()).rev() {
-        let (byte, overflow) = key[i].overflowing_add(1);
-        key[i] = byte;
-        if !overflow {
-            return false;
-        }
-    }
-    true
-}
-
-// Debugging functions
-impl<'e, K: Key, V: Value + Debug, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree_writer.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value + Debug> TreeReadGuard<'e, K, V> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value> TreeWriteAccess<'e, K, V, ArtMultiSlabAllocator<'e, V>> {
-    pub fn get_statistics(&self) -> ArtTreeStatistics {
-        self.allocator.get_statistics();
-        ArtTreeStatistics {
-            blocks: self.allocator.inner.block_allocator.get_statistics(),
-            slabs: self.allocator.get_statistics(),
-            epoch: self.tree.epoch.get_current(),
-            oldest_epoch: self.tree.epoch.get_oldest(),
-            num_garbage: self.garbage.lock().0.len() as u64,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtTreeStatistics {
-    pub blocks: allocator::block::BlockAllocatorStats,
-    pub slabs: allocator::ArtMultiSlabStats,
-
-    pub epoch: u64,
-    pub oldest_epoch: u64,
-    pub num_garbage: u64,
-}
--- a/libs/neonart/src/tests.rs
+++ b/libs/neonart/src/tests.rs
@@ -1,243 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use crate::ArtAllocator;
-use crate::ArtMultiSlabAllocator;
-use crate::TreeInitStruct;
-use crate::TreeIterator;
-use crate::TreeWriteAccess;
-use crate::UpdateAction;
-
-use crate::{Key, Value};
-
-use rand::Rng;
-use rand::seq::SliceRandom;
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl TestKey {
-    const MIN: TestKey = TestKey([0; TEST_KEY_LEN]);
-    const MAX: TestKey = TestKey([u8::MAX; TEST_KEY_LEN]);
-}
-
-impl Key for TestKey {
-    const KEY_LEN: usize = TEST_KEY_LEN;
-    fn as_bytes(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-impl Value for usize {}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, usize, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let w = tree_writer.start_write();
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let r = tree_writer.start_read();
-        let value = r.get(&(*k).into());
-        assert_eq!(value, Some(idx).as_ref());
-    }
-
-    eprintln!("stats: {:?}", tree_writer.get_statistics());
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.get(&key).is_some() {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Value for TestValue {}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op<A: ArtAllocator<TestValue>>(
-    op: &TestOp,
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    eprintln!("applying op: {op:?}");
-
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    // apply to Art tree
-    let w = tree.start_write();
-    w.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
-
-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
-}
-
-fn test_iter<A: ArtAllocator<TestValue>>(
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &BTreeMap<TestKey, usize>,
-) {
-    let mut shadow_iter = shadow.iter();
-    let mut iter = TreeIterator::new(&(TestKey::MIN..TestKey::MAX));
-
-    loop {
-        let shadow_item = shadow_iter.next().map(|(k, v)| (k.clone(), v.clone()));
-        let r = tree.start_read();
-        let item = iter.next(&r);
-
-        if shadow_item != item.map(|(k, v)| (k, v.load())) {
-            eprintln!(
-                "FAIL: iterator returned {:?}, expected {:?}",
-                item, shadow_item
-            );
-            tree.start_read().dump(&mut std::io::stderr());
-
-            eprintln!("SHADOW:");
-            let mut si = shadow.iter();
-            while let Some(si) = si.next() {
-                eprintln!("key: {:?}, val: {}", si.0, si.1);
-            }
-            panic!(
-                "FAIL: iterator returned {:?}, expected {:?}",
-                item, shadow_item
-            );
-        }
-        if item.is_none() {
-            break;
-        }
-    }
-}
-
-#[test]
-fn random_ops() {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, TestValue, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let mut key: TestKey = (rng.sample(distribution) as u128).into();
-
-        if rng.random_bool(0.10) {
-            key = TestKey::from(u128::from(&key) | 0xffffffff);
-        }
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &tree_writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            eprintln!("stats: {:?}", tree_writer.get_statistics());
-            test_iter(&tree_writer, &shadow);
-        }
-    }
-}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -8,8 +8,6 @@ pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
 pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
 pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
 pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
-// TODO: gRPC is disabled by default for now, but the port is used in neon_local.
-pub const DEFAULT_GRPC_LISTEN_PORT: u16 = 51051; // storage-broker already uses 50051

 use std::collections::HashMap;
 use std::num::{NonZeroU64, NonZeroUsize};
@@ -20,6 +18,7 @@ use postgres_backend::AuthType;
 use remote_storage::RemoteStorageConfig;
 use serde_with::serde_as;
 use utils::logging::LogFormat;
+use utils::postgres_client::PostgresClientProtocol;

 use crate::models::{ImageCompressionAlgorithm, LsnLease};

@@ -34,8 +33,6 @@ pub struct NodeMetadata {
    pub postgres_host: String,
    #[serde(rename = "port")]
    pub postgres_port: u16,
-    pub grpc_host: Option<String>,
-    pub grpc_port: Option<u16>,
    pub http_host: String,
    pub http_port: u16,
    pub https_port: Option<u16>,
@@ -46,21 +43,6 @@ pub struct NodeMetadata {
    pub other: HashMap<String, serde_json::Value>,
 }

-/// PostHog integration config.
-#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub struct PostHogConfig {
-    /// PostHog project ID
-    pub project_id: String,
-    /// Server-side (private) API key
-    pub server_api_key: String,
-    /// Client-side (public) API key
-    pub client_api_key: String,
-    /// Private API URL
-    pub private_api_url: String,
-    /// Public API URL
-    pub public_api_url: String,
-}
-
 /// `pageserver.toml`
 ///
 /// We use serde derive with `#[serde(default)]` to generate a deserializer
@@ -122,7 +104,6 @@ pub struct ConfigToml {
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub listen_https_addr: Option<String>,
-    pub listen_grpc_addr: Option<String>,
    pub ssl_key_file: Utf8PathBuf,
    pub ssl_cert_file: Utf8PathBuf,
    #[serde(with = "humantime_serde")]
@@ -142,7 +123,6 @@ pub struct ConfigToml {
    pub http_auth_type: AuthType,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub pg_auth_type: AuthType,
-    pub grpc_auth_type: AuthType,
    pub auth_validation_public_key_path: Option<Utf8PathBuf>,
    pub remote_storage: Option<RemoteStorageConfig>,
    pub tenant_config: TenantConfigToml,
@@ -182,7 +162,6 @@ pub struct ConfigToml {
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
    pub ingest_batch_size: u64,
    pub max_vectored_read_bytes: MaxVectoredReadBytes,
-    pub max_get_vectored_keys: MaxGetVectoredKeys,
    pub image_compression: ImageCompressionAlgorithm,
    pub timeline_offloading: bool,
    pub ephemeral_bytes_per_memory_kb: usize,
@@ -190,6 +169,7 @@ pub struct ConfigToml {
    pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub no_sync: Option<bool>,
+    pub wal_receiver_protocol: PostgresClientProtocol,
    pub page_service_pipelining: PageServicePipeliningConfig,
    pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
    pub enable_read_path_debugging: Option<bool>,
@@ -202,8 +182,6 @@ pub struct ConfigToml {
    pub tracing: Option<Tracing>,
    pub enable_tls_page_service_api: bool,
    pub dev_mode: bool,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub posthog_config: Option<PostHogConfig>,
    pub timeline_import_config: TimelineImportConfig,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub basebackup_cache_config: Option<BasebackupCacheConfig>,
@@ -230,7 +208,7 @@ pub enum PageServicePipeliningConfig {
 }
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct PageServicePipeliningConfigPipelined {
-    /// Failed config parsing and validation if larger than `max_get_vectored_keys`.
+    /// Causes runtime errors if larger than max get_vectored batch size.
    pub max_batch_size: NonZeroUsize,
    pub execution: PageServiceProtocolPipelinedExecutionStrategy,
    // The default below is such that new versions of the software can start
@@ -330,8 +308,6 @@ pub struct TimelineImportConfig {
    pub import_job_concurrency: NonZeroUsize,
    pub import_job_soft_size_limit: NonZeroUsize,
    pub import_job_checkpoint_threshold: NonZeroUsize,
-    /// Max size of the remote storage partial read done by any job
-    pub import_job_max_byte_range_size: NonZeroUsize,
 }

 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -406,16 +382,6 @@ impl Default for EvictionOrder {
 #[serde(transparent)]
 pub struct MaxVectoredReadBytes(pub NonZeroUsize);

-#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-#[serde(transparent)]
-pub struct MaxGetVectoredKeys(NonZeroUsize);
-
-impl MaxGetVectoredKeys {
-    pub fn get(&self) -> usize {
-        self.0.get()
-    }
-}
-
 /// Tenant-level configuration values, used for various purposes.
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(default)]
@@ -527,6 +493,8 @@ pub struct TenantConfigToml {
    /// (either this flag or the pageserver-global one need to be set)
    pub timeline_offloading: bool,

+    pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
+
    /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
    /// `index_part.json`, and it cannot be reversed.
    pub rel_size_v2_enabled: bool,
@@ -598,8 +566,6 @@ pub mod defaults {
    /// That is, slightly above 128 kB.
    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB

-    pub const DEFAULT_MAX_GET_VECTORED_KEYS: usize = 32;
-
    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };

@@ -607,6 +573,9 @@ pub mod defaults {

    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

+    pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
+        utils::postgres_client::PostgresClientProtocol::Vanilla;
+
    pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
    pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
 }
@@ -619,7 +588,6 @@ impl Default for ConfigToml {
            listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
            listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
            listen_https_addr: (None),
-            listen_grpc_addr: None, // TODO: default to 127.0.0.1:51051
            ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
            ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
            ssl_cert_reload_period: Duration::from_secs(60),
@@ -636,7 +604,6 @@ impl Default for ConfigToml {
            pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()
            http_auth_type: (AuthType::Trust),
            pg_auth_type: (AuthType::Trust),
-            grpc_auth_type: (AuthType::Trust),
            auth_validation_public_key_path: (None),
            remote_storage: None,
            broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
@@ -695,9 +662,6 @@ impl Default for ConfigToml {
            max_vectored_read_bytes: (MaxVectoredReadBytes(
                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
            )),
-            max_get_vectored_keys: (MaxGetVectoredKeys(
-                NonZeroUsize::new(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap(),
-            )),
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            timeline_offloading: true,
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
@@ -705,6 +669,7 @@ impl Default for ConfigToml {
            virtual_file_io_mode: None,
            tenant_config: TenantConfigToml::default(),
            no_sync: None,
+            wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
            page_service_pipelining: PageServicePipeliningConfig::Pipelined(
                PageServicePipeliningConfigPipelined {
                    max_batch_size: NonZeroUsize::new(32).unwrap(),
@@ -725,13 +690,11 @@ impl Default for ConfigToml {
            enable_tls_page_service_api: false,
            dev_mode: false,
            timeline_import_config: TimelineImportConfig {
-                import_job_concurrency: NonZeroUsize::new(32).unwrap(),
-                import_job_soft_size_limit: NonZeroUsize::new(256 * 1024 * 1024).unwrap(),
-                import_job_checkpoint_threshold: NonZeroUsize::new(32).unwrap(),
-                import_job_max_byte_range_size: NonZeroUsize::new(4 * 1024 * 1024).unwrap(),
+                import_job_concurrency: NonZeroUsize::new(128).unwrap(),
+                import_job_soft_size_limit: NonZeroUsize::new(1024 * 1024 * 1024).unwrap(),
+                import_job_checkpoint_threshold: NonZeroUsize::new(128).unwrap(),
            },
            basebackup_cache_config: None,
-            posthog_config: None,
        }
    }
 }
@@ -849,6 +812,7 @@ impl Default for TenantConfigToml {
            lsn_lease_length: LsnLease::DEFAULT_LENGTH,
            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
            timeline_offloading: true,
+            wal_receiver_protocol_override: None,
            rel_size_v2_enabled: false,
            gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
            gc_compaction_verification: DEFAULT_GC_COMPACTION_VERIFICATION,
--- a/libs/pageserver_api/src/config/tests.rs
+++ b/libs/pageserver_api/src/config/tests.rs
@@ -14,8 +14,6 @@ fn test_node_metadata_v1_backward_compatibilty() {
        NodeMetadata {
            postgres_host: "localhost".to_string(),
            postgres_port: 23,
-            grpc_host: None,
-            grpc_port: None,
            http_host: "localhost".to_string(),
            http_port: 42,
            https_port: None,
@@ -39,35 +37,6 @@ fn test_node_metadata_v2_backward_compatibilty() {
        NodeMetadata {
            postgres_host: "localhost".to_string(),
            postgres_port: 23,
-            grpc_host: None,
-            grpc_port: None,
-            http_host: "localhost".to_string(),
-            http_port: 42,
-            https_port: Some(123),
-            other: HashMap::new(),
-        }
-    )
-}
-
-#[test]
-fn test_node_metadata_v3_backward_compatibilty() {
-    let v3 = serde_json::to_vec(&serde_json::json!({
-        "host": "localhost",
-        "port": 23,
-        "grpc_host": "localhost",
-        "grpc_port": 51,
-        "http_host": "localhost",
-        "http_port": 42,
-        "https_port": 123,
-    }));
-
-    assert_eq!(
-        serde_json::from_slice::<NodeMetadata>(&v3.unwrap()).unwrap(),
-        NodeMetadata {
-            postgres_host: "localhost".to_string(),
-            postgres_port: 23,
-            grpc_host: Some("localhost".to_string()),
-            grpc_port: Some(51),
            http_host: "localhost".to_string(),
            http_port: 42,
            https_port: Some(123),
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -53,9 +53,6 @@ pub struct NodeRegisterRequest {
    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
-
    pub listen_http_addr: String,
    pub listen_http_port: u16,
    pub listen_https_port: Option<u16>,
@@ -105,9 +102,6 @@ pub struct TenantLocateResponseShard {
    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
-
    pub listen_http_addr: String,
    pub listen_http_port: u16,
    pub listen_https_port: Option<u16>,
@@ -158,8 +152,6 @@ pub struct NodeDescribeResponse {

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,
-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
 }

 #[derive(Serialize, Deserialize, Debug)]
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -20,6 +20,7 @@ use serde_with::serde_as;
 pub use utilization::PageserverUtilization;
 use utils::id::{NodeId, TenantId, TimelineId};
 use utils::lsn::Lsn;
+use utils::postgres_client::PostgresClientProtocol;
 use utils::{completion, serde_system_time};

 use crate::config::Ratio;
@@ -353,9 +354,6 @@ pub struct ShardImportProgressV1 {
    pub completed: usize,
    /// Hash of the plan
    pub import_plan_hash: u64,
-    /// Soft limit for the job size
-    /// This needs to remain constant throughout the import
-    pub job_soft_size_limit: usize,
 }

 impl ShardImportStatus {
@@ -404,8 +402,6 @@ pub enum TimelineCreateRequestMode {
        // using a flattened enum, so, it was an accepted field, and
        // we continue to accept it by having it here.
        pg_version: Option<u32>,
-        #[serde(default, skip_serializing_if = "std::ops::Not::not")]
-        read_only: bool,
    },
    ImportPgdata {
        import_pgdata: TimelineCreateRequestModeImportPgdata,
@@ -621,6 +617,8 @@ pub struct TenantConfigPatch {
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub timeline_offloading: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
+    pub wal_receiver_protocol_override: FieldPatch<PostgresClientProtocol>,
+    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub rel_size_v2_enabled: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_compaction_enabled: FieldPatch<bool>,
@@ -745,6 +743,9 @@ pub struct TenantConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timeline_offloading: Option<bool>,

+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
+
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rel_size_v2_enabled: Option<bool>,

@@ -806,6 +807,7 @@ impl TenantConfig {
            mut lsn_lease_length,
            mut lsn_lease_length_for_ts,
            mut timeline_offloading,
+            mut wal_receiver_protocol_override,
            mut rel_size_v2_enabled,
            mut gc_compaction_enabled,
            mut gc_compaction_verification,
@@ -898,6 +900,9 @@ impl TenantConfig {
            .map(|v| humantime::parse_duration(&v))?
            .apply(&mut lsn_lease_length_for_ts);
        patch.timeline_offloading.apply(&mut timeline_offloading);
+        patch
+            .wal_receiver_protocol_override
+            .apply(&mut wal_receiver_protocol_override);
        patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);
        patch
            .gc_compaction_enabled
@@ -950,6 +955,7 @@ impl TenantConfig {
            lsn_lease_length,
            lsn_lease_length_for_ts,
            timeline_offloading,
+            wal_receiver_protocol_override,
            rel_size_v2_enabled,
            gc_compaction_enabled,
            gc_compaction_verification,
@@ -1047,6 +1053,9 @@ impl TenantConfig {
            timeline_offloading: self
                .timeline_offloading
                .unwrap_or(global_conf.timeline_offloading),
+            wal_receiver_protocol_override: self
+                .wal_receiver_protocol_override
+                .or(global_conf.wal_receiver_protocol_override),
            rel_size_v2_enabled: self
                .rel_size_v2_enabled
                .unwrap_or(global_conf.rel_size_v2_enabled),
@@ -1920,7 +1929,7 @@ pub enum PagestreamFeMessage {
 }

 // Wrapped in libpq CopyData
-#[derive(Debug, strum_macros::EnumProperty)]
+#[derive(strum_macros::EnumProperty)]
 pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
@@ -2031,7 +2040,7 @@ pub enum PagestreamProtocolVersion {

 pub type RequestId = u64;

-#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamRequest {
    pub reqid: RequestId,
    pub request_lsn: Lsn,
@@ -2050,7 +2059,7 @@ pub struct PagestreamNblocksRequest {
    pub rel: RelTag,
 }

-#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamGetPageRequest {
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -24,7 +24,7 @@ use serde::{Deserialize, Serialize};
 // FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
 // Then we could replace the custom Ord and PartialOrd implementations below with
 // deriving them. This will require changes in walredoproc.c.
-#[derive(Debug, Default, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
 pub struct RelTag {
    pub forknum: u8,
    pub spcnode: Oid,
@@ -184,12 +184,12 @@ pub enum SlruKind {
    MultiXactOffsets,
 }

-impl fmt::Display for SlruKind {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl SlruKind {
+    pub fn to_str(&self) -> &'static str {
        match self {
-            Self::Clog => write!(f, "pg_xact"),
-            Self::MultiXactMembers => write!(f, "pg_multixact/members"),
-            Self::MultiXactOffsets => write!(f, "pg_multixact/offsets"),
+            Self::Clog => "pg_xact",
+            Self::MultiXactMembers => "pg_multixact/members",
+            Self::MultiXactOffsets => "pg_multixact/offsets",
        }
    }
 }
--- a/libs/posthog_client_lite/Cargo.toml
+++ b/libs/posthog_client_lite/Cargo.toml
@@ -6,14 +6,9 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-arc-swap.workspace = true
 reqwest.workspace = true
-serde_json.workspace = true
 serde.workspace = true
+serde_json.workspace = true
 sha2.workspace = true
-thiserror.workspace = true
-tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
-tokio-util.workspace = true
-tracing-utils.workspace = true
-tracing.workspace = true
 workspace_hack.workspace = true
+thiserror.workspace = true
--- a/libs/posthog_client_lite/src/background_loop.rs
+++ b/libs/posthog_client_lite/src/background_loop.rs
@@ -1,87 +0,0 @@
-//! A background loop that fetches feature flags from PostHog and updates the feature store.
-
-use std::{sync::Arc, time::Duration};
-
-use arc_swap::ArcSwap;
-use tokio_util::sync::CancellationToken;
-use tracing::{Instrument, info_span};
-
-use crate::{CaptureEvent, FeatureStore, PostHogClient, PostHogClientConfig};
-
-/// A background loop that fetches feature flags from PostHog and updates the feature store.
-pub struct FeatureResolverBackgroundLoop {
-    posthog_client: PostHogClient,
-    feature_store: ArcSwap<FeatureStore>,
-    cancel: CancellationToken,
-}
-
-impl FeatureResolverBackgroundLoop {
-    pub fn new(config: PostHogClientConfig, shutdown_pageserver: CancellationToken) -> Self {
-        Self {
-            posthog_client: PostHogClient::new(config),
-            feature_store: ArcSwap::new(Arc::new(FeatureStore::new())),
-            cancel: shutdown_pageserver,
-        }
-    }
-
-    pub fn spawn(
-        self: Arc<Self>,
-        handle: &tokio::runtime::Handle,
-        refresh_period: Duration,
-        fake_tenants: Vec<CaptureEvent>,
-    ) {
-        let this = self.clone();
-        let cancel = self.cancel.clone();
-
-        // Main loop of updating the feature flags.
-        handle.spawn(
-            async move {
-                tracing::info!("Starting PostHog feature resolver");
-                let mut ticker = tokio::time::interval(refresh_period);
-                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
-                loop {
-                    tokio::select! {
-                        _ = ticker.tick() => {}
-                        _ = cancel.cancelled() => break
-                    }
-                    let resp = match this
-                        .posthog_client
-                        .get_feature_flags_local_evaluation()
-                        .await
-                    {
-                        Ok(resp) => resp,
-                        Err(e) => {
-                            tracing::warn!("Cannot get feature flags: {}", e);
-                            continue;
-                        }
-                    };
-                    let feature_store = FeatureStore::new_with_flags(resp.flags);
-                    this.feature_store.store(Arc::new(feature_store));
-                    tracing::info!("Feature flag updated");
-                }
-                tracing::info!("PostHog feature resolver stopped");
-            }
-            .instrument(info_span!("posthog_feature_resolver")),
-        );
-
-        // Report fake tenants to PostHog so that we have the combination of all the properties in the UI.
-        // Do one report per pageserver restart.
-        let this = self.clone();
-        handle.spawn(
-            async move {
-                tracing::info!("Starting PostHog feature reporter");
-                for tenant in &fake_tenants {
-                    tracing::info!("Reporting fake tenant: {:?}", tenant);
-                }
-                if let Err(e) = this.posthog_client.capture_event_batch(&fake_tenants).await {
-                    tracing::warn!("Cannot report fake tenants: {}", e);
-                }
-            }
-            .instrument(info_span!("posthog_feature_reporter")),
-        );
-    }
-
-    pub fn feature_store(&self) -> Arc<FeatureStore> {
-        self.feature_store.load_full()
-    }
-}
--- a/libs/posthog_client_lite/src/lib.rs
+++ b/libs/posthog_client_lite/src/lib.rs
@@ -1,9 +1,5 @@
 //! A lite version of the PostHog client that only supports local evaluation of feature flags.

-mod background_loop;
-
-pub use background_loop::FeatureResolverBackgroundLoop;
-
 use std::collections::HashMap;

 use serde::{Deserialize, Serialize};
@@ -22,19 +18,10 @@ pub enum PostHogEvaluationError {
    Internal(String),
 }

-impl PostHogEvaluationError {
-    pub fn as_variant_str(&self) -> &'static str {
-        match self {
-            PostHogEvaluationError::NotAvailable(_) => "not_available",
-            PostHogEvaluationError::NoConditionGroupMatched => "no_condition_group_matched",
-            PostHogEvaluationError::Internal(_) => "internal",
-        }
-    }
-}
-
 #[derive(Deserialize)]
 pub struct LocalEvaluationResponse {
-    pub flags: Vec<LocalEvaluationFlag>,
+    #[allow(dead_code)]
+    flags: Vec<LocalEvaluationFlag>,
 }

 #[derive(Deserialize)]
@@ -47,7 +34,7 @@ pub struct LocalEvaluationFlag {
 #[derive(Deserialize)]
 pub struct LocalEvaluationFlagFilters {
    groups: Vec<LocalEvaluationFlagFilterGroup>,
-    multivariate: Option<LocalEvaluationFlagMultivariate>,
+    multivariate: LocalEvaluationFlagMultivariate,
 }

 #[derive(Deserialize)]
@@ -64,7 +51,7 @@ pub struct LocalEvaluationFlagFilterProperty {
    operator: String,
 }

-#[derive(Debug, Serialize, Deserialize, Clone)]
+#[derive(Debug, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum PostHogFlagFilterPropertyValue {
    String(String),
@@ -107,12 +94,6 @@ impl FeatureStore {
        }
    }

-    pub fn new_with_flags(flags: Vec<LocalEvaluationFlag>) -> Self {
-        let mut store = Self::new();
-        store.set_flags(flags);
-        store
-    }
-
    pub fn set_flags(&mut self, flags: Vec<LocalEvaluationFlag>) {
        self.flags.clear();
        for flag in flags {
@@ -264,7 +245,7 @@ impl FeatureStore {
        }
    }

-    /// Evaluate a multivariate feature flag. Returns an error if the flag is not available or if there are errors
+    /// Evaluate a multivariate feature flag. Returns `None` if the flag is not available or if there are errors
    /// during the evaluation.
    ///
    /// The parsing logic is as follows:
@@ -282,15 +263,10 @@ impl FeatureStore {
    /// Example: we have a multivariate flag with 3 groups of the configured global rollout percentage: A (10%), B (20%), C (70%).
    /// There is a single group with a condition that has a rollout percentage of 10% and it does not have a variant override.
    /// Then, we will have 1% of the users evaluated to A, 2% to B, and 7% to C.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
    pub fn evaluate_multivariate(
        &self,
        flag_key: &str,
        user_id: &str,
-        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
    ) -> Result<String, PostHogEvaluationError> {
        let hash_on_global_rollout_percentage =
            Self::consistent_hash(user_id, flag_key, "multivariate");
@@ -300,39 +276,10 @@ impl FeatureStore {
            flag_key,
            hash_on_global_rollout_percentage,
            hash_on_group_rollout_percentage,
-            properties,
+            &HashMap::new(),
        )
    }

-    /// Evaluate a boolean feature flag. Returns  an error if the flag is not available or if there are errors
-    /// during the evaluation.
-    ///
-    /// The parsing logic is as follows:
-    ///
-    /// * Generate a consistent hash for the tenant-feature.
-    /// * Match each filter group.
-    ///   - If a group is matched, it will first determine whether the user is in the range of the rollout
-    ///     percentage.
-    ///   - If the hash falls within the group's rollout percentage, return true.
-    /// * Otherwise, continue with the next group until all groups are evaluated and no group is within the
-    ///   rollout percentage.
-    /// * If there are no matching groups, return an error.
-    ///
-    /// Returns `Ok(())` if the feature flag evaluates to true. In the future, it will return a payload.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
-    pub fn evaluate_boolean(
-        &self,
-        flag_key: &str,
-        user_id: &str,
-        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
-    ) -> Result<(), PostHogEvaluationError> {
-        let hash_on_global_rollout_percentage = Self::consistent_hash(user_id, flag_key, "boolean");
-        self.evaluate_boolean_inner(flag_key, hash_on_global_rollout_percentage, properties)
-    }
-
    /// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
    /// and avoid duplicate computations.
@@ -359,11 +306,6 @@ impl FeatureStore {
                    flag_key
                )));
            }
-            let Some(ref multivariate) = flag_config.filters.multivariate else {
-                return Err(PostHogEvaluationError::Internal(format!(
-                    "No multivariate available, should use evaluate_boolean?: {flag_key}"
-                )));
-            };
            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
            // does not matter.
@@ -372,7 +314,7 @@ impl FeatureStore {
                    GroupEvaluationResult::MatchedAndOverride(variant) => return Ok(variant),
                    GroupEvaluationResult::MatchedAndEvaluate => {
                        let mut percentage = 0;
-                        for variant in &multivariate.variants {
+                        for variant in &flag_config.filters.multivariate.variants {
                            percentage += variant.rollout_percentage;
                            if self
                                .evaluate_percentage(hash_on_global_rollout_percentage, percentage)
@@ -400,89 +342,6 @@ impl FeatureStore {
            )))
        }
    }
-
-    /// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
-    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
-    /// and avoid duplicate computations.
-    ///
-    /// Use a different consistent hash for evaluating the group rollout percentage.
-    /// The behavior: if the condition is set to rolling out to 10% of the users, and
-    /// we set the variant A to 20% in the global config, then 2% of the total users will
-    /// be evaluated to variant A.
-    ///
-    /// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two
-    /// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users
-    /// will be evaluated (versus 30% if group evaluation is done independently).
-    pub(crate) fn evaluate_boolean_inner(
-        &self,
-        flag_key: &str,
-        hash_on_global_rollout_percentage: f64,
-        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
-    ) -> Result<(), PostHogEvaluationError> {
-        if let Some(flag_config) = self.flags.get(flag_key) {
-            if !flag_config.active {
-                return Err(PostHogEvaluationError::NotAvailable(format!(
-                    "The feature flag is not active: {}",
-                    flag_key
-                )));
-            }
-            if flag_config.filters.multivariate.is_some() {
-                return Err(PostHogEvaluationError::Internal(format!(
-                    "This looks like a multivariate flag, should use evaluate_multivariate?: {flag_key}"
-                )));
-            };
-            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
-            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
-            // does not matter.
-            for group in &flag_config.filters.groups {
-                match self.evaluate_group(group, hash_on_global_rollout_percentage, properties)? {
-                    GroupEvaluationResult::MatchedAndOverride(_) => {
-                        return Err(PostHogEvaluationError::Internal(format!(
-                            "Boolean flag cannot have overrides: {}",
-                            flag_key
-                        )));
-                    }
-                    GroupEvaluationResult::MatchedAndEvaluate => {
-                        return Ok(());
-                    }
-                    GroupEvaluationResult::Unmatched => continue,
-                }
-            }
-            // If no group is matched, the feature is not available, and up to the caller to decide what to do.
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        } else {
-            // The feature flag is not available yet
-            Err(PostHogEvaluationError::NotAvailable(format!(
-                "Not found in the local evaluation spec: {}",
-                flag_key
-            )))
-        }
-    }
-
-    /// Infer whether a feature flag is a boolean flag by checking if it has a multivariate filter.
-    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
-        if let Some(flag_config) = self.flags.get(flag_key) {
-            Ok(flag_config.filters.multivariate.is_none())
-        } else {
-            Err(PostHogEvaluationError::NotAvailable(format!(
-                "Not found in the local evaluation spec: {}",
-                flag_key
-            )))
-        }
-    }
-}
-
-pub struct PostHogClientConfig {
-    /// The server API key.
-    pub server_api_key: String,
-    /// The client API key.
-    pub client_api_key: String,
-    /// The project ID.
-    pub project_id: String,
-    /// The private API URL.
-    pub private_api_url: String,
-    /// The public API URL.
-    pub public_api_url: String,
 }

 /// A lite PostHog client.
@@ -501,23 +360,37 @@ pub struct PostHogClientConfig {
 /// want to report the feature flag usage back to PostHog. The current plan is to use PostHog only as an UI to
 /// configure feature flags so it is very likely that the client API will not be used.
 pub struct PostHogClient {
-    /// The config.
-    config: PostHogClientConfig,
+    /// The server API key.
+    server_api_key: String,
+    /// The client API key.
+    client_api_key: String,
+    /// The project ID.
+    project_id: String,
+    /// The private API URL.
+    private_api_url: String,
+    /// The public API URL.
+    public_api_url: String,
    /// The HTTP client.
    client: reqwest::Client,
 }

-#[derive(Serialize, Debug)]
-pub struct CaptureEvent {
-    pub event: String,
-    pub distinct_id: String,
-    pub properties: serde_json::Value,
-}
-
 impl PostHogClient {
-    pub fn new(config: PostHogClientConfig) -> Self {
+    pub fn new(
+        server_api_key: String,
+        client_api_key: String,
+        project_id: String,
+        private_api_url: String,
+        public_api_url: String,
+    ) -> Self {
        let client = reqwest::Client::new();
-        Self { config, client }
+        Self {
+            server_api_key,
+            client_api_key,
+            project_id,
+            private_api_url,
+            public_api_url,
+            client,
+        }
    }

    pub fn new_with_us_region(
@@ -525,13 +398,13 @@ impl PostHogClient {
        client_api_key: String,
        project_id: String,
    ) -> Self {
-        Self::new(PostHogClientConfig {
+        Self::new(
            server_api_key,
            client_api_key,
            project_id,
-            private_api_url: "https://us.posthog.com".to_string(),
-            public_api_url: "https://us.i.posthog.com".to_string(),
-        })
+            "https://us.posthog.com".to_string(),
+            "https://us.i.posthog.com".to_string(),
+        )
    }

    /// Fetch the feature flag specs from the server.
@@ -549,23 +422,15 @@ impl PostHogClient {
        // with bearer token of self.server_api_key
        let url = format!(
            "{}/api/projects/{}/feature_flags/local_evaluation",
-            self.config.private_api_url, self.config.project_id
+            self.private_api_url, self.project_id
        );
        let response = self
            .client
            .get(url)
-            .bearer_auth(&self.config.server_api_key)
+            .bearer_auth(&self.server_api_key)
            .send()
            .await?;
-        let status = response.status();
        let body = response.text().await?;
-        if !status.is_success() {
-            return Err(anyhow::anyhow!(
-                "Failed to get feature flags: {}, {}",
-                status,
-                body
-            ));
-        }
        Ok(serde_json::from_str(&body)?)
    }

@@ -577,54 +442,21 @@ impl PostHogClient {
        &self,
        event: &str,
        distinct_id: &str,
-        properties: &serde_json::Value,
+        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
    ) -> anyhow::Result<()> {
        // PUBLIC_URL/capture/
-        let url = format!("{}/capture/", self.config.public_api_url);
-        let response = self
-            .client
+        // with self.client_api_key sent as `api_key` in the JSON body (no bearer token)
+        let url = format!("{}/capture/", self.public_api_url);
+        self.client
            .post(url)
            .body(serde_json::to_string(&json!({
-                "api_key": self.config.client_api_key,
+                "api_key": self.client_api_key,
                "distinct_id": distinct_id,
                "event": event,
                "properties": properties,
            }))?)
            .send()
            .await?;
-        let status = response.status();
-        let body = response.text().await?;
-        if !status.is_success() {
-            return Err(anyhow::anyhow!(
-                "Failed to capture events: {}, {}",
-                status,
-                body
-            ));
-        }
-        Ok(())
-    }
-
-    pub async fn capture_event_batch(&self, events: &[CaptureEvent]) -> anyhow::Result<()> {
-        // PUBLIC_URL/batch/
-        let url = format!("{}/batch/", self.config.public_api_url);
-        let response = self
-            .client
-            .post(url)
-            .body(serde_json::to_string(&json!({
-                "api_key": self.config.client_api_key,
-                "batch": events,
-            }))?)
-            .send()
-            .await?;
-        let status = response.status();
-        let body = response.text().await?;
-        if !status.is_success() {
-            return Err(anyhow::anyhow!(
-                "Failed to capture events: {}, {}",
-                status,
-                body
-            ));
-        }
        Ok(())
    }
 }
@@ -635,162 +467,95 @@ mod tests {

    fn data() -> &'static str {
        r#"{
-  "flags": [
-    {
-      "id": 141807,
-      "team_id": 152860,
-      "name": "",
-      "key": "image-compaction-boundary",
-      "filters": {
-        "groups": [
-          {
-            "variant": null,
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              }
+            "flags": [
+                {
+                    "id": 132794,
+                    "team_id": 152860,
+                    "name": "",
+                    "key": "gc-compaction",
+                    "filters": {
+                        "groups": [
+                            {
+                                "variant": "enabled-stage-2",
+                                "properties": [
+                                    {
+                                        "key": "plan_type",
+                                        "type": "person",
+                                        "value": [
+                                            "free"
+                                        ],
+                                        "operator": "exact"
+                                    },
+                                    {
+                                        "key": "pageserver_remote_size",
+                                        "type": "person",
+                                        "value": "10000000",
+                                        "operator": "lt"
+                                    }
+                                ],
+                                "rollout_percentage": 50
+                            },
+                            {
+                                "properties": [
+                                    {
+                                        "key": "plan_type",
+                                        "type": "person",
+                                        "value": [
+                                            "free"
+                                        ],
+                                        "operator": "exact"
+                                    },
+                                    {
+                                        "key": "pageserver_remote_size",
+                                        "type": "person",
+                                        "value": "10000000",
+                                        "operator": "lt"
+                                    }
+                                ],
+                                "rollout_percentage": 80
+                            }
+                        ],
+                        "payloads": {},
+                        "multivariate": {
+                            "variants": [
+                                {
+                                    "key": "disabled",
+                                    "name": "",
+                                    "rollout_percentage": 90
+                                },
+                                {
+                                    "key": "enabled-stage-1",
+                                    "name": "",
+                                    "rollout_percentage": 10
+                                },
+                                {
+                                    "key": "enabled-stage-2",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                },
+                                {
+                                    "key": "enabled-stage-3",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                },
+                                {
+                                    "key": "enabled",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                }
+                            ]
+                        }
+                    },
+                    "deleted": false,
+                    "active": true,
+                    "ensure_experience_continuity": false,
+                    "has_encrypted_payloads": false,
+                    "version": 6
+                }
            ],
-            "rollout_percentage": 40
-          },
-          {
-            "variant": null,
-            "properties": [],
-            "rollout_percentage": 10
-          }
-        ],
-        "payloads": {},
-        "multivariate": null
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 1
-    },
-    {
-      "id": 135586,
-      "team_id": 152860,
-      "name": "",
-      "key": "boolean-flag",
-      "filters": {
-        "groups": [
-          {
-            "variant": null,
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              }
-            ],
-            "rollout_percentage": 47
-          }
-        ],
-        "payloads": {},
-        "multivariate": null
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 1
-    },
-    {
-      "id": 132794,
-      "team_id": 152860,
-      "name": "",
-      "key": "gc-compaction",
-      "filters": {
-        "groups": [
-          {
-            "variant": "enabled-stage-2",
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              },
-              {
-                "key": "pageserver_remote_size",
-                "type": "person",
-                "value": "10000000",
-                "operator": "lt"
-              }
-            ],
-             "rollout_percentage": 50
-          },
-          {
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              },
-              {
-                "key": "pageserver_remote_size",
-                "type": "person",
-                "value": "10000000",
-                "operator": "lt"
-              }
-            ],
-            "rollout_percentage": 80
-          }
-        ],
-        "payloads": {},
-        "multivariate": {
-          "variants": [
-            {
-              "key": "disabled",
-              "name": "",
-              "rollout_percentage": 90
-            },
-            {
-              "key": "enabled-stage-1",
-              "name": "",
-              "rollout_percentage": 10
-            },
-            {
-              "key": "enabled-stage-2",
-              "name": "",
-              "rollout_percentage": 0
-            },
-            {
-              "key": "enabled-stage-3",
-              "name": "",
-              "rollout_percentage": 0
-            },
-            {
-              "key": "enabled",
-              "name": "",
-              "rollout_percentage": 0
-            }
-          ]
-        }
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 7
-    }
-  ],
-  "group_type_mapping": {},
-  "cohorts": {}
-}"#
+            "group_type_mapping": {},
+            "cohorts": {}
+        }"#
    }

    #[test]
@@ -866,125 +631,4 @@ mod tests {
            Err(PostHogEvaluationError::NoConditionGroupMatched)
        ),);
    }
-
-    #[test]
-    fn evaluate_boolean_1() {
-        // The `boolean-flag` feature flag only has one group that matches on the free user.
-
-        let mut store = FeatureStore::new();
-        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
-        store.set_flags(response.flags);
-
-        // This lacks the required properties and cannot be evaluated.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &HashMap::new());
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NotAvailable(_))
-        ),);
-
-        let properties_unmatched = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("paid".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // This does not match any group so there will be an error.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties_unmatched);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-
-        let properties = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("free".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // It matches the first group as 0.10 <= 0.50 and the properties are matched. Then it gets evaluated to the variant override.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 0.10, &properties);
-        assert!(variant.is_ok());
-
-        // It matches the group conditions but not the group rollout percentage.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-    }
-
-    #[test]
-    fn evaluate_boolean_2() {
-        // The `image-compaction-boundary` feature flag has one group that matches on the free user and a group that matches on all users.
-
-        let mut store = FeatureStore::new();
-        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
-        store.set_flags(response.flags);
-
-        // This lacks the required properties and cannot be evaluated.
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &HashMap::new());
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NotAvailable(_))
-        ),);
-
-        let properties_unmatched = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("paid".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // This does not match the filtered group but the all user group.
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties_unmatched);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 0.05, &properties_unmatched);
-        assert!(variant.is_ok());
-
-        let properties = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("free".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // It matches the first group as 0.30 <= 0.40 and the properties are matched. Then it gets evaluated to the variant override.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.30, &properties);
-        assert!(variant.is_ok());
-
-        // It matches the group conditions but not the group rollout percentage.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-
-        // It matches the second "all" group conditions.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.09, &properties);
-        assert!(variant.is_ok());
-    }
 }
--- a/libs/proxy/postgres-protocol2/src/message/frontend.rs
+++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs
@@ -25,7 +25,6 @@ where
    Ok(())
 }

-#[derive(Debug)]
 pub enum BindError {
    Conversion(Box<dyn Error + marker::Sync + Send>),
    Serialization(io::Error),
@@ -289,12 +288,6 @@ pub fn sync(buf: &mut BytesMut) {
    write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
 }

-#[inline]
-pub fn flush(buf: &mut BytesMut) {
-    buf.put_u8(b'H');
-    write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
-}
-
 #[inline]
 pub fn terminate(buf: &mut BytesMut) {
    buf.put_u8(b'X');
--- a/libs/proxy/postgres-types2/src/lib.rs
+++ b/libs/proxy/postgres-types2/src/lib.rs
@@ -9,6 +9,7 @@ use std::error::Error;
 use std::fmt;
 use std::sync::Arc;

+use bytes::BytesMut;
 use fallible_iterator::FallibleIterator;
 #[doc(inline)]
 pub use postgres_protocol2::Oid;
@@ -26,6 +27,41 @@ macro_rules! accepts {
    )
 }

+/// Generates an implementation of `ToSql::to_sql_checked`.
+///
+/// All `ToSql` implementations should use this macro.
+macro_rules! to_sql_checked {
+    () => {
+        fn to_sql_checked(
+            &self,
+            ty: &$crate::Type,
+            out: &mut $crate::private::BytesMut,
+        ) -> ::std::result::Result<
+            $crate::IsNull,
+            Box<dyn ::std::error::Error + ::std::marker::Sync + ::std::marker::Send>,
+        > {
+            $crate::__to_sql_checked(self, ty, out)
+        }
+    };
+}
+
+// WARNING: this function is not considered part of this crate's public API.
+// It is subject to change at any time.
+#[doc(hidden)]
+pub fn __to_sql_checked<T>(
+    v: &T,
+    ty: &Type,
+    out: &mut BytesMut,
+) -> Result<IsNull, Box<dyn Error + Sync + Send>>
+where
+    T: ToSql,
+{
+    if !T::accepts(ty) {
+        return Err(Box::new(WrongType::new::<T>(ty.clone())));
+    }
+    v.to_sql(ty, out)
+}
+
 // mod pg_lsn;
 #[doc(hidden)]
 pub mod private;
@@ -106,7 +142,7 @@ pub enum Kind {
    /// An array type along with the type of its elements.
    Array(Type),
    /// A range type along with the type of its elements.
-    Range(Oid),
+    Range(Type),
    /// A multirange type along with the type of its elements.
    Multirange(Type),
    /// A domain type along with its underlying type.
@@ -341,6 +377,43 @@ pub enum IsNull {
    No,
 }

+/// A trait for types that can be converted into Postgres values.
+pub trait ToSql: fmt::Debug {
+    /// Converts the value of `self` into the binary format of the specified
+    /// Postgres `Type`, appending it to `out`.
+    ///
+    /// The caller of this method is responsible for ensuring that this type
+    /// is compatible with the Postgres `Type`.
+    ///
+    /// The return value indicates if this value should be represented as
+    /// `NULL`. If this is the case, implementations **must not** write
+    /// anything to `out`.
+    fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>>
+    where
+        Self: Sized;
+
+    /// Determines if a value of this type can be converted to the specified
+    /// Postgres `Type`.
+    fn accepts(ty: &Type) -> bool
+    where
+        Self: Sized;
+
+    /// An adaptor method used internally by Rust-Postgres.
+    ///
+    /// *All* implementations of this method should be generated by the
+    /// `to_sql_checked!()` macro.
+    fn to_sql_checked(
+        &self,
+        ty: &Type,
+        out: &mut BytesMut,
+    ) -> Result<IsNull, Box<dyn Error + Sync + Send>>;
+
+    /// Specify the encode format
+    fn encode_format(&self, _ty: &Type) -> Format {
+        Format::Binary
+    }
+}
+
 /// Supported Postgres message format types
 ///
 /// Using Text format in a message assumes a Postgres `SERVER_ENCODING` of `UTF8`
@@ -351,3 +424,52 @@ pub enum Format {
    /// Compact, typed binary format
    Binary,
 }
+
+impl ToSql for &str {
+    fn to_sql(&self, ty: &Type, w: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>> {
+        match *ty {
+            ref ty if ty.name() == "ltree" => types::ltree_to_sql(self, w),
+            ref ty if ty.name() == "lquery" => types::lquery_to_sql(self, w),
+            ref ty if ty.name() == "ltxtquery" => types::ltxtquery_to_sql(self, w),
+            _ => types::text_to_sql(self, w),
+        }
+        Ok(IsNull::No)
+    }
+
+    fn accepts(ty: &Type) -> bool {
+        match *ty {
+            Type::VARCHAR | Type::TEXT | Type::BPCHAR | Type::NAME | Type::UNKNOWN => true,
+            ref ty
+                if (ty.name() == "citext"
+                    || ty.name() == "ltree"
+                    || ty.name() == "lquery"
+                    || ty.name() == "ltxtquery") =>
+            {
+                true
+            }
+            _ => false,
+        }
+    }
+
+    to_sql_checked!();
+}
+
+macro_rules! simple_to {
+    ($t:ty, $f:ident, $($expected:ident),+) => {
+        impl ToSql for $t {
+            fn to_sql(&self,
+                      _: &Type,
+                      w: &mut BytesMut)
+                      -> Result<IsNull, Box<dyn Error + Sync + Send>> {
+                types::$f(*self, w);
+                Ok(IsNull::No)
+            }
+
+            accepts!($($expected),+);
+
+            to_sql_checked!();
+        }
+    }
+}
+
+simple_to!(u32, oid_to_sql, OID);
--- a/libs/proxy/postgres-types2/src/type_gen.rs
+++ b/libs/proxy/postgres-types2/src/type_gen.rs
@@ -393,7 +393,7 @@ impl Inner {
        }
    }

-    pub const fn const_oid(&self) -> Oid {
+    pub fn oid(&self) -> Oid {
        match *self {
            Inner::Bool => 16,
            Inner::Bytea => 17,
@@ -580,14 +580,7 @@ impl Inner {
            Inner::TstzmultiRangeArray => 6153,
            Inner::DatemultiRangeArray => 6155,
            Inner::Int8multiRangeArray => 6157,
-            Inner::Other(_) => u32::MAX,
-        }
-    }
-
-    pub fn oid(&self) -> Oid {
-        match *self {
            Inner::Other(ref u) => u.oid,
-            _ => self.const_oid(),
        }
    }

@@ -734,17 +727,17 @@ impl Inner {
            Inner::JsonbArray => &Kind::Array(Type(Inner::Jsonb)),
            Inner::AnyRange => &Kind::Pseudo,
            Inner::EventTrigger => &Kind::Pseudo,
-            Inner::Int4Range => &const { Kind::Range(Inner::Int4.const_oid()) },
+            Inner::Int4Range => &Kind::Range(Type(Inner::Int4)),
            Inner::Int4RangeArray => &Kind::Array(Type(Inner::Int4Range)),
-            Inner::NumRange => &const { Kind::Range(Inner::Numeric.const_oid()) },
+            Inner::NumRange => &Kind::Range(Type(Inner::Numeric)),
            Inner::NumRangeArray => &Kind::Array(Type(Inner::NumRange)),
-            Inner::TsRange => &const { Kind::Range(Inner::Timestamp.const_oid()) },
+            Inner::TsRange => &Kind::Range(Type(Inner::Timestamp)),
            Inner::TsRangeArray => &Kind::Array(Type(Inner::TsRange)),
-            Inner::TstzRange => &const { Kind::Range(Inner::Timestamptz.const_oid()) },
+            Inner::TstzRange => &Kind::Range(Type(Inner::Timestamptz)),
            Inner::TstzRangeArray => &Kind::Array(Type(Inner::TstzRange)),
-            Inner::DateRange => &const { Kind::Range(Inner::Date.const_oid()) },
+            Inner::DateRange => &Kind::Range(Type(Inner::Date)),
            Inner::DateRangeArray => &Kind::Array(Type(Inner::DateRange)),
-            Inner::Int8Range => &const { Kind::Range(Inner::Int8.const_oid()) },
+            Inner::Int8Range => &Kind::Range(Type(Inner::Int8)),
            Inner::Int8RangeArray => &Kind::Array(Type(Inner::Int8Range)),
            Inner::Jsonpath => &Kind::Simple,
            Inner::JsonpathArray => &Kind::Array(Type(Inner::Jsonpath)),
--- a/libs/proxy/tokio-postgres2/src/client.rs
+++ b/libs/proxy/tokio-postgres2/src/client.rs
@@ -1,12 +1,14 @@
 use std::collections::HashMap;
 use std::fmt;
 use std::net::IpAddr;
+use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::time::Duration;

 use bytes::BytesMut;
 use fallible_iterator::FallibleIterator;
 use futures_util::{TryStreamExt, future, ready};
+use parking_lot::Mutex;
 use postgres_protocol2::message::backend::Message;
 use postgres_protocol2::message::frontend;
 use serde::{Deserialize, Serialize};
@@ -14,52 +16,29 @@ use tokio::sync::mpsc;

 use crate::codec::{BackendMessages, FrontendMessage};
 use crate::config::{Host, SslMode};
+use crate::connection::{Request, RequestMessages};
 use crate::query::RowStream;
 use crate::simple_query::SimpleQueryStream;
 use crate::types::{Oid, Type};
 use crate::{
-    CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Transaction, TransactionBuilder,
-    query, simple_query,
+    CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Statement, Transaction,
+    TransactionBuilder, query, simple_query,
 };

 pub struct Responses {
-    /// new messages from conn
    receiver: mpsc::Receiver<BackendMessages>,
-    /// current batch of messages
    cur: BackendMessages,
-    /// number of total queries sent.
-    waiting: usize,
-    /// number of ReadyForQuery messages received.
-    received: usize,
 }

 impl Responses {
    pub fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll<Result<Message, Error>> {
        loop {
-            // get the next saved message
-            if let Some(message) = self.cur.next().map_err(Error::parse)? {
-                let received = self.received;
-
-                // increase the query head if this is the last message.
-                if let Message::ReadyForQuery(_) = message {
-                    self.received += 1;
-                }
-
-                // check if the client has skipped this query.
-                if received + 1 < self.waiting {
-                    // grab the next message.
-                    continue;
-                }
-
-                // convenience: turn the error messaage into a proper error.
-                let res = match message {
-                    Message::ErrorResponse(body) => Err(Error::db(body)),
-                    message => Ok(message),
-                };
-                return Poll::Ready(res);
+            match self.cur.next().map_err(Error::parse)? {
+                Some(Message::ErrorResponse(body)) => return Poll::Ready(Err(Error::db(body))),
+                Some(message) => return Poll::Ready(Ok(message)),
+                None => {}
            }

-            // get the next batch of messages.
            match ready!(self.receiver.poll_recv(cx)) {
                Some(messages) => self.cur = messages,
                None => return Poll::Ready(Err(Error::closed())),
@@ -76,87 +55,44 @@ impl Responses {
 /// (corresponding to the queries in the [crate::prepare] module).
 #[derive(Default)]
 pub(crate) struct CachedTypeInfo {
+    /// A statement for basic information for a type from its
+    /// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
+    /// fallback).
+    pub(crate) typeinfo: Option<Statement>,
+
    /// Cache of types already looked up.
    pub(crate) types: HashMap<Oid, Type>,
 }

 pub struct InnerClient {
-    sender: mpsc::UnboundedSender<FrontendMessage>,
-    responses: Responses,
+    sender: mpsc::UnboundedSender<Request>,

    /// A buffer to use when writing out postgres commands.
-    buffer: BytesMut,
+    buffer: Mutex<BytesMut>,
 }

 impl InnerClient {
-    pub fn start(&mut self) -> Result<PartialQuery, Error> {
-        self.responses.waiting += 1;
-        Ok(PartialQuery(Some(self)))
+    pub fn send(&self, messages: RequestMessages) -> Result<Responses, Error> {
+        let (sender, receiver) = mpsc::channel(1);
+        let request = Request { messages, sender };
+        self.sender.send(request).map_err(|_| Error::closed())?;
+
+        Ok(Responses {
+            receiver,
+            cur: BackendMessages::empty(),
+        })
    }

-    // pub fn send_with_sync<F>(&mut self, f: F) -> Result<&mut Responses, Error>
-    // where
-    //     F: FnOnce(&mut BytesMut) -> Result<(), Error>,
-    // {
-    //     self.start()?.send_with_sync(f)
-    // }
-
-    pub fn send_simple_query(&mut self, query: &str) -> Result<&mut Responses, Error> {
-        self.responses.waiting += 1;
-
-        self.buffer.clear();
-        // simple queries do not need sync.
-        frontend::query(query, &mut self.buffer).map_err(Error::encode)?;
-        let buf = self.buffer.split().freeze();
-        self.send_message(FrontendMessage::Raw(buf))
-    }
-
-    fn send_message(&mut self, messages: FrontendMessage) -> Result<&mut Responses, Error> {
-        self.sender.send(messages).map_err(|_| Error::closed())?;
-        Ok(&mut self.responses)
-    }
-}
-
-pub struct PartialQuery<'a>(Option<&'a mut InnerClient>);
-
-impl Drop for PartialQuery<'_> {
-    fn drop(&mut self) {
-        if let Some(client) = self.0.take() {
-            client.buffer.clear();
-            frontend::sync(&mut client.buffer);
-            let buf = client.buffer.split().freeze();
-            let _ = client.send_message(FrontendMessage::Raw(buf));
-        }
-    }
-}
-
-impl<'a> PartialQuery<'a> {
-    pub fn send_with_flush<F>(&mut self, f: F) -> Result<&mut Responses, Error>
+    /// Run `f` with exclusive access to the shared scratch buffer used for encoding
+    /// postgres commands. The buffer is cleared again once `f` returns.
+    pub fn with_buf<F, R>(&self, f: F) -> R
    where
-        F: FnOnce(&mut BytesMut) -> Result<(), Error>,
+        F: FnOnce(&mut BytesMut) -> R,
    {
-        let client = self.0.as_deref_mut().unwrap();
-
-        client.buffer.clear();
-        f(&mut client.buffer)?;
-        frontend::flush(&mut client.buffer);
-        let buf = client.buffer.split().freeze();
-        client.send_message(FrontendMessage::Raw(buf))
-    }
-
-    pub fn send_with_sync<F>(mut self, f: F) -> Result<&'a mut Responses, Error>
-    where
-        F: FnOnce(&mut BytesMut) -> Result<(), Error>,
-    {
-        let client = self.0.as_deref_mut().unwrap();
-
-        client.buffer.clear();
-        f(&mut client.buffer)?;
-        frontend::sync(&mut client.buffer);
-        let buf = client.buffer.split().freeze();
-        let _ = client.send_message(FrontendMessage::Raw(buf));
-
-        Ok(&mut self.0.take().unwrap().responses)
+        let mut buffer = self.buffer.lock();
+        let r = f(&mut buffer);
+        buffer.clear();
+        r
    }
 }

@@ -173,7 +109,7 @@ pub struct SocketConfig {
 /// The client is one half of what is returned when a connection is established. Users interact with the database
 /// through this client object.
 pub struct Client {
-    inner: InnerClient,
+    inner: Arc<InnerClient>,
    cached_typeinfo: CachedTypeInfo,

    socket_config: SocketConfig,
@@ -184,24 +120,17 @@ pub struct Client {

 impl Client {
    pub(crate) fn new(
-        sender: mpsc::UnboundedSender<FrontendMessage>,
-        receiver: mpsc::Receiver<BackendMessages>,
+        sender: mpsc::UnboundedSender<Request>,
        socket_config: SocketConfig,
        ssl_mode: SslMode,
        process_id: i32,
        secret_key: i32,
    ) -> Client {
        Client {
-            inner: InnerClient {
+            inner: Arc::new(InnerClient {
                sender,
-                responses: Responses {
-                    receiver,
-                    cur: BackendMessages::empty(),
-                    waiting: 0,
-                    received: 0,
-                },
                buffer: Default::default(),
-            },
+            }),
            cached_typeinfo: Default::default(),

            socket_config,
@@ -216,29 +145,19 @@ impl Client {
        self.process_id
    }

-    pub(crate) fn inner_mut(&mut self) -> &mut InnerClient {
-        &mut self.inner
+    pub(crate) fn inner(&self) -> &Arc<InnerClient> {
+        &self.inner
    }

    /// Pass text directly to the Postgres backend to allow it to sort out typing itself and
    /// to save a roundtrip
-    pub async fn query_raw_txt<S, I>(
-        &mut self,
-        statement: &str,
-        params: I,
-    ) -> Result<RowStream, Error>
+    pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str>,
        I: IntoIterator<Item = Option<S>>,
        I::IntoIter: ExactSizeIterator,
    {
-        query::query_txt(
-            &mut self.inner,
-            &mut self.cached_typeinfo,
-            statement,
-            params,
-        )
-        .await
+        query::query_txt(&self.inner, statement, params).await
    }

    /// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
@@ -254,15 +173,12 @@ impl Client {
    /// Prepared statements should be use for any query which contains user-specified data, as they provided the
    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
    /// them to this method!
-    pub async fn simple_query(&mut self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
+    pub async fn simple_query(&self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
        self.simple_query_raw(query).await?.try_collect().await
    }

-    pub(crate) async fn simple_query_raw(
-        &mut self,
-        query: &str,
-    ) -> Result<SimpleQueryStream, Error> {
-        simple_query::simple_query(self.inner_mut(), query).await
+    pub(crate) async fn simple_query_raw(&self, query: &str) -> Result<SimpleQueryStream, Error> {
+        simple_query::simple_query(self.inner(), query).await
    }

    /// Executes a sequence of SQL statements using the simple query protocol.
@@ -275,11 +191,15 @@ impl Client {
    /// Prepared statements should be use for any query which contains user-specified data, as they provided the
    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
    /// them to this method!
-    pub async fn batch_execute(&mut self, query: &str) -> Result<ReadyForQueryStatus, Error> {
-        simple_query::batch_execute(self.inner_mut(), query).await
+    pub async fn batch_execute(&self, query: &str) -> Result<ReadyForQueryStatus, Error> {
+        simple_query::batch_execute(self.inner(), query).await
    }

    pub async fn discard_all(&mut self) -> Result<ReadyForQueryStatus, Error> {
+        // `DISCARD ALL` deallocates every prepared statement in the postgres session,
+        // so forget the cached typeinfo statement as well.
+        self.cached_typeinfo.typeinfo = None;
+
        self.batch_execute("discard all").await
    }

@@ -288,7 +208,7 @@ impl Client {
    /// The transaction will roll back by default - use the `commit` method to commit it.
    pub async fn transaction(&mut self) -> Result<Transaction<'_>, Error> {
        struct RollbackIfNotDone<'me> {
-            client: &'me mut Client,
+            client: &'me Client,
            done: bool,
        }

@@ -298,7 +218,14 @@ impl Client {
                    return;
                }

-                let _ = self.client.inner.send_simple_query("ROLLBACK");
+                let buf = self.client.inner().with_buf(|buf| {
+                    frontend::query("ROLLBACK", buf).unwrap();
+                    buf.split().freeze()
+                });
+                let _ = self
+                    .client
+                    .inner()
+                    .send(RequestMessages::Single(FrontendMessage::Raw(buf)));
            }
        }

@@ -312,7 +239,7 @@ impl Client {
                client: self,
                done: false,
            };
-            cleaner.client.batch_execute("BEGIN").await?;
+            self.batch_execute("BEGIN").await?;
            cleaner.done = true;
        }

@@ -338,6 +265,11 @@ impl Client {
        }
    }

+    /// Resolve the [`Type`] for `oid`, checking this client's cached type info before querying.
+    pub(crate) async fn get_type_inner(&mut self, oid: Oid) -> Result<Type, Error> {
+        crate::prepare::get_type(&self.inner, &mut self.cached_typeinfo, oid).await
+    }
+
    /// Determines if the connection to the server has already closed.
    ///
    /// In that case, all future queries will fail.
--- a/libs/proxy/tokio-postgres2/src/codec.rs
+++ b/libs/proxy/tokio-postgres2/src/codec.rs
@@ -1,16 +1,21 @@
 use std::io;

-use bytes::{Bytes, BytesMut};
+use bytes::{Buf, Bytes, BytesMut};
 use fallible_iterator::FallibleIterator;
 use postgres_protocol2::message::backend;
+use postgres_protocol2::message::frontend::CopyData;
 use tokio_util::codec::{Decoder, Encoder};

 pub enum FrontendMessage {
    Raw(Bytes),
+    CopyData(CopyData<Box<dyn Buf + Send>>),
 }

 pub enum BackendMessage {
-    Normal { messages: BackendMessages },
+    Normal {
+        messages: BackendMessages,
+        request_complete: bool,
+    },
    Async(backend::Message),
 }

@@ -39,6 +44,7 @@ impl Encoder<FrontendMessage> for PostgresCodec {
    fn encode(&mut self, item: FrontendMessage, dst: &mut BytesMut) -> io::Result<()> {
        match item {
            FrontendMessage::Raw(buf) => dst.extend_from_slice(&buf),
+            FrontendMessage::CopyData(data) => data.write(dst),
        }

        Ok(())
@@ -51,6 +57,7 @@ impl Decoder for PostgresCodec {

    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<BackendMessage>, io::Error> {
        let mut idx = 0;
+        let mut request_complete = false;

        while let Some(header) = backend::Header::parse(&src[idx..])? {
            let len = header.len() as usize + 1;
@@ -75,6 +82,7 @@ impl Decoder for PostgresCodec {
            idx += len;

            if header.tag() == backend::READY_FOR_QUERY_TAG {
+                request_complete = true;
                break;
            }
        }
@@ -84,6 +92,7 @@ impl Decoder for PostgresCodec {
        } else {
            Ok(Some(BackendMessage::Normal {
                messages: BackendMessages(src.split_to(idx)),
+                request_complete,
            }))
        }
    }
--- a/libs/proxy/tokio-postgres2/src/connect.rs
+++ b/libs/proxy/tokio-postgres2/src/connect.rs
@@ -59,11 +59,9 @@ where
        connect_timeout: config.connect_timeout,
    };

-    let (client_tx, conn_rx) = mpsc::unbounded_channel();
-    let (conn_tx, client_rx) = mpsc::channel(4);
+    let (sender, receiver) = mpsc::unbounded_channel();
    let client = Client::new(
-        client_tx,
-        client_rx,
+        sender,
        socket_config,
        config.ssl_mode,
        process_id,
@@ -76,7 +74,7 @@ where
        .map(|m| BackendMessage::Async(Message::NoticeResponse(m)))
        .collect();

-    let connection = Connection::new(stream, delayed, parameters, conn_tx, conn_rx);
+    let connection = Connection::new(stream, delayed, parameters, receiver);

    Ok((client, connection))
 }
--- a/libs/proxy/tokio-postgres2/src/connection.rs
+++ b/libs/proxy/tokio-postgres2/src/connection.rs
@@ -4,6 +4,7 @@ use std::pin::Pin;
 use std::task::{Context, Poll};

 use bytes::BytesMut;
+use fallible_iterator::FallibleIterator;
 use futures_util::{Sink, Stream, ready};
 use postgres_protocol2::message::backend::Message;
 use postgres_protocol2::message::frontend;
@@ -18,12 +19,30 @@ use crate::error::DbError;
 use crate::maybe_tls_stream::MaybeTlsStream;
 use crate::{AsyncMessage, Error, Notification};

+pub enum RequestMessages {
+    Single(FrontendMessage),
+}
+
+pub struct Request {
+    pub messages: RequestMessages,
+    pub sender: mpsc::Sender<BackendMessages>,
+}
+
+pub struct Response {
+    sender: PollSender<BackendMessages>,
+}
+
 #[derive(PartialEq, Debug)]
 enum State {
    Active,
    Closing,
 }

+enum WriteReady {
+    Terminating,
+    WaitingOnRead,
+}
+
 /// A connection to a PostgreSQL database.
 ///
 /// This is one half of what is returned when a new connection is established. It performs the actual IO with the
@@ -37,11 +56,9 @@ pub struct Connection<S, T> {
    pub stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
    /// HACK: we need this in the Neon Proxy to forward params.
    pub parameters: HashMap<String, String>,
-
-    sender: PollSender<BackendMessages>,
-    receiver: mpsc::UnboundedReceiver<FrontendMessage>,
-
+    receiver: mpsc::UnboundedReceiver<Request>,
    pending_responses: VecDeque<BackendMessage>,
+    responses: VecDeque<Response>,
    state: State,
 }

@@ -54,15 +71,14 @@ where
        stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
        pending_responses: VecDeque<BackendMessage>,
        parameters: HashMap<String, String>,
-        sender: mpsc::Sender<BackendMessages>,
-        receiver: mpsc::UnboundedReceiver<FrontendMessage>,
+        receiver: mpsc::UnboundedReceiver<Request>,
    ) -> Connection<S, T> {
        Connection {
            stream,
            parameters,
-            sender: PollSender::new(sender),
            receiver,
            pending_responses,
+            responses: VecDeque::new(),
            state: State::Active,
        }
    }
@@ -94,7 +110,7 @@ where
                }
            };

-            let messages = match message {
+            let (mut messages, request_complete) = match message {
                BackendMessage::Async(Message::NoticeResponse(body)) => {
                    let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
                    return Poll::Ready(Ok(AsyncMessage::Notice(error)));
@@ -115,19 +131,41 @@ where
                    continue;
                }
                BackendMessage::Async(_) => unreachable!(),
-                BackendMessage::Normal { messages } => messages,
+                BackendMessage::Normal {
+                    messages,
+                    request_complete,
+                } => (messages, request_complete),
            };

-            match self.sender.poll_reserve(cx) {
+            let mut response = match self.responses.pop_front() {
+                Some(response) => response,
+                None => match messages.next().map_err(Error::parse)? {
+                    Some(Message::ErrorResponse(error)) => {
+                        return Poll::Ready(Err(Error::db(error)));
+                    }
+                    _ => return Poll::Ready(Err(Error::unexpected_message())),
+                },
+            };
+
+            match response.sender.poll_reserve(cx) {
                Poll::Ready(Ok(())) => {
-                    let _ = self.sender.send_item(messages);
+                    let _ = response.sender.send_item(messages);
+                    if !request_complete {
+                        self.responses.push_front(response);
+                    }
                }
                Poll::Ready(Err(_)) => {
-                    return Poll::Ready(Err(Error::closed()));
+                    // receiver hung up: keep draining this response so later ones reach the right request
+                    if !request_complete {
+                        self.responses.push_front(response);
+                    }
                }
                Poll::Pending => {
-                    self.pending_responses
-                        .push_back(BackendMessage::Normal { messages });
+                    self.responses.push_front(response);
+                    self.pending_responses.push_back(BackendMessage::Normal {
+                        messages,
+                        request_complete,
+                    });
                    trace!("poll_read: waiting on sender");
                    return Poll::Pending;
                }
@@ -136,7 +174,7 @@ where
    }

    /// Fetch the next client request and enqueue the response sender.
-    fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<FrontendMessage>> {
+    fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
        if self.receiver.is_closed() {
            return Poll::Ready(None);
        }
@@ -144,7 +182,10 @@ where
        match self.receiver.poll_recv(cx) {
            Poll::Ready(Some(request)) => {
                trace!("polled new request");
-                Poll::Ready(Some(request))
+                self.responses.push_back(Response {
+                    sender: PollSender::new(request.sender),
+                });
+                Poll::Ready(Some(request.messages))
            }
            Poll::Ready(None) => Poll::Ready(None),
            Poll::Pending => Poll::Pending,
@@ -153,7 +194,7 @@ where

    /// Process client requests and write them to the postgres connection, flushing if necessary.
    /// client -> postgres
-    fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+    fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
        loop {
            if Pin::new(&mut self.stream)
                .poll_ready(cx)
@@ -168,14 +209,14 @@ where

            match self.poll_request(cx) {
                // send the message to postgres
-                Poll::Ready(Some(request)) => {
+                Poll::Ready(Some(RequestMessages::Single(request))) => {
                    Pin::new(&mut self.stream)
                        .start_send(request)
                        .map_err(Error::io)?;
                }
                // No more messages from the client, and no more responses to wait for.
                // Send a terminate message to postgres
-                Poll::Ready(None) => {
+                Poll::Ready(None) if self.responses.is_empty() => {
                    trace!("poll_write: at eof, terminating");
                    let mut request = BytesMut::new();
                    frontend::terminate(&mut request);
@@ -187,7 +228,16 @@ where

                    trace!("poll_write: sent eof, closing");
                    trace!("poll_write: done");
-                    return Poll::Ready(Ok(()));
+                    return Poll::Ready(Ok(WriteReady::Terminating));
+                }
+                // No more messages from the client, but there are still some responses to wait for.
+                Poll::Ready(None) => {
+                    trace!(
+                        "poll_write: at eof, pending responses {}",
+                        self.responses.len()
+                    );
+                    ready!(self.poll_flush(cx))?;
+                    return Poll::Ready(Ok(WriteReady::WaitingOnRead));
                }
                // Still waiting for a message from the client.
                Poll::Pending => {
@@ -248,7 +298,7 @@ where
            // if the state is still active, try read from and write to postgres.
            let message = self.poll_read(cx)?;
            let closing = self.poll_write(cx)?;
-            if let Poll::Ready(()) = closing {
+            if let Poll::Ready(WriteReady::Terminating) = closing {
                self.state = State::Closing;
            }

--- a/libs/proxy/tokio-postgres2/src/generic_client.rs
+++ b/libs/proxy/tokio-postgres2/src/generic_client.rs
@@ -1,6 +1,9 @@
 #![allow(async_fn_in_trait)]

+use postgres_protocol2::Oid;
+
 use crate::query::RowStream;
+use crate::types::Type;
 use crate::{Client, Error, Transaction};

 mod private {
@@ -12,17 +15,20 @@ mod private {
 /// This trait is "sealed", and cannot be implemented outside of this crate.
 pub trait GenericClient: private::Sealed {
    /// Like `Client::query_raw_txt`.
-    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
+    async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
        I::IntoIter: ExactSizeIterator + Sync + Send;
+
+    /// Look up the [`Type`] for the given type OID.
+    async fn get_type(&mut self, oid: Oid) -> Result<Type, Error>;
 }

 impl private::Sealed for Client {}

 impl GenericClient for Client {
-    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
+    async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -30,12 +36,17 @@ impl GenericClient for Client {
    {
        self.query_raw_txt(statement, params).await
    }
+
+    /// Query for type information
+    async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
+        self.get_type_inner(oid).await
+    }
 }

 impl private::Sealed for Transaction<'_> {}

 impl GenericClient for Transaction<'_> {
-    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
+    async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -43,4 +54,9 @@ impl GenericClient for Transaction<'_> {
    {
        self.query_raw_txt(statement, params).await
    }
+
+    /// Query for type information
+    async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
+        self.client_mut().get_type(oid).await
+    }
 }
--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -18,6 +18,7 @@ pub use crate::statement::{Column, Statement};
 pub use crate::tls::NoTls;
 pub use crate::transaction::Transaction;
 pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
+use crate::types::ToSql;

 /// After executing a query, the connection will be in one of these states
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -119,3 +120,9 @@ pub enum SimpleQueryMessage {
    /// The number of rows modified or selected is returned.
    CommandComplete(u64),
 }
+
+fn slice_iter<'a>(
+    s: &'a [&'a (dyn ToSql + Sync)],
+) -> impl ExactSizeIterator<Item = &'a (dyn ToSql + Sync)> + 'a {
+    s.iter().map(|s| *s as _)
+}
--- a/libs/proxy/tokio-postgres2/src/prepare.rs
+++ b/libs/proxy/tokio-postgres2/src/prepare.rs
@@ -1,14 +1,19 @@
-use bytes::BytesMut;
-use fallible_iterator::FallibleIterator;
-use postgres_protocol2::IsNull;
-use postgres_protocol2::message::backend::{Message, RowDescriptionBody};
-use postgres_protocol2::message::frontend;
-use postgres_protocol2::types::oid_to_sql;
-use postgres_types2::Format;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;

-use crate::client::{CachedTypeInfo, PartialQuery, Responses};
+use bytes::Bytes;
+use fallible_iterator::FallibleIterator;
+use futures_util::{TryStreamExt, pin_mut};
+use postgres_protocol2::message::backend::Message;
+use postgres_protocol2::message::frontend;
+use tracing::debug;
+
+use crate::client::{CachedTypeInfo, InnerClient};
+use crate::codec::FrontendMessage;
+use crate::connection::RequestMessages;
 use crate::types::{Kind, Oid, Type};
-use crate::{Column, Error, Row, Statement};
+use crate::{Column, Error, Statement, query, slice_iter};

 pub(crate) const TYPEINFO_QUERY: &str = "\
 SELECT t.typname, t.typtype, t.typelem, r.rngsubtype, t.typbasetype, n.nspname, t.typrelid
@@ -18,51 +23,22 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
 WHERE t.oid = $1
 ";

-/// we need to make sure we close this prepared statement.
-struct CloseStmt<'a, 'b> {
-    client: Option<&'a mut PartialQuery<'b>>,
-    name: &'static str,
-}
-
-impl<'a> CloseStmt<'a, '_> {
-    fn close(mut self) -> Result<&'a mut Responses, Error> {
-        let client = self.client.take().unwrap();
-        client.send_with_flush(|buf| {
-            frontend::close(b'S', self.name, buf).map_err(Error::encode)?;
-            Ok(())
-        })
-    }
-}
-
-impl Drop for CloseStmt<'_, '_> {
-    fn drop(&mut self) {
-        if let Some(client) = self.client.take() {
-            let _ = client.send_with_flush(|buf| {
-                frontend::close(b'S', self.name, buf).map_err(Error::encode)?;
-                Ok(())
-            });
-        }
-    }
-}
-
 async fn prepare_typecheck(
-    client: &mut PartialQuery<'_>,
+    client: &Arc<InnerClient>,
    name: &'static str,
    query: &str,
+    types: &[Type],
 ) -> Result<Statement, Error> {
-    let responses = client.send_with_flush(|buf| {
-        frontend::parse(name, query, [], buf).map_err(Error::encode)?;
-        frontend::describe(b'S', name, buf).map_err(Error::encode)?;
-        Ok(())
-    })?;
+    let buf = encode(client, name, query, types)?;
+    let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;

    match responses.next().await? {
        Message::ParseComplete => {}
        _ => return Err(Error::unexpected_message()),
    }

-    match responses.next().await? {
-        Message::ParameterDescription(_) => {}
+    let parameter_description = match responses.next().await? {
+        Message::ParameterDescription(body) => body,
        _ => return Err(Error::unexpected_message()),
    };

@@ -72,6 +48,13 @@ async fn prepare_typecheck(
        _ => return Err(Error::unexpected_message()),
    };

+    let mut parameters = vec![];
+    let mut it = parameter_description.parameters();
+    while let Some(oid) = it.next().map_err(Error::parse)? {
+        let type_ = Type::from_oid(oid).ok_or_else(Error::unexpected_message)?;
+        parameters.push(type_);
+    }
+
    let mut columns = vec![];
    if let Some(row_description) = row_description {
        let mut it = row_description.fields();
@@ -82,168 +65,98 @@ async fn prepare_typecheck(
        }
    }

-    Ok(Statement::new(name, columns))
+    Ok(Statement::new(client, name, parameters, columns))
 }

-fn try_from_cache(typecache: &CachedTypeInfo, oid: Oid) -> Option<Type> {
+fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
+    if types.is_empty() {
+        debug!("preparing query {}: {}", name, query);
+    } else {
+        debug!("preparing query {} with types {:?}: {}", name, types, query);
+    }
+
+    client.with_buf(|buf| {
+        frontend::parse(name, query, types.iter().map(Type::oid), buf).map_err(Error::encode)?;
+        frontend::describe(b'S', name, buf).map_err(Error::encode)?;
+        frontend::sync(buf);
+        Ok(buf.split().freeze())
+    })
+}
+
+pub async fn get_type(
+    client: &Arc<InnerClient>,
+    typecache: &mut CachedTypeInfo,
+    oid: Oid,
+) -> Result<Type, Error> {
    if let Some(type_) = Type::from_oid(oid) {
-        return Some(type_);
+        return Ok(type_);
    }

    if let Some(type_) = typecache.types.get(&oid) {
-        return Some(type_.clone());
+        return Ok(type_.clone());
    };

-    None
-}
+    let stmt = typeinfo_statement(client, typecache).await?;

-pub async fn parse_row_description(
-    client: &mut PartialQuery<'_>,
-    typecache: &mut CachedTypeInfo,
-    row_description: Option<RowDescriptionBody>,
-) -> Result<Vec<Column>, Error> {
-    let mut columns = vec![];
+    let rows = query::query(client, stmt, slice_iter(&[&oid])).await?;
+    pin_mut!(rows);

-    if let Some(row_description) = row_description {
-        let mut it = row_description.fields();
-        while let Some(field) = it.next().map_err(Error::parse)? {
-            let type_ = try_from_cache(typecache, field.type_oid()).unwrap_or(Type::UNKNOWN);
-            let column = Column::new(field.name().to_string(), type_, field);
-            columns.push(column);
-        }
-    }
-
-    let all_known = columns.iter().all(|c| c.type_ != Type::UNKNOWN);
-    if all_known {
-        // all known, return early.
-        return Ok(columns);
-    }
-
-    let typeinfo = "neon_proxy_typeinfo";
-
-    // make sure to close the typeinfo statement before exiting.
-    let mut guard = CloseStmt {
-        name: typeinfo,
-        client: None,
-    };
-    let client = guard.client.insert(client);
-
-    // get the typeinfo statement.
-    let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY).await?;
-
-    for column in &mut columns {
-        column.type_ = get_type(client, typecache, &stmt, column.type_oid()).await?;
-    }
-
-    // cancel the close guard.
-    let responses = guard.close()?;
-
-    match responses.next().await? {
-        Message::CloseComplete => {}
-        _ => return Err(Error::unexpected_message()),
-    }
-
-    Ok(columns)
-}
-
-async fn get_type(
-    client: &mut PartialQuery<'_>,
-    typecache: &mut CachedTypeInfo,
-    stmt: &Statement,
-    mut oid: Oid,
-) -> Result<Type, Error> {
-    let mut stack = vec![];
-    let mut type_ = loop {
-        if let Some(type_) = try_from_cache(typecache, oid) {
-            break type_;
-        }
-
-        let row = exec(client, stmt, oid).await?;
-        if stack.len() > 8 {
-            return Err(Error::unexpected_message());
-        }
-
-        let name: String = row.try_get(0)?;
-        let type_: i8 = row.try_get(1)?;
-        let elem_oid: Oid = row.try_get(2)?;
-        let rngsubtype: Option<Oid> = row.try_get(3)?;
-        let basetype: Oid = row.try_get(4)?;
-        let schema: String = row.try_get(5)?;
-        let relid: Oid = row.try_get(6)?;
-
-        let kind = if type_ == b'e' as i8 {
-            Kind::Enum
-        } else if type_ == b'p' as i8 {
-            Kind::Pseudo
-        } else if basetype != 0 {
-            Kind::Domain(basetype)
-        } else if elem_oid != 0 {
-            stack.push((name, oid, schema));
-            oid = elem_oid;
-            continue;
-        } else if relid != 0 {
-            Kind::Composite(relid)
-        } else if let Some(rngsubtype) = rngsubtype {
-            Kind::Range(rngsubtype)
-        } else {
-            Kind::Simple
-        };
-
-        let type_ = Type::new(name, oid, kind, schema);
-        typecache.types.insert(oid, type_.clone());
-        break type_;
+    let row = match rows.try_next().await? {
+        Some(row) => row,
+        None => return Err(Error::unexpected_message()),
    };

-    while let Some((name, oid, schema)) = stack.pop() {
-        type_ = Type::new(name, oid, Kind::Array(type_), schema);
-        typecache.types.insert(oid, type_.clone());
-    }
+    let name: String = row.try_get(0)?;
+    let type_: i8 = row.try_get(1)?;
+    let elem_oid: Oid = row.try_get(2)?;
+    let rngsubtype: Option<Oid> = row.try_get(3)?;
+    let basetype: Oid = row.try_get(4)?;
+    let schema: String = row.try_get(5)?;
+    let relid: Oid = row.try_get(6)?;
+
+    let kind = if type_ == b'e' as i8 {
+        Kind::Enum
+    } else if type_ == b'p' as i8 {
+        Kind::Pseudo
+    } else if basetype != 0 {
+        Kind::Domain(basetype)
+    } else if elem_oid != 0 {
+        let type_ = get_type_rec(client, typecache, elem_oid).await?;
+        Kind::Array(type_)
+    } else if relid != 0 {
+        Kind::Composite(relid)
+    } else if let Some(rngsubtype) = rngsubtype {
+        let type_ = get_type_rec(client, typecache, rngsubtype).await?;
+        Kind::Range(type_)
+    } else {
+        Kind::Simple
+    };
+
+    let type_ = Type::new(name, oid, kind, schema);
+    typecache.types.insert(oid, type_.clone());

    Ok(type_)
 }

-/// exec the typeinfo statement returning one row.
-async fn exec(
-    client: &mut PartialQuery<'_>,
-    statement: &Statement,
-    param: Oid,
-) -> Result<Row, Error> {
-    let responses = client.send_with_flush(|buf| {
-        encode_bind(statement, param, "", buf);
-        frontend::execute("", 0, buf).map_err(Error::encode)?;
-        Ok(())
-    })?;
+fn get_type_rec<'a>(
+    client: &'a Arc<InnerClient>,
+    typecache: &'a mut CachedTypeInfo,
+    oid: Oid,
+) -> Pin<Box<dyn Future<Output = Result<Type, Error>> + Send + 'a>> {
+    Box::pin(get_type(client, typecache, oid))
+}

-    match responses.next().await? {
-        Message::BindComplete => {}
-        _ => return Err(Error::unexpected_message()),
+async fn typeinfo_statement(
+    client: &Arc<InnerClient>,
+    typecache: &mut CachedTypeInfo,
+) -> Result<Statement, Error> {
+    if let Some(stmt) = &typecache.typeinfo {
+        return Ok(stmt.clone());
    }

-    let row = match responses.next().await? {
-        Message::DataRow(body) => Row::new(statement.clone(), body, Format::Binary)?,
-        _ => return Err(Error::unexpected_message()),
-    };
+    let typeinfo = "neon_proxy_typeinfo";
+    let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY, &[]).await?;

-    match responses.next().await? {
-        Message::CommandComplete(_) => {}
-        _ => return Err(Error::unexpected_message()),
-    };
-
-    Ok(row)
-}
-
-fn encode_bind(statement: &Statement, param: Oid, portal: &str, buf: &mut BytesMut) {
-    frontend::bind(
-        portal,
-        statement.name(),
-        [Format::Binary as i16],
-        [param],
-        |param, buf| {
-            oid_to_sql(param, buf);
-            Ok(IsNull::No)
-        },
-        [Format::Binary as i16],
-        buf,
-    )
-    .unwrap();
+    typecache.typeinfo = Some(stmt.clone());
+    Ok(stmt)
 }
--- a/libs/proxy/tokio-postgres2/src/query.rs
+++ b/libs/proxy/tokio-postgres2/src/query.rs
@@ -1,43 +1,76 @@
+use std::fmt;
+use std::marker::PhantomPinned;
 use std::pin::Pin;
+use std::sync::Arc;
 use std::task::{Context, Poll};

-use bytes::BufMut;
+use bytes::{BufMut, Bytes, BytesMut};
+use fallible_iterator::FallibleIterator;
 use futures_util::{Stream, ready};
+use pin_project_lite::pin_project;
 use postgres_protocol2::message::backend::Message;
 use postgres_protocol2::message::frontend;
-use postgres_types2::Format;
+use postgres_types2::{Format, ToSql, Type};
+use tracing::debug;

-use crate::client::{CachedTypeInfo, InnerClient, Responses};
-use crate::{Error, ReadyForQueryStatus, Row, Statement};
+use crate::client::{InnerClient, Responses};
+use crate::codec::FrontendMessage;
+use crate::connection::RequestMessages;
+use crate::types::IsNull;
+use crate::{Column, Error, ReadyForQueryStatus, Row, Statement};

-pub async fn query_txt<'a, S, I>(
-    client: &'a mut InnerClient,
-    typecache: &mut CachedTypeInfo,
+struct BorrowToSqlParamsDebug<'a>(&'a [&'a (dyn ToSql + Sync)]);
+
+impl fmt::Debug for BorrowToSqlParamsDebug<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list().entries(self.0.iter()).finish()
+    }
+}
+
+pub async fn query<'a, I>(
+    client: &InnerClient,
+    statement: Statement,
+    params: I,
+) -> Result<RowStream, Error>
+where
+    I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
+    I::IntoIter: ExactSizeIterator,
+{
+    let buf = if tracing::enabled!(tracing::Level::DEBUG) {
+        let params = params.into_iter().collect::<Vec<_>>();
+        debug!(
+            "executing statement {} with parameters: {:?}",
+            statement.name(),
+            BorrowToSqlParamsDebug(params.as_slice()),
+        );
+        encode(client, &statement, params)?
+    } else {
+        encode(client, &statement, params)?
+    };
+    let responses = start(client, buf).await?;
+    Ok(RowStream {
+        statement,
+        responses,
+        command_tag: None,
+        status: ReadyForQueryStatus::Unknown,
+        output_format: Format::Binary,
+        _p: PhantomPinned,
+    })
+}
+
+pub async fn query_txt<S, I>(
+    client: &Arc<InnerClient>,
    query: &str,
    params: I,
-) -> Result<RowStream<'a>, Error>
+) -> Result<RowStream, Error>
 where
    S: AsRef<str>,
    I: IntoIterator<Item = Option<S>>,
    I::IntoIter: ExactSizeIterator,
 {
    let params = params.into_iter();
-    let mut client = client.start()?;

-    // Flow:
-    // 1. Parse the query
-    // 2. Inspect the row description for OIDs
-    // 3. If there's any OIDs we don't already know about, perform the typeinfo routine
-    // 4. Execute the query
-    // 5. Sync.
-    //
-    // The typeinfo routine:
-    // 1. Parse the typeinfo query
-    // 2. Execute the query on each OID
-    // 3. If the result does not match an OID we know, repeat 2.
-
-    // parse the query and get type info
-    let responses = client.send_with_flush(|buf| {
+    let buf = client.with_buf(|buf| {
        frontend::parse(
            "",                 // unnamed prepared statement
            query,              // query to parse
@@ -46,30 +79,7 @@ where
        )
        .map_err(Error::encode)?;
        frontend::describe(b'S', "", buf).map_err(Error::encode)?;
-        Ok(())
-    })?;
-
-    match responses.next().await? {
-        Message::ParseComplete => {}
-        _ => return Err(Error::unexpected_message()),
-    }
-
-    match responses.next().await? {
-        Message::ParameterDescription(_) => {}
-        _ => return Err(Error::unexpected_message()),
-    };
-
-    let row_description = match responses.next().await? {
-        Message::RowDescription(body) => Some(body),
-        Message::NoData => None,
-        _ => return Err(Error::unexpected_message()),
-    };
-
-    let columns =
-        crate::prepare::parse_row_description(&mut client, typecache, row_description).await?;
-
-    let responses = client.send_with_sync(|buf| {
-        // Bind, pass params as text, retrieve as text
+        // Bind, pass params as text, retrieve as binary
        match frontend::bind(
            "",                 // empty string selects the unnamed portal
            "",                 // unnamed prepared statement
@@ -92,55 +102,173 @@ where

        // Execute
        frontend::execute("", 0, buf).map_err(Error::encode)?;
+        // Sync
+        frontend::sync(buf);

-        Ok(())
+        Ok(buf.split().freeze())
    })?;

+    // now read the responses
+    let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
+
+    match responses.next().await? {
+        Message::ParseComplete => {}
+        _ => return Err(Error::unexpected_message()),
+    }
+
+    let parameter_description = match responses.next().await? {
+        Message::ParameterDescription(body) => body,
+        _ => return Err(Error::unexpected_message()),
+    };
+
+    let row_description = match responses.next().await? {
+        Message::RowDescription(body) => Some(body),
+        Message::NoData => None,
+        _ => return Err(Error::unexpected_message()),
+    };
+
    match responses.next().await? {
        Message::BindComplete => {}
        _ => return Err(Error::unexpected_message()),
    }

+    let mut parameters = vec![];
+    let mut it = parameter_description.parameters();
+    while let Some(oid) = it.next().map_err(Error::parse)? {
+        let type_ = Type::from_oid(oid).unwrap_or(Type::UNKNOWN);
+        parameters.push(type_);
+    }
+
+    let mut columns = vec![];
+    if let Some(row_description) = row_description {
+        let mut it = row_description.fields();
+        while let Some(field) = it.next().map_err(Error::parse)? {
+            let type_ = Type::from_oid(field.type_oid()).unwrap_or(Type::UNKNOWN);
+            let column = Column::new(field.name().to_string(), type_, field);
+            columns.push(column);
+        }
+    }
+
    Ok(RowStream {
+        statement: Statement::new_anonymous(parameters, columns),
        responses,
-        statement: Statement::new("", columns),
        command_tag: None,
        status: ReadyForQueryStatus::Unknown,
        output_format: Format::Text,
+        _p: PhantomPinned,
    })
 }

-/// A stream of table rows.
-pub struct RowStream<'a> {
-    responses: &'a mut Responses,
-    output_format: Format,
-    pub statement: Statement,
-    pub command_tag: Option<String>,
-    pub status: ReadyForQueryStatus,
+async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
+    let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
+
+    match responses.next().await? {
+        Message::BindComplete => {}
+        _ => return Err(Error::unexpected_message()),
+    }
+
+    Ok(responses)
 }

-impl Stream for RowStream<'_> {
+pub fn encode<'a, I>(client: &InnerClient, statement: &Statement, params: I) -> Result<Bytes, Error>
+where
+    I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
+    I::IntoIter: ExactSizeIterator,
+{
+    client.with_buf(|buf| {
+        encode_bind(statement, params, "", buf)?;
+        frontend::execute("", 0, buf).map_err(Error::encode)?;
+        frontend::sync(buf);
+        Ok(buf.split().freeze())
+    })
+}
+
+pub fn encode_bind<'a, I>(
+    statement: &Statement,
+    params: I,
+    portal: &str,
+    buf: &mut BytesMut,
+) -> Result<(), Error>
+where
+    I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
+    I::IntoIter: ExactSizeIterator,
+{
+    let param_types = statement.params();
+    let params = params.into_iter();
+
+    assert!(
+        param_types.len() == params.len(),
+        "expected {} parameters but got {}",
+        param_types.len(),
+        params.len()
+    );
+
+    let (param_formats, params): (Vec<_>, Vec<_>) = params
+        .zip(param_types.iter())
+        .map(|(p, ty)| (p.encode_format(ty) as i16, p))
+        .unzip();
+
+    let params = params.into_iter();
+
+    let mut error_idx = 0;
+    let r = frontend::bind(
+        portal,
+        statement.name(),
+        param_formats,
+        params.zip(param_types).enumerate(),
+        |(idx, (param, ty)), buf| match param.to_sql_checked(ty, buf) {
+            Ok(IsNull::No) => Ok(postgres_protocol2::IsNull::No),
+            Ok(IsNull::Yes) => Ok(postgres_protocol2::IsNull::Yes),
+            Err(e) => {
+                error_idx = idx;
+                Err(e)
+            }
+        },
+        Some(1),
+        buf,
+    );
+    match r {
+        Ok(()) => Ok(()),
+        Err(frontend::BindError::Conversion(e)) => Err(Error::to_sql(e, error_idx)),
+        Err(frontend::BindError::Serialization(e)) => Err(Error::encode(e)),
+    }
+}
+
+pin_project! {
+    /// A stream of table rows.
+    pub struct RowStream {
+        statement: Statement,
+        responses: Responses,
+        command_tag: Option<String>,
+        output_format: Format,
+        status: ReadyForQueryStatus,
+        #[pin]
+        _p: PhantomPinned,
+    }
+}
+
+impl Stream for RowStream {
    type Item = Result<Row, Error>;

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        let this = self.get_mut();
+        let this = self.project();
        loop {
            match ready!(this.responses.poll_next(cx)?) {
                Message::DataRow(body) => {
                    return Poll::Ready(Some(Ok(Row::new(
                        this.statement.clone(),
                        body,
-                        this.output_format,
+                        *this.output_format,
                    )?)));
                }
                Message::EmptyQueryResponse | Message::PortalSuspended => {}
                Message::CommandComplete(body) => {
                    if let Ok(tag) = body.tag() {
-                        this.command_tag = Some(tag.to_string());
+                        *this.command_tag = Some(tag.to_string());
                    }
                }
                Message::ReadyForQuery(status) => {
-                    this.status = status.into();
+                    *this.status = status.into();
                    return Poll::Ready(None);
                }
                _ => return Poll::Ready(Some(Err(Error::unexpected_message()))),
@@ -148,3 +276,24 @@ impl Stream for RowStream<'_> {
        }
    }
 }
+
+impl RowStream {
+    /// Returns information about the columns of data in the row.
+    pub fn columns(&self) -> &[Column] {
+        self.statement.columns()
+    }
+
+    /// Returns the command tag of this query.
+    ///
+    /// This is only available after the stream has been exhausted.
+    pub fn command_tag(&self) -> Option<String> {
+        self.command_tag.clone()
+    }
+
+    /// Returns if the connection is ready for querying, with the status of the connection.
+    ///
+    /// This might be available only after the stream has been exhausted.
+    pub fn ready_status(&self) -> ReadyForQueryStatus {
+        self.status
+    }
+}
--- a/libs/proxy/tokio-postgres2/src/simple_query.rs
+++ b/libs/proxy/tokio-postgres2/src/simple_query.rs
@@ -1,14 +1,19 @@
+use std::marker::PhantomPinned;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};

+use bytes::Bytes;
 use fallible_iterator::FallibleIterator;
 use futures_util::{Stream, ready};
 use pin_project_lite::pin_project;
 use postgres_protocol2::message::backend::Message;
+use postgres_protocol2::message::frontend;
 use tracing::debug;

 use crate::client::{InnerClient, Responses};
+use crate::codec::FrontendMessage;
+use crate::connection::RequestMessages;
 use crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow};

 /// Information about a column of a single query row.
@@ -28,28 +33,28 @@ impl SimpleColumn {
    }
 }

-pub async fn simple_query<'a>(
-    client: &'a mut InnerClient,
-    query: &str,
-) -> Result<SimpleQueryStream<'a>, Error> {
+pub async fn simple_query(client: &InnerClient, query: &str) -> Result<SimpleQueryStream, Error> {
    debug!("executing simple query: {}", query);

-    let responses = client.send_simple_query(query)?;
+    let buf = encode(client, query)?;
+    let responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;

    Ok(SimpleQueryStream {
        responses,
        columns: None,
        status: ReadyForQueryStatus::Unknown,
+        _p: PhantomPinned,
    })
 }

 pub async fn batch_execute(
-    client: &mut InnerClient,
+    client: &InnerClient,
    query: &str,
 ) -> Result<ReadyForQueryStatus, Error> {
    debug!("executing statement batch: {}", query);

-    let responses = client.send_simple_query(query)?;
+    let buf = encode(client, query)?;
+    let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;

    loop {
        match responses.next().await? {
@@ -63,16 +68,25 @@ pub async fn batch_execute(
    }
 }

+pub(crate) fn encode(client: &InnerClient, query: &str) -> Result<Bytes, Error> {
+    client.with_buf(|buf| {
+        frontend::query(query, buf).map_err(Error::encode)?;
+        Ok(buf.split().freeze())
+    })
+}
+
 pin_project! {
    /// A stream of simple query results.
-    pub struct SimpleQueryStream<'a> {
-        responses: &'a mut Responses,
+    pub struct SimpleQueryStream {
+        responses: Responses,
        columns: Option<Arc<[SimpleColumn]>>,
        status: ReadyForQueryStatus,
+        #[pin]
+        _p: PhantomPinned,
    }
 }

-impl SimpleQueryStream<'_> {
+impl SimpleQueryStream {
    /// Returns if the connection is ready for querying, with the status of the connection.
    ///
    /// This might be available only after the stream has been exhausted.
@@ -81,7 +95,7 @@ impl SimpleQueryStream<'_> {
    }
 }

-impl Stream for SimpleQueryStream<'_> {
+impl Stream for SimpleQueryStream {
    type Item = Result<SimpleQueryMessage, Error>;

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
--- a/libs/proxy/tokio-postgres2/src/statement.rs
+++ b/libs/proxy/tokio-postgres2/src/statement.rs
@@ -1,15 +1,35 @@
 use std::fmt;
-use std::sync::Arc;
+use std::sync::{Arc, Weak};

-use crate::types::Type;
 use postgres_protocol2::Oid;
 use postgres_protocol2::message::backend::Field;
+use postgres_protocol2::message::frontend;
+
+use crate::client::InnerClient;
+use crate::codec::FrontendMessage;
+use crate::connection::RequestMessages;
+use crate::types::Type;

 struct StatementInner {
+    client: Weak<InnerClient>,
    name: &'static str,
+    params: Vec<Type>,
    columns: Vec<Column>,
 }

+impl Drop for StatementInner {
+    fn drop(&mut self) {
+        if let Some(client) = self.client.upgrade() {
+            let buf = client.with_buf(|buf| {
+                frontend::close(b'S', self.name, buf).unwrap();
+                frontend::sync(buf);
+                buf.split().freeze()
+            });
+            let _ = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
+        }
+    }
+}
+
 /// A prepared statement.
 ///
 /// Prepared statements can only be used with the connection that created them.
@@ -17,14 +37,38 @@ struct StatementInner {
 pub struct Statement(Arc<StatementInner>);

 impl Statement {
-    pub(crate) fn new(name: &'static str, columns: Vec<Column>) -> Statement {
-        Statement(Arc::new(StatementInner { name, columns }))
+    pub(crate) fn new(
+        inner: &Arc<InnerClient>,
+        name: &'static str,
+        params: Vec<Type>,
+        columns: Vec<Column>,
+    ) -> Statement {
+        Statement(Arc::new(StatementInner {
+            client: Arc::downgrade(inner),
+            name,
+            params,
+            columns,
+        }))
+    }
+
+    pub(crate) fn new_anonymous(params: Vec<Type>, columns: Vec<Column>) -> Statement {
+        Statement(Arc::new(StatementInner {
+            client: Weak::new(),
+            name: "<anonymous>",
+            params,
+            columns,
+        }))
    }

    pub(crate) fn name(&self) -> &str {
        self.0.name
    }

+    /// Returns the expected types of the statement's parameters.
+    pub fn params(&self) -> &[Type] {
+        &self.0.params
+    }
+
    /// Returns information about the columns returned when the statement is queried.
    pub fn columns(&self) -> &[Column] {
        &self.0.columns
@@ -34,7 +78,7 @@ impl Statement {
 /// Information about a column of a query.
 pub struct Column {
    name: String,
-    pub(crate) type_: Type,
+    type_: Type,

    // raw fields from RowDescription
    table_oid: Oid,
--- a/libs/proxy/tokio-postgres2/src/transaction.rs
+++ b/libs/proxy/tokio-postgres2/src/transaction.rs
@@ -1,3 +1,7 @@
+use postgres_protocol2::message::frontend;
+
+use crate::codec::FrontendMessage;
+use crate::connection::RequestMessages;
 use crate::query::RowStream;
 use crate::{CancelToken, Client, Error, ReadyForQueryStatus};

@@ -16,7 +20,14 @@ impl Drop for Transaction<'_> {
            return;
        }

-        let _ = self.client.inner_mut().send_simple_query("ROLLBACK");
+        let buf = self.client.inner().with_buf(|buf| {
+            frontend::query("ROLLBACK", buf).unwrap();
+            buf.split().freeze()
+        });
+        let _ = self
+            .client
+            .inner()
+            .send(RequestMessages::Single(FrontendMessage::Raw(buf)));
    }
 }

@@ -43,11 +54,7 @@ impl<'a> Transaction<'a> {
    }

    /// Like `Client::query_raw_txt`.
-    pub async fn query_raw_txt<S, I>(
-        &mut self,
-        statement: &str,
-        params: I,
-    ) -> Result<RowStream, Error>
+    pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str>,
        I: IntoIterator<Item = Option<S>>,
--- a/libs/utils/src/leaky_bucket.rs
+++ b/libs/utils/src/leaky_bucket.rs
@@ -28,7 +28,6 @@ use std::time::Duration;
 use tokio::sync::Notify;
 use tokio::time::Instant;

-#[derive(Clone, Copy)]
 pub struct LeakyBucketConfig {
    /// This is the "time cost" of a single request unit.
    /// Should loosely represent how long it takes to handle a request unit in active resource time.
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -73,7 +73,6 @@ pub mod error;
 /// async timeout helper
 pub mod timeout;

-pub mod span;
 pub mod sync;

 pub mod failpoint_support;
--- a/libs/utils/src/span.rs
+++ b/libs/utils/src/span.rs
@@ -1,19 +0,0 @@
-//! Tracing span helpers.
-
-/// Records the given fields in the current span, as a single call. The fields must already have
-/// been declared for the span (typically with empty values).
-#[macro_export]
-macro_rules! span_record {
-    ($($tokens:tt)*) => {$crate::span_record_in!(::tracing::Span::current(), $($tokens)*)};
-}
-
-/// Records the given fields in the given span, as a single call. The fields must already have been
-/// declared for the span (typically with empty values).
-#[macro_export]
-macro_rules! span_record_in {
-    ($span:expr, $($tokens:tt)*) => {
-        if let Some(meta) = $span.metadata() {
-            $span.record_all(&tracing::valueset!(meta.fields(), $($tokens)*));
-        }
-    };
-}
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -439,7 +439,6 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
        currentClusterSize: crate::bindings::pg_atomic_uint64 { value: 0 },
        shard_ps_feedback: [empty_feedback; 128],
        num_shards: 0,
-        replica_promote: false,
        min_ps_feedback: empty_feedback,
    }
 }
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -1,7 +1,6 @@
 #![allow(clippy::todo)]

 use std::ffi::CString;
-use std::str::FromStr;

 use postgres_ffi::WAL_SEGMENT_SIZE;
 use utils::id::TenantTimelineId;
@@ -174,8 +173,6 @@ pub struct Config {
    pub ttid: TenantTimelineId,
    /// List of safekeepers in format `host:port`
    pub safekeepers_list: Vec<String>,
-    /// libpq connection info options
-    pub safekeeper_conninfo_options: String,
    /// Safekeeper reconnect timeout in milliseconds
    pub safekeeper_reconnect_timeout: i32,
    /// Safekeeper connection timeout in milliseconds
@@ -205,9 +202,6 @@ impl Wrapper {
            .into_bytes_with_nul();
        assert!(safekeepers_list_vec.len() == safekeepers_list_vec.capacity());
        let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut std::ffi::c_char;
-        let safekeeper_conninfo_options = CString::from_str(&config.safekeeper_conninfo_options)
-            .unwrap()
-            .into_raw();

        let callback_data = Box::into_raw(Box::new(api)) as *mut ::std::os::raw::c_void;

@@ -215,7 +209,6 @@ impl Wrapper {
            neon_tenant,
            neon_timeline,
            safekeepers_list,
-            safekeeper_conninfo_options,
            safekeeper_reconnect_timeout: config.safekeeper_reconnect_timeout,
            safekeeper_connection_timeout: config.safekeeper_connection_timeout,
            wal_segment_size: WAL_SEGMENT_SIZE as i32, // default 16MB
@@ -583,7 +576,6 @@ mod tests {
        let config = crate::walproposer::Config {
            ttid,
            safekeepers_list: vec!["localhost:5000".to_string()],
-            safekeeper_conninfo_options: String::new(),
            safekeeper_reconnect_timeout: 1000,
            safekeeper_connection_timeout: 10000,
            sync_safekeepers: true,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -17,72 +17,50 @@ anyhow.workspace = true
 arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
-bincode.workspace = true
 bit_field.workspace = true
+bincode.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
-camino-tempfile.workspace = true
 camino.workspace = true
+camino-tempfile.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
 consumption_metrics.workspace = true
 crc32c.workspace = true
 either.workspace = true
-enum-map.workspace = true
-enumset = { workspace = true, features = ["serde"]}
 fail.workspace = true
 futures.workspace = true
 hashlink.workspace = true
 hex.workspace = true
-http.workspace = true
-http-utils.workspace = true
-humantime-serde.workspace = true
 humantime.workspace = true
+humantime-serde.workspace = true
 hyper0.workspace = true
 itertools.workspace = true
 jsonwebtoken.workspace = true
 md5.workspace = true
-metrics.workspace = true
 nix.workspace = true
-num_cpus.workspace = true # hack to get the number of worker threads tokio uses
+# hack to get the number of worker threads tokio uses
+num_cpus.workspace = true
 num-traits.workspace = true
 once_cell.workspace = true
-pageserver_api.workspace = true
-pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
-pageserver_compaction.workspace = true
-pageserver_page_api.workspace = true
-peekable.workspace = true
-pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
-postgres_connection.workspace = true
-postgres_ffi.workspace = true
-postgres_initdb.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
-posthog_client_lite.workspace = true
+postgres_initdb.workspace = true
 pprof.workspace = true
-pq_proto.workspace = true
-prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
-remote_storage.workspace = true
-reqwest.workspace = true
-rpds.workspace = true
 rustls.workspace = true
 scopeguard.workspace = true
 send-future.workspace = true
+serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 serde_path_to_error.workspace = true
 serde_with.workspace = true
-serde.workspace = true
-smallvec.workspace = true
-storage_broker.workspace = true
-strum_macros.workspace = true
-strum.workspace = true
 sysinfo.workspace = true
-tenant_size_model.workspace = true
+tokio-tar.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
@@ -91,19 +69,34 @@ tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
 tokio-rustls.workspace = true
 tokio-stream.workspace = true
-tokio-tar.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
-tonic.workspace = true
-tonic-reflection.workspace = true
-tower.workspace = true
 tracing.workspace = true
 tracing-utils.workspace = true
 url.workspace = true
-utils.workspace = true
-wal_decoder.workspace = true
 walkdir.workspace = true
+metrics.workspace = true
+pageserver_api.workspace = true
+pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
+pageserver_compaction.workspace = true
+pem.workspace = true
+postgres_connection.workspace = true
+postgres_ffi.workspace = true
+pq_proto.workspace = true
+remote_storage.workspace = true
+storage_broker.workspace = true
+tenant_size_model.workspace = true
+http-utils.workspace = true
+utils.workspace = true
 workspace_hack.workspace = true
+reqwest.workspace = true
+rpds.workspace = true
+enum-map.workspace = true
+enumset = { workspace = true, features = ["serde"]}
+strum.workspace = true
+strum_macros.workspace = true
+wal_decoder.workspace = true
+smallvec.workspace = true
 twox-hash.workspace = true

 [target.'cfg(target_os = "linux")'.dependencies]
--- a/pageserver/benches/bench_metrics.rs
+++ b/pageserver/benches/bench_metrics.rs
@@ -264,56 +264,10 @@ mod propagation_of_cached_label_value {
    }
 }

-criterion_group!(histograms, histograms::bench_bucket_scalability);
-mod histograms {
-    use std::time::Instant;
-
-    use criterion::{BenchmarkId, Criterion};
-    use metrics::core::Collector;
-
-    pub fn bench_bucket_scalability(c: &mut Criterion) {
-        let mut g = c.benchmark_group("bucket_scalability");
-
-        for n in [1, 4, 8, 16, 32, 64, 128, 256] {
-            g.bench_with_input(BenchmarkId::new("nbuckets", n), &n, |b, n| {
-                b.iter_custom(|iters| {
-                    let buckets: Vec<f64> = (0..*n).map(|i| i as f64 * 100.0).collect();
-                    let histo = metrics::Histogram::with_opts(
-                        metrics::prometheus::HistogramOpts::new("name", "help")
-                            .buckets(buckets.clone()),
-                    )
-                    .unwrap();
-                    let start = Instant::now();
-                    for i in 0..usize::try_from(iters).unwrap() {
-                        histo.observe(buckets[i % buckets.len()]);
-                    }
-                    let elapsed = start.elapsed();
-                    // self-test
-                    let mfs = histo.collect();
-                    assert_eq!(mfs.len(), 1);
-                    let metrics = mfs[0].get_metric();
-                    assert_eq!(metrics.len(), 1);
-                    let histo = metrics[0].get_histogram();
-                    let buckets = histo.get_bucket();
-                    assert!(
-                        buckets
-                            .iter()
-                            .enumerate()
-                            .all(|(i, b)| b.get_cumulative_count()
-                                >= i as u64 * (iters / buckets.len() as u64))
-                    );
-                    elapsed
-                })
-            });
-        }
-    }
-}
-
 criterion_main!(
    label_values,
    single_metric_multicore_scalability,
-    propagation_of_cached_label_value,
-    histograms,
+    propagation_of_cached_label_value
 );

 /*
@@ -336,14 +290,6 @@ propagation_of_cached_label_value__naive/nthreads/8 time:   [211.50 ns 214.44 ns
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time:   [14.135 ns 14.147 ns 14.160 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time:   [14.243 ns 14.255 ns 14.268 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time:   [14.470 ns 14.682 ns 14.895 ns]
-bucket_scalability/nbuckets/1     time:   [30.352 ns 30.353 ns 30.354 ns]
-bucket_scalability/nbuckets/4     time:   [30.464 ns 30.465 ns 30.467 ns]
-bucket_scalability/nbuckets/8     time:   [30.569 ns 30.575 ns 30.584 ns]
-bucket_scalability/nbuckets/16      time:   [30.961 ns 30.965 ns 30.969 ns]
-bucket_scalability/nbuckets/32      time:   [35.691 ns 35.707 ns 35.722 ns]
-bucket_scalability/nbuckets/64      time:   [47.829 ns 47.898 ns 47.974 ns]
-bucket_scalability/nbuckets/128     time:   [73.479 ns 73.512 ns 73.545 ns]
-bucket_scalability/nbuckets/256     time:   [127.92 ns 127.94 ns 127.96 ns]

 Results on an i3en.3xlarge instance

@@ -398,14 +344,6 @@ propagation_of_cached_label_value__naive/nthreads/8     time:   [434.87 ns 456.4
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [3.3767 ns 3.3974 ns 3.4220 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [3.6105 ns 4.2355 ns 5.1463 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [4.0889 ns 4.9714 ns 6.0779 ns]
-bucket_scalability/nbuckets/1     time:   [4.8455 ns 4.8542 ns 4.8646 ns]
-bucket_scalability/nbuckets/4     time:   [4.5663 ns 4.5722 ns 4.5787 ns]
-bucket_scalability/nbuckets/8     time:   [4.5531 ns 4.5670 ns 4.5842 ns]
-bucket_scalability/nbuckets/16      time:   [4.6392 ns 4.6524 ns 4.6685 ns]
-bucket_scalability/nbuckets/32      time:   [6.0302 ns 6.0439 ns 6.0589 ns]
-bucket_scalability/nbuckets/64      time:   [10.608 ns 10.644 ns 10.691 ns]
-bucket_scalability/nbuckets/128     time:   [22.178 ns 22.316 ns 22.483 ns]
-bucket_scalability/nbuckets/256     time:   [42.190 ns 42.328 ns 42.492 ns]

 Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor

@@ -424,13 +362,5 @@ propagation_of_cached_label_value__naive/nthreads/8     time:   [164.24 ns 170.1
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1     time:   [2.2915 ns 2.2960 ns 2.3012 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4     time:   [2.5726 ns 2.6158 ns 2.6624 ns]
 propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8     time:   [2.7068 ns 2.8243 ns 2.9824 ns]
-bucket_scalability/nbuckets/1     time:   [6.3998 ns 6.4288 ns 6.4684 ns]
-bucket_scalability/nbuckets/4     time:   [6.3603 ns 6.3620 ns 6.3637 ns]
-bucket_scalability/nbuckets/8     time:   [6.1646 ns 6.1654 ns 6.1667 ns]
-bucket_scalability/nbuckets/16      time:   [6.1341 ns 6.1391 ns 6.1454 ns]
-bucket_scalability/nbuckets/32      time:   [8.2206 ns 8.2254 ns 8.2301 ns]
-bucket_scalability/nbuckets/64      time:   [13.988 ns 13.994 ns 14.000 ns]
-bucket_scalability/nbuckets/128     time:   [28.180 ns 28.216 ns 28.251 ns]
-bucket_scalability/nbuckets/256     time:   [54.914 ns 54.931 ns 54.951 ns]

 */
--- a/pageserver/client_grpc/Cargo.toml
+++ b/pageserver/client_grpc/Cargo.toml
@@ -1,30 +0,0 @@
-[package]
-name = "pageserver_client_grpc"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-bytes.workspace = true
-futures.workspace = true
-http.workspace = true
-thiserror.workspace = true
-tonic.workspace = true
-tracing.workspace = true
-tokio = { version = "1.43.1", features = ["full", "macros", "net", "io-util", "rt", "rt-multi-thread"] }
-uuid = { version = "1", features = ["v4"] }
-tower = {  version = "0.4", features = ["timeout", "util"] }
-rand = "0.8"
-tokio-util = { version = "0.7", features = ["compat"] }
-hyper-util = "0.1.9"
-hyper = "1.6.0"
-metrics.workspace = true
-priority-queue = "2.3.1"
-async-trait = { version = "0.1" }
-tokio-stream = "0.1"
-dashmap = "5"
-chrono = { version = "0.4", features = ["serde"] }
-
-
-pageserver_page_api.workspace = true
-pageserver_api.workspace = true
-utils.workspace = true
--- a/pageserver/client_grpc/examples/load_test.rs
+++ b/pageserver/client_grpc/examples/load_test.rs
@@ -1,296 +0,0 @@
-// examples/load_test.rs, generated by AI
-
-use std::collections::{HashMap, HashSet};
-use std::sync::{
-    Arc,
-    Mutex,
-    atomic::{AtomicU64, AtomicUsize, Ordering},
-};
-use std::time::{Duration, Instant};
-
-use tokio::task;
-use tokio::time::sleep;
-use rand::Rng;
-use tonic::Status;
-use uuid::Uuid;
-
-// Pull in your ConnectionPool and PooledItemFactory from the pageserver_client_grpc crate.
-// Adjust these paths if necessary.
-use pageserver_client_grpc::client_cache::ConnectionPool;
-use pageserver_client_grpc::client_cache::PooledItemFactory;
-
-// --------------------------------------
-// GLOBAL COUNTERS FOR “CREATED” / “DROPPED” MockConnections
-// --------------------------------------
-static CREATED: AtomicU64 = AtomicU64::new(0);
-static DROPPED: AtomicU64 = AtomicU64::new(0);
-
-// --------------------------------------
-// MockConnection + Factory
-// --------------------------------------
-
-#[derive(Debug)]
-pub struct MockConnection {
-    pub id: u64,
-}
-
-impl Clone for MockConnection {
-    fn clone(&self) -> Self {
-        // Cloning a MockConnection does NOT count as “creating” a brand‐new connection,
-        // so we do NOT bump CREATED here. We only bump CREATED in the factory’s `create()`.
-        CREATED.fetch_add(1, Ordering::Relaxed);
-        MockConnection { id: self.id }
-    }
-}
-
-impl Drop for MockConnection {
-    fn drop(&mut self) {
-        // When a MockConnection actually gets dropped, bump the counter.
-        DROPPED.fetch_add(1, Ordering::SeqCst);
-    }
-}
-
-pub struct MockConnectionFactory {
-    counter: AtomicU64,
-}
-
-impl MockConnectionFactory {
-    pub fn new() -> Self {
-        MockConnectionFactory {
-            counter: AtomicU64::new(1),
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl PooledItemFactory<MockConnection> for MockConnectionFactory {
-    /// The trait on ConnectionPool expects:
-    ///   async fn create(&self, timeout: Duration)
-    ///       -> Result<Result<MockConnection, Status>, tokio::time::error::Elapsed>;
-    ///
-    /// On success: Ok(Ok(MockConnection))
-    /// On a simulated “gRPC” failure: Ok(Err(Status::…))
-    /// On a transport/factory error: Err(Box<…>)
-    async fn create(
-        &self,
-        _timeout: Duration,
-    ) -> Result<Result<MockConnection, Status>, tokio::time::error::Elapsed> {
-        // Simulate connection creation immediately succeeding.
-        CREATED.fetch_add(1, Ordering::SeqCst);
-        let next_id = self.counter.fetch_add(1, Ordering::Relaxed);
-        Ok(Ok(MockConnection { id: next_id }))
-    }
-}
-
-// --------------------------------------
-// CLIENT WORKER
-// --------------------------------------
-//
-// Each worker repeatedly calls `pool.get_client().await`. When it succeeds, we:
-//  1. Lock the shared Mutex<HashMap<u64, Arc<AtomicUsize>>> to fetch/insert an Arc<AtomicUsize> for this conn_id.
-//  2. Lock the shared Mutex<HashSet<u64>> to record this conn_id as “seen.”
-//  3. Drop both locks, then atomically increment that counter and assert it ≤ max_consumers.
-//  4. Sleep 10–100 ms to simulate “work.”
-//  5. Atomically decrement the counter.
-//  6. Call `pooled.finish(Ok(()))` to return to the pool.
-
-async fn client_worker(
-    pool: Arc<ConnectionPool<MockConnection>>,
-    usage_map: Arc<Mutex<HashMap<u64, Arc<AtomicUsize>>>>,
-    seen_set: Arc<Mutex<HashSet<u64>>>,
-    max_consumers: usize,
-    worker_id: usize,
-) {
-    for iteration in 0..10 {
-        match pool.clone().get_client().await {
-            Ok(pooled) => {
-                let conn: MockConnection = pooled.channel();
-                let conn_id = conn.id;
-
-                // 1. Fetch or insert the Arc<AtomicUsize> for this conn_id:
-                let counter_arc: Arc<AtomicUsize> = {
-                    let mut guard = usage_map.lock().unwrap();
-                    guard
-                        .entry(conn_id)
-                        .or_insert_with(|| Arc::new(AtomicUsize::new(0)))
-                        .clone()
-                    // MutexGuard is dropped here
-                };
-
-                // 2. Record this conn_id in the shared HashSet of “seen” IDs:
-                {
-                    let mut seen_guard = seen_set.lock().unwrap();
-                    seen_guard.insert(conn_id);
-                    // MutexGuard is dropped immediately
-                }
-
-                // 3. Atomically bump the count for this connection ID
-                let prev = counter_arc.fetch_add(1, Ordering::SeqCst);
-                let current = prev + 1;
-                assert!(
-                    current <= max_consumers,
-                    "Connection {} exceeded max_consumers (got {})",
-                    conn_id,
-                    current
-                );
-
-                println!(
-                    "[worker {}][iter {}] got MockConnection id={} ({} concurrent)",
-                    worker_id, iteration, conn_id, current
-                );
-
-                // 4. Simulate some work (10–100 ms)
-                let delay_ms = rand::thread_rng().gen_range(10..100);
-                sleep(Duration::from_millis(delay_ms)).await;
-
-                // 5. Decrement the usage counter
-                let prev2 = counter_arc.fetch_sub(1, Ordering::SeqCst);
-                let after = prev2 - 1;
-                println!(
-                    "[worker {}][iter {}] returning MockConnection id={} (now {} remain)",
-                    worker_id, iteration, conn_id, after
-                );
-
-                // 6. Return to the pool (mark success)
-                pooled.finish(Ok(())).await;
-            }
-            Err(status) => {
-                eprintln!(
-                    "[worker {}][iter {}] failed to get client: {:?}",
-                    worker_id, iteration, status
-                );
-            }
-        }
-
-        // Small random pause before next iteration to spread out load
-        let pause = rand::thread_rng().gen_range(0..20);
-        sleep(Duration::from_millis(pause)).await;
-    }
-}
-
-#[tokio::main(flavor = "multi_thread", worker_threads = 8)]
-async fn main() {
-    // --------------------------------------
-    // 1. Create factory and shared instrumentation
-    // --------------------------------------
-    let factory = Arc::new(MockConnectionFactory::new());
-
-    // Shared map: connection ID → Arc<AtomicUsize>
-    let usage_map: Arc<Mutex<HashMap<u64, Arc<AtomicUsize>>>> =
-        Arc::new(Mutex::new(HashMap::new()));
-
-    // Shared set: record each unique connection ID we actually saw
-    let seen_set: Arc<Mutex<HashSet<u64>>> = Arc::new(Mutex::new(HashSet::new()));
-
-    // --------------------------------------
-    // 2. Pool parameters
-    // --------------------------------------
-    let connect_timeout    = Duration::from_millis(500);
-    let connect_backoff    = Duration::from_millis(100);
-    let max_consumers      = 100;                 // test limit
-    let error_threshold    = 2;                 // mock never fails
-    let max_idle_duration  = Duration::from_secs(2);
-    let max_total_connections  = 3;
-    let aggregate_metrics  = None;
-
-    let pool: Arc<ConnectionPool<MockConnection>> = ConnectionPool::new(
-        factory,
-        connect_timeout,
-        connect_backoff,
-        max_consumers,
-        error_threshold,
-        max_idle_duration,
-        max_total_connections,
-        aggregate_metrics,
-    );
-
-    // --------------------------------------
-    // 3. Spawn worker tasks
-    // --------------------------------------
-    let num_workers = 10000;
-    let mut handles = Vec::with_capacity(num_workers);
-    let start_time = Instant::now();
-
-    for worker_id in 0..num_workers {
-        let pool_clone   = Arc::clone(&pool);
-        let usage_clone  = Arc::clone(&usage_map);
-        let seen_clone   = Arc::clone(&seen_set);
-        let mc           = max_consumers;
-
-        let handle = task::spawn(async move {
-            client_worker(pool_clone, usage_clone, seen_clone, mc, worker_id).await;
-        });
-        handles.push(handle);
-    }
-
-    // --------------------------------------
-    // 4. Wait for workers to finish
-    // --------------------------------------
-    for handle in handles {
-        let _ = handle.await;
-    }
-    let elapsed = Instant::now().duration_since(start_time);
-    println!(
-        "All {} workers completed in {:?}",
-        num_workers, elapsed
-    );
-
-    // --------------------------------------
-    // 5. Print the total number of unique connections seen so far
-    // --------------------------------------
-    let unique_count = {
-        let seen_guard = seen_set.lock().unwrap();
-        seen_guard.len()
-    };
-    println!("Total unique connections used by workers: {}", unique_count);
-
-    // --------------------------------------
-    // 6. Sleep so the background sweeper can run (max_idle_duration = 2 s)
-    // --------------------------------------
-    sleep(Duration::from_secs(3)).await;
-
-    // --------------------------------------
-    // 7. Shutdown the pool
-    // --------------------------------------
-    let shutdown_pool = Arc::clone(&pool);
-    shutdown_pool.shutdown().await;
-    println!("Pool.shutdown() returned.");
-
-    // --------------------------------------
-    // 8. Verify that no background task still holds an Arc clone of `pool`.
-    //    If any task is still alive (sweeper/create_connection), strong_count > 1.
-    // --------------------------------------
-    sleep(Duration::from_secs(1)).await; // give tasks time to exit
-    let sc = Arc::strong_count(&pool);
-    assert!(
-        sc == 1,
-        "Pool tasks did not all terminate: Arc::strong_count = {} (expected 1)",
-        sc
-    );
-    println!("Verified: all pool tasks have terminated (strong_count == 1).");
-
-    // --------------------------------------
-    // 9. Verify no MockConnection was leaked:
-    //    CREATED must equal DROPPED.
-    // --------------------------------------
-    let created = CREATED.load(Ordering::SeqCst);
-    let dropped = DROPPED.load(Ordering::SeqCst);
-    assert!(
-        created == dropped,
-        "Leaked connections: created={} but dropped={}",
-        created,
-        dropped
-    );
-    println!(
-        "Verified: no connections leaked (created = {}, dropped = {}).",
-        created, dropped
-    );
-
-    // --------------------------------------
-    // 10. Because `client_worker` asserted inside that no connection
-    //     ever exceeded `max_consumers`, reaching this point means that check passed.
-    // --------------------------------------
-    println!("All per-connection usage stayed within max_consumers = {}.", max_consumers);
-
-    println!("Load test complete; exiting cleanly.");
-}
--- a/pageserver/client_grpc/examples/request_tracker_load_test.rs
+++ b/pageserver/client_grpc/examples/request_tracker_load_test.rs
@@ -1,160 +0,0 @@
-// examples/request_tracker_load_test.rs
-
-use std::{sync::Arc, time::Duration};
-use tokio;
-use pageserver_client_grpc::request_tracker::RequestTracker;
-use pageserver_client_grpc::request_tracker::MockStreamFactory;
-use pageserver_client_grpc::request_tracker::StreamReturner;
-use pageserver_client_grpc::client_cache::ConnectionPool;
-use pageserver_client_grpc::client_cache::PooledItemFactory;
-use pageserver_client_grpc::ClientCacheOptions;
-use pageserver_client_grpc::PageserverClientAggregateMetrics;
-use pageserver_client_grpc::AuthInterceptor;
-
-use pageserver_client_grpc::client_cache::ChannelFactory;
-
-use tonic::{transport::{Channel}, Request};
-
-use rand::prelude::*;
-
-use pageserver_api::key::Key;
-
-use utils::lsn::Lsn;
-use utils::id::TenantTimelineId;
-
-use futures::stream::FuturesOrdered;
-use futures::StreamExt;
-// use chrono
-use chrono::Utc;
-
-use pageserver_page_api::{GetPageClass, GetPageResponse};
-use pageserver_page_api::proto;
-#[derive(Clone)]
-struct KeyRange {
-    timeline: TenantTimelineId,
-    timeline_lsn: Lsn,
-    start: i128,
-    end: i128,
-}
-
-impl KeyRange {
-    fn len(&self) -> i128 {
-        self.end - self.start
-    }
-}
-
-#[tokio::main]
-async fn main() {
-    // 1) configure the client‐pool behavior
-    let client_cache_options = ClientCacheOptions {
-        max_delay_ms:       0,
-        drop_rate:          0.0,
-        hang_rate:          0.0,
-        connect_timeout:    Duration::from_secs(10),
-        connect_backoff:    Duration::from_millis(200),
-        max_consumers:      64,
-        error_threshold:    10,
-        max_idle_duration:  Duration::from_secs(60),
-        max_total_connections: 12,
-    };
-
-    // 2) metrics collector (we assume Default is implemented)
-    let metrics = Arc::new(PageserverClientAggregateMetrics::new());
-    let pool = ConnectionPool::<StreamReturner>::new(
-        Arc::new(MockStreamFactory::new(
-        )),
-        client_cache_options.connect_timeout,
-        client_cache_options.connect_backoff,
-        client_cache_options.max_consumers,
-        client_cache_options.error_threshold,
-        client_cache_options.max_idle_duration,
-        client_cache_options.max_total_connections,
-        Some(Arc::clone(&metrics)),
-    );
-
-    // -----------
-    // There is no mock for the unary connection pool, so for now just
-    // don't use this pool
-    //
-    let channel_fact : Arc<dyn PooledItemFactory<Channel> + Send + Sync> = Arc::new(ChannelFactory::new(
-        "".to_string(),
-        client_cache_options.max_delay_ms,
-        client_cache_options.drop_rate,
-        client_cache_options.hang_rate,
-    ));
-    let unary_pool: Arc<ConnectionPool<Channel>> = ConnectionPool::new(
-        Arc::clone(&channel_fact),
-        client_cache_options.connect_timeout,
-        client_cache_options.connect_backoff,
-        client_cache_options.max_consumers,
-        client_cache_options.error_threshold,
-        client_cache_options.max_idle_duration,
-        client_cache_options.max_total_connections,
-        Some(Arc::clone(&metrics)),
-    );
-
-    // -----------
-    // Dummy auth interceptor. This is not used in this test.
-    let auth_interceptor = AuthInterceptor::new("dummy_tenant_id",
-                                                "dummy_timeline_id",
-                                                None);
-    let mut tracker = RequestTracker::new(
-        pool,
-        unary_pool,
-        auth_interceptor,
-    );
-
-    // 4) fire off 10 000 requests in parallel
-    let mut handles = FuturesOrdered::new();
-    for i in 0..500000 {
-
-            let mut rng = rand::thread_rng();
-            let r = 0..=1000000i128;
-            let key: i128 = rng.gen_range(r.clone());
-            let key = Key::from_i128(key);
-            let (rel_tag, block_no) = key
-                .to_rel_block()
-                .expect("we filter non-rel-block keys out above");
-
-            let req2 = proto::GetPageRequest {
-                request_id: 0,
-                request_class: proto::GetPageClass::Normal as i32,
-                read_lsn: Some(proto::ReadLsn {
-                    request_lsn: if rng.gen_bool(0.5) {
-                        u64::from(Lsn::MAX)
-                    } else {
-                        10000
-                    },
-                    not_modified_since_lsn: 10000,
-                }),
-                rel: Some(rel_tag.into()),
-                block_number: vec![block_no],
-            };
-        let req_model = pageserver_page_api::GetPageRequest::try_from(req2.clone());
-
-        // RequestTracker is Clone, so we can share it
-        let mut tr = tracker.clone();
-        let fut = async move {
-            let resp = tr.send_getpage_request(req_model.unwrap()).await.unwrap();
-            // sanity‐check: the mock echo returns the same request_id
-            assert!(resp.request_id > 0);
-        };
-        handles.push_back(fut);
-
-        // empty future
-        let fut = async move {};
-        fut.await;
-    }
-
-    // print timestamp
-    println!("Starting 5000000 requests at: {}", chrono::Utc::now());
-    // 5) wait for them all
-    for i in 0..500000 {
-        handles.next().await.expect("Failed to get next handle");
-    }
-
-    // print timestamp
-    println!("Finished 5000000 requests at: {}", chrono::Utc::now());
-
-    println!("✅ All 100000 requests completed successfully");
-}
--- a/pageserver/client_grpc/src/client_cache.rs
+++ b/pageserver/client_grpc/src/client_cache.rs
@@ -1,741 +0,0 @@
-use std::{
-    collections::HashMap,
-    io::{self, Error, ErrorKind},
-    sync::Arc,
-    time::{Duration, Instant},
-};
-
-use priority_queue::PriorityQueue;
-
-use tokio::{
-    io::{AsyncRead, AsyncWrite, ReadBuf},
-    net::TcpStream,
-    sync::{Mutex, OwnedSemaphorePermit, Semaphore},
-    time::sleep,
-};
-use tonic::transport::{Channel, Endpoint};
-
-use uuid;
-
-use std::{
-    pin::Pin,
-    task::{Context, Poll},
-};
-
-use futures::future;
-use rand::{Rng, SeedableRng, rngs::StdRng};
-
-use bytes::BytesMut;
-use http::Uri;
-use hyper_util::rt::TokioIo;
-use tower::service_fn;
-
-use tokio_util::sync::CancellationToken;
-use async_trait::async_trait;
-
-//
-// The "TokioTcp" is flakey TCP network for testing purposes, in order
-// to simulate network errors and delays.
-//
-
-/// Wraps a `TcpStream`, buffers incoming data, and injects a random delay per fresh read/write.
-pub struct TokioTcp {
-    tcp: TcpStream,
-    /// Maximum randomized delay in milliseconds
-    delay_ms: u64,
-
-    /// Next deadline instant for delay
-    deadline: Instant,
-    /// Internal buffer of previously-read data
-    buffer: BytesMut,
-}
-
-impl TokioTcp {
-    /// Create a new wrapper with given max delay (ms)
-    pub fn new(stream: TcpStream, delay_ms: u64) -> Self {
-        let initial = if delay_ms > 0 {
-            rand::thread_rng().gen_range(0..delay_ms)
-        } else {
-            0
-        };
-        let deadline = Instant::now() + Duration::from_millis(initial);
-        TokioTcp {
-            tcp: stream,
-            delay_ms,
-            deadline,
-            buffer: BytesMut::new(),
-        }
-    }
-}
-
-impl AsyncRead for TokioTcp {
-    fn poll_read(
-        self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-        buf: &mut ReadBuf<'_>,
-    ) -> Poll<io::Result<()>> {
-        // Safe because TokioTcp is Unpin
-        let this = self.get_mut();
-
-        // 1) Drain any buffered data
-        if !this.buffer.is_empty() {
-            let to_copy = this.buffer.len().min(buf.remaining());
-            buf.put_slice(&this.buffer.split_to(to_copy));
-            return Poll::Ready(Ok(()));
-        }
-
-        // 2) If we're still before the deadline, schedule a wake and return Pending
-        let now = Instant::now();
-        if this.delay_ms > 0 && now < this.deadline {
-            let waker = cx.waker().clone();
-            let wait = this.deadline - now;
-            tokio::spawn(async move {
-                sleep(wait).await;
-                waker.wake_by_ref();
-            });
-            return Poll::Pending;
-        }
-
-        // 3) Past deadline: compute next random deadline
-        if this.delay_ms > 0 {
-            let next_ms = rand::thread_rng().gen_range(0..=this.delay_ms);
-            this.deadline = Instant::now() + Duration::from_millis(next_ms);
-        }
-
-        // 4) Perform actual read into a temporary buffer
-        let mut tmp = [0u8; 4096];
-        let mut rb = ReadBuf::new(&mut tmp);
-        match Pin::new(&mut this.tcp).poll_read(cx, &mut rb) {
-            Poll::Pending => Poll::Pending,
-            Poll::Ready(Ok(())) => {
-                let filled = rb.filled();
-                if filled.is_empty() {
-                    // EOF or zero bytes
-                    Poll::Ready(Ok(()))
-                } else {
-                    this.buffer.extend_from_slice(filled);
-                    let to_copy = this.buffer.len().min(buf.remaining());
-                    buf.put_slice(&this.buffer.split_to(to_copy));
-                    Poll::Ready(Ok(()))
-                }
-            }
-            Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
-        }
-    }
-}
-
-impl AsyncWrite for TokioTcp {
-    fn poll_write(
-        self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-        data: &[u8],
-    ) -> Poll<io::Result<usize>> {
-        let this = self.get_mut();
-
-        // 1) If before deadline, schedule wake and return Pending
-        let now = Instant::now();
-        if this.delay_ms > 0 && now < this.deadline {
-            let waker = cx.waker().clone();
-            let wait = this.deadline - now;
-            tokio::spawn(async move {
-                sleep(wait).await;
-                waker.wake_by_ref();
-            });
-            return Poll::Pending;
-        }
-
-        // 2) Past deadline: compute next random deadline
-        if this.delay_ms > 0 {
-            let next_ms = rand::thread_rng().gen_range(0..=this.delay_ms);
-            this.deadline = Instant::now() + Duration::from_millis(next_ms);
-        }
-
-        // 3) Actual write
-        Pin::new(&mut this.tcp).poll_write(cx, data)
-    }
-
-    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
-        let this = self.get_mut();
-        Pin::new(&mut this.tcp).poll_flush(cx)
-    }
-
-    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
-        let this = self.get_mut();
-        Pin::new(&mut this.tcp).poll_shutdown(cx)
-    }
-}
-
-#[async_trait]
-pub trait PooledItemFactory<T>: Send + Sync + 'static {
-    /// Create a new pooled item.
-    async fn create(&self, connect_timeout: Duration) ->  Result<Result<T, tonic::Status>, tokio::time::error::Elapsed>;
-}
-
-pub struct ChannelFactory {
-    endpoint: String,
-    max_delay_ms: u64,
-    drop_rate: f64,
-    hang_rate: f64,
-}
-
-
-impl ChannelFactory {
-    pub fn new(
-        endpoint: String,
-        max_delay_ms: u64,
-        drop_rate: f64,
-        hang_rate: f64,
-    ) -> Self {
-        ChannelFactory {
-            endpoint,
-            max_delay_ms,
-            drop_rate,
-            hang_rate,
-        }
-    }
-}
-
-#[async_trait]
-impl PooledItemFactory<Channel> for ChannelFactory {
-    /// Opens a new gRPC `Channel` to `self.endpoint`.
-    ///
-    /// The whole attempt is bounded by `connect_timeout`:
-    /// * outer `Err(Elapsed)`  - the attempt did not finish in time,
-    /// * `Ok(Err(status))`     - the connect failed; reported as `Unavailable`,
-    /// * `Ok(Ok(channel))`     - the channel is ready.
-    ///
-    /// The same duration is also installed on the endpoint via `.timeout(..)`.
-    ///
-    /// # Panics
-    /// Panics if `self.endpoint` cannot be parsed as an endpoint URI.
-    async fn create(&self, connect_timeout: Duration) -> Result<Result<Channel, tonic::Status>, tokio::time::error::Elapsed> {
-        let max_delay_ms = self.max_delay_ms;
-        let drop_rate = self.drop_rate;
-        let hang_rate = self.hang_rate;
-
-        // This is a custom connector that inserts delays and errors, for
-        // testing purposes. It would normally be disabled by the config.
-        let connector = service_fn(move |uri: Uri| async move {
-            let mut rng = StdRng::from_entropy();
-
-            // Simulate an indefinite hang: this future never completes, so
-            // only the surrounding timeout can end the attempt.
-            if hang_rate > 0.0 && rng.gen_bool(hang_rate) {
-                return future::pending::<Result<TokioIo<TokioTcp>, std::io::Error>>().await;
-            }
-
-            // Random drop (connect error)
-            if drop_rate > 0.0 && rng.gen_bool(drop_rate) {
-                return Err(std::io::Error::new(
-                    std::io::ErrorKind::Other,
-                    "simulated connect drop",
-                ));
-            }
-
-            // Otherwise perform a real TCP connect.
-            let host = match uri.host() {
-                Some(host) => host,
-                None => return Err(Error::new(ErrorKind::InvalidInput, "no host or port")),
-            };
-            // A socket address always needs a port. When the URI does not
-            // carry one, fall back to the scheme's default instead of handing
-            // a bare host name to `TcpStream::connect`, which rejects it.
-            let port = uri
-                .port_u16()
-                .unwrap_or_else(|| if uri.scheme_str() == Some("https") { 443 } else { 80 });
-            let addr = format!("{}:{}", host, port);
-
-            let tcp = TcpStream::connect(addr).await?;
-            // Wrap the stream so that I/O can be delayed by up to `max_delay_ms`.
-            let tcpwrapper = TokioTcp::new(tcp, max_delay_ms);
-            Ok(TokioIo::new(tcpwrapper))
-        });
-
-        let attempt = tokio::time::timeout(
-            connect_timeout,
-            Endpoint::from_shared(self.endpoint.clone())
-                .expect("invalid endpoint")
-                .timeout(connect_timeout)
-                .connect_with_connector(connector),
-        )
-        .await;
-
-        // Keep the timeout error as the outer layer; translate a transport
-        // failure into a gRPC `Unavailable` status.
-        attempt.map(|connected| {
-            connected.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unavailable,
-                    format!("Failed to connect: {}", e),
-                )
-            })
-        })
-    }
-}
-
-
-/// A pool of shared connections of type `T`, with per-connection capacity
-/// tracking and error handling.
-pub struct ConnectionPool<T> {
-    /// Mutable pool state; every access goes through this mutex.
-    inner: Mutex<Inner<T>>,
-
-    /// Factory that `create_connection` calls to open a new connection.
-    fact: Arc<dyn PooledItemFactory<T> + Send + Sync>,
-
-    /// Timeout handed to the factory for each connection attempt.
-    connect_timeout: Duration,
-    /// Inclusive upper bound for the randomly chosen initial retry backoff.
-    connect_backoff: Duration,
-    /// The maximum number of consumers that can use a single connection.
-    max_consumers: usize,
-    /// The number of consecutive errors before a connection is removed from the pool.
-    error_threshold: usize,
-    /// The maximum duration a connection can be idle before being removed.
-    max_idle_duration: Duration,
-    /// Cap on established plus in-progress connections; checked in
-    /// `get_client` before a new connection attempt is started.
-    max_total_connections: usize,
-
-    /// Starts with zero permits; `max_consumers` permits are added for each
-    /// connection that is created successfully. A permit is acquired before a
-    /// connection is handed out.
-    channel_semaphore: Arc<Semaphore>,
-
-    /// Cancelled by `shutdown`; checked by the idle sweeper task,
-    /// `get_client` and `create_connection`.
-    shutdown_token: CancellationToken,
-    /// Optional metrics; when present, `retry_counters` is incremented with a
-    /// label naming the pool event.
-    aggregate_metrics: Option<Arc<crate::PageserverClientAggregateMetrics>>,
-}
-
-/// Mutable pool state, kept behind `ConnectionPool::inner`.
-struct Inner<T> {
-    /// All pooled connections, keyed by connection id.
-    entries: HashMap<uuid::Uuid, ConnectionEntry<T>>,
-    /// Connection ids prioritised by their active consumer count. An id is
-    /// not re-inserted once its connection reaches `max_consumers`.
-    pq: PriorityQueue<uuid::Uuid, usize>,
-    // This is updated when a connection is dropped, or we fail
-    // to create a new connection.
-    last_connect_failure: Option<Instant>,
-    /// Number of `get_client` callers currently waiting for a permit.
-    waiters: usize,
-    /// Number of connection attempts that have been started but have not
-    /// yet produced a connection.
-    in_progress: usize,
-}
-/// Bookkeeping for one pooled connection.
-struct ConnectionEntry<T> {
-    /// The connection itself; it is cloned for every consumer that borrows it.
-    channel: T,
-    /// Number of consumers currently holding a clone of `channel`.
-    active_consumers: usize,
-    /// Failed uses in a row; reset to zero by a successful use.
-    consecutive_errors: usize,
-    /// Time the connection was last handed out or returned.
-    last_used: Instant,
-}
-
-/// A client borrowed from the pool.
-///
-/// Hand it back with `finish` so the pool can update the connection's
-/// consumer count and error state.
-pub struct PooledClient<T> {
-    /// Clone of the pooled connection.
-    pub channel: T,
-    /// The pool this client was borrowed from.
-    pool: Arc<ConnectionPool<T>>,
-    /// Starts as `true`; `finish` overwrites it with the outcome it is given.
-    is_ok: bool,
-    /// Id of the pooled connection that `channel` was cloned from.
-    id: uuid::Uuid,
-    /// Semaphore permit held for as long as the client is borrowed.
-    permit: OwnedSemaphorePermit,
-}
-
-impl<T: Clone + Send + 'static> ConnectionPool<T> {
-    /// Creates an empty pool and starts its background idle sweeper.
-    ///
-    /// No connection is opened here and the semaphore starts without permits.
-    /// The sweeper task runs `sweep_idle_connections`, pauses for five
-    /// seconds, and repeats until the pool's shutdown token is cancelled.
-    /// Must be called from within a tokio runtime because it spawns a task.
-    pub fn new(
-        fact: Arc<dyn PooledItemFactory<T> + Send + Sync>,
-        connect_timeout: Duration,
-        connect_backoff: Duration,
-        max_consumers: usize,
-        error_threshold: usize,
-        max_idle_duration: Duration,
-        max_total_connections: usize,
-        aggregate_metrics: Option<Arc<crate::PageserverClientAggregateMetrics>>,
-    ) -> Arc<Self> {
-        let shutdown_token = CancellationToken::new();
-
-        let empty_state = Inner::<T> {
-            entries: HashMap::new(),
-            pq: PriorityQueue::new(),
-            last_connect_failure: None,
-            waiters: 0,
-            in_progress: 0,
-        };
-
-        let pool = Arc::new(Self {
-            inner: Mutex::new(empty_state),
-            fact,
-            connect_timeout,
-            connect_backoff,
-            max_consumers,
-            error_threshold,
-            max_idle_duration,
-            max_total_connections,
-            channel_semaphore: Arc::new(Semaphore::new(0)),
-            shutdown_token: shutdown_token.clone(),
-            aggregate_metrics,
-        });
-
-        // Background sweeper: the task owns its own handle to the pool and
-        // to the token, and exits as soon as the token is cancelled.
-        let swept = Arc::clone(&pool);
-        tokio::spawn(async move {
-            loop {
-                tokio::select! {
-                    _ = shutdown_token.cancelled() => break,
-                    _ = async {
-                        swept.sweep_idle_connections().await;
-                        sleep(Duration::from_secs(5)).await;
-                    } => {}
-                }
-            }
-        });
-
-        pool
-    }
-
-    /// Shuts the pool down.
-    ///
-    /// Cancels the shutdown token (which stops the idle sweeper and makes
-    /// `get_client` refuse new callers), then polls every 100 ms until no
-    /// connection has an active consumer, and finally drops all connections.
-    ///
-    /// The idle check and the removal happen under the same lock acquisition
-    /// so no consumer can pick up a connection in between. The priority queue
-    /// is emptied together with the entry map so that the two never disagree.
-    pub async fn shutdown(self: Arc<Self>) {
-        self.shutdown_token.cancel();
-
-        loop {
-            {
-                let mut inner = self.inner.lock().await;
-                if inner.entries.values().all(|e| e.active_consumers == 0) {
-                    // Remove all entries, and the ids that refer to them.
-                    inner.entries.clear();
-                    inner.pq.clear();
-                    return;
-                }
-            } // lock released before sleeping so consumers can return clients
-            sleep(Duration::from_millis(100)).await;
-        }
-    }
-
-    /// Removes connections that have no active consumers and have not been
-    /// used for longer than `max_idle_duration`.
-    ///
-    /// Each removed connection is taken out of both the entry map and the
-    /// priority queue, and counted under the `connection_swept` metric label.
-    /// Semaphore permits are not adjusted here.
-    async fn sweep_idle_connections(self: &Arc<Self>) {
-        let now = Instant::now();
-        let mut inner = self.inner.lock().await;
-
-        // Pick the victims first, then remove them, all under one lock.
-        let stale: Vec<uuid::Uuid> = inner
-            .entries
-            .iter()
-            .filter(|(_, entry)| {
-                entry.active_consumers == 0
-                    && now.duration_since(entry.last_used) > self.max_idle_duration
-            })
-            .map(|(id, _)| *id)
-            .collect();
-
-        for id in stale {
-            if let Some(metrics) = &self.aggregate_metrics {
-                metrics
-                    .retry_counters
-                    .with_label_values(&["connection_swept"])
-                    .inc();
-            }
-            inner.entries.remove(&id);
-            inner.pq.remove(&id);
-        }
-    }
-
-    /// Given an already-acquired permit, takes a connection out of the heap.
-    ///
-    /// Returns `None`, after forgetting the permit, when the heap is empty.
-    /// That happens when permits of a removed connection are still
-    /// outstanding (for example a connection dropped for too many errors);
-    /// such permits are drained here one at a time and the caller retries.
-    async fn get_conn_with_permit(
-        self: Arc<Self>,
-        permit: OwnedSemaphorePermit,
-    ) -> Option<PooledClient<T>> {
-        let mut inner = self.inner.lock().await;
-
-        // The heap yields the connection with the most active consumers. It
-        // only ever holds connections that still have room for one more.
-        let id = match inner.pq.pop() {
-            Some((id, _priority)) => id,
-            None => {
-                permit.forget();
-                return None;
-            }
-        };
-
-        let (channel, consumers_now) = {
-            let entry = inner
-                .entries
-                .get_mut(&id)
-                .expect("pq and entries got out of sync");
-            entry.active_consumers += 1;
-            entry.last_used = Instant::now();
-            (entry.channel.clone(), entry.active_consumers)
-        };
-
-        // Put the connection back with its new priority unless it is now full.
-        if consumers_now < self.max_consumers {
-            inner.pq.push(id, consumers_now);
-        }
-
-        Some(PooledClient::<T> {
-            channel,
-            pool: Arc::clone(&self),
-            is_ok: true,
-            id,
-            permit,
-        })
-    }
-
-    /// Borrows a client from the pool, waiting until a connection slot is free.
-    ///
-    /// Returns `Unavailable` if the shutdown token is already cancelled on
-    /// entry. Otherwise it first tries to take a semaphore permit without
-    /// waiting. If none is free, it registers as a waiter, may spawn a task
-    /// that creates one more connection (bounded by `max_total_connections`),
-    /// and then waits for a permit. A permit for which no connection is found
-    /// in the heap is forgotten by `get_conn_with_permit`, and the loop retries.
-    ///
-    /// NOTE(review): `waiters` is only decremented after the permit wait
-    /// completes; if this future is dropped while waiting, the count appears
-    /// to stay incremented - confirm.
-    pub async fn get_client(self: Arc<Self>) -> Result<PooledClient<T>, tonic::Status> {
-        // The pool is shutting down. Don't accept new connections.
-        if self.shutdown_token.is_cancelled() {
-            return Err(tonic::Status::unavailable("Pool is shutting down"));
-        }
-
-        // A loop is necessary because when a connection is draining, we have to return
-        // a permit and retry.
-        loop {
-            let self_clone = Arc::clone(&self);
-            let mut semaphore = Arc::clone(&self_clone.channel_semaphore);
-
-            // Fast path: a permit is free right now.
-            match semaphore.try_acquire_owned() {
-                Ok(permit_) => {
-                    // We got a permit, so check the heap for a connection
-                    // we can use.
-                    let pool_conn = self_clone.get_conn_with_permit(permit_).await;
-                    match pool_conn {
-                        Some(pool_conn_) => {
-                            return Ok(pool_conn_);
-                        }
-                        None => {
-                            // No connection available. Forget the permit and retry.
-                            continue;
-                        }
-                    }
-                }
-                // Slow path: no permit is free, so wait for one.
-                Err(_) => {
-                    match self_clone.aggregate_metrics {
-                        Some(ref metrics) => {
-                            metrics
-                                .retry_counters
-                                .with_label_values(&["sema_acquire_failed"])
-                                .inc();
-                        }
-                        None => {}
-                    }
-
-                    {
-                        //
-                        // This is going to generate enough connections to handle a burst,
-                        // but it may generate up to twice the number of connections needed
-                        // in the worst case. Extra connections will go idle and be cleaned
-                        // up.
-                        //
-                        let mut inner = self_clone.inner.lock().await;
-                        inner.waiters += 1;
-                        // Start another connection only when the attempts already in
-                        // flight cannot serve all current waiters.
-                        if inner.waiters > (inner.in_progress * self_clone.max_consumers) {
-                            if (inner.entries.len() + inner.in_progress) < self_clone.max_total_connections {
-
-                                let self_clone_spawn = Arc::clone(&self_clone);
-                                tokio::task::spawn(async move {
-                                    self_clone_spawn.create_connection().await;
-                                });
-                                // Counted while the lock is still held, so the spawned
-                                // task cannot decrement it before this increment.
-                                inner.in_progress += 1;
-                            }
-
-                        }
-                    }
-                    // Wait for a connection to become available, either because it
-                    // was created or because a connection was returned to the pool
-                    // by another consumer.
-                    semaphore = Arc::clone(&self_clone.channel_semaphore);
-                    // `unwrap` panics only if the semaphore has been closed.
-                    let conn_permit = semaphore.acquire_owned().await.unwrap();
-                    {
-                        let mut inner = self_clone.inner.lock().await;
-                        inner.waiters -= 1;
-                    }
-                    // We got a permit, check the heap for a connection.
-                    let pool_conn = self_clone.get_conn_with_permit(conn_permit).await;
-                    match pool_conn {
-                        Some(pool_conn_) => {
-                            return Ok(pool_conn_);
-                        }
-                        None => {
-                            // No connection was found, forget the permit and retry.
-                            continue;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /// Opens one new connection, retrying with jittered backoff until it
-    /// succeeds or the pool is shut down.
-    ///
-    /// On success the connection is registered with zero consumers, the
-    /// in-progress count is released, and `max_consumers` permits are added
-    /// to the semaphore. On failure the time of the failure is recorded and
-    /// the backoff grows by a random amount, capped at one minute.
-    async fn create_connection(&self) {
-        // Generate a random backoff to add some jitter so that connections
-        // don't all retry at the same time.
-        let mut backoff_delay = Duration::from_millis(
-            rand::thread_rng().gen_range(0..=self.connect_backoff.as_millis() as u64),
-        );
-
-        loop {
-            if self.shutdown_token.is_cancelled() {
-                // This attempt is abandoned, so release its in-progress slot
-                // just like a completed attempt would.
-                let mut inner = self.inner.lock().await;
-                inner.in_progress = inner.in_progress.saturating_sub(1);
-                return;
-            }
-
-            // Back off.
-            // Loop because failure can occur while we are sleeping, so wait
-            // until the failure stopped for at least one backoff period. Backoff
-            // period includes some jitter, so that if multiple connections are
-            // failing, they don't all retry at the same time.
-            loop {
-                let remaining = {
-                    let inner = self.inner.lock().await;
-                    // `elapsed()` is read exactly once and subtracted with
-                    // `checked_sub`, so a deadline that passes while we are
-                    // computing cannot underflow the `Duration`.
-                    inner
-                        .last_connect_failure
-                        .and_then(|at| backoff_delay.checked_sub(at.elapsed()))
-                        .filter(|left| !left.is_zero())
-                };
-                match remaining {
-                    Some(delay) => sleep(delay).await,
-                    None => break, // No delay, so we can create a connection
-                }
-            }
-
-            //
-            // Create a new connection.
-            //
-            // The connect timeout is also the timeout for an individual gRPC request
-            // on this connection. (Requests made later on this channel will time out
-            // with the same timeout.)
-            //
-            if let Some(metrics) = &self.aggregate_metrics {
-                metrics
-                    .retry_counters
-                    .with_label_values(&["connection_attempt"])
-                    .inc();
-            }
-
-            let attempt = self.fact.create(self.connect_timeout).await;
-
-            match attempt {
-                // Connection succeeded
-                Ok(Ok(channel)) => {
-                    if let Some(metrics) = &self.aggregate_metrics {
-                        metrics
-                            .retry_counters
-                            .with_label_values(&["connection_success"])
-                            .inc();
-                    }
-                    let mut inner = self.inner.lock().await;
-                    let id = uuid::Uuid::new_v4();
-                    inner.entries.insert(
-                        id,
-                        ConnectionEntry::<T> {
-                            channel: channel.clone(),
-                            active_consumers: 0,
-                            consecutive_errors: 0,
-                            last_used: Instant::now(),
-                        },
-                    );
-                    inner.pq.push(id, 0);
-                    inner.in_progress -= 1;
-                    // Publish the new capacity only after the entry is in place.
-                    self.channel_semaphore.add_permits(self.max_consumers);
-                    return;
-                }
-                // Connection failed, back off and retry
-                Ok(Err(_)) | Err(_) => {
-                    if let Some(metrics) = &self.aggregate_metrics {
-                        metrics
-                            .retry_counters
-                            .with_label_values(&["connect_failed"])
-                            .inc();
-                    }
-                    let mut inner = self.inner.lock().await;
-                    inner.last_connect_failure = Some(Instant::now());
-                    // Add some jitter so that every connection doesn't retry at once
-                    let jitter = rand::thread_rng().gen_range(0..=backoff_delay.as_millis() as u64);
-                    backoff_delay =
-                        Duration::from_millis(backoff_delay.as_millis() as u64 + jitter);
-
-                    // Do not backoff longer than one minute
-                    if backoff_delay > Duration::from_secs(60) {
-                        backoff_delay = Duration::from_secs(60);
-                    }
-                    // continue the loop to retry
-                }
-            }
-        }
-    }
-
-    /// Hands a borrowed client back to the pool and records its outcome.
-    ///
-    /// `success == false` counts towards the connection's consecutive errors;
-    /// a success resets that count. Once the count reaches `error_threshold`
-    /// the connection is removed from the pool and the caller's permit is
-    /// forgotten. Otherwise the connection's heap priority is refreshed and
-    /// the permit is released when it is dropped at the end of this call.
-    /// If `id` is no longer in the pool, nothing is updated.
-    ///
-    /// # Panics
-    /// Panics if the connection has no active consumers recorded.
-    pub async fn return_client(&self, id: uuid::Uuid, success: bool, permit: OwnedSemaphorePermit) {
-        let mut inner = self.inner.lock().await;
-
-        // Update the entry and copy out what the heap bookkeeping needs.
-        let (remaining, evict) = match inner.entries.get_mut(&id) {
-            None => return,
-            Some(entry) => {
-                entry.last_used = Instant::now();
-                if entry.active_consumers == 0 {
-                    panic!("A consumer completed when active_consumers was zero!")
-                }
-                entry.active_consumers -= 1;
-
-                if !success {
-                    entry.consecutive_errors += 1;
-                    // Count the drop once, at the moment the threshold is hit.
-                    if entry.consecutive_errors == self.error_threshold {
-                        if let Some(metrics) = &self.aggregate_metrics {
-                            metrics
-                                .retry_counters
-                                .with_label_values(&["connection_dropped"])
-                                .inc();
-                        }
-                    }
-                } else if entry.consecutive_errors < self.error_threshold {
-                    entry.consecutive_errors = 0;
-                }
-
-                (
-                    entry.active_consumers,
-                    entry.consecutive_errors >= self.error_threshold,
-                )
-            }
-        };
-
-        if evict {
-            // Too many errors: take the connection out of the heap and the
-            // entry map so that it is never selected again.
-            inner.pq.remove(&id);
-            inner.entries.remove(&id);
-            inner.last_connect_failure = Some(Instant::now());
-
-            // Permits of a removed connection are drained lazily: whoever
-            // acquires one and finds no connection forgets it. Forgetting this
-            // one right away makes that draining a little faster.
-            permit.forget();
-        } else if inner.pq.get_priority(&id).is_some() {
-            // Still in the heap: update its priority.
-            inner.pq.change_priority(&id, remaining);
-        } else if remaining < self.max_consumers {
-            // Not in the heap, but it has room for more consumers again.
-            inner.pq.push(id, remaining);
-        }
-    }
-}
-
-impl<T: Clone + Send + 'static> PooledClient<T> {
-    /// Returns a clone of the borrowed connection.
-    pub fn channel(&self) -> T {
-        self.channel.clone()
-    }
-
-    /// Consumes the client and returns it to its pool, reporting whether the
-    /// work done on it succeeded (`result` is `Ok`) or failed.
-    pub async fn finish(mut self, result: Result<(), tonic::Status>) {
-        self.is_ok = result.is_ok();
-        let PooledClient {
-            pool,
-            is_ok,
-            id,
-            permit,
-            ..
-        } = self;
-        pool.return_client(id, is_ok, permit).await;
-    }
-}
--- a/Show More
+++ b/Show More